sklearn-1.2.1.patch: add; fix test failures past sklearn 1.2

author: Étienne Mollier <emollier@debian.org> 2023-02-03 21:34:47 +0100
committer: Étienne Mollier <emollier@debian.org> 2023-02-03 21:34:47 +0100
commit: 255625c5b5d2eae415358a423e4a2b31094eea72 (patch)
tree: 421af90ad6bc52a6186d3b4e7c91d1ab11449960
parent: 6a67eb4c6ebfe4a401c659ba607bfba03e6492a3 (diff)
2 files changed, 69 insertions, 0 deletions
diff --git a/debian/patches/series b/debian/patches/series
index 268890f..70969ce 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,4 @@
 py2to3test.patch
 reduce-precision-in-tests.patch
 fix-autopkgtest.patch
+sklearn-1.2.1.patch
diff --git a/debian/patches/sklearn-1.2.1.patch b/debian/patches/sklearn-1.2.1.patch
new file mode 100644
index 0000000..be11980
--- /dev/null
+++ b/debian/patches/sklearn-1.2.1.patch
@@ -0,0 +1,68 @@
+Description: fix get_feature_names removal with sklearn 1.2.1
+ Since sklearn 1.2.1, autopkgtests are failing due to occurrences of:
+ AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names'
+ This method is replaced by get_feature_names_out; uses of rfecv.grid_scores_ are likewise replaced by rfecv.cv_results_['mean_test_score'].
+Author: Étienne Mollier <emollier@debian.org>
+Forwarded: no
+Last-Update: 2023-02-02
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- q2-sample-classifier.orig/q2_sample_classifier/tests/test_estimators.py
++++ q2-sample-classifier/q2_sample_classifier/tests/test_estimators.py
+@@ -134,7 +134,7 @@
+         dv = DictVectorizer()
+         dv.fit(dicts)
+         features = table.ids('observation')
+-        self.assertEqual(set(dv.get_feature_names()), set(features))
++        self.assertEqual(set(dv.get_feature_names_out()), set(features))
+         self.assertEqual(len(dicts), len(table.ids()))
+         for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
+             for feature, count in zip(features, table_row):
+--- q2-sample-classifier.orig/q2_sample_classifier/utilities.py
++++ q2-sample-classifier/q2_sample_classifier/utilities.py
+@@ -234,7 +234,7 @@
+     # Describe top features
+     n_opt = rfecv.named_steps.est.n_features_
+     importance = _extract_important_features(
+-        rfecv.named_steps.dv.get_feature_names(),
++        rfecv.named_steps.dv.get_feature_names_out(),
+         rfecv.named_steps.est.ranking_)
+     importance = sort_importances(importance, ascending=True)[:n_opt]
+ 
+@@ -248,9 +248,10 @@
+     # If using fractional step, step = integer of fraction * n_features
+     if rfecv.step < 1:
+         rfecv.step = int(rfecv.step * n_features)
+-    # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array
++    # Need to manually calculate x-axis, as
++    # rfecv.cv_results_['mean_test_score'] is a 1-d array
+     x = [n_features - (n * rfecv.step)
+-         for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
++         for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)]
+     if x[0] < 1:
+         x[0] = 1
+     return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy')
+@@ -400,12 +401,12 @@
+     # feature_importances_ or coef_ to report feature importance/weights
+     try:
+         importances = _extract_important_features(
+-            estimator.named_steps.dv.get_feature_names(),
++            estimator.named_steps.dv.get_feature_names_out(),
+             estimator.named_steps.est.feature_importances_)
+     # is there a better way to determine whether estimator has coef_ ?
+     except AttributeError:
+         importances = _extract_important_features(
+-            estimator.named_steps.dv.get_feature_names(),
++            estimator.named_steps.dv.get_feature_names_out(),
+             estimator.named_steps.est.coef_)
+     return importances
+ 
+@@ -707,7 +708,7 @@
+ def _null_feature_importance(table):
+     feature_extractor = DictVectorizer()
+     feature_extractor.fit(table)
+-    imp = pd.DataFrame(index=feature_extractor.get_feature_names())
++    imp = pd.DataFrame(index=feature_extractor.get_feature_names_out())
+     imp.index.name = "feature"
+     imp["importance"] = 1
+     return imp
author	Étienne Mollier <emollier@debian.org>	2023-02-03 21:34:47 +0100
committer	Étienne Mollier <emollier@debian.org>	2023-02-03 21:34:47 +0100
commit	255625c5b5d2eae415358a423e4a2b31094eea72 (patch)
tree	421af90ad6bc52a6186d3b4e7c91d1ab11449960
parent	6a67eb4c6ebfe4a401c659ba607bfba03e6492a3 (diff)