diff options
-rw-r--r-- | debian/changelog | 8 | ||||
-rw-r--r-- | debian/control | 3 | ||||
-rw-r--r-- | debian/patches/reduce-precision-in-tests.patch | 10 | ||||
-rw-r--r-- | debian/patches/series | 1 | ||||
-rw-r--r-- | debian/patches/sklearn-1.2.1.patch | 68 | ||||
-rw-r--r-- | q2_sample_classifier/tests/test_estimators.py | 2 | ||||
-rw-r--r-- | q2_sample_classifier/utilities.py | 13 |
7 files changed, 92 insertions, 13 deletions
diff --git a/debian/changelog b/debian/changelog index 36986fc..d917857 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +q2-sample-classifier (2022.11.1-3) unstable; urgency=medium + + * reduce-precision-in-tests.patch: unfuzz. + * sklearn-1.2.1.patch: add; fix test failures past sklearn 1.2 + * d/control: add myself to uploaders + + -- Étienne Mollier <emollier@debian.org> Fri, 03 Feb 2023 21:35:26 +0100 + q2-sample-classifier (2022.11.1-2) unstable; urgency=medium * Team upload. diff --git a/debian/control b/debian/control index 0308ec1..c9b89eb 100644 --- a/debian/control +++ b/debian/control @@ -1,7 +1,8 @@ Source: q2-sample-classifier Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org> Uploaders: Liubov Chuprikova <chuprikovalv@gmail.com>, - Steffen Moeller <moeller@debian.org> + Steffen Moeller <moeller@debian.org>, + Étienne Mollier <emollier@debian.org> Section: science Priority: optional Build-Depends: debhelper-compat (= 13), diff --git a/debian/patches/reduce-precision-in-tests.patch b/debian/patches/reduce-precision-in-tests.patch index 14be00f..874d867 100644 --- a/debian/patches/reduce-precision-in-tests.patch +++ b/debian/patches/reduce-precision-in-tests.patch @@ -3,9 +3,9 @@ Description: Add more tolerance in 3 tests in order to avoid test failures due t Author: Nilesh Patra <nilesh@debian.org> Forwarded: yes Last-Update: 2021-07-25 ---- a/q2_sample_classifier/tests/test_estimators.py -+++ b/q2_sample_classifier/tests/test_estimators.py -@@ -247,9 +247,9 @@ class EstimatorsTests(SampleClassifierTe +--- q2-sample-classifier.orig/q2_sample_classifier/tests/test_estimators.py ++++ q2-sample-classifier/q2_sample_classifier/tests/test_estimators.py +@@ -250,9 +250,9 @@ pred, self.mdc_chard_fp.to_series(), 'ignore') accuracy = accuracy_score(truth, pred) self.assertAlmostEqual( @@ -17,7 +17,7 @@ Last-Update: 2021-07-25 # test if training classifier with pipeline classify_samples raises # warning when 
test_size = 0.0 -@@ -378,7 +378,7 @@ class EstimatorsTests(SampleClassifierTe +@@ -381,7 +381,7 @@ regressor, accuracy, seeded_results[regressor])) else: self.assertAlmostEqual( @@ -26,7 +26,7 @@ Last-Update: 2021-07-25 msg='Accuracy of %s regressor was %f, but expected %f' % ( regressor, accuracy, seeded_results[regressor])) -@@ -520,7 +520,7 @@ class EstimatorsTests(SampleClassifierTe +@@ -523,7 +523,7 @@ self.assertAlmostEqual( mse, seeded_predict_results[regressor], msg='Accuracy of %s regressor was %f, but expected %f' % ( diff --git a/debian/patches/series b/debian/patches/series index 268890f..70969ce 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1,4 @@ py2to3test.patch reduce-precision-in-tests.patch fix-autopkgtest.patch +sklearn-1.2.1.patch diff --git a/debian/patches/sklearn-1.2.1.patch b/debian/patches/sklearn-1.2.1.patch new file mode 100644 index 0000000..be11980 --- /dev/null +++ b/debian/patches/sklearn-1.2.1.patch @@ -0,0 +1,68 @@ +Description: fix get_feature_names deprecation with sklearn 1.2.1 + Since sklearn 1.2.1, the autopkgtests are failing due to occurrences of: + AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names' + This function is replaced by get_feature_names_out. 
+Author: Étienne Mollier <emollier@debian.org> +Forwarded: no +Last-Update: 2023-02-02 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- q2-sample-classifier.orig/q2_sample_classifier/tests/test_estimators.py ++++ q2-sample-classifier/q2_sample_classifier/tests/test_estimators.py +@@ -134,7 +134,7 @@ + dv = DictVectorizer() + dv.fit(dicts) + features = table.ids('observation') +- self.assertEqual(set(dv.get_feature_names()), set(features)) ++ self.assertEqual(set(dv.get_feature_names_out()), set(features)) + self.assertEqual(len(dicts), len(table.ids())) + for dict_row, (table_row, _, _) in zip(dicts, table.iter()): + for feature, count in zip(features, table_row): +--- q2-sample-classifier.orig/q2_sample_classifier/utilities.py ++++ q2-sample-classifier/q2_sample_classifier/utilities.py +@@ -234,7 +234,7 @@ + # Describe top features + n_opt = rfecv.named_steps.est.n_features_ + importance = _extract_important_features( +- rfecv.named_steps.dv.get_feature_names(), ++ rfecv.named_steps.dv.get_feature_names_out(), + rfecv.named_steps.est.ranking_) + importance = sort_importances(importance, ascending=True)[:n_opt] + +@@ -248,9 +248,10 @@ + # If using fractional step, step = integer of fraction * n_features + if rfecv.step < 1: + rfecv.step = int(rfecv.step * n_features) +- # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array ++ # Need to manually calculate x-axis, as ++ # rfecv.cv_results_['mean_test_score'] are a 1-d array + x = [n_features - (n * rfecv.step) +- for n in range(len(rfecv.grid_scores_)-1, -1, -1)] ++ for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)] + if x[0] < 1: + x[0] = 1 + return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy') +@@ -400,12 +401,12 @@ + # feature_importances_ or coef_ to report feature importance/weights + try: + importances = _extract_important_features( +- estimator.named_steps.dv.get_feature_names(), ++ 
estimator.named_steps.dv.get_feature_names_out(), + estimator.named_steps.est.feature_importances_) + # is there a better way to determine whether estimator has coef_ ? + except AttributeError: + importances = _extract_important_features( +- estimator.named_steps.dv.get_feature_names(), ++ estimator.named_steps.dv.get_feature_names_out(), + estimator.named_steps.est.coef_) + return importances + +@@ -707,7 +708,7 @@ + def _null_feature_importance(table): + feature_extractor = DictVectorizer() + feature_extractor.fit(table) +- imp = pd.DataFrame(index=feature_extractor.get_feature_names()) ++ imp = pd.DataFrame(index=feature_extractor.get_feature_names_out()) + imp.index.name = "feature" + imp["importance"] = 1 + return imp diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py index dfcc0ce..e6760bd 100644 --- a/q2_sample_classifier/tests/test_estimators.py +++ b/q2_sample_classifier/tests/test_estimators.py @@ -134,7 +134,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase): dv = DictVectorizer() dv.fit(dicts) features = table.ids('observation') - self.assertEqual(set(dv.get_feature_names()), set(features)) + self.assertEqual(set(dv.get_feature_names_out()), set(features)) self.assertEqual(len(dicts), len(table.ids())) for dict_row, (table_row, _, _) in zip(dicts, table.iter()): for feature, count in zip(features, table_row): diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py index d4e7477..377f734 100644 --- a/q2_sample_classifier/utilities.py +++ b/q2_sample_classifier/utilities.py @@ -234,7 +234,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator, # Describe top features n_opt = rfecv.named_steps.est.n_features_ importance = _extract_important_features( - rfecv.named_steps.dv.get_feature_names(), + rfecv.named_steps.dv.get_feature_names_out(), rfecv.named_steps.est.ranking_) importance = sort_importances(importance, ascending=True)[:n_opt] @@ -248,9 +248,10 
@@ def _extract_rfe_scores(rfecv): # If using fractional step, step = integer of fraction * n_features if rfecv.step < 1: rfecv.step = int(rfecv.step * n_features) - # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array + # Need to manually calculate x-axis, as + # rfecv.cv_results_['mean_test_score'] are a 1-d array x = [n_features - (n * rfecv.step) - for n in range(len(rfecv.grid_scores_)-1, -1, -1)] + for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)] if x[0] < 1: x[0] = 1 return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy') @@ -400,12 +401,12 @@ def _calculate_feature_importances(estimator): # feature_importances_ or coef_ to report feature importance/weights try: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.feature_importances_) # is there a better way to determine whether estimator has coef_ ? except AttributeError: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.coef_) return importances @@ -707,7 +708,7 @@ def _mean_feature_importance(importances): def _null_feature_importance(table): feature_extractor = DictVectorizer() feature_extractor.fit(table) - imp = pd.DataFrame(index=feature_extractor.get_feature_names()) + imp = pd.DataFrame(index=feature_extractor.get_feature_names_out()) imp.index.name = "feature" imp["importance"] = 1 return imp |