diff options
author | Étienne Mollier <emollier@debian.org> | 2023-02-03 21:34:47 +0100 |
---|---|---|
committer | Étienne Mollier <emollier@debian.org> | 2023-02-03 21:34:47 +0100 |
commit | 255625c5b5d2eae415358a423e4a2b31094eea72 (patch) | |
tree | 421af90ad6bc52a6186d3b4e7c91d1ab11449960 | |
parent | 6a67eb4c6ebfe4a401c659ba607bfba03e6492a3 (diff) |
sklearn-1.2.1.patch: add; fix test failures past sklearn 1.2
-rw-r--r-- | debian/patches/series | 1 | ||||
-rw-r--r-- | debian/patches/sklearn-1.2.1.patch | 68 |
2 files changed, 69 insertions, 0 deletions
diff --git a/debian/patches/series b/debian/patches/series index 268890f..70969ce 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1,4 @@ py2to3test.patch reduce-precision-in-tests.patch fix-autopkgtest.patch +sklearn-1.2.1.patch diff --git a/debian/patches/sklearn-1.2.1.patch b/debian/patches/sklearn-1.2.1.patch new file mode 100644 index 0000000..be11980 --- /dev/null +++ b/debian/patches/sklearn-1.2.1.patch @@ -0,0 +1,68 @@ +Description: fix get_feature_names deprecation with sklearn 1.2.1 + Since sklearn 1.2.1, autopkgtests are failing due to the occurrence of: + AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names' + This function is replaced by get_feature_names_out. +Author: Étienne Mollier <emollier@debian.org> +Forwarded: no +Last-Update: 2023-02-02 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- q2-sample-classifier.orig/q2_sample_classifier/tests/test_estimators.py ++++ q2-sample-classifier/q2_sample_classifier/tests/test_estimators.py +@@ -134,7 +134,7 @@ + dv = DictVectorizer() + dv.fit(dicts) + features = table.ids('observation') +- self.assertEqual(set(dv.get_feature_names()), set(features)) ++ self.assertEqual(set(dv.get_feature_names_out()), set(features)) + self.assertEqual(len(dicts), len(table.ids())) + for dict_row, (table_row, _, _) in zip(dicts, table.iter()): + for feature, count in zip(features, table_row): +--- q2-sample-classifier.orig/q2_sample_classifier/utilities.py ++++ q2-sample-classifier/q2_sample_classifier/utilities.py +@@ -234,7 +234,7 @@ + # Describe top features + n_opt = rfecv.named_steps.est.n_features_ + importance = _extract_important_features( +- rfecv.named_steps.dv.get_feature_names(), ++ rfecv.named_steps.dv.get_feature_names_out(), + rfecv.named_steps.est.ranking_) + importance = sort_importances(importance, ascending=True)[:n_opt] + +@@ -248,9 +248,10 @@ + # If using fractional step, step = integer of fraction * n_features + if rfecv.step < 1: + 
rfecv.step = int(rfecv.step * n_features) +- # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array ++ # Need to manually calculate x-axis, as ++ # rfecv.cv_results_['mean_test_score'] are a 1-d array + x = [n_features - (n * rfecv.step) +- for n in range(len(rfecv.grid_scores_)-1, -1, -1)] ++ for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)] + if x[0] < 1: + x[0] = 1 + return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy') +@@ -400,12 +401,12 @@ + # feature_importances_ or coef_ to report feature importance/weights + try: + importances = _extract_important_features( +- estimator.named_steps.dv.get_feature_names(), ++ estimator.named_steps.dv.get_feature_names_out(), + estimator.named_steps.est.feature_importances_) + # is there a better way to determine whether estimator has coef_ ? + except AttributeError: + importances = _extract_important_features( +- estimator.named_steps.dv.get_feature_names(), ++ estimator.named_steps.dv.get_feature_names_out(), + estimator.named_steps.est.coef_) + return importances + +@@ -707,7 +708,7 @@ + def _null_feature_importance(table): + feature_extractor = DictVectorizer() + feature_extractor.fit(table) +- imp = pd.DataFrame(index=feature_extractor.get_feature_names()) ++ imp = pd.DataFrame(index=feature_extractor.get_feature_names_out()) + imp.index.name = "feature" + imp["importance"] = 1 + return imp |