diff options
author | Étienne Mollier <emollier@debian.org> | 2023-08-30 22:38:55 +0200 |
---|---|---|
committer | Étienne Mollier <emollier@debian.org> | 2023-08-30 22:38:55 +0200 |
commit | 0b74c56faae10607005d30eb1fd02854a1967601 (patch) | |
tree | a2313bb6b2d0daf43681cd579cc84b7b6e190dbd | |
parent | dc7675248be513b982613b5416e93018630075a0 (diff) |
fix get_feature_names deprecation with sklearn 1.2.1
Forwarded: https://github.com/qiime2/q2-sample-classifier/issues/227
Last-Update: 2023-02-02
Since sklearn 1.2.1, autopkgtests have been failing due to the occurrence of:
AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names'
This function has been replaced by get_feature_names_out.
Last-Update: 2023-02-02
Gbp-Pq: Name sklearn-1.2.1.patch
-rw-r--r-- | q2_sample_classifier/tests/test_estimators.py | 2 | ||||
-rw-r--r-- | q2_sample_classifier/utilities.py | 13 |
2 files changed, 8 insertions, 7 deletions
diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py index 95fd084..f8d9d66 100644 --- a/q2_sample_classifier/tests/test_estimators.py +++ b/q2_sample_classifier/tests/test_estimators.py @@ -135,7 +135,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase): dv = DictVectorizer() dv.fit(dicts) features = table.ids('observation') - self.assertEqual(set(dv.get_feature_names()), set(features)) + self.assertEqual(set(dv.get_feature_names_out()), set(features)) self.assertEqual(len(dicts), len(table.ids())) for dict_row, (table_row, _, _) in zip(dicts, table.iter()): for feature, count in zip(features, table_row): diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py index 06d7778..e179a9a 100644 --- a/q2_sample_classifier/utilities.py +++ b/q2_sample_classifier/utilities.py @@ -238,7 +238,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator, # Describe top features n_opt = rfecv.named_steps.est.n_features_ importance = _extract_important_features( - rfecv.named_steps.dv.get_feature_names(), + rfecv.named_steps.dv.get_feature_names_out(), rfecv.named_steps.est.ranking_) importance = sort_importances(importance, ascending=True)[:n_opt] @@ -252,9 +252,10 @@ def _extract_rfe_scores(rfecv): # If using fractional step, step = integer of fraction * n_features if rfecv.step < 1: rfecv.step = int(rfecv.step * n_features) - # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array + # Need to manually calculate x-axis, as + # rfecv.cv_results_['mean_test_score'] are a 1-d array x = [n_features - (n * rfecv.step) - for n in range(len(rfecv.grid_scores_)-1, -1, -1)] + for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)] if x[0] < 1: x[0] = 1 return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy') @@ -404,12 +405,12 @@ def _calculate_feature_importances(estimator): # feature_importances_ or coef_ to report feature 
importance/weights try: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.feature_importances_) # is there a better way to determine whether estimator has coef_ ? except AttributeError: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.coef_) return importances @@ -711,7 +712,7 @@ def _mean_feature_importance(importances): def _null_feature_importance(table): feature_extractor = DictVectorizer() feature_extractor.fit(table) - imp = pd.DataFrame(index=feature_extractor.get_feature_names()) + imp = pd.DataFrame(index=feature_extractor.get_feature_names_out()) imp.index.name = "feature" imp["importance"] = 1 return imp |