From 65bc51d72da59998c1913530991f5522bf73b44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Mollier?= Date: Thu, 14 Dec 2023 19:06:53 +0100 Subject: fix get_feature_names deprecation with sklearn 1.2.1 Forwarded: https://github.com/qiime2/q2-sample-classifier/issues/227 Last-Update: 2023-02-02 Since sklearn 1.2.1, autopkgtests are failing due to occurrence of: AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names' This function is replaced by get_feature_names_out. Gbp-Pq: Name sklearn-1.2.1.patch --- q2_sample_classifier/tests/test_estimators.py | 2 +- q2_sample_classifier/utilities.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py index 95fd084..f8d9d66 100644 --- a/q2_sample_classifier/tests/test_estimators.py +++ b/q2_sample_classifier/tests/test_estimators.py @@ -135,7 +135,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase): dv = DictVectorizer() dv.fit(dicts) features = table.ids('observation') - self.assertEqual(set(dv.get_feature_names()), set(features)) + self.assertEqual(set(dv.get_feature_names_out()), set(features)) self.assertEqual(len(dicts), len(table.ids())) for dict_row, (table_row, _, _) in zip(dicts, table.iter()): for feature, count in zip(features, table_row): diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py index 06d7778..e179a9a 100644 --- a/q2_sample_classifier/utilities.py +++ b/q2_sample_classifier/utilities.py @@ -238,7 +238,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator, # Describe top features n_opt = rfecv.named_steps.est.n_features_ importance = _extract_important_features( - rfecv.named_steps.dv.get_feature_names(), + rfecv.named_steps.dv.get_feature_names_out(), rfecv.named_steps.est.ranking_) importance = sort_importances(importance, ascending=True)[:n_opt] @@ -252,9 +252,10 @@ def 
_extract_rfe_scores(rfecv): # If using fractional step, step = integer of fraction * n_features if rfecv.step < 1: rfecv.step = int(rfecv.step * n_features) - # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array + # Need to manually calculate x-axis, as + # rfecv.cv_results_['mean_test_score'] are a 1-d array x = [n_features - (n * rfecv.step) - for n in range(len(rfecv.grid_scores_)-1, -1, -1)] + for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)] if x[0] < 1: x[0] = 1 return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy') @@ -404,12 +405,12 @@ def _calculate_feature_importances(estimator): # feature_importances_ or coef_ to report feature importance/weights try: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.feature_importances_) # is there a better way to determine whether estimator has coef_ ? except AttributeError: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.coef_) return importances @@ -711,7 +712,7 @@ def _mean_feature_importance(importances): def _null_feature_importance(table): feature_extractor = DictVectorizer() feature_extractor.fit(table) - imp = pd.DataFrame(index=feature_extractor.get_feature_names()) + imp = pd.DataFrame(index=feature_extractor.get_feature_names_out()) imp.index.name = "feature" imp["importance"] = 1 return imp -- cgit v1.2.3