From 65bc51d72da59998c1913530991f5522bf73b44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Mollier?= Date: Thu, 14 Dec 2023 19:06:53 +0100 Subject: fix get_feature_names deprecation with sklearn 1.2.1 Forwarded: https://github.com/qiime2/q2-sample-classifier/issues/227 Last-Update: 2023-02-02 Since sklearn 1.2.1, autopkgtests are failing due to occurrence of: AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names' This function is replaced by get_feature_names_out. Gbp-Pq: Name sklearn-1.2.1.patch --- q2_sample_classifier/tests/test_estimators.py | 2 +- q2_sample_classifier/utilities.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py index 95fd084..f8d9d66 100644 --- a/q2_sample_classifier/tests/test_estimators.py +++ b/q2_sample_classifier/tests/test_estimators.py @@ -135,7 +135,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase): dv = DictVectorizer() dv.fit(dicts) features = table.ids('observation') - self.assertEqual(set(dv.get_feature_names()), set(features)) + self.assertEqual(set(dv.get_feature_names_out()), set(features)) self.assertEqual(len(dicts), len(table.ids())) for dict_row, (table_row, _, _) in zip(dicts, table.iter()): for feature, count in zip(features, table_row): diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py index 06d7778..e179a9a 100644 --- a/q2_sample_classifier/utilities.py +++ b/q2_sample_classifier/utilities.py @@ -238,7 +238,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator, # Describe top features n_opt = rfecv.named_steps.est.n_features_ importance = _extract_important_features( - rfecv.named_steps.dv.get_feature_names(), + rfecv.named_steps.dv.get_feature_names_out(), rfecv.named_steps.est.ranking_) importance = sort_importances(importance, ascending=True)[:n_opt] @@ -252,9 +252,10 @@ def 
_extract_rfe_scores(rfecv): # If using fractional step, step = integer of fraction * n_features if rfecv.step < 1: rfecv.step = int(rfecv.step * n_features) - # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array + # Need to manually calculate x-axis, as + # rfecv.cv_results_['mean_test_score'] are a 1-d array x = [n_features - (n * rfecv.step) - for n in range(len(rfecv.grid_scores_)-1, -1, -1)] + for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)] if x[0] < 1: x[0] = 1 return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy') @@ -404,12 +405,12 @@ def _calculate_feature_importances(estimator): # feature_importances_ or coef_ to report feature importance/weights try: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.feature_importances_) # is there a better way to determine whether estimator has coef_ ? except AttributeError: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.coef_) return importances @@ -711,7 +712,7 @@ def _mean_feature_importance(importances): def _null_feature_importance(table): feature_extractor = DictVectorizer() feature_extractor.fit(table) - imp = pd.DataFrame(index=feature_extractor.get_feature_names()) + imp = pd.DataFrame(index=feature_extractor.get_feature_names_out()) imp.index.name = "feature" imp["importance"] = 1 return imp -- cgit v1.2.3