diff options
author | Étienne Mollier <emollier@debian.org> | 2023-08-30 22:38:55 +0200 |
---|---|---|
committer | Étienne Mollier <emollier@debian.org> | 2023-08-30 22:38:55 +0200 |
commit | 0b74c56faae10607005d30eb1fd02854a1967601 (patch) | |
tree | a2313bb6b2d0daf43681cd579cc84b7b6e190dbd | |
parent | dc7675248be513b982613b5416e93018630075a0 (diff) |
fix get_feature_names deprecation with sklearn 1.2.1
Forwarded: https://github.com/qiime2/q2-sample-classifier/issues/227
Last-Update: 2023-02-02
Since sklearn 1.2.1, autopkgtests have been failing due to the occurrence of:
AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names'
This function has been replaced by get_feature_names_out.
Last-Update: 2023-02-02
Gbp-Pq: Name sklearn-1.2.1.patch
-rw-r--r-- | q2_sample_classifier/tests/test_estimators.py | 2 | ||||
-rw-r--r-- | q2_sample_classifier/utilities.py | 13 |
2 files changed, 8 insertions, 7 deletions
diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py index 95fd084..f8d9d66 100644 --- a/q2_sample_classifier/tests/test_estimators.py +++ b/q2_sample_classifier/tests/test_estimators.py @@ -135,7 +135,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase): dv = DictVectorizer() dv.fit(dicts) features = table.ids('observation') - self.assertEqual(set(dv.get_feature_names()), set(features)) + self.assertEqual(set(dv.get_feature_names_out()), set(features)) self.assertEqual(len(dicts), len(table.ids())) for dict_row, (table_row, _, _) in zip(dicts, table.iter()): for feature, count in zip(features, table_row): diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py index 06d7778..e179a9a 100644 --- a/q2_sample_classifier/utilities.py +++ b/q2_sample_classifier/utilities.py @@ -238,7 +238,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator, # Describe top features n_opt = rfecv.named_steps.est.n_features_ importance = _extract_important_features( - rfecv.named_steps.dv.get_feature_names(), + rfecv.named_steps.dv.get_feature_names_out(), rfecv.named_steps.est.ranking_) importance = sort_importances(importance, ascending=True)[:n_opt] @@ -252,9 +252,10 @@ def _extract_rfe_scores(rfecv): # If using fractional step, step = integer of fraction * n_features if rfecv.step < 1: rfecv.step = int(rfecv.step * n_features) - # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array + # Need to manually calculate x-axis, as + # rfecv.cv_results_['mean_test_score'] are a 1-d array x = [n_features - (n * rfecv.step) - for n in range(len(rfecv.grid_scores_)-1, -1, -1)] + for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)] if x[0] < 1: x[0] = 1 return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy') @@ -404,12 +405,12 @@ def _calculate_feature_importances(estimator): # feature_importances_ or coef_ to report feature 
importance/weights try: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.feature_importances_) # is there a better way to determine whether estimator has coef_ ? except AttributeError: importances = _extract_important_features( - estimator.named_steps.dv.get_feature_names(), + estimator.named_steps.dv.get_feature_names_out(), estimator.named_steps.est.coef_) return importances @@ -711,7 +712,7 @@ def _mean_feature_importance(importances): def _null_feature_importance(table): feature_extractor = DictVectorizer() feature_extractor.fit(table) - imp = pd.DataFrame(index=feature_extractor.get_feature_names()) + imp = pd.DataFrame(index=feature_extractor.get_feature_names_out()) imp.index.name = "feature" imp["importance"] = 1 return imp |