summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorÉtienne Mollier <emollier@debian.org>2023-12-14 19:06:53 +0100
committerÉtienne Mollier <emollier@debian.org>2023-12-14 19:06:53 +0100
commit65bc51d72da59998c1913530991f5522bf73b44b (patch)
tree0ec114495655ba2c1a992cdf29ecaaf11a8d8aad
parentd957910e1da7e61dc6eafd5e62149f40872fdd0f (diff)
fix get_feature_names deprecation with sklearn 1.2.1
Forwarded: https://github.com/qiime2/q2-sample-classifier/issues/227 Last-Update: 2023-02-02 Since sklearn 1.2.1, autopkgtests have been failing due to the occurrence of: AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names' This function has been replaced by get_feature_names_out. Last-Update: 2023-02-02 Gbp-Pq: Name sklearn-1.2.1.patch
-rw-r--r--q2_sample_classifier/tests/test_estimators.py2
-rw-r--r--q2_sample_classifier/utilities.py13
2 files changed, 8 insertions, 7 deletions
diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py
index 95fd084..f8d9d66 100644
--- a/q2_sample_classifier/tests/test_estimators.py
+++ b/q2_sample_classifier/tests/test_estimators.py
@@ -135,7 +135,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase):
dv = DictVectorizer()
dv.fit(dicts)
features = table.ids('observation')
- self.assertEqual(set(dv.get_feature_names()), set(features))
+ self.assertEqual(set(dv.get_feature_names_out()), set(features))
self.assertEqual(len(dicts), len(table.ids()))
for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
for feature, count in zip(features, table_row):
diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py
index 06d7778..e179a9a 100644
--- a/q2_sample_classifier/utilities.py
+++ b/q2_sample_classifier/utilities.py
@@ -238,7 +238,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
# Describe top features
n_opt = rfecv.named_steps.est.n_features_
importance = _extract_important_features(
- rfecv.named_steps.dv.get_feature_names(),
+ rfecv.named_steps.dv.get_feature_names_out(),
rfecv.named_steps.est.ranking_)
importance = sort_importances(importance, ascending=True)[:n_opt]
@@ -252,9 +252,10 @@ def _extract_rfe_scores(rfecv):
# If using fractional step, step = integer of fraction * n_features
if rfecv.step < 1:
rfecv.step = int(rfecv.step * n_features)
- # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array
+ # Need to manually calculate x-axis, as
+ # rfecv.cv_results_['mean_test_score'] are a 1-d array
x = [n_features - (n * rfecv.step)
- for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
+ for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)]
if x[0] < 1:
x[0] = 1
return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy')
@@ -404,12 +405,12 @@ def _calculate_feature_importances(estimator):
# feature_importances_ or coef_ to report feature importance/weights
try:
importances = _extract_important_features(
- estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.dv.get_feature_names_out(),
estimator.named_steps.est.feature_importances_)
# is there a better way to determine whether estimator has coef_ ?
except AttributeError:
importances = _extract_important_features(
- estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.dv.get_feature_names_out(),
estimator.named_steps.est.coef_)
return importances
@@ -711,7 +712,7 @@ def _mean_feature_importance(importances):
def _null_feature_importance(table):
feature_extractor = DictVectorizer()
feature_extractor.fit(table)
- imp = pd.DataFrame(index=feature_extractor.get_feature_names())
+ imp = pd.DataFrame(index=feature_extractor.get_feature_names_out())
imp.index.name = "feature"
imp["importance"] = 1
return imp