-rw-r--r--  debian/changelog                                |  8
-rw-r--r--  debian/control                                  |  3
-rw-r--r--  debian/patches/reduce-precision-in-tests.patch  | 10
-rw-r--r--  debian/patches/series                           |  1
-rw-r--r--  debian/patches/sklearn-1.2.1.patch              | 68
-rw-r--r--  q2_sample_classifier/tests/test_estimators.py   |  2
-rw-r--r--  q2_sample_classifier/utilities.py               | 13
7 files changed, 92 insertions, 13 deletions
diff --git a/debian/changelog b/debian/changelog
index 36986fc..d917857 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,11 @@
+q2-sample-classifier (2022.11.1-3) unstable; urgency=medium
+
+ * reduce-precision-in-tests.patch: unfuzz.
+  * sklearn-1.2.1.patch: add; fix test failures with sklearn 1.2 and later.
+  * d/control: add myself to Uploaders.
+
+ -- Étienne Mollier <emollier@debian.org> Fri, 03 Feb 2023 21:35:26 +0100
+
q2-sample-classifier (2022.11.1-2) unstable; urgency=medium

  * Team upload.
diff --git a/debian/control b/debian/control
index 0308ec1..c9b89eb 100644
--- a/debian/control
+++ b/debian/control
@@ -1,7 +1,8 @@
Source: q2-sample-classifier
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Liubov Chuprikova <chuprikovalv@gmail.com>,
- Steffen Moeller <moeller@debian.org>
+ Steffen Moeller <moeller@debian.org>,
+ Étienne Mollier <emollier@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper-compat (= 13),
diff --git a/debian/patches/reduce-precision-in-tests.patch b/debian/patches/reduce-precision-in-tests.patch
index 14be00f..874d867 100644
--- a/debian/patches/reduce-precision-in-tests.patch
+++ b/debian/patches/reduce-precision-in-tests.patch
@@ -3,9 +3,9 @@ Description: Add more tolerance in 3 tests in order to avoid test failures due t
Author: Nilesh Patra <nilesh@debian.org>
Forwarded: yes
Last-Update: 2021-07-25
---- a/q2_sample_classifier/tests/test_estimators.py
-+++ b/q2_sample_classifier/tests/test_estimators.py
-@@ -247,9 +247,9 @@ class EstimatorsTests(SampleClassifierTe
+--- q2-sample-classifier.orig/q2_sample_classifier/tests/test_estimators.py
++++ q2-sample-classifier/q2_sample_classifier/tests/test_estimators.py
+@@ -250,9 +250,9 @@
pred, self.mdc_chard_fp.to_series(), 'ignore')
accuracy = accuracy_score(truth, pred)
self.assertAlmostEqual(
@@ -17,7 +17,7 @@ Last-Update: 2021-07-25
# test if training classifier with pipeline classify_samples raises
# warning when test_size = 0.0
-@@ -378,7 +378,7 @@ class EstimatorsTests(SampleClassifierTe
+@@ -381,7 +381,7 @@
regressor, accuracy, seeded_results[regressor]))
else:
self.assertAlmostEqual(
@@ -26,7 +26,7 @@ Last-Update: 2021-07-25
msg='Accuracy of %s regressor was %f, but expected %f' % (
regressor, accuracy, seeded_results[regressor]))
-@@ -520,7 +520,7 @@ class EstimatorsTests(SampleClassifierTe
+@@ -523,7 +523,7 @@
self.assertAlmostEqual(
mse, seeded_predict_results[regressor],
msg='Accuracy of %s regressor was %f, but expected %f' % (
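For context on the tolerance pattern this patch refreshes: unittest's
assertAlmostEqual rounds the difference between the two values to the given
number of decimal places, so loosening `places` absorbs small
architecture-dependent floating-point drift. A minimal sketch with made-up
accuracy and expected values (the real tests compare accuracy_score output
against seeded reference results):

import unittest

class ToleranceExample(unittest.TestCase):
    def test_accuracy_close_enough(self):
        accuracy = 0.6546   # stand-in for accuracy_score(truth, pred)
        expected = 0.65     # stand-in for a seeded reference result
        # places=2 asserts round(accuracy - expected, 2) == 0, so tiny
        # platform-dependent numeric drift no longer fails the test.
        self.assertAlmostEqual(accuracy, expected, places=2)

if __name__ == '__main__':
    unittest.main()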
diff --git a/debian/patches/series b/debian/patches/series
index 268890f..70969ce 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,4 @@
py2to3test.patch
reduce-precision-in-tests.patch
fix-autopkgtest.patch
+sklearn-1.2.1.patch
diff --git a/debian/patches/sklearn-1.2.1.patch b/debian/patches/sklearn-1.2.1.patch
new file mode 100644
index 0000000..be11980
--- /dev/null
+++ b/debian/patches/sklearn-1.2.1.patch
@@ -0,0 +1,68 @@
+Description: fix get_feature_names deprecation with sklearn 1.2.1
+ Since sklearn 1.2.1, autopkgtests are failing due to occurrences of:
+ AttributeError: 'DictVectorizer' object has no attribute 'get_feature_names'
+ This function is replaced by get_feature_names_out.
+Author: Étienne Mollier <emollier@debian.org>
+Forwarded: no
+Last-Update: 2023-02-02
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- q2-sample-classifier.orig/q2_sample_classifier/tests/test_estimators.py
++++ q2-sample-classifier/q2_sample_classifier/tests/test_estimators.py
+@@ -134,7 +134,7 @@
+ dv = DictVectorizer()
+ dv.fit(dicts)
+ features = table.ids('observation')
+- self.assertEqual(set(dv.get_feature_names()), set(features))
++ self.assertEqual(set(dv.get_feature_names_out()), set(features))
+ self.assertEqual(len(dicts), len(table.ids()))
+ for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
+ for feature, count in zip(features, table_row):
+--- q2-sample-classifier.orig/q2_sample_classifier/utilities.py
++++ q2-sample-classifier/q2_sample_classifier/utilities.py
+@@ -234,7 +234,7 @@
+ # Describe top features
+ n_opt = rfecv.named_steps.est.n_features_
+ importance = _extract_important_features(
+- rfecv.named_steps.dv.get_feature_names(),
++ rfecv.named_steps.dv.get_feature_names_out(),
+ rfecv.named_steps.est.ranking_)
+ importance = sort_importances(importance, ascending=True)[:n_opt]
+
+@@ -248,9 +248,10 @@
+ # If using fractional step, step = integer of fraction * n_features
+ if rfecv.step < 1:
+ rfecv.step = int(rfecv.step * n_features)
+- # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array
++ # Need to manually calculate x-axis, as
++ # rfecv.cv_results_['mean_test_score'] is a 1-d array
+ x = [n_features - (n * rfecv.step)
+- for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
++ for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)]
+ if x[0] < 1:
+ x[0] = 1
+ return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy')
+@@ -400,12 +401,12 @@
+ # feature_importances_ or coef_ to report feature importance/weights
+ try:
+ importances = _extract_important_features(
+- estimator.named_steps.dv.get_feature_names(),
++ estimator.named_steps.dv.get_feature_names_out(),
+ estimator.named_steps.est.feature_importances_)
+ # is there a better way to determine whether estimator has coef_ ?
+ except AttributeError:
+ importances = _extract_important_features(
+- estimator.named_steps.dv.get_feature_names(),
++ estimator.named_steps.dv.get_feature_names_out(),
+ estimator.named_steps.est.coef_)
+ return importances
+
+@@ -707,7 +708,7 @@
+ def _null_feature_importance(table):
+ feature_extractor = DictVectorizer()
+ feature_extractor.fit(table)
+- imp = pd.DataFrame(index=feature_extractor.get_feature_names())
++ imp = pd.DataFrame(index=feature_extractor.get_feature_names_out())
+ imp.index.name = "feature"
+ imp["importance"] = 1
+ return imp
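The API change this patch tracks, shown in isolation: DictVectorizer's
get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2 in
favour of get_feature_names_out(), which returns an array of names rather
than a list. A standalone sketch with made-up input data:

from sklearn.feature_extraction import DictVectorizer

dicts = [{'featA': 1.0, 'featB': 2.0},   # made-up sample rows
         {'featA': 0.0, 'featB': 3.0}]
dv = DictVectorizer()
dv.fit(dicts)

# Old, removed in sklearn >= 1.2:  dv.get_feature_names()
# New: an ndarray of str rather than a list, hence set() in the test above.
print(set(dv.get_feature_names_out()))   # {'featA', 'featB'}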
diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py
index dfcc0ce..e6760bd 100644
--- a/q2_sample_classifier/tests/test_estimators.py
+++ b/q2_sample_classifier/tests/test_estimators.py
@@ -134,7 +134,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase):
dv = DictVectorizer()
dv.fit(dicts)
features = table.ids('observation')
- self.assertEqual(set(dv.get_feature_names()), set(features))
+ self.assertEqual(set(dv.get_feature_names_out()), set(features))
self.assertEqual(len(dicts), len(table.ids()))
for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
for feature, count in zip(features, table_row):
diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py
index d4e7477..377f734 100644
--- a/q2_sample_classifier/utilities.py
+++ b/q2_sample_classifier/utilities.py
@@ -234,7 +234,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
# Describe top features
n_opt = rfecv.named_steps.est.n_features_
importance = _extract_important_features(
- rfecv.named_steps.dv.get_feature_names(),
+ rfecv.named_steps.dv.get_feature_names_out(),
rfecv.named_steps.est.ranking_)
importance = sort_importances(importance, ascending=True)[:n_opt]
@@ -248,9 +248,10 @@ def _extract_rfe_scores(rfecv):
# If using fractional step, step = integer of fraction * n_features
if rfecv.step < 1:
rfecv.step = int(rfecv.step * n_features)
- # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array
+ # Need to manually calculate x-axis, as
+ # rfecv.cv_results_['mean_test_score'] is a 1-d array
x = [n_features - (n * rfecv.step)
- for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
+ for n in range(len(rfecv.cv_results_['mean_test_score'])-1, -1, -1)]
if x[0] < 1:
x[0] = 1
return pd.Series(rfecv.cv_results_['mean_test_score'], index=x, name='Accuracy')
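The hunk above adapts to RFECV's attribute change: grid_scores_ was removed
in scikit-learn 1.2, and the per-step mean cross-validation scores now live
in cv_results_['mean_test_score']. A self-contained sketch of the same
x-axis reconstruction, using an assumed toy dataset and estimator rather
than the plugin's own pipeline:

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV

X, y = make_classification(n_samples=60, n_features=10, random_state=0)
rfecv = RFECV(RandomForestClassifier(random_state=0), step=1, cv=3)
rfecv.fit(X, y)

scores = rfecv.cv_results_['mean_test_score']  # 1-d, one score per RFE step
n_features = X.shape[1]
# Rebuild the number of features retained at each step, smallest first,
# mirroring the patched _extract_rfe_scores.
x = [n_features - n * rfecv.step for n in range(len(scores) - 1, -1, -1)]
if x[0] < 1:
    x[0] = 1
print(pd.Series(scores, index=x, name='Accuracy'))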
@@ -400,12 +401,12 @@ def _calculate_feature_importances(estimator):
# feature_importances_ or coef_ to report feature importance/weights
try:
importances = _extract_important_features(
- estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.dv.get_feature_names_out(),
estimator.named_steps.est.feature_importances_)
# is there a better way to determine whether estimator has coef_ ?
except AttributeError:
importances = _extract_important_features(
- estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.dv.get_feature_names_out(),
estimator.named_steps.est.coef_)
return importances
@@ -707,7 +708,7 @@ def _mean_feature_importance(importances):
def _null_feature_importance(table):
feature_extractor = DictVectorizer()
feature_extractor.fit(table)
- imp = pd.DataFrame(index=feature_extractor.get_feature_names())
+ imp = pd.DataFrame(index=feature_extractor.get_feature_names_out())
imp.index.name = "feature"
imp["importance"] = 1
return imp
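The try/except in the _calculate_feature_importances hunk above leans on a
sklearn convention: tree ensembles expose feature_importances_, while linear
models raise AttributeError there and expose coef_ instead. A hedged sketch
of that fallback with assumed models and random data, outside the plugin's
Pipeline wrapping:

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge

def extract_importances(estimator, feature_names):
    try:
        weights = estimator.feature_importances_   # tree ensembles
    except AttributeError:
        weights = estimator.coef_                  # linear models
    return dict(zip(feature_names, np.ravel(weights)))

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
y = X @ np.array([1.0, -2.0, 0.5])
names = ['f0', 'f1', 'f2']
print(extract_importances(RandomForestRegressor(random_state=0).fit(X, y), names))
print(extract_importances(Ridge().fit(X, y), names))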