author     Lance Lin <lq27267@gmail.com>  2023-01-12 16:13:52 +0100
committer  Lance Lin <lq27267@gmail.com>  2023-01-12 16:13:52 +0100
commit     ad4a7c5935ad3ff57460d731dc1f6484c6099049 (patch)
tree       c261337b44739b5b4dcc0a17c5525c15e32638d5
Import q2-sample-classifier_2022.11.1.orig.tar.gz
[dgit import orig q2-sample-classifier_2022.11.1.orig.tar.gz]
-rw-r--r--  .coveragerc  14
-rw-r--r--  .gitattributes  1
-rw-r--r--  .github/CONTRIBUTING.md  23
-rw-r--r--  .github/ISSUE_TEMPLATE/1-user-need-help.md  14
-rw-r--r--  .github/ISSUE_TEMPLATE/2-dev-need-help.md  12
-rw-r--r--  .github/ISSUE_TEMPLATE/3-found-bug.md  36
-rw-r--r--  .github/ISSUE_TEMPLATE/4-make-better.md  26
-rw-r--r--  .github/ISSUE_TEMPLATE/5-make-new.md  26
-rw-r--r--  .github/ISSUE_TEMPLATE/6-where-to-go.md  147
-rw-r--r--  .github/SUPPORT.md  112
-rw-r--r--  .github/pull_request_template.md  11
-rw-r--r--  .github/rubric.png  bin 0 -> 230155 bytes
-rw-r--r--  .github/workflows/add-to-project-ci.yml  21
-rw-r--r--  .github/workflows/ci.yml  55
-rw-r--r--  .gitignore  76
-rw-r--r--  LICENSE  29
-rw-r--r--  MANIFEST.in  2
-rw-r--r--  Makefile  25
-rw-r--r--  README.md  5
-rw-r--r--  ci/recipe/meta.yaml  49
-rw-r--r--  paper/fig1.png  bin 0 -> 135510 bytes
-rw-r--r--  paper/paper.md  58
-rw-r--r--  paper/references.bib  324
-rw-r--r--  q2_sample_classifier/__init__.py  31
-rw-r--r--  q2_sample_classifier/_format.py  183
-rw-r--r--  q2_sample_classifier/_transformer.py  176
-rw-r--r--  q2_sample_classifier/_type.py  30
-rw-r--r--  q2_sample_classifier/_version.py  520
-rw-r--r--  q2_sample_classifier/assets/index.html  101
-rw-r--r--  q2_sample_classifier/citations.bib  20
-rw-r--r--  q2_sample_classifier/classify.py  514
-rw-r--r--  q2_sample_classifier/plugin_setup.py  677
-rw-r--r--  q2_sample_classifier/tests/__init__.py  7
-rw-r--r--  q2_sample_classifier/tests/data/categorical_predictions.tsv  9
-rw-r--r--  q2_sample_classifier/tests/data/chardonnay.map.txt  22
-rw-r--r--  q2_sample_classifier/tests/data/chardonnay.table.qza  bin 0 -> 65810 bytes
-rw-r--r--  q2_sample_classifier/tests/data/class_probabilities.tsv  8
-rw-r--r--  q2_sample_classifier/tests/data/coordinates.tsv  5
-rw-r--r--  q2_sample_classifier/tests/data/ecam-table-maturity.qza  bin 0 -> 449698 bytes
-rw-r--r--  q2_sample_classifier/tests/data/ecam_map_maturity.txt  127
-rw-r--r--  q2_sample_classifier/tests/data/empty_file.txt  1
-rw-r--r--  q2_sample_classifier/tests/data/garbage.txt  6
-rw-r--r--  q2_sample_classifier/tests/data/importance.tsv  1057
-rw-r--r--  q2_sample_classifier/tests/data/importance_cv.tsv  1057
-rw-r--r--  q2_sample_classifier/tests/data/outliers.tsv  7
-rw-r--r--  q2_sample_classifier/tests/data/predictions.tsv  127
-rw-r--r--  q2_sample_classifier/tests/data/true_targets.tsv  9
-rw-r--r--  q2_sample_classifier/tests/data/vaw.qza  bin 0 -> 8154 bytes
-rw-r--r--  q2_sample_classifier/tests/data/vaw.txt  7
-rw-r--r--  q2_sample_classifier/tests/data/vaw_importance.tsv  6
-rw-r--r--  q2_sample_classifier/tests/test_actions.py  183
-rw-r--r--  q2_sample_classifier/tests/test_base_class.py  27
-rw-r--r--  q2_sample_classifier/tests/test_classifier.py  236
-rw-r--r--  q2_sample_classifier/tests/test_estimators.py  593
-rw-r--r--  q2_sample_classifier/tests/test_types_formats_transformers.py  439
-rw-r--r--  q2_sample_classifier/tests/test_utilities.py  155
-rw-r--r--  q2_sample_classifier/tests/test_visualization.py  237
-rw-r--r--  q2_sample_classifier/utilities.py  861
-rw-r--r--  q2_sample_classifier/visuals.py  388
-rw-r--r--  setup.cfg  8
-rw-r--r--  setup.py  33
-rw-r--r--  versioneer.py  1823
62 files changed, 10756 insertions, 0 deletions
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..f4647c9
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,14 @@
+[run]
+branch = True
+omit =
+ */tests*
+ */__init__.py
+ q2_sample_classifier/_version.py
+ versioneer.py
+
+[report]
+omit =
+ */tests*
+ */__init__.py
+ q2_sample_classifier/_version.py
+ versioneer.py
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a7e3128
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+q2_sample_classifier/_version.py export-subst
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..ad36f7b
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,23 @@
+# Contributing to this project
+
+Thanks for thinking of us :heart: :tada: - we would love a helping hand!
+
+## I just have a question
+
+> Note: Please don't file an issue to ask a question. You'll get faster results
+> by using the resources below.
+
+### QIIME 2 Users
+
+Check out the [User Docs](https://docs.qiime2.org) - there are many tutorials,
+walkthroughs, and guides available. If you still need help, please visit us at
+the [QIIME 2 Forum](https://forum.qiime2.org/c/user-support).
+
+### QIIME 2 Developers
+
+Check out the [Developer Docs](https://dev.qiime2.org) - there are many
+tutorials, walkthroughs, and guides available. If you still need help, please
+visit us at the [QIIME 2 Forum](https://forum.qiime2.org/c/dev-discussion).
+
+This document is based heavily on the following:
+https://github.com/atom/atom/blob/master/CONTRIBUTING.md
diff --git a/.github/ISSUE_TEMPLATE/1-user-need-help.md b/.github/ISSUE_TEMPLATE/1-user-need-help.md
new file mode 100644
index 0000000..39643d3
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/1-user-need-help.md
@@ -0,0 +1,14 @@
+---
+name: I am a user and I need help with QIIME 2...
+about: I am using QIIME 2 and have a question or am experiencing a problem
+
+---
+
+Have you had a chance to check out the docs?
+https://docs.qiime2.org
+There are many tutorials, walkthroughs, and guides available.
+
+If you still need help, please visit:
+https://forum.qiime2.org/c/user-support
+
+Help requests filed here will not be answered.
diff --git a/.github/ISSUE_TEMPLATE/2-dev-need-help.md b/.github/ISSUE_TEMPLATE/2-dev-need-help.md
new file mode 100644
index 0000000..e60d0c0
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/2-dev-need-help.md
@@ -0,0 +1,12 @@
+---
+name: I am a developer and I need help with QIIME 2...
+about: I am developing a QIIME 2 plugin or interface and have a question or a problem
+
+---
+
+Have you had a chance to check out the developer docs?
+https://dev.qiime2.org
+There are many tutorials, walkthroughs, and guides available.
+
+If you still need help, please visit:
+https://forum.qiime2.org/c/dev-discussion
diff --git a/.github/ISSUE_TEMPLATE/3-found-bug.md b/.github/ISSUE_TEMPLATE/3-found-bug.md
new file mode 100644
index 0000000..4bd9996
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/3-found-bug.md
@@ -0,0 +1,36 @@
+---
+name: I am a developer and I found a bug...
+about: I am a developer and I found a bug that I can describe
+
+---
+
+**Bug Description**
+A clear and concise description of what the bug is.
+
+**Steps to reproduce the behavior**
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Computation Environment**
+- OS: [e.g. macOS High Sierra]
+- QIIME 2 Release [e.g. 2018.6]
+
+**Questions**
+1. An enumerated list with any questions about the problem here.
+2. If not applicable, please delete this section.
+
+**Comments**
+1. An enumerated list with any other context or comments about the problem here.
+2. If not applicable, please delete this section.
+
+**References**
+1. An enumerated list of links to relevant references, including forum posts, stack overflow, etc.
+2. If not applicable, please delete this section.
diff --git a/.github/ISSUE_TEMPLATE/4-make-better.md b/.github/ISSUE_TEMPLATE/4-make-better.md
new file mode 100644
index 0000000..02c673f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/4-make-better.md
@@ -0,0 +1,26 @@
+---
+name: I am a developer and I have an idea for an improvement...
+about: I am a developer and I have an idea for an improvement to existing functionality
+
+---
+
+**Improvement Description**
+A clear and concise description of what the improvement is.
+
+**Current Behavior**
+Please provide a brief description of the current behavior.
+
+**Proposed Behavior**
+Please provide a brief description of the proposed behavior.
+
+**Questions**
+1. An enumerated list of questions related to the proposal.
+2. If not applicable, please delete this section.
+
+**Comments**
+1. An enumerated list of comments related to the proposal that don't fit anywhere else.
+2. If not applicable, please delete this section.
+
+**References**
+1. An enumerated list of links to relevant references, including forum posts, stack overflow, etc.
+2. If not applicable, please delete this section.
diff --git a/.github/ISSUE_TEMPLATE/5-make-new.md b/.github/ISSUE_TEMPLATE/5-make-new.md
new file mode 100644
index 0000000..6fd7431
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/5-make-new.md
@@ -0,0 +1,26 @@
+---
+name: I am a developer and I have an idea for a new feature...
+about: I am a developer and I have an idea for new functionality
+
+---
+
+**Addition Description**
+A clear and concise description of what the addition is.
+
+**Current Behavior**
+Please provide a brief description of the current behavior, if applicable.
+
+**Proposed Behavior**
+Please provide a brief description of the proposed behavior.
+
+**Questions**
+1. An enumerated list of questions related to the proposal.
+2. If not applicable, please delete this section.
+
+**Comments**
+1. An enumerated list of comments related to the proposal that don't fit anywhere else.
+2. If not applicable, please delete this section.
+
+**References**
+1. An enumerated list of links to relevant references, including forum posts, stack overflow, etc.
+2. If not applicable, please delete this section.
diff --git a/.github/ISSUE_TEMPLATE/6-where-to-go.md b/.github/ISSUE_TEMPLATE/6-where-to-go.md
new file mode 100644
index 0000000..fc2155d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/6-where-to-go.md
@@ -0,0 +1,147 @@
+---
+name: I don't know where to file my issue...
+about: I am a developer and I don't know which repo to file this in
+
+---
+
+The repos within the QIIME 2 GitHub Organization are listed below, each with a brief description.
+
+Sorted alphabetically by repo name.
+
+- The CI automation engine that builds and distributes QIIME 2
+ https://github.com/qiime2/busywork/issues
+
+- A Concourse resource for working with conda
+ https://github.com/qiime2/conda-channel-resource/issues
+
+- Web app for vanity URLs for QIIME 2 data assets
+ https://github.com/qiime2/data.qiime2.org/issues
+
+- The Developer Documentation
+ https://github.com/qiime2/dev-docs/issues
+
+- A discourse plugin for handling queued/unqueued topics
+ https://github.com/qiime2/discourse-unhandled-tagger/issues
+
+- The User Documentation
+ https://github.com/qiime2/docs/issues
+
+- Rendered QIIME 2 environment files for conda
+ https://github.com/qiime2/environment-files/issues
+
+- Google Sheets Add-On for validating tabular data
+ https://github.com/qiime2/Keemei/issues
+
+- A docker image for linux-based busywork workers
+ https://github.com/qiime2/linux-worker-docker/issues
+
+- Official project logos
+ https://github.com/qiime2/logos/issues
+
+- The q2-alignment plugin
+ https://github.com/qiime2/q2-alignment/issues
+
+- The q2-composition plugin
+ https://github.com/qiime2/q2-composition/issues
+
+- The q2-cutadapt plugin
+ https://github.com/qiime2/q2-cutadapt/issues
+
+- The q2-dada2 plugin
+ https://github.com/qiime2/q2-dada2/issues
+
+- The q2-deblur plugin
+ https://github.com/qiime2/q2-deblur/issues
+
+- The q2-demux plugin
+ https://github.com/qiime2/q2-demux/issues
+
+- The q2-diversity plugin
+ https://github.com/qiime2/q2-diversity/issues
+
+- The q2-diversity-lib plugin
+ https://github.com/qiime2/q2-diversity-lib/issues
+
+- The q2-emperor plugin
+ https://github.com/qiime2/q2-emperor/issues
+
+- The q2-feature-classifier plugin
+ https://github.com/qiime2/q2-feature-classifier/issues
+
+- The q2-feature-table plugin
+ https://github.com/qiime2/q2-feature-table/issues
+
+- The q2-fragment-insertion plugin
+ https://github.com/qiime2/q2-fragment-insertion/issues
+
+- The q2-gneiss plugin
+ https://github.com/qiime2/q2-gneiss/issues
+
+- The q2-longitudinal plugin
+ https://github.com/qiime2/q2-longitudinal/issues
+
+- The q2-metadata plugin
+ https://github.com/qiime2/q2-metadata/issues
+
+- The q2-phylogeny plugin
+ https://github.com/qiime2/q2-phylogeny/issues
+
+- The q2-quality-control plugin
+ https://github.com/qiime2/q2-quality-control/issues
+
+- The q2-quality-filter plugin
+ https://github.com/qiime2/q2-quality-filter/issues
+
+- The q2-sample-classifier plugin
+ https://github.com/qiime2/q2-sample-classifier/issues
+
+- The q2-shogun plugin
+ https://github.com/qiime2/q2-shogun/issues
+
+- The q2-taxa plugin
+ https://github.com/qiime2/q2-taxa/issues
+
+- The q2-types plugin
+ https://github.com/qiime2/q2-types/issues
+
+- The q2-vsearch plugin
+ https://github.com/qiime2/q2-vsearch/issues
+
+- The CLI interface
+ https://github.com/qiime2/q2cli/issues
+
+- The prototype CWL interface
+ https://github.com/qiime2/q2cwl/issues
+
+- The prototype Galaxy interface
+ https://github.com/qiime2/q2galaxy/issues
+
+- An internal tool for ensuring header text and copyrights are present
+ https://github.com/qiime2/q2lint/issues
+
+- The prototype GUI interface
+ https://github.com/qiime2/q2studio/issues
+
+- A base template for use in official QIIME 2 plugins
+ https://github.com/qiime2/q2templates/issues
+
+- The read-only web interface at view.qiime2.org
+ https://github.com/qiime2/q2view/issues
+
+- The QIIME 2 homepage at qiime2.org
+ https://github.com/qiime2/qiime2.github.io/issues
+
+- The QIIME 2 framework
+ https://github.com/qiime2/qiime2/issues
+
+- Centralized templates for repo assets
+ https://github.com/qiime2/template-repo/issues
+
+- Scripts for building QIIME 2 VMs
+ https://github.com/qiime2/vm-playbooks/issues
+
+- Scripts for building QIIME 2 workshop clusters
+ https://github.com/qiime2/workshop-playbooks/issues
+
+- The web app that runs workshops.qiime2.org
+ https://github.com/qiime2/workshops.qiime2.org/issues
diff --git a/.github/SUPPORT.md b/.github/SUPPORT.md
new file mode 100644
index 0000000..8e42409
--- /dev/null
+++ b/.github/SUPPORT.md
@@ -0,0 +1,112 @@
+# QIIME 2 Users
+
+Check out the [User Docs](https://docs.qiime2.org) - there are many tutorials,
+walkthroughs, and guides available. If you still need help, please visit us at
+the [QIIME 2 Forum](https://forum.qiime2.org/c/user-support).
+
+# QIIME 2 Developers
+
+Check out the [Developer Docs](https://dev.qiime2.org) - there are many
+tutorials, walkthroughs, and guides available. If you still need help, please
+visit us at the [QIIME 2 Forum](https://forum.qiime2.org/c/dev-discussion).
+
+# General Bug/Issue Triage Discussion
+
+![rubric](./rubric.png?raw=true)
+
+# Projects/Repositories in the QIIME 2 GitHub Organization
+
+Sorted alphabetically by repo name.
+
+- [busywork](https://github.com/qiime2/busywork/issues)
+ | The CI automation engine that builds and distributes QIIME 2
+- [conda-channel-resource](https://github.com/qiime2/conda-channel-resource/issues)
+ | A Concourse resource for working with conda
+- [data.qiime2.org](https://github.com/qiime2/data.qiime2.org/issues)
+ | Web app for vanity URLs for QIIME 2 data assets
+- [dev-docs](https://github.com/qiime2/dev-docs/issues)
+ | The Developer Documentation
+- [discourse-unhandled-tagger](https://github.com/qiime2/discourse-unhandled-tagger/issues)
+ | A discourse plugin for handling queued/unqueued topics
+- [docs](https://github.com/qiime2/docs/issues)
+ | The User Documentation
+- [environment-files](https://github.com/qiime2/environment-files/issues)
+ | Rendered QIIME 2 environment files for conda
+- [Keemei](https://github.com/qiime2/Keemei/issues)
+ | Google Sheets Add-On for validating tabular data
+- [linux-worker-docker](https://github.com/qiime2/linux-worker-docker/issues)
+ | A docker image for linux-based busywork workers
+- [logos](https://github.com/qiime2/logos/issues)
+ | Official project logos
+- [q2-alignment](https://github.com/qiime2/q2-alignment/issues)
+ | The q2-alignment plugin
+- [q2-composition](https://github.com/qiime2/q2-composition/issues)
+ | The q2-composition plugin
+- [q2-cutadapt](https://github.com/qiime2/q2-cutadapt/issues)
+ | The q2-cutadapt plugin
+- [q2-dada2](https://github.com/qiime2/q2-dada2/issues)
+ | The q2-dada2 plugin
+- [q2-deblur](https://github.com/qiime2/q2-deblur/issues)
+ | The q2-deblur plugin
+- [q2-demux](https://github.com/qiime2/q2-demux/issues)
+ | The q2-demux plugin
+- [q2-diversity](https://github.com/qiime2/q2-diversity/issues)
+ | The q2-diversity plugin
+- [q2-diversity-lib](https://github.com/qiime2/q2-diversity-lib/issues)
+ | The q2-diversity-lib plugin
+- [q2-emperor](https://github.com/qiime2/q2-emperor/issues)
+ | The q2-emperor plugin
+- [q2-feature-classifier](https://github.com/qiime2/q2-feature-classifier/issues)
+ | The q2-feature-classifier plugin
+- [q2-feature-table](https://github.com/qiime2/q2-feature-table/issues)
+ | The q2-feature-table plugin
+- [q2-fragment-insertion](https://github.com/qiime2/q2-fragment-insertion/issues)
+ | The q2-fragment-insertion plugin
+- [q2-gneiss](https://github.com/qiime2/q2-gneiss/issues)
+ | The q2-gneiss plugin
+- [q2-longitudinal](https://github.com/qiime2/q2-longitudinal/issues)
+ | The q2-longitudinal plugin
+- [q2-metadata](https://github.com/qiime2/q2-metadata/issues)
+ | The q2-metadata plugin
+- [q2-phylogeny](https://github.com/qiime2/q2-phylogeny/issues)
+ | The q2-phylogeny plugin
+- [q2-quality-control](https://github.com/qiime2/q2-quality-control/issues)
+ | The q2-quality-control plugin
+- [q2-quality-filter](https://github.com/qiime2/q2-quality-filter/issues)
+ | The q2-quality-filter plugin
+- [q2-sample-classifier](https://github.com/qiime2/q2-sample-classifier/issues)
+ | The q2-sample-classifier plugin
+- [q2-shogun](https://github.com/qiime2/q2-shogun/issues)
+ | The q2-shogun plugin
+- [q2-taxa](https://github.com/qiime2/q2-taxa/issues)
+ | The q2-taxa plugin
+- [q2-types](https://github.com/qiime2/q2-types/issues)
+ | The q2-types plugin
+- [q2-vsearch](https://github.com/qiime2/q2-vsearch/issues)
+ | The q2-vsearch plugin
+- [q2cli](https://github.com/qiime2/q2cli/issues)
+ | The CLI interface
+- [q2cwl](https://github.com/qiime2/q2cwl/issues)
+ | The prototype CWL interface
+- [q2galaxy](https://github.com/qiime2/q2galaxy/issues)
+ | The prototype Galaxy interface
+- [q2lint](https://github.com/qiime2/q2lint/issues)
+ | An internal tool for ensuring header text and copyrights are present
+- [q2studio](https://github.com/qiime2/q2studio/issues)
+ | The prototype GUI interface
+- [q2templates](https://github.com/qiime2/q2templates/issues)
+ | A base template for use in official QIIME 2 plugins
+- [q2view](https://github.com/qiime2/q2view/issues)
+ | The read-only web interface at view.qiime2.org
+- [qiime2.github.io](https://github.com/qiime2/qiime2.github.io/issues)
+ | The QIIME 2 homepage at qiime2.org
+- [qiime2](https://github.com/qiime2/qiime2/issues)
+ | The QIIME 2 framework
+- [template-repo](https://github.com/qiime2/template-repo/issues)
+ | Centralized templates for repo assets
+- [vm-playbooks](https://github.com/qiime2/vm-playbooks/issues)
+ | Scripts for building QIIME 2 VMs
+- [workshop-playbooks](https://github.com/qiime2/workshop-playbooks/issues)
+ | Scripts for building QIIME 2 workshop clusters
+- [workshops.qiime2.org](https://github.com/qiime2/workshops.qiime2.org/issues)
+ | The web app that runs workshops.qiime2.org
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..cae82e1
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,11 @@
+Brief summary of the Pull Request, including any issues it may fix using the GitHub closing syntax:
+
+https://help.github.com/articles/closing-issues-using-keywords/
+
+Also, include any co-authors or contributors using the GitHub coauthor tag:
+
+https://help.github.com/articles/creating-a-commit-with-multiple-authors/
+
+---
+
+Include any questions for reviewers, screenshots, sample outputs, etc.
diff --git a/.github/rubric.png b/.github/rubric.png
new file mode 100644
index 0000000..8986d64
--- /dev/null
+++ b/.github/rubric.png
Binary files differ
diff --git a/.github/workflows/add-to-project-ci.yml b/.github/workflows/add-to-project-ci.yml
new file mode 100644
index 0000000..5de9d65
--- /dev/null
+++ b/.github/workflows/add-to-project-ci.yml
@@ -0,0 +1,21 @@
+name: Add new issues and PRs to triage project board
+
+on:
+ issues:
+ types:
+ - opened
+ pull_request_target:
+ types:
+ - opened
+
+jobs:
+ add-to-project:
+ name: Add issue to project
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/add-to-project@v0.3.0
+ with:
+ project-url: https://github.com/orgs/qiime2/projects/36
+ github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
+ labeled: skip-triage
+ label-operator: NOT
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..e994794
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,55 @@
+# This file is automatically generated by busywork.qiime2.org and
+# template-repos - any manual edits made to this file will be erased when
+# busywork performs maintenance updates.
+
+name: ci
+
+on:
+ pull_request:
+ push:
+ branches:
+ - master
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - name: checkout source
+ uses: actions/checkout@v2
+
+ - name: set up python 3.8
+ uses: actions/setup-python@v1
+ with:
+ python-version: 3.8
+
+ - name: install dependencies
+ run: python -m pip install --upgrade pip
+
+ - name: lint
+ run: |
+ pip install -q https://github.com/qiime2/q2lint/archive/master.zip
+ q2lint
+ pip install -q flake8
+ flake8
+
+ build-and-test:
+ needs: lint
+ strategy:
+ matrix:
+ os: [ubuntu-latest, macos-latest]
+ runs-on: ${{ matrix.os }}
+ steps:
+ - name: checkout source
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+
+ - name: set up git repo for versioneer
+ run: git fetch --depth=1 origin +refs/tags/*:refs/tags/*
+
+ - uses: qiime2/action-library-packaging@alpha1
+ with:
+ package-name: q2-sample-classifier
+ build-target: dev
+ additional-tests: py.test --pyargs q2_sample_classifier
+ library-token: ${{ secrets.LIBRARY_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..671372b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,76 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# vi
+.*.swp
+
+# pytest cache
+.pytest_cache/
+
+# other
+*~
+*.code-workspace
+.vscode/
+
+
+.DS_store
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a77f678
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2017-2022, QIIME 2 development team.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..addbb51
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+include versioneer.py
+include q2_sample_classifier/_version.py
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..94bf97a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+.PHONY: all lint test test-cov install dev clean distclean
+
+PYTHON ?= python
+
+all: ;
+
+lint:
+ q2lint
+ flake8
+
+test: all
+ py.test
+
+test-cov: all
+ py.test --cov=q2_sample_classifier
+
+install:
+ $(PYTHON) setup.py install
+
+dev: all
+ pip install -e .
+
+clean: distclean
+
+distclean: ;
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8c7c4de
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# q2-sample-classifier
+
+![](https://github.com/qiime2/q2-sample-classifier/workflows/ci/badge.svg)
+
+This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
\ No newline at end of file
diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml
new file mode 100644
index 0000000..94f805c
--- /dev/null
+++ b/ci/recipe/meta.yaml
@@ -0,0 +1,49 @@
+{% set data = load_setup_py_data() %}
+{% set version = data.get('version') or 'placehold' %}
+
+package:
+ name: q2-sample-classifier
+ version: {{ version }}
+
+source:
+ path: ../..
+
+build:
+ script: make install
+
+requirements:
+ host:
+ - python {{ python }}
+ - setuptools
+
+ run:
+ - python {{ python }}
+ - pandas {{ pandas }}
+ - scipy {{ scipy }}
+ - numpy {{ numpy }}
+ - joblib
+ - scikit-learn {{ scikit_learn }}
+ - scikit-bio {{ scikit_bio }}
+ - seaborn >=0.8
+ - fastcluster
+ - qiime2 {{ qiime2_epoch }}.*
+ - q2-types {{ qiime2_epoch }}.*
+ - q2templates {{ qiime2_epoch }}.*
+ - q2-feature-table {{ qiime2_epoch }}.*
+
+test:
+ requires:
+ - qiime2 >={{ qiime2 }}
+ - q2-types >={{ q2_types }}
+ - q2templates >={{ q2templates }}
+ - q2-feature-table >={{ q2_feature_table }}
+ - pytest
+
+ imports:
+ - q2_sample_classifier
+ - qiime2.plugins.sample_classifier
+
+about:
+ home: https://qiime2.org
+ license: BSD-3-Clause
+ license_family: BSD
diff --git a/paper/fig1.png b/paper/fig1.png
new file mode 100644
index 0000000..e48e536
--- /dev/null
+++ b/paper/fig1.png
Binary files differ
diff --git a/paper/paper.md b/paper/paper.md
new file mode 100644
index 0000000..2cfe3d3
--- /dev/null
+++ b/paper/paper.md
@@ -0,0 +1,58 @@
+---
+title: 'q2-sample-classifier: machine-learning tools for microbiome classification and regression'
+tags:
+- microbiome
+- supervised learning
+- amplicon sequencing
+- metagenomics
+authors:
+- name: Nicholas A Bokulich
+ orcid: 0000-0002-1784-8935
+ affiliation: 1
+- name: Matthew R Dillon
+ orcid: 0000-0002-7713-1952
+ affiliation: 1
+- name: Evan Bolyen
+ orcid: 0000-0002-5362-6782
+ affiliation: 1
+- name: Benjamin D Kaehler
+ orcid: 0000-0002-5318-9551
+ affiliation: 2
+- name: Gavin A Huttley
+ orcid: 0000-0001-7224-2074
+ affiliation: 2
+- name: J Gregory Caporaso
+ orcid: 0000-0002-8865-1670
+ affiliation: "1, 3"
+affiliations:
+- name: The Pathogen and Microbiome Institute, Northern Arizona University, Flagstaff, AZ, USA
+ index: 1
+- name: Research School of Biology, Australian National University, Canberra, Australia
+ index: 2
+- name: Department of Biological Sciences, Northern Arizona University, Flagstaff, AZ, USA
+ index: 3
+date: 8 August 2018
+bibliography: references.bib
+---
+
+# Summary
+q2-sample-classifier is a plugin for the [QIIME 2](https://qiime2.org/) microbiome bioinformatics platform that facilitates access, reproducibility, and interpretation of supervised learning (SL) methods for a broad audience of non-bioinformatics specialists.
+
+Microbiome studies often aim to predict outcomes or differentiate samples based on their microbial compositions, tasks that can be efficiently performed by SL methods [@Knights2011-ow]. The goal of SL is to train a machine learning model on a set of samples with known target values/class labels, and then use that model to predict the target values/class membership of additional, unlabeled samples. The ability to categorize new samples, as opposed to describing the structure of existing data, lends itself to many useful applications, e.g., the prediction of disease/susceptibility [@Yazdani2016-ih; @Schubert2015-da; @Pasolli2016-qi], crop productivity [@Chang2017-bq], wine chemical composition [@Bokulich2016-ea], or sample collection site [@Bokulich2013-go]; the identification of mislabeled samples in microbiome data sets [@Knights2011-ow]; or tracking microbiota-for-age development in children [@Subramanian2014-ch; @Bokulich2016-wa].
+
+We describe [q2-sample-classifier](https://github.com/qiime2/q2-sample-classifier), a [QIIME 2 plugin](https://qiime2.org/) to support SL tools for pattern recognition in microbiome data. This plugin provides several SL methods, automatic parameter tuning, feature selection, and various learning algorithms. The visualizations generated provide portable, shareable reports, publication-ready figures, and integrated decentralized data provenance. Additionally, integration as a QIIME 2 plugin streamlines data handling and supports the use of multiple user interfaces, including a prototype graphical user interface ([q2studio](https://github.com/qiime2/q2studio)), facilitating its use for non-expert users. The plugin is freely available under the BSD-3-Clause license at https://github.com/qiime2/q2-sample-classifier.
+
+The q2-sample-classifier plugin is written in Python 3.5 and employs pandas [@McKinney2010-lu] and numpy [@Van_der_Walt2011-rv] for data manipulation, scikit-learn [@Pedregosa2011-vr] for SL and feature selection algorithms, scipy [@scipy] for statistical testing, and matplotlib [@Hunter2007-vy] and seaborn [@michael_waskom_2017_883859] for data visualization. The plugin is compatible with macOS and Linux operating systems.
+
+The standard workflow for classification and regression in q2-sample-classifier is shown in Figure 1. All q2-sample-classifier actions accept a feature table (i.e., matrix of feature counts per sample) and sample metadata (prediction targets) as input. Feature observations for q2-sample-classifier would commonly consist of microbial counts (e.g., amplicon sequence variants, operational taxonomic units, or taxa detected by marker-gene or shotgun metagenome sequencing methods), but any observation data, such as gene, transcript, protein, or metabolite abundance, could be provided as input. Input samples are shuffled and split into training and test sets at a user-defined ratio (default: 4:1) with or without stratification (equal sampling per class label; stratified by default); test samples are left out of all model training steps and are only used for final model validation.
+
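+A minimal sketch of this splitting step using scikit-learn directly (the toy arrays `X` and `y` below are hypothetical stand-ins for a feature table and metadata targets, not the plugin's actual API):
+
+```python
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+rng = np.random.default_rng(0)
+X = rng.poisson(5, size=(20, 8))         # toy feature table: 20 samples x 8 features
+y = np.repeat(["healthy", "sick"], 10)   # toy class labels from sample metadata
+
+# Shuffle and split samples 4:1 (the plugin's default), stratified by class label
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, shuffle=True, stratify=y, random_state=0)
+```
+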
+![Workflow schematic (A) and output data and visualizations (B-E) for q2-sample-classifier. Data splitting, model training, and testing (A) can be accompanied by automatic hyperparameter optimization (OPT) and recursive feature elimination for feature selection (RFE). Outputs include trained estimators for re-use on additional samples, lists of feature importance (B), RFE results if RFE is enabled (C), and predictions and accuracy results, including either confusion matrix heatmaps for classification results (D) or scatter plots of true vs. predicted values for regression results (E).](fig1.png)
+
+The user can enable automatic feature selection and hyperparameter tuning, and can select the number of cross-validation folds to use for each (default: 5). Feature selection is performed using cross-validated recursive feature elimination via scikit-learn’s [RFECV](http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html) to select the features that maximize predictive accuracy. Hyperparameter tuning is automatically performed using a cross-validated randomized parameter grid search via scikit-learn’s [RandomizedSearchCV](http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html) to find hyperparameter permutations (within a sensible range) that maximize accuracy.
+
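+Those two steps can be sketched with the same scikit-learn utilities (continuing the toy example above; an illustration of the approach, not the plugin's exact internal calls):
+
+```python
+from scipy.stats import randint
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.feature_selection import RFECV
+from sklearn.model_selection import RandomizedSearchCV
+
+# Cross-validated recursive feature elimination (5-fold, matching the default)
+selector = RFECV(RandomForestClassifier(random_state=0), cv=5, scoring="accuracy")
+selector.fit(X_train, y_train)
+
+# Randomized search over a sensible hyperparameter range (also 5-fold)
+search = RandomizedSearchCV(
+    RandomForestClassifier(random_state=0),
+    param_distributions={"n_estimators": randint(50, 500),
+                         "max_features": ["sqrt", "log2", None]},
+    n_iter=10, cv=5, random_state=0)
+search.fit(X_train[:, selector.support_], y_train)
+```
+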
+The following scikit-learn [@Pedregosa2011-vr] SL estimators are currently implemented in q2-sample-classifier: AdaBoost [@Freund1997-vv], Extra Trees [@Geurts2006-tv], Gradient boosting [@Friedman2002-vw], and Random Forest [@Breiman2001-ei] ensemble classifiers and regressors; linear SVC, linear SVR, and non-linear SVR support vector machine classifiers/regressors [@Cortes1995-jv]; k-Neighbors classifiers/regressors [@Altman1992-fo]; and Elastic Net [@Zou2005-py], Ridge [@Hoerl1970-sr], and Lasso [@Tibshirani1996-nt] regression models.
+
+# Acknowledgments
+The authors thank Jai Ram Rideout for his input and assistance integrating q2-sample-classifier with QIIME 2. This work was supported by the National Science Foundation [1565100 to JGC], and by the National Institutes of Health / National Cancer Institute Partnership for Native American Cancer Prevention [U54CA143924 and U54CA143925 to JGC].
+
+# References
diff --git a/paper/references.bib b/paper/references.bib
new file mode 100644
index 0000000..9c4bdf8
--- /dev/null
+++ b/paper/references.bib
@@ -0,0 +1,324 @@
+@ARTICLE{Altman1992-fo,
+ title = "An Introduction to Kernel and {Nearest-Neighbor} Nonparametric
+ Regression",
+ author = "Altman, N S",
+ journal = "Am. Stat.",
+ volume = 46,
+ number = 3,
+ pages = "175",
+ year = 1992,
+ doi = {10.1080/00031305.1992.10475879},
+}
+
+@ARTICLE{Knights2011-ow,
+ title = "Supervised classification of microbiota mitigates mislabeling
+ errors",
+ author = "Knights, Dan and Kuczynski, Justin and Koren, Omry and Ley, Ruth
+ E and Field, Dawn and Knight, Rob and DeSantis, Todd Z and
+ Kelley, Scott T",
+ journal = "ISME J.",
+ volume = 5,
+ number = 4,
+ pages = "570--573",
+ month = apr,
+ year = 2011,
+ doi = {10.1038/ismej.2010.148},
+ language = "en"
+}
+
+@ARTICLE{Freund1997-vv,
+ title = "A Decision-Theoretic Generalization of
+ on-Line Learning and an Application to Boosting",
+ author = "Freund, Y and Schapire, R",
+ journal = "J. Comput. System Sci.",
+ volume = 55,
+ pages = "119--139",
+ year = 1997,
+ doi = "10.1006/jcss.1997.1504"
+}
+
+@INPROCEEDINGS{Yazdani2016-ih,
+ title = "Using machine learning to identify major shifts in human gut
+ microbiome protein family abundance in disease",
+ booktitle = "2016 {IEEE} International Conference on Big Data (Big Data)",
+ author = "Yazdani, Mehrdad and Taylor, Bryn C and Debelius, Justine W and
+ Li, Weizhong and Knight, Rob and Smarr, Larry",
+ year = 2016,
+ doi = "10.1109/BigData.2016.7840731"
+}
+
+@ARTICLE{Geurts2006-tv,
+ title = "Extremely randomized trees",
+ author = "Geurts, P and Ernst, D and Wehenkel, L",
+ journal = "Mach. Learn.",
+ volume = 63,
+ number = 1,
+ pages = "3--42",
+ year = 2006,
+ doi = "10.1007/s10994-006-6226-1"
+}
+
+@ARTICLE{Pasolli2016-qi,
+ title = "Machine Learning Meta-analysis of Large Metagenomic Datasets:
+ Tools and Biological Insights",
+ author = "Pasolli, Edoardo and Truong, Duy Tin and Malik, Faizan and
+ Waldron, Levi and Segata, Nicola",
+ journal = "PLoS Comput. Biol.",
+ volume = 12,
+ number = 7,
+ pages = "e1004977",
+ month = jul,
+ year = 2016,
+ doi = "10.1371/journal.pcbi.1004977",
+ language = "en"
+}
+
+@ARTICLE{Hoerl1970-sr,
+ title = "Ridge Regression: Biased Estimation for Nonorthogonal Problems",
+ author = "Hoerl, Arthur E and Kennard, Robert W",
+ journal = "Technometrics",
+ volume = 12,
+ number = 1,
+ pages = "55--67",
+ year = 1970,
+ doi = {10.1080/00401706.1970.10488634},
+}
+
+@ARTICLE{Zou2005-py,
+ title = "Regularization and Variable Selection via the Elastic Net",
+ author = "Zou, Hui and Hastie, Trevor",
+ journal = "J. R. Stat. Soc. Series B Stat. Methodol.",
+ volume = 67,
+ pages = "301--320",
+ year = 2005,
+ doi = "10.1111/j.1467-9868.2005.00503.x"
+}
+
+@ARTICLE{Chang2017-bq,
+ title = "Metagenome-Wide Association Study and Machine Learning
+ Prediction of Bulk Soil Microbiome and Crop Productivity",
+ author = "Chang, Hao-Xun and Haudenshield, James S and Bowen, Charles R and
+ Hartman, Glen L",
+ journal = "Front. Microbiol.",
+ volume = 8,
+ pages = "519",
+ month = apr,
+ year = 2017,
+ keywords = "machine learning; metagenome-wide association study; microbiome;
+ nitrogen fixation; productivity; random forest; rhizobium;
+ soybeans",
+ language = "en",
+ doi = "10.3389/fmicb.2017.00519"
+}
+
+@ARTICLE{Bokulich2016-ea,
+ title = "Associations among Wine Grape Microbiome, Metabolome, and
+ Fermentation Behavior Suggest Microbial Contribution to Regional
+ Wine Characteristics",
+ author = "Bokulich, Nicholas A and Collins, Thomas S and Masarweh, Chad and
+ Allen, Greg and Heymann, Hildegarde and Ebeler, Susan E and
+ Mills, David A",
+ journal = "MBio",
+ volume = 7,
+ number = 3,
+ month = jun,
+ year = 2016,
+ language = "en",
+ doi = "10.1128/mBio.00631-16"
+}
+
+@ARTICLE{Schubert2015-da,
+ title = "Antibiotic-Induced Alterations of the Murine Gut Microbiota and
+ Subsequent Effects on Colonization Resistance against Clostridium
+ difficile",
+ author = "Schubert, Alyxandria M and Sinani, Hamide and Schloss, Patrick D",
+ journal = "MBio",
+ volume = 6,
+ number = 4,
+ pages = "e00974",
+ month = jul,
+ year = 2015,
+ language = "en",
+ doi = "10.1128/mBio.00974-15"
+}
+
+@ARTICLE{Bokulich2013-go,
+ title = "Microbial biogeography of wine grapes
+ is conditioned by cultivar, vintage, and climate",
+ author = "Bokulich, Nicholas A and Thorngate, J H and Richardson, P M and Mills, D
+ A",
+ journal = "Proceedings of the National Academy of Sciences",
+ volume = 111,
+ number = 1,
+ pages = "E139--E148",
+ year = 2013,
+ doi = "10.1073/pnas.1317377110"
+}
+
+@ARTICLE{Tibshirani1996-nt,
+ title = "Regression Shrinkage and Selection via the lasso",
+ author = "Tibshirani, Robert",
+ journal = "J. R. Stat. Soc. Series B Stat. Methodol.",
+ volume = 58,
+ number = 1,
+ pages = "267--288",
+ year = 1996
+}
+
+@ARTICLE{Van_der_Walt2011-rv,
+ title = "The NumPy Array: A Structure for Efficient Numerical Computation",
+ author = "S. van der Walt and S. C. Colbert and G. Varoquaux",
+ journal = "Comput. Sci. Eng.",
+ volume = 13,
+ number = 2,
+ pages = "22--30",
+ year = 2011,
+ doi = "10.1109/MCSE.2011.37"
+}
+
+@ARTICLE{Subramanian2014-ch,
+ title = "Persistent gut microbiota immaturity in malnourished Bangladeshi
+ children",
+ author = "Subramanian, Sathish and Huq, Sayeeda and Yatsunenko, Tanya and
+ Haque, Rashidul and Mahfuz, Mustafa and Alam, Mohammed A and
+ Benezra, Amber and DeStefano, Joseph and Meier, Martin F and
+ Muegge, Brian D and Barratt, Michael J and VanArendonk, Laura G
+ and Zhang, Qunyuan and Province, Michael A and Petri, Jr, William
+ A and Ahmed, Tahmeed and Gordon, Jeffrey I",
+ journal = "Nature",
+ volume = 510,
+ number = 7505,
+ pages = "417--421",
+ month = jun,
+ year = 2014,
+ language = "en",
+ doi = "10.1038/nature13421"
+}
+
+@ARTICLE{Friedman2002-vw,
+ title = "Stochastic gradient boosting",
+ author = "Friedman, Jerome H",
+ journal = "Comput. Stat. Data Anal.",
+ volume = 38,
+ number = 4,
+ pages = "367--378",
+ year = 2002,
+ doi = "10.1016/S0167-9473(01)00065-2"
+}
+
+@ARTICLE{Breiman2001-ei,
+ title = "Random Forests",
+ author = "Breiman, L",
+ journal = "Mach. Learn.",
+ volume = 45,
+ number = 1,
+ pages = "5--32",
+ year = 2001,
+ doi = "10.1023/A:1010933404324",
+}
+
+@ARTICLE{Bokulich2016-wa,
+ title = "Antibiotics, birth mode, and diet shape microbiome maturation
+ during early life",
+ author = "Bokulich, Nicholas A and Chung, Jennifer and Battaglia, Thomas
+ and Henderson, Nora and Jay, Melanie and Li, Huilin and D Lieber,
+ Arnon and Wu, Fen and Perez-Perez, Guillermo I and Chen, Yu and
+ Schweizer, William and Zheng, Xuhui and Contreras, Monica and
+ Dominguez-Bello, Maria Gloria and Blaser, Martin J",
+ journal = "Sci. Transl. Med.",
+ volume = 8,
+ number = 343,
+ pages = "343ra82",
+ month = jun,
+ year = 2016,
+ language = "en",
+ doi = "10.1126/scitranslmed.aad7121"
+}
+
+@ARTICLE{Hunter2007-vy,
+ title = "Matplotlib: A {2D} Graphics Environment",
+ author = "Hunter, John D",
+ journal = "Comput. Sci. Eng.",
+ volume = 9,
+ number = 3,
+ pages = "90--95",
+ year = 2007,
+ doi = "10.1109/MCSE.2007.55"
+}
+
+@ARTICLE{McKinney2010-lu,
+ title = "Data Structures for Statistical Computing in Python",
+ author = "McKinney, W",
+ journal = "Proceedings of the 9th Python in Science Conference",
+ pages = "51--56",
+ year = 2010
+}
+
+@ARTICLE{Pedregosa2011-vr,
+ title = "Scikit-learn: Machine Learning in {P}ython",
+ author = "Pedregosa, F and Varoquaux, G and Gramfort, A and Michel, V and
+ Thirion, B and Grisel, O and Blondel, M and Prettenhofer, P and
+ Weiss, R and Dubourg, V and Vanderplas, J and Passos, A and
+ Cournapeau, D and Brucher, M and Perrot, M and Duchesnay, E",
+ journal = "J. Mach. Learn. Res.",
+ volume = 12,
+ pages = "2825--2830",
+ year = 2011
+}
+
+@ARTICLE{Cortes1995-jv,
+ title = "Support-vector networks",
+ author = "Cortes, C and Vapnik, V",
+ journal = "Mach. Learn.",
+ volume = 20,
+ number = 3,
+ pages = "273--297",
+ year = 1995,
+ doi = "10.1007/BF00994018"
+}
+
+@Misc{scipy,
+ author = {Jones, Eric and Oliphant, Travis and Peterson, Pearu and others},
+ title = {{SciPy}: Open source scientific tools for {Python}},
+ year = {2001},
+ url = "http://www.scipy.org/",
+ note = {[Online; 2018-09-07]}
+}
+
+@misc{michael_waskom_2017_883859,
+ author = {Michael Waskom and
+ Olga Botvinnik and
+ Drew O'Kane and
+ Paul Hobson and
+ Saulius Lukauskas and
+ David C Gemperline and
+ Tom Augspurger and
+ Yaroslav Halchenko and
+ John B. Cole and
+ Jordi Warmenhoven and
+ Julian de Ruiter and
+ Cameron Pye and
+ Stephan Hoyer and
+ Jake Vanderplas and
+ Santi Villalba and
+ Gero Kunter and
+ Eric Quintero and
+ Pete Bachant and
+ Marcel Martin and
+ Kyle Meyer and
+ Alistair Miles and
+ Yoav Ram and
+ Tal Yarkoni and
+ Mike Lee Williams and
+ Constantine Evans and
+ Clark Fitzgerald and
+ Brian and
+ Chris Fonnesbeck and
+ Antony Lee and
+ Adel Qalieh},
+ title = {mwaskom/seaborn: v0.8.1 (September 2017)},
+ month = sep,
+ year = 2017,
+ doi = {10.5281/zenodo.883859},
+ url = {https://doi.org/10.5281/zenodo.883859}
+}
diff --git a/q2_sample_classifier/__init__.py b/q2_sample_classifier/__init__.py
new file mode 100644
index 0000000..e7a023f
--- /dev/null
+++ b/q2_sample_classifier/__init__.py
@@ -0,0 +1,31 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from ._format import (
+ BooleanSeriesFormat, BooleanSeriesDirectoryFormat,
+ PredictionsFormat, PredictionsDirectoryFormat, ImportanceFormat,
+ ImportanceDirectoryFormat, SampleEstimatorDirFmt, PickleFormat,
+ ProbabilitiesFormat, ProbabilitiesDirectoryFormat,
+ TrueTargetsDirectoryFormat)
+from ._type import (BooleanSeries, ClassifierPredictions, RegressorPredictions,
+ Importance, SampleEstimator, Classifier, Regressor,
+ Probabilities, TrueTargets)
+from ._version import get_versions
+
+
+__version__ = get_versions()['version']
+del get_versions
+
+__all__ = ['BooleanSeriesFormat', 'BooleanSeriesDirectoryFormat',
+ 'PredictionsFormat', 'PredictionsDirectoryFormat',
+ 'ImportanceFormat', 'ImportanceDirectoryFormat',
+ 'SampleEstimatorDirFmt', 'PickleFormat', 'BooleanSeries',
+ 'ClassifierPredictions', 'RegressorPredictions', 'Importance',
+ 'Classifier', 'Regressor', 'SampleEstimator', 'Probabilities',
+ 'ProbabilitiesFormat', 'ProbabilitiesDirectoryFormat',
+ 'TrueTargets', 'TrueTargetsDirectoryFormat']
diff --git a/q2_sample_classifier/_format.py b/q2_sample_classifier/_format.py
new file mode 100644
index 0000000..fe125f3
--- /dev/null
+++ b/q2_sample_classifier/_format.py
@@ -0,0 +1,183 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import tarfile
+import json
+
+import qiime2.plugin.model as model
+from qiime2.plugin import ValidationError
+
+
+def _validate_record_len(cells, current_line_number, exp_len):
+ if len(cells) != exp_len:
+ raise ValidationError(
+ "Expected data record to be TSV with {0} "
+ "fields. Detected {1} fields at line {2}:\n\n{3!r}"
+ .format(exp_len, len(cells), current_line_number, cells))
+
+
+def _validate_file_not_empty(has_data):
+ if not has_data:
+ raise ValidationError(
+ "There must be at least one data record present in the "
+ "file in addition to the header line.")
+
+
+class BooleanSeriesFormat(model.TextFileFormat):
+ def _validate_(self, level):
+ n_records = {'min': 5, 'max': None}[level]
+ with self.open() as fh:
+ # validate header
+ # for now we will not validate any information in the header.
+ line = fh.readline()
+
+ # validate body
+ has_data = False
+ for line_number, line in enumerate(fh, start=2):
+ cells = line.strip().split('\t')
+ _validate_record_len(cells, line_number, 2)
+ if str(cells[1]) not in ('True', 'False'):
+ raise ValidationError(
+ "Expected data to be comprised of values `True` and "
+ "`False`, found {0} at line {1}."
+ .format(str(cells[1]), line_number))
+ has_data = True
+ if n_records is not None and (line_number - 1) >= n_records:
+ break
+
+ _validate_file_not_empty(has_data)
+
+
+BooleanSeriesDirectoryFormat = model.SingleFileDirectoryFormat(
+ 'BooleanSeriesDirectoryFormat', 'outliers.tsv',
+ BooleanSeriesFormat)
+
+
+# This is effectively an internal format - it isn't registered with the
+# plugin, but rather used as part of a dir fmt. This format also exists
+# in q2-feature-classifier.
+class PickleFormat(model.BinaryFileFormat):
+ def _validate_(self, level):
+ if not tarfile.is_tarfile(str(self)):
+ raise ValidationError(
+ "Unable to load pickled file (not a tar file).")
+
+
+# https://github.com/qiime2/q2-types/issues/49
+# This is effectively an internal format - it isn't registered with the
+# plugin, but rather used as part of a dir fmt. This format also exists
+# in q2-feature-classifier.
+class JSONFormat(model.TextFileFormat):
+ def _validate_(self, level):
+ with self.open() as fh:
+ try:
+ json.load(fh)
+ except json.JSONDecodeError as e:
+ raise ValidationError(e)
+
+
+class SampleEstimatorDirFmt(model.DirectoryFormat):
+ version_info = model.File('sklearn_version.json', format=JSONFormat)
+ sklearn_pipeline = model.File('sklearn_pipeline.tar', format=PickleFormat)
+
+
+class PredictionsFormat(model.TextFileFormat):
+ def _validate(self, n_records=None):
+ with self.open() as fh:
+ # validate header
+ # for now we will not validate any information in the header,
+ # since the name of the predicted column should be flexible. The
+ # header name written by methods in q2-sample-classifier will be
+ # "predicted-*", but this should also accommodate user-defined
+ # column names.
+ line = fh.readline()
+
+ # validate body
+ has_data = False
+ for line_number, line in enumerate(fh, start=2):
+ # we want to strip each cell, not the original line
+ # otherwise empty cells are dropped, causing a TypeError
+ cells = [c.strip() for c in line.split('\t')]
+ _validate_record_len(cells, line_number, 2)
+ has_data = True
+ if n_records is not None and (line_number - 1) >= n_records:
+ break
+
+ _validate_file_not_empty(has_data)
+
+ def _validate_(self, level):
+ record_count_map = {'min': 5, 'max': None}
+ self._validate(record_count_map[level])
+
+
+PredictionsDirectoryFormat = model.SingleFileDirectoryFormat(
+ 'PredictionsDirectoryFormat', 'predictions.tsv',
+ PredictionsFormat)
+
+
+class _MultiColumnNumericFormat(model.TextFileFormat):
+ def _validate(self, n_records=None):
+ with self.open() as fh:
+ # validate header
+ # for now we will not validate any information in the header,
+            # since column names, counts, etc. are frequently unique to individual
+ # estimators. Let's keep this flexible.
+ line = fh.readline()
+
+ # validate body
+ has_data = False
+ for line_number, line in enumerate(fh, start=2):
+ # we want to strip each cell, not the original line
+ # otherwise empty cells are dropped, causing a TypeError
+ cells = [c.strip() for c in line.split('\t')]
+ if len(cells) < 2:
+ raise ValidationError(
+ "Expected data record to be TSV with two or more "
+ "fields. Detected {0} fields at line {1}:\n\n{2!r}"
+ .format(len(cells), line_number, cells))
+ # all values (except row name) should be numbers
+ try:
+ [float(c) for c in cells[1:]]
+ except ValueError:
+ raise ValidationError(
+ "Columns must contain only numeric values. "
+ "A non-numeric value ({0!r}) was detected at line "
+ "{1}.".format(cells[1], line_number))
+
+ has_data = True
+ if n_records is not None and (line_number - 1) >= n_records:
+ break
+
+ _validate_file_not_empty(has_data)
+
+ def _validate_(self, level):
+ record_count_map = {'min': 5, 'max': None}
+ self._validate(record_count_map[level])
+
+
+class ImportanceFormat(_MultiColumnNumericFormat):
+ pass
+
+
+ImportanceDirectoryFormat = model.SingleFileDirectoryFormat(
+ 'ImportanceDirectoryFormat', 'importance.tsv',
+ ImportanceFormat)
+
+
+class ProbabilitiesFormat(_MultiColumnNumericFormat):
+ pass
+
+
+ProbabilitiesDirectoryFormat = model.SingleFileDirectoryFormat(
+ 'ProbabilitiesDirectoryFormat', 'class_probabilities.tsv',
+ ProbabilitiesFormat)
+
+
+TrueTargetsDirectoryFormat = model.SingleFileDirectoryFormat(
+ 'TrueTargetsDirectoryFormat', 'true_targets.tsv',
+ PredictionsFormat)
diff --git a/q2_sample_classifier/_transformer.py b/q2_sample_classifier/_transformer.py
new file mode 100644
index 0000000..04299ed
--- /dev/null
+++ b/q2_sample_classifier/_transformer.py
@@ -0,0 +1,176 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import os
+import tarfile
+import json
+
+import pandas as pd
+import numpy as np
+import qiime2
+import qiime2.plugin.model as model
+import sklearn
+import joblib
+from sklearn.pipeline import Pipeline
+
+from .plugin_setup import plugin
+from ._format import (SampleEstimatorDirFmt, JSONFormat, BooleanSeriesFormat,
+ ImportanceFormat, PredictionsFormat, PickleFormat,
+ ProbabilitiesFormat)
+
+
+def _read_dataframe(fh):
+    # Using `dtype='str'` and `set_index` to avoid type casting/inference
+    # of any columns or the index.
+ df = pd.read_csv(fh, sep='\t', header=0, dtype='str')
+ df.set_index(df.columns[0], drop=True, append=False, inplace=True)
+ df.index.name = 'id'
+ return df
+
+
+@plugin.register_transformer
+def _1(data: pd.Series) -> (BooleanSeriesFormat):
+ ff = BooleanSeriesFormat()
+ with ff.open() as fh:
+ data.to_csv(fh, sep='\t', header=True)
+ return ff
+
+
+@plugin.register_transformer
+def _2(ff: BooleanSeriesFormat) -> (pd.Series):
+ with ff.open() as fh:
+ df = _read_dataframe(fh)
+ return df.iloc[:, 0]
+
+
+@plugin.register_transformer
+def _3(ff: BooleanSeriesFormat) -> (qiime2.Metadata):
+ with ff.open() as fh:
+ return qiime2.Metadata(_read_dataframe(fh))
+
+
+@plugin.register_transformer
+def _4(data: pd.Series) -> (PredictionsFormat):
+ ff = PredictionsFormat()
+ with ff.open() as fh:
+ data.to_csv(fh, sep='\t', header=True)
+ return ff
+
+
+@plugin.register_transformer
+def _5(ff: PredictionsFormat) -> (pd.Series):
+ with ff.open() as fh:
+ df = _read_dataframe(fh)
+ return pd.to_numeric(df.iloc[:, 0], errors='ignore')
+
+
+@plugin.register_transformer
+def _6(ff: PredictionsFormat) -> (qiime2.Metadata):
+ with ff.open() as fh:
+ return qiime2.Metadata(_read_dataframe(fh).apply(
+ lambda x: pd.to_numeric(x, errors='ignore')))
+
+
+@plugin.register_transformer
+def _7(data: pd.DataFrame) -> (ImportanceFormat):
+ ff = ImportanceFormat()
+ with ff.open() as fh:
+ data.to_csv(fh, sep='\t', header=True, na_rep=np.nan)
+ return ff
+
+
+@plugin.register_transformer
+def _8(ff: ImportanceFormat) -> (pd.DataFrame):
+ with ff.open() as fh:
+ return _read_dataframe(fh).apply(
+ lambda x: pd.to_numeric(x, errors='raise'))
+
+
+@plugin.register_transformer
+def _9(ff: ImportanceFormat) -> (qiime2.Metadata):
+ with ff.open() as fh:
+ return qiime2.Metadata(_read_dataframe(fh).apply(
+ lambda x: pd.to_numeric(x, errors='raise')))
+
+
+@plugin.register_transformer
+def _10(data: pd.DataFrame) -> (ProbabilitiesFormat):
+ ff = ProbabilitiesFormat()
+ with ff.open() as fh:
+ data.to_csv(fh, sep='\t', na_rep=np.nan, header=True)
+ return ff
+
+
+@plugin.register_transformer
+def _11(ff: ProbabilitiesFormat) -> (pd.DataFrame):
+ with ff.open() as fh:
+ return _read_dataframe(fh).apply(
+ lambda x: pd.to_numeric(x, errors='raise'))
+
+
+@plugin.register_transformer
+def _12(ff: ProbabilitiesFormat) -> (qiime2.Metadata):
+ with ff.open() as fh:
+ return qiime2.Metadata(_read_dataframe(fh).apply(
+ lambda x: pd.to_numeric(x, errors='raise')))
+
+
+@plugin.register_transformer
+def _a(dirfmt: SampleEstimatorDirFmt) -> Pipeline:
+ sklearn_version = dirfmt.version_info.view(dict)['sklearn-version']
+ if sklearn_version != sklearn.__version__:
+ raise ValueError('The scikit-learn version (%s) used to generate this'
+ ' artifact does not match the current version'
+ ' of scikit-learn installed (%s). Please retrain your'
+ ' classifier for your current deployment to prevent'
+ ' data-corruption errors.'
+ % (sklearn_version, sklearn.__version__))
+
+ sklearn_pipeline = dirfmt.sklearn_pipeline.view(PickleFormat)
+
+ with tarfile.open(str(sklearn_pipeline)) as tar:
+ tmpdir = model.DirectoryFormat()
+ dirname = str(tmpdir)
+ tar.extractall(dirname)
+ pipeline = joblib.load(os.path.join(dirname, 'sklearn_pipeline.pkl'))
+ for fn in tar.getnames():
+ os.unlink(os.path.join(dirname, fn))
+
+ return pipeline
+
+
+@plugin.register_transformer
+def _b(data: Pipeline) -> SampleEstimatorDirFmt:
+ sklearn_pipeline = PickleFormat()
+ with tarfile.open(str(sklearn_pipeline), 'w') as tar:
+ tmpdir = model.DirectoryFormat()
+ pf = os.path.join(str(tmpdir), 'sklearn_pipeline.pkl')
+ for fn in joblib.dump(data, pf):
+ tar.add(fn, os.path.basename(fn))
+ os.unlink(fn)
+
+ dirfmt = SampleEstimatorDirFmt()
+ dirfmt.version_info.write_data(
+ {'sklearn-version': sklearn.__version__}, dict)
+ dirfmt.sklearn_pipeline.write_data(sklearn_pipeline, PickleFormat)
+
+ return dirfmt
+
+
+@plugin.register_transformer
+def _d(fmt: JSONFormat) -> dict:
+ with fmt.open() as fh:
+ return json.load(fh)
+
+
+@plugin.register_transformer
+def _e(data: dict) -> JSONFormat:
+ result = JSONFormat()
+ with result.open() as fh:
+ json.dump(data, fh)
+ return result
diff --git a/q2_sample_classifier/_type.py b/q2_sample_classifier/_type.py
new file mode 100644
index 0000000..66a4e82
--- /dev/null
+++ b/q2_sample_classifier/_type.py
@@ -0,0 +1,30 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from qiime2.plugin import SemanticType
+from q2_types.sample_data import SampleData
+from q2_types.feature_data import FeatureData
+
+
+ClassifierPredictions = SemanticType(
+ 'ClassifierPredictions', variant_of=SampleData.field['type'])
+RegressorPredictions = SemanticType(
+ 'RegressorPredictions', variant_of=SampleData.field['type'])
+SampleEstimator = SemanticType('SampleEstimator', field_names='type')
+Classifier = SemanticType(
+ 'Classifier', variant_of=SampleEstimator.field['type'])
+Regressor = SemanticType(
+ 'Regressor', variant_of=SampleEstimator.field['type'])
+BooleanSeries = SemanticType(
+ 'BooleanSeries', variant_of=SampleData.field['type'])
+Importance = SemanticType(
+ 'Importance', variant_of=FeatureData.field['type'])
+Probabilities = SemanticType(
+ 'Probabilities', variant_of=SampleData.field['type'])
+TrueTargets = SemanticType(
+ 'TrueTargets', variant_of=SampleData.field['type'])
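
These semantic types only become concrete artifact types once combined with their containers (SampleData, FeatureData, SampleEstimator). A short sketch mirroring the import performed in classify.py below; the sample IDs and labels are made-up example data:

```python
# a short sketch; sample IDs and labels are made-up example data.
import pandas as pd
import qiime2

predictions = pd.Series(
    ['gut', 'tongue'],
    index=pd.Index(['sample-1', 'sample-2'], name='SampleID'),
    name='prediction')

# 'SampleData[ClassifierPredictions]' pairs the SampleData container
# with the ClassifierPredictions variant registered above.
artifact = qiime2.Artifact.import_data(
    'SampleData[ClassifierPredictions]', predictions)
```
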
diff --git a/q2_sample_classifier/_version.py b/q2_sample_classifier/_version.py
new file mode 100644
index 0000000..97323c1
--- /dev/null
+++ b/q2_sample_classifier/_version.py
@@ -0,0 +1,520 @@
+
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by github's download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.18 (https://github.com/warner/python-versioneer)
+
+"""Git implementation of _version.py."""
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+
+def get_keywords():
+ """Get the keywords needed to look up the version information."""
+ # these strings will be replaced by git during git-archive.
+ # setup.py/versioneer.py will grep for the variable names, so they must
+ # each be defined on a line of their own. _version.py will just call
+ # get_keywords().
+ git_refnames = " (HEAD -> master, tag: 2022.11.1)"
+ git_full = "f693e2087a65c868846472d3284c7ba9f00d4bfc"
+ git_date = "2022-12-21 22:30:20 +0000"
+ keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+ return keywords
+
+
+class VersioneerConfig:
+ """Container for Versioneer configuration parameters."""
+
+
+def get_config():
+ """Create, populate and return the VersioneerConfig() object."""
+ # these strings are filled in when 'setup.py versioneer' creates
+ # _version.py
+ cfg = VersioneerConfig()
+ cfg.VCS = "git"
+ cfg.style = "pep440"
+ cfg.tag_prefix = ""
+ cfg.parentdir_prefix = "q2-sample-classifier-"
+ cfg.versionfile_source = "q2_sample_classifier/_version.py"
+ cfg.verbose = False
+ return cfg
+
+
+class NotThisMethod(Exception):
+ """Exception raised if a method is not valid for the current scenario."""
+
+
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method): # decorator
+ """Decorator to mark a method as the handler for a particular VCS."""
+ def decorate(f):
+ """Store f in HANDLERS[vcs][method]."""
+ if vcs not in HANDLERS:
+ HANDLERS[vcs] = {}
+ HANDLERS[vcs][method] = f
+ return f
+ return decorate
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
+ env=None):
+ """Call the given command(s)."""
+ assert isinstance(commands, list)
+ p = None
+ for c in commands:
+ try:
+ dispcmd = str([c] + args)
+ # remember shell=False, so use git.cmd on windows, not just git
+ p = subprocess.Popen([c] + args, cwd=cwd, env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr
+ else None))
+ break
+ except EnvironmentError:
+ e = sys.exc_info()[1]
+ if e.errno == errno.ENOENT:
+ continue
+ if verbose:
+ print("unable to run %s" % dispcmd)
+ print(e)
+ return None, None
+ else:
+ if verbose:
+ print("unable to find command, tried %s" % (commands,))
+ return None, None
+ stdout = p.communicate()[0].strip()
+ if sys.version_info[0] >= 3:
+ stdout = stdout.decode()
+ if p.returncode != 0:
+ if verbose:
+ print("unable to run %s (error)" % dispcmd)
+ print("stdout was %s" % stdout)
+ return None, p.returncode
+ return stdout, p.returncode
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+ """Try to determine the version from the parent directory name.
+
+ Source tarballs conventionally unpack into a directory that includes both
+ the project name and a version string. We will also support searching up
+ two directory levels for an appropriately named parent directory
+ """
+ rootdirs = []
+
+ for i in range(3):
+ dirname = os.path.basename(root)
+ if dirname.startswith(parentdir_prefix):
+ return {"version": dirname[len(parentdir_prefix):],
+ "full-revisionid": None,
+ "dirty": False, "error": None, "date": None}
+ else:
+ rootdirs.append(root)
+ root = os.path.dirname(root) # up a level
+
+ if verbose:
+ print("Tried directories %s but none started with prefix %s" %
+ (str(rootdirs), parentdir_prefix))
+ raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+ """Extract version information from the given file."""
+ # the code embedded in _version.py can just fetch the value of these
+ # keywords. When used from setup.py, we don't want to import _version.py,
+ # so we do it with a regexp instead. This function is not used from
+ # _version.py.
+ keywords = {}
+ try:
+ f = open(versionfile_abs, "r")
+ for line in f.readlines():
+ if line.strip().startswith("git_refnames ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["refnames"] = mo.group(1)
+ if line.strip().startswith("git_full ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["full"] = mo.group(1)
+ if line.strip().startswith("git_date ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["date"] = mo.group(1)
+ f.close()
+ except EnvironmentError:
+ pass
+ return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+ """Get version information from git keywords."""
+ if not keywords:
+ raise NotThisMethod("no keywords at all, weird")
+ date = keywords.get("date")
+ if date is not None:
+ # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
+ # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
+ # -like" string, which we must then edit to make compliant), because
+ # it's been around since git-1.5.3, and it's too difficult to
+ # discover which version we're using, or to work around using an
+ # older one.
+ date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+ refnames = keywords["refnames"].strip()
+ if refnames.startswith("$Format"):
+ if verbose:
+ print("keywords are unexpanded, not using")
+ raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+ refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+ # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+ TAG = "tag: "
+ tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ if not tags:
+ # Either we're using git < 1.8.3, or there really are no tags. We use
+ # a heuristic: assume all version tags have a digit. The old git %d
+ # expansion behaves like git log --decorate=short and strips out the
+ # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+ # between branches and tags. By ignoring refnames without digits, we
+ # filter out many common branch names like "release" and
+ # "stabilization", as well as "HEAD" and "master".
+ tags = set([r for r in refs if re.search(r'\d', r)])
+ if verbose:
+ print("discarding '%s', no digits" % ",".join(refs - tags))
+ if verbose:
+ print("likely tags: %s" % ",".join(sorted(tags)))
+ for ref in sorted(tags):
+ # sorting will prefer e.g. "2.0" over "2.0rc1"
+ if ref.startswith(tag_prefix):
+ r = ref[len(tag_prefix):]
+ if verbose:
+ print("picking %s" % r)
+ return {"version": r,
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": None,
+ "date": date}
+ # no suitable tags, so version is "0+unknown", but full hex is still there
+ if verbose:
+ print("no suitable tags, using unknown + full revision id")
+ return {"version": "0+unknown",
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": "no suitable tags", "date": None}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+ """Get version from 'git describe' in the root of the source tree.
+
+ This only gets called if the git-archive 'subst' keywords were *not*
+ expanded, and _version.py hasn't already been rewritten with a short
+ version string, meaning we're inside a checked out source tree.
+ """
+ GITS = ["git"]
+ if sys.platform == "win32":
+ GITS = ["git.cmd", "git.exe"]
+
+ out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
+ hide_stderr=True)
+ if rc != 0:
+ if verbose:
+ print("Directory %s not under git control" % root)
+ raise NotThisMethod("'git rev-parse --git-dir' returned error")
+
+ # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+ # if there isn't one, this yields HEX[-dirty] (no NUM)
+ describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
+ "--always", "--long",
+ "--match", "%s*" % tag_prefix],
+ cwd=root)
+ # --long was added in git-1.5.5
+ if describe_out is None:
+ raise NotThisMethod("'git describe' failed")
+ describe_out = describe_out.strip()
+ full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+ if full_out is None:
+ raise NotThisMethod("'git rev-parse' failed")
+ full_out = full_out.strip()
+
+ pieces = {}
+ pieces["long"] = full_out
+ pieces["short"] = full_out[:7] # maybe improved later
+ pieces["error"] = None
+
+ # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+ # TAG might have hyphens.
+ git_describe = describe_out
+
+ # look for -dirty suffix
+ dirty = git_describe.endswith("-dirty")
+ pieces["dirty"] = dirty
+ if dirty:
+ git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+ # now we have TAG-NUM-gHEX or HEX
+
+ if "-" in git_describe:
+ # TAG-NUM-gHEX
+ mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+ if not mo:
+ # unparseable. Maybe git-describe is misbehaving?
+ pieces["error"] = ("unable to parse git-describe output: '%s'"
+ % describe_out)
+ return pieces
+
+ # tag
+ full_tag = mo.group(1)
+ if not full_tag.startswith(tag_prefix):
+ if verbose:
+ fmt = "tag '%s' doesn't start with prefix '%s'"
+ print(fmt % (full_tag, tag_prefix))
+ pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+ % (full_tag, tag_prefix))
+ return pieces
+ pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+ # distance: number of commits since tag
+ pieces["distance"] = int(mo.group(2))
+
+ # commit: short hex revision ID
+ pieces["short"] = mo.group(3)
+
+ else:
+ # HEX: no tags
+ pieces["closest-tag"] = None
+ count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
+ cwd=root)
+ pieces["distance"] = int(count_out) # total number of commits
+
+ # commit date: see ISO-8601 comment in git_versions_from_keywords()
+ date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
+ cwd=root)[0].strip()
+ pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+
+ return pieces
+
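+# For illustration (hypothetical values): a describe output of
+# "2022.11.1-3-gabc1234-dirty" parses to closest-tag "2022.11.1",
+# distance 3, short "abc1234", dirty True; a bare "abc1234" (no
+# reachable tag) sets closest-tag to None and distance to the total
+# commit count.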
+
+def plus_or_dot(pieces):
+ """Return a + if we don't already have one, else return a ."""
+ if "+" in pieces.get("closest-tag", ""):
+ return "."
+ return "+"
+
+
+def render_pep440(pieces):
+ """Build up version string, with post-release "local version identifier".
+
+ Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+ get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+ Exceptions:
+ 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += plus_or_dot(pieces)
+ rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
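+# Worked example (hypothetical values): closest-tag "2022.11.1",
+# distance 3, short "abc1234" and a dirty tree render as
+# "2022.11.1+3.gabc1234.dirty"; distance 0 on a clean tree renders
+# as just "2022.11.1".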
+
+def render_pep440_pre(pieces):
+ """TAG[.post.devDISTANCE] -- No -dirty.
+
+ Exceptions:
+ 1: no tags. 0.post.devDISTANCE
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"]:
+ rendered += ".post.dev%d" % pieces["distance"]
+ else:
+ # exception #1
+ rendered = "0.post.dev%d" % pieces["distance"]
+ return rendered
+
+
+def render_pep440_post(pieces):
+ """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+ The ".dev0" means dirty. Note that .dev0 sorts backwards
+ (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyway.
+
+ Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "g%s" % pieces["short"]
+ else:
+ # exception #1
+ rendered = "0.post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += "+g%s" % pieces["short"]
+ return rendered
+
+
+def render_pep440_old(pieces):
+ """TAG[.postDISTANCE[.dev0]] .
+
+ The ".dev0" means dirty.
+
+    Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ else:
+ # exception #1
+ rendered = "0.post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ return rendered
+
+
+def render_git_describe(pieces):
+ """TAG[-DISTANCE-gHEX][-dirty].
+
+ Like 'git describe --tags --dirty --always'.
+
+ Exceptions:
+ 1: no tags. HEX[-dirty] (note: no 'g' prefix)
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"]:
+ rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+ else:
+ # exception #1
+ rendered = pieces["short"]
+ if pieces["dirty"]:
+ rendered += "-dirty"
+ return rendered
+
+
+def render_git_describe_long(pieces):
+ """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+ The distance/hash is unconditional.
+
+ Exceptions:
+ 1: no tags. HEX[-dirty] (note: no 'g' prefix)
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+ else:
+ # exception #1
+ rendered = pieces["short"]
+ if pieces["dirty"]:
+ rendered += "-dirty"
+ return rendered
+
+
+def render(pieces, style):
+ """Render the given version pieces into the requested style."""
+ if pieces["error"]:
+ return {"version": "unknown",
+ "full-revisionid": pieces.get("long"),
+ "dirty": None,
+ "error": pieces["error"],
+ "date": None}
+
+ if not style or style == "default":
+ style = "pep440" # the default
+
+ if style == "pep440":
+ rendered = render_pep440(pieces)
+ elif style == "pep440-pre":
+ rendered = render_pep440_pre(pieces)
+ elif style == "pep440-post":
+ rendered = render_pep440_post(pieces)
+ elif style == "pep440-old":
+ rendered = render_pep440_old(pieces)
+ elif style == "git-describe":
+ rendered = render_git_describe(pieces)
+ elif style == "git-describe-long":
+ rendered = render_git_describe_long(pieces)
+ else:
+ raise ValueError("unknown style '%s'" % style)
+
+ return {"version": rendered, "full-revisionid": pieces["long"],
+ "dirty": pieces["dirty"], "error": None,
+ "date": pieces.get("date")}
+
+
+def get_versions():
+ """Get version information or return default if unable to do so."""
+ # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+ # __file__, we can work backwards from there to the root. Some
+ # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+ # case we can only use expanded keywords.
+
+ cfg = get_config()
+ verbose = cfg.verbose
+
+ try:
+ return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
+ verbose)
+ except NotThisMethod:
+ pass
+
+ try:
+ root = os.path.realpath(__file__)
+ # versionfile_source is the relative path from the top of the source
+ # tree (where the .git directory might live) to this file. Invert
+ # this to find the root from __file__.
+ for i in cfg.versionfile_source.split('/'):
+ root = os.path.dirname(root)
+ except NameError:
+ return {"version": "0+unknown", "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to find root of source tree",
+ "date": None}
+
+ try:
+ pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+ return render(pieces, cfg.style)
+ except NotThisMethod:
+ pass
+
+ try:
+ if cfg.parentdir_prefix:
+ return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+ except NotThisMethod:
+ pass
+
+ return {"version": "0+unknown", "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to compute version", "date": None}
diff --git a/q2_sample_classifier/assets/index.html b/q2_sample_classifier/assets/index.html
new file mode 100644
index 0000000..3d717b0
--- /dev/null
+++ b/q2_sample_classifier/assets/index.html
@@ -0,0 +1,101 @@
+{% extends 'base.html' %}
+
+{% block title %}q2-sample-classifier : {{ title }}{% endblock %}
+
+{% block fixed %}{% endblock %}
+
+{% block content %}
+
+{% if warning_msg %}
+<div class="panel-group" id="warnings" role="tablist" aria-multiselectable="true">
+ <div class="panel panel-warning">
+ <div class="panel-heading" role="tab" id="warnings-heading">
+ <h4 class="panel-title">
+ <a role="button" data-toggle="collapse" data-parent="#warnings" href="#warnings-list" aria-expanded="true" aria-controls="warnings-list">
+ Warnings (click here to collapse/expand):
+ </a>
+ </h4>
+ </div>
+ <div id="warnings-list" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="warnings-heading">
+ <div class="alert alert-warning col-md-12">
+ <p><strong>{{ warning_msg }}</strong></p>
+ </div>
+ </div>
+ </div>
+</div>
+{% endif %}
+
+<div class="row">
+ {% if predictions %}
+ <h1>Model Accuracy</h1>
+ {% endif %}
+ <div class="text-center">
+ {% if predictions %}
+ <a href="predictions.pdf">
+ <img src="predictions.png">
+ <br>
+ <p>Download as PDF</p>
+ </a>
+ {% endif %}
+ {% if predictions %}
+ <div class="col-lg-12">
+ {{ predictions }}
+ <a href="predictive_accuracy.tsv">
+          <p>Download accuracy results as TSV</p>
+ </a>
+ </div>
+ {% endif %}
+ {% if roc %}
+ <div class="col-lg-12">
+ <h1>Receiver Operating Characteristic Curves</h1>
+ <a href="roc_plot.pdf">
+ <img src="roc_plot.png">
+ <br>
+ <p>Download as PDF</p>
+ </a>
+ <div class="text-justify">
+ <p>Receiver Operating Characteristic (ROC) curves are a graphical
+ representation of the classification accuracy of a machine-learning
+ model. The ROC curve plots the relationship between the true positive
+ rate (TPR, on the y-axis) and the false positive rate (FPR, on the
+ x-axis) at various threshold settings. Thus, the top-left corner of the
+      plot represents the "optimal" performance position, indicating an FPR
+ of zero and a TPR of one. This "optimal" scenario is unlikely to occur
+ in practice, but a greater area under the curve (AUC) indicates better
+ performance. This can be compared to the error rate achieved by random
+ chance, which is represented here as a diagonal line extending from the
+ lower-left to upper-right corners. Additionally, the "steepness" of the
+ curve is important, as a good classifier should maximize the TPR while
+ minimizing the FPR.
+
+ In addition to showing the ROC curves for each class, average ROCs and
+ AUCs are calculated. "Micro-averaging" calculates metrics globally by
+      pooling all samples; hence class imbalance impacts this metric.
+      "Macro-averaging" instead averages the per-class metrics, giving
+      equal weight to each class regardless of its size.</p>
+ </div>
+ </div>
+ {% endif %}
+ {% if optimize_feature_selection %}
+  <h1>Recursive feature elimination</h1>
+ <div class="text-center">
+ <a href="rfe_plot.pdf">
+ <img src="rfe_plot.png">
+ <br>
+ <p>Download as PDF</p>
+ </a>
+ <a href="rfe_scores.tsv">
+ <p>Download as TSV</p>
+ </a>
+ </div>
+ {% endif %}
+ {% if result %}
+ <h1>Model parameters</h1>
+ <div class="col-lg-12">
+ {{ result }}
+ </div>
+ {% endif %}
+ </div>
+</div>
+
+{% endblock %}
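
The micro- vs. macro-averaging distinction described in the template can be reproduced with scikit-learn directly. A short sketch; the labels and probability scores are made-up example data:

```python
# a short sketch of micro vs. macro AUC; labels and scores are made-up.
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

y_true = np.array(['a', 'a', 'b', 'b', 'c', 'c'])
y_score = np.array([[0.7, 0.2, 0.1], [0.5, 0.3, 0.2], [0.2, 0.6, 0.2],
                    [0.1, 0.7, 0.2], [0.3, 0.3, 0.4], [0.2, 0.2, 0.6]])
y_bin = label_binarize(y_true, classes=['a', 'b', 'c'])

# micro: pools all samples, so larger classes dominate the average
micro_auc = roc_auc_score(y_bin, y_score, average='micro')
# macro: averages per-class AUCs, giving each class equal weight
macro_auc = roc_auc_score(y_bin, y_score, average='macro')
```
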
diff --git a/q2_sample_classifier/citations.bib b/q2_sample_classifier/citations.bib
new file mode 100644
index 0000000..8ae794f
--- /dev/null
+++ b/q2_sample_classifier/citations.bib
@@ -0,0 +1,20 @@
+@article {Bokulich306167,
+ author = {Bokulich, Nicholas and Dillon, Matthew and Bolyen, Evan and Kaehler, Benjamin D and Huttley, Gavin A and Caporaso, J Gregory},
+ title = {{q2-sample-classifier}: machine-learning tools for microbiome classification and regression},
+ year = {2018},
+ doi = {10.21105/joss.00934},
+ journal = {Journal of Open Source Software},
+ volume={3},
+ number={30},
+ pages={934}
+}
+
+@article{pedregosa2011scikit,
+ title={Scikit-learn: Machine learning in Python},
+ author={Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, {\'E}douard},
+ journal={Journal of machine learning research},
+ volume={12},
+ number={Oct},
+ pages={2825--2830},
+ year={2011}
+}
diff --git a/q2_sample_classifier/classify.py b/q2_sample_classifier/classify.py
new file mode 100644
index 0000000..414881f
--- /dev/null
+++ b/q2_sample_classifier/classify.py
@@ -0,0 +1,514 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import numpy as np
+from sklearn.ensemble import IsolationForest
+from sklearn.metrics import mean_squared_error, accuracy_score
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.model_selection import KFold
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.pipeline import Pipeline
+
+import qiime2
+import pandas as pd
+import biom
+import skbio
+
+from .utilities import (_load_data, _prepare_training_data,
+ nested_cross_validation, _fit_estimator,
+ _extract_features, _plot_accuracy,
+ _summarize_estimator, predict_probabilities,
+ _classifiers)
+
+
+defaults = {
+ 'test_size': 0.2,
+ 'step': 0.05,
+ 'cv': 5,
+ 'n_jobs': 1,
+ 'n_estimators': 100,
+ 'estimator_c': 'RandomForestClassifier',
+ 'estimator_r': 'RandomForestRegressor',
+ 'palette': 'sirocco',
+ 'missing_samples': 'error'
+}
+
+
+def metatable(ctx,
+ metadata,
+ table=None,
+ missing_samples='ignore',
+ missing_values='error',
+ drop_all_unique=False):
+ # gather numeric metadata
+ metadata = metadata.filter_columns(
+ column_type='numeric', drop_all_unique=drop_all_unique,
+ drop_zero_variance=True, drop_all_missing=True).to_dataframe()
+
+ if missing_values == 'drop_samples':
+ metadata = metadata.dropna(axis=0)
+ elif missing_values == 'drop_features':
+ metadata = metadata.dropna(axis=1)
+ elif missing_values == 'error' and metadata.isnull().values.any():
+ raise ValueError('You are attempting to coerce metadata containing '
+ 'missing values into a feature table! These may '
+ 'cause fatal errors downstream and must be removed '
+ 'or converted to 0. See the missing_values parameter '
+ 'to review your options.')
+ elif missing_values == 'fill':
+ metadata = metadata.fillna(0.)
+
+ # drop columns with negative values
+ # grab column IDs with all values >= 0
+ metadata = metadata.loc[:, (metadata >= 0).all(axis=0)]
+
+ if len(metadata.columns) == 0:
+ raise ValueError('All metadata columns have been filtered.')
+ if len(metadata.index) == 0:
+ raise ValueError('All metadata samples have been filtered.')
+
+ # only retain IDs that intersect with table
+ if table is not None:
+ tab = table.view(biom.Table)
+ table_ids = set(tab.ids())
+ metadata_ids = set(metadata.index)
+ sample_ids = table_ids.intersection(metadata_ids)
+ if missing_samples == 'error' and len(sample_ids) != len(table_ids):
+ raise ValueError('Missing samples in metadata: %r' %
+ table_ids.difference(metadata_ids))
+ else:
+ metadata = metadata.loc[sample_ids]
+ if len(sample_ids) < len(table_ids):
+ tab = tab.filter(
+ ids_to_keep=sample_ids, axis='sample', inplace=False)
+ table = ctx.make_artifact('FeatureTable[Frequency]', tab)
+
+ # convert to FeatureTable[Frequency]
+ metadata = metadata.T
+ metadata = biom.table.Table(
+ metadata.values, metadata.index, metadata.columns)
+ metatab = ctx.make_artifact('FeatureTable[Frequency]', metadata)
+
+ # optionally merge with existing feature table
+ if table is not None:
+ merge = ctx.get_action('feature_table', 'merge')
+ metatab, = merge(
+ [table, metatab], overlap_method='error_on_overlapping_feature')
+
+ return metatab
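+# Illustration (made-up values): a numeric metadata frame with columns
+# 'ph' and 'temp' indexed by sample ID is transposed above so that
+# metadata columns become biom observations, i.e.
+#     biom.table.Table(metadata.T.values, metadata.T.index,
+#                      metadata.T.columns)
+# which can then be imported as FeatureTable[Frequency].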
+
+
+def _fit_predict_knn_cv(
+ x: pd.DataFrame, y: pd.Series, k: int, cv: int,
+ random_state: int, n_jobs: int
+) -> (pd.Series, pd.Series):
+ kf = KFold(n_splits=cv, shuffle=True, random_state=random_state)
+
+ # train and test with CV
+ predictions, pred_ids, truth = [], [], []
+ for train_index, test_index in kf.split(x):
+ x_train, x_test = x.iloc[train_index, train_index], \
+ x.iloc[test_index, train_index]
+ y_train, y_test = y[train_index], y[test_index]
+
+ knn = KNeighborsClassifier(
+ n_neighbors=k, metric='precomputed', n_jobs=n_jobs
+ )
+ knn.fit(x_train, y_train)
+
+ # gather predictions for the confusion matrix
+ predictions.append(knn.predict(x_test))
+ pred_ids.extend(x_test.index.tolist())
+ truth.append(y_test)
+
+ predictions = pd.Series(
+ np.concatenate(predictions).ravel(),
+ index=pd.Index(pred_ids, name='SampleID')
+ )
+ truth = pd.concat(truth)
+ truth.index.name = 'SampleID'
+
+ return predictions, truth
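+# Note on the slicing above: with a precomputed metric, each fold trains
+# on the train-vs-train block x.iloc[train, train] and predicts from the
+# test-vs-train block x.iloc[test, train], so no distances involving
+# test samples leak into training.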
+
+
+def classify_samples_from_dist(
+ ctx, distance_matrix, metadata, k=1, cv=defaults['cv'],
+ random_state=None, n_jobs=defaults['n_jobs'],
+ palette=defaults['palette']
+):
+ """ Trains and evaluates a KNN classifier from a distance matrix
+ using cross-validation."""
+ distance_matrix = distance_matrix \
+ .view(skbio.DistanceMatrix) \
+ .to_data_frame()
+ # reorder (required for splitting into train/test)
+ metadata_ser = metadata.to_series()[distance_matrix.index]
+
+ predictions, truth = _fit_predict_knn_cv(
+ distance_matrix, metadata_ser, k, cv, random_state, n_jobs
+ )
+ predictions = qiime2.Artifact.import_data(
+ 'SampleData[ClassifierPredictions]', predictions
+ )
+ truth = qiime2.CategoricalMetadataColumn(truth)
+
+ confusion = ctx.get_action('sample_classifier', 'confusion_matrix')
+ accuracy_results, = confusion(
+ predictions, truth, missing_samples='ignore', palette=palette
+ )
+
+ return predictions, accuracy_results
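+# Usage sketch (hypothetical inputs), via the generated plugin API:
+#     from qiime2.plugins import sample_classifier
+#     results = sample_classifier.pipelines.classify_samples_from_dist(
+#         distance_matrix=dm, metadata=column, k=3)
+# where dm is a DistanceMatrix artifact and column a categorical
+# metadata column.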
+
+
+def classify_samples(ctx,
+ table,
+ metadata,
+ test_size=defaults['test_size'],
+ step=defaults['step'],
+ cv=defaults['cv'],
+ random_state=None,
+ n_jobs=defaults['n_jobs'],
+ n_estimators=defaults['n_estimators'],
+ estimator=defaults['estimator_c'],
+ optimize_feature_selection=False,
+ parameter_tuning=False,
+ palette=defaults['palette'],
+ missing_samples=defaults['missing_samples']):
+
+ split = ctx.get_action('sample_classifier', 'split_table')
+ fit = ctx.get_action('sample_classifier', 'fit_classifier')
+ predict_test = ctx.get_action(
+ 'sample_classifier', 'predict_classification')
+ summarize_estimator = ctx.get_action('sample_classifier', 'summarize')
+ confusion = ctx.get_action('sample_classifier', 'confusion_matrix')
+ heat = ctx.get_action('sample_classifier', 'heatmap')
+
+ X_train, X_test, y_train, y_test = split(table, metadata, test_size,
+ random_state,
+ stratify=True,
+ missing_samples=missing_samples)
+
+ sample_estimator, importance = fit(
+ X_train, metadata, step, cv, random_state, n_jobs, n_estimators,
+ estimator, optimize_feature_selection, parameter_tuning,
+ missing_samples='ignore')
+
+ predictions, probabilities, = predict_test(
+ X_test, sample_estimator, n_jobs)
+
+ summary, = summarize_estimator(sample_estimator)
+
+ accuracy_results, = confusion(predictions, metadata, probabilities,
+ missing_samples='ignore', palette=palette)
+
+ _heatmap, _ = heat(table, importance, sample_metadata=metadata,
+ group_samples=True, missing_samples=missing_samples)
+
+ return (sample_estimator, importance, predictions, summary,
+ accuracy_results, probabilities, _heatmap, y_train, y_test)
+
+
+def regress_samples(ctx,
+ table,
+ metadata,
+ test_size=defaults['test_size'],
+ step=defaults['step'],
+ cv=defaults['cv'],
+ random_state=None,
+ n_jobs=defaults['n_jobs'],
+ n_estimators=defaults['n_estimators'],
+ estimator=defaults['estimator_r'],
+ optimize_feature_selection=False,
+ stratify=False,
+ parameter_tuning=False,
+ missing_samples=defaults['missing_samples']):
+
+ split = ctx.get_action('sample_classifier', 'split_table')
+ fit = ctx.get_action('sample_classifier', 'fit_regressor')
+ predict_test = ctx.get_action('sample_classifier', 'predict_regression')
+ summarize_estimator = ctx.get_action('sample_classifier', 'summarize')
+ scatter = ctx.get_action('sample_classifier', 'scatterplot')
+
+ X_train, X_test, y_train, y_test = split(table, metadata, test_size,
+ random_state,
+ stratify,
+ missing_samples=missing_samples)
+
+ sample_estimator, importance = fit(
+ X_train, metadata, step, cv, random_state, n_jobs, n_estimators,
+ estimator, optimize_feature_selection, parameter_tuning,
+ missing_samples='ignore')
+
+ predictions, = predict_test(X_test, sample_estimator, n_jobs)
+
+ summary, = summarize_estimator(sample_estimator)
+
+ accuracy_results, = scatter(predictions, metadata, 'ignore')
+
+ return (sample_estimator, importance, predictions, summary,
+ accuracy_results)
+
+
+def fit_classifier(table: biom.Table,
+ metadata: qiime2.CategoricalMetadataColumn,
+ step: float = defaults['step'], cv: int = defaults['cv'],
+ random_state: int = None, n_jobs: int = defaults['n_jobs'],
+ n_estimators: int = defaults['n_estimators'],
+ estimator: str = defaults['estimator_c'],
+ optimize_feature_selection: bool = False,
+ parameter_tuning: bool = False,
+ missing_samples: str = defaults['missing_samples']
+ ) -> (Pipeline, pd.DataFrame):
+ estimator, importance = _fit_estimator(
+ table, metadata, estimator, n_estimators, step, cv, random_state,
+ n_jobs, optimize_feature_selection, parameter_tuning,
+ missing_samples=missing_samples, classification=True)
+
+ return estimator, importance
+
+
+def fit_regressor(table: biom.Table,
+                  metadata: qiime2.NumericMetadataColumn,
+ step: float = defaults['step'], cv: int = defaults['cv'],
+ random_state: int = None, n_jobs: int = defaults['n_jobs'],
+ n_estimators: int = defaults['n_estimators'],
+ estimator: str = defaults['estimator_r'],
+ optimize_feature_selection: bool = False,
+ parameter_tuning: bool = False,
+ missing_samples: str = defaults['missing_samples']
+ ) -> (Pipeline, pd.DataFrame):
+ estimator, importance = _fit_estimator(
+ table, metadata, estimator, n_estimators, step, cv, random_state,
+ n_jobs, optimize_feature_selection, parameter_tuning,
+ missing_samples=missing_samples, classification=False)
+
+ return estimator, importance
+
+
+def predict_base(table, sample_estimator, n_jobs):
+ # extract feature data from biom
+ feature_data = _extract_features(table)
+ index = table.ids()
+
+ # reset n_jobs if this is a valid parameter for the estimator
+ if 'est__n_jobs' in sample_estimator.get_params().keys():
+ sample_estimator.set_params(est__n_jobs=n_jobs)
+
+ # predict values and output as series
+ y_pred = sample_estimator.predict(feature_data)
+ # need to flatten arrays that come out as multidimensional
+ y_pred = y_pred.flatten()
+ y_pred = pd.Series(y_pred, index=index, name='prediction')
+ y_pred.index.name = 'SampleID'
+
+ # log prediction probabilities (classifiers only)
+ if sample_estimator.named_steps.est.__class__.__name__ in _classifiers:
+ probs = predict_probabilities(sample_estimator, feature_data, index)
+ else:
+ probs = None
+
+ return y_pred, probs
+
+
+def predict_classification(table: biom.Table, sample_estimator: Pipeline,
+ n_jobs: int = defaults['n_jobs']) -> (
+ pd.Series, pd.DataFrame):
+ return predict_base(table, sample_estimator, n_jobs)
+
+
+def predict_regression(table: biom.Table, sample_estimator: Pipeline,
+ n_jobs: int = defaults['n_jobs']) -> pd.Series:
+ # we only return the predictions, not the probabilities, which are empty
+ # for regressors.
+ return predict_base(table, sample_estimator, n_jobs)[0]
+
+
+def split_table(table: biom.Table, metadata: qiime2.MetadataColumn,
+ test_size: float = defaults['test_size'],
+                random_state: int = None, stratify: bool = True,
+ missing_samples: str = defaults['missing_samples']
+ ) -> (biom.Table, biom.Table, pd.Series, pd.Series):
+ column = metadata.name
+ X_train, X_test, y_train, y_test = _prepare_training_data(
+ table, metadata, column, test_size, random_state, load_data=True,
+ stratify=stratify, missing_samples=missing_samples)
+ return X_train, X_test, y_train, y_test
+
+
+def regress_samples_ncv(
+ table: biom.Table, metadata: qiime2.NumericMetadataColumn,
+ cv: int = defaults['cv'], random_state: int = None,
+ n_jobs: int = defaults['n_jobs'],
+ n_estimators: int = defaults['n_estimators'],
+        estimator: str = defaults['estimator_r'], stratify: bool = False,
+ parameter_tuning: bool = False,
+ missing_samples: str = defaults['missing_samples']
+ ) -> (pd.Series, pd.DataFrame):
+
+ y_pred, importances, probabilities = nested_cross_validation(
+ table, metadata, cv, random_state, n_jobs, n_estimators, estimator,
+ stratify, parameter_tuning, classification=False,
+ scoring=mean_squared_error, missing_samples=missing_samples)
+ return y_pred, importances
+
+
+def classify_samples_ncv(
+ table: biom.Table, metadata: qiime2.CategoricalMetadataColumn,
+ cv: int = defaults['cv'], random_state: int = None,
+ n_jobs: int = defaults['n_jobs'],
+ n_estimators: int = defaults['n_estimators'],
+ estimator: str = defaults['estimator_c'],
+ parameter_tuning: bool = False,
+ missing_samples: str = defaults['missing_samples']
+ ) -> (pd.Series, pd.DataFrame, pd.DataFrame):
+
+ y_pred, importances, probabilities = nested_cross_validation(
+ table, metadata, cv, random_state, n_jobs, n_estimators, estimator,
+        stratify=True, parameter_tuning=parameter_tuning, classification=True,
+ scoring=accuracy_score, missing_samples=missing_samples)
+ return y_pred, importances, probabilities
+
+
+def scatterplot(output_dir: str, predictions: pd.Series,
+ truth: qiime2.NumericMetadataColumn,
+ missing_samples: str = defaults['missing_samples']) -> None:
+ predictions = pd.to_numeric(predictions)
+
+ _plot_accuracy(output_dir, predictions, truth, probabilities=None,
+ missing_samples=missing_samples,
+ classification=False, palette=None,
+ plot_title='regression scatterplot')
+
+
+def confusion_matrix(output_dir: str,
+ predictions: pd.Series,
+ truth: qiime2.CategoricalMetadataColumn,
+ probabilities: pd.DataFrame = None,
+ missing_samples: str = defaults['missing_samples'],
+ vmin: int = 'auto', vmax: int = 'auto',
+ palette: str = defaults['palette']) -> None:
+
+ if vmin == 'auto':
+ vmin = None
+ if vmax == 'auto':
+ vmax = None
+
+ predictions = predictions.astype(str)
+
+ _plot_accuracy(output_dir, predictions, truth, probabilities,
+ missing_samples=missing_samples,
+ classification=True, palette=palette,
+ plot_title='confusion matrix', vmin=vmin, vmax=vmax)
+
+
+def summarize(output_dir: str, sample_estimator: Pipeline):
+ _summarize_estimator(output_dir, sample_estimator)
+
+
+def heatmap(ctx, table, importance, sample_metadata=None,
+ feature_metadata=None, feature_count=50,
+ importance_threshold=0, group_samples=False, normalize=True,
+ missing_samples='ignore', metric='braycurtis',
+ method='average', cluster='features', color_scheme='rocket'):
+ filter_features = ctx.get_action('feature_table', 'filter_features')
+ group = ctx.get_action('feature_table', 'group')
+ make_heatmap = ctx.get_action('feature_table', 'heatmap')
+ filter_samples = ctx.get_action('feature_table', 'filter_samples')
+
+ if group_samples and sample_metadata is None:
+ raise ValueError(
+            'If group_samples is enabled, sample_metadata is required.')
+
+ if missing_samples == 'ignore' and sample_metadata is None:
+ raise ValueError(
+            'If missing_samples is "ignore", sample_metadata is required.')
+
+ clustermap_params = {
+ 'cluster': cluster, 'normalize': normalize, 'metric': metric,
+ 'method': method, 'color_scheme': color_scheme}
+
+ # load importance data and sum rows (to average importances if there are
+ # multiple scores).
+ importance = importance.view(pd.DataFrame)
+ importance = importance.sum(1)
+
+ # filter importances by user criteria
+ importance = importance.sort_values(ascending=False)
+ if importance_threshold > 0:
+ importance = importance[importance > importance_threshold]
+ if feature_count > 0:
+ importance = importance[:feature_count]
+ importance.name = 'importance'
+ importance = qiime2.Metadata(importance.to_frame())
+
+ # filter features by importance
+ table, = filter_features(table, metadata=importance)
+ if missing_samples == 'ignore':
+ table, = filter_samples(
+ table, metadata=qiime2.Metadata(sample_metadata.to_dataframe()))
+
+ # optionally group feature table by sample metadata
+ # otherwise annotate heatmap with sample metadata
+ if group_samples:
+ table, = group(table, metadata=sample_metadata, axis='sample',
+ mode='sum')
+ elif sample_metadata is not None:
+ clustermap_params['sample_metadata'] = sample_metadata
+ # label features using feature metadata
+ if feature_metadata is not None:
+ clustermap_params['feature_metadata'] = feature_metadata
+
+    # build the heatmap
+ clustermap, = make_heatmap(table, **clustermap_params)
+
+ return clustermap, table
+
+
+# The following method is experimental and is not registered in the current
+# release. Any use of the API is at the user's own risk.
+def detect_outliers(table: biom.Table,
+ metadata: qiime2.Metadata, subset_column: str = None,
+ subset_value: str = None,
+ n_estimators: int = defaults['n_estimators'],
+ contamination: float = 0.05, random_state: int = None,
+ n_jobs: int = defaults['n_jobs'],
+ missing_samples: str = 'ignore') -> (pd.Series):
+
+ features, sample_md = _load_data(
+ table, metadata, missing_samples=missing_samples)
+
+ # if opting to train on a subset, choose subset that fits criteria
+ if subset_column and subset_value:
+ X_train = \
+ [f for s, f in
+ zip(sample_md[subset_column] == subset_value, features) if s]
+ # raise error if subset_column or subset_value (but not both) are set
+ elif subset_column is not None or subset_value is not None:
+ raise ValueError((
+ 'subset_column and subset_value must both be provided with a '
+ 'valid value to perform model training on a subset of data.'))
+ else:
+ X_train = features
+
+    # fit isolation forest
+ estimator = Pipeline([('dv', DictVectorizer()),
+ ('est', IsolationForest(n_jobs=n_jobs,
+ n_estimators=n_estimators,
+ contamination=contamination,
+ random_state=random_state,
+ ))])
+ estimator.fit(X_train)
+
+ # predict outlier status
+ y_pred = estimator.predict(features)
+ y_pred = pd.Series(y_pred, index=sample_md.index)
+ # predict reports whether sample is an inlier; change to outlier status
+ y_pred[y_pred == -1] = 'True'
+ y_pred[y_pred == 1] = 'False'
+ y_pred.name = "outlier"
+ return y_pred
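
detect_outliers recodes IsolationForest's {-1, 1} predictions into a 'True'/'False' outlier series. A self-contained sketch of that core step; the input matrix is random example data, not a real feature table:

```python
# a self-contained sketch of the recoding step in detect_outliers;
# X is random example data, not microbiome counts.
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(42)
X = rng.rand(20, 5)
ids = pd.Index(['s%d' % i for i in range(20)], name='SampleID')

est = IsolationForest(n_estimators=100, contamination=0.05,
                      random_state=42)
est.fit(X)

# predict() returns -1 for outliers and 1 for inliers; recode to the
# 'True'/'False' convention used above.
outliers = pd.Series(est.predict(X), index=ids, name='outlier')
outliers = outliers.map({-1: 'True', 1: 'False'})
```
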
diff --git a/q2_sample_classifier/plugin_setup.py b/q2_sample_classifier/plugin_setup.py
new file mode 100644
index 0000000..8ae4d3d
--- /dev/null
+++ b/q2_sample_classifier/plugin_setup.py
@@ -0,0 +1,677 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import importlib
+
+from qiime2.plugin import (
+ Int, Str, Float, Range, Bool, Plugin, Metadata, Choices, MetadataColumn,
+ Numeric, Categorical, Citations, Visualization, TypeMatch)
+from q2_types.feature_table import (
+ FeatureTable, Frequency, RelativeFrequency, PresenceAbsence, Balance,
+ PercentileNormalized, Design)
+from q2_types.sample_data import SampleData
+from q2_types.feature_data import FeatureData
+from q2_types.distance_matrix import DistanceMatrix
+from q2_feature_table import heatmap_choices
+from .classify import (
+ classify_samples, classify_samples_from_dist, regress_samples,
+ regress_samples_ncv,
+ classify_samples_ncv, fit_classifier, fit_regressor, split_table,
+ predict_classification, predict_regression, confusion_matrix, scatterplot,
+ summarize, metatable, heatmap)
+from .visuals import _custom_palettes
+from ._format import (SampleEstimatorDirFmt,
+ BooleanSeriesFormat,
+ BooleanSeriesDirectoryFormat,
+ ImportanceFormat,
+ ImportanceDirectoryFormat,
+ PredictionsFormat,
+ PredictionsDirectoryFormat,
+ ProbabilitiesFormat,
+ ProbabilitiesDirectoryFormat,
+ TrueTargetsDirectoryFormat)
+
+from ._type import (ClassifierPredictions, RegressorPredictions,
+ SampleEstimator, BooleanSeries, Importance,
+ Classifier, Regressor, Probabilities,
+ TrueTargets)
+import q2_sample_classifier
+
+citations = Citations.load('citations.bib', package='q2_sample_classifier')
+
+plugin = Plugin(
+ name='sample-classifier',
+ version=q2_sample_classifier.__version__,
+ website="https://github.com/qiime2/q2-sample-classifier",
+ package='q2_sample_classifier',
+ description=(
+ 'This QIIME 2 plugin supports methods for supervised classification '
+ 'and regression of sample metadata, and other supervised machine '
+ 'learning methods.'),
+ short_description=(
+ 'Plugin for machine learning prediction of sample metadata.'),
+ citations=[citations['Bokulich306167'], citations['pedregosa2011scikit']]
+)
+
+description = ('Predicts a {0} sample metadata column using a {1}. Splits '
+ 'input data into training and test sets. The training set is '
+ 'used to train and test the estimator using a stratified '
+ 'k-fold cross-validation scheme. This includes optional steps '
+ 'for automated feature extraction and hyperparameter '
+ 'optimization. The test set validates classification accuracy '
+ 'of the optimized estimator. Outputs classification results '
+               'for the test set. For more details on the learning algorithm, '
+ 'see http://scikit-learn.org/stable/supervised_learning.html')
+
+ncv_description = ('Predicts a {0} sample metadata column using a {1}. Uses '
+ 'nested stratified k-fold cross validation for automated '
+ 'hyperparameter optimization and sample prediction. '
+ 'Outputs predicted values for each input sample, and '
+ 'relative importance of each feature for model accuracy.')
+
+cv_description = ('Fit a supervised learning {0}. Outputs the fit estimator '
+ '(for prediction of test samples and/or unknown samples) '
+ 'and the relative importance of each feature for model '
+ 'accuracy. Optionally use k-fold cross-validation for '
+ 'automatic recursive feature elimination and hyperparameter '
+ 'tuning.')
+
+predict_description = (
+ 'Use trained estimator to predict target values for new samples. '
+ 'These will typically be unseen samples, e.g., test data (derived '
+ 'manually or from split_table) or samples with unknown values, but '
+ 'can theoretically be any samples present in a feature table that '
+ 'contain overlapping features with the feature table used to train '
+ 'the estimator.')
+
+inputs = {'table': FeatureTable[Frequency]}
+
+input_descriptions = {'table': 'Feature table containing all features that '
+ 'should be used for target prediction.',
+ 'probabilities': 'Predicted class probabilities for '
+ 'each input sample.'}
+
+parameters = {
+ 'base': {
+ 'random_state': Int,
+ 'n_jobs': Int,
+ 'n_estimators': Int % Range(1, None),
+ 'missing_samples': Str % Choices(['error', 'ignore'])},
+ 'splitter': {
+ 'test_size': Float % Range(0.0, 1.0, inclusive_end=False,
+ inclusive_start=True)},
+ 'rfe': {
+ 'step': Float % Range(0.0, 1.0, inclusive_end=False,
+ inclusive_start=False),
+ 'optimize_feature_selection': Bool},
+ 'cv': {
+ 'cv': Int % Range(1, None),
+ 'parameter_tuning': Bool},
+ 'modified_metadata': {
+ 'metadata': Metadata,
+ 'column': Str},
+ 'regressor': {'stratify': Bool}
+}
+
+parameter_descriptions = {
+ 'base': {'random_state': 'Seed used by random number generator.',
+ 'n_jobs': 'Number of jobs to run in parallel.',
+ 'n_estimators': (
+ 'Number of trees to grow for estimation. More trees will '
+ 'improve predictive accuracy up to a threshold level, '
+ 'but will also increase time and memory requirements. This '
+ 'parameter only affects ensemble estimators, such as Random '
+ 'Forest, AdaBoost, ExtraTrees, and GradientBoosting.'),
+ 'missing_samples': (
+ 'How to handle missing samples in metadata. "error" will fail '
+ 'if missing samples are detected. "ignore" will cause the '
+ 'feature table and metadata to be filtered, so that only '
+ 'samples found in both files are retained.')},
+ 'splitter': {
+ 'test_size': ('Fraction of input samples to exclude from training set '
+ 'and use for classifier testing.')},
+ 'rfe': {
+ 'step': ('If optimize_feature_selection is True, step is the '
+ 'percentage of features to remove at each iteration.'),
+ 'optimize_feature_selection': ('Automatically optimize input feature '
+ 'selection using recursive feature '
+ 'elimination.')},
+ 'cv': {
+ 'cv': 'Number of k-fold cross-validations to perform.',
+ 'parameter_tuning': ('Automatically tune hyperparameters using random '
+ 'grid search.')},
+ 'regressor': {
+ 'stratify': ('Evenly stratify training and test data among metadata '
+ 'categories. If True, all values in column must match '
+ 'at least two samples.')},
+ 'estimator': {
+ 'estimator': 'Estimator method to use for sample prediction.'}
+}
+
+classifiers = Str % Choices(
+ ['RandomForestClassifier', 'ExtraTreesClassifier',
+ 'GradientBoostingClassifier', 'AdaBoostClassifier',
+ 'KNeighborsClassifier', 'LinearSVC', 'SVC'])
+
+regressors = Str % Choices(
+ ['RandomForestRegressor', 'ExtraTreesRegressor',
+ 'GradientBoostingRegressor', 'AdaBoostRegressor', 'ElasticNet',
+ 'Ridge', 'Lasso', 'KNeighborsRegressor', 'LinearSVR', 'SVR'])
+
+output_descriptions = {
+ 'predictions': 'Predicted target values for each input sample.',
+ 'feature_importance': 'Importance of each input feature to model accuracy.'
+}
+
+pipeline_parameters = {
+ **parameters['base'],
+ **parameters['rfe'],
+ **parameters['splitter'],
+ **parameters['cv']}
+
+classifier_pipeline_parameters = {
+ **pipeline_parameters,
+ 'metadata': MetadataColumn[Categorical],
+ 'estimator': classifiers,
+ 'palette': Str % Choices(_custom_palettes().keys())}
+
+regressor_pipeline_parameters = {
+ **pipeline_parameters,
+ 'metadata': MetadataColumn[Numeric],
+ **parameters['regressor'],
+ 'estimator': regressors}
+
+pipeline_parameter_descriptions = {
+ **parameter_descriptions['base'],
+ **parameter_descriptions['rfe'],
+ **parameter_descriptions['splitter'],
+ **parameter_descriptions['estimator'],
+ **parameter_descriptions['cv']}
+
+classifier_pipeline_parameter_descriptions = {
+ **pipeline_parameter_descriptions,
+ 'metadata': 'Categorical metadata column to use as prediction target.',
+ 'palette': 'The color palette to use for plotting.'}
+
+regressor_pipeline_parameter_descriptions = {
+ **pipeline_parameter_descriptions,
+ **parameter_descriptions['regressor'],
+ 'metadata': 'Numeric metadata column to use as prediction target.'}
+
+pipeline_outputs = [
+ ('model_summary', Visualization),
+ ('accuracy_results', Visualization)]
+
+regressor_pipeline_outputs = [
+ ('sample_estimator', SampleEstimator[Regressor]),
+ ('feature_importance', FeatureData[Importance]),
+ ('predictions', SampleData[RegressorPredictions])] + pipeline_outputs
+
+pipeline_output_descriptions = {
+ 'sample_estimator': 'Trained sample estimator.',
+ **output_descriptions,
+ 'model_summary': 'Summarized parameter and (if enabled) feature '
+ 'selection information for the trained estimator.',
+ 'accuracy_results': 'Accuracy results visualization.'}
+
+
+plugin.pipelines.register_function(
+ function=classify_samples,
+ inputs=inputs,
+ parameters=classifier_pipeline_parameters,
+ outputs=[('sample_estimator', SampleEstimator[Classifier]),
+ ('feature_importance', FeatureData[Importance]),
+ ('predictions', SampleData[ClassifierPredictions])
+ ] + pipeline_outputs + [
+ ('probabilities', SampleData[Probabilities]),
+ ('heatmap', Visualization),
+ ('training_targets', SampleData[TrueTargets]),
+ ('test_targets', SampleData[TrueTargets])],
+ input_descriptions={'table': input_descriptions['table']},
+ parameter_descriptions=classifier_pipeline_parameter_descriptions,
+ output_descriptions={
+ **pipeline_output_descriptions,
+ 'probabilities': input_descriptions['probabilities'],
+ 'heatmap': 'A heatmap of the top 50 most important features from the '
+ 'table.',
+        'training_targets': 'Series containing true target values of '
+                            'train samples.',
+        'test_targets': 'Series containing true target values '
+                        'of test samples.'},
+ name='Train and test a cross-validated supervised learning classifier.',
+ description=description.format(
+ 'categorical', 'supervised learning classifier')
+)
+
+
+plugin.pipelines.register_function(
+ function=classify_samples_from_dist,
+ inputs={'distance_matrix': DistanceMatrix},
+ parameters={
+ 'metadata': MetadataColumn[Categorical],
+ 'k': Int,
+ 'cv': parameters['cv']['cv'],
+ 'random_state': parameters['base']['random_state'],
+ 'n_jobs': parameters['base']['n_jobs'],
+ 'palette': Str % Choices(_custom_palettes().keys()),
+ },
+ outputs=[
+ ('predictions', SampleData[ClassifierPredictions]),
+ ('accuracy_results', Visualization),
+ ],
+    input_descriptions={'distance_matrix': 'A distance matrix.'},
+ parameter_descriptions={
+ 'metadata': 'Categorical metadata column to use as prediction target.',
+        'k': 'Number of nearest neighbors.',
+ 'cv': parameter_descriptions['cv']['cv'],
+ 'random_state': parameter_descriptions['base']['random_state'],
+ 'n_jobs': parameter_descriptions['base']['n_jobs'],
+ 'palette': 'The color palette to use for plotting.',
+ },
+ output_descriptions={
+        'predictions': 'Leave-one-out predictions for each sample.',
+ 'accuracy_results': 'Accuracy results visualization.',
+ },
+ name=('Run k-nearest-neighbors on a labeled distance matrix.'),
+ description=(
+ 'Run k-nearest-neighbors on a labeled distance matrix.'
+        ' Returns cross-validated (leave-one-out) predictions and'
+        ' accuracy. k=1 by default.'
+ )
+)
+
+
+plugin.pipelines.register_function(
+ function=regress_samples,
+ inputs=inputs,
+ parameters=regressor_pipeline_parameters,
+ outputs=regressor_pipeline_outputs,
+ input_descriptions={'table': input_descriptions['table']},
+ parameter_descriptions=regressor_pipeline_parameter_descriptions,
+ output_descriptions=pipeline_output_descriptions,
+ name='Train and test a cross-validated supervised learning regressor.',
+ description=description.format(
+ 'continuous', 'supervised learning regressor')
+)
+
+
+plugin.methods.register_function(
+ function=regress_samples_ncv,
+ inputs=inputs,
+ parameters={
+ **parameters['base'],
+ **parameters['cv'],
+ 'metadata': MetadataColumn[Numeric],
+ **parameters['regressor'],
+ 'estimator': regressors},
+ outputs=[('predictions', SampleData[RegressorPredictions]),
+ ('feature_importance', FeatureData[Importance])],
+ input_descriptions={'table': input_descriptions['table']},
+ parameter_descriptions={
+ **parameter_descriptions['base'],
+ **parameter_descriptions['cv'],
+ **parameter_descriptions['regressor'],
+ 'metadata': 'Numeric metadata column to use as prediction target.',
+ **parameter_descriptions['estimator']},
+ output_descriptions=output_descriptions,
+ name='Nested cross-validated supervised learning regressor.',
+ description=ncv_description.format(
+ 'continuous', 'supervised learning regressor')
+)
+
+plugin.methods.register_function(
+ function=classify_samples_ncv,
+ inputs=inputs,
+ parameters={
+ **parameters['base'],
+ **parameters['cv'],
+ 'metadata': MetadataColumn[Categorical],
+ 'estimator': classifiers},
+ outputs=[('predictions', SampleData[ClassifierPredictions]),
+ ('feature_importance', FeatureData[Importance]),
+ ('probabilities', SampleData[Probabilities])],
+ input_descriptions={'table': input_descriptions['table']},
+ parameter_descriptions={
+ **parameter_descriptions['base'],
+ **parameter_descriptions['cv'],
+ 'metadata': 'Categorical metadata column to use as prediction target.',
+ **parameter_descriptions['estimator']},
+ output_descriptions={**output_descriptions,
+ 'probabilities': input_descriptions['probabilities']},
+ name='Nested cross-validated supervised learning classifier.',
+ description=ncv_description.format(
+ 'categorical', 'supervised learning classifier')
+)
+
+
+plugin.methods.register_function(
+ function=fit_classifier,
+ inputs=inputs,
+ parameters={
+ **parameters['base'],
+ **parameters['rfe'],
+ **parameters['cv'],
+ 'metadata': MetadataColumn[Categorical],
+ 'estimator': classifiers},
+ outputs=[('sample_estimator', SampleEstimator[Classifier]),
+ ('feature_importance', FeatureData[Importance])],
+ input_descriptions={'table': input_descriptions['table']},
+ parameter_descriptions={
+ **parameter_descriptions['base'],
+ **parameter_descriptions['rfe'],
+ **parameter_descriptions['cv'],
+ 'metadata': 'Numeric metadata column to use as prediction target.',
+ **parameter_descriptions['estimator']},
+ output_descriptions={
+ 'feature_importance': output_descriptions['feature_importance'],
+ 'sample_estimator': 'Trained sample classifier.'},
+ name='Fit a supervised learning classifier.',
+ description=cv_description.format('classifier')
+)
+
+
+plugin.methods.register_function(
+ function=fit_regressor,
+ inputs=inputs,
+ parameters={
+ **parameters['base'],
+ **parameters['rfe'],
+ **parameters['cv'],
+ 'metadata': MetadataColumn[Numeric],
+ 'estimator': regressors},
+ outputs=[('sample_estimator', SampleEstimator[Regressor]),
+ ('feature_importance', FeatureData[Importance])],
+ input_descriptions={'table': input_descriptions['table']},
+ parameter_descriptions={
+ **parameter_descriptions['base'],
+ **parameter_descriptions['rfe'],
+ **parameter_descriptions['cv'],
+ 'metadata': 'Numeric metadata column to use as prediction target.',
+ **parameter_descriptions['estimator']},
+ output_descriptions={
+ 'feature_importance': output_descriptions['feature_importance']},
+ name='Fit a supervised learning regressor.',
+ description=cv_description.format('regressor')
+)
+
+
+plugin.methods.register_function(
+ function=predict_classification,
+ inputs={**inputs, 'sample_estimator': SampleEstimator[Classifier]},
+ parameters={'n_jobs': parameters['base']['n_jobs']},
+ outputs=[('predictions', SampleData[ClassifierPredictions]),
+ ('probabilities', SampleData[Probabilities])],
+ input_descriptions={
+ 'table': input_descriptions['table'],
+ 'sample_estimator': 'Sample classifier trained with fit_classifier.'},
+ parameter_descriptions={
+ 'n_jobs': parameter_descriptions['base']['n_jobs']},
+ output_descriptions={
+ 'predictions': 'Predicted target values for each input sample.',
+ 'probabilities': input_descriptions['probabilities']},
+ name='Use trained classifier to predict target values for new samples.',
+ description=predict_description
+)
+
+
+plugin.methods.register_function(
+ function=predict_regression,
+ inputs={**inputs, 'sample_estimator': SampleEstimator[Regressor]},
+ parameters={'n_jobs': parameters['base']['n_jobs']},
+ outputs=[('predictions', SampleData[RegressorPredictions])],
+ input_descriptions={
+ 'table': input_descriptions['table'],
+ 'sample_estimator': 'Sample regressor trained with fit_regressor.'},
+ parameter_descriptions={
+ 'n_jobs': parameter_descriptions['base']['n_jobs']},
+ output_descriptions={
+ 'predictions': 'Predicted target values for each input sample.'},
+ name='Use trained regressor to predict target values for new samples.',
+ description=predict_description
+)
+
+
+plugin.visualizers.register_function(
+ function=scatterplot,
+ inputs={'predictions': SampleData[RegressorPredictions]},
+ parameters={
+ 'truth': MetadataColumn[Numeric],
+ 'missing_samples': parameters['base']['missing_samples']},
+ input_descriptions={'predictions': (
+ 'Predicted values to plot on y axis. Must be predictions of '
+ 'numeric data produced by a sample regressor.')},
+ parameter_descriptions={
+ 'truth': 'Metadata column (true values) to plot on x axis.',
+ 'missing_samples': parameter_descriptions['base']['missing_samples']},
+ name='Make 2D scatterplot and linear regression of regressor predictions.',
+ description='Make a 2D scatterplot and linear regression of predicted vs. '
+ 'true values for a set of samples predicted using a sample '
+ 'regressor.'
+)
+
+
+plugin.visualizers.register_function(
+ function=confusion_matrix,
+ inputs={'predictions': SampleData[ClassifierPredictions],
+ 'probabilities': SampleData[Probabilities]},
+ parameters={
+ 'truth': MetadataColumn[Categorical],
+ 'missing_samples': parameters['base']['missing_samples'],
+ 'vmin': Float | Str % Choices(['auto']),
+ 'vmax': Float | Str % Choices(['auto']),
+ 'palette': Str % Choices(_custom_palettes().keys())},
+ input_descriptions={
+ 'predictions': 'Predicted values to plot on x axis. Should be '
+ 'predictions of categorical data produced by a sample '
+ 'classifier.',
+ 'probabilities': input_descriptions['probabilities']},
+ parameter_descriptions={
+ 'truth': 'Metadata column (true values) to plot on y axis.',
+ 'missing_samples': parameter_descriptions['base']['missing_samples'],
+ 'vmin': 'The minimum value to use for anchoring the colormap. If '
+ '"auto", vmin is set to the minimum value in the data.',
+ 'vmax': 'The maximum value to use for anchoring the colormap. If '
+ '"auto", vmax is set to the maximum value in the data.',
+ 'palette': 'The color palette to use for plotting.'},
+ name='Make a confusion matrix from sample classifier predictions.',
+ description='Make a confusion matrix and calculate accuracy of predicted '
+ 'vs. true values for a set of samples classified using a '
+ 'sample classifier. If per-sample class probabilities are '
+ 'provided, will also generate Receiver Operating '
+ 'Characteristic curves and calculate area under the curve for '
+ 'each class.'
+)
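+# Illustrative sketch (not executed here): evaluating classifier predictions
+# against ground truth with the visualizer registered above, via the Artifact
+# API as above. File and column names are hypothetical.
+#
+#     from qiime2 import Artifact, Metadata
+#     from qiime2.plugins.sample_classifier.visualizers import confusion_matrix
+#
+#     preds = Artifact.load('predictions.qza')
+#     probs = Artifact.load('probabilities.qza')
+#     truth = Metadata.load('metadata.tsv').get_column('body-site')
+#     viz, = confusion_matrix(predictions=preds, probabilities=probs,
+#                             truth=truth)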
+
+
+T = TypeMatch([Frequency, RelativeFrequency, PresenceAbsence, Balance,
+ PercentileNormalized, Design])
+plugin.methods.register_function(
+ function=split_table,
+ inputs={'table': FeatureTable[T]},
+ parameters={
+ 'random_state': parameters['base']['random_state'],
+ 'missing_samples': parameters['base']['missing_samples'],
+ **parameters['splitter'],
+ 'metadata': MetadataColumn[Numeric | Categorical],
+ **parameters['regressor']},
+ outputs=[('training_table', FeatureTable[T]),
+ ('test_table', FeatureTable[T]),
+ ('training_targets', SampleData[TrueTargets]),
+ ('test_targets', SampleData[TrueTargets])],
+ input_descriptions={'table': 'Feature table containing all features that '
+ 'should be used for target prediction.'},
+ parameter_descriptions={
+ 'random_state': parameter_descriptions['base']['random_state'],
+ 'missing_samples': parameter_descriptions['base']['missing_samples'],
+ **parameter_descriptions['splitter'],
+ **parameter_descriptions['regressor'],
+ 'metadata': 'Numeric or categorical metadata column to use as '
+ 'prediction target.'},
+ output_descriptions={
+ 'training_table': 'Feature table containing training samples.',
+ 'test_table': 'Feature table containing test samples.',
+ 'training_targets': 'Series containing true target values of '
+ 'training samples.',
+ 'test_targets': 'Series containing true target values of '
+ 'test samples.'},
+ name='Split a feature table into training and testing sets.',
+ description=(
+ 'Split a feature table into training and testing sets. By default '
+ 'stratifies training and test sets on a metadata column, such that '
+ 'values in that column are evenly represented across training and '
+ 'test sets.')
+)
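+# Illustrative sketch (not executed here): holding out a test set before
+# training, using the method registered above via the Artifact API as above.
+# File and column names, and the test_size value, are hypothetical.
+#
+#     from qiime2 import Artifact, Metadata
+#     from qiime2.plugins.sample_classifier.methods import split_table
+#
+#     table = Artifact.load('table.qza')
+#     column = Metadata.load('metadata.tsv').get_column('body-site')
+#     train_tab, test_tab, train_targets, test_targets = split_table(
+#         table=table, metadata=column, test_size=0.2)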
+
+
+plugin.visualizers.register_function(
+ function=summarize,
+ inputs={'sample_estimator': SampleEstimator[Classifier | Regressor]},
+ parameters={},
+ input_descriptions={
+ 'sample_estimator': 'Sample estimator trained with fit_classifier or '
+ 'fit_regressor.'},
+ parameter_descriptions={},
+ name='Summarize parameter and feature extraction information for a '
+ 'trained estimator.',
+ description='Summarize parameter and feature extraction information for a '
+ 'trained estimator.'
+)
+
+
+plugin.pipelines.register_function(
+ function=metatable,
+ inputs=inputs,
+ parameters={'metadata': Metadata,
+ 'missing_samples': parameters['base']['missing_samples'],
+ 'missing_values': Str % Choices(
+ ['drop_samples', 'drop_features', 'error', 'fill']),
+ 'drop_all_unique': Bool},
+ outputs=[('converted_table', FeatureTable[Frequency])],
+ input_descriptions={'table': input_descriptions['table']},
+ parameter_descriptions={
+ 'metadata': 'Metadata file to convert to feature table.',
+ 'missing_samples': parameter_descriptions['base']['missing_samples'],
+ 'missing_values': (
+ 'How to handle missing values (NaNs) in metadata: "drop_samples" '
+ 'drops samples with missing values, "drop_features" drops features '
+ '(columns) with missing values, "fill" replaces missing values '
+ 'with zeros, and "error" raises an error if any missing values '
+ 'are found.'),
+ 'drop_all_unique': 'If True, columns that contain a unique value for '
+ 'every ID will be dropped.'
+ },
+ output_descriptions={'converted_table': 'Converted feature table.'},
+ name='Convert (and merge) positive numeric metadata (in)to feature table.',
+ description='Convert numeric sample metadata from TSV file into a feature '
+ 'table. Optionally merge with an existing feature table. Only '
+ 'numeric metadata will be converted; categorical columns will '
+ 'be silently dropped. By default, if a table is used as input '
+ 'only samples found in both the table and metadata '
+ '(intersection) are merged, and others are silently dropped. '
+ 'Set missing_samples="error" to raise an error if samples '
+ 'found in the table are missing from the metadata file. The '
+ 'metadata file can always contain a superset of samples. Note '
+ 'that columns will be dropped if they are non-numeric, '
+ 'contain no unique values (zero '
+ 'variance), contain only empty cells, or contain negative '
+ 'values. This method currently only converts '
+ 'positive numeric metadata into feature data. Tip: convert '
+ 'categorical columns to dummy variables to include them in '
+ 'the output feature table.'
+)
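+# Illustrative sketch (not executed here): converting numeric metadata
+# columns into a FeatureTable[Frequency] with the pipeline registered above,
+# so metadata can be merged into the predictive feature space. Assumes the
+# Artifact API as above; file names are hypothetical.
+#
+#     from qiime2 import Artifact, Metadata
+#     from qiime2.plugins.sample_classifier.pipelines import metatable
+#
+#     md = Metadata.load('metadata.tsv')
+#     table = Artifact.load('table.qza')
+#     converted, = metatable(table=table, metadata=md,
+#                            missing_values='error')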
+
+
+plugin.pipelines.register_function(
+ function=heatmap,
+ inputs={**inputs, 'importance': FeatureData[Importance]},
+ parameters={'sample_metadata': MetadataColumn[Categorical],
+ 'feature_metadata': MetadataColumn[Categorical],
+ 'feature_count': Int % Range(0, None),
+ 'importance_threshold': Float % Range(0, None),
+ 'group_samples': Bool,
+ 'normalize': Bool,
+ 'missing_samples': parameters['base']['missing_samples'],
+ 'metric': Str % Choices(heatmap_choices['metric']),
+ 'method': Str % Choices(heatmap_choices['method']),
+ 'cluster': Str % Choices(heatmap_choices['cluster']),
+ 'color_scheme': Str % Choices(heatmap_choices['color_scheme']),
+ },
+ outputs=[('heatmap', Visualization),
+ ('filtered_table', FeatureTable[Frequency])],
+ input_descriptions={'table': input_descriptions['table'],
+ 'importance': 'Feature importances.'},
+ parameter_descriptions={
+ 'sample_metadata': 'Sample metadata column to use for sample labeling '
+ 'or grouping.',
+ 'feature_metadata': 'Feature metadata (e.g., taxonomy) to use for '
+ 'labeling features in the heatmap.',
+ 'feature_count': 'Filter feature table to include top N most '
+ 'important features. Set to zero to include all '
+ 'features.',
+ 'importance_threshold': 'Filter feature table to exclude any features '
+ 'with an importance score less than this '
+ 'threshold. Set to zero to include all '
+ 'features.',
+ 'group_samples': 'Group samples by sample metadata.',
+ 'normalize': 'Normalize the feature table by adding a pseudocount '
+ 'of 1 and then taking the log10 of the table.',
+ 'missing_samples': parameter_descriptions['base']['missing_samples'],
+ 'metric': 'Metrics exposed by seaborn (see http://seaborn.pydata.org/'
+ 'generated/seaborn.clustermap.html#seaborn.clustermap for '
+ 'more detail).',
+ 'method': 'Clustering methods exposed by seaborn (see http://seaborn.'
+ 'pydata.org/generated/seaborn.clustermap.html#seaborn.clust'
+ 'ermap for more detail).',
+ 'cluster': 'Specify which axes to cluster.',
+ 'color_scheme': 'Color scheme for heatmap.',
+ },
+ output_descriptions={
+ 'heatmap': 'Heatmap of important features.',
+ 'filtered_table': 'Filtered feature table containing data displayed '
+ 'in heatmap.'},
+ name='Generate heatmap of important features.',
+ description='Generate a heatmap of important features. Features are '
+ 'filtered based on importance scores; samples are optionally '
+ 'grouped by sample metadata; and a heatmap is generated that '
+ 'displays (normalized) feature abundances per sample.'
+)
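+# Illustrative sketch (not executed here): rendering a heatmap of the most
+# important features with the pipeline registered above, via the Artifact
+# API as above. File names and the feature_count value are hypothetical.
+#
+#     from qiime2 import Artifact
+#     from qiime2.plugins.sample_classifier.pipelines import heatmap
+#
+#     table = Artifact.load('table.qza')
+#     importance = Artifact.load('feature-importance.qza')
+#     viz, filtered = heatmap(table=table, importance=importance,
+#                             feature_count=50)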
+
+
+# Registrations
+plugin.register_semantic_types(
+ SampleEstimator, BooleanSeries, Importance, ClassifierPredictions,
+ RegressorPredictions, Classifier, Regressor, Probabilities, TrueTargets)
+plugin.register_semantic_type_to_format(
+ SampleEstimator[Classifier],
+ artifact_format=SampleEstimatorDirFmt)
+plugin.register_semantic_type_to_format(
+ SampleEstimator[Regressor],
+ artifact_format=SampleEstimatorDirFmt)
+plugin.register_semantic_type_to_format(
+ SampleData[BooleanSeries],
+ artifact_format=BooleanSeriesDirectoryFormat)
+plugin.register_semantic_type_to_format(
+ SampleData[RegressorPredictions],
+ artifact_format=PredictionsDirectoryFormat)
+plugin.register_semantic_type_to_format(
+ SampleData[ClassifierPredictions],
+ artifact_format=PredictionsDirectoryFormat)
+plugin.register_semantic_type_to_format(
+ FeatureData[Importance],
+ artifact_format=ImportanceDirectoryFormat)
+plugin.register_semantic_type_to_format(
+ SampleData[Probabilities],
+ artifact_format=ProbabilitiesDirectoryFormat)
+plugin.register_semantic_type_to_format(
+ SampleData[TrueTargets],
+ artifact_format=TrueTargetsDirectoryFormat)
+plugin.register_formats(
+ SampleEstimatorDirFmt, BooleanSeriesFormat, BooleanSeriesDirectoryFormat,
+ ImportanceFormat, ImportanceDirectoryFormat, PredictionsFormat,
+ PredictionsDirectoryFormat, ProbabilitiesFormat,
+ ProbabilitiesDirectoryFormat,
+ TrueTargetsDirectoryFormat)
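+# Importing the transformer module last is a common QIIME 2 plugin pattern:
+# the import executes the @plugin.register_transformer decorators in
+# _transformer.py, wiring the formats registered above to their in-memory
+# views once those formats exist.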
+importlib.import_module('q2_sample_classifier._transformer')
diff --git a/q2_sample_classifier/tests/__init__.py b/q2_sample_classifier/tests/__init__.py
new file mode 100644
index 0000000..fed4ef6
--- /dev/null
+++ b/q2_sample_classifier/tests/__init__.py
@@ -0,0 +1,7 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
diff --git a/q2_sample_classifier/tests/data/categorical_predictions.tsv b/q2_sample_classifier/tests/data/categorical_predictions.tsv
new file mode 100644
index 0000000..8c29ca8
--- /dev/null
+++ b/q2_sample_classifier/tests/data/categorical_predictions.tsv
@@ -0,0 +1,9 @@
+SampleID prediction
+matt peanut
+sat is
+bat fake
+cat news
+rat fake
+that news
+hat is
+vat peanut
diff --git a/q2_sample_classifier/tests/data/chardonnay.map.txt b/q2_sample_classifier/tests/data/chardonnay.map.txt
new file mode 100644
index 0000000..5feab16
--- /dev/null
+++ b/q2_sample_classifier/tests/data/chardonnay.map.txt
@@ -0,0 +1,22 @@
+#SampleID Vineyard Region latitude longitude
+v4.3.618 1 Coombsville 38.306 -122.228
+v4.3.642 1 Coombsville 38.306 -122.228
+v4.3.687 1 Coombsville 38.306 -122.228
+v4.3.412 2 Coombsville 38.306 -122.227
+v4.3.637 2 Coombsville 38.306 -122.227
+v4.3.643 2 Coombsville 38.306 -122.227
+v4.3.350 3 Carneros 38.212 -122.213
+v4.3.353 3 Carneros 38.212 -122.213
+v4.3.355 3 Carneros 38.212 -122.213
+v4.3.415 8 Oakville 38.418 -122.412
+v4.3.416 8 Oakville 38.418 -122.412
+v4.3.417 8 Oakville 38.418 -122.412
+v4.3.692 9 Carneros 38.285 -122.322
+v4.3.693 9 Carneros 38.285 -122.322
+v4.3.694 9 Carneros 38.285 -122.322
+v4.3.600 10 Russian_River 38.424 -122.895
+v4.3.657 10 Russian_River 38.424 -122.895
+v4.3.659 10 Russian_River 38.424 -122.895
+v4.3.587 11 Russian_River 38.455 -122.862
+v4.3.588 11 Russian_River 38.455 -122.862
+v4.3.589 11 Russian_River 38.455 -122.862
diff --git a/q2_sample_classifier/tests/data/chardonnay.table.qza b/q2_sample_classifier/tests/data/chardonnay.table.qza
new file mode 100644
index 0000000..cb76bd8
--- /dev/null
+++ b/q2_sample_classifier/tests/data/chardonnay.table.qza
Binary files differ
diff --git a/q2_sample_classifier/tests/data/class_probabilities.tsv b/q2_sample_classifier/tests/data/class_probabilities.tsv
new file mode 100644
index 0000000..f1a12cc
--- /dev/null
+++ b/q2_sample_classifier/tests/data/class_probabilities.tsv
@@ -0,0 +1,8 @@
+ classA classB classC
+s1 0.4446 0.9828 0.3208
+s2 0.0776 0.0118 0.4175
+s3 0.0657 0.0251 0.7505
+s4 0.0617 0.1855 0.8716
+s5 0.0281 0.8616 0.0291
+s6 0.0261 0.0253 0.9075
+s7 0.0252 0.7385 0.4068
diff --git a/q2_sample_classifier/tests/data/coordinates.tsv b/q2_sample_classifier/tests/data/coordinates.tsv
new file mode 100644
index 0000000..9d15dab
--- /dev/null
+++ b/q2_sample_classifier/tests/data/coordinates.tsv
@@ -0,0 +1,5 @@
+#SampleID Latitude Longitude
+a 38.306 -122.228
+b 38.306 -122.228
+c 38.306 -122.228
+d 38.306 -122.228
diff --git a/q2_sample_classifier/tests/data/ecam-table-maturity.qza b/q2_sample_classifier/tests/data/ecam-table-maturity.qza
new file mode 100644
index 0000000..f6b64a7
--- /dev/null
+++ b/q2_sample_classifier/tests/data/ecam-table-maturity.qza
Binary files differ
diff --git a/q2_sample_classifier/tests/data/ecam_map_maturity.txt b/q2_sample_classifier/tests/data/ecam_map_maturity.txt
new file mode 100644
index 0000000..95c61e7
--- /dev/null
+++ b/q2_sample_classifier/tests/data/ecam_map_maturity.txt
@@ -0,0 +1,127 @@
+#SampleID antiexposedall day_of_life delivery diet diet_3 mom_child month month_of_life sample_summary sex studyid Description
+10249.C001.01SS n 0 Vaginal bd eb C 0 0 Vaginal.bd.n.0 Female 1 Vaginal.bd.n.0
+10249.C002.01SS n 0 Cesarean bd eb C 0 0 Cesarean.bd.n.1 Male 2 Cesarean.bd.n.1
+10249.C002.01SS.r n 0 Cesarean bd eb C 0 0 Cesarean.bd.n.1 Male 2 Cesarean.bd.n.1
+10249.C004.01SS n 0 Cesarean bd eb C 0 0 Cesarean.bd.n.1 Male 4 Cesarean.bd.n.1
+10249.C017.01SS n 0 Vaginal bd eb C 0 0 Vaginal.bd.n.1 Male 17 Vaginal.bd.n.1
+10249.C017.01SS.r n 0 Vaginal bd eb C 0 0 Vaginal.bd.n.1 Male 17 Vaginal.bd.n.1
+10249.C055.02SD n 30 Cesarean fd fd C 1 1 Cesarean.fd.n.1 Male 55 Cesarean.fd.n.1
+10249.C055.02SS n 30 Cesarean fd fd C 1 1 Cesarean.fd.n.1 Male 55 Cesarean.fd.n.1
+10249.C004.02SS n 31 Cesarean bd eb C 1 1 Cesarean.bd.n.1 Male 4 Cesarean.bd.n.1
+10249.C008.02SS n 31 Vaginal bd eb C 1 1 Vaginal.bd.n.0 Male 8 Vaginal.bd.n.0
+10249.C009.02SS n 31 Vaginal bd eb C 1 1 Vaginal.bd.n.0 Male 9 Vaginal.bd.n.0
+10249.C014.02SS n 31 Vaginal bd eb C 1 1 Vaginal.bd.n.1 Male 14 Vaginal.bd.n.1
+10249.C016.04SS n 45 Vaginal bd eb C 2 1.5 Vaginal.bd.n.1 Male 16 Vaginal.bd.n.1
+10249.C017.05SS y 45 Vaginal bd eb C 2 1.5 Vaginal.bd.y.1 Male 17 Vaginal.bd.y.1
+10249.C034.03SS n 45 Cesarean fd fd C 2 1.5 Cesarean.fd.n.1 Female 34 Cesarean.fd.n.1
+10249.C053.03SD n 45 Cesarean bd bd C 2 1.5 Cesarean.bd.n.1 Male 53 Cesarean.bd.n.1
+10249.C053.03SS n 45 Cesarean bd bd C 2 1.5 Cesarean.bd.n.1 Male 53 Cesarean.bd.n.1
+10249.C001.04SS y 49 Vaginal bd eb C 2 1.6 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C002.05SS n 89 Cesarean bd eb C 3 2.9 Cesarean.bd.n.1 Male 2 Cesarean.bd.n.1
+10249.C031.05SS n 89 Vaginal bd eb C 3 2.9 Vaginal.bd.n.1 Male 31 Vaginal.bd.n.1
+10249.C018.03SS n 90 Vaginal bd eb C 3 3 Vaginal.bd.n.0 Female 18 Vaginal.bd.n.0
+10249.C030.05SS n 90 Vaginal bd eb C 3 3 Vaginal.bd.n.0 Male 30 Vaginal.bd.n.0
+10249.C034.05SS n 90 Cesarean fd fd C 3 3 Cesarean.fd.n.1 Female 34 Cesarean.fd.n.1
+10249.C004.04SS n 91 Cesarean bd eb C 3 3 Cesarean.bd.n.1 Male 4 Cesarean.bd.n.1
+10249.C031.06SS n 119 Vaginal bd eb C 4 3.9 Vaginal.bd.n.1 Male 31 Vaginal.bd.n.1
+10249.C033.04SS n 119 Vaginal bd eb C 4 3.9 Vaginal.bd.n.0 Female 33 Vaginal.bd.n.0
+10249.C034.06SS n 119 Cesarean fd fd C 4 3.9 Cesarean.fd.n.1 Female 34 Cesarean.fd.n.1
+10249.C011.04SS n 120 Cesarean fd fd C 4 3.9 Cesarean.fd.n.1 Female 11 Cesarean.fd.n.1
+10249.C045.05SD n 120 Vaginal bd eb C 4 3.9 Vaginal.bd.n.0 Male 45 Vaginal.bd.n.0
+10249.C055.06SS n 120 Cesarean fd fd C 4 3.9 Cesarean.fd.n.1 Male 55 Cesarean.fd.n.1
+10249.C012.07SS y 146 Cesarean bd eb C 5 4.8 Cesarean.bd.y.1 Female 12 Cesarean.bd.y.1
+10249.C033.05SS n 147 Vaginal bd eb C 5 4.8 Vaginal.bd.n.0 Female 33 Vaginal.bd.n.0
+10249.C024.08SS y 150 Vaginal bd eb C 5 4.9 Vaginal.bd.y.1 Female 24 Vaginal.bd.y.1
+10249.C030.07SS n 150 Vaginal bd eb C 5 4.9 Vaginal.bd.n.0 Male 30 Vaginal.bd.n.0
+10249.C046.06SD n 151 Cesarean fd fd C 5 5 Cesarean.bd.n.1 Female 46 Cesarean.bd.n.1
+10249.C046.06SS n 151 Cesarean fd fd C 5 5 Cesarean.bd.n.1 Female 46 Cesarean.bd.n.1
+10249.C042.07SS y 182 Vaginal fd fd C 6 6 Vaginal.fd.n.1 Male 42 Vaginal.fd.n.1
+10249.C001.10SS y 183 Vaginal bd eb C 6 6 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C005.08SS n 183 Cesarean fd fd C 6 6 Cesarean.fd.n.1 Female 5 Cesarean.fd.n.1
+10249.C007.08SS n 183 Cesarean bd eb C 6 6 Cesarean.bd.n.1 Male 7 Cesarean.bd.n.1
+10249.C010.09SS n 183 Vaginal bd eb C 6 6 Vaginal.fd.n.0 Male 10 Vaginal.fd.n.0
+10249.C025.08SS n 183 Cesarean bd bd C 6 6 Cesarean.fd.n.1 Male 25 Cesarean.fd.n.1
+10249.C037.07SS n 199 Cesarean fd fd C 7 6.5 Cesarean.fd.n.1 Female 37 Cesarean.fd.n.1
+10249.C032.09SS y 209 Vaginal bd eb C 7 6.9 Vaginal.bd.y.1 Male 32 Vaginal.bd.y.1
+10249.C011.07SS n 210 Cesarean fd fd C 7 6.9 Cesarean.fd.n.1 Female 11 Cesarean.fd.n.1
+10249.C031.09SS n 210 Vaginal bd eb C 7 6.9 Vaginal.bd.n.1 Male 31 Vaginal.bd.n.1
+10249.C014.08SS n 211 Vaginal bd eb C 7 6.9 Vaginal.bd.n.1 Male 14 Vaginal.bd.n.1
+10249.C020.13SS y 211 Cesarean bd eb C 7 6.9 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C025.10SS n 240 Cesarean bd bd C 8 7.9 Cesarean.fd.n.1 Male 25 Cesarean.fd.n.1
+10249.C033.10SD y 240 Vaginal bd eb C 8 7.9 Vaginal.bd.y.1 Female 33 Vaginal.bd.y.1
+10249.C033.10SS y 240 Vaginal bd eb C 8 7.9 Vaginal.bd.y.1 Female 33 Vaginal.bd.y.1
+10249.C016.11SS n 242 Vaginal bd eb C 8 8 Vaginal.bd.n.1 Male 16 Vaginal.bd.n.1
+10249.C007.11SS y 243 Cesarean bd eb C 8 8 Cesarean.bd.y.1 Male 7 Cesarean.bd.y.1
+10249.C020.14SS y 243 Cesarean bd eb C 8 8 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C004.11SS y 266 Cesarean bd eb C 9 8.7 Cesarean.bd.y.1 Male 4 Cesarean.bd.y.1
+10249.C007.13SS y 266 Cesarean bd eb C 9 8.7 Cesarean.bd.y.1 Male 7 Cesarean.bd.y.1
+10249.C012.12SS y 272 Cesarean bd eb C 9 8.9 Cesarean.bd.y.1 Female 12 Cesarean.bd.y.1
+10249.C016.12SS n 272 Vaginal bd eb C 9 8.9 Vaginal.bd.n.1 Male 16 Vaginal.bd.n.1
+10249.C024.12SS y 272 Vaginal bd eb C 9 8.9 Vaginal.bd.y.1 Female 24 Vaginal.bd.y.1
+10249.C032.11SD y 272 Vaginal bd eb C 9 8.9 Vaginal.bd.y.1 Male 32 Vaginal.bd.y.1
+10249.C001.14SS y 294 Vaginal bd eb C 10 9.7 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C001.15SS y 300 Vaginal bd eb C 10 9.9 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C007.15SS y 303 Cesarean bd eb C 10 10 Cesarean.bd.y.1 Male 7 Cesarean.bd.y.1
+10249.C011.10SS n 303 Cesarean fd fd C 10 10 Cesarean.fd.n.1 Female 11 Cesarean.fd.n.1
+10249.C016.13SS n 303 Vaginal bd eb C 10 10 Vaginal.bd.n.1 Male 16 Vaginal.bd.n.1
+10249.C025.12SS n 303 Cesarean bd bd C 10 10 Cesarean.fd.n.1 Male 25 Cesarean.fd.n.1
+10249.C005.13SS n 332 Cesarean fd fd C 11 10.9 Cesarean.fd.n.1 Female 5 Cesarean.fd.n.1
+10249.C014.12SS n 333 Vaginal bd eb C 11 10.9 Vaginal.bd.n.1 Male 14 Vaginal.bd.n.1
+10249.C016.14SS n 333 Vaginal bd eb C 11 10.9 Vaginal.bd.n.1 Male 16 Vaginal.bd.n.1
+10249.C007.16SS y 334 Cesarean bd eb C 11 11 Cesarean.bd.y.1 Male 7 Cesarean.bd.y.1
+10249.C020.17SS y 334 Cesarean bd eb C 11 11 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C030.13SS n 334 Vaginal bd eb C 11 11 Vaginal.bd.n.0 Male 30 Vaginal.bd.n.0
+10249.C020.18SS y 365 Cesarean bd eb C 12 12 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C025.14SD n 365 Cesarean bd bd C 12 12 Cesarean.fd.n.1 Male 25 Cesarean.fd.n.1
+10249.C025.14SS n 365 Cesarean bd bd C 12 12 Cesarean.fd.n.1 Male 25 Cesarean.fd.n.1
+10249.C030.14SS n 365 Vaginal bd eb C 12 12 Vaginal.bd.n.0 Male 30 Vaginal.bd.n.0
+10249.C031.14SS n 365 Vaginal bd eb C 12 12 Vaginal.bd.n.1 Male 31 Vaginal.bd.n.1
+10249.C033.14SS y 365 Vaginal bd eb C 12 12 Vaginal.bd.y.1 Female 33 Vaginal.bd.y.1
+10249.C044.15SS n 424 Vaginal bd eb C 14 13.9 Vaginal.bd.n.0 Male 44 Vaginal.bd.n.0
+10249.C018.13SS n 425 Vaginal bd eb C 14 14 Vaginal.bd.n.0 Female 18 Vaginal.bd.n.0
+10249.C036.13SS n 425 Cesarean fd fd C 14 14 Cesarean.fd.n.1 Male 36 Cesarean.fd.n.1
+10249.C012.16SS y 426 Cesarean bd eb C 14 14 Cesarean.bd.y.1 Female 12 Cesarean.bd.y.1
+10249.C014.15SS n 426 Vaginal bd eb C 14 14 Vaginal.bd.n.1 Male 14 Vaginal.bd.n.1
+10249.C020.19SS y 426 Cesarean bd eb C 14 14 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C034.16SS y 483 Cesarean fd fd C 16 15.9 Cesarean.fd.n.1 Female 34 Cesarean.fd.n.1
+10249.C005.16SS n 485 Cesarean fd fd C 16 15.9 Cesarean.fd.n.1 Female 5 Cesarean.fd.n.1
+10249.C032.17SS y 485 Vaginal bd eb C 16 15.9 Vaginal.bd.y.1 Male 32 Vaginal.bd.y.1
+10249.C033.16SS y 485 Vaginal bd eb C 16 15.9 Vaginal.bd.y.1 Female 33 Vaginal.bd.y.1
+10249.C011.14SS y 486 Cesarean fd fd C 16 16 Cesarean.fd.y.1 Female 11 Cesarean.fd.y.1
+10249.C014.16SS n 486 Vaginal bd eb C 16 16 Vaginal.bd.n.1 Male 14 Vaginal.bd.n.1
+10249.C008.15SS y 552 Vaginal bd eb C 18 18.1 Vaginal.bd.y.1 Male 8 Vaginal.bd.y.1
+10249.C011.16SS y 553 Cesarean fd fd C 18 18.2 Cesarean.fd.y.1 Female 11 Cesarean.fd.y.1
+10249.C044.17SS n 553 Vaginal bd eb C 18 18.2 Vaginal.bd.n.0 Male 44 Vaginal.bd.n.0
+10249.C030.18SS n 554 Vaginal bd eb C 18 18.2 Vaginal.bd.n.0 Male 30 Vaginal.bd.n.0
+10249.C037.16SS n 554 Cesarean fd fd C 18 18.2 Cesarean.fd.n.1 Female 37 Cesarean.fd.n.1
+10249.C002.16SS n 555 Cesarean bd eb C 18 18.2 Cesarean.bd.n.1 Male 2 Cesarean.bd.n.1
+10249.C020.24SS y 599 Cesarean bd eb C 20 19.7 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C020.25SD y 604 Cesarean bd eb C 20 19.9 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C020.25SS y 604 Cesarean bd eb C 20 19.9 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C018.16SS n 606 Vaginal bd eb C 20 19.9 Vaginal.bd.n.0 Female 18 Vaginal.bd.n.0
+10249.C030.19SD n 606 Vaginal bd eb C 20 19.9 Vaginal.bd.n.0 Male 30 Vaginal.bd.n.0
+10249.C030.19SS n 606 Vaginal bd eb C 20 19.9 Vaginal.bd.n.0 Male 30 Vaginal.bd.n.0
+10249.C007.22SS y 669 Cesarean bd eb C 22 22 Cesarean.bd.y.1 Male 7 Cesarean.bd.y.1
+10249.C020.26SD y 670 Cesarean bd eb C 22 22 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C020.26SS y 670 Cesarean bd eb C 22 22 Cesarean.bd.y.1 Female 20 Cesarean.bd.y.1
+10249.C033.19SS y 670 Vaginal bd eb C 22 22 Vaginal.bd.y.1 Female 33 Vaginal.bd.y.1
+10249.C022.18SD n 671 Vaginal fd fd C 22 22.1 Vaginal.fd.n.0 Male 22 Vaginal.fd.n.0
+10249.C022.18SS n 671 Vaginal fd fd C 22 22.1 Vaginal.fd.n.0 Male 22 Vaginal.fd.n.0
+10249.C002.19SS n 731 Cesarean bd eb C 24 24 Cesarean.bd.n.1 Male 2 Cesarean.bd.n.1
+10249.C012.21SD y 732 Cesarean bd eb C 24 24.1 Cesarean.bd.y.1 Female 12 Cesarean.bd.y.1
+10249.C012.21SS y 732 Cesarean bd eb C 24 24.1 Cesarean.bd.y.1 Female 12 Cesarean.bd.y.1
+10249.C018.17SS y 732 Vaginal bd eb C 24 24.1 Vaginal.bd.n.0 Female 18 Vaginal.bd.n.0
+10249.C001.34SD y 733 Vaginal bd eb C 24 24.1 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C001.34SS y 733 Vaginal bd eb C 24 24.1 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C022.20SS n 791 Vaginal fd fd C 26 26 Vaginal.fd.n.0 Male 22 Vaginal.fd.n.0
+10249.C001.35SS y 792 Vaginal bd eb C 26 26 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C014.21SS n 792 Vaginal bd eb C 26 26 Vaginal.bd.n.1 Male 14 Vaginal.bd.n.1
+10249.C025.21SS y 792 Cesarean bd bd C 26 26 Cesarean.fd.n.1 Male 25 Cesarean.fd.n.1
+10249.C005.21SD n 793 Cesarean fd fd C 26 26.1 Cesarean.fd.n.1 Female 5 Cesarean.fd.n.1
+10249.C005.21SS n 793 Cesarean fd fd C 26 26.1 Cesarean.fd.n.1 Female 5 Cesarean.fd.n.1
+10249.C014.22SS n 854 Vaginal bd eb C 28 28.1 Vaginal.bd.n.1 Male 14 Vaginal.bd.n.1
+10249.C001.36SD y 855 Vaginal bd eb C 28 28.1 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C001.36SS y 855 Vaginal bd eb C 28 28.1 Vaginal.bd.y.1 Female 1 Vaginal.bd.y.1
+10249.C005.22SS n 855 Cesarean fd fd C 28 28.1 Cesarean.fd.n.1 Female 5 Cesarean.fd.n.1
+10249.C002.21SD n 856 Cesarean bd eb C 28 28.1 Cesarean.bd.n.1 Male 2 Cesarean.bd.n.1
+10249.C002.21SS n 856 Cesarean bd eb C 28 28.1 Cesarean.bd.n.1 Male 2 Cesarean.bd.n.1
diff --git a/q2_sample_classifier/tests/data/empty_file.txt b/q2_sample_classifier/tests/data/empty_file.txt
new file mode 100644
index 0000000..302fb4e
--- /dev/null
+++ b/q2_sample_classifier/tests/data/empty_file.txt
@@ -0,0 +1 @@
+#SampleID Garbage
\ No newline at end of file
diff --git a/q2_sample_classifier/tests/data/garbage.txt b/q2_sample_classifier/tests/data/garbage.txt
new file mode 100644
index 0000000..fa43564
--- /dev/null
+++ b/q2_sample_classifier/tests/data/garbage.txt
@@ -0,0 +1,6 @@
+#SampleID
+one
+two
+buckle
+my
+shoe
diff --git a/q2_sample_classifier/tests/data/importance.tsv b/q2_sample_classifier/tests/data/importance.tsv
new file mode 100644
index 0000000..57b2df3
--- /dev/null
+++ b/q2_sample_classifier/tests/data/importance.tsv
@@ -0,0 +1,1057 @@
+ importance
+74ec9fe6ffab4ecff6d5def74298a825 0.44469828320835586
+c82032c40c98975f71892e4be561c87a 0.07760118417569697
+79280cea51a6fe8a3432b2f266dd34db 0.06570251750505914
+f7686a74ca2d3729eb66305e8a26309b 0.061718558716901406
+4d04d759f5a6615dac43060726239891 0.028086160290024458
+04195686f2b70585790ec75320de0d6f 0.02610253907515516
+ea3433f2c148c6fae4cc2e9ae197f5b8 0.025273854068462408
+8ce931b0c99ad5ed648c25483c1ca492 0.02029344879308132
+370829fece3190f1a8efccf50b7d0921 0.017580266040786597
+8720f9ecd2331ce68e2896df1bf2d977 0.017034058634807364
+e2c3ff4f647112723741aa72087f1bfa 0.016776104364241963
+a650f349bbdffb95b3624519d1ba72b8 0.013283768202156983
+2ae8a432208b85a14e86eb821c86ce2b 0.010909411591413926
+cae14798faeabb8e1b8840e712839921 0.010685797620545407
+5d62137a2d05df0be8d6b7e163a3ef08 0.010020841856182077
+596c1c5efe777a8d8aafbca19fe37e1d 0.009651091043216155
+48d9883fd871b82d9bbc89cb0447be8c 0.008982921060244274
+8937656c16c20701c107e715bad86732 0.008214293017642144
+c1b636001e0773f9cca8096d70e8b5de 0.006950690004962161
+77ad88eed91eef009bf64f101c96baa6 0.00665688537920522
+5ee1da6b8eb2e1149af74254c8c07736 0.006601399203235563
+eea6b86c0c75e740670ccc50613b1b23 0.006479475209726379
+3128e057b1d4f320961e4da677724613 0.005645919539722557
+be17b29b03ed02bed133932284568d16 0.004989948679963887
+a66dd8d31584930c2b0f811f572dddcf 0.0048124059283415396
+ea2b0e4a93c24c6c3661cbe347f93b74 0.004810025669591898
+07bfddb8e62098a85afc8fd68ebc308c 0.004140136921392747
+4b548f1f6fc22f32237396b7d4ffee6e 0.0038498233115332368
+d75b7080930e7a77ef3de8c6154895b9 0.0033346270477890225
+9099a7f671b923c7b07c12508f17270a 0.0033074689234597054
+0c70bdd5631104dd3d818561c9166f6c 0.00324330648569082
+47ad35356a9bfec68416d32e4f039021 0.0022404365031278746
+ee084a976fc8028a3fdc50c9555b2a08 0.0022298514655499533
+c162a4f3943238810eba8a25f0563cca 0.0021855014795936893
+c4f9ef34bd2919511069f409c25de6f1 0.002166099280415516
+94b000e59b7ac160b0416c9ab45ff0bf 0.0021550588496877933
+148c7eb0139bbc1b8c87dd31464742e4 0.0021365496990399203
+0cc2420a6a4698f8bf664d50b17d26b4 0.0021008032637668066
+d56536a97c18adccf18eb08958d93345 0.002021901283310038
+25def15fb42bed6855ede4893d27a21d 0.0020017444379080624
+c18afe570abfe82d2f746ecc6e291bab 0.001972967397047078
+a9387e76477da8688775569cf540191b 0.0018074625819242743
+6c83fec6e8d03fad51eabc871853c991 0.0017508229418444995
+8a261f382a739bbc5f9494b2c64c2744 0.0017232389271744585
+3e93fd55a231bb99b67a666bad8446a8 0.0015688191643981873
+d996e8d86e152c0a3c5852a2d8c4c52c 0.0014605617822782211
+bae0d49b5eb22b0dfaecbcbf39a52265 0.0014360435703925205
+74923f4bbde849e27fc4eda25d757e2a 0.0013332140948707867
+aebf2a74a3aa9e5876c479b1db88985f 0.0013164588291310188
+8e6e925baa81466352ff781a289e188d 0.001305557690975839
+d1b7d3ea1db807e9352522d92e403246 0.0012130802121359668
+90c2b51afe404951ec89bd52c7ae1f58 0.0012032892974370578
+9d4e16844252e2dfbaf80a4a6fc4d55d 0.0011721920358687048
+dafe809740d0545dc25c6939a84a1820 0.0010598552486115634
+df6a1dbd34417b068878c043bd7a6bcc 0.0010397960283382362
+4311a44a7d56ce4086afad3e75bedd6e 0.000943488403386402
+61795ee8e60f4a0f66080ab812573fb8 0.0008962861766715468
+a24dae6b87885a55d81df1ea6d1abaf5 0.0008721132528209371
+7667b07b46f3d78c76f13781337cd218 0.0007996665724943059
+3f1ed51e954b35a8108c12dd882b696b 0.0007869337305015549
+29bb5fa7f45e47bbabab8cc1b1b3d18e 0.0007471894961390048
+ad52a0f6646c574417ce0b13b84acfa9 0.0007379284165598606
+59b17c1cd2e1fa86064869f5bdbea8c4 0.0006810720140934703
+2fee36ac3b841b7c63b377f87cd57634 0.0006184580387951528
+ad7bc2bb6e1ec90a9bc3c8949bce80d6 0.00061784619277928
+d9b76ee51ff2b6961430bf352cc578f2 0.0006143795451714199
+877f27b47c85f6b2d3cf8a6aa86def40 0.000579939824065776
+1986030e3c5cd5df6d869054bc96e536 0.0005571839389159251
+e91f1c5b67ffecb5ca3ca3b393a50f00 0.0005367673621172981
+f83bc32e60e9c7bddf53a52937cc7ab6 0.0005354610613922348
+059c8f8bb468064b134ea6d3661761ea 0.0005301713126190168
+1243750138937650754b619370565800 0.0004890847692921625
+9ac5fec396208e598ab2e2d26985d0df 0.0004198438160258734
+d43642bb228b6bd084483d2e5b48c3ca 0.0004196113785368746
+147e08753c0c32354e69746256220ca8 0.00040094057274993046
+b9024a1f377b62508864cfea59b16a31 0.0003872820865898205
+3b092a59cd0e2d57483420cc994ec49c 0.0003753321093973754
+5e4405e37412f415c47c112a0cbc487a 0.0003630769561779562
+6c7fa77831ae630967a6fbfc8ee47901 0.0003620100294304884
+e7e74960c5ee1984f2e60f99081a368b 0.00035092992275626917
+b75d05e361f46e33b39ef21d05334744 0.0003429082395799726
+0d357d84911ef4a089c6ae3c6deb39b9 0.000321837484901028
+bca0b81a0b8d59e90c25a323c2f62f31 0.00032148902937567593
+ece1af985b63ebccd2833e9b5f0432e3 0.00031620424482054
+dff37b3a4d27b9220b9af1459162a48b 0.00031297752477390487
+78eecb7cd2176d3a2ed21ce8154ba1b3 0.0002897361736257693
+0021d135d4ac12982cc8abdf2b38e23f 0.00028687561969616186
+1ad289cd8f44e109fd95de0382c5b252 0.0002614751742022106
+b7fe67097e773cb42109cefc45628173 0.0002592864600918602
+82585d0d14cc315f9da0257c3a2875a4 0.0002544671856237352
+ac5402de1ddf427ab8d2b0a8a0a44f19 0.00020068725358655715
+c123ec846a7e3837d1247386a6060ad9 0.0001854681628289824
+2a13ecb6c618ddf8154298c69336dd8e 0.0001810963201993466
+7ce470a3f833253f6d667fa6830abe07 0.0001808688941675811
+588a23811b3e98af68f4f0fb24a0daa8 0.00017377450975371653
+447b9d6a5c17543a456c597e65b37139 0.00016726377442023347
+82c890da8759fa9477cd991a1f214b3c 0.00016594333445879174
+ea3af29626a95b6c7b89d631bf1a739c 0.00015564865018761215
+ed5952d1e022067b212146b9b3bc6812 0.0001490369165590016
+010f0ac2691bc0be12d0633d4b5d2cc4 0.0001427705331111527
+a590ab40dc8a6b5cbffa347e8e32e63d 0.00013477008207658987
+86deb5cf8d6f29d383136e0d09863b69 0.00013383226709210312
+d0f1985c2b919dcbd0bf4664c844a900 0.00012362381132402674
+124c75409b4ecd0bf23a84e28593567d 0.0001205792627783874
+a186bb74dfce512f83043f7b366c60c4 0.00011304305885473818
+62cb0ed0f2cba6fd1ab5d2a374c15bb1 0.00010757469651420703
+1aab913302590e21a7be28b1494a3320 0.00010706581367368249
+c8d7e00fbcfbd34e1c85a8040155ecb3 0.00010371458403674598
+fcc915000295812eecfa7245a74346e9 0.00010216080211401838
+5b6a30b5a900999d91448bc79c5e14d4 9.716320058704939e-05
+c5b4c6b372dbc13b6a7f2d466fc7335f 9.489811410385969e-05
+c04879d53b5b084096c5f2bdcc38d55a 9.334312563306969e-05
+de08a70ee5e3336d2c494d20206e650e 9.043444708379022e-05
+7281d8476d29dd24b3e674ce9cd9e3b0 8.785266881993596e-05
+9b2d42844c99887e67257a377f7b193b 8.74479052197794e-05
+4e1758e81adcb1107024ee52c0113a6f 8.098909104258687e-05
+0189d0173c07f11e7586ff20eb33f5ba 7.858913280228686e-05
+539ca948741b9a713eb781d4281b23fc 7.821268709252524e-05
+b3fef1e26b7b6ac20847ede4da68547a 7.701147547233651e-05
+ee02d6d3506da57909d5e603709a295f 7.427114488130107e-05
+b45c9784d674879d68d54362419a1d70 7.171890492404162e-05
+105d5959bed2c9399522aba4291031fd 6.810720140934897e-05
+3baa56d5f42b81f2817aa34412e36439 6.782583531284291e-05
+bc4177a5fd330e9c23138dac2d6c69e0 6.723418532137901e-05
+de61860db8d93da85692dffb5cf32ab9 6.60098361191445e-05
+6a66cd42df39f678f4bb2f3e840b011e 6.028963138919508e-05
+b7a041d2927c03859714fcdd60b9dc7e 5.378917869303054e-05
+04c8be5a3a6ba2d70446812e99318905 5.134352337296379e-05
+328d5b7cdad033d7b3ed419b720ba2d3 5.108040105701027e-05
+4c731267e801c2afe253b5cbb58bdd24 5.0241359490998746e-05
+b835514e2678e3c7ba13bfe0a0d79bb4 5.0241359490996184e-05
+2480f3aa1e7784b461d9b5b83a6bfc81 4.604372766711962e-05
+237135dc8b20f922cabee934f7d410a1 4.460367317873236e-05
+5cf5895a231b0a52d5859c74c74aada7 4.139914348861156e-05
+764bb70efe70ec930379806d178319cb 4.0341884019772904e-05
+40c85a458df8d5a935122ba778d4f334 3.7337250253227875e-05
+34c133377349d1b57072aaab4f781fd9 3.580564835875417e-05
+2364e23d05750415070f8df3ccb0c95b 3.568860455789974e-05
+ede423f2b32cfcbc0815db2435994947 3.3617092660689695e-05
+424613c7d3d2daf5c715d1356a972381 3.3188666891759055e-05
+59196a586276f0be745d0e334fc071c6 3.3188666891756934e-05
+954f244280781f0339631a6eee679e1e 3.0255383394620722e-05
+30fc8af207d17abc37394fbf3d1793f7 2.5212819495517268e-05
+b8315874f4341a59e8e3ed317406ab09 2.4891500168818583e-05
+b0574dba26050e54cf5efd672c2f58ee 2.4453423469918827e-05
+52b25decd37eb58a3bd46167d1c46354 2.128350044041883e-05
+68c83aae1babce28612ba404b1096cc1 2.1077191628870952e-05
+c5adecb440a6bac201ab573ccd2d6dda 2.098349859690685e-05
+ff5c02a6c8a80d8898f37b7dc71f4c10 1.897042303767303e-05
+5a0df24ab3d9c2d78a043cd477a54b4f 1.8243000377503335e-05
+df1bc3956db29b2cded9d3925999f6e7 1.6809118341572044e-05
+4bdc8766a80671911353b96f2d702ffc 1.344683706427589e-05
+22b9160be22106587cc68893686c44c1 1.3446837064275873e-05
+f8000a94d405169f38df61e7cf1cd286 1.3427118134532813e-05
+df3f8cdac76c62baa98469c33b82d8df 1.2770100264252568e-05
+1ba09da532a0f9985b3caf72a47b42e3 1.2376844272339594e-05
+cca22a273fa140d50cc3cdce9f539f73 1.1787784859310052e-05
+0c17cef318a9e1c8895c0961c5ed2193 1.0706581367368972e-05
+3ee072a59b45057a86369a7c80ad90e4 1.0371458403674174e-05
+ed1c3370c86ae00e0062a3498ef06cf4 1.0085471004943226e-05
+1a389bf765c8ff0f92600f684fbe7ad7 8.9648631155042e-06
+276c5b1d00e091a197a9ed8753beebc7 8.951412089688543e-06
+ebd14a21e990e0266b2b985be4cb3c44 6.6916133546051555e-06
+bc0588863994a8f20e7dbce7a6fb8b96 0.0
+b1f2146bef78066448576fceeb6b644b 0.0
+bc23d2ee539bf2f52faf789289af7879 0.0
+8dccd839cf4868a3343e411c063932a6 0.0
+b153e0852342c0738da791930625716f 0.0
+bb8cd42775d7feb5b54059f90e5ce9eb 0.0
+b148aa1f06e0eea5a72ac72fdf3ac867 0.0
+acfd094c6c92700cdcf89371abdd104b 0.0
+ad01a1417ca1d8a5a7a768ea78e45673 0.0
+8fd6bffb88d374f38c6bf35cc8a4a221 0.0
+8f5b5babdbe9847334e115160088b371 0.0
+b098aa6c11cba636420401399b83be7c 0.0
+adc59da8759a4276a8f218e237e27f12 0.0
+ad8991870b8bdd236d97d01487ddadc6 0.0
+ae791ca848029549cd27ece21344d8a1 0.0
+ae793fc8909a69dde3c7757780299e14 0.0
+8f4a3341ccda8d1b45f286cb686a3aec 0.0
+b01e64261beff06c591132b8f0cf3099 0.0
+b265615da6b98d477507607fc51972f7 0.0
+8dcd5864b11c06837480c727198cded4 0.0
+af25adac3aa4a6a6f6c97b23b7755273 0.0
+aefd4044e43af5915f9a3fac602e9308 0.0
+8df5a666d571bcb1b2a44aaf978fab49 0.0
+aee0d6f2626271eacf89d2e803440657 0.0
+8e1c69dc9a692ce4b7e93c8cb728b7a6 0.0
+8f1c11c7b732265cbfc1bb7dbd0241e7 0.0
+8b159e3b7ba334383b185c1926d9b9c3 0.0
+b29deb0964b5328b6206ea33b9035302 0.0
+8dba41073e8e2650e099cda3fd3a0630 0.0
+ab2ef9bffd8e25c75290c0a2f6a8f85c 0.0
+b460255b8269b55301e0c3c3e3da252c 0.0
+b47e328fa7998e8085a30dc2a03506c0 0.0
+b47e999a38113e0e4e2b139e8b861fdd 0.0
+b48a25941ae901a73b4a2830cc24b382 0.0
+b507596e6af67ab90ce421ce6f40b0af 0.0
+b55ddaf95d9d2217ea087863a7afbbad 0.0
+b6985598d3f4a9b830bbe1c6a665f5fe 0.0
+8bd6fae62d450bdf768bfc6464d36d38 0.0
+b796b2132a5015f869c8b5ac44a0e2bf 0.0
+b955f67a541a149dd3b179dc62d37a72 0.0
+8bb5ff097e0707dca3fe5860af20f4a6 0.0
+8ad30192adf51cd7b03105a1acdea794 0.0
+8b86b84549e5940744c30df2a573b55e 0.0
+b8c685d6d691d57d63a05b0eaf5a669b 0.0
+b8c6906ea995f21303d44ab7b21655c6 0.0
+8b78fe0639a5308fb6f33a8eaafcdfd9 0.0
+b9d953d2c367d1b3150ccf062dacfe83 0.0
+b9fefd4224e459a6a0d21db63b1c0d03 0.0
+ba88d4e4e6a24ecc2a5aab6bdfbb77f5 0.0
+b3d06445a910a9a444919e41f5da39c3 0.0
+b2ab597a902ff227a5ba76030cd6002e 0.0
+bb1c2099a32a335b1a55f44d3ba54484 0.0
+8db257dc277d4e84ef974e07dc449820 0.0
+b2e3852763d37eff884d326836ccec27 0.0
+b36ff490f4df87e30600ed347f23cbdc 0.0
+b39a1bb517361c2a87f8fb48d66a49ee 0.0
+b3b1ae423dc7f3d8942237d115a5bf86 0.0
+8b0fef40cdee88fe61bf6c86ea5b8d41 0.0
+b4446b03208372a5dd17608d78f7e3e8 0.0
+b3e2d6b8bb4b97afa07c0d048746b712 0.0
+b3ea95770cf649371d22e1a1d5aa3c0d 0.0
+8aa70d0c06e8da0b4c0ea2eba6308983 0.0
+bad4f7c5ada34a1f813946abcfe5c556 0.0
+b423592b0ae631882095792b27729b9c 0.0
+8d99d70a19898e91e9d7dd54dbd5881c 0.0
+8d89ee75fdeac54f5ebb1ef3edf80e73 0.0
+965d7a58f59ea6445b32afede05a4012 0.0
+a7e74b8756d98ed147f70628144245cc 0.0
+ab0b78445a54df766c12448fa0ad6d49 0.0
+9dd9cefe38347b087dd318d82962cc03 0.0
+932e1f2d3f75eaba13d2b2be2f2f7cb3 0.0
+9c25197937d1f54e2634026fbca6b55f 0.0
+9c58ab3805d06ed6f1ff2a2de766458d 0.0
+9c61dd9fc1208289eefdccd5ad281288 0.0
+9caa12440830e96456250882ad25f89b 0.0
+9cb1cfaea33aafcffe7c261eb104ca56 0.0
+9d2176c908640e85287b95f9cd17bf64 0.0
+93029132afec86c495769fbd8dd65ff2 0.0
+9d650084364f31c59068a17261e13dc8 0.0
+9dc78fd8f2f7cb10e190e77af61c7ecd 0.0
+92f000f936d3f349c5e7719a5edc5078 0.0
+9394eec407745afeb6370fb2a0ee1a98 0.0
+9e033e0a6e420a9d407f908d59aa31eb 0.0
+9e644716572f5930cb56186d75a6bb84 0.0
+9e8e56f06c0615130e1e2f459bd4052a 0.0
+9ecd25659ebff6a840c089b93515c5b7 0.0
+92ae272884c88aa3be9a5800f2836740 0.0
+926708e33d1ebf1a9da757f78c673190 0.0
+9f48f7b79ed6f3dcde8b25e0cf244d5d 0.0
+9f4d14b114e7499772a797790d81c1db 0.0
+91d7d1256be7c1797cd45c2296095521 0.0
+9f74473636e6874853d88dfcbcecff49 0.0
+9bbff59b3c1084173c4917e136b496fa 0.0
+9bbe5054cf0bc376baf895f0638705f1 0.0
+aacaf5f360c110204da1051a50c5ba5f 0.0
+992072d045462445dc239b35300af2cc 0.0
+959d48a11e3b26c9268f0bd1dae6ec40 0.0
+96794c2d601e0082c9b66552dce42e4e 0.0
+957a16edeead7722188be53d05fbf001 0.0
+97951199cbbcc765ccde49fbcbff09b1 0.0
+979578981da3916b394bf24c32bace21 0.0
+97cfaa815d9e25d41d6698f790b7ad0a 0.0
+95348b8c18227b458056472fc9eeafd3 0.0
+9828f58eaf910927264c204ace9caeed 0.0
+984ef97167ec7b42d7a3650716d6a2ed 0.0
+988b0b7321184cfc2c53e0966dcab560 0.0
+94a699e62982fe59b7725f1b7271972a 0.0
+9acc238746a1f2aa7745a0b5720c4eac 0.0
+94738eca8f0e714dac0390e647565488 0.0
+9938c8f22908861f780f8719b75dc0b2 0.0
+993ea35e8ad7ff6894e32247dc507e9b 0.0
+99dda7b5337bc09a69e9e27bab56fd79 0.0
+94650d81f3f14b948edfadca3ca7b166 0.0
+94035fd1b8dabc1d79586ff8b52846b3 0.0
+9a75764782a384ce3b83e019c8b2dd94 0.0
+93a9affe525a4ddda933fce27518d7ce 0.0
+9aa7314d56b3b3db2253ec2763414be7 0.0
+9394fd89106163be8a11cd4dc46bc8a3 0.0
+9f8119029c8b972ae0375c8b968c14e6 0.0
+91b88aa295fe09a255b2f321d9d2b12b 0.0
+919db619ad90cc6b09b31c83d5719da9 0.0
+95f77f9e6df255cf93ec17c1a48bc194 0.0
+a607665fc01df970cb3f1c4002a00bab 0.0
+a6099802840e60febef8e896967b7747 0.0
+904f7b1ae29e6a78ff83eacd9ef9cda7 0.0
+a66a33da056496a076f57ec22a435dc9 0.0
+904d063813394e9a3cd5a23fef870ec8 0.0
+904ad13c7b8512200836d5cd9fb09e1b 0.0
+a6bd74dcea0ad6299a4cd2937e192151 0.0
+a6fa2de5f4576d1d77b1616786e3dd52 0.0
+a75b0438ad19fb4e31f4d58c2d49ea02 0.0
+a765441df9f96c9e8a1baf9645528616 0.0
+a7fad69a900f747d777bf13f8c1ffd83 0.0
+9122e867c908301e783c5ce0c3bf12e0 0.0
+a837426344372de1e6d07876dd214a5b 0.0
+a898fafd80c4c90cdcde401ca2332e05 0.0
+a8aca6d9fcb6ac34f6104cbe28a466c7 0.0
+a9566f812a634f1b709810bb0cf50629 0.0
+a957f3e1f57e99be7c1c28de7f016780 0.0
+a9b72251b244f66d8296965d9435218c 0.0
+a9e1acfa6cc2678b1fcc40f49007d4c5 0.0
+a9e6fc1854a3fd8873b32c9fbd9ac13a 0.0
+902c0a7dd5a911a8fbf9e34e49f72297 0.0
+aa738f71de2d14bf826740d8df65e5f2 0.0
+906138f305d5b85ac761fc50bbb6ca87 0.0
+a5676561cb37d21b8c485040ca3ebe96 0.0
+a544abd909afc727c4418e4e19eed417 0.0
+a51b62113b7c9d4febf6956e86e5dfd0 0.0
+a02c286e5afe7220b1fdf83dadea939f 0.0
+a1999b4f10c91d6f8efa3c9d73f77fa4 0.0
+90e427df120ab07ffbd57703f11c1a4f 0.0
+a1a22b2e460e6454dca7805f1cde48d8 0.0
+90dd37137cfac54abcaebc7048fe77c9 0.0
+a1f75692d5fc417ae9461aa66e0ed7ef 0.0
+a220d7daecd7de9a9560ac2a5a64e2a5 0.0
+90b56798a9abd8360a79627ec3c332d3 0.0
+a2e0f228d7110587954e13405f98f003 0.0
+90a685caa7eac21daa8a8f56df52eb29 0.0
+90a0937762431acf0f8f3643e08d3180 0.0
+a2e3ac6b18daba602b14d93882f3663a 0.0
+a33859e0fe8e20bd783ad918ac192884 0.0
+a36d070ba91e6c48d0c79036200d0b20 0.0
+a42ad1956d801cce5e0109dc0e987ceb 0.0
+90949bb38f58e92949ab8504aff01df1 0.0
+a4c61d535793417f6f6ab9eae2463f12 0.0
+a4dbf71f32a016e120fe6cf253dfa6c0 0.0
+907ceab78e867d765fa360527608efa1 0.0
+a4e2a4e1321a599651edf92f6999e78e 0.0
+a4ecfb2e708863a17978a0942358ffc3 0.0
+bc4ae61d84daab04b0e888d22f7d3f69 0.0
+c6959c6d7f3f2806418eddfeeb3f2d22 0.0
+bd0f4e14387ee1ac7e5fe586cadbbed4 0.0
+ecaa9f4e15a28f212d9bd60f749191ab 0.0
+e86212f766f010236a072bfbc04774b4 0.0
+e8cff5dd9314935749206c1d83214d77 0.0
+e94776066de6ff852fc7a43e7a0626cd 0.0
+e9c73fbac5dd73dd5e3ada5664ae95fb 0.0
+ea6265476f08a35fdb764ae98206ad5e 0.0
+eae4215acb2d651ce76c5e4d10f3d839 0.0
+eaea5d7b84c08fdef5d8ab40f707e37e 0.0
+ebab7cfc7858bfb00c486bba21d6ca6d 0.0
+ebb5ea4b9b617249c40513ded403e30b 0.0
+ebfae3fd8d9da7341650be7f4f6c13e1 0.0
+ec38f851c5429708e114208c539ae458 0.0
+ec6c9ea52e88226c339487585ea02f42 0.0
+ecc877a3410d54ea784131356fe5ea73 0.0
+e82ab12c51a893c6d7938a16632199a8 0.0
+ecff0a63780369ddcdc800ab97fb81f9 0.0
+ed1528f4822674c9e1da64b84c778e9f 0.0
+ee062262ac3b97896676e558bb08986a 0.0
+ee41b2201102d120e0ceff1eb49ba6cf 0.0
+ee5c4bf663b722fc809746f322a4f6f4 0.0
+ee90bebacf6867cd5b4c487cb587ea9a 0.0
+ef4045fe369d078db89c253c9ddbd123 0.0
+ef467d766ba05adf651d1fc6a30cba06 0.0
+ef7c8af53de2ee3385027bc6b4bf0883 0.0
+f05a6141cb84e0c95470be3e92085b93 0.0
+f09ca081a69806aca5de24d404ad4c6c 0.0
+f10155af1849a17996f302d71b32398c 0.0
+e84bea8ae61342674744560dd7de4721 0.0
+e80317d0fb0618cf19a96ef454fef9de 0.0
+bd8cf9d4f4744a93802336d81b60966e 0.0
+e30c40e583572ed45fef99d6a32d3924 0.0
+dbf76c1b3ed4745cb94c15374d951beb 0.0
+dc92897ae65716d40ff3b2746a2a6527 0.0
+dd50bb187bc6f99222e626ddc810909a 0.0
+dde2f514908a4fcaa80827346c7b7a19 0.0
+de0ad18436d210c9b60044fc7b9caf90 0.0
+de478a19bf7c5ab5e75d3dfcb742f4d2 0.0
+de6891b6f7a1f35bd9ccbc058b9e1a77 0.0
+def8acc0fc88cc9a9cf110370ecc2c37 0.0
+e0fdbc7f687a7ea0cf13009c2e501197 0.0
+e10c0e74616a50d8956a945d0b6dd531 0.0
+e21d9f482315985036cf80d1b20de58b 0.0
+e2bf5a8c1878477d542ef764b910e5a2 0.0
+e32053e0d7a8b525d0b062beb2ae11ce 0.0
+e79e5e4c871bac3c43de566188e10002 0.0
+e392de248b88befeab2319aebd7872c3 0.0
+e3a4a62adc4e83c274978806164b474a 0.0
+e4207d118a1ffe33509edaf320ac2a7c 0.0
+e4d8026eb8e261594d51750b1080dbf7 0.0
+e56aff2928773e4f9d4646bebe35123c 0.0
+e5a43b018c81cedd19e9a5354d32d469 0.0
+e6441fdd2f0ec36ec914d7154a64061a 0.0
+e644522cf6d7222c9da7f9ecf614cd89 0.0
+e6a34eb113dba66df0b8bbec907a8f5d 0.0
+e6f747cd89d512cf187e8472cfb250a1 0.0
+e7037dc2701d6b9666d2c0dab6110ae7 0.0
+e78cb60fcb9cadfa0a32ba3ac4b2e95a 0.0
+f12011ad5219b22b6ca790103333a755 0.0
+f19fa34c0c987ab3bd8aa42d8a4d21ec 0.0
+f1da0f084f3e5b7957d11d01abe7013d 0.0
+fc7466fe7b054b641298ab8b5c707d21 0.0
+f85dae8239789b16d0f8dcac9e272361 0.0
+f8dc4d17a9589b91dc466d20015518e4 0.0
+f94eeb84a8ab5049e8e0cbcc078de30c 0.0
+f994313e9f7ddae7f7d6806221d6be70 0.0
+f9b52205cb2d03153fd394a290212a10 0.0
+f9f5f08e8da4cc9740518223846563db 0.0
+fa09b814b417561651db81db34860ea5 0.0
+fa1d5d724ccb4287e07ea1caa9d35502 0.0
+fa71c8409cbcaad5dd98b8d80a46b07b 0.0
+fad561b7eab58e57b71a09a8b7f990c8 0.0
+fba43f263a40d5a9cc7f37f1551b13f9 0.0
+fc613984c6bd68a102eb7daee76a24a7 0.0
+fc9717bf86bf07fe748f608b4f2c57c6 0.0
+f283cd6d309acf7626b1fbc0079d275d 0.0
+fcab01e402145abc9e946d12aad1cde5 0.0
+fcfab546634e10be9800b9a4118b8dc9 0.0
+fd069fff020b15638483c7e49d2ef9cb 0.0
+fd1830279629e35893c2cc7538b028a9 0.0
+fd1f139259c53276619476db0cc81913 0.0
+fd21badba5eecdf5a284d5db7aa0679b 0.0
+fd8cc2ec2e11078f3635cdc7fb477823 0.0
+fda9e2638dfa1f4eebff04a14575f763 0.0
+fdcd6808ef8269653d25dce4a55a025d 0.0
+fea3411c847e6627afb5e78b88e2e9af 0.0
+fefc7999a7e316dfa98bb6b63d6cfda9 0.0
+ff2b5301608beca5896495311a512610 0.0
+f83918824d8dd7875a4b478adf4ee601 0.0
+f837443696652157e220ae13e330e0e6 0.0
+f80a17436037d706dc728dd265b7c21e 0.0
+f7ef98bb6d0061b9368a72b1ff40aad0 0.0
+f2b4aaab4b89a51ec6205485018c9419 0.0
+f307aa8ff39ee31213f4d9e1bfce6dcb 0.0
+f3153e589a8718a2a1109a6e181617f7 0.0
+f31aab4ff2dba1e72d9ea251e447844d 0.0
+f31ac04c4b89a126b2406e811dff97fa 0.0
+f3b9619b167cd0ec5f9c55a7dd6c7ee6 0.0
+f3c30aa47b3639f25d168a810ee07fd9 0.0
+f3cac486ad34144511deb96b29ded2c5 0.0
+f402dfa5da0f4bd53468557a1a6b01e3 0.0
+f434ebfe13e3764e53f40e7b99e07a39 0.0
+f45b62ab6ec22fb20364a7e05088158f 0.0
+f47f8e690205be8c0572db3c32829f43 0.0
+f4909ad74d5b44aa7c3694798c21ea87 0.0
+f4cd813a1b8a862f7a285466b25c6302 0.0
+f5487404313a4fb57bc3489c167e9a37 0.0
+f56e589e8377da78834cf577a69b939c 0.0
+f5dd3f079f8866010ff9e1ce86dea070 0.0
+f5f4ef252850e0b1f378923731257d69 0.0
+f65ee26124609d6de91d4bffee014357 0.0
+f6c6e7419ddde1e2e1d4a790ec6babfe 0.0
+f710c0927080e422094a96ffe13b389c 0.0
+f73160a94abee0441ab5a573877744d2 0.0
+f7a5b80128b001df0f52bdad023aeb23 0.0
+f7ce5d68ad002cdc285273d049516c84 0.0
+f7e75337c2baaabcd5c9cbfa83ce5ef1 0.0
+db9caf76f81e72dac22fd1a55da6d5ce 0.0
+db6e4e3d6bea23bd5c619e4dd4ef5b87 0.0
+da5463f9a3163db4528898c49e7fa54c 0.0
+c9ccc527189a7a19902fc022a72c3bae 0.0
+8a974c19d04562e710612ca049799942 0.0
+c7408e951d6050a4ea360aa404050896 0.0
+c7ec80af1d984fddec04831296fd65a5 0.0
+c7ff7e806765a39b6cdf66a15b63f6bb 0.0
+c870a3c711d733d9058cd2274a101381 0.0
+c8933e6cd4ce9389f4ad6e2e24dbf5b9 0.0
+c8ae16a39fbfa227aadd2e6d5a63da7e 0.0
+c8fce068920feb387167d44c01689295 0.0
+c936431ab7a77c5828a3da971da51acd 0.0
+c942f59feffce9c3ab527135de234101 0.0
+c98ea813f96e96f25ba321f74703bf6b 0.0
+c99c4c951e6f4745973043285ccf486b 0.0
+ca0fb2462b67d4296fdb8de1372d9165 0.0
+cd5f131980408e798875e41b3c0d53de 0.0
+ca5b38f92987671538ac4f8f5b4ee95d 0.0
+cac55aa795998014361e14836ddae2bc 0.0
+caf54b4995a78b51266cb0e4aed4deae 0.0
+cbb1dee8bf80a6cf676e15d39d9979d1 0.0
+cbfaac9f291ac42563f27903d0f684a1 0.0
+cc3e4918ee1c089c5bed6cab11dbee83 0.0
+cc42ed01e236e8f05d25a7310d47e354 0.0
+cc5f74d0b0787e9ca4f97835c2c0f2a2 0.0
+cc968157f7691aab95df444e8cd103e2 0.0
+ccce7fe8aa849bed027f6898300e0c45 0.0
+cce0f21cdf5597d900d303fdac05b33c 0.0
+ccfb4c8a7a57ffb8fd5212276ff2b193 0.0
+c6178d46f91b123b2c226120a3167a34 0.0
+c5e3c5422835403541b0af331a2cc649 0.0
+c5b95a83a06195f219b0edd9ab18fb1d 0.0
+c59161ae395fd5322d2f192f13cfb31d 0.0
+bda6b6e7da7508c68057140e7bd6cd8d 0.0
+bdb2c60864ae18c4302c9f144219512e 0.0
+be480a1e4a4e71fe774ccd3e4ff22945 0.0
+be9eb15986b4bdfad6c474a30e7ce4b9 0.0
+bed1b5521bddc2d35732fafc25043f19 0.0
+bf8144aac6a30e6695ad6bf8f49131ab 0.0
+bfb7bb16ab570748b3a5d099e9426af1 0.0
+bfd6e1ccc45f37b290d63de0053c212e 0.0
+c07ef20fbf08691cd223cba17009e12f 0.0
+c0b31b31a1d47acfa4b2a6b279ee631b 0.0
+c12331989624002d4f790eb1d19dad02 0.0
+c1365ec008032ee718dcafaf5ac17adb 0.0
+c1a5735daf6f1e6157aeafb503b9e564 0.0
+c24ac2e6e7002af9827563df39144c55 0.0
+c2cd19f77efced924e3eccd8bc6750be 0.0
+c2efeb6d250b34f682ae6380682f60b3 0.0
+c30a545d14731276f649c2428c0babe7 0.0
+c326a5572886b04b8ec92047c9415a00 0.0
+c348a7cc121fa43fb14ca92cc4273c2e 0.0
+c3937c933413c88df1db4b761e0e3ac7 0.0
+c3cb5f0be6d5e3a4761a78606c1e4113 0.0
+c4b0455f8d6d59d4caafda4b3aa90622 0.0
+c4b11e3e5340bdf421aeb4ba0aff0269 0.0
+c4cceb02f4ce674101aa2c0d1317cdf6 0.0
+c4e57b91cc2be5a2232b3612abdf4c0c 0.0
+cd1fc0b5ef8bb9823eb279222f0d0860 0.0
+cdce04d738e587eb6a58f8a79ed36c20 0.0
+d9ef9c1fd69f0b398b331c428991204b 0.0
+d6bb5703483ac68992e09ce17683c52f 0.0
+d34887a684a1acc690b8f3644cb11c12 0.0
+d371cae3627eb71177294af103c68940 0.0
+d39f37b814ebd81091a949f8bd9d1710 0.0
+d3d99580d90cca486a23372a474e9571 0.0
+d3f86c6a6fd03654e0848a1fb1ad1385 0.0
+d43ad42f3b1625379a2429c10f419eda 0.0
+d4e63b08a25162d359541e38c6e484e5 0.0
+d4e8fb8d73378ac761ec8eb3faf71804 0.0
+d5b4f33a93b1580c123d306690d0ffb9 0.0
+d5d727a0e5ba892a10e9c81ef29d02c0 0.0
+d666cafbfcffb984b12c4a405df6c7c9 0.0
+d6752c4c0b83bb0ba7df8368b3c94d6a 0.0
+d6bc69828ae0ef27e99c01c9e7df86b0 0.0
+cdfd52e813292000f1bbc5c4d7c95b4b 0.0
+d6c46ad81c01997055a3b713c0d5ee1b 0.0
+d6e76cfe58d253204f5589d78cc79bd1 0.0
+d7130a2536c5b200a01662b82ff93d06 0.0
+d73c014c9a033812619aa44e9cae934f 0.0
+d7466ab97edf938c5ed46f6bb1649b60 0.0
+d780f9d98701c8ab19b4488869bd5be6 0.0
+d9294a924ce5ef9df6e464b4ef2f9db2 0.0
+d93dfe29130fd71bd30fcdeefea2862b 0.0
+d97a97d4f8974683e4a66de090129c1c 0.0
+d98796acd66eb71638defd318991adfd 0.0
+d9d0120f569d8e15590c6486f4ffed36 0.0
+d9e7a2013a2b6863cae05c51be41848b 0.0
+d32d7b4a5e805f7e43f7823bf3eb566d 0.0
+d32627161d829a1488c2a047669cb29f 0.0
+d3211b7a04e5fb36a383450b8f5710e2 0.0
+d315a0b30c8e21db4cfe89e65c0b2bf1 0.0
+ce33f26f3bd4784cdb63910753fa0792 0.0
+ce399235aeab566680d0ad1ac6f9c7a4 0.0
+ce9976fb696a210e526fb161c15ed88a 0.0
+cea0486054293b47714d155ca349b763 0.0
+ceaf720138cf7bdfb66e2dbdae539e38 0.0
+cee9ade29a61d7669cc2710549611c53 0.0
+ceedb016e7628567e25ad61c46a8be83 0.0
+ceefb8fade04925c7bbfad924171a220 0.0
+cf54928a038081788aa5939c49ae5340 0.0
+cf8eb2fb3d885d1919ba916bc0975347 0.0
+d0309ad131312d425619892c703b13ad 0.0
+d05c86b5bfc092d8677507fb1eae8d66 0.0
+d061a99148d649c4eca4836c5f4b5b68 0.0
+d0c34b21c7c96271a1bc5f7fe82df3a0 0.0
+d0f02fb55f33f41f45d8b5a6a6a5b018 0.0
+d103541d4efc81020afaef43c0f02c02 0.0
+d117a20b7daabc35e9d810f3a93d18b5 0.0
+d1539f8c2855fe568d1313f20d26710d 0.0
+d208cad7c585049e04076ac7df960e7f 0.0
+d213f55b066d42f6c6fea9f1d27582c5 0.0
+d2589a845c837dbf413021f74e82ce0f 0.0
+d28170caeb000a27998d838a8e7d5de8 0.0
+d284b92d7518959fe5d52aa45b687394 0.0
+d28f5cab517f832750bbf62798e58dc3 0.0
+d2aa3a30c8e178110125b88dfc7e6f45 0.0
+8a9f7499cdb85b8efa497bb56256cb43 0.0
+7ebc04cb539befd9f2e7faa131628c66 0.0
+8a83cfd5eb074b5dbc6118eb03ef915e 0.0
+33950a5fa4d8ee86e10778182fae6714 0.0
+2ea79eaaa0b23b2a7d2cf470cbaf72ea 0.0
+2eb7bab7b0af702e4f18358968a8f7ff 0.0
+2f3b51e3f7acc3c9f66f1552b67a0209 0.0
+2f75b02cd4d3ddebf390d17a120f419a 0.0
+2f9d067d5ea9cc1b1289e9de830b326e 0.0
+2fad39e365933a7f925040cb3c2a5ef8 0.0
+3017f87a3b0f5200ed54eca17eef3cbb 0.0
+30207d209491dfb7de83344e77fd6fbd 0.0
+306d425d43fecaddb4ea5af1cc34ef98 0.0
+3098ee4e595175076514b00a8a20c291 0.0
+3127220258419507743dc5f2bba69d86 0.0
+31ebb433ebf0b3e83259d826875b4438 0.0
+31f493208054aa0fea8a26d72ee670dd 0.0
+31fe7997ddfef193110db377890f9520 0.0
+320c03afc417b829a51ecad3dea2e065 0.0
+320fd62cdcfb66cfcc1438371751c4af 0.0
+3233034b11dfc584717d9264882a4fce 0.0
+32382a0f5831da5faee464d011b5616c 0.0
+3255b196d8b70893d9124239bcd46782 0.0
+32a21601ca89d1f2629c1a9fc2de4359 0.0
+32afbeb5f76a69e47f6cf6a1870e95d6 0.0
+32d9ab84e859d48f487b790c58e98234 0.0
+32ebb69544acaf3162b5e828d05a5f84 0.0
+330b6fa220b88e87ac7c5ed39ec276dc 0.0
+33206a2306ca96922fed904661eff421 0.0
+334af877a390268de17839f80e212fd0 0.0
+33852dc244bb69590e8e756a83bb2f4c 0.0
+2d5957484a1289ca13b12328802dfaec 0.0
+2d273dee370721d368ed3534c76100ce 0.0
+2cf53730a41137e7ad95ed3680541d80 0.0
+2732bd972ea75eaae442f937cb39253e 0.0
+228592b44e6a4d192af2d0732ec64a51 0.0
+2290e92733707772a839abf72e7981f9 0.0
+239b4f4f6c5fc459a1c95c4bf704de3c 0.0
+23bf5f7fe3d1ff8167bf3901fdb0685a 0.0
+23c05ffd2b0ba1534717ee52a81b589e 0.0
+23dece0aa961e717a81e1118455253d1 0.0
+23f4c333bc50c5cf2c0f20855d049cc4 0.0
+2403a466dd6d1e4f551d80394c2aa994 0.0
+24ef07664cfa04a819d7797a89e653b6 0.0
+251429848560a80abcb96640ceb102db 0.0
+252d0fd6601d9776290e142e5bfd744b 0.0
+2594fcf48955bec7b61a5ce4aefa3868 0.0
+29568b810d7a4e985424b9037aaeab9e 0.0
+2cdea72aa7fe332d3e0a891d49d22c1e 0.0
+29784e17460e4faa8e4eb6576e85f22d 0.0
+29ccf8b234ac00b114fb2c8e1c44f932 0.0
+29e53aaab03b86749d3941da8ea5b59c 0.0
+2a8a90cd5770804a5f23f425004de7e5 0.0
+2a99ec1157a90661db7ff643b82f1914 0.0
+2aa7a8ef7c70b5f684c856e6c21b6635 0.0
+2b982979f2401a8bc447d7141879e9b2 0.0
+2be34a58b64107daf7b5426302feeff7 0.0
+2c0fa4f09b72e538e3c6acead5b987cb 0.0
+2c1dcc76478dfbb19fa59690738202c6 0.0
+2c61ba77eb8ea89ea85728bd0022ac09 0.0
+2cca9e3bd17f06418ac623fb0017c4cc 0.0
+33902c26956acf7efa69413cfea6326e 0.0
+33b58b0709e9c05ba8404c47e6a7ea2e 0.0
+4346c337f6760bd0f8e574277524165e 0.0
+340a5698fe78713b785c3bcce89df73b 0.0
+3d6469bbfb14a86abdaa602a89753ec9 0.0
+3d766867c97f511431bf97e059d6498f 0.0
+3dca01142e22e42b989fe5a513e103c7 0.0
+3df77157a242066dfb4eb9c19062436a 0.0
+3e2716bec0426995af52846a88864c6c 0.0
+3e9c062471302ac0ef352264490d1b3f 0.0
+3ee56b6c4d038d4b8d26f63fc337cd02 0.0
+3f11f9f226ab63cc1ff6bfeee9699b79 0.0
+3f2fd56fd1ec26b86eed89795fe014cc 0.0
+3f54618710e03bf1e78b57e14ca8775f 0.0
+3f55a8ca784ae7190790023a30cd859c 0.0
+3f56d7b61bdc82e4d12c6f58617f5103 0.0
+3f6c45001fd40e8b6f9d0558539fd820 0.0
+3fe55dd5e59267f275e972017302459f 0.0
+4060be28072a120ebefbef199c09187f 0.0
+40787e473f59ede4ad561b4e4079d97b 0.0
+40a6b537d995aeb12b0100e2cb6ad879 0.0
+41239266f2952a56a313fc4798f28a1c 0.0
+419242e308e14800b26a10a4bf82831f 0.0
+41b518f17927240fdfd1465cd351afc4 0.0
+41df3b4c55182503ecd97c6dbe47966a 0.0
+4261b78dbd18dd66cbbc7a7f0f2a51a8 0.0
+426bd7119a31b73ef734771a071a216c 0.0
+42ac6d34b293d442987af367e2eb4f51 0.0
+42ce63585eaa407ee53b568bdc648be8 0.0
+42ef609ea1f85d92d08a416be896e309 0.0
+431c53fa5c895a6e6f5951578f6dca8b 0.0
+3d52122a218f9c70126d51d42996a1a7 0.0
+3cc657dfb41c68faa70508afc63f4222 0.0
+3cc53c481fdae6358e4d2d5c9f95a1f0 0.0
+38e3b7abf4e08699d7a1d1965f177086 0.0
+34cddd9fe152c24d78924ef25927d67c 0.0
+35513c8de34a49ef37dde85737e6aef9 0.0
+362b97b08074732371b6e76cd1a894e1 0.0
+363c1c39d58bce4fc3218659d0944258 0.0
+36402f6154239da8ce422da64a0bd247 0.0
+36be43c248f8e4a7b60a4739cd8dd932 0.0
+3781803fe3eeb8e5bc50a5308f4516a6 0.0
+3788b1d514737976689f4ed1dee8b52f 0.0
+37bdb470cfb64ad4594c5968cd95e312 0.0
+37ff76f02e76dd8fc629cb05575defb1 0.0
+387e36ab394fd347fd94d8762016779f 0.0
+38c80ed971960fd1e878080f767a4765 0.0
+3902e1865aa0a0863687793a7891f8f7 0.0
+3cb3514dd7db8d7720477ab5759b970e 0.0
+3950fe55c9d109b986e327da3ab3653f 0.0
+399bcf0feb12a20ca0731d8d65268dae 0.0
+39bb3fb5b2257fd2eacca408dc42c5d1 0.0
+3a0e00761e8113458ccd7d13af71b4b0 0.0
+3a1461e8439091ad6b93af0ddb3849eb 0.0
+3a3e562a61e57ea1ff4f24857a1e79da 0.0
+3a8731148bda5325db164e3729754abd 0.0
+3ad7da49c52ec79eb228067d25d2f2a7 0.0
+3af2ddaf4af4b5ae4100439a9c3d4099 0.0
+3bfae16068c7a0b87db6a711c01eadc7 0.0
+3bff9ab82e7f2a76cc22e687ac7b72fd 0.0
+3c5496fdcfbdd5b2e4f620d44f83414c 0.0
+226d7503ab3d1ea0ae84d97fbd804323 0.0
+22026223740aa4b6376f724986d7bf12 0.0
+21d92a1e71eeadaeec2fd51f41249446 0.0
+21ce8465657612a85e2f89e74d285ce1 0.0
+091e6e4ffe1252fd657f19e42e04e859 0.0
+0a5225695ff3a449ad8ce395d75c3799 0.0
+0a99bccbf1bd2e4f728514ba8c4cf6b5 0.0
+0ae0be308568d07e0434434b14b8d565 0.0
+0b05a2e48cdc5f9e766bb0f4c209d4b4 0.0
+0b3c20a64187d28e11a68f342081c17c 0.0
+0c336b876b379dc729ffcc2a1340f670 0.0
+0c3a02ffb054a07b355c1649433fc699 0.0
+0c3f267cef317f7d71015849370f1f6d 0.0
+0c4204a0e15fccc22a7456685898cd5c 0.0
+0cc26190e976fc496119a5eb4cd0fee7 0.0
+0cdc26df2e337ea78808c32c8f52c94b 0.0
+0cf86462b45382da531327e4186091da 0.0
+0d188e3528ce4d674680de129470f7bc 0.0
+0d3b942348cade7e1c41ac52df03e63e 0.0
+0d69754929eb3dd1a5b256b43f1c05d7 0.0
+0e637c81e25ffbf7b7e68a8fbcc4f2ea 0.0
+0e7be154d128e3ce2b9b05e3e64a679e 0.0
+0eb15dc6ca644533da15764998436274 0.0
+0f0a1f8c92368be6e8d3145bd4e94cd1 0.0
+0f0c741eeebdb89289460115e9430f1a 0.0
+0f42bfd00e871207c3e2bc92a79f3456 0.0
+0f6200a5b77099113b52eea9a8645181 0.0
+0f8aa0e4c4ccd80c090db7c72be4d264 0.0
+0f92fcc184a951ff8b0914467ad2f8f9 0.0
+0f9411ed14b5b3aa1a9655523fa33004 0.0
+0fcd8cec2c4190fb7703e87f3c04a0a9 0.0
+08be83d0a527ddd1da0861135192e593 0.0
+08a62d972f6bf2ca4dd9291bc2cc9f63 0.0
+08a1598eb1ea4dca23db1135552cd5a7 0.0
+052ad6076dacb9cca0dbb762a466404c 0.0
+009a4919860d6d1fddec5d3771d37351 0.0
+00bb7a84ce1fa6f7411597672ff1b09d 0.0
+00ecdfa98a302087bca221884a535f55 0.0
+01241d7b0a1f219f5a68bf78b9f3b294 0.0
+0209f9868a22059c659df474a746d8f4 0.0
+021899b8a2f4157d54475a024303ad5c 0.0
+0233fed56365db96e625e92c4dfb9cd0 0.0
+026598f1d03fa900cc03cf9a32dc17d8 0.0
+03569e3fd843f501355f7853b0ec9361 0.0
+03fdbec0c9b63a05ae79f92c6d3a4cfc 0.0
+041d6dba3af0b8c9c1a422ed942ee16c 0.0
+04cf55ca51883bf19bca35e8ab225def 0.0
+053c4d5b8717b60387f777efae437b40 0.0
+0820b82d14921828dfa20b56242a48c3 0.0
+0540bf08e9a1eb8691a9b1be2afe577a 0.0
+0550a92d2e1e15136084e85881efbdea 0.0
+05736395b184324b94bf557984b36834 0.0
+0584be58049c7828c032b2171aa00a4b 0.0
+0590c29091ff2b3affd966ed57773407 0.0
+05992d2befeb4bd87b626698a9694ccc 0.0
+0615dc8ff9d74b14531dde03da40bc47 0.0
+06c0aa8ad0fa418a4236c6914fb420e1 0.0
+07465ac96a07454a7c155d9c674bbd5c 0.0
+075eb451ed18cc7fa554e88c2726e7ed 0.0
+075f8821bc5c57034fb74afc07233d97 0.0
+08091b93dffe421f648aac3eb7e192c5 0.0
+10155b2834300619ce0102110a37b057 0.0
+10a9a0887483597b71cffa08dcda40de 0.0
+10cd502b01abafc87e2ecbad86389286 0.0
+1cba52a9db0c46eaa9148953a6983f1d 0.0
+1a0bea5ccf9d532d75136168ddb54654 0.0
+1a5526b9f7147b6893f1a034a107cc65 0.0
+1a962bffebb20d79245326ada6fa9b4a 0.0
+1aaeb592fcca5013db99a4f629c55062 0.0
+1bb5836797e9a89a2de9b96bbe206524 0.0
+1bf05d12bfd387888facafd52c07a37b 0.0
+1c4344ff805fe2e4a760b5d01eb57eca 0.0
+1c85fe6e01899c1b9575734197172ab7 0.0
+1c86153e1b13ab5322c96f2602b73513 0.0
+1c94105978b1aa2095e3c0096774f240 0.0
+1c9f7427ad9eef9581c32e490d0f3e35 0.0
+1caec778d70d2d387717d1d1bc248bb6 0.0
+1d1d606cff1c8bab0027fdd1e2ede36d 0.0
+192522c9b7c41f7a761f8a2ba792268a 0.0
+1ec95834f03c9606f9433e59443e5bf3 0.0
+1ef782a559e3e0a4ab42516e0a2c828f 0.0
+1ef9d7dc5f8c93a8a43bd84e55f9a08e 0.0
+1f4c8ba04bcab2849c9ddfbd7fa3d1ea 0.0
+1f982b62b3d278f92eb3007e366fbe14 0.0
+1fa555f9a74d47a6ca7a65a347bc885f 0.0
+1fc9389433a5ed33af26e3985bdcda26 0.0
+200f25968eccba0fac01ab0385952acb 0.0
+2086666afdd5de098b04a94247040c11 0.0
+218a6324d16b842a38c70f1088a7c8b0 0.0
+219d2ac2e47968bb839bfa948559ebcd 0.0
+219fca301444ebadab34c1a35847fca8 0.0
+19cb3add28813b2bf3e1f40a000d33c2 0.0
+185e45802a5c7674f89b5d0dd66d7912 0.0
+113ce005a880987438bdbe19ee22add8 0.0
+149e7d5e484d157f2ac6140a2111618e 0.0
+1141f30eddb626a3a1151a19445eadfe 0.0
+1142fa8636688cdac86a8bdcbc8bee05 0.0
+114a94e972186d56504069f1bacf505a 0.0
+11764b0405507198175fe590debb40b4 0.0
+1182d94e2fd82a9b44657970e252736d 0.0
+11b21c7f71591dd1570f9717990ea4c7 0.0
+1264ac914d010d7c663fadca9c5904f4 0.0
+131d3b899e72f19fcae1b0f9f8f3a009 0.0
+138daeaeee1211d2a3d8502b8fe23b2a 0.0
+1407e963207002430eaebcae4b500708 0.0
+1442af5b43d77ad0cd6b73feea01af87 0.0
+145ebc1f8ab3b2896eb631008973c0b4 0.0
+14ea2e862193001ceb4322087eca9f4d 0.0
+180b74e94caf35fac5e1f0f4b7ad9d9f 0.0
+1516918080ce0a63f2e1ac6251ba1282 0.0
+1527fd8b14037f53b8edb26b111e1f72 0.0
+157a9e8a83fb37615cabb48c14d919f5 0.0
+15dd83460074717cb57171578115cb49 0.0
+161d27a7363501e838efa59628353a65 0.0
+16306acecd104a4f12ec62522e1bf751 0.0
+1634bc38311d3c868d1077602dc439f2 0.0
+1661c49d1dc41645c0bf242b2064f1f3 0.0
+16c36bcee25e65305630899e962804ca 0.0
+17824e221f06eecf126ea102c3a2c289 0.0
+17c24a9e01ee9569cd8f4a4b0e9b1b33 0.0
+17dd8d3d133442253a1d51601ca5a192 0.0
+433d8ad01fdf6ed57d8f509a7d6bfe2a 0.0
+4355c3f086c69fffe013ed0a3fdc8b66 0.0
+8a658263f5f46e9b06ce187893565b47 0.0
+7a8d29c59b803baaed9cc1f04ce0dc33 0.0
+7029eddc845e01d613a3bbc113ebeaba 0.0
+704d0070549f26fe1f99eb736cdc4c34 0.0
+704e8a1acf77dcc1bc06251a34ba2132 0.0
+7069d1ba735fd4d5199050b4c100bea7 0.0
+71a713c5aa69a8e46efa23a2437010b8 0.0
+71d32936afbf6c7745a6083e7b09dff3 0.0
+7201d53bd647437ec3de879774eba498 0.0
+7212fe837f6f09bd16ed2c9b2c58e557 0.0
+740c97e7b2eddae8044cf27859a86b7b 0.0
+7412af58f3daee6101db082fd334144b 0.0
+744b8cd51857478bda77a9f3ab5e1d5a 0.0
+74b7dc5aa537fcaf4d0046dc7715b9bf 0.0
+74f2fa20e0ed1f74fbe4857f249e3a28 0.0
+758cd34d30d7af60445b5f0f932d21b9 0.0
+75a5999048000b7e178036f0e75e4dd4 0.0
+75c5f35120584c258c8ffa734a94ea8b 0.0
+76075eede2e2c9cf276a8394c7cca293 0.0
+764ce1e6cf32606d676870bd6ab3e21d 0.0
+76cee9663ccd2b6160d3dfcc88df3c38 0.0
+76f7f30b434c8deb5e2a0fd50baa1496 0.0
+7769cac4c6bfbe7e3cddd33636c22519 0.0
+77c18841a3a2296268d30a71c26a5cec 0.0
+78266846787de98cf89a724cb5586858 0.0
+7839f352a2bbf8261351db78a9bf3bd8 0.0
+78d8e6cbe439685d63e01561094d1736 0.0
+797ebe05936131bb058c0ab04c171a26 0.0
+7a1144e3e5689e67eb04e1a00e49c45c 0.0
+6fa149c798c4251f1ca8cd356f0568ee 0.0
+6ed7152332889b69d2d2f33c53b68934 0.0
+6eae8918e5425723c03d19afa8ca0356 0.0
+6a129e8ae637c96e7bb56223913911f7 0.0
+686d42b9552375c3b61301ae5605e3a2 0.0
+68bb0065fa02ba7b52acf3f4d833895b 0.0
+68db566a60ba7167b024b4c5032d9400 0.0
+690754306aeed53ec9ffe767151681cb 0.0
+691656f33e727f8857f5c9ee40ed05b1 0.0
+69536042153447ceb692aa53159ce0c2 0.0
+6961c6244fb505547380d15bfee162a9 0.0
+697b3fb29268cba4b2a9eef0eab2de7e 0.0
+698862fbff78c7fc3cd7a9f4958bdadb 0.0
+699d4b4de178e2d52195830debfc3b1f 0.0
+69d052e452e79d571cba042af265d50c 0.0
+69f46a090420a7f596df5d450ef67e66 0.0
+6a17479edbe461431433bddb084f731e 0.0
+6e8986f8088b452f964e7968bb8bca87 0.0
+6a5f9d9d0fbc066a57bb7798f9f371ff 0.0
+6a6880faba39965be0e4b7d88966d1c9 0.0
+6a6d8054e2a7052fb8fd458dccb2f88b 0.0
+6a75fd73efa64af9c36a4435bae0f920 0.0
+6a83401c3ea3089bf7d3c3051a709ef9 0.0
+6b90b86f5da836f13c0ee2f67132e12a 0.0
+6bbaf2d026d54c61f24b5c5483e56842 0.0
+6c1f3bf3e2186d0faf5325be92f4c974 0.0
+6c4de756c45fa6e7e5adfe196f9f0039 0.0
+6c84b0ea6103c2ad5f4c3b7f5dd5e1d2 0.0
+6d0013cae9b331ee166821cb124dbca1 0.0
+6d34445f3adb296a5642fcb5f81e5219 0.0
+7a629c748d0205443ef1e8e26ee09543 0.0
+7a9469ff27dc7afffdb7238976da0a6e 0.0
+438eb58eedcd45e1eedb5889431ebe2a 0.0
+7ae7a28e3e35f8669d62a551beedaf7b 0.0
+8183f1e3f9806994e125b1d5c6932498 0.0
+81a1e88c8e85192aecbcb517cb0bf1ba 0.0
+81f1b22052c8eb3dfb1e6b45ab7c8530 0.0
+82168325f126cecd669b2d0e030c9eae 0.0
+825e1e1d675a615bb3e6914499939c93 0.0
+830932289444e28d4ab0ac9cdabfe977 0.0
+832e6210c0410af8e0356bea5ec39c80 0.0
+832fd777a62ea50c5c34a8c2efb5c5e2 0.0
+83420377ed17df2c16abaf3a72e05633 0.0
+846b433f433cbb20d6208bd54067f911 0.0
+84853bd31e74a04290e47c542c8d293d 0.0
+84d6422c401698d91e646b9b4fe4774e 0.0
+8507091bac6b1be88f3d360d6d535d0e 0.0
+855e8b78c6f45f9a2e3d5afb75c4f5bb 0.0
+85d8ef495cd58b08a300a623a3718ba9 0.0
+862f918acd63eeace039314ff2263a39 0.0
+86f04c5c81713db61f97bb53835f9189 0.0
+8731860c790449910b7955d1bde836f2 0.0
+87bb02a4196dc0430e4ad2a975a345c8 0.0
+8833ed97ab90d2af80eb7c7f7921f139 0.0
+88c180a637696e987712b40ac9089f8c 0.0
+890bfce6cfa2575db09fc1ba0909d3bd 0.0
+8919944cb4b67e5abb04cc8afad9045d 0.0
+894f46d021396ebbbc4041b65e198bd4 0.0
+8951cc9103140b71bd50c0932e096502 0.0
+89aabf43d24dd5bd0fd0f5aca189d341 0.0
+8a20667bca2eaf2a0687995af876a84c 0.0
+8139d4cc5e4e5d44bad494a295a3e7d6 0.0
+812aac97abf11d6d64a00496e070e266 0.0
+80fd14aea177425eb5d6a4b388a08a07 0.0
+7df3c653a9ad469d0de18261ee771eac 0.0
+7b2ff5940de11da4dc3bb208805dff11 0.0
+7b6f8858b481821ad1aae2e8d796fae6 0.0
+7b9a7ec76bd5dd38a2964e80934bc632 0.0
+7bb3a2f7b1bab185035b2e361b4fa760 0.0
+7be9bac1b13acbd8acf0de464d7c20e6 0.0
+7bfa7582156c882bb0c1de44aaaf7a3c 0.0
+7c99906fc47555e0b2df4cb90c47a3c9 0.0
+7cb7753109015ae718a1a0201d081e99 0.0
+7cc8273fa6f3c9f55f426c3765741834 0.0
+7d2f0f34f2ea4ce7030a589e76c0d06a 0.0
+7d4bc1c15b975e6ffe247f13716e5f53 0.0
+7db357a4adb452de8504d8ca1ddb3f99 0.0
+7dfa96abfa7484bb6c9cfa90dc9b606f 0.0
+8042eb376d870b471cadeab735dee5a5 0.0
+7e17a5137fd9dab8c02617a186088c4d 0.0
+7e3f4a0767fe59521a6db1862056798d 0.0
+7e54570d21f03dfcefff07151a8c2d54 0.0
+7e57bf1306ddc4beb5aa7a1084d8c845 0.0
+7e86e84e054d9c5a6d485c9ef762c5c2 0.0
+7e907df30e0739c3bf57f18729659005 0.0
+7e9ed2c841e64976eace4b578ff56a26 0.0
+7ead39abbaa90f90242247c455393162 0.0
+7eb58843a058a3dcc0c051e0762de977 0.0
+0064e2a432a2756eabdb6288a368eaef 0.0
+7f323d6db23992932ec6e097ae09c4c4 0.0
+7fa68c46b87cb8fc334456fc14dfdf23 0.0
+67ec5c8d397bcd64b2e336a60eef437c 0.0
+67cd78854a06d6e3867c28745e509ad9 0.0
+6700772469767941a3992c3e2c65c3bb 0.0
+6694ca246e895a4f1d7e1d077cc27f5d 0.0
+4eb0fadd9ccb6cd253273f89be164175 0.0
+4eb2c50a69efefc9d10540928ce08822 0.0
+4f5e90e831377b98f5f6e569d6d8f64f 0.0
+4f6f2afb7adf1bb487b6a86b13022445 0.0
+4fd1b92e9814b1bc6d1f7b1117daf660 0.0
+4fdbeac7d6cea1fef876d607636f189b 0.0
+4feb634e0ed63566c64710659b17a2ad 0.0
+4fed36b0643aa2ddcecf381c1f487a85 0.0
+50f328ff0b5c3003ce220fbd7aaa2ef7 0.0
+50f8db74e965cdba7ae86014644a3137 0.0
+516549c565407769125e3e8f3ca298cd 0.0
+517eed6e97dd2846dae1553b8c7c9464 0.0
+51a63d18809e04591820c7e3f3acf0b4 0.0
+51c999131379511e8950fed0550134f7 0.0
+51e4182fc25b356d52164f3de0d5d4b6 0.0
+51e77f4577d36c8bd3627a3d0229dd37 0.0
+5226eb5e70712d3425df4b0e57d5a406 0.0
+5350db19ba80799a8c5ee886143dd940 0.0
+536018032c5d0d65e6d25d3988f87421 0.0
+53609d8db4e52b4f03beab3bc44ad5ba 0.0
+53668ab088d92c4856914e433447af5a 0.0
+539c656b9710b0099a96a678b3023299 0.0
+53a3eb7f49d8d6857ba7c50dd5bda866 0.0
+53f6fd6e785c7fb9d72f94a6ea343a15 0.0
+5406f4862e6c5e04e925b8f0a501958d 0.0
+5494f5c56e0875a35b62e234cc67ffa1 0.0
+552e00cb506f739acaf08aa5641f9d84 0.0
+4e3406627aa545813116167826c24a46 0.0
+4db47b583b03cc6706e28a2fe2731080 0.0
+4d7f3a8e908cb29809ed118acf40d513 0.0
+47c98abffb77eb5999855b75594356cb 0.0
+444d635c03299c7b88a3a0c236831bad 0.0
+44620cd517e481b3b2b27ff6d6611d53 0.0
+446ec4331e4894b14be920825fc962a5 0.0
+44a36b6738e227582e14a58b9d92a5f5 0.0
+454aceb66125863a02ea9d46c0b23136 0.0
+457d3865cf0a4bc602a07d8d5324388f 0.0
+467201ebcbdb17a73c8dbe98f8bbdafc 0.0
+467fa58ad637f123633ecf3cd389ed36 0.0
+4687677f9674fec6aca4275f36063b4d 0.0
+470aabeb5aab36d5ddc9d1995b087843 0.0
+4786895e195caa90723aa4bcb25b8b03 0.0
+4790a2be4d1288d94177213af8380f12 0.0
+47d0d6c4ca012181585fabd306fe7026 0.0
+4cc94d6f0ba7a2a174fa668ef5966bae 0.0
+49a130e2f6d8bdeddd170945a75a6783 0.0
+49dd84930ff914f17216bbbef0c6ef0f 0.0
+4a86baaa7e5adb00842830155d3c7fd0 0.0
+4ab7cc7f5969573696f5cc3b1ea0f4bc 0.0
+4acc36d268585bc3834e627756313b21 0.0
+4b3f3f0ecac9b4026751b7549559c9c3 0.0
+4bec204a5986223b16143ad5db1c8433 0.0
+4c1afcf47b47fc2850fae19210e27117 0.0
+4c2189b6b5ced3cb80093414e9449de9 0.0
+4c855814a58f4f324e8f0cfaab222e1f 0.0
+4c954887a7bd1f32382dbbade7b91aaa 0.0
+4ca53113aeac7afa697e8cae58f69855 0.0
+55993950ca597b6dc821765327bbf3b9 0.0
+55af0326535a408ef56415f2952e2896 0.0
+55d4d3011e1f3bb8e4e5089b801b61a4 0.0
+628a44c637e99429e3ac4c39227c6c9d 0.0
+5f1d2db99a41906b5ca2800149e0c385 0.0
+5f8c4f83215d666e801e21278b17fb45 0.0
+5ff2bac04b0e8ba06ff7e0293588c308 0.0
+5ff715b26c5f3632a12da2d4dbd758bd 0.0
+6007bf22076e2e3d0030e38442149c54 0.0
+602510f333bda9666e551383f7ebaf82 0.0
+60746d8928422234eb34fff42f4cca47 0.0
+60c57911662a9159dfdd0fc05d975a55 0.0
+60ce43bfcf795e997e3c969938262dab 0.0
+60e681703dcab859a8e336be27fd6cf0 0.0
+61602441fe4b6f2b36be40c311fa9cb3 0.0
+6240207fd3edd24bf0799b5f427423a0 0.0
+62ada1b9706d5e11072e7d78ecb0a4bb 0.0
+5e99c84d99dca9630c51c5fb88000143 0.0
+62f043c2aed6f033879b83ca519fde32 0.0
+630bffb8b9a197b3a6359f42828b4de3 0.0
+639a2c1d7ae3f6700506aba8baeec034 0.0
+640f0d7bcadfc07ac9f3a4d0b440d859 0.0
+64618afa3a07dcab19ba87629333ab84 0.0
+6474f5ab8c91efe951ad41f60fb4218b 0.0
+64b3028e870c12fc7d39287219f7af67 0.0
+64dc86ad2807b50d91f4938a5bb31a87 0.0
+651794369aeb3db83839b81fe49c8b4e 0.0
+66127b1730c816b3e022c01fcaf024ea 0.0
+6677fe5ca3b81ce606eac5d76b0a0ece 0.0
+6689cc067754fa14964e70c77ae9ce0a 0.0
+5eb9520307f1836913cc5937f1c9f64d 0.0
+5e32dea22765b73ed60b1d78a9d4d574 0.0
+56c3d1a8e4dc5c2aa7afbc1563559448 0.0
+59d031b8ba709e8da78b4c1f63c74f09 0.0
+576c2135e8ddf9d0a3674a5937d2466d 0.0
+580e8927be639c5ab7fa8731a72c76d2 0.0
+5844d8e76e12fb8526d7fede1c425670 0.0
+58464ec27209abed9698e88ab974e84f 0.0
+585f01cd625ddb45fa4eebbc072fecd1 0.0
+5860dac317300b1fe6094e958240cc7f 0.0
+58df89090206b56c52bfffa4b97f4d5d 0.0
+59597cd430deea6d8e0908dcd6db7e50 0.0
+597966c93921ecc2907ad15121cbcd2c 0.0
+599ba6458e91e2527f358f547ea39261 0.0
+59c4a678634e50b3fda861310820aadd 0.0
+59c6ed04d2faf081cecca31f1eb2e42d 0.0
+5a21785c07d7cc123256f09d00deec29 0.0
+5e151aa3f416a2fa6f0a1d8985282783 0.0
+5aa1c4b839946b713b450ff38e7d5bf8 0.0
+5ae365ca527c17c22171f54421e75197 0.0
+5b31527f1b28c37035a08b3642d61ece 0.0
+5ba339813ae7235641d2f1ff581ff2d7 0.0
+5bfef4e7e21aee58609421e4f24b5dc8 0.0
+5c3e4422374120cd00d41557946e5c6f 0.0
+5c6ba6ec99b516d0d9cba8e4730ea1fe 0.0
+5c78e7010bae7392c680c7ee9a1ed076 0.0
+5ca5ea401c8d3569e69626d2f81d5578 0.0
+5cad0c3ca2db85210a2d320bbe783fbd 0.0
+5d141d74c8bd046171ec64835c4864b2 0.0
+5d20f93c621d95608e5b8473d56d5b52 0.0
+ffaecab289059c048c7494d4595a746c 0.0
diff --git a/q2_sample_classifier/tests/data/importance_cv.tsv b/q2_sample_classifier/tests/data/importance_cv.tsv
new file mode 100644
index 0000000..0e21e1f
--- /dev/null
+++ b/q2_sample_classifier/tests/data/importance_cv.tsv
@@ -0,0 +1,1057 @@
+feature importance
+74ec9fe6ffab4ecff6d5def74298a825 0.3428133408142244
+c82032c40c98975f71892e4be561c87a 0.27634103114598535
+2ae8a432208b85a14e86eb821c86ce2b 0.08828221994710637
+4d04d759f5a6615dac43060726239891 0.06823380209336659
+bae0d49b5eb22b0dfaecbcbf39a52265 0.0369473194038547
+eea6b86c0c75e740670ccc50613b1b23 0.020006535264682004
+ea3433f2c148c6fae4cc2e9ae197f5b8 0.016953147830527746
+5d62137a2d05df0be8d6b7e163a3ef08 0.013211350936413749
+1ad289cd8f44e109fd95de0382c5b252 0.012560366391362142
+539ca948741b9a713eb781d4281b23fc 0.011156791684704343
+e3a4a62adc4e83c274978806164b474a 0.010308983991542713
+588a23811b3e98af68f4f0fb24a0daa8 0.010069329482887605
+a650f349bbdffb95b3624519d1ba72b8 0.009871225092780213
+04195686f2b70585790ec75320de0d6f 0.00876402102831411
+9d4e16844252e2dfbaf80a4a6fc4d55d 0.00630257923568541
+3b092a59cd0e2d57483420cc994ec49c 0.005622043494081645
+2c1dcc76478dfbb19fa59690738202c6 0.004675382639932856
+ece1af985b63ebccd2833e9b5f0432e3 0.004513544151860472
+e6a34eb113dba66df0b8bbec907a8f5d 0.004315160855326535
+fea3411c847e6627afb5e78b88e2e9af 0.003730624243958615
+f1da0f084f3e5b7957d11d01abe7013d 0.003652015129777001
+c18afe570abfe82d2f746ecc6e291bab 0.0033049296043843033
+8ce931b0c99ad5ed648c25483c1ca492 0.0032228897025259203
+74923f4bbde849e27fc4eda25d757e2a 0.003150217256362466
+8e6e925baa81466352ff781a289e188d 0.0026718177108241716
+3128e057b1d4f320961e4da677724613 0.002664604196836876
+a66dd8d31584930c2b0f811f572dddcf 0.0026417565965941056
+7667b07b46f3d78c76f13781337cd218 0.0026087247436465213
+ee02d6d3506da57909d5e603709a295f 0.0022537226110718044
+25def15fb42bed6855ede4893d27a21d 0.0018996856274791142
+be17b29b03ed02bed133932284568d16 0.001627510760401745
+77ad88eed91eef009bf64f101c96baa6 0.0015835888689348587
+d73c014c9a033812619aa44e9cae934f 0.0014153770414606557
+d0f1985c2b919dcbd0bf4664c844a900 0.0012848091782786443
+370829fece3190f1a8efccf50b7d0921 0.001263760407966557
+8f4a3341ccda8d1b45f286cb686a3aec 0.0010168579626972755
+bca0b81a0b8d59e90c25a323c2f62f31 0.0007409732185557153
+0f92fcc184a951ff8b0914467ad2f8f9 0.0007082879970565094
+d996e8d86e152c0a3c5852a2d8c4c52c 0.0006555148868768255
+c162a4f3943238810eba8a25f0563cca 0.0005993278965731458
+ad7bc2bb6e1ec90a9bc3c8949bce80d6 0.0005790178805975375
+8937656c16c20701c107e715bad86732 0.0004707675753228123
+2a99ec1157a90661db7ff643b82f1914 0.00045698752113701083
+ac5402de1ddf427ab8d2b0a8a0a44f19 0.00041009086561453946
+07bfddb8e62098a85afc8fd68ebc308c 0.0004035832014127108
+ea3af29626a95b6c7b89d631bf1a739c 0.000367769391078963
+9ac5fec396208e598ab2e2d26985d0df 0.00036166905786704954
+0f0a1f8c92368be6e8d3145bd4e94cd1 0.0003347680535628769
+dafe809740d0545dc25c6939a84a1820 0.00030100173376998625
+d9b76ee51ff2b6961430bf352cc578f2 0.0002364961500966159
+ed1528f4822674c9e1da64b84c778e9f 0.00020923003347680163
+e7e74960c5ee1984f2e60f99081a368b 0.0001972740315638449
+0d357d84911ef4a089c6ae3c6deb39b9 0.00019167798083865142
+6a5f9d9d0fbc066a57bb7798f9f371ff 0.00018830703012912491
+2d273dee370721d368ed3534c76100ce 0.00017979836897195073
+6c83fec6e8d03fad51eabc871853c991 0.000167384026781444
+691656f33e727f8857f5c9ee40ed05b1 0.000149831974143296
+3127220258419507743dc5f2bba69d86 0.00014983197414327685
+3e93fd55a231bb99b67a666bad8446a8 0.0001412302725968437
+180b74e94caf35fac5e1f0f4b7ad9d9f 0.00012553802008608329
+4e1758e81adcb1107024ee52c0113a6f 0.00011566499047711704
+ebab7cfc7858bfb00c486bba21d6ca6d 0.00010760401721664332
+c1b636001e0773f9cca8096d70e8b5de 0.00010535060681949527
+1986030e3c5cd5df6d869054bc96e536 8.428048545559623e-05
+48d9883fd871b82d9bbc89cb0447be8c 8.026712900532825e-05
+df3f8cdac76c62baa98469c33b82d8df 5.380200860832166e-05
+29bb5fa7f45e47bbabab8cc1b1b3d18e 4.184600669535961e-05
+68c83aae1babce28612ba404b1096cc1 3.745799353582101e-05
+d28f5cab517f832750bbf62798e58dc3 3.745799353582054e-05
+a4dbf71f32a016e120fe6cf253dfa6c0 3.745799353582041e-05
+9a75764782a384ce3b83e019c8b2dd94 3.138450502152082e-05
+aa738f71de2d14bf826740d8df65e5f2 0.0
+ab0b78445a54df766c12448fa0ad6d49 0.0
+aacaf5f360c110204da1051a50c5ba5f 0.0
+a957f3e1f57e99be7c1c28de7f016780 0.0
+bc4ae61d84daab04b0e888d22f7d3f69 0.0
+a8aca6d9fcb6ac34f6104cbe28a466c7 0.0
+a9387e76477da8688775569cf540191b 0.0
+a9e1acfa6cc2678b1fcc40f49007d4c5 0.0
+a9b72251b244f66d8296965d9435218c 0.0
+a9566f812a634f1b709810bb0cf50629 0.0
+a9e6fc1854a3fd8873b32c9fbd9ac13a 0.0
+b835514e2678e3c7ba13bfe0a0d79bb4 0.0
+a898fafd80c4c90cdcde401ca2332e05 0.0
+a590ab40dc8a6b5cbffa347e8e32e63d 0.0
+a42ad1956d801cce5e0109dc0e987ceb 0.0
+be9eb15986b4bdfad6c474a30e7ce4b9 0.0
+a4c61d535793417f6f6ab9eae2463f12 0.0
+be480a1e4a4e71fe774ccd3e4ff22945 0.0
+a4e2a4e1321a599651edf92f6999e78e 0.0
+bdb2c60864ae18c4302c9f144219512e 0.0
+a4ecfb2e708863a17978a0942358ffc3 0.0
+a51b62113b7c9d4febf6956e86e5dfd0 0.0
+a544abd909afc727c4418e4e19eed417 0.0
+a5676561cb37d21b8c485040ca3ebe96 0.0
+a607665fc01df970cb3f1c4002a00bab 0.0
+a837426344372de1e6d07876dd214a5b 0.0
+bda6b6e7da7508c68057140e7bd6cd8d 0.0
+a6099802840e60febef8e896967b7747 0.0
+bd8cf9d4f4744a93802336d81b60966e 0.0
+a66a33da056496a076f57ec22a435dc9 0.0
+bd0f4e14387ee1ac7e5fe586cadbbed4 0.0
+a6bd74dcea0ad6299a4cd2937e192151 0.0
+a6fa2de5f4576d1d77b1616786e3dd52 0.0
+a765441df9f96c9e8a1baf9645528616 0.0
+a7e74b8756d98ed147f70628144245cc 0.0
+a7fad69a900f747d777bf13f8c1ffd83 0.0
+a75b0438ad19fb4e31f4d58c2d49ea02 0.0
+bc23d2ee539bf2f52faf789289af7879 0.0
+ab2ef9bffd8e25c75290c0a2f6a8f85c 0.0
+b460255b8269b55301e0c3c3e3da252c 0.0
+b39a1bb517361c2a87f8fb48d66a49ee 0.0
+b3b1ae423dc7f3d8942237d115a5bf86 0.0
+b3d06445a910a9a444919e41f5da39c3 0.0
+b3e2d6b8bb4b97afa07c0d048746b712 0.0
+b9fefd4224e459a6a0d21db63b1c0d03 0.0
+b9d953d2c367d1b3150ccf062dacfe83 0.0
+b3ea95770cf649371d22e1a1d5aa3c0d 0.0
+b3fef1e26b7b6ac20847ede4da68547a 0.0
+b955f67a541a149dd3b179dc62d37a72 0.0
+b423592b0ae631882095792b27729b9c 0.0
+b4446b03208372a5dd17608d78f7e3e8 0.0
+b45c9784d674879d68d54362419a1d70 0.0
+b47e328fa7998e8085a30dc2a03506c0 0.0
+acfd094c6c92700cdcf89371abdd104b 0.0
+b47e999a38113e0e4e2b139e8b861fdd 0.0
+b9024a1f377b62508864cfea59b16a31 0.0
+b48a25941ae901a73b4a2830cc24b382 0.0
+b507596e6af67ab90ce421ce6f40b0af 0.0
+b55ddaf95d9d2217ea087863a7afbbad 0.0
+b6985598d3f4a9b830bbe1c6a665f5fe 0.0
+b75d05e361f46e33b39ef21d05334744 0.0
+b8c6906ea995f21303d44ab7b21655c6 0.0
+b796b2132a5015f869c8b5ac44a0e2bf 0.0
+b8c685d6d691d57d63a05b0eaf5a669b 0.0
+b7a041d2927c03859714fcdd60b9dc7e 0.0
+b7fe67097e773cb42109cefc45628173 0.0
+ba88d4e4e6a24ecc2a5aab6bdfbb77f5 0.0
+b36ff490f4df87e30600ed347f23cbdc 0.0
+b2e3852763d37eff884d326836ccec27 0.0
+b2ab597a902ff227a5ba76030cd6002e 0.0
+ad01a1417ca1d8a5a7a768ea78e45673 0.0
+bc4177a5fd330e9c23138dac2d6c69e0 0.0
+b8315874f4341a59e8e3ed317406ab09 0.0
+ad52a0f6646c574417ce0b13b84acfa9 0.0
+ad8991870b8bdd236d97d01487ddadc6 0.0
+adc59da8759a4276a8f218e237e27f12 0.0
+ae791ca848029549cd27ece21344d8a1 0.0
+ae793fc8909a69dde3c7757780299e14 0.0
+aebf2a74a3aa9e5876c479b1db88985f 0.0
+aee0d6f2626271eacf89d2e803440657 0.0
+bc0588863994a8f20e7dbce7a6fb8b96 0.0
+aefd4044e43af5915f9a3fac602e9308 0.0
+af25adac3aa4a6a6f6c97b23b7755273 0.0
+b01e64261beff06c591132b8f0cf3099 0.0
+a36d070ba91e6c48d0c79036200d0b20 0.0
+b0574dba26050e54cf5efd672c2f58ee 0.0
+b098aa6c11cba636420401399b83be7c 0.0
+b148aa1f06e0eea5a72ac72fdf3ac867 0.0
+b153e0852342c0738da791930625716f 0.0
+bb8cd42775d7feb5b54059f90e5ce9eb 0.0
+b1f2146bef78066448576fceeb6b644b 0.0
+b265615da6b98d477507607fc51972f7 0.0
+bb1c2099a32a335b1a55f44d3ba54484 0.0
+b29deb0964b5328b6206ea33b9035302 0.0
+bad4f7c5ada34a1f813946abcfe5c556 0.0
+0021d135d4ac12982cc8abdf2b38e23f 0.0
+9e8e56f06c0615130e1e2f459bd4052a 0.0
+a33859e0fe8e20bd783ad918ac192884 0.0
+902c0a7dd5a911a8fbf9e34e49f72297 0.0
+8bb5ff097e0707dca3fe5860af20f4a6 0.0
+8bd6fae62d450bdf768bfc6464d36d38 0.0
+8d89ee75fdeac54f5ebb1ef3edf80e73 0.0
+8d99d70a19898e91e9d7dd54dbd5881c 0.0
+8db257dc277d4e84ef974e07dc449820 0.0
+8dba41073e8e2650e099cda3fd3a0630 0.0
+8dccd839cf4868a3343e411c063932a6 0.0
+8dcd5864b11c06837480c727198cded4 0.0
+8df5a666d571bcb1b2a44aaf978fab49 0.0
+8e1c69dc9a692ce4b7e93c8cb728b7a6 0.0
+8f1c11c7b732265cbfc1bb7dbd0241e7 0.0
+8f5b5babdbe9847334e115160088b371 0.0
+8fd6bffb88d374f38c6bf35cc8a4a221 0.0
+904ad13c7b8512200836d5cd9fb09e1b 0.0
+91b88aa295fe09a255b2f321d9d2b12b 0.0
+904d063813394e9a3cd5a23fef870ec8 0.0
+904f7b1ae29e6a78ff83eacd9ef9cda7 0.0
+906138f305d5b85ac761fc50bbb6ca87 0.0
+907ceab78e867d765fa360527608efa1 0.0
+90949bb38f58e92949ab8504aff01df1 0.0
+9099a7f671b923c7b07c12508f17270a 0.0
+90a0937762431acf0f8f3643e08d3180 0.0
+90a685caa7eac21daa8a8f56df52eb29 0.0
+90b56798a9abd8360a79627ec3c332d3 0.0
+90c2b51afe404951ec89bd52c7ae1f58 0.0
+90dd37137cfac54abcaebc7048fe77c9 0.0
+90e427df120ab07ffbd57703f11c1a4f 0.0
+9122e867c908301e783c5ce0c3bf12e0 0.0
+8b86b84549e5940744c30df2a573b55e 0.0
+8b78fe0639a5308fb6f33a8eaafcdfd9 0.0
+8b159e3b7ba334383b185c1926d9b9c3 0.0
+8b0fef40cdee88fe61bf6c86ea5b8d41 0.0
+84853bd31e74a04290e47c542c8d293d 0.0
+84d6422c401698d91e646b9b4fe4774e 0.0
+8507091bac6b1be88f3d360d6d535d0e 0.0
+855e8b78c6f45f9a2e3d5afb75c4f5bb 0.0
+85d8ef495cd58b08a300a623a3718ba9 0.0
+862f918acd63eeace039314ff2263a39 0.0
+86deb5cf8d6f29d383136e0d09863b69 0.0
+86f04c5c81713db61f97bb53835f9189 0.0
+8720f9ecd2331ce68e2896df1bf2d977 0.0
+8731860c790449910b7955d1bde836f2 0.0
+877f27b47c85f6b2d3cf8a6aa86def40 0.0
+87bb02a4196dc0430e4ad2a975a345c8 0.0
+8833ed97ab90d2af80eb7c7f7921f139 0.0
+88c180a637696e987712b40ac9089f8c 0.0
+890bfce6cfa2575db09fc1ba0909d3bd 0.0
+8919944cb4b67e5abb04cc8afad9045d 0.0
+894f46d021396ebbbc4041b65e198bd4 0.0
+8951cc9103140b71bd50c0932e096502 0.0
+89aabf43d24dd5bd0fd0f5aca189d341 0.0
+8a20667bca2eaf2a0687995af876a84c 0.0
+8a261f382a739bbc5f9494b2c64c2744 0.0
+8a658263f5f46e9b06ce187893565b47 0.0
+8a83cfd5eb074b5dbc6118eb03ef915e 0.0
+8a974c19d04562e710612ca049799942 0.0
+8a9f7499cdb85b8efa497bb56256cb43 0.0
+8aa70d0c06e8da0b4c0ea2eba6308983 0.0
+8ad30192adf51cd7b03105a1acdea794 0.0
+919db619ad90cc6b09b31c83d5719da9 0.0
+91d7d1256be7c1797cd45c2296095521 0.0
+a2e3ac6b18daba602b14d93882f3663a 0.0
+9e644716572f5930cb56186d75a6bb84 0.0
+9b2d42844c99887e67257a377f7b193b 0.0
+9bbe5054cf0bc376baf895f0638705f1 0.0
+9bbff59b3c1084173c4917e136b496fa 0.0
+9c25197937d1f54e2634026fbca6b55f 0.0
+9c58ab3805d06ed6f1ff2a2de766458d 0.0
+9c61dd9fc1208289eefdccd5ad281288 0.0
+9caa12440830e96456250882ad25f89b 0.0
+9cb1cfaea33aafcffe7c261eb104ca56 0.0
+9d2176c908640e85287b95f9cd17bf64 0.0
+9d650084364f31c59068a17261e13dc8 0.0
+9dc78fd8f2f7cb10e190e77af61c7ecd 0.0
+9dd9cefe38347b087dd318d82962cc03 0.0
+9e033e0a6e420a9d407f908d59aa31eb 0.0
+bf8144aac6a30e6695ad6bf8f49131ab 0.0
+926708e33d1ebf1a9da757f78c673190 0.0
+9ecd25659ebff6a840c089b93515c5b7 0.0
+9f48f7b79ed6f3dcde8b25e0cf244d5d 0.0
+9f4d14b114e7499772a797790d81c1db 0.0
+9f74473636e6874853d88dfcbcecff49 0.0
+9f8119029c8b972ae0375c8b968c14e6 0.0
+a02c286e5afe7220b1fdf83dadea939f 0.0
+a186bb74dfce512f83043f7b366c60c4 0.0
+a1999b4f10c91d6f8efa3c9d73f77fa4 0.0
+a1a22b2e460e6454dca7805f1cde48d8 0.0
+a1f75692d5fc417ae9461aa66e0ed7ef 0.0
+a220d7daecd7de9a9560ac2a5a64e2a5 0.0
+a24dae6b87885a55d81df1ea6d1abaf5 0.0
+a2e0f228d7110587954e13405f98f003 0.0
+9acc238746a1f2aa7745a0b5720c4eac 0.0
+9aa7314d56b3b3db2253ec2763414be7 0.0
+99dda7b5337bc09a69e9e27bab56fd79 0.0
+993ea35e8ad7ff6894e32247dc507e9b 0.0
+92ae272884c88aa3be9a5800f2836740 0.0
+92f000f936d3f349c5e7719a5edc5078 0.0
+93029132afec86c495769fbd8dd65ff2 0.0
+932e1f2d3f75eaba13d2b2be2f2f7cb3 0.0
+9394eec407745afeb6370fb2a0ee1a98 0.0
+9394fd89106163be8a11cd4dc46bc8a3 0.0
+93a9affe525a4ddda933fce27518d7ce 0.0
+94035fd1b8dabc1d79586ff8b52846b3 0.0
+94650d81f3f14b948edfadca3ca7b166 0.0
+94738eca8f0e714dac0390e647565488 0.0
+94a699e62982fe59b7725f1b7271972a 0.0
+94b000e59b7ac160b0416c9ab45ff0bf 0.0
+95348b8c18227b458056472fc9eeafd3 0.0
+954f244280781f0339631a6eee679e1e 0.0
+957a16edeead7722188be53d05fbf001 0.0
+959d48a11e3b26c9268f0bd1dae6ec40 0.0
+95f77f9e6df255cf93ec17c1a48bc194 0.0
+965d7a58f59ea6445b32afede05a4012 0.0
+96794c2d601e0082c9b66552dce42e4e 0.0
+97951199cbbcc765ccde49fbcbff09b1 0.0
+979578981da3916b394bf24c32bace21 0.0
+97cfaa815d9e25d41d6698f790b7ad0a 0.0
+9828f58eaf910927264c204ace9caeed 0.0
+984ef97167ec7b42d7a3650716d6a2ed 0.0
+988b0b7321184cfc2c53e0966dcab560 0.0
+992072d045462445dc239b35300af2cc 0.0
+9938c8f22908861f780f8719b75dc0b2 0.0
+bed1b5521bddc2d35732fafc25043f19 0.0
+c4b0455f8d6d59d4caafda4b3aa90622 0.0
+bfb7bb16ab570748b3a5d099e9426af1 0.0
+ecc877a3410d54ea784131356fe5ea73 0.0
+e91f1c5b67ffecb5ca3ca3b393a50f00 0.0
+e94776066de6ff852fc7a43e7a0626cd 0.0
+e9c73fbac5dd73dd5e3ada5664ae95fb 0.0
+ea2b0e4a93c24c6c3661cbe347f93b74 0.0
+ea6265476f08a35fdb764ae98206ad5e 0.0
+eae4215acb2d651ce76c5e4d10f3d839 0.0
+eaea5d7b84c08fdef5d8ab40f707e37e 0.0
+ebb5ea4b9b617249c40513ded403e30b 0.0
+ebd14a21e990e0266b2b985be4cb3c44 0.0
+ebfae3fd8d9da7341650be7f4f6c13e1 0.0
+ec38f851c5429708e114208c539ae458 0.0
+ec6c9ea52e88226c339487585ea02f42 0.0
+ecaa9f4e15a28f212d9bd60f749191ab 0.0
+ecff0a63780369ddcdc800ab97fb81f9 0.0
+f12011ad5219b22b6ca790103333a755 0.0
+ed1c3370c86ae00e0062a3498ef06cf4 0.0
+ed5952d1e022067b212146b9b3bc6812 0.0
+ede423f2b32cfcbc0815db2435994947 0.0
+ee062262ac3b97896676e558bb08986a 0.0
+ee084a976fc8028a3fdc50c9555b2a08 0.0
+ee41b2201102d120e0ceff1eb49ba6cf 0.0
+ee5c4bf663b722fc809746f322a4f6f4 0.0
+ee90bebacf6867cd5b4c487cb587ea9a 0.0
+ef4045fe369d078db89c253c9ddbd123 0.0
+ef467d766ba05adf651d1fc6a30cba06 0.0
+ef7c8af53de2ee3385027bc6b4bf0883 0.0
+f05a6141cb84e0c95470be3e92085b93 0.0
+f09ca081a69806aca5de24d404ad4c6c 0.0
+e8cff5dd9314935749206c1d83214d77 0.0
+e86212f766f010236a072bfbc04774b4 0.0
+e84bea8ae61342674744560dd7de4721 0.0
+e82ab12c51a893c6d7938a16632199a8 0.0
+de0ad18436d210c9b60044fc7b9caf90 0.0
+de478a19bf7c5ab5e75d3dfcb742f4d2 0.0
+de61860db8d93da85692dffb5cf32ab9 0.0
+de6891b6f7a1f35bd9ccbc058b9e1a77 0.0
+def8acc0fc88cc9a9cf110370ecc2c37 0.0
+df1bc3956db29b2cded9d3925999f6e7 0.0
+df6a1dbd34417b068878c043bd7a6bcc 0.0
+dff37b3a4d27b9220b9af1459162a48b 0.0
+e0fdbc7f687a7ea0cf13009c2e501197 0.0
+e10c0e74616a50d8956a945d0b6dd531 0.0
+e21d9f482315985036cf80d1b20de58b 0.0
+e2bf5a8c1878477d542ef764b910e5a2 0.0
+e2c3ff4f647112723741aa72087f1bfa 0.0
+e30c40e583572ed45fef99d6a32d3924 0.0
+e32053e0d7a8b525d0b062beb2ae11ce 0.0
+e392de248b88befeab2319aebd7872c3 0.0
+e4207d118a1ffe33509edaf320ac2a7c 0.0
+e4d8026eb8e261594d51750b1080dbf7 0.0
+e56aff2928773e4f9d4646bebe35123c 0.0
+e5a43b018c81cedd19e9a5354d32d469 0.0
+e6441fdd2f0ec36ec914d7154a64061a 0.0
+e644522cf6d7222c9da7f9ecf614cd89 0.0
+e6f747cd89d512cf187e8472cfb250a1 0.0
+e7037dc2701d6b9666d2c0dab6110ae7 0.0
+e78cb60fcb9cadfa0a32ba3ac4b2e95a 0.0
+e79e5e4c871bac3c43de566188e10002 0.0
+e80317d0fb0618cf19a96ef454fef9de 0.0
+f10155af1849a17996f302d71b32398c 0.0
+f19fa34c0c987ab3bd8aa42d8a4d21ec 0.0
+bfd6e1ccc45f37b290d63de0053c212e 0.0
+fc7466fe7b054b641298ab8b5c707d21 0.0
+f83bc32e60e9c7bddf53a52937cc7ab6 0.0
+f85dae8239789b16d0f8dcac9e272361 0.0
+f8dc4d17a9589b91dc466d20015518e4 0.0
+f94eeb84a8ab5049e8e0cbcc078de30c 0.0
+f994313e9f7ddae7f7d6806221d6be70 0.0
+f9b52205cb2d03153fd394a290212a10 0.0
+f9f5f08e8da4cc9740518223846563db 0.0
+fa09b814b417561651db81db34860ea5 0.0
+fa1d5d724ccb4287e07ea1caa9d35502 0.0
+fa71c8409cbcaad5dd98b8d80a46b07b 0.0
+fad561b7eab58e57b71a09a8b7f990c8 0.0
+fba43f263a40d5a9cc7f37f1551b13f9 0.0
+fc613984c6bd68a102eb7daee76a24a7 0.0
+fc9717bf86bf07fe748f608b4f2c57c6 0.0
+f283cd6d309acf7626b1fbc0079d275d 0.0
+fcab01e402145abc9e946d12aad1cde5 0.0
+fcc915000295812eecfa7245a74346e9 0.0
+fcfab546634e10be9800b9a4118b8dc9 0.0
+fd069fff020b15638483c7e49d2ef9cb 0.0
+fd1830279629e35893c2cc7538b028a9 0.0
+fd1f139259c53276619476db0cc81913 0.0
+fd21badba5eecdf5a284d5db7aa0679b 0.0
+fd8cc2ec2e11078f3635cdc7fb477823 0.0
+fda9e2638dfa1f4eebff04a14575f763 0.0
+fdcd6808ef8269653d25dce4a55a025d 0.0
+fefc7999a7e316dfa98bb6b63d6cfda9 0.0
+ff2b5301608beca5896495311a512610 0.0
+ff5c02a6c8a80d8898f37b7dc71f4c10 0.0
+f83918824d8dd7875a4b478adf4ee601 0.0
+f837443696652157e220ae13e330e0e6 0.0
+f80a17436037d706dc728dd265b7c21e 0.0
+f8000a94d405169f38df61e7cf1cd286 0.0
+f2b4aaab4b89a51ec6205485018c9419 0.0
+f307aa8ff39ee31213f4d9e1bfce6dcb 0.0
+f3153e589a8718a2a1109a6e181617f7 0.0
+f31aab4ff2dba1e72d9ea251e447844d 0.0
+f31ac04c4b89a126b2406e811dff97fa 0.0
+f3b9619b167cd0ec5f9c55a7dd6c7ee6 0.0
+f3c30aa47b3639f25d168a810ee07fd9 0.0
+f3cac486ad34144511deb96b29ded2c5 0.0
+f402dfa5da0f4bd53468557a1a6b01e3 0.0
+f434ebfe13e3764e53f40e7b99e07a39 0.0
+f45b62ab6ec22fb20364a7e05088158f 0.0
+f47f8e690205be8c0572db3c32829f43 0.0
+f4909ad74d5b44aa7c3694798c21ea87 0.0
+f4cd813a1b8a862f7a285466b25c6302 0.0
+f5487404313a4fb57bc3489c167e9a37 0.0
+f56e589e8377da78834cf577a69b939c 0.0
+f5dd3f079f8866010ff9e1ce86dea070 0.0
+f5f4ef252850e0b1f378923731257d69 0.0
+f65ee26124609d6de91d4bffee014357 0.0
+f6c6e7419ddde1e2e1d4a790ec6babfe 0.0
+f710c0927080e422094a96ffe13b389c 0.0
+f73160a94abee0441ab5a573877744d2 0.0
+f7686a74ca2d3729eb66305e8a26309b 0.0
+f7a5b80128b001df0f52bdad023aeb23 0.0
+f7ce5d68ad002cdc285273d049516c84 0.0
+f7e75337c2baaabcd5c9cbfa83ce5ef1 0.0
+f7ef98bb6d0061b9368a72b1ff40aad0 0.0
+de08a70ee5e3336d2c494d20206e650e 0.0
+dde2f514908a4fcaa80827346c7b7a19 0.0
+dd50bb187bc6f99222e626ddc810909a 0.0
+caf54b4995a78b51266cb0e4aed4deae 0.0
+c8933e6cd4ce9389f4ad6e2e24dbf5b9 0.0
+c8ae16a39fbfa227aadd2e6d5a63da7e 0.0
+c8d7e00fbcfbd34e1c85a8040155ecb3 0.0
+c8fce068920feb387167d44c01689295 0.0
+c936431ab7a77c5828a3da971da51acd 0.0
+c942f59feffce9c3ab527135de234101 0.0
+c98ea813f96e96f25ba321f74703bf6b 0.0
+c99c4c951e6f4745973043285ccf486b 0.0
+c9ccc527189a7a19902fc022a72c3bae 0.0
+ca0fb2462b67d4296fdb8de1372d9165 0.0
+ca5b38f92987671538ac4f8f5b4ee95d 0.0
+cac55aa795998014361e14836ddae2bc 0.0
+cae14798faeabb8e1b8840e712839921 0.0
+cbb1dee8bf80a6cf676e15d39d9979d1 0.0
+dc92897ae65716d40ff3b2746a2a6527 0.0
+cbfaac9f291ac42563f27903d0f684a1 0.0
+cc3e4918ee1c089c5bed6cab11dbee83 0.0
+cc42ed01e236e8f05d25a7310d47e354 0.0
+cc5f74d0b0787e9ca4f97835c2c0f2a2 0.0
+cc968157f7691aab95df444e8cd103e2 0.0
+cca22a273fa140d50cc3cdce9f539f73 0.0
+ccce7fe8aa849bed027f6898300e0c45 0.0
+cce0f21cdf5597d900d303fdac05b33c 0.0
+ccfb4c8a7a57ffb8fd5212276ff2b193 0.0
+cd1fc0b5ef8bb9823eb279222f0d0860 0.0
+cd5f131980408e798875e41b3c0d53de 0.0
+cdce04d738e587eb6a58f8a79ed36c20 0.0
+cdfd52e813292000f1bbc5c4d7c95b4b 0.0
+c870a3c711d733d9058cd2274a101381 0.0
+c7ff7e806765a39b6cdf66a15b63f6bb 0.0
+c7ec80af1d984fddec04831296fd65a5 0.0
+c7408e951d6050a4ea360aa404050896 0.0
+c04879d53b5b084096c5f2bdcc38d55a 0.0
+c07ef20fbf08691cd223cba17009e12f 0.0
+c0b31b31a1d47acfa4b2a6b279ee631b 0.0
+c12331989624002d4f790eb1d19dad02 0.0
+c123ec846a7e3837d1247386a6060ad9 0.0
+c1365ec008032ee718dcafaf5ac17adb 0.0
+c1a5735daf6f1e6157aeafb503b9e564 0.0
+c24ac2e6e7002af9827563df39144c55 0.0
+c2cd19f77efced924e3eccd8bc6750be 0.0
+c2efeb6d250b34f682ae6380682f60b3 0.0
+c30a545d14731276f649c2428c0babe7 0.0
+c326a5572886b04b8ec92047c9415a00 0.0
+c348a7cc121fa43fb14ca92cc4273c2e 0.0
+c3937c933413c88df1db4b761e0e3ac7 0.0
+c3cb5f0be6d5e3a4761a78606c1e4113 0.0
+83420377ed17df2c16abaf3a72e05633 0.0
+c4b11e3e5340bdf421aeb4ba0aff0269 0.0
+c4cceb02f4ce674101aa2c0d1317cdf6 0.0
+c4e57b91cc2be5a2232b3612abdf4c0c 0.0
+c4f9ef34bd2919511069f409c25de6f1 0.0
+c59161ae395fd5322d2f192f13cfb31d 0.0
+c5adecb440a6bac201ab573ccd2d6dda 0.0
+c5b4c6b372dbc13b6a7f2d466fc7335f 0.0
+c5b95a83a06195f219b0edd9ab18fb1d 0.0
+c5e3c5422835403541b0af331a2cc649 0.0
+c6178d46f91b123b2c226120a3167a34 0.0
+c6959c6d7f3f2806418eddfeeb3f2d22 0.0
+ce33f26f3bd4784cdb63910753fa0792 0.0
+ce399235aeab566680d0ad1ac6f9c7a4 0.0
+ce9976fb696a210e526fb161c15ed88a 0.0
+d3f86c6a6fd03654e0848a1fb1ad1385 0.0
+d43ad42f3b1625379a2429c10f419eda 0.0
+d4e63b08a25162d359541e38c6e484e5 0.0
+d4e8fb8d73378ac761ec8eb3faf71804 0.0
+d56536a97c18adccf18eb08958d93345 0.0
+d5b4f33a93b1580c123d306690d0ffb9 0.0
+d5d727a0e5ba892a10e9c81ef29d02c0 0.0
+d666cafbfcffb984b12c4a405df6c7c9 0.0
+d6752c4c0b83bb0ba7df8368b3c94d6a 0.0
+d6bb5703483ac68992e09ce17683c52f 0.0
+d6bc69828ae0ef27e99c01c9e7df86b0 0.0
+d6c46ad81c01997055a3b713c0d5ee1b 0.0
+d6e76cfe58d253204f5589d78cc79bd1 0.0
+d7130a2536c5b200a01662b82ff93d06 0.0
+d7466ab97edf938c5ed46f6bb1649b60 0.0
+d75b7080930e7a77ef3de8c6154895b9 0.0
+d780f9d98701c8ab19b4488869bd5be6 0.0
+d9294a924ce5ef9df6e464b4ef2f9db2 0.0
+d93dfe29130fd71bd30fcdeefea2862b 0.0
+d97a97d4f8974683e4a66de090129c1c 0.0
+d98796acd66eb71638defd318991adfd 0.0
+d9d0120f569d8e15590c6486f4ffed36 0.0
+d9e7a2013a2b6863cae05c51be41848b 0.0
+d9ef9c1fd69f0b398b331c428991204b 0.0
+da5463f9a3163db4528898c49e7fa54c 0.0
+db6e4e3d6bea23bd5c619e4dd4ef5b87 0.0
+db9caf76f81e72dac22fd1a55da6d5ce 0.0
+dbf76c1b3ed4745cb94c15374d951beb 0.0
+d43642bb228b6bd084483d2e5b48c3ca 0.0
+d3d99580d90cca486a23372a474e9571 0.0
+cea0486054293b47714d155ca349b763 0.0
+d39f37b814ebd81091a949f8bd9d1710 0.0
+ceaf720138cf7bdfb66e2dbdae539e38 0.0
+cee9ade29a61d7669cc2710549611c53 0.0
+ceedb016e7628567e25ad61c46a8be83 0.0
+ceefb8fade04925c7bbfad924171a220 0.0
+cf54928a038081788aa5939c49ae5340 0.0
+cf8eb2fb3d885d1919ba916bc0975347 0.0
+d0309ad131312d425619892c703b13ad 0.0
+d05c86b5bfc092d8677507fb1eae8d66 0.0
+d061a99148d649c4eca4836c5f4b5b68 0.0
+d0c34b21c7c96271a1bc5f7fe82df3a0 0.0
+d0f02fb55f33f41f45d8b5a6a6a5b018 0.0
+d103541d4efc81020afaef43c0f02c02 0.0
+d117a20b7daabc35e9d810f3a93d18b5 0.0
+d1539f8c2855fe568d1313f20d26710d 0.0
+d1b7d3ea1db807e9352522d92e403246 0.0
+d208cad7c585049e04076ac7df960e7f 0.0
+d213f55b066d42f6c6fea9f1d27582c5 0.0
+d2589a845c837dbf413021f74e82ce0f 0.0
+d28170caeb000a27998d838a8e7d5de8 0.0
+d284b92d7518959fe5d52aa45b687394 0.0
+d2aa3a30c8e178110125b88dfc7e6f45 0.0
+d315a0b30c8e21db4cfe89e65c0b2bf1 0.0
+d3211b7a04e5fb36a383450b8f5710e2 0.0
+d32627161d829a1488c2a047669cb29f 0.0
+d32d7b4a5e805f7e43f7823bf3eb566d 0.0
+d34887a684a1acc690b8f3644cb11c12 0.0
+d371cae3627eb71177294af103c68940 0.0
+846b433f433cbb20d6208bd54067f911 0.0
+7ebc04cb539befd9f2e7faa131628c66 0.0
+832fd777a62ea50c5c34a8c2efb5c5e2 0.0
+3017f87a3b0f5200ed54eca17eef3cbb 0.0
+2c0fa4f09b72e538e3c6acead5b987cb 0.0
+2c61ba77eb8ea89ea85728bd0022ac09 0.0
+2cca9e3bd17f06418ac623fb0017c4cc 0.0
+2cdea72aa7fe332d3e0a891d49d22c1e 0.0
+2cf53730a41137e7ad95ed3680541d80 0.0
+2d5957484a1289ca13b12328802dfaec 0.0
+2ea79eaaa0b23b2a7d2cf470cbaf72ea 0.0
+2eb7bab7b0af702e4f18358968a8f7ff 0.0
+2f3b51e3f7acc3c9f66f1552b67a0209 0.0
+2f75b02cd4d3ddebf390d17a120f419a 0.0
+2f9d067d5ea9cc1b1289e9de830b326e 0.0
+2fad39e365933a7f925040cb3c2a5ef8 0.0
+2fee36ac3b841b7c63b377f87cd57634 0.0
+30207d209491dfb7de83344e77fd6fbd 0.0
+2b982979f2401a8bc447d7141879e9b2 0.0
+306d425d43fecaddb4ea5af1cc34ef98 0.0
+3098ee4e595175076514b00a8a20c291 0.0
+30fc8af207d17abc37394fbf3d1793f7 0.0
+31ebb433ebf0b3e83259d826875b4438 0.0
+31f493208054aa0fea8a26d72ee670dd 0.0
+31fe7997ddfef193110db377890f9520 0.0
+320c03afc417b829a51ecad3dea2e065 0.0
+320fd62cdcfb66cfcc1438371751c4af 0.0
+3233034b11dfc584717d9264882a4fce 0.0
+32382a0f5831da5faee464d011b5616c 0.0
+3255b196d8b70893d9124239bcd46782 0.0
+328d5b7cdad033d7b3ed419b720ba2d3 0.0
+32a21601ca89d1f2629c1a9fc2de4359 0.0
+2be34a58b64107daf7b5426302feeff7 0.0
+2aa7a8ef7c70b5f684c856e6c21b6635 0.0
+32d9ab84e859d48f487b790c58e98234 0.0
+23dece0aa961e717a81e1118455253d1 0.0
+219fca301444ebadab34c1a35847fca8 0.0
+21ce8465657612a85e2f89e74d285ce1 0.0
+21d92a1e71eeadaeec2fd51f41249446 0.0
+22026223740aa4b6376f724986d7bf12 0.0
+226d7503ab3d1ea0ae84d97fbd804323 0.0
+228592b44e6a4d192af2d0732ec64a51 0.0
+2290e92733707772a839abf72e7981f9 0.0
+22b9160be22106587cc68893686c44c1 0.0
+2364e23d05750415070f8df3ccb0c95b 0.0
+237135dc8b20f922cabee934f7d410a1 0.0
+239b4f4f6c5fc459a1c95c4bf704de3c 0.0
+23bf5f7fe3d1ff8167bf3901fdb0685a 0.0
+23c05ffd2b0ba1534717ee52a81b589e 0.0
+23f4c333bc50c5cf2c0f20855d049cc4 0.0
+2a8a90cd5770804a5f23f425004de7e5 0.0
+2403a466dd6d1e4f551d80394c2aa994 0.0
+2480f3aa1e7784b461d9b5b83a6bfc81 0.0
+24ef07664cfa04a819d7797a89e653b6 0.0
+251429848560a80abcb96640ceb102db 0.0
+252d0fd6601d9776290e142e5bfd744b 0.0
+2594fcf48955bec7b61a5ce4aefa3868 0.0
+2732bd972ea75eaae442f937cb39253e 0.0
+276c5b1d00e091a197a9ed8753beebc7 0.0
+29568b810d7a4e985424b9037aaeab9e 0.0
+29784e17460e4faa8e4eb6576e85f22d 0.0
+29ccf8b234ac00b114fb2c8e1c44f932 0.0
+29e53aaab03b86749d3941da8ea5b59c 0.0
+2a13ecb6c618ddf8154298c69336dd8e 0.0
+32afbeb5f76a69e47f6cf6a1870e95d6 0.0
+32ebb69544acaf3162b5e828d05a5f84 0.0
+218a6324d16b842a38c70f1088a7c8b0 0.0
+3ee072a59b45057a86369a7c80ad90e4 0.0
+3bfae16068c7a0b87db6a711c01eadc7 0.0
+3bff9ab82e7f2a76cc22e687ac7b72fd 0.0
+3c5496fdcfbdd5b2e4f620d44f83414c 0.0
+3cb3514dd7db8d7720477ab5759b970e 0.0
+3cc53c481fdae6358e4d2d5c9f95a1f0 0.0
+3cc657dfb41c68faa70508afc63f4222 0.0
+3d52122a218f9c70126d51d42996a1a7 0.0
+3d6469bbfb14a86abdaa602a89753ec9 0.0
+3d766867c97f511431bf97e059d6498f 0.0
+3dca01142e22e42b989fe5a513e103c7 0.0
+3df77157a242066dfb4eb9c19062436a 0.0
+3e2716bec0426995af52846a88864c6c 0.0
+3e9c062471302ac0ef352264490d1b3f 0.0
+3ee56b6c4d038d4b8d26f63fc337cd02 0.0
+3af2ddaf4af4b5ae4100439a9c3d4099 0.0
+3f11f9f226ab63cc1ff6bfeee9699b79 0.0
+3f1ed51e954b35a8108c12dd882b696b 0.0
+3f2fd56fd1ec26b86eed89795fe014cc 0.0
+3f54618710e03bf1e78b57e14ca8775f 0.0
+3f55a8ca784ae7190790023a30cd859c 0.0
+3f56d7b61bdc82e4d12c6f58617f5103 0.0
+3f6c45001fd40e8b6f9d0558539fd820 0.0
+3fe55dd5e59267f275e972017302459f 0.0
+4060be28072a120ebefbef199c09187f 0.0
+40787e473f59ede4ad561b4e4079d97b 0.0
+40a6b537d995aeb12b0100e2cb6ad879 0.0
+40c85a458df8d5a935122ba778d4f334 0.0
+41239266f2952a56a313fc4798f28a1c 0.0
+3baa56d5f42b81f2817aa34412e36439 0.0
+3ad7da49c52ec79eb228067d25d2f2a7 0.0
+330b6fa220b88e87ac7c5ed39ec276dc 0.0
+36be43c248f8e4a7b60a4739cd8dd932 0.0
+33206a2306ca96922fed904661eff421 0.0
+334af877a390268de17839f80e212fd0 0.0
+33852dc244bb69590e8e756a83bb2f4c 0.0
+33902c26956acf7efa69413cfea6326e 0.0
+33950a5fa4d8ee86e10778182fae6714 0.0
+33b58b0709e9c05ba8404c47e6a7ea2e 0.0
+340a5698fe78713b785c3bcce89df73b 0.0
+34c133377349d1b57072aaab4f781fd9 0.0
+34cddd9fe152c24d78924ef25927d67c 0.0
+35513c8de34a49ef37dde85737e6aef9 0.0
+362b97b08074732371b6e76cd1a894e1 0.0
+363c1c39d58bce4fc3218659d0944258 0.0
+36402f6154239da8ce422da64a0bd247 0.0
+3781803fe3eeb8e5bc50a5308f4516a6 0.0
+3a8731148bda5325db164e3729754abd 0.0
+3788b1d514737976689f4ed1dee8b52f 0.0
+37bdb470cfb64ad4594c5968cd95e312 0.0
+37ff76f02e76dd8fc629cb05575defb1 0.0
+387e36ab394fd347fd94d8762016779f 0.0
+38c80ed971960fd1e878080f767a4765 0.0
+38e3b7abf4e08699d7a1d1965f177086 0.0
+3902e1865aa0a0863687793a7891f8f7 0.0
+3950fe55c9d109b986e327da3ab3653f 0.0
+399bcf0feb12a20ca0731d8d65268dae 0.0
+39bb3fb5b2257fd2eacca408dc42c5d1 0.0
+3a0e00761e8113458ccd7d13af71b4b0 0.0
+3a1461e8439091ad6b93af0ddb3849eb 0.0
+3a3e562a61e57ea1ff4f24857a1e79da 0.0
+219d2ac2e47968bb839bfa948559ebcd 0.0
+2086666afdd5de098b04a94247040c11 0.0
+41b518f17927240fdfd1465cd351afc4 0.0
+0c70bdd5631104dd3d818561c9166f6c 0.0
+08a62d972f6bf2ca4dd9291bc2cc9f63 0.0
+08be83d0a527ddd1da0861135192e593 0.0
+091e6e4ffe1252fd657f19e42e04e859 0.0
+0a5225695ff3a449ad8ce395d75c3799 0.0
+0a99bccbf1bd2e4f728514ba8c4cf6b5 0.0
+0ae0be308568d07e0434434b14b8d565 0.0
+0b05a2e48cdc5f9e766bb0f4c209d4b4 0.0
+0b3c20a64187d28e11a68f342081c17c 0.0
+0c17cef318a9e1c8895c0961c5ed2193 0.0
+0c336b876b379dc729ffcc2a1340f670 0.0
+0c3a02ffb054a07b355c1649433fc699 0.0
+0c3f267cef317f7d71015849370f1f6d 0.0
+0c4204a0e15fccc22a7456685898cd5c 0.0
+0cc2420a6a4698f8bf664d50b17d26b4 0.0
+0820b82d14921828dfa20b56242a48c3 0.0
+0cc26190e976fc496119a5eb4cd0fee7 0.0
+0cdc26df2e337ea78808c32c8f52c94b 0.0
+0cf86462b45382da531327e4186091da 0.0
+0d188e3528ce4d674680de129470f7bc 0.0
+0d3b942348cade7e1c41ac52df03e63e 0.0
+0d69754929eb3dd1a5b256b43f1c05d7 0.0
+0e637c81e25ffbf7b7e68a8fbcc4f2ea 0.0
+0e7be154d128e3ce2b9b05e3e64a679e 0.0
+0eb15dc6ca644533da15764998436274 0.0
+0f0c741eeebdb89289460115e9430f1a 0.0
+0f42bfd00e871207c3e2bc92a79f3456 0.0
+0f6200a5b77099113b52eea9a8645181 0.0
+0f8aa0e4c4ccd80c090db7c72be4d264 0.0
+08a1598eb1ea4dca23db1135552cd5a7 0.0
+08091b93dffe421f648aac3eb7e192c5 0.0
+0fcd8cec2c4190fb7703e87f3c04a0a9 0.0
+04c8be5a3a6ba2d70446812e99318905 0.0
+009a4919860d6d1fddec5d3771d37351 0.0
+00bb7a84ce1fa6f7411597672ff1b09d 0.0
+00ecdfa98a302087bca221884a535f55 0.0
+010f0ac2691bc0be12d0633d4b5d2cc4 0.0
+01241d7b0a1f219f5a68bf78b9f3b294 0.0
+0189d0173c07f11e7586ff20eb33f5ba 0.0
+0209f9868a22059c659df474a746d8f4 0.0
+021899b8a2f4157d54475a024303ad5c 0.0
+0233fed56365db96e625e92c4dfb9cd0 0.0
+026598f1d03fa900cc03cf9a32dc17d8 0.0
+03569e3fd843f501355f7853b0ec9361 0.0
+03fdbec0c9b63a05ae79f92c6d3a4cfc 0.0
+041d6dba3af0b8c9c1a422ed942ee16c 0.0
+04cf55ca51883bf19bca35e8ab225def 0.0
+075f8821bc5c57034fb74afc07233d97 0.0
+052ad6076dacb9cca0dbb762a466404c 0.0
+053c4d5b8717b60387f777efae437b40 0.0
+0540bf08e9a1eb8691a9b1be2afe577a 0.0
+0550a92d2e1e15136084e85881efbdea 0.0
+05736395b184324b94bf557984b36834 0.0
+0584be58049c7828c032b2171aa00a4b 0.0
+0590c29091ff2b3affd966ed57773407 0.0
+05992d2befeb4bd87b626698a9694ccc 0.0
+059c8f8bb468064b134ea6d3661761ea 0.0
+0615dc8ff9d74b14531dde03da40bc47 0.0
+06c0aa8ad0fa418a4236c6914fb420e1 0.0
+07465ac96a07454a7c155d9c674bbd5c 0.0
+075eb451ed18cc7fa554e88c2726e7ed 0.0
+0f9411ed14b5b3aa1a9655523fa33004 0.0
+10155b2834300619ce0102110a37b057 0.0
+200f25968eccba0fac01ab0385952acb 0.0
+1c4344ff805fe2e4a760b5d01eb57eca 0.0
+17dd8d3d133442253a1d51601ca5a192 0.0
+185e45802a5c7674f89b5d0dd66d7912 0.0
+192522c9b7c41f7a761f8a2ba792268a 0.0
+19cb3add28813b2bf3e1f40a000d33c2 0.0
+1a0bea5ccf9d532d75136168ddb54654 0.0
+1a389bf765c8ff0f92600f684fbe7ad7 0.0
+1a5526b9f7147b6893f1a034a107cc65 0.0
+1a962bffebb20d79245326ada6fa9b4a 0.0
+1aab913302590e21a7be28b1494a3320 0.0
+1aaeb592fcca5013db99a4f629c55062 0.0
+1ba09da532a0f9985b3caf72a47b42e3 0.0
+1bb5836797e9a89a2de9b96bbe206524 0.0
+1bf05d12bfd387888facafd52c07a37b 0.0
+1c85fe6e01899c1b9575734197172ab7 0.0
+17824e221f06eecf126ea102c3a2c289 0.0
+1c86153e1b13ab5322c96f2602b73513 0.0
+1c94105978b1aa2095e3c0096774f240 0.0
+1c9f7427ad9eef9581c32e490d0f3e35 0.0
+1caec778d70d2d387717d1d1bc248bb6 0.0
+1cba52a9db0c46eaa9148953a6983f1d 0.0
+1d1d606cff1c8bab0027fdd1e2ede36d 0.0
+1ec95834f03c9606f9433e59443e5bf3 0.0
+1ef782a559e3e0a4ab42516e0a2c828f 0.0
+1ef9d7dc5f8c93a8a43bd84e55f9a08e 0.0
+1f4c8ba04bcab2849c9ddfbd7fa3d1ea 0.0
+1f982b62b3d278f92eb3007e366fbe14 0.0
+1fa555f9a74d47a6ca7a65a347bc885f 0.0
+1fc9389433a5ed33af26e3985bdcda26 0.0
+17c24a9e01ee9569cd8f4a4b0e9b1b33 0.0
+16c36bcee25e65305630899e962804ca 0.0
+105d5959bed2c9399522aba4291031fd 0.0
+138daeaeee1211d2a3d8502b8fe23b2a 0.0
+10a9a0887483597b71cffa08dcda40de 0.0
+10cd502b01abafc87e2ecbad86389286 0.0
+113ce005a880987438bdbe19ee22add8 0.0
+1141f30eddb626a3a1151a19445eadfe 0.0
+1142fa8636688cdac86a8bdcbc8bee05 0.0
+114a94e972186d56504069f1bacf505a 0.0
+11764b0405507198175fe590debb40b4 0.0
+1182d94e2fd82a9b44657970e252736d 0.0
+11b21c7f71591dd1570f9717990ea4c7 0.0
+1243750138937650754b619370565800 0.0
+124c75409b4ecd0bf23a84e28593567d 0.0
+1264ac914d010d7c663fadca9c5904f4 0.0
+131d3b899e72f19fcae1b0f9f8f3a009 0.0
+1407e963207002430eaebcae4b500708 0.0
+1661c49d1dc41645c0bf242b2064f1f3 0.0
+1442af5b43d77ad0cd6b73feea01af87 0.0
+145ebc1f8ab3b2896eb631008973c0b4 0.0
+147e08753c0c32354e69746256220ca8 0.0
+148c7eb0139bbc1b8c87dd31464742e4 0.0
+149e7d5e484d157f2ac6140a2111618e 0.0
+14ea2e862193001ceb4322087eca9f4d 0.0
+1516918080ce0a63f2e1ac6251ba1282 0.0
+1527fd8b14037f53b8edb26b111e1f72 0.0
+157a9e8a83fb37615cabb48c14d919f5 0.0
+15dd83460074717cb57171578115cb49 0.0
+161d27a7363501e838efa59628353a65 0.0
+16306acecd104a4f12ec62522e1bf751 0.0
+1634bc38311d3c868d1077602dc439f2 0.0
+419242e308e14800b26a10a4bf82831f 0.0
+41df3b4c55182503ecd97c6dbe47966a 0.0
+832e6210c0410af8e0356bea5ec39c80 0.0
+6d34445f3adb296a5642fcb5f81e5219 0.0
+6a17479edbe461431433bddb084f731e 0.0
+6a66cd42df39f678f4bb2f3e840b011e 0.0
+6a6880faba39965be0e4b7d88966d1c9 0.0
+6a6d8054e2a7052fb8fd458dccb2f88b 0.0
+6a75fd73efa64af9c36a4435bae0f920 0.0
+6a83401c3ea3089bf7d3c3051a709ef9 0.0
+6b90b86f5da836f13c0ee2f67132e12a 0.0
+6bbaf2d026d54c61f24b5c5483e56842 0.0
+6c1f3bf3e2186d0faf5325be92f4c974 0.0
+6c4de756c45fa6e7e5adfe196f9f0039 0.0
+6c7fa77831ae630967a6fbfc8ee47901 0.0
+6c84b0ea6103c2ad5f4c3b7f5dd5e1d2 0.0
+6d0013cae9b331ee166821cb124dbca1 0.0
+6e8986f8088b452f964e7968bb8bca87 0.0
+69f46a090420a7f596df5d450ef67e66 0.0
+6eae8918e5425723c03d19afa8ca0356 0.0
+6ed7152332889b69d2d2f33c53b68934 0.0
+6fa149c798c4251f1ca8cd356f0568ee 0.0
+7029eddc845e01d613a3bbc113ebeaba 0.0
+704d0070549f26fe1f99eb736cdc4c34 0.0
+704e8a1acf77dcc1bc06251a34ba2132 0.0
+7069d1ba735fd4d5199050b4c100bea7 0.0
+71a713c5aa69a8e46efa23a2437010b8 0.0
+71d32936afbf6c7745a6083e7b09dff3 0.0
+7201d53bd647437ec3de879774eba498 0.0
+7212fe837f6f09bd16ed2c9b2c58e557 0.0
+7281d8476d29dd24b3e674ce9cd9e3b0 0.0
+740c97e7b2eddae8044cf27859a86b7b 0.0
+6a129e8ae637c96e7bb56223913911f7 0.0
+69d052e452e79d571cba042af265d50c 0.0
+744b8cd51857478bda77a9f3ab5e1d5a 0.0
+66127b1730c816b3e022c01fcaf024ea 0.0
+6240207fd3edd24bf0799b5f427423a0 0.0
+628a44c637e99429e3ac4c39227c6c9d 0.0
+62ada1b9706d5e11072e7d78ecb0a4bb 0.0
+62cb0ed0f2cba6fd1ab5d2a374c15bb1 0.0
+62f043c2aed6f033879b83ca519fde32 0.0
+630bffb8b9a197b3a6359f42828b4de3 0.0
+639a2c1d7ae3f6700506aba8baeec034 0.0
+640f0d7bcadfc07ac9f3a4d0b440d859 0.0
+64618afa3a07dcab19ba87629333ab84 0.0
+6474f5ab8c91efe951ad41f60fb4218b 0.0
+64b3028e870c12fc7d39287219f7af67 0.0
+64dc86ad2807b50d91f4938a5bb31a87 0.0
+651794369aeb3db83839b81fe49c8b4e 0.0
+6677fe5ca3b81ce606eac5d76b0a0ece 0.0
+699d4b4de178e2d52195830debfc3b1f 0.0
+6689cc067754fa14964e70c77ae9ce0a 0.0
+6694ca246e895a4f1d7e1d077cc27f5d 0.0
+6700772469767941a3992c3e2c65c3bb 0.0
+67cd78854a06d6e3867c28745e509ad9 0.0
+67ec5c8d397bcd64b2e336a60eef437c 0.0
+686d42b9552375c3b61301ae5605e3a2 0.0
+68bb0065fa02ba7b52acf3f4d833895b 0.0
+68db566a60ba7167b024b4c5032d9400 0.0
+690754306aeed53ec9ffe767151681cb 0.0
+69536042153447ceb692aa53159ce0c2 0.0
+6961c6244fb505547380d15bfee162a9 0.0
+697b3fb29268cba4b2a9eef0eab2de7e 0.0
+698862fbff78c7fc3cd7a9f4958bdadb 0.0
+7412af58f3daee6101db082fd334144b 0.0
+74b7dc5aa537fcaf4d0046dc7715b9bf 0.0
+61602441fe4b6f2b36be40c311fa9cb3 0.0
+0064e2a432a2756eabdb6288a368eaef 0.0
+7d4bc1c15b975e6ffe247f13716e5f53 0.0
+7db357a4adb452de8504d8ca1ddb3f99 0.0
+7df3c653a9ad469d0de18261ee771eac 0.0
+7dfa96abfa7484bb6c9cfa90dc9b606f 0.0
+7e17a5137fd9dab8c02617a186088c4d 0.0
+7e3f4a0767fe59521a6db1862056798d 0.0
+7e54570d21f03dfcefff07151a8c2d54 0.0
+7e57bf1306ddc4beb5aa7a1084d8c845 0.0
+7e86e84e054d9c5a6d485c9ef762c5c2 0.0
+7e907df30e0739c3bf57f18729659005 0.0
+7e9ed2c841e64976eace4b578ff56a26 0.0
+7ead39abbaa90f90242247c455393162 0.0
+7eb58843a058a3dcc0c051e0762de977 0.0
+7f323d6db23992932ec6e097ae09c4c4 0.0
+7ce470a3f833253f6d667fa6830abe07 0.0
+7fa68c46b87cb8fc334456fc14dfdf23 0.0
+8042eb376d870b471cadeab735dee5a5 0.0
+80fd14aea177425eb5d6a4b388a08a07 0.0
+812aac97abf11d6d64a00496e070e266 0.0
+8139d4cc5e4e5d44bad494a295a3e7d6 0.0
+8183f1e3f9806994e125b1d5c6932498 0.0
+81a1e88c8e85192aecbcb517cb0bf1ba 0.0
+81f1b22052c8eb3dfb1e6b45ab7c8530 0.0
+82168325f126cecd669b2d0e030c9eae 0.0
+82585d0d14cc315f9da0257c3a2875a4 0.0
+825e1e1d675a615bb3e6914499939c93 0.0
+82c890da8759fa9477cd991a1f214b3c 0.0
+830932289444e28d4ab0ac9cdabfe977 0.0
+7d2f0f34f2ea4ce7030a589e76c0d06a 0.0
+7cc8273fa6f3c9f55f426c3765741834 0.0
+74f2fa20e0ed1f74fbe4857f249e3a28 0.0
+78eecb7cd2176d3a2ed21ce8154ba1b3 0.0
+758cd34d30d7af60445b5f0f932d21b9 0.0
+75a5999048000b7e178036f0e75e4dd4 0.0
+75c5f35120584c258c8ffa734a94ea8b 0.0
+76075eede2e2c9cf276a8394c7cca293 0.0
+764bb70efe70ec930379806d178319cb 0.0
+764ce1e6cf32606d676870bd6ab3e21d 0.0
+76cee9663ccd2b6160d3dfcc88df3c38 0.0
+76f7f30b434c8deb5e2a0fd50baa1496 0.0
+7769cac4c6bfbe7e3cddd33636c22519 0.0
+77c18841a3a2296268d30a71c26a5cec 0.0
+78266846787de98cf89a724cb5586858 0.0
+7839f352a2bbf8261351db78a9bf3bd8 0.0
+78d8e6cbe439685d63e01561094d1736 0.0
+79280cea51a6fe8a3432b2f266dd34db 0.0
+7cb7753109015ae718a1a0201d081e99 0.0
+797ebe05936131bb058c0ab04c171a26 0.0
+7a1144e3e5689e67eb04e1a00e49c45c 0.0
+7a629c748d0205443ef1e8e26ee09543 0.0
+7a8d29c59b803baaed9cc1f04ce0dc33 0.0
+7a9469ff27dc7afffdb7238976da0a6e 0.0
+7ae7a28e3e35f8669d62a551beedaf7b 0.0
+7b2ff5940de11da4dc3bb208805dff11 0.0
+7b6f8858b481821ad1aae2e8d796fae6 0.0
+7b9a7ec76bd5dd38a2964e80934bc632 0.0
+7bb3a2f7b1bab185035b2e361b4fa760 0.0
+7be9bac1b13acbd8acf0de464d7c20e6 0.0
+7bfa7582156c882bb0c1de44aaaf7a3c 0.0
+7c99906fc47555e0b2df4cb90c47a3c9 0.0
+61795ee8e60f4a0f66080ab812573fb8 0.0
+60e681703dcab859a8e336be27fd6cf0 0.0
+424613c7d3d2daf5c715d1356a972381 0.0
+4e3406627aa545813116167826c24a46 0.0
+4b3f3f0ecac9b4026751b7549559c9c3 0.0
+4b548f1f6fc22f32237396b7d4ffee6e 0.0
+4bdc8766a80671911353b96f2d702ffc 0.0
+4bec204a5986223b16143ad5db1c8433 0.0
+4c1afcf47b47fc2850fae19210e27117 0.0
+4c2189b6b5ced3cb80093414e9449de9 0.0
+4c731267e801c2afe253b5cbb58bdd24 0.0
+4c855814a58f4f324e8f0cfaab222e1f 0.0
+4c954887a7bd1f32382dbbade7b91aaa 0.0
+4ca53113aeac7afa697e8cae58f69855 0.0
+4cc94d6f0ba7a2a174fa668ef5966bae 0.0
+4d7f3a8e908cb29809ed118acf40d513 0.0
+4db47b583b03cc6706e28a2fe2731080 0.0
+4eb0fadd9ccb6cd253273f89be164175 0.0
+4ab7cc7f5969573696f5cc3b1ea0f4bc 0.0
+4eb2c50a69efefc9d10540928ce08822 0.0
+4f5e90e831377b98f5f6e569d6d8f64f 0.0
+4f6f2afb7adf1bb487b6a86b13022445 0.0
+4fd1b92e9814b1bc6d1f7b1117daf660 0.0
+4fdbeac7d6cea1fef876d607636f189b 0.0
+4feb634e0ed63566c64710659b17a2ad 0.0
+4fed36b0643aa2ddcecf381c1f487a85 0.0
+50f328ff0b5c3003ce220fbd7aaa2ef7 0.0
+50f8db74e965cdba7ae86014644a3137 0.0
+516549c565407769125e3e8f3ca298cd 0.0
+517eed6e97dd2846dae1553b8c7c9464 0.0
+51a63d18809e04591820c7e3f3acf0b4 0.0
+51c999131379511e8950fed0550134f7 0.0
+4acc36d268585bc3834e627756313b21 0.0
+4a86baaa7e5adb00842830155d3c7fd0 0.0
+51e77f4577d36c8bd3627a3d0229dd37 0.0
+446ec4331e4894b14be920825fc962a5 0.0
+4261b78dbd18dd66cbbc7a7f0f2a51a8 0.0
+426bd7119a31b73ef734771a071a216c 0.0
+42ac6d34b293d442987af367e2eb4f51 0.0
+42ce63585eaa407ee53b568bdc648be8 0.0
+42ef609ea1f85d92d08a416be896e309 0.0
+4311a44a7d56ce4086afad3e75bedd6e 0.0
+431c53fa5c895a6e6f5951578f6dca8b 0.0
+433d8ad01fdf6ed57d8f509a7d6bfe2a 0.0
+4346c337f6760bd0f8e574277524165e 0.0
+4355c3f086c69fffe013ed0a3fdc8b66 0.0
+438eb58eedcd45e1eedb5889431ebe2a 0.0
+444d635c03299c7b88a3a0c236831bad 0.0
+44620cd517e481b3b2b27ff6d6611d53 0.0
+447b9d6a5c17543a456c597e65b37139 0.0
+49dd84930ff914f17216bbbef0c6ef0f 0.0
+44a36b6738e227582e14a58b9d92a5f5 0.0
+454aceb66125863a02ea9d46c0b23136 0.0
+457d3865cf0a4bc602a07d8d5324388f 0.0
+467201ebcbdb17a73c8dbe98f8bbdafc 0.0
+467fa58ad637f123633ecf3cd389ed36 0.0
+4687677f9674fec6aca4275f36063b4d 0.0
+470aabeb5aab36d5ddc9d1995b087843 0.0
+4786895e195caa90723aa4bcb25b8b03 0.0
+4790a2be4d1288d94177213af8380f12 0.0
+47ad35356a9bfec68416d32e4f039021 0.0
+47c98abffb77eb5999855b75594356cb 0.0
+47d0d6c4ca012181585fabd306fe7026 0.0
+49a130e2f6d8bdeddd170945a75a6783 0.0
+51e4182fc25b356d52164f3de0d5d4b6 0.0
+5226eb5e70712d3425df4b0e57d5a406 0.0
+60ce43bfcf795e997e3c969938262dab 0.0
+5d20f93c621d95608e5b8473d56d5b52 0.0
+5aa1c4b839946b713b450ff38e7d5bf8 0.0
+5ae365ca527c17c22171f54421e75197 0.0
+5b31527f1b28c37035a08b3642d61ece 0.0
+5b6a30b5a900999d91448bc79c5e14d4 0.0
+5ba339813ae7235641d2f1ff581ff2d7 0.0
+5bfef4e7e21aee58609421e4f24b5dc8 0.0
+5c3e4422374120cd00d41557946e5c6f 0.0
+5c6ba6ec99b516d0d9cba8e4730ea1fe 0.0
+5c78e7010bae7392c680c7ee9a1ed076 0.0
+5ca5ea401c8d3569e69626d2f81d5578 0.0
+5cad0c3ca2db85210a2d320bbe783fbd 0.0
+5cf5895a231b0a52d5859c74c74aada7 0.0
+5d141d74c8bd046171ec64835c4864b2 0.0
+5e151aa3f416a2fa6f0a1d8985282783 0.0
+5a0df24ab3d9c2d78a043cd477a54b4f 0.0
+5e32dea22765b73ed60b1d78a9d4d574 0.0
+5e4405e37412f415c47c112a0cbc487a 0.0
+5e99c84d99dca9630c51c5fb88000143 0.0
+5eb9520307f1836913cc5937f1c9f64d 0.0
+5ee1da6b8eb2e1149af74254c8c07736 0.0
+5f1d2db99a41906b5ca2800149e0c385 0.0
+5f8c4f83215d666e801e21278b17fb45 0.0
+5ff2bac04b0e8ba06ff7e0293588c308 0.0
+5ff715b26c5f3632a12da2d4dbd758bd 0.0
+6007bf22076e2e3d0030e38442149c54 0.0
+602510f333bda9666e551383f7ebaf82 0.0
+60746d8928422234eb34fff42f4cca47 0.0
+60c57911662a9159dfdd0fc05d975a55 0.0
+5a21785c07d7cc123256f09d00deec29 0.0
+59d031b8ba709e8da78b4c1f63c74f09 0.0
+52b25decd37eb58a3bd46167d1c46354 0.0
+56c3d1a8e4dc5c2aa7afbc1563559448 0.0
+5350db19ba80799a8c5ee886143dd940 0.0
+536018032c5d0d65e6d25d3988f87421 0.0
+53609d8db4e52b4f03beab3bc44ad5ba 0.0
+53668ab088d92c4856914e433447af5a 0.0
+539c656b9710b0099a96a678b3023299 0.0
+53a3eb7f49d8d6857ba7c50dd5bda866 0.0
+53f6fd6e785c7fb9d72f94a6ea343a15 0.0
+5406f4862e6c5e04e925b8f0a501958d 0.0
+5494f5c56e0875a35b62e234cc67ffa1 0.0
+552e00cb506f739acaf08aa5641f9d84 0.0
+55993950ca597b6dc821765327bbf3b9 0.0
+55af0326535a408ef56415f2952e2896 0.0
+55d4d3011e1f3bb8e4e5089b801b61a4 0.0
+576c2135e8ddf9d0a3674a5937d2466d 0.0
+59c6ed04d2faf081cecca31f1eb2e42d 0.0
+580e8927be639c5ab7fa8731a72c76d2 0.0
+5844d8e76e12fb8526d7fede1c425670 0.0
+58464ec27209abed9698e88ab974e84f 0.0
+585f01cd625ddb45fa4eebbc072fecd1 0.0
+5860dac317300b1fe6094e958240cc7f 0.0
+58df89090206b56c52bfffa4b97f4d5d 0.0
+59196a586276f0be745d0e334fc071c6 0.0
+59597cd430deea6d8e0908dcd6db7e50 0.0
+596c1c5efe777a8d8aafbca19fe37e1d 0.0
+597966c93921ecc2907ad15121cbcd2c 0.0
+599ba6458e91e2527f358f547ea39261 0.0
+59b17c1cd2e1fa86064869f5bdbea8c4 0.0
+59c4a678634e50b3fda861310820aadd 0.0
+ffaecab289059c048c7494d4595a746c 0.0
diff --git a/q2_sample_classifier/tests/data/outliers.tsv b/q2_sample_classifier/tests/data/outliers.tsv
new file mode 100644
index 0000000..ec19433
--- /dev/null
+++ b/q2_sample_classifier/tests/data/outliers.tsv
@@ -0,0 +1,7 @@
+ outlier
+a True
+b False
+c True
+d False
+e True
+f False
diff --git a/q2_sample_classifier/tests/data/predictions.tsv b/q2_sample_classifier/tests/data/predictions.tsv
new file mode 100644
index 0000000..ecc05a5
--- /dev/null
+++ b/q2_sample_classifier/tests/data/predictions.tsv
@@ -0,0 +1,127 @@
+SampleID prediction
+10249.C001.10SS 4.5
+10249.C002.05SS 2.5
+10249.C004.01SS 0.5
+10249.C004.11SS 4.5
+10249.C007.13SS 5.0
+10249.C008.02SS 7.0
+10249.C014.02SS 6.5
+10249.C016.12SS 11.0
+10249.C017.01SS.r 7.0
+10249.C018.03SS 8.0
+10249.C020.14SS 6.5
+10249.C033.05SS 4.5
+10249.C012.16SS 19.5
+10249.C024.12SS 15.0
+10249.C025.12SS 13.0
+10249.C002.19SS 28.0
+10249.C020.25SS 24.0
+10249.C030.19SS 18.0
+10249.C033.10SD 15.0
+10249.C033.14SS 10.0
+10249.C001.35SS 8.5
+10249.C014.22SS 7.0
+10249.C017.01SS 9.5
+10249.C025.21SS 14.0
+10249.C037.16SS 17.5
+10249.C055.06SS 5.5
+10249.C005.08SS 1.0
+10249.C007.08SS 1.0
+10249.C011.04SS 5.5
+10249.C011.10SS 12.5
+10249.C012.12SS 4.0
+10249.C025.08SS 6.0
+10249.C030.07SS 3.0
+10249.C033.04SS 5.0
+10249.C034.05SS 3.0
+10249.C034.06SS 3.0
+10249.C020.17SS 10.0
+10249.C020.18SS 14.0
+10249.C002.21SD 15.5
+10249.C002.21SS 15.0
+10249.C020.24SS 14.0
+10249.C020.26SD 7.0
+10249.C031.14SS 14.0
+10249.C032.11SD 6.0
+10249.C044.15SS 24.0
+10249.C055.02SD 2.0
+10249.C002.01SS 5.0
+10249.C031.09SS 6.5
+10249.C032.09SS 3.0
+10249.C034.16SS 16.0
+10249.C036.13SS 11.0
+10249.C001.14SS 2.5
+10249.C004.02SS 0.0
+10249.C007.11SS 1.5
+10249.C007.15SS 16.5
+10249.C007.16SS 5.0
+10249.C010.09SS 8.0
+10249.C011.07SS 8.5
+10249.C012.07SS 1.5
+10249.C016.04SS 4.0
+10249.C017.05SS 6.5
+10249.C030.05SS 0.5
+10249.C034.03SS 4.0
+10249.C002.16SS 19.0
+10249.C005.16SS 17.0
+10249.C014.12SS 9.5
+10249.C016.13SS 23.0
+10249.C033.10SS 5.5
+10249.C005.21SS 26.0
+10249.C012.21SS 16.0
+10249.C030.19SD 20.0
+10249.C053.03SD 3.0
+10249.C053.03SS 2.5
+10249.C008.15SS 6.5
+10249.C030.18SS 23.0
+10249.C033.19SS 22.0
+10249.C001.15SS 16.5
+10249.C002.01SS.r 9.0
+10249.C004.04SS 5.5
+10249.C005.13SS 13.5
+10249.C016.11SS 3.0
+10249.C024.08SS 3.0
+10249.C031.05SS 7.5
+10249.C031.06SS 7.5
+10249.C037.07SS 20.0
+10249.C042.07SS 9.0
+10249.C011.14SS 18.5
+10249.C014.15SS 2.5
+10249.C018.13SS 14.0
+10249.C020.19SS 18.0
+10249.C025.14SS 7.0
+10249.C001.34SS 27.0
+10249.C001.36SS 22.0
+10249.C007.22SS 20.0
+10249.C012.21SD 24.0
+10249.C022.18SD 21.0
+10249.C022.18SS 22.0
+10249.C030.14SS 19.5
+10249.C001.01SS 1.5
+10249.C018.16SS 19.0
+10249.C044.17SS 28.0
+10249.C009.02SS 7.0
+10249.C014.08SS 9.0
+10249.C020.13SS 4.5
+10249.C025.10SS 12.0
+10249.C001.04SS 4.5
+10249.C030.13SS 16.0
+10249.C016.14SS 23.0
+10249.C001.34SD 23.0
+10249.C001.36SD 25.0
+10249.C005.21SD 24.0
+10249.C014.16SS 17.0
+10249.C020.25SD 17.0
+10249.C020.26SS 18.0
+10249.C025.14SD 12.0
+10249.C032.17SS 22.0
+10249.C045.05SD 6.0
+10249.C046.06SD 15.0
+10249.C046.06SS 15.0
+10249.C055.02SS 3.0
+10249.C005.22SS 27.0
+10249.C011.16SS 18.0
+10249.C014.21SS 19.0
+10249.C018.17SS 16.0
+10249.C022.20SS 14.0
+10249.C033.16SS 15.0
diff --git a/q2_sample_classifier/tests/data/true_targets.tsv b/q2_sample_classifier/tests/data/true_targets.tsv
new file mode 100644
index 0000000..fe1b16a
--- /dev/null
+++ b/q2_sample_classifier/tests/data/true_targets.tsv
@@ -0,0 +1,9 @@
+sample-id delivery
+10249.C041.08SS Vaginal
+10249.C055.08SS Cesarean
+10249.C027.07SS Vaginal
+10249.C042.07SS Vaginal
+10249.C005.08SS Cesarean
+10249.C056.09SS Cesarean
+10249.C035.07SD Vaginal
+10249.C001.10SS Vaginal
diff --git a/q2_sample_classifier/tests/data/vaw.qza b/q2_sample_classifier/tests/data/vaw.qza
new file mode 100644
index 0000000..813a06f
--- /dev/null
+++ b/q2_sample_classifier/tests/data/vaw.qza
Binary files differ
diff --git a/q2_sample_classifier/tests/data/vaw.txt b/q2_sample_classifier/tests/data/vaw.txt
new file mode 100644
index 0000000..cba0fc8
--- /dev/null
+++ b/q2_sample_classifier/tests/data/vaw.txt
@@ -0,0 +1,7 @@
+#SampleID Column
+Sample1 a
+Sample2 a
+Sample3 a
+Sample4 b
+Sample5 b
+Sample6 b
diff --git a/q2_sample_classifier/tests/data/vaw_importance.tsv b/q2_sample_classifier/tests/data/vaw_importance.tsv
new file mode 100644
index 0000000..4759ee9
--- /dev/null
+++ b/q2_sample_classifier/tests/data/vaw_importance.tsv
@@ -0,0 +1,6 @@
+ importance
+GG_OTU_1 0.084698283208355865
+GG_OTU_2 0.077601184175696976
+GG_OTU_3 0.065702517505059144
+GG_OTU_4 0.061718558716901406
+GG_OTU_5 0.028086160290024458
diff --git a/q2_sample_classifier/tests/test_actions.py b/q2_sample_classifier/tests/test_actions.py
new file mode 100644
index 0000000..2e1bd2a
--- /dev/null
+++ b/q2_sample_classifier/tests/test_actions.py
@@ -0,0 +1,183 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import os
+
+import pandas as pd
+import pandas.testing as pdt
+import numpy as np
+import biom
+
+import qiime2
+from qiime2.plugins import sample_classifier
+
+from q2_sample_classifier.tests.test_base_class import \
+ SampleClassifierTestPluginBase
+from q2_sample_classifier.tests.test_estimators import SampleEstimatorTestBase
+from q2_sample_classifier.classify import summarize
+
+
+class NowLetsTestTheActions(SampleClassifierTestPluginBase):
+
+ def setUp(self):
+ super().setUp()
+ md = pd.Series(['a', 'a', 'b', 'b', 'b'],
+ index=['a', 'b', 'c', 'd', 'e'], name='bugs')
+ md.index.name = 'SampleID'
+ self.md = qiime2.CategoricalMetadataColumn(md)
+
+ tab = biom.Table(
+ np.array([[3, 6, 7, 3, 6], [3, 4, 5, 6, 2], [8, 6, 4, 1, 0],
+ [8, 6, 4, 1, 0], [8, 6, 4, 1, 0]]),
+ observation_ids=['v', 'w', 'x', 'y', 'z'],
+ sample_ids=['a', 'b', 'c', 'd', 'e'])
+ self.tab = qiime2.Artifact.import_data('FeatureTable[Frequency]', tab)
+
+ md2 = pd.DataFrame({'trash': ['a', 'a', 'b', 'b', 'b', 'junk'],
+ 'floats': [0.1, 0.1, 1.3, 1.8, 1000.1, 0.1],
+ 'ints': [0, 1, 2, 2, 2, 0],
+ 'nans': [1, 1, 2, 2, np.nan, np.nan],
+ 'negatives': [-7, -3, -1.2, -4, -9, -1]},
+ index=['a', 'b', 'c', 'd', 'e', 'peanut'])
+ md2.index.name = 'SampleID'
+ self.md2 = qiime2.Metadata(md2)
+
+    # let's make sure the function runs without errors and that the correct
+ # transformers are in place (see issue 114)
+ def test_action_split_table(self):
+ res = sample_classifier.actions.split_table(
+ self.tab, self.md, test_size=0.5)
+ y_train = res.training_targets.view(pd.Series)
+ y_test = res.test_targets.view(pd.Series)
+
+ # test whether extracted target is correct
+ self.assertEqual(y_train.name, 'bugs')
+
+ # test if complete target column is covered
+        y_all = pd.concat([y_train, y_test]).sort_index()
+ y_all.index.name = 'SampleID'
+ pdt.assert_series_equal(y_all, self.md._series)
+
+ def test_metatable(self):
+ exp = biom.Table(
+ np.array([[0.1, 0.1, 1.3, 1.8, 1000.1, 0.1],
+ [0, 1, 2, 2, 2, 0]]),
+ observation_ids=['floats', 'ints'],
+ sample_ids=['a', 'b', 'c', 'd', 'e', 'peanut'])
+ res, = sample_classifier.actions.metatable(
+ self.md2, missing_values='drop_features')
+ report = res.view(biom.Table).descriptive_equality(exp)
+ self.assertIn('Tables appear equal', report, report)
+
+ def test_metatable_missing_error(self):
+ with self.assertRaisesRegex(ValueError, "missing values"):
+ sample_classifier.actions.metatable(
+ self.md2, missing_values='error')
+
+ def test_metatable_drop_samples(self):
+ exp = biom.Table(
+ np.array([[3, 6, 7, 3], [3, 4, 5, 6], [8, 6, 4, 1],
+ [8, 6, 4, 1], [8, 6, 4, 1],
+ [0.1, 0.1, 1.3, 1.8],
+ [0, 1, 2, 2], [1, 1, 2, 2]]),
+ observation_ids=['v', 'w', 'x', 'y', 'z', 'floats', 'ints',
+ 'nans'],
+ sample_ids=['a', 'b', 'c', 'd'])
+ res, = sample_classifier.actions.metatable(
+ self.md2, self.tab, missing_values='drop_samples')
+ report = res.view(biom.Table).descriptive_equality(exp)
+ self.assertIn('Tables appear equal', report, report)
+
+ def test_metatable_fill_na(self):
+ exp = biom.Table(
+ np.array([[3, 6, 7, 3, 6], [3, 4, 5, 6, 2], [8, 6, 4, 1, 0],
+ [8, 6, 4, 1, 0], [8, 6, 4, 1, 0],
+ [0.1, 0.1, 1.3, 1.8, 1000.1],
+ [0, 1, 2, 2, 2], [1., 1., 2., 2., 0.]]),
+ observation_ids=['v', 'w', 'x', 'y', 'z', 'floats', 'ints',
+ 'nans'],
+ sample_ids=['a', 'b', 'c', 'd', 'e'])
+ res, = sample_classifier.actions.metatable(
+ self.md2, self.tab, missing_values='fill')
+ report = res.view(biom.Table).descriptive_equality(exp)
+ self.assertIn('Tables appear equal', report, report)
+
+ def test_metatable_with_merge(self):
+ exp = biom.Table(
+ np.array([[3, 6, 7, 3, 6], [3, 4, 5, 6, 2], [8, 6, 4, 1, 0],
+ [8, 6, 4, 1, 0], [8, 6, 4, 1, 0],
+ [0.1, 0.1, 1.3, 1.8, 1000.1],
+ [0, 1, 2, 2, 2]]),
+ observation_ids=['v', 'w', 'x', 'y', 'z', 'floats', 'ints'],
+ sample_ids=['a', 'b', 'c', 'd', 'e'])
+ res, = sample_classifier.actions.metatable(
+ self.md2, self.tab, missing_values='drop_features')
+ report = res.view(biom.Table).descriptive_equality(exp)
+ self.assertIn('Tables appear equal', report, report)
+
+ def test_metatable_with_merge_successful_inner_join(self):
+ exp = biom.Table(
+ np.array([[3, 6, 7, 3], [3, 4, 5, 6], [8, 6, 4, 1],
+ [8, 6, 4, 1], [8, 6, 4, 1], [0.1, 0.1, 1.3, 1.8],
+ [0, 1, 2, 2], [1., 1., 2., 2.]]),
+ observation_ids=['v', 'w', 'x', 'y', 'z', 'floats', 'ints',
+ 'nans'],
+ sample_ids=['a', 'b', 'c', 'd'])
+ res, = sample_classifier.actions.metatable(
+ self.md2.filter_ids(['a', 'b', 'c', 'd']), self.tab,
+ missing_values='error')
+ report = res.view(biom.Table).descriptive_equality(exp)
+ self.assertIn('Tables appear equal', report, report)
+
+ def test_metatable_with_merge_error_inner_join(self):
+ with self.assertRaisesRegex(ValueError, "Missing samples"):
+ sample_classifier.actions.metatable(
+ self.md2.filter_ids(['a', 'b', 'c', 'd']),
+ self.tab, missing_samples='error',
+ missing_values='drop_samples')
+
+ def test_metatable_empty_metadata_after_drop_all_unique(self):
+ with self.assertRaisesRegex(
+ ValueError, "All metadata"): # are belong to us
+ sample_classifier.actions.metatable(
+ self.md2.filter_ids(['b', 'c']), self.tab,
+ missing_values='drop_samples', drop_all_unique=True)
+
+ def test_metatable_no_samples_after_filtering(self):
+ junk_md = pd.DataFrame(
+ {'trash': ['a', 'a', 'b', 'b', 'b', 'junk'],
+ 'floats': [np.nan, np.nan, np.nan, 1.8, 1000.1, 0.1],
+ 'ints': [0, 1, 2, np.nan, 2, 0],
+ 'nans': [1, 1, 2, 2, np.nan, np.nan],
+ 'negatives': [-7, -4, -1.2, -4, -9, -1]},
+ index=['a', 'b', 'c', 'd', 'e', 'peanut'])
+ junk_md.index.name = 'SampleID'
+ junk_md = qiime2.Metadata(junk_md)
+ with self.assertRaisesRegex(ValueError, "All metadata samples"):
+ sample_classifier.actions.metatable(
+ junk_md, missing_values='drop_samples')
+
+
+# make sure summarize visualizer works and that rfe_scores are stored properly
+class TestSummarize(SampleEstimatorTestBase):
+
+ def test_summary_with_rfecv(self):
+ summarize(self.temp_dir.name, self.pipeline)
+
+ self.assertTrue('rfe_plot.pdf' in os.listdir(self.temp_dir.name))
+ self.assertTrue('rfe_plot.png' in os.listdir(self.temp_dir.name))
+ self.assertTrue('rfe_scores.tsv' in os.listdir(self.temp_dir.name))
+
+ def test_summary_without_rfecv(self):
+ # nuke the rfe_scores to test the other branch of _summarize_estimator
+ del self.pipeline.rfe_scores
+ summarize(self.temp_dir.name, self.pipeline)
+
+ self.assertFalse('rfe_plot.pdf' in os.listdir(self.temp_dir.name))
+ self.assertFalse('rfe_plot.png' in os.listdir(self.temp_dir.name))
+ self.assertFalse('rfe_scores.tsv' in os.listdir(self.temp_dir.name))
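+
+
+# Illustrative sketch (not part of the test suite): the three missing-value
+# policies that the metatable tests above exercise, expressed directly in
+# pandas. `md` here is a hypothetical numeric metadata frame; metatable is
+# expected to apply comparable logic before merging with the feature table
+# (per the expected tables above, it additionally excludes non-numeric and
+# negative-valued columns such as 'trash' and 'negatives').
+def _metatable_policy_sketch():
+    md = pd.DataFrame({'floats': [0.1, np.nan], 'ints': [0, 1]},
+                      index=['a', 'b'])
+    drop_features = md.dropna(axis='columns')  # 'drop_features'
+    drop_samples = md.dropna(axis='index')     # 'drop_samples'
+    fill = md.fillna(0)                        # 'fill' (NaN -> 0)
+    return drop_features, drop_samples, fill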
diff --git a/q2_sample_classifier/tests/test_base_class.py b/q2_sample_classifier/tests/test_base_class.py
new file mode 100644
index 0000000..cdc0cc8
--- /dev/null
+++ b/q2_sample_classifier/tests/test_base_class.py
@@ -0,0 +1,27 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+import tempfile
+import pkg_resources
+
+from qiime2.plugin.testing import TestPluginBase
+
+
+class SampleClassifierTestPluginBase(TestPluginBase):
+ package = 'q2_sample_classifier.tests'
+
+ def setUp(self):
+ super().setUp()
+ self.temp_dir = tempfile.TemporaryDirectory(
+ prefix='q2-sample-classifier-test-temp-')
+
+ def tearDown(self):
+ self.temp_dir.cleanup()
+
+ def get_data_path(self, filename):
+ return pkg_resources.resource_filename(self.package,
+ 'data/%s' % filename)
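+
+
+# Hypothetical usage sketch (comments only): subclasses inherit a fresh
+# temporary directory per test plus data-path resolution, e.g.:
+#
+#     class ExampleTests(SampleClassifierTestPluginBase):
+#         def test_example(self):
+#             fp = self.get_data_path('outliers.tsv')
+#             # -> .../q2_sample_classifier/tests/data/outliers.tsv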
diff --git a/q2_sample_classifier/tests/test_classifier.py b/q2_sample_classifier/tests/test_classifier.py
new file mode 100644
index 0000000..e17bbd9
--- /dev/null
+++ b/q2_sample_classifier/tests/test_classifier.py
@@ -0,0 +1,236 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+from warnings import filterwarnings
+import pandas as pd
+import numpy as np
+import skbio
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.feature_selection import RFECV
+import pandas.testing as pdt
+import biom
+
+import qiime2
+from qiime2.plugins import sample_classifier
+
+from q2_sample_classifier.visuals import (
+ _custom_palettes, _roc_palette, _roc_per_class, _roc_micro_average,
+ _roc_macro_average, _binarize_labels, _generate_roc_plots)
+from q2_sample_classifier.utilities import _extract_rfe_scores
+from q2_sample_classifier.tests.test_base_class import \
+ SampleClassifierTestPluginBase
+
+
+filterwarnings("ignore", category=UserWarning)
+filterwarnings("ignore", category=Warning)
+
+
+class TestRFEExtractor(SampleClassifierTestPluginBase):
+
+ def setUp(self):
+ super().setUp()
+ np.random.seed(0)
+ self.X = np.random.rand(50, 20)
+ self.y = np.random.randint(0, 2, 50)
+
+ self.exp1 = pd.Series([
+ 0.4999999999999999, 0.52, 0.52, 0.5399999999999999,
+ 0.44000000000000006, 0.52, 0.4600000000000001,
+ 0.5599999999999998, 0.52, 0.52, 0.5, 0.5399999999999999, 0.54,
+ 0.5599999999999999, 0.47999999999999987, 0.6199999999999999,
+ 0.5399999999999999, 0.5, 0.4999999999999999, 0.45999999999999996],
+ index=pd.Index(range(1, 21)), name='Accuracy')
+ self.exp2 = pd.Series([
+ 0.5000000000000001, 0.52, 0.48, 0.5599999999999998, 0.5,
+ 0.5799999999999998, 0.54, 0.4600000000000001, 0.6,
+ 0.45999999999999996, 0.45999999999999996],
+ index=pd.Index([1] + [i for i in range(2, 21, 2)]),
+ name='Accuracy')
+ self.exp3 = pd.Series({1: 0.4600000000000001, 20: 0.45999999999999996},
+ name='Accuracy')
+
+ def extract_rfe_scores_template(self, steps, expected):
+ selector = RFECV(RandomForestClassifier(
+ random_state=123, n_estimators=2), step=steps, cv=10)
+ selector = selector.fit(self.X, self.y.ravel())
+ pdt.assert_series_equal(
+ _extract_rfe_scores(selector), expected)
+
+ def test_extract_rfe_scores_step_int_one(self):
+ self.extract_rfe_scores_template(1, self.exp1)
+
+ def test_extract_rfe_scores_step_float_one(self):
+ self.extract_rfe_scores_template(0.05, self.exp1)
+
+ def test_extract_rfe_scores_step_int_two(self):
+ self.extract_rfe_scores_template(2, self.exp2)
+
+ def test_extract_rfe_scores_step_float_two(self):
+ self.extract_rfe_scores_template(0.1, self.exp2)
+
+ def test_extract_rfe_scores_step_full_range(self):
+ self.extract_rfe_scores_template(20, self.exp3)
+
+ def test_extract_rfe_scores_step_out_of_range(self):
+ # should be equal to full_range
+ self.extract_rfe_scores_template(21, self.exp3)
+
+
+# test classifier pipelines succeed on binary data
+class TestBinaryClassification(SampleClassifierTestPluginBase):
+
+ def setUp(self):
+ super().setUp()
+ self.md = qiime2.CategoricalMetadataColumn(pd.Series(
+ ['a', 'a', 'a', 'b', 'b', 'b'],
+ index=pd.Index([c for c in 'abcdef'], name='id'), name='foo'))
+
+ tab = biom.Table(np.array(
+ [[13, 26, 37, 3, 6, 1], [33, 24, 23, 5, 6, 2],
+ [38, 26, 33, 4, 1, 0], [3, 2, 1, 22, 25, 31],
+ [2, 1, 3, 44, 46, 42]]),
+ observation_ids=[c for c in 'vwxyz'],
+ sample_ids=[c for c in 'abcdef'])
+ self.tab = qiime2.Artifact.import_data('FeatureTable[Frequency]', tab)
+
+ dist = skbio.DistanceMatrix.from_iterable(
+ iterable=[1, 16, 2, 1, 16, 17],
+ metric=lambda x, y: abs(y-x), keys=[c for c in 'abcdef']
+ )
+ self.dist = qiime2.Artifact.import_data('DistanceMatrix', dist)
+
+ # we will make sure predictions are correct, but no need to validate
+ # other outputs, which are tested elsewhere.
+ def test_classify_samples_binary(self):
+ res = sample_classifier.actions.classify_samples(
+ table=self.tab, metadata=self.md,
+ test_size=0.3, cv=1, n_estimators=2, n_jobs=1, random_state=123,
+ parameter_tuning=False, optimize_feature_selection=False)
+ exp = pd.Series(['a', 'b'], name='prediction',
+ index=pd.Index(['c', 'f'], name='id'))
+ pdt.assert_series_equal(exp, res[2].view(pd.Series))
+
+ def test_classify_samples_ncv_binary(self):
+ res = sample_classifier.actions.classify_samples_ncv(
+ table=self.tab, metadata=self.md, cv=3, n_estimators=2, n_jobs=1,
+ random_state=123, parameter_tuning=False)
+ exp = pd.Series([c for c in 'ababab'], name='prediction',
+ index=pd.Index([i for i in 'aebdcf'], name='id'))
+ pdt.assert_series_equal(exp, res[0].view(pd.Series))
+
+ def test_classify_samples_dist_binary(self):
+ res = sample_classifier.actions.classify_samples_from_dist(
+ distance_matrix=self.dist, metadata=self.md, k=2, cv=3,
+ n_jobs=1, random_state=123)
+ exp = pd.Series([c for c in 'abaaaa'], name='0',
+ index=pd.Index([i for i in 'abcdef'], name='id'))
+ pdt.assert_series_equal(
+ exp.sort_index(), res[0].view(pd.Series).sort_index()
+ )
+
+
+class TestROC(SampleClassifierTestPluginBase):
+ def setUp(self):
+ super().setUp()
+ self.md = np.array(
+ [[1, 0, 0], [1, 0, 0], [1, 0, 0], [1, 0, 0], [1, 0, 0], [1, 0, 0],
+ [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0],
+ [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1],
+ [0, 0, 1], [0, 0, 1]])
+
+ np.random.seed(0)
+ probs = np.random.rand(20, 3)
+ # probabilities should sum to 1 for each sample
+ self.probs = np.apply_along_axis(
+ lambda x: x / x.sum(), axis=1, arr=probs)
+
+ self.exp_fpr = {0: [0., 0.07692308, 0.46153846, 0.46153846, 0.76923077,
+ 0.76923077, 0.84615385, 0.84615385, 1., 1.],
+ 1: [0., 0., 0.15384615, 0.15384615, 0.61538462,
+ 0.61538462, 0.69230769, 0.69230769, 1., 1.],
+ 2: [0., 0.07142857, 0.07142857, 0.14285714, 0.14285714,
+ 0.78571429, 0.78571429, 0.92857143, 0.92857143,
+ 1.]}
+ self.exp_tdr = {0: [0., 0., 0., 0.57142857, 0.57142857, 0.71428571,
+ 0.71428571, 0.85714286, 0.85714286, 1.],
+ 1: [0., 0.14285714, 0.14285714, 0.28571429, 0.28571429,
+ 0.57142857, 0.57142857, 0.85714286, 0.85714286,
+ 1.],
+ 2: [0., 0., 0.16666667, 0.16666667, 0.5, 0.5,
+ 0.66666667, 0.66666667, 1., 1.]}
+ self.exp_roc_auc = {0: 0.3626373626373626, 1: 0.4615384615384615,
+ 2: 0.49999999999999994}
+
+ # this test confirms that all palettes load properly.
+ def test_roc_palette(self):
+ [_roc_palette(p, 3) for p in _custom_palettes().keys()]
+
+ def test_roc_per_class(self):
+ fpr, tdr, roc_auc = _roc_per_class(self.md, self.probs, [0, 1, 2])
+ for d, e in zip([fpr, tdr, roc_auc],
+ [self.exp_fpr, self.exp_tdr, self.exp_roc_auc]):
+ for c in [0, 1, 2]:
+ np.testing.assert_array_almost_equal(d[c], e[c])
+
+ def test_roc_micro_average(self):
+ fpr, tdr, roc_auc = _roc_micro_average(
+ self.md, self.probs, self.exp_fpr, self.exp_tdr, self.exp_roc_auc)
+ np.testing.assert_array_almost_equal(fpr['micro'], np.array(
+ [0., 0.025, 0.025, 0.075, 0.075, 0.1, 0.1, 0.225, 0.225, 0.275,
+ 0.275, 0.475, 0.475, 0.575, 0.575, 0.6, 0.6, 0.65, 0.65, 0.675,
+ 0.675, 0.725, 0.725, 0.75, 0.75, 0.825, 0.825, 0.925, 0.925, 1.,
+ 1.]))
+ np.testing.assert_array_almost_equal(tdr['micro'], np.array(
+ [0., 0., 0.05, 0.05, 0.1, 0.1, 0.15, 0.15, 0.2, 0.2, 0.25, 0.25,
+ 0.35, 0.35, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5, 0.55, 0.55, 0.6, 0.6,
+ 0.75, 0.75, 0.8, 0.8, 0.95, 0.95, 1.]))
+ self.assertAlmostEqual(roc_auc['micro'], 0.41374999999999995)
+
+ def test_roc_macro_average(self):
+ fpr, tdr, roc_auc = _roc_macro_average(
+ self.exp_fpr, self.exp_tdr, self.exp_roc_auc, [0, 1, 2])
+ np.testing.assert_array_almost_equal(fpr['macro'], np.array(
+ [0., 0.07142857, 0.07692308, 0.14285714, 0.15384615, 0.46153846,
+ 0.61538462, 0.69230769, 0.76923077, 0.78571429, 0.84615385,
+ 0.92857143, 1.]))
+ np.testing.assert_array_almost_equal(tdr['macro'], np.array(
+ [0.04761905, 0.1031746, 0.1031746, 0.21428571, 0.26190476,
+ 0.45238095, 0.54761905, 0.64285714, 0.69047619, 0.74603175,
+ 0.7936508, 0.90476191, 1.]))
+ self.assertAlmostEqual(roc_auc['macro'], 0.49930228548098726)
+
+    # Proves that the ROC nuts and bolts work if the predictions do not
+    # include all of the classes present in the probabilities. This can occur
+    # when there are many classes or few samples and the data are not
+    # stratified:
+    # https://github.com/qiime2/q2-sample-classifier/issues/171
+    # (a standalone binarization sketch appears at the end of this file)
+ def test_binarize_and_roc_on_missing_classes(self):
+ # seven samples with only 4 classes (adeh) of 8 possible classes
+ # (abcdefgh) represented
+ md = pd.Series([i for i in 'hedhadd'])
+ # array of 7 samples X 8 classes
+ # the values do not matter, only the labels
+ probs = pd.DataFrame(np.random.rand(7, 8),
+ columns=[i for i in 'abcdefgh'])
+ _generate_roc_plots(md, probs, 'GreenBlue')
+
+
+class TestBinarize(SampleClassifierTestPluginBase):
+ def setUp(self):
+ super().setUp()
+
+ def test_binarize_labels_binary(self):
+ md = pd.Series([c for c in 'aabbaa'])
+ labels = _binarize_labels(md, ['a', 'b'])
+ exp = np.array([[1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0]])
+ np.testing.assert_array_equal(exp, labels)
+
+ def test_binarize_labels_multiclass(self):
+ md = pd.Series([c for c in 'abcabc'])
+ labels = _binarize_labels(md, ['a', 'b', 'c'])
+ exp = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1],
+ [1, 0, 0], [0, 1, 0], [0, 0, 1]])
+ np.testing.assert_array_equal(exp, labels)
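+
+
+# Illustrative sketch (not part of the test suite): for multiclass labels,
+# the one-hot encoding checked in TestBinarize above matches scikit-learn's
+# public label_binarize. (For exactly two classes label_binarize collapses
+# to a single column, which is presumably why the binary case is exercised
+# separately.)
+def _binarize_sketch():
+    from sklearn.preprocessing import label_binarize
+    # rows come back one-hot: [1, 0, 0], [0, 1, 0], [0, 0, 1], ...
+    return label_binarize([c for c in 'abcabc'], classes=['a', 'b', 'c'])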
diff --git a/q2_sample_classifier/tests/test_estimators.py b/q2_sample_classifier/tests/test_estimators.py
new file mode 100644
index 0000000..277be3b
--- /dev/null
+++ b/q2_sample_classifier/tests/test_estimators.py
@@ -0,0 +1,593 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+import os
+import pandas as pd
+import pandas.testing as pdt
+import biom
+import shutil
+import json
+import numpy as np
+from sklearn.metrics import mean_squared_error, accuracy_score
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.pipeline import Pipeline
+import skbio
+
+import qiime2
+from q2_types.feature_table import (FeatureTable, PercentileNormalized)
+
+from qiime2.plugins import sample_classifier
+from q2_sample_classifier.tests.test_base_class import \
+ SampleClassifierTestPluginBase
+from q2_sample_classifier.classify import (
+ regress_samples_ncv, classify_samples_ncv, fit_classifier, fit_regressor,
+ detect_outliers, split_table, predict_classification,
+ predict_regression)
+from q2_sample_classifier.utilities import (
+ _set_parameters_and_estimator, _train_adaboost_base_estimator,
+ _match_series_or_die, _extract_features)
+from q2_sample_classifier import (
+ SampleEstimatorDirFmt, PickleFormat)
+
+
+class SampleEstimatorTestBase(SampleClassifierTestPluginBase):
+ package = 'q2_sample_classifier.tests'
+
+ def setUp(self):
+ super().setUp()
+
+ def _load_biom(table_fp):
+ table_fp = self.get_data_path(table_fp)
+ table = qiime2.Artifact.load(table_fp)
+ table = table.view(biom.Table)
+ return table
+
+ def _load_cmc(md_fp, column):
+ md_fp = self.get_data_path(md_fp)
+ md = pd.read_csv(md_fp, sep='\t', header=0, index_col=0)
+ md = qiime2.CategoricalMetadataColumn(md[column])
+ return md
+
+ table_chard_fp = _load_biom('chardonnay.table.qza')
+ mdc_chard_fp = _load_cmc('chardonnay.map.txt', 'Region')
+
+ pipeline, importances = fit_classifier(
+ table_chard_fp, mdc_chard_fp, random_state=123,
+ n_estimators=2, n_jobs=1, optimize_feature_selection=True,
+ parameter_tuning=True, missing_samples='ignore')
+ transformer = self.get_transformer(
+ Pipeline, SampleEstimatorDirFmt)
+ self._sklp = transformer(pipeline)
+ sklearn_pipeline = self._sklp.sklearn_pipeline.view(PickleFormat)
+ self.sklearn_pipeline = str(sklearn_pipeline)
+ self.pipeline = pipeline
+
+ def _custom_setup(self, version):
+ with open(os.path.join(self.temp_dir.name,
+ 'sklearn_version.json'), 'w') as fh:
+ fh.write(json.dumps({'sklearn-version': version}))
+ shutil.copy(self.sklearn_pipeline, self.temp_dir.name)
+ return SampleEstimatorDirFmt(
+ self.temp_dir.name, mode='r')
+
+
+class EstimatorsTests(SampleClassifierTestPluginBase):
+
+ def setUp(self):
+ super().setUp()
+
+ def _load_biom(table_fp):
+ table_fp = self.get_data_path(table_fp)
+ table = qiime2.Artifact.load(table_fp)
+ table = table.view(biom.Table)
+ return table
+
+ def _load_md(md_fp):
+ md_fp = self.get_data_path(md_fp)
+ md = pd.read_csv(md_fp, sep='\t', header=0, index_col=0)
+ md = qiime2.Metadata(md)
+ return md
+
+ def _load_nmc(md_fp, column):
+ md_fp = self.get_data_path(md_fp)
+ md = pd.read_csv(md_fp, sep='\t', header=0, index_col=0)
+ md = qiime2.NumericMetadataColumn(md[column])
+ return md
+
+ def _load_cmc(md_fp, column):
+ md_fp = self.get_data_path(md_fp)
+ md = pd.read_csv(md_fp, sep='\t', header=0, index_col=0)
+ md = qiime2.CategoricalMetadataColumn(md[column])
+ return md
+
+ self.table_chard_fp = _load_biom('chardonnay.table.qza')
+ self.md_chard_fp = _load_md('chardonnay.map.txt')
+ self.mdc_chard_fp = _load_cmc('chardonnay.map.txt', 'Region')
+ self.table_ecam_fp = _load_biom('ecam-table-maturity.qza')
+ self.md_ecam_fp = _load_md('ecam_map_maturity.txt')
+ self.mdc_ecam_fp = _load_nmc('ecam_map_maturity.txt', 'month')
+ self.exp_imp = pd.read_csv(
+ self.get_data_path('importance.tsv'), sep='\t', header=0,
+ index_col=0, names=['feature', 'importance'])
+ self.exp_pred = pd.read_csv(
+ self.get_data_path('predictions.tsv'), sep='\t', header=0,
+ index_col=0, squeeze=True)
+ index = pd.Index(['A', 'B', 'C', 'D'], name='id')
+ self.table_percnorm = qiime2.Artifact.import_data(
+ FeatureTable[PercentileNormalized], pd.DataFrame(
+ [[20.0, 20.0, 50.0, 10.0], [10.0, 10.0, 70.0, 10.0],
+ [90.0, 8.0, 1.0, 1.0], [30.0, 15.0, 20.0, 35.0]],
+ index=index,
+ columns=['feat1', 'feat2', 'feat3', 'feat4'])).view(biom.Table)
+ self.mdc_percnorm = qiime2.CategoricalMetadataColumn(
+ pd.Series(['X', 'X', 'Y', 'Y'], index=index, name='name'))
+
+ # test feature extraction
+ def test_extract_features(self):
+ table = self.table_ecam_fp
+ dicts = _extract_features(table)
+ dv = DictVectorizer()
+ dv.fit(dicts)
+ features = table.ids('observation')
+ self.assertEqual(set(dv.get_feature_names()), set(features))
+ self.assertEqual(len(dicts), len(table.ids()))
+ for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
+ for feature, count in zip(features, table_row):
+ if count == 0:
+ self.assertTrue(feature not in dict_row)
+ else:
+ self.assertEqual(dict_row[feature], count)
+
+ def test_classify_samples_from_dist(self):
+ # -- setup -- #
+ # 1,2 are a group, 3,4 are a group
+ sample_ids = ('f1', 'f2', 's1', 's2')
+ distance_matrix = skbio.DistanceMatrix([
+ [0, 1, 4, 4],
+ [1, 0, 4, 4],
+ [4, 4, 0, 1],
+ [4, 4, 1, 0],
+ ], ids=sample_ids)
+
+ dm = qiime2.Artifact.import_data('DistanceMatrix', distance_matrix)
+ categories = pd.Series(('skinny', 'skinny', 'fat', 'fat'),
+ index=sample_ids[::-1], name='body_mass')
+ categories.index.name = 'SampleID'
+ metadata = qiime2.CategoricalMetadataColumn(categories)
+
+ # -- test -- #
+ res = sample_classifier.actions.classify_samples_from_dist(
+ distance_matrix=dm, metadata=metadata, k=1, cv=3, random_state=123
+ )
+ pred = res[0].view(pd.Series).sort_values()
+ expected = pd.Series(('fat', 'skinny', 'fat', 'skinny'),
+ index=['f1', 's1', 'f2', 's2'])
+ not_expected = pd.Series(('fat', 'fat', 'fat', 'skinny'),
+ index=sample_ids)
+
+ # order matters for pd.Series.equals()
+ self.assertTrue(expected.sort_index().equals(pred.sort_index()))
+ self.assertFalse(not_expected.sort_index().equals(pred.sort_index()))
+
+ def test_classify_samples_from_dist_with_group_of_single_item(self):
+ # -- setup -- #
+ # 1 is a group, 2,3,4 are a group
+ sample_ids = ('f1', 's1', 's2', 's3')
+ distance_matrix = skbio.DistanceMatrix([
+ [0, 2, 3, 3],
+ [2, 0, 1, 1],
+ [3, 1, 0, 1],
+ [3, 1, 1, 0],
+ ], ids=sample_ids)
+
+ dm = qiime2.Artifact.import_data('DistanceMatrix', distance_matrix)
+ categories = pd.Series(('fat', 'skinny', 'skinny', 'skinny'),
+ index=sample_ids, name='body_mass')
+ categories.index.name = 'SampleID'
+ metadata = qiime2.CategoricalMetadataColumn(categories)
+
+ # -- test -- #
+ res = sample_classifier.actions.classify_samples_from_dist(
+ distance_matrix=dm, metadata=metadata, k=1, cv=3, random_state=123
+ )
+ pred = res[0].view(pd.Series)
+ expected = pd.Series(('skinny', 'skinny', 'skinny', 'skinny'),
+ index=sample_ids)
+
+ self.assertTrue(expected.sort_index().equals(pred.sort_index()))
+
+ def test_2nn(self):
+ # -- setup -- #
+        # the 2 nearest neighbors of each sample are:
+        # f1: s1, s2 (classified as skinny)
+        # s1: f1, s2 (closer to f1, so fat)
+        # s2: f1, (s1 or s3) (closer to f1, so fat)
+        # s3: s1, s2 (skinny)
+        # (a standalone sketch of this voting logic appears at the end of
+        # this file)
+ sample_ids = ('f1', 's1', 's2', 's3')
+ distance_matrix = skbio.DistanceMatrix([
+ [0, 2, 1, 5],
+ [2, 0, 3, 4],
+ [1, 3, 0, 3],
+ [5, 4, 3, 0],
+ ], ids=sample_ids)
+
+ dm = qiime2.Artifact.import_data('DistanceMatrix', distance_matrix)
+ categories = pd.Series(('fat', 'skinny', 'skinny', 'skinny'),
+ index=sample_ids, name='body_mass')
+ categories.index.name = 'SampleID'
+ metadata = qiime2.CategoricalMetadataColumn(categories)
+
+ # -- test -- #
+ res = sample_classifier.actions.classify_samples_from_dist(
+ distance_matrix=dm, metadata=metadata, k=2, cv=3, random_state=123
+ )
+ pred = res[0].view(pd.Series)
+ expected = pd.Series(('skinny', 'fat', 'fat', 'skinny'),
+ index=sample_ids)
+ self.assertTrue(expected.sort_index().equals(pred.sort_index()))
+
+ # test that each classifier works and delivers an expected accuracy result
+ # when a random seed is set.
+ def test_classifiers(self):
+ for classifier in ['RandomForestClassifier', 'ExtraTreesClassifier',
+ 'GradientBoostingClassifier', 'AdaBoostClassifier',
+ 'LinearSVC', 'SVC', 'KNeighborsClassifier']:
+ table_fp = self.get_data_path('chardonnay.table.qza')
+ table = qiime2.Artifact.load(table_fp)
+ res = sample_classifier.actions.classify_samples(
+ table=table, metadata=self.mdc_chard_fp,
+ test_size=0.5, cv=1, n_estimators=10, n_jobs=1,
+ estimator=classifier, random_state=123,
+ parameter_tuning=False, optimize_feature_selection=False,
+ missing_samples='ignore')
+ pred = res[2].view(pd.Series)
+ pred, truth = _match_series_or_die(
+ pred, self.mdc_chard_fp.to_series(), 'ignore')
+ accuracy = accuracy_score(truth, pred)
+ self.assertAlmostEqual(
+ accuracy, seeded_results[classifier], places=4,
+ msg='Accuracy of %s classifier was %f, but expected %f' % (
+ classifier, accuracy, seeded_results[classifier]))
+
+    # test that training a classifier with the classify_samples pipeline
+    # raises a warning when test_size = 0.0
+ def test_classify_samples_w_all_train_set(self):
+ with self.assertWarnsRegex(Warning, "not representative of "
+ "your model's performance"):
+ table_fp = self.get_data_path('chardonnay.table.qza')
+ table = qiime2.Artifact.load(table_fp)
+ sample_classifier.actions.classify_samples(
+ table=table, metadata=self.mdc_chard_fp,
+ test_size=0.0, cv=1, n_estimators=10, n_jobs=1,
+ estimator='RandomForestClassifier', random_state=123,
+ parameter_tuning=False, optimize_feature_selection=False,
+ missing_samples='ignore')
+
+ # test that the plugin methods/visualizers work
+ def test_regress_samples_ncv(self):
+ y_pred, importances = regress_samples_ncv(
+ self.table_ecam_fp, self.mdc_ecam_fp, random_state=123,
+ n_estimators=2, n_jobs=1, stratify=True, parameter_tuning=True,
+ missing_samples='ignore')
+
+ def test_classify_samples_ncv(self):
+ y_pred, importances, probabilities = classify_samples_ncv(
+ self.table_chard_fp, self.mdc_chard_fp, random_state=123,
+ n_estimators=2, n_jobs=1, missing_samples='ignore')
+
+ # test reproducibility of classifier results, probabilities
+ def test_classify_samples_ncv_accuracy(self):
+ dat = biom.Table(np.array(
+ [[4446, 9828, 3208, 776, 118, 4175, 657, 251, 7505, 617],
+ [1855, 8716, 3257, 1251, 3205, 2557, 4251, 7405, 1417, 1215],
+ [6616, 281, 8616, 291, 261, 253, 9075, 252, 7385, 4068]]),
+ observation_ids=['o1', 'o2', 'o3'],
+ sample_ids=['s1', 's2', 's3', 's4', 's5',
+ 's6', 's7', 's8', 's9', 's10'])
+ md = qiime2.CategoricalMetadataColumn(pd.Series(
+ ['red', 'red', 'red', 'red', 'red',
+ 'blue', 'blue', 'blue', 'blue', 'blue'],
+ index=pd.Index(['s1', 's2', 's3', 's4', 's5',
+ 's6', 's7', 's8', 's9', 's10'],
+ name='sample-id'), name='color'))
+ y_pred, importances, probabilities = classify_samples_ncv(
+ dat, md, random_state=123, n_estimators=2, n_jobs=1,
+ missing_samples='ignore')
+ exp_pred = pd.Series(
+ ['blue', 'red', 'red', 'blue', 'blue',
+ 'blue', 'blue', 'red', 'blue', 'blue'],
+ index=pd.Index(['s4', 's6', 's1', 's10', 's5', 's8', 's2', 's9',
+ 's3', 's7'], dtype='object', name='SampleID'),
+ name='prediction')
+ exp_importances = pd.DataFrame(
+ [0.595111111111111, 0.23155555555555551, 0.17333333333333334],
+ index=pd.Index(['o3', 'o1', 'o2'], name='feature'),
+ columns=['importance'])
+ exp_probabilities = pd.DataFrame(
+ [[0.5, 0.5], [0., 1.], [0., 1.], [0.5, 0.5], [0.5, 0.5],
+ [0.5, 0.5], [0.5, 0.5], [0., 1.], [1., 0.], [1., 0.]],
+ index=pd.Index(['s4', 's6', 's1', 's10', 's5', 's8', 's2', 's9',
+ 's3', 's7'], name='SampleID'),
+ columns=['blue', 'red'])
+ pdt.assert_series_equal(y_pred, exp_pred)
+ pdt.assert_frame_equal(importances, exp_importances)
+ pdt.assert_frame_equal(probabilities, exp_probabilities)
+
+ # test ncv a second time with KNeighborsRegressor (no feature importance)
+ def test_regress_samples_ncv_knn(self):
+ y_pred, importances = regress_samples_ncv(
+ self.table_ecam_fp, self.mdc_ecam_fp, random_state=123,
+ n_estimators=2, n_jobs=1, stratify=False, parameter_tuning=False,
+ estimator='KNeighborsRegressor', missing_samples='ignore')
+
+ # test that ncv gives expected results
+ def test_regress_samples_ncv_accuracy(self):
+ y_pred, importances = regress_samples_ncv(
+ self.table_ecam_fp, self.mdc_ecam_fp, random_state=123,
+ n_estimators=2, n_jobs=1, missing_samples='ignore')
+ pdt.assert_series_equal(y_pred, self.exp_pred)
+ pdt.assert_frame_equal(importances, self.exp_imp)
+
+ # test that fit_* methods output consistent importance scores
+ def test_fit_regressor(self):
+ pipeline, importances = fit_regressor(
+ self.table_ecam_fp, self.mdc_ecam_fp, random_state=123,
+ n_estimators=2, n_jobs=1, missing_samples='ignore')
+ exp_imp = pd.read_csv(
+ self.get_data_path('importance_cv.tsv'), sep='\t', header=0,
+ index_col=0)
+ pdt.assert_frame_equal(importances, exp_imp)
+
+ # just make sure this method runs. Uses the same internal function as
+ # fit_regressor, so importance score consistency is covered by the above
+ # test.
+ def test_fit_classifier(self):
+ pipeline, importances = fit_classifier(
+ self.table_ecam_fp, self.mdc_ecam_fp, random_state=123,
+ n_estimators=2, n_jobs=1, optimize_feature_selection=True,
+ parameter_tuning=True, missing_samples='ignore')
+
+ # test that each regressor works and delivers an expected accuracy result
+ # when a random seed is set.
+ def test_regressors(self):
+ for regressor in ['RandomForestRegressor', 'ExtraTreesRegressor',
+ 'GradientBoostingRegressor', 'AdaBoostRegressor',
+ 'Lasso', 'Ridge', 'ElasticNet',
+ 'KNeighborsRegressor', 'LinearSVR', 'SVR']:
+ table_fp = self.get_data_path('ecam-table-maturity.qza')
+ table = qiime2.Artifact.load(table_fp)
+ res = sample_classifier.actions.regress_samples(
+ table=table, metadata=self.mdc_ecam_fp,
+ test_size=0.5, cv=1, n_estimators=10, n_jobs=1,
+ estimator=regressor, random_state=123,
+ parameter_tuning=False, optimize_feature_selection=False,
+ missing_samples='ignore', stratify=True)
+ pred = res[2].view(pd.Series)
+ pred, truth = _match_series_or_die(
+ pred, self.mdc_ecam_fp.to_series(), 'ignore')
+ accuracy = mean_squared_error(truth, pred)
+ # TODO: Remove this conditional when
+ # https://github.com/qiime2/q2-sample-classifier/issues/193 is
+ # closed
+ if regressor == 'Ridge':
+ self.assertAlmostEqual(
+ accuracy, seeded_results[regressor], places=0,
+ msg='Accuracy of %s regressor was %f, but expected %f' % (
+ regressor, accuracy, seeded_results[regressor]))
+ else:
+ self.assertAlmostEqual(
+ accuracy, seeded_results[regressor], places=4,
+ msg='Accuracy of %s regressor was %f, but expected %f' % (
+ regressor, accuracy, seeded_results[regressor]))
+
+ # test adaboost base estimator trainer
+ def test_train_adaboost_base_estimator(self):
+ abe = _train_adaboost_base_estimator(
+ self.table_chard_fp, self.mdc_chard_fp, 'Region',
+ n_estimators=10, n_jobs=1, cv=3, random_state=None,
+ parameter_tuning=True, classification=True,
+ missing_samples='ignore')
+ self.assertEqual(type(abe.named_steps.est), AdaBoostClassifier)
+
+ # test some invalid inputs/edge cases
+ def test_invalids(self):
+ estimator, pad, pt = _set_parameters_and_estimator(
+ 'RandomForestClassifier', self.table_chard_fp, self.md_chard_fp,
+ 'Region', n_estimators=10, n_jobs=1, cv=1,
+ random_state=123, parameter_tuning=False, classification=True,
+ missing_samples='ignore')
+ regressor, pad, pt = _set_parameters_and_estimator(
+ 'RandomForestRegressor', self.table_chard_fp, self.md_chard_fp,
+ 'Region', n_estimators=10, n_jobs=1, cv=1,
+ random_state=123, parameter_tuning=False, classification=True,
+ missing_samples='ignore')
+
+ def test_split_table_no_rounding_error(self):
+ X_train, X_test, y_train, y_test = split_table(
+ self.table_chard_fp, self.mdc_chard_fp, test_size=0.5,
+ random_state=123, stratify=True, missing_samples='ignore')
+ self.assertEqual(len(X_train.ids()) + len(X_test.ids()), 21)
+ self.assertEqual(y_train.shape[0] + y_test.shape[0], 21)
+
+ def test_split_table_no_split(self):
+ X_train, X_test, y_train, y_test = split_table(
+ self.table_chard_fp, self.mdc_chard_fp, test_size=0.0,
+ random_state=123, stratify=True, missing_samples='ignore')
+ self.assertEqual(len(X_train.ids()), 21)
+ self.assertEqual(y_train.shape[0], 21)
+
+ def test_split_table_invalid_test_size(self):
+ with self.assertRaisesRegex(ValueError, "at least two samples"):
+ X_train, X_test, y_train, y_test = split_table(
+ self.table_chard_fp, self.mdc_chard_fp, test_size=1.0,
+ random_state=123, stratify=True, missing_samples='ignore')
+
+ def test_split_table_percnorm(self):
+ X_train, X_test, y_train, y_test = split_table(
+ self.table_percnorm, self.mdc_percnorm, test_size=0.5,
+ random_state=123, stratify=True, missing_samples='ignore')
+ self.assertEqual(len(X_train.ids()) + len(X_test.ids()), 4)
+ self.assertEqual(y_train.shape[0] + y_test.shape[0], 4)
+
+ # test experimental functions
+ def test_detect_outliers(self):
+ detect_outliers(self.table_chard_fp, self.md_chard_fp,
+ random_state=123, n_jobs=1, contamination=0.05)
+
+ def test_detect_outliers_with_subsets(self):
+ detect_outliers(self.table_chard_fp, self.md_chard_fp,
+ random_state=123, n_jobs=1, contamination=0.05,
+ subset_column='Vineyard', subset_value=1)
+
+ def test_detect_outliers_raise_error_on_missing_subset_data(self):
+ with self.assertRaisesRegex(ValueError, "must both be provided"):
+ detect_outliers(self.table_chard_fp, self.md_chard_fp,
+ random_state=123, n_jobs=1, contamination=0.05,
+ subset_column='Vineyard', subset_value=None)
+ with self.assertRaisesRegex(ValueError, "must both be provided"):
+ detect_outliers(self.table_chard_fp, self.md_chard_fp,
+ random_state=123, n_jobs=1, contamination=0.05,
+ subset_column=None, subset_value=1)
+
+ # just test that this works by making sure a classifier trained on samples
+ # x, y, and z predicts the correct metadata values for those same samples.
+ def test_predict_classifications(self):
+ for classifier in ['RandomForestClassifier', 'ExtraTreesClassifier',
+ 'GradientBoostingClassifier', 'AdaBoostClassifier',
+ 'LinearSVC', 'SVC', 'KNeighborsClassifier']:
+ estimator, importances = fit_classifier(
+ self.table_chard_fp, self.mdc_chard_fp, random_state=123,
+ n_estimators=2, estimator=classifier, n_jobs=1,
+ missing_samples='ignore')
+ pred, prob = predict_classification(self.table_chard_fp, estimator)
+ exp = self.mdc_chard_fp.to_series().reindex(pred.index).dropna()
+ # reindex both pred and exp because not all samples present in pred
+ # are present in the metadata! (hence missing_samples='ignore')
+ sample_ids = pred.index.intersection(exp.index)
+ pred = pred.loc[sample_ids]
+ exp = exp.loc[sample_ids]
+ # verify predictions:
+            # test that the expected number of correct results is achieved
+            # (these are mostly quite high, as we would expect; total n=21)
+ correct_results = np.sum(pred == exp)
+ self.assertEqual(
+ correct_results, seeded_predict_results[classifier],
+ msg='Accuracy of %s classifier was %f, but expected %f' % (
+ classifier, correct_results,
+ seeded_predict_results[classifier]))
+ # verify probabilities
+ # test whether all are in correct range (0 to 1)
+ ls_pred_classes = prob.columns.tolist()
+ ls_correct_range = [col for col in ls_pred_classes if
+ prob[col].between(
+ 0, 1, inclusive=True).all()]
+ self.assertEqual(len(ls_correct_range), prob.shape[1],
+                         msg='Predicted probabilities of class {} '
+                         'are not in range [0,1]'.format(
+ [col for col in ls_pred_classes
+ if col not in ls_correct_range]))
+
+ def test_predict_regressions(self):
+ for regressor in ['RandomForestRegressor', 'ExtraTreesRegressor',
+ 'GradientBoostingRegressor', 'AdaBoostRegressor',
+ 'Lasso', 'Ridge', 'ElasticNet',
+ 'KNeighborsRegressor', 'SVR', 'LinearSVR']:
+ estimator, importances = fit_regressor(
+ self.table_ecam_fp, self.mdc_ecam_fp, random_state=123,
+ n_estimators=2, estimator=regressor, n_jobs=1,
+ missing_samples='ignore')
+ pred = predict_regression(self.table_ecam_fp, estimator)
+ exp = self.mdc_ecam_fp.to_series()
+ # reindex both pred and exp because not all samples present in pred
+ # are present in the metadata! (hence missing_samples='ignore')
+ sample_ids = pred.index.intersection(exp.index)
+ pred = pred.loc[sample_ids]
+ exp = exp.loc[sample_ids]
+ # test that expected MSE is achieved (these are mostly quite high
+ # as we would expect)
+ mse = mean_squared_error(exp, pred)
+ # TODO: Remove this conditional when
+ # https://github.com/qiime2/q2-sample-classifier/issues/193 is
+ # closed
+ if regressor == 'Ridge':
+ self.assertAlmostEqual(
+ mse, seeded_predict_results[regressor], places=4,
+ msg='Accuracy of %s regressor was %f, but expected %f' % (
+ regressor, mse, seeded_predict_results[regressor]))
+ else:
+ self.assertAlmostEqual(
+ mse, seeded_predict_results[regressor],
+ msg='Accuracy of %s regressor was %f, but expected %f' % (
+ regressor, mse, seeded_predict_results[regressor]))
+
+ # make sure predict still works when features are given in a different
+    # order from the training set.
+ def test_predict_feature_order_aint_no_thing(self):
+ table = self.table_ecam_fp
+ estimator, importances = fit_regressor(
+ table, self.mdc_ecam_fp, random_state=123, n_estimators=2,
+ n_jobs=1, missing_samples='ignore')
+
+ # randomly shuffle and reorder features in biom table.
+ feature_ids = table.ids(axis='observation')
+ # look ma no seed! we should get the same result no matter the order.
+ np.random.shuffle(feature_ids)
+ shuffled_table = table.sort_order(feature_ids, axis='observation')
+
+ # now predict values on shuffled data
+ pred = predict_regression(shuffled_table, estimator)
+ exp = self.mdc_ecam_fp.to_series()
+ # reindex both pred and exp because not all samples present in pred
+ # are present in the metadata! (hence missing_samples='ignore')
+ sample_ids = pred.index.intersection(exp.index)
+ pred = pred.loc[sample_ids]
+ exp = exp.loc[sample_ids]
+ # test that expected MSE is achieved (these are mostly quite high
+ # as we would expect)
+ mse = mean_squared_error(exp, pred)
+ self.assertAlmostEqual(
+ mse, seeded_predict_results['RandomForestRegressor'])
+
+
+seeded_results = {
+ 'RandomForestClassifier': 0.63636363636363635,
+ 'ExtraTreesClassifier': 0.454545454545,
+ 'GradientBoostingClassifier': 0.272727272727,
+ 'AdaBoostClassifier': 0.272727272727,
+ 'LinearSVC': 0.818182,
+ 'SVC': 0.36363636363636365,
+ 'KNeighborsClassifier': 0.363636363636,
+ 'RandomForestRegressor': 23.226508,
+ 'ExtraTreesRegressor': 19.725397,
+ 'GradientBoostingRegressor': 34.157100,
+ 'AdaBoostRegressor': 30.920635,
+ 'Lasso': 722.827623,
+ 'Ridge': 521.195194222418,
+ 'ElasticNet': 618.532273,
+ 'KNeighborsRegressor': 44.7847619048,
+ 'LinearSVR': 511.816385601,
+ 'SVR': 51.325146}
+
+seeded_predict_results = {
+ 'RandomForestClassifier': 18,
+ 'ExtraTreesClassifier': 21,
+ 'GradientBoostingClassifier': 21,
+ 'AdaBoostClassifier': 21,
+ 'LinearSVC': 21,
+ 'SVC': 12,
+ 'KNeighborsClassifier': 14,
+ 'RandomForestRegressor': 7.4246031746,
+ 'ExtraTreesRegressor': 0.,
+ 'GradientBoostingRegressor': 50.1955883469,
+ 'AdaBoostRegressor': 9.7857142857142865,
+ 'Lasso': 0.173138653701,
+ 'Ridge': 2.694020055323081e-05,
+ 'ElasticNet': 0.0614243397637,
+ 'KNeighborsRegressor': 26.8625396825,
+ 'SVR': 37.86704865859832,
+ 'LinearSVR': 0.0099912565770459132}
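+
+
+# Illustrative sketch (not part of the test suite): majority-vote k-nearest-
+# neighbors from a precomputed distance matrix, mirroring the hand-worked
+# expectations in EstimatorsTests.test_2nn above. Tie-breaking here is
+# arbitrary; the tests rely on scikit-learn's actual behavior.
+def _knn_sketch(rows, labels, k):
+    preds = []
+    for i, row in enumerate(rows):
+        # rank the other samples by distance, then vote among the k nearest
+        order = sorted((d, j) for j, d in enumerate(row) if j != i)
+        votes = [labels[j] for _, j in order[:k]]
+        preds.append(max(set(votes), key=votes.count))
+    return preds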
diff --git a/q2_sample_classifier/tests/test_types_formats_transformers.py b/q2_sample_classifier/tests/test_types_formats_transformers.py
new file mode 100644
index 0000000..4fc95f8
--- /dev/null
+++ b/q2_sample_classifier/tests/test_types_formats_transformers.py
@@ -0,0 +1,439 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+import os
+import pandas as pd
+import pandas.testing as pdt
+import numpy as np
+import shutil
+import tempfile
+import tarfile
+import joblib
+import sklearn
+from sklearn.pipeline import Pipeline
+
+
+import qiime2
+from q2_types.feature_data import FeatureData
+from qiime2.plugin import ValidationError
+from q2_types.sample_data import SampleData
+
+from q2_sample_classifier import (
+ BooleanSeriesFormat, BooleanSeriesDirectoryFormat, BooleanSeries,
+ PredictionsFormat, PredictionsDirectoryFormat, ClassifierPredictions,
+ RegressorPredictions, ImportanceFormat, ImportanceDirectoryFormat,
+ Importance, PickleFormat, ProbabilitiesFormat,
+ ProbabilitiesDirectoryFormat, Probabilities, Classifier, Regressor,
+ SampleEstimator, SampleEstimatorDirFmt,
+ TrueTargetsDirectoryFormat, TrueTargets)
+from q2_sample_classifier.visuals import (
+ _custom_palettes, _plot_heatmap_from_confusion_matrix,)
+from q2_sample_classifier._format import JSONFormat
+from q2_sample_classifier.tests.test_base_class import \
+ SampleClassifierTestPluginBase
+from q2_sample_classifier.tests.test_estimators import SampleEstimatorTestBase
+
+
+class TestSemanticTypes(SampleClassifierTestPluginBase):
+
+ def test_boolean_series_format_validate_positive(self):
+ filepath = self.get_data_path('outliers.tsv')
+ format = BooleanSeriesFormat(filepath, mode='r')
+ # These should both just succeed
+ format.validate('min')
+ format.validate('max')
+
+ def test_boolean_series_format_validate_negative_col_count(self):
+ filepath = self.get_data_path('coordinates.tsv')
+ format = BooleanSeriesFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'BooleanSeriesFormat'):
+ format.validate()
+
+ def test_boolean_series_format_validate_negative_cell_values(self):
+ filepath = self.get_data_path('predictions.tsv')
+ format = BooleanSeriesFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'BooleanSeriesFormat'):
+ format.validate()
+
+ def test_boolean_series_format_validate_negative_empty(self):
+ filepath = self.get_data_path('empty_file.txt')
+ format = BooleanSeriesFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'one data record'):
+ format.validate()
+
+ def test_boolean_series_dir_fmt_validate_positive(self):
+ filepath = self.get_data_path('outliers.tsv')
+ shutil.copy(filepath, self.temp_dir.name)
+ format = BooleanSeriesDirectoryFormat(self.temp_dir.name, mode='r')
+ format.validate()
+
+ def test_boolean_series_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(BooleanSeries)
+
+ def test_sample_data_boolean_series_to_boolean_dir_fmt_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ SampleData[BooleanSeries], BooleanSeriesDirectoryFormat)
+
+ def test_pd_series_to_boolean_format(self):
+ transformer = self.get_transformer(pd.Series, BooleanSeriesFormat)
+ exp_index = pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype=object)
+ exp = pd.Series([True, False, True, False, True, False],
+ name='outlier', index=exp_index)
+ obs = transformer(exp)
+ obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
+ squeeze=True)
+ self.assertEqual(sorted(exp), sorted(obs))
+
+ def test_boolean_format_to_pd_series(self):
+ _, obs = self.transform_format(
+ BooleanSeriesFormat, pd.Series, 'outliers.tsv')
+ exp_index = pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype=object)
+ exp = pd.Series(['True', 'False', 'True', 'False', 'True', 'False'],
+ name='outlier', index=exp_index)
+ self.assertEqual(sorted(exp), sorted(obs))
+
+ def test_boolean_format_to_metadata(self):
+ _, obs = self.transform_format(
+ BooleanSeriesFormat, qiime2.Metadata, 'outliers.tsv')
+
+ exp_index = pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], name='id')
+ exp = pd.DataFrame([['True'], ['False'], ['True'],
+ ['False'], ['True'], ['False']],
+ columns=['outlier'], index=exp_index, dtype='str')
+ exp = qiime2.Metadata(exp)
+ self.assertEqual(obs, exp)
+
+ # test predictions format
+ def test_Predictions_format_validate_positive_numeric_predictions(self):
+ filepath = self.get_data_path('predictions.tsv')
+ format = PredictionsFormat(filepath, mode='r')
+ format.validate(level='min')
+ format.validate()
+
+ def test_Predictions_format_validate_positive_nonnumeric_predictions(self):
+ filepath = self.get_data_path('categorical_predictions.tsv')
+ format = PredictionsFormat(filepath, mode='r')
+ format.validate(level='min')
+ format.validate()
+
+ def test_Predictions_format_validate_negative(self):
+ filepath = self.get_data_path('coordinates.tsv')
+ format = PredictionsFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'PredictionsFormat'):
+ format.validate()
+
+ def test_Predictions_dir_fmt_validate_positive(self):
+ filepath = self.get_data_path('predictions.tsv')
+ shutil.copy(filepath, self.temp_dir.name)
+ format = PredictionsDirectoryFormat(self.temp_dir.name, mode='r')
+ format.validate()
+
+ def test_RegressorPredictions_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(RegressorPredictions)
+
+ def test_ClassifierPredictions_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(ClassifierPredictions)
+
+ def test_RegressorPredictions_to_Predictions_dir_fmt_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ SampleData[RegressorPredictions], PredictionsDirectoryFormat)
+
+ def test_ClassifierPredictions_to_Predictions_dir_fmt_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ SampleData[ClassifierPredictions], PredictionsDirectoryFormat)
+
+ def test_pd_series_to_Predictions_format(self):
+ transformer = self.get_transformer(pd.Series, PredictionsFormat)
+ exp = pd.Series([1, 2, 3, 4],
+ name='prediction', index=['a', 'b', 'c', 'd'])
+ obs = transformer(exp)
+ obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
+ squeeze=True)
+ pdt.assert_series_equal(obs, exp)
+
+ def test_pd_series_to_Predictions_format_allow_nans(self):
+ transformer = self.get_transformer(pd.Series, PredictionsFormat)
+ exp = pd.Series([1, np.nan, 3, np.nan],
+ name='prediction', index=['a', 'b', 'c', 'd'])
+ obs = transformer(exp)
+ obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
+ squeeze=True)
+ pdt.assert_series_equal(obs, exp)
+
+ def test_Predictions_format_to_pd_series(self):
+ _, obs = self.transform_format(
+ PredictionsFormat, pd.Series, 'predictions.tsv')
+ exp_index = pd.Index(['10249.C001.10SS', '10249.C002.05SS',
+ '10249.C004.01SS', '10249.C004.11SS'],
+ name='id', dtype=object)
+ exp = pd.Series([4.5, 2.5, 0.5, 4.5], name='prediction',
+ index=exp_index)
+ pdt.assert_series_equal(obs[:4], exp)
+
+ def test_Predictions_format_to_metadata(self):
+ _, obs = self.transform_format(
+ PredictionsFormat, qiime2.Metadata, 'predictions.tsv')
+ exp_index = pd.Index(['10249.C001.10SS', '10249.C002.05SS',
+ '10249.C004.01SS', '10249.C004.11SS'],
+ name='id')
+ exp = pd.DataFrame([4.5, 2.5, 0.5, 4.5], columns=['prediction'],
+ index=exp_index)
+ pdt.assert_frame_equal(obs.to_dataframe()[:4], exp)
+
+ # test Importance format
+ def test_Importance_format_validate_positive(self):
+ filepath = self.get_data_path('importance.tsv')
+ format = ImportanceFormat(filepath, mode='r')
+ format.validate(level='min')
+ format.validate()
+
+ def test_Importance_format_validate_negative_nonnumeric(self):
+ filepath = self.get_data_path('chardonnay.map.txt')
+ format = ImportanceFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'numeric values'):
+ format.validate()
+
+ def test_Importance_format_validate_negative_empty(self):
+ filepath = self.get_data_path('empty_file.txt')
+ format = ImportanceFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'one data record'):
+ format.validate()
+
+ def test_Importance_format_validate_negative(self):
+ filepath = self.get_data_path('garbage.txt')
+ format = ImportanceFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'two or more fields'):
+ format.validate()
+
+ def test_Importance_dir_fmt_validate_positive(self):
+ filepath = self.get_data_path('importance.tsv')
+ shutil.copy(filepath, self.temp_dir.name)
+ format = ImportanceDirectoryFormat(self.temp_dir.name, mode='r')
+ format.validate()
+
+ def test_Importance_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(Importance)
+
+ def test_sample_data_Importance_to_Importance_dir_fmt_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ FeatureData[Importance], ImportanceDirectoryFormat)
+
+ def test_pd_dataframe_to_Importance_format(self):
+ transformer = self.get_transformer(pd.DataFrame, ImportanceFormat)
+ exp = pd.DataFrame([1, 2, 3, 4],
+ columns=['importance'], index=['a', 'b', 'c', 'd'])
+ obs = transformer(exp)
+ obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0)
+ pdt.assert_frame_equal(exp, obs)
+
+ def test_Importance_format_to_pd_dataframe(self):
+ _, obs = self.transform_format(
+ ImportanceFormat, pd.DataFrame, 'importance.tsv')
+ exp_index = pd.Index(['74ec9fe6ffab4ecff6d5def74298a825',
+ 'c82032c40c98975f71892e4be561c87a',
+ '79280cea51a6fe8a3432b2f266dd34db',
+ 'f7686a74ca2d3729eb66305e8a26309b'],
+ name='id')
+ exp = pd.DataFrame([0.44469828320835586, 0.07760118417569697,
+ 0.06570251750505914, 0.061718558716901406],
+ columns=['importance'],
+ index=exp_index)
+ pdt.assert_frame_equal(exp, obs[:4])
+
+ def test_Importance_format_to_metadata(self):
+ _, obs = self.transform_format(
+ ImportanceFormat, qiime2.Metadata, 'importance.tsv')
+ exp_index = pd.Index(['74ec9fe6ffab4ecff6d5def74298a825',
+ 'c82032c40c98975f71892e4be561c87a',
+ '79280cea51a6fe8a3432b2f266dd34db',
+ 'f7686a74ca2d3729eb66305e8a26309b'],
+ name='id')
+ exp = pd.DataFrame([0.44469828320835586, 0.07760118417569697,
+ 0.06570251750505914, 0.061718558716901406],
+ columns=['importance'],
+ index=exp_index)
+ pdt.assert_frame_equal(obs.to_dataframe()[:4], exp)
+
+ # test Probabilities format
+ def test_Probabilities_format_validate_positive(self):
+ filepath = self.get_data_path('class_probabilities.tsv')
+ format = ProbabilitiesFormat(filepath, mode='r')
+ format.validate(level='min')
+ format.validate()
+
+ def test_Probabilities_format_validate_negative_nonnumeric(self):
+ filepath = self.get_data_path('chardonnay.map.txt')
+ format = ProbabilitiesFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'numeric values'):
+ format.validate()
+
+ def test_Probabilities_format_validate_negative_empty(self):
+ filepath = self.get_data_path('empty_file.txt')
+ format = ProbabilitiesFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'one data record'):
+ format.validate()
+
+ def test_Probabilities_format_validate_negative(self):
+ filepath = self.get_data_path('garbage.txt')
+ format = ProbabilitiesFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'two or more fields'):
+ format.validate()
+
+ def test_Probabilities_dir_fmt_validate_positive(self):
+ filepath = self.get_data_path('class_probabilities.tsv')
+ shutil.copy(filepath, self.temp_dir.name)
+ format = ProbabilitiesDirectoryFormat(self.temp_dir.name, mode='r')
+ format.validate()
+
+ def test_Probabilities_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(Probabilities)
+
+ def test_sample_data_Probabilities_to_Probs_dir_fmt_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ SampleData[Probabilities], ProbabilitiesDirectoryFormat)
+
+ def test_pd_dataframe_to_Probabilities_format(self):
+ transformer = self.get_transformer(pd.DataFrame, ProbabilitiesFormat)
+ exp = pd.DataFrame([[0.1, 0.77], [0.8, 0.4], [0.7, 0.1], [0.44, 0.73]],
+ columns=['classA', 'classB'],
+ index=['a', 'b', 'c', 'd'])
+ obs = transformer(exp)
+ obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
+ parse_dates=True)
+ pdt.assert_frame_equal(exp, obs)
+
+ def test_Probabilities_format_to_pd_dataframe(self):
+ _, obs = self.transform_format(
+ ProbabilitiesFormat, pd.DataFrame, 'class_probabilities.tsv')
+ exp_index = pd.Index(['s1', 's2', 's3', 's4', 's5', 's6', 's7'],
+ name='id')
+ exp = pd.DataFrame([[0.4446, 0.9828, 0.3208],
+ [0.0776, 0.0118, 0.4175],
+ [0.0657, 0.0251, 0.7505],
+ [0.0617, 0.1855, 0.8716],
+ [0.0281, 0.8616, 0.0291],
+ [0.0261, 0.0253, 0.9075],
+ [0.0252, 0.7385, 0.4068]],
+ columns=['classA', 'classB', 'classC'],
+ index=exp_index)
+ pdt.assert_frame_equal(exp, obs)
+
+ def test_Probabilities_format_to_metadata(self):
+ _, obs = self.transform_format(
+ ProbabilitiesFormat, qiime2.Metadata, 'class_probabilities.tsv')
+ exp_index = pd.Index(['s1', 's2', 's3', 's4', 's5', 's6', 's7'],
+ name='id')
+ exp = pd.DataFrame([[0.4446, 0.9828, 0.3208],
+ [0.0776, 0.0118, 0.4175],
+ [0.0657, 0.0251, 0.7505],
+ [0.0617, 0.1855, 0.8716],
+ [0.0281, 0.8616, 0.0291],
+ [0.0261, 0.0253, 0.9075],
+ [0.0252, 0.7385, 0.4068]],
+ columns=['classA', 'classB', 'classC'],
+ index=exp_index)
+ pdt.assert_frame_equal(obs.to_dataframe(), exp)
+
+ # test utility formats
+ def test_pickle_format_validate_negative(self):
+ filepath = self.get_data_path('coordinates.tsv')
+ format = PickleFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'pickled file'):
+ format.validate()
+
+ def test_json_format_validate_negative(self):
+ filepath = self.get_data_path('coordinates.tsv')
+ format = JSONFormat(filepath, mode='r')
+ with self.assertRaisesRegex(ValidationError, 'Expecting value'):
+ format.validate()
+
+ # this just checks that palette names are valid input
+ def test_custom_palettes(self):
+ confused = np.array([[1, 0], [0, 1]])
+ for palette in _custom_palettes().keys():
+ _plot_heatmap_from_confusion_matrix(confused, palette)
+
+ # test TrueTarget
+ def test_TrueTargets_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(TrueTargets)
+
+ # test TrueTargetDirectoryFormats
+ def test_TrueTargets_dir_fmt_validate_positive(self):
+ filepath = self.get_data_path('true_targets.tsv')
+ shutil.copy(filepath, self.temp_dir.name)
+ format = TrueTargetsDirectoryFormat(self.temp_dir.name, mode='r')
+ format.validate()
+
+ def test_TrueTarget_to_TrueTargets_dir_fmt_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ SampleData[TrueTargets], TrueTargetsDirectoryFormat)
+
+
+class TestTypes(SampleClassifierTestPluginBase):
+ def test_sample_estimator_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(SampleEstimator)
+
+ def test_classifier_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(Classifier)
+
+ def test_regressor_semantic_type_registration(self):
+ self.assertRegisteredSemanticType(Regressor)
+
+ def test_sample_classifier_semantic_type_to_format_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ SampleEstimator[Classifier], SampleEstimatorDirFmt)
+
+ def test_sample_regressor_semantic_type_to_format_registration(self):
+ self.assertSemanticTypeRegisteredToFormat(
+ SampleEstimator[Regressor], SampleEstimatorDirFmt)
+
+
+class TestFormats(SampleEstimatorTestBase):
+ def test_sample_classifier_dir_fmt(self):
+ format = self._custom_setup(sklearn.__version__)
+
+ # Should not error
+ format.validate()
+
+
+class TestTransformers(SampleEstimatorTestBase):
+ def test_old_sklearn_version(self):
+ transformer = self.get_transformer(
+ SampleEstimatorDirFmt, Pipeline)
+ input = self._custom_setup('a very old version')
+ with self.assertRaises(ValueError):
+ transformer(input)
+
+ def test_taxo_class_dir_fmt_to_taxo_class_result(self):
+ input = self._custom_setup(sklearn.__version__)
+
+ transformer = self.get_transformer(
+ SampleEstimatorDirFmt, Pipeline)
+ obs = transformer(input)
+
+ self.assertTrue(obs)
+
+ def test_taxo_class_result_to_taxo_class_dir_fmt(self):
+ def read_pipeline(pipeline_filepath):
+ with tarfile.open(pipeline_filepath) as tar:
+ dirname = tempfile.mkdtemp()
+ tar.extractall(dirname)
+ pipeline = joblib.load(os.path.join(dirname,
+ 'sklearn_pipeline.pkl'))
+ for fn in tar.getnames():
+ os.unlink(os.path.join(dirname, fn))
+ os.rmdir(dirname)
+ return pipeline
+
+ exp = read_pipeline(self.sklearn_pipeline)
+ transformer = self.get_transformer(
+ Pipeline, SampleEstimatorDirFmt)
+ obs = transformer(exp)
+ sklearn_pipeline = obs.sklearn_pipeline.view(PickleFormat)
+ obs_pipeline = read_pipeline(str(sklearn_pipeline))
+ obs = obs_pipeline
+ self.assertTrue(obs)
diff --git a/q2_sample_classifier/tests/test_utilities.py b/q2_sample_classifier/tests/test_utilities.py
new file mode 100644
index 0000000..2aef9f5
--- /dev/null
+++ b/q2_sample_classifier/tests/test_utilities.py
@@ -0,0 +1,155 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+import pandas as pd
+import biom
+import numpy as np
+from sklearn.svm import LinearSVC
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.pipeline import Pipeline
+from sklearn.ensemble import RandomForestClassifier
+import pandas.testing as pdt
+
+import qiime2
+
+from q2_sample_classifier.utilities import (
+ _load_data, _calculate_feature_importances, _extract_important_features,
+ _disable_feature_selection, _mean_feature_importance,
+ _null_feature_importance, _extract_features)
+from q2_sample_classifier.tests.test_base_class import \
+ SampleClassifierTestPluginBase
+
+
+class UtilitiesTests(SampleClassifierTestPluginBase):
+
+ def setUp(self):
+ super().setUp()
+
+ exp_rf = pd.DataFrame(
+ {'importance': [0.1, 0.2, 0.3]}, index=['a', 'b', 'c'])
+ exp_rf.index.name = 'feature'
+ self.exp_rf = exp_rf
+
+ exp_svm = pd.DataFrame(
+ {'importance0': [0.1, 0.2, 0.3], 'importance1': [0.4, 0.5, 0.6]},
+ index=['a', 'b', 'c'])
+ exp_svm.index.name = 'feature'
+ self.exp_svm = exp_svm
+
+ exp_lsvm = pd.DataFrame(
+ {'importance0': [-0.048794, -0.048794, -0.048794]},
+ index=['a', 'b', 'c'])
+ exp_lsvm.index.name = 'feature'
+ self.exp_lsvm = exp_lsvm
+
+ self.features = biom.Table(np.array([[1]*5]*3), ['a', 'b', 'c'],
+ list(map(str, range(5))))
+
+ self.targets = pd.Series(['a', 'a', 'b', 'b', 'a'], name='bullseye')
+
+ def test_extract_important_features_1d_array(self):
+ importances = _extract_important_features(
+ self.features.ids('observation'),
+ np.ndarray((3,), buffer=np.array([0.1, 0.2, 0.3])))
+ self.assertEqual(sorted(self.exp_rf), sorted(importances))
+
+ def test_extract_important_features_2d_array(self):
+ importances = _extract_important_features(
+ self.features.ids('observation'),
+ np.ndarray(
+ (2, 3), buffer=np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])))
+ self.assertEqual(sorted(self.exp_svm), sorted(importances))
+
+ # test feature importance calculation with main classifier types
+ def test_calculate_feature_importances_ensemble(self):
+ estimator = Pipeline(
+ [('dv', DictVectorizer()),
+ ('est', RandomForestClassifier(n_estimators=10))])
+ estimator.fit(_extract_features(self.features),
+ self.targets.values.ravel())
+ fi = _calculate_feature_importances(estimator)
+ self.assertEqual(sorted(self.exp_rf), sorted(fi))
+
+ def test_calculate_feature_importances_svm(self):
+ estimator = Pipeline(
+ [('dv', DictVectorizer()), ('est', LinearSVC())])
+ estimator.fit(_extract_features(self.features),
+ self.targets.values.ravel())
+ fi = _calculate_feature_importances(estimator)
+ self.assertEqual(sorted(self.exp_lsvm), sorted(fi))
+
+ # confirm that feature selection incompatibility warnings work
+ def test_disable_feature_selection_unsupported(self):
+ with self.assertWarnsRegex(UserWarning, "does not support recursive"):
+ _disable_feature_selection('KNeighborsClassifier', False)
+
+ def test_mean_feature_importance_1d_arrays(self):
+ exp = pd.DataFrame([10., 9., 8., 7.], columns=["importance0"],
+ index=[3, 2, 1, 0])
+ imps = [pd.DataFrame([1, 2, 3, 4], columns=["importance0"]),
+ pd.DataFrame([5, 6, 7, 8], columns=["importance0"]),
+ pd.DataFrame([9, 10, 11, 12], columns=["importance0"]),
+ pd.DataFrame([13, 14, 15, 16], columns=["importance0"])]
+ pdt.assert_frame_equal(_mean_feature_importance(imps), exp)
+
+ def test_mean_feature_importance_different_column_names(self):
+ exp = pd.DataFrame([[6., 5., 4., 3.], [14., 13., 12., 11.]],
+ index=["importance0", "importance1"],
+ columns=[3, 2, 1, 0]).T
+ imps = [pd.DataFrame([1, 2, 3, 4], columns=["importance0"]),
+ pd.DataFrame([5, 6, 7, 8], columns=["importance0"]),
+ pd.DataFrame([9, 10, 11, 12], columns=["importance1"]),
+ pd.DataFrame([13, 14, 15, 16], columns=["importance1"])]
+ pdt.assert_frame_equal(_mean_feature_importance(imps), exp)
+
+ def test_mean_feature_importance_2d_arrays(self):
+ exp = pd.DataFrame([[3.5] * 4, [9.5] * 4],
+ index=["importance0", "importance1"],
+ columns=[0, 1, 2, 3]).T
+ imps = [pd.DataFrame([[6, 5, 4, 3], [14, 13, 12, 11]],
+ index=["importance0", "importance1"],
+ columns=[0, 1, 2, 3]).T,
+ pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
+ index=["importance0", "importance1"],
+ columns=[0, 1, 2, 3]).T]
+ pdt.assert_frame_equal(_mean_feature_importance(imps), exp)
+
+    # this case should not occur in practice, but in theory the result should
+    # just be concatenated and sorted, not collapsed, if all column names are
+    # unique
+ def test_mean_feature_importance_do_not_collapse(self):
+ imps = [pd.DataFrame([4.0, 3.0, 2.0, 1.0], columns=["importance0"]),
+ pd.DataFrame([16.0, 15.0, 14.0, 13.0],
+ columns=["importance1"])]
+ exp = pd.concat(imps, axis=1)
+ pdt.assert_frame_equal(_mean_feature_importance(imps), exp)
+
+ def test_null_feature_importance(self):
+ exp = pd.DataFrame(
+ [1, 1, 1], index=['o1', 'o2', 'o3'], columns=['importance'])
+ exp.index.name = 'feature'
+ tab = biom.Table(np.array([[1., 2., 3.], [3., 2., 1.], [7., 6., 9.]]),
+ ['o1', 'o2', 'o3'], ['s1', 's2', 's3'])
+ tab = _extract_features(tab)
+ pdt.assert_frame_equal(_null_feature_importance(tab), exp)
+
+ def test_load_data(self):
+ # phony feature table
+ id_map = {'0': 'peanut', '1': 'bugs', '2': 'qiime2', '3': 'matt',
+ '4': 'pandas'}
+ a = self.features.update_ids(id_map, axis='sample')
+ # phony metadata, convert to qiime2.Metadata
+ b = self.targets
+ b.index = ['pandas', 'peanut', 'qiime1', 'flapjacks', 'bugs']
+ b.index.name = '#SampleID'
+ b = qiime2.Metadata(b.to_frame())
+ # test that merge of tables is inner merge
+ intersection = set(('peanut', 'bugs', 'pandas'))
+ feature_data, targets = _load_data(a, b, missing_samples='ignore')
+ exp = [{'c': 1.0, 'a': 1.0, 'b': 1.0}, {'c': 1.0, 'a': 1.0, 'b': 1.0},
+ {'c': 1.0, 'a': 1.0, 'b': 1.0}]
+ np.testing.assert_array_equal(feature_data, exp)
+ self.assertEqual(set(targets.index), intersection)
diff --git a/q2_sample_classifier/tests/test_visualization.py b/q2_sample_classifier/tests/test_visualization.py
new file mode 100644
index 0000000..eb09e89
--- /dev/null
+++ b/q2_sample_classifier/tests/test_visualization.py
@@ -0,0 +1,237 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+import pandas as pd
+import pandas.testing as pdt
+from os import mkdir, listdir
+from os.path import join
+import biom
+
+import qiime2
+from qiime2.plugins import sample_classifier
+
+from q2_sample_classifier.visuals import (
+ _linear_regress, _calculate_baseline_accuracy,
+ _add_sample_size_to_xtick_labels)
+from q2_sample_classifier.classify import (
+ scatterplot, confusion_matrix)
+from q2_sample_classifier.utilities import (
+ _match_series_or_die, _predict_and_plot)
+from q2_sample_classifier.tests.test_base_class import \
+ SampleClassifierTestPluginBase
+
+
+class TestVisuals(SampleClassifierTestPluginBase):
+
+ md = pd.DataFrame([(1, 'a', 0.11), (1, 'a', 0.12), (1, 'a', 0.13),
+ (2, 'a', 0.19), (2, 'a', 0.18), (2, 'a', 0.21),
+ (1, 'b', 0.14), (1, 'b', 0.13), (1, 'b', 0.14),
+ (2, 'b', 0.26), (2, 'b', 0.27), (2, 'b', 0.29)],
+ columns=['Time', 'Group', 'Value'])
+
+ def test_linear_regress(self):
+        res = _linear_regress(self.md['Value'], self.md['Time'])
+ self.assertAlmostEqual(res.iloc[0]['Mean squared error'], 1.9413916666)
+ self.assertAlmostEqual(res.iloc[0]['r-value'], 0.86414956372460128)
+ self.assertAlmostEqual(res.iloc[0]['r-squared'], 0.74675446848541871)
+ self.assertAlmostEqual(res.iloc[0]['P-value'], 0.00028880275858705694)
+
+ def test_calculate_baseline_accuracy(self):
+ accuracy = 0.9
+ y_test = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], name="class")
+ classifier_accuracy = _calculate_baseline_accuracy(y_test, accuracy)
+ expected_results = (6, 3, 0.5, 1.8)
+ for i in zip(classifier_accuracy, expected_results):
+ self.assertEqual(i[0], i[1])
+
+
+class TestHeatmap(SampleClassifierTestPluginBase):
+
+ def setUp(self):
+ super().setUp()
+ md_vaw = self.get_data_path('vaw.txt')
+ md_vaw = qiime2.Metadata.load(md_vaw)
+ self.md_vaw = md_vaw.get_column('Column')
+ table_vaw = self.get_data_path('vaw.qza')
+ self.table_vaw = qiime2.Artifact.load(table_vaw)
+ imp = pd.read_csv(
+ self.get_data_path('vaw_importance.tsv'), sep='\t',
+ header=0, index_col=0)
+ self.imp = qiime2.Artifact.import_data('FeatureData[Importance]', imp)
+
+ def test_heatmap_default_feature_count_zero(self):
+        heatmap, table = sample_classifier.actions.heatmap(
+ self.table_vaw, self.imp, self.md_vaw, group_samples=True,
+ feature_count=0)
+ self.assertEqual(table.view(biom.Table).shape, (5, 2))
+
+ def test_heatmap_importance_threshold(self):
+        heatmap, table = sample_classifier.actions.heatmap(
+ self.table_vaw, self.imp, self.md_vaw,
+ importance_threshold=0.062, group_samples=False, feature_count=0)
+ self.assertEqual(table.view(biom.Table).shape, (3, 6))
+
+ def test_heatmap_feature_count(self):
+        heatmap, table = sample_classifier.actions.heatmap(
+ self.table_vaw, self.imp, self.md_vaw, group_samples=True,
+ feature_count=2)
+ self.assertEqual(table.view(biom.Table).shape, (2, 2))
+
+ def test_heatmap_must_group_or_die(self):
+ with self.assertRaisesRegex(ValueError, "metadata are not optional"):
+            heatmap, table = sample_classifier.actions.heatmap(
+ self.table_vaw, self.imp, sample_metadata=None,
+ group_samples=True)
+
+
+# This class really just checks that these visualizers run without error. Yay.
+# Also test some internal nuts/bolts but there's not much else we can do.
+class TestPlottingVisualizers(SampleClassifierTestPluginBase):
+ def setUp(self):
+ super().setUp()
+ self.tmpd = join(self.temp_dir.name, 'viz')
+ mkdir(self.tmpd)
+
+ self.a = pd.Series(['a', 'a', 'b', 'b', 'c', 'c'], name='site',
+ index=['a1', 'a2', 'b1', 'b2', 'c1', 'c2'])
+ self.a.index.name = 'SampleID'
+ self.bogus = pd.Series(['a', 'a', 'b', 'b', 'c', 'c'], name='site',
+ index=['a1', 'e3', 'f5', 'b2', 'z1', 'c2'])
+ self.bogus.index.name = 'SampleID'
+ self.c = pd.Series(
+ [0, 1, 2, 3], index=['a', 'b', 'c', 'd'], name='peanuts')
+ self.c.index.name = 'SampleID'
+
+ def test_confusion_matrix(self):
+ b = qiime2.CategoricalMetadataColumn(self.a)
+ confusion_matrix(self.tmpd, self.a, b)
+
+ def test_confusion_matrix_class_overlap_error(self):
+ b = pd.Series([1, 2, 3, 4, 5, 6], name='site',
+ index=['a1', 'a2', 'b1', 'b2', 'c1', 'c2'])
+ b.index.name = 'id'
+ b = qiime2.NumericMetadataColumn(b)
+ with self.assertRaisesRegex(ValueError, "do not overlap"):
+ confusion_matrix(self.tmpd, self.a, b)
+
+ def test_confusion_matrix_vmin_too_high(self):
+ b = qiime2.CategoricalMetadataColumn(self.a)
+ with self.assertRaisesRegex(ValueError, r'vmin must be less than.*\s\s'
+ r'0\.5.*greater.*0\.0'):
+ confusion_matrix(self.tmpd, self.a, b, vmin=.5, vmax=None)
+
+ def test_confusion_matrix_vmax_too_low(self):
+ b = qiime2.CategoricalMetadataColumn(self.a)
+ with self.assertRaisesRegex(ValueError, r'vmax must be greater than.*'
+ r'\s\s0\.5.*less.*1\.0'):
+ confusion_matrix(self.tmpd, self.a, b, vmin=None, vmax=.5)
+
+ def test_confusion_matrix_vmin_too_high_and_vmax_too_low(self):
+ b = qiime2.CategoricalMetadataColumn(self.a)
+ with self.assertRaisesRegex(ValueError, r'vmin must be less than.*\s'
+ r'\s0\.5.*greater.*0\.0\s.*vmax must be '
+ r'greater than.*\s\s0\.5.*less.*1\.0'):
+ confusion_matrix(self.tmpd, self.a, b, vmin=.5, vmax=.5)
+
+ def test_confusion_matrix_dtype_coercion(self):
+ predictions = pd.Series([1, 1, 1, 2, 2, 2],
+ index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f'],
+ name='sample_id'), name='features')
+
+ # NOTE: the targets are numbers but represented as str
+ truth = qiime2.CategoricalMetadataColumn(pd.Series(
+ ['1', '2', '1', '2', '1', '2'],
+ index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], name='sample-id'),
+ name='target'))
+
+ confusion_matrix(self.tmpd, predictions, truth)
+
+ self.assertTrue('index.html' in listdir(self.tmpd))
+
+ # test confusion matrix plotting independently to see how it handles
+ # partially overlapping classes when true labels are superset
+ def test_predict_and_plot_true_labels_are_superset(self):
+ b = pd.Series(['a', 'a', 'b', 'b', 'b', 'b'], name='site',
+ index=['a1', 'a2', 'b1', 'b2', 'c1', 'c2'])
+ exp = pd.DataFrame(
+ [[1., 0., 0., ''],
+ [0., 1., 0., ''],
+ [0., 1., 0., ''],
+ ['', '', '', 0.666666666],
+ ['', '', '', 0.3333333333],
+ ['', '', '', 2.]],
+ columns=['a', 'b', 'c', 'Overall Accuracy'],
+ index=['a', 'b', 'c', 'Overall Accuracy', 'Baseline Accuracy',
+ 'Accuracy Ratio'])
+ predictions, confusion = _predict_and_plot(self.tmpd, self.a, b)
+ pdt.assert_frame_equal(exp, predictions)
+
+ # test confusion matrix plotting independently to see how it handles
+    # partially overlapping classes when true labels are a subset
+ def test_predict_and_plot_true_labels_are_subset(self):
+ b = pd.Series(['a', 'a', 'b', 'b', 'c', 'd'], name='site',
+ index=['a1', 'a2', 'b1', 'b2', 'c1', 'c2'])
+ exp = pd.DataFrame(
+ [[1., 0., 0., 0., ''],
+ [0., 1., 0., 0., ''],
+ [0., 0., 0.5, 0.5, ''],
+ [0., 0., 0., 0., ''],
+ ['', '', '', '', 0.8333333333],
+ ['', '', '', '', 0.3333333333],
+ ['', '', '', '', 2.5]],
+ columns=['a', 'b', 'c', 'd', 'Overall Accuracy'],
+ index=['a', 'b', 'c', 'd', 'Overall Accuracy', 'Baseline Accuracy',
+ 'Accuracy Ratio'])
+ predictions, confusion = _predict_and_plot(self.tmpd, self.a, b)
+ pdt.assert_frame_equal(exp, predictions)
+
+ # test confusion matrix plotting independently to see how it handles
+ # partially overlapping classes when true labels are mutually exclusive
+ def test_predict_and_plot_true_labels_are_mutually_exclusive(self):
+ b = pd.Series(['a', 'a', 'e', 'e', 'd', 'd'], name='site',
+ index=['a1', 'a2', 'b1', 'b2', 'c1', 'c2'])
+ exp = pd.DataFrame(
+ [[1., 0., 0., 0., 0., ''],
+ [0., 0., 0., 0., 1., ''],
+ [0., 0., 0., 1., 0., ''],
+ [0., 0., 0., 0., 0., ''],
+ [0., 0., 0., 0., 0., ''],
+ ['', '', '', '', '', 0.3333333333],
+ ['', '', '', '', '', 0.3333333333],
+ ['', '', '', '', '', 1.]],
+ columns=['a', 'b', 'c', 'd', 'e', 'Overall Accuracy'],
+ index=['a', 'b', 'c', 'd', 'e', 'Overall Accuracy',
+ 'Baseline Accuracy', 'Accuracy Ratio'])
+ predictions, confusion = _predict_and_plot(self.tmpd, self.a, b)
+ pdt.assert_frame_equal(exp, predictions)
+
+ def test_scatterplot(self):
+ b = qiime2.NumericMetadataColumn(self.c)
+ scatterplot(self.tmpd, self.c, b)
+
+ def test_add_sample_size_to_xtick_labels(self):
+ labels = _add_sample_size_to_xtick_labels(self.a, ['a', 'b', 'c'])
+ exp = ['a (n=2)', 'b (n=2)', 'c (n=2)']
+ self.assertListEqual(labels, exp)
+
+ # now test performance when extra classes are present
+ def test_add_sample_size_to_xtick_labels_extra_classes(self):
+ labels = _add_sample_size_to_xtick_labels(
+ self.a, [0, 'a', 'b', 'bb', 'c'])
+ exp = ['0 (n=0)', 'a (n=2)', 'b (n=2)', 'bb (n=0)', 'c (n=2)']
+ self.assertListEqual(labels, exp)
+
+ def test_match_series_or_die(self):
+ exp = pd.Series(['a', 'b', 'c'], name='site', index=['a1', 'b2', 'c2'])
+ exp.index.name = 'SampleID'
+ a, b = _match_series_or_die(self.a, self.bogus, 'ignore')
+ pdt.assert_series_equal(exp, a)
+ pdt.assert_series_equal(exp, b)
+
+ def test_match_series_or_die_missing_samples(self):
+ with self.assertRaisesRegex(ValueError, "Missing samples"):
+ a, b = _match_series_or_die(self.a, self.bogus, 'error')
diff --git a/q2_sample_classifier/utilities.py b/q2_sample_classifier/utilities.py
new file mode 100644
index 0000000..6f57456
--- /dev/null
+++ b/q2_sample_classifier/utilities.py
@@ -0,0 +1,861 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import warnings
+from os.path import join
+
+from sklearn.model_selection import (
+ train_test_split, RandomizedSearchCV, KFold, StratifiedKFold)
+from sklearn.metrics import accuracy_score
+from sklearn.feature_selection import RFECV
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.ensemble import (RandomForestRegressor, RandomForestClassifier,
+ ExtraTreesClassifier, ExtraTreesRegressor,
+ AdaBoostClassifier, GradientBoostingClassifier,
+ AdaBoostRegressor, GradientBoostingRegressor)
+from sklearn.svm import SVR, SVC
+from sklearn.linear_model import Ridge, Lasso, ElasticNet
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from sklearn.pipeline import Pipeline
+
+import q2templates
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import pkg_resources
+from scipy.sparse import issparse
+from scipy.stats import randint
+import biom
+
+from .visuals import (_linear_regress, _plot_confusion_matrix, _plot_RFE,
+ _regplot_from_dataframe, _generate_roc_plots)
+
+_classifiers = ['RandomForestClassifier', 'ExtraTreesClassifier',
+ 'GradientBoostingClassifier', 'AdaBoostClassifier',
+ 'KNeighborsClassifier', 'LinearSVC', 'SVC']
+
+parameters = {
+ 'ensemble': {"max_depth": [4, 8, 16, None],
+ "max_features": [None, 'sqrt', 'log2', 0.1],
+ "min_samples_split": [0.001, 0.01, 0.1],
+ "min_weight_fraction_leaf": [0.0001, 0.001, 0.01]},
+ 'bootstrap': {"bootstrap": [True, False]},
+ 'criterion': {"criterion": ["gini", "entropy"]},
+ 'svm': {"C": [1, 0.5, 0.1, 0.9, 0.8],
+ "tol": [0.00001, 0.0001, 0.001, 0.01],
+ "shrinking": [True, False]},
+ 'kneighbors': {"n_neighbors": randint(2, 15),
+ "weights": ['uniform', 'distance'],
+ "leaf_size": randint(15, 100)},
+ 'linear': {"alpha": [0.0001, 0.01, 1.0, 10.0, 1000.0],
+ "tol": [0.00001, 0.0001, 0.001, 0.01]}
+}
+
+
+TEMPLATES = pkg_resources.resource_filename('q2_sample_classifier', 'assets')
+
+
+def _extract_features(feature_data):
+ ids = feature_data.ids('observation')
+ features = np.empty(feature_data.shape[1], dtype=dict)
+ for i, row in enumerate(feature_data.matrix_data.T):
+ features[i] = {ids[ix]: d for ix, d in zip(row.indices, row.data)}
+ return features
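+
+
+# A minimal usage sketch (not part of the plugin; table contents are
+# hypothetical) of the representation produced above: one
+# {observation_id: count} dict per sample, ready for sklearn's
+# DictVectorizer.
+#
+#     tab = biom.Table(np.array([[1., 0.], [2., 3.]]),
+#                      ['o1', 'o2'], ['s1', 's2'])
+#     _extract_features(tab)
+#     # array([{'o1': 1.0, 'o2': 2.0}, {'o2': 3.0}], dtype=object)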
+
+
+def _load_data(feature_data, targets_metadata, missing_samples, extract=True):
+    '''Load feature and target data and match their sample IDs.
+
+    feature_data: biom.Table
+ feature X sample values.
+ targets_metadata: qiime2.Metadata
+ target (columns) X sample (rows) values.
+ '''
+ # Load metadata, attempt to convert to numeric
+ targets = targets_metadata.to_dataframe()
+
+ if missing_samples == 'error':
+ _validate_metadata_is_superset(targets, feature_data)
+
+    # filter features and targets so samples match
+ index = set(targets.index)
+ index = [ix for ix in feature_data.ids() if ix in index]
+ targets = targets.loc[index]
+ feature_data = feature_data.filter(index, inplace=False)
+ if extract:
+ feature_data = _extract_features(feature_data)
+
+ return feature_data, targets
+
+
+def _validate_metadata_is_superset(metadata, table):
+ metadata_ids = set(metadata.index.tolist())
+ table_ids = set(table.ids())
+ missing_ids = table_ids.difference(metadata_ids)
+ if len(missing_ids) > 0:
+ raise ValueError('Missing samples in metadata: %r' % missing_ids)
+
+
+def _extract_important_features(index, top):
+ '''Find top features, match names to indices, sort.
+ index: ndarray
+ Feature names
+ top: array
+ Feature importance scores, coef_ scores, or ranking of scores.
+ '''
+ # is top a 1-d or multi-d array?
+ # coef_ is a multidimensional array of shape = [n_class-1, n_features]
+ if any(isinstance(i, list) for i in top) or top.ndim > 1:
+ if issparse(top):
+ top = top.todense()
+ imp = pd.DataFrame(
+ top, index=["importance{0}".format(n) for n in range(len(top))]).T
+ # ensemble estimators and RFECV return 1-d arrays
+ else:
+ imp = pd.DataFrame(top, columns=["importance"])
+ imp.index = index
+ imp.index.name = 'feature'
+ imp = sort_importances(imp, ascending=False)
+ return imp
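+
+
+# For example (hypothetical values): a 1-d importance array yields a single
+# 'importance' column, while a 2-d coef_ array of shape
+# [n_class-1, n_features] is transposed into 'importance0', 'importance1',
+# ... columns.
+#
+#     _extract_important_features(['a', 'b'], np.array([0.2, 0.1]))
+#     #          importance
+#     # feature
+#     # a               0.2
+#     # b               0.1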
+
+
+def _split_training_data(feature_data, targets, column, test_size=0.2,
+ stratify=None, random_state=None, drop_na=True):
+ '''Split data sets into training and test sets.
+
+ feature_data: biom.Table
+ feature X sample values.
+ targets: pandas.DataFrame
+ target (columns) X sample (rows) values.
+ column: str
+ Target column contained in targets.
+ test_size: float
+ Fraction of data to be reserved as test data.
+ stratify: array-like
+ Stratify data using this as class labels. E.g., set to df
+ column by setting stratify=df[column]
+ random_state: int or None
+ Int to use for seeding random state. Random if None.
+ '''
+ # Define target / predictor data
+ targets = targets[column]
+
+ if drop_na:
+ targets = targets.dropna()
+
+ if test_size > 0.0:
+ try:
+ y_train, y_test = train_test_split(
+ targets, test_size=test_size, stratify=stratify,
+ random_state=random_state)
+ except ValueError:
+ _stratification_error()
+ else:
+ warning_msg = _warn_zero_test_split()
+ warnings.warn(warning_msg, UserWarning)
+
+ X_train, X_test, y_train, y_test = (
+ feature_data, feature_data, targets, targets)
+
+ tri = y_train.index
+    # filter and sort biom tables to match split/filtered metadata ids,
+    # skipping the filter if no splitting/dropna was performed. When
+    # test_size == 0.0, X_train and X_test were already assigned above, so
+    # no else branch is needed here.
+ if list(tri) != list(feature_data.ids()):
+ tei = y_test.index
+ X_train = feature_data.filter(tri, inplace=False).sort_order(tri)
+ X_test = feature_data.filter(tei, inplace=False).sort_order(tei)
+
+ return X_train, X_test, y_train, y_test
+
+
+def _stratification_error():
+ raise ValueError((
+ 'You have chosen to predict a metadata column that contains '
+ 'one or more values that match only one sample. For proper '
+ 'stratification of data into training and test sets, each '
+ 'class (value) must contain at least two samples. This is a '
+ 'requirement for classification problems, but stratification '
+ 'can be disabled for regression by setting stratify=False. '
+ 'Alternatively, remove all samples that bear a unique class '
+ 'label for your chosen metadata column. Note that disabling '
+ 'stratification can negatively impact predictive accuracy for '
+ 'small data sets.'))
+
+
+def _rfecv_feature_selection(feature_data, targets, estimator,
+ cv=5, step=1, scoring=None, n_jobs=1):
+ '''Optimize feature depth by testing model accuracy at
+ multiple feature depths with cross-validated recursive
+ feature elimination.
+
+    Parameters
+    ----------
+    feature_data: list of dicts
+        Training set feature data x samples.
+    targets: pandas.DataFrame
+        Training set target value data x samples.
+    estimator: sklearn estimator
+        Estimator to use, with parameters set.
+    cv: int
+        Number of k-fold cross-validations to perform.
+    step: float or int
+        If float, reduce this fraction of features at each step.
+        If int, reduce this number of features at each step.
+    scoring: str or callable
+        Scoring metric passed to RFECV.
+    n_jobs: int
+        Number of parallel jobs to run.
+
+    For other params, see sklearn.ensemble.RandomForestRegressor.
+
+    Returns
+    -------
+    importance: pandas.DataFrame
+        Top features surviving recursive feature elimination.
+    rfe_scores: pandas.Series
+        Cross-validated accuracy at each feature count.
+ '''
+
+ rfecv = Pipeline(
+ [('dv', estimator.named_steps.dv),
+ ('est', RFECV(estimator=estimator.named_steps.est, step=step, cv=cv,
+ scoring=scoring, n_jobs=n_jobs))])
+
+ rfecv.fit(feature_data, targets.values.ravel())
+
+ # Describe top features
+ n_opt = rfecv.named_steps.est.n_features_
+ importance = _extract_important_features(
+ rfecv.named_steps.dv.get_feature_names(),
+ rfecv.named_steps.est.ranking_)
+ importance = sort_importances(importance, ascending=True)[:n_opt]
+
+ rfe_scores = _extract_rfe_scores(rfecv.named_steps.est)
+
+ return importance, rfe_scores
+
+
+def _extract_rfe_scores(rfecv):
+ n_features = len(rfecv.ranking_)
+ # If using fractional step, step = integer of fraction * n_features
+ if rfecv.step < 1:
+ rfecv.step = int(rfecv.step * n_features)
+    # Need to manually calculate x-axis, as rfecv.grid_scores_ is a 1-d array
+ x = [n_features - (n * rfecv.step)
+ for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
+ if x[0] < 1:
+ x[0] = 1
+ return pd.Series(rfecv.grid_scores_, index=x, name='Accuracy')
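+
+
+# Worked example (hypothetical values) of the x-axis reconstruction above:
+# with n_features = 10, step = 3, and four grid scores, the feature counts
+# tested by RFECV are rebuilt as
+#     x = [10 - n * 3 for n in range(3, -1, -1)]  # -> [1, 4, 7, 10]
+# i.e., from the smallest feature subset up to the full feature set, with
+# the first count clipped to 1 when the step overshoots.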
+
+
+def nested_cross_validation(table, metadata, cv, random_state, n_jobs,
+ n_estimators, estimator, stratify,
+ parameter_tuning, classification, scoring,
+ missing_samples='error'):
+ # extract column name from NumericMetadataColumn
+ column = metadata.name
+
+ # load feature data, metadata targets
+ X_train, y_train = _load_data(
+ table, metadata, missing_samples=missing_samples)
+
+ # disable feature selection for unsupported estimators
+ optimize_feature_selection, calc_feature_importance = \
+ _disable_feature_selection(estimator, False)
+
+ # specify parameters and distributions to sample from for parameter tuning
+ estimator, param_dist, parameter_tuning = _set_parameters_and_estimator(
+ estimator, table, y_train[column], column, n_estimators, n_jobs, cv,
+ random_state, parameter_tuning, classification)
+
+ # predict values for all samples via (nested) CV
+ scores, predictions, importances, tops, probabilities = \
+ _fit_and_predict_cv(
+ X_train, y_train[column], estimator, param_dist, n_jobs, scoring,
+ random_state, cv, stratify, calc_feature_importance,
+ parameter_tuning)
+
+ # Print accuracy score to stdout
+ print("Estimator Accuracy: {0} ± {1}".format(
+ np.mean(scores), np.std(scores)))
+
+ # TODO: save down estimator with tops parameters (currently the estimator
+ # would be untrained, and tops parameters are not reported)
+
+ return predictions['prediction'], importances, probabilities
+
+
+def _fit_estimator(features, targets, estimator, n_estimators=100, step=0.05,
+ cv=5, random_state=None, n_jobs=1,
+ optimize_feature_selection=False, parameter_tuning=False,
+ missing_samples='error', classification=True):
+ # extract column name from CategoricalMetadataColumn
+ column = targets.to_series().name
+
+ # load data
+ X_train, y_train = _load_data(
+ features, targets, missing_samples=missing_samples)
+
+ # disable feature selection for unsupported estimators
+ optimize_feature_selection, calc_feature_importance = \
+ _disable_feature_selection(estimator, optimize_feature_selection)
+
+ # specify parameters and distributions to sample from for parameter tuning
+ estimator, param_dist, parameter_tuning = _set_parameters_and_estimator(
+ estimator, features, targets, column, n_estimators, n_jobs, cv,
+ random_state, parameter_tuning, classification=classification)
+
+ # optimize training feature count
+ if optimize_feature_selection:
+ X_train, importances, rfe_scores = _optimize_feature_selection(
+ X_train=X_train, y_train=y_train,
+ estimator=estimator, cv=cv, step=step, n_jobs=n_jobs)
+ else:
+ importances = None
+
+ # optimize tuning parameters on your training set
+ if parameter_tuning:
+ # tune parameters
+ estimator = _tune_parameters(
+ X_train, y_train, estimator, param_dist, n_iter_search=20,
+ n_jobs=n_jobs, cv=cv, random_state=random_state).best_estimator_
+
+ # fit estimator
+ estimator.fit(X_train, y_train.values.ravel())
+
+ importances = _attempt_to_calculate_feature_importances(
+ estimator, calc_feature_importance,
+ optimize_feature_selection, importances)
+
+ if optimize_feature_selection:
+ estimator.rfe_scores = rfe_scores
+
+ # TODO: drop this when we get around to supporting optional outputs
+ # methods cannot output an empty importances artifact; only KNN has no
+    # feature importance, but just warn and output all features with
+    # importance = NaN
+ if importances is None:
+ _warn_feature_selection()
+ importances = pd.DataFrame(index=features.ids('observation'))
+ importances["importance"] = np.nan
+ importances.index.name = 'feature'
+
+ return estimator, importances
+
+
+def _attempt_to_calculate_feature_importances(
+ estimator, calc_feature_importance,
+ optimize_feature_selection, importances=None):
+ # calculate feature importances, if appropriate for the estimator
+ if calc_feature_importance:
+ importances = _calculate_feature_importances(estimator)
+ # otherwise, if optimizing feature selection, just return ranking from RFE
+ elif optimize_feature_selection:
+ pass
+    # otherwise, we have neither weights nor selection, so there are no
+    # importances to report
+ else:
+ importances = None
+ return importances
+
+
+def _prepare_training_data(features, targets, column, test_size,
+ random_state, load_data=True, stratify=True,
+ missing_samples='error'):
+ # load data
+ if load_data:
+ features, targets = _load_data(
+ features, targets, missing_samples=missing_samples, extract=False)
+
+ # split into training and test sets
+ if stratify:
+ strata = targets[column]
+ else:
+ strata = None
+
+ X_train, X_test, y_train, y_test = _split_training_data(
+ features, targets, column, test_size, strata, random_state)
+
+ return X_train, X_test, y_train, y_test
+
+
+def _optimize_feature_selection(X_train, y_train, estimator, cv, step, n_jobs):
+ importance, rfe_scores = _rfecv_feature_selection(
+ X_train, y_train, estimator=estimator, cv=cv, step=step, n_jobs=n_jobs)
+
+ index = set(importance.index)
+ X_train = [{k: r[k] for k in r.keys() & index} for r in X_train]
+ return X_train, importance, rfe_scores
+
+
+def _calculate_feature_importances(estimator):
+ # only set calc_feature_importance=True if estimator has attributes
+ # feature_importances_ or coef_ to report feature importance/weights
+ try:
+ importances = _extract_important_features(
+ estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.est.feature_importances_)
+ # is there a better way to determine whether estimator has coef_ ?
+ except AttributeError:
+ importances = _extract_important_features(
+ estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.est.coef_)
+ return importances
+
+
+def _predict_and_plot(output_dir, y_test, y_pred, vmin=None, vmax=None,
+ classification=True, palette='sirocco'):
+ if classification:
+ x_classes = set(y_test.unique())
+ y_classes = set(y_pred.unique())
+ # validate: if classes are exclusive, accuracy is zero; user probably
+ # input the wrong data!
+ if len(x_classes.intersection(y_classes)) < 1:
+ raise _class_overlap_error()
+ else:
+ classes = sorted(list(x_classes.union(y_classes)))
+ predictions, predict_plot = _plot_confusion_matrix(
+ y_test, y_pred, classes, normalize=True, palette=palette,
+ vmin=vmin, vmax=vmax)
+ else:
+ predictions = _linear_regress(y_test, y_pred)
+ predict_plot = _regplot_from_dataframe(y_test, y_pred)
+
+ if output_dir is not None:
+ predict_plot.get_figure().savefig(
+ join(output_dir, 'predictions.png'), bbox_inches='tight')
+ predict_plot.get_figure().savefig(
+ join(output_dir, 'predictions.pdf'), bbox_inches='tight')
+
+ plt.close('all')
+ return predictions, predict_plot
+
+
+def _class_overlap_error():
+ raise ValueError(
+ 'Predicted and true metadata values do not overlap. Check your '
+ 'inputs to ensure that you are using the correct data. Is the '
+ 'correct metadata column being compared to these predictions? Was '
+ 'your model trained on the correct type of data? Prediction '
+ 'sample classes (metadata values) should match or be a subset of '
+ 'training sample classes. If you are attempting to calculate '
+ 'accuracy scores on predictions from a sample regressor, use '
+ 'scatterplot instead.')
+
+
+def _match_series_or_die(predictions, truth, missing_samples='error'):
+ # validate input metadata and predictions, output intersection.
+ # truth must be a superset of predictions
+ truth_ids = set(truth.index)
+ predictions_ids = set(predictions.index)
+ missing_ids = predictions_ids - truth_ids
+ if missing_samples == 'error' and len(missing_ids) > 0:
+ raise ValueError('Missing samples in metadata: %r' % missing_ids)
+
+ # match metadata / prediction IDs
+ predictions, truth = predictions.align(truth, axis=0, join='inner')
+
+ return predictions, truth
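+
+
+# For example (hypothetical series): with missing_samples='ignore', only the
+# intersection of sample IDs survives the inner join.
+#
+#     p = pd.Series([1, 2], index=['s1', 's2'])
+#     t = pd.Series([3, 4], index=['s2', 's3'])
+#     _match_series_or_die(p, t, 'ignore')
+#     # -> both series reduced to the shared index ['s2']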
+
+
+def _plot_accuracy(output_dir, predictions, truth, probabilities,
+ missing_samples, classification, palette, plot_title,
+ vmin=None, vmax=None):
+    '''Plot accuracy results for either categorical or numeric data held in
+    two pd.Series and send them to the visualizer.
+ '''
+ truth = truth.to_series()
+
+    # detect test_size == 0.0, i.e., predictions cover the complete dataset
+ if (missing_samples == 'ignore') & (
+ predictions.shape[0] == truth.shape[0]):
+ warning_msg = _warn_zero_test_split()
+ else:
+ warning_msg = None
+
+ predictions, truth = _match_series_or_die(
+ predictions, truth, missing_samples)
+
+ # calculate prediction accuracy and plot results
+ predictions, predict_plot = _predict_and_plot(
+ output_dir, truth, predictions, vmin=vmin, vmax=vmax,
+ classification=classification, palette=palette)
+
+ # optionally generate ROC curves for classification results
+ if probabilities is not None:
+ probabilities, truth = _match_series_or_die(
+ probabilities, truth, missing_samples)
+ roc = _generate_roc_plots(truth, probabilities, palette)
+ roc.savefig(join(output_dir, 'roc_plot.png'), bbox_inches='tight')
+ roc.savefig(join(output_dir, 'roc_plot.pdf'), bbox_inches='tight')
+
+ # output to viz
+ _visualize(output_dir=output_dir, estimator=None, cm=predictions,
+ roc=probabilities, optimize_feature_selection=False,
+ title=plot_title, warning_msg=warning_msg)
+
+
+def sort_importances(importances, ascending=False):
+ return importances.sort_values(
+ by=importances.columns[0], ascending=ascending)
+
+
+def _extract_estimator_parameters(estimator):
+ # summarize model accuracy and params
+ # (drop pipeline params and individual base estimators)
+ estimator_params = {k: v for k, v in estimator.get_params().items() if
+ k.startswith('est__') and k != 'est__base_estimator'}
+ return pd.Series(estimator_params, name='Parameter setting')
+
+
+def _summarize_estimator(output_dir, sample_estimator):
+ try:
+ rfep = _plot_RFE(
+ x=sample_estimator.rfe_scores.index, y=sample_estimator.rfe_scores)
+ rfep.savefig(join(output_dir, 'rfe_plot.png'))
+ rfep.savefig(join(output_dir, 'rfe_plot.pdf'))
+ plt.close('all')
+ optimize_feature_selection = True
+ # generate rfe scores file
+ df = pd.DataFrame(data={'rfe_score': sample_estimator.rfe_scores},
+ index=sample_estimator.rfe_scores.index)
+ df.index.name = 'feature_count'
+ df.to_csv(join(output_dir, 'rfe_scores.tsv'), sep='\t', index=True)
+ # if the rfe_scores attribute does not exist, do nothing
+ except AttributeError:
+ optimize_feature_selection = False
+
+ _visualize(output_dir=output_dir, estimator=sample_estimator, cm=None,
+ roc=None, optimize_feature_selection=optimize_feature_selection,
+ title='Estimator Summary')
+
+
+def _visualize(output_dir, estimator, cm, roc,
+ optimize_feature_selection=True, title='results',
+ warning_msg=None):
+
+ pd.set_option('display.max_colwidth', None)
+
+ # summarize model accuracy and params
+ if estimator is not None:
+ result = _extract_estimator_parameters(estimator)
+ result = q2templates.df_to_html(result.to_frame())
+ else:
+ result = False
+
+ if cm is not None:
+ cm.to_csv(join(
+ output_dir, 'predictive_accuracy.tsv'), sep='\t', index=True)
+ cm = q2templates.df_to_html(cm)
+
+ if roc is not None:
+ roc = True
+
+ index = join(TEMPLATES, 'index.html')
+ q2templates.render(index, output_dir, context={
+ 'title': title,
+ 'result': result,
+ 'predictions': cm,
+ 'roc': roc,
+ 'optimize_feature_selection': optimize_feature_selection,
+ 'warning_msg': warning_msg})
+
+
+def _visualize_knn(output_dir, params: pd.Series):
+ result = q2templates.df_to_html(params.to_frame())
+ index = join(TEMPLATES, 'index.html')
+ q2templates.render(index, output_dir, context={
+ 'title': 'Estimator Summary',
+ 'result': result,
+ 'predictions': None,
+ 'importances': None,
+ 'classification': True,
+ 'optimize_feature_selection': False})
+
+
+def _map_params_to_pipeline(param_dist):
+ return {'est__' + param: dist for param, dist in param_dist.items()}
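+
+
+# For example (hypothetical input), parameter names are prefixed so that
+# RandomizedSearchCV targets the 'est' step of the Pipeline:
+#     _map_params_to_pipeline({'max_depth': [4, 8]})
+#     # -> {'est__max_depth': [4, 8]}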
+
+
+def _tune_parameters(X_train, y_train, estimator, param_dist, n_iter_search=20,
+ n_jobs=1, cv=None, random_state=None):
+ # run randomized search
+ random_search = RandomizedSearchCV(
+ estimator, param_distributions=param_dist, n_iter=n_iter_search,
+ n_jobs=n_jobs, cv=cv, random_state=random_state)
+ random_search.fit(X_train, y_train.values.ravel())
+ return random_search
+
+
+def _fit_and_predict_cv(table, metadata, estimator, param_dist, n_jobs,
+ scoring=accuracy_score, random_state=None, cv=10,
+ stratify=True, calc_feature_importance=False,
+ parameter_tuning=False):
+    '''Train and test estimators via cross-validation.
+
+    scoring: callable
+        Use accuracy_score for classification, mean_squared_error for
+        regression.
+ '''
+ # Set CV method
+ if stratify:
+ _cv = StratifiedKFold(
+ n_splits=cv, shuffle=True, random_state=random_state)
+ else:
+ _cv = KFold(n_splits=cv, shuffle=True, random_state=random_state)
+
+ predictions = pd.DataFrame()
+ probabilities = pd.DataFrame()
+ scores = []
+ top_params = []
+ importances = []
+ if isinstance(table, biom.Table):
+ features = _extract_features(table)
+ else:
+ features = table
+ for train_index, test_index in _cv.split(features, metadata):
+ X_train = features[train_index]
+ y_train = metadata.iloc[train_index]
+ # perform parameter tuning in inner loop
+ if parameter_tuning:
+ estimator = _tune_parameters(
+ X_train, y_train, estimator, param_dist,
+ n_iter_search=20, n_jobs=n_jobs, cv=cv,
+ random_state=random_state).best_estimator_
+ else:
+            # fit estimator on the outer training set (no inner tuning)
+ estimator.fit(X_train, y_train.values.ravel())
+ # predict values for outer loop test set
+ test_set = features[test_index]
+ index = metadata.iloc[test_index]
+ pred = pd.DataFrame(estimator.predict(test_set), index=index.index)
+
+ # log predictions results
+ predictions = pd.concat([predictions, pred])
+
+ # log prediction probabilities (classifiers only)
+ if estimator.named_steps.est.__class__.__name__ in _classifiers:
+ probs = predict_probabilities(estimator, test_set, index.index)
+ probabilities = pd.concat([probabilities, probs])
+
+ # log accuracy on that fold
+ scores += [scoring(pred, index)]
+ # log feature importances
+ if calc_feature_importance:
+ imp = _calculate_feature_importances(estimator)
+ importances += [imp]
+ # log top parameters
+ # for now we will cast as a str (instead of dict) so that we can count
+ # frequency of unique elements below
+ top_params += [str(estimator.named_steps.est.get_params())]
+
+ # Report most frequent best params
+ # convert top_params to a set, order by count (hence str conversion above)
+ # max will be the most frequent... then we convert back to a dict via eval
+ # which should be safe since this is always a dict of param values reported
+ # by sklearn.
+ tops = max(set(top_params), key=top_params.count)
+ tops = eval(tops)
+
+ # calculate mean feature importances
+ if calc_feature_importance:
+ importances = _mean_feature_importance(importances)
+ else:
+ importances = _null_feature_importance(table)
+
+ predictions.columns = ['prediction']
+ predictions.index.name = 'SampleID'
+ probabilities.index.name = 'SampleID'
+
+ return scores, predictions, importances, tops, probabilities
+
+
+def predict_probabilities(estimator, test_set, index):
+ '''
+ Predict class probabilities for a set of test samples.
+
+ estimator: sklearn trained classifier
+    test_set: array-like of feature data (X values) for the test set samples
+        whose class probabilities will be predicted.
+ index: array-like of sample names
+ '''
+ # all used classifiers have a predict_proba attribute
+ # (approximated for SVCs)
+ probs = pd.DataFrame(estimator.predict_proba(test_set),
+ index=index, columns=estimator.classes_)
+
+ return probs
+
+
+def _mean_feature_importance(importances):
+ '''Calculate mean feature importance across a list of pd.dataframes
+ containing importance scores of the same features from multiple models
+ (e.g., CV importance scores).
+ '''
+ imp = pd.concat(importances, axis=1, sort=True)
+ # groupby column name instead of taking column mean to support 2d arrays
+ imp = imp.groupby(imp.columns, axis=1).mean()
+ return imp.sort_values(imp.columns[0], ascending=False)
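+
+
+# A minimal sketch (hypothetical values) of the collapse performed above:
+# frames sharing a column name are aligned on the feature index and averaged
+# element-wise, then sorted by the first column.
+#
+#     a = pd.DataFrame({'importance0': [1., 3.]}, index=['f1', 'f2'])
+#     b = pd.DataFrame({'importance0': [3., 5.]}, index=['f1', 'f2'])
+#     _mean_feature_importance([a, b])
+#     #     importance0
+#     # f2          4.0
+#     # f1          2.0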
+
+
+def _null_feature_importance(table):
+ feature_extractor = DictVectorizer()
+ feature_extractor.fit(table)
+ imp = pd.DataFrame(index=feature_extractor.get_feature_names())
+ imp.index.name = "feature"
+ imp["importance"] = 1
+ return imp
+
+
+def _select_estimator(estimator, n_jobs, n_estimators, random_state=None):
+ '''Select estimator and parameters from argument name.'''
+ # Regressors
+ if estimator == 'RandomForestRegressor':
+ param_dist = {**parameters['ensemble'], **parameters['bootstrap']}
+ estimator = RandomForestRegressor(
+ n_jobs=n_jobs, n_estimators=n_estimators,
+ random_state=random_state)
+ elif estimator == 'ExtraTreesRegressor':
+ param_dist = {**parameters['ensemble'], **parameters['bootstrap']}
+ estimator = ExtraTreesRegressor(
+ n_jobs=n_jobs, n_estimators=n_estimators,
+ random_state=random_state)
+ elif estimator == 'GradientBoostingRegressor':
+ param_dist = parameters['ensemble']
+ estimator = GradientBoostingRegressor(
+ n_estimators=n_estimators, random_state=random_state)
+ elif estimator == 'SVR':
+ param_dist = {**parameters['svm'], 'epsilon': [0.0, 0.1]}
+ estimator = SVR(kernel='rbf', gamma='scale')
+ elif estimator == 'LinearSVR':
+ param_dist = {**parameters['svm'], 'epsilon': [0.0, 0.1]}
+ estimator = SVR(kernel='linear')
+ elif estimator == 'Ridge':
+ param_dist = parameters['linear']
+ estimator = Ridge(solver='auto', random_state=random_state)
+ elif estimator == 'Lasso':
+ param_dist = parameters['linear']
+ estimator = Lasso(random_state=random_state)
+ elif estimator == 'ElasticNet':
+ param_dist = parameters['linear']
+ estimator = ElasticNet(random_state=random_state)
+ elif estimator == 'KNeighborsRegressor':
+ param_dist = parameters['kneighbors']
+ estimator = KNeighborsRegressor(algorithm='auto')
+
+ # Classifiers
+ elif estimator == 'RandomForestClassifier':
+ param_dist = {**parameters['ensemble'], **parameters['bootstrap'],
+ **parameters['criterion']}
+ estimator = RandomForestClassifier(
+ n_jobs=n_jobs, n_estimators=n_estimators,
+ random_state=random_state)
+ elif estimator == 'ExtraTreesClassifier':
+ param_dist = {**parameters['ensemble'], **parameters['bootstrap'],
+ **parameters['criterion']}
+ estimator = ExtraTreesClassifier(
+ n_jobs=n_jobs, n_estimators=n_estimators,
+ random_state=random_state)
+ elif estimator == 'GradientBoostingClassifier':
+ param_dist = parameters['ensemble']
+ estimator = GradientBoostingClassifier(
+ n_estimators=n_estimators, random_state=random_state)
+ elif estimator == 'LinearSVC':
+ param_dist = parameters['svm']
+ estimator = SVC(kernel='linear', random_state=random_state,
+ gamma='scale', probability=True)
+ elif estimator == 'SVC':
+ param_dist = parameters['svm']
+ estimator = SVC(kernel='rbf', random_state=random_state,
+ gamma='scale', probability=True)
+ elif estimator == 'KNeighborsClassifier':
+ param_dist = parameters['kneighbors']
+ estimator = KNeighborsClassifier(algorithm='auto')
+
+ return param_dist, estimator
+
+
+def _train_adaboost_base_estimator(table, metadata, column, n_estimators,
+ n_jobs, cv, random_state=None,
+ parameter_tuning=False,
+ classification=True,
+ missing_samples='error'):
+ param_dist = parameters['ensemble']
+ if classification:
+ base_estimator = DecisionTreeClassifier()
+ adaboost_estimator = AdaBoostClassifier
+ else:
+ base_estimator = DecisionTreeRegressor()
+ adaboost_estimator = AdaBoostRegressor
+ base_estimator = Pipeline(
+ [('dv', DictVectorizer()), ('est', base_estimator)])
+
+ if parameter_tuning:
+ features, targets = _load_data(
+ table, metadata, missing_samples=missing_samples)
+ param_dist = _map_params_to_pipeline(param_dist)
+ base_estimator = _tune_parameters(
+ features, targets[column], base_estimator, param_dist,
+ n_jobs=n_jobs, cv=cv, random_state=random_state).best_estimator_
+
+ return Pipeline(
+ [('dv', base_estimator.named_steps.dv),
+ ('est', adaboost_estimator(base_estimator.named_steps.est,
+ n_estimators, random_state=random_state))])
+
+
+def _disable_feature_selection(estimator, optimize_feature_selection):
+ '''disable feature selection for unsupported classifiers.'''
+
+ unsupported = ['KNeighborsClassifier', 'SVC', 'KNeighborsRegressor', 'SVR']
+
+ if estimator in unsupported:
+ optimize_feature_selection = False
+ calc_feature_importance = False
+ _warn_feature_selection()
+ else:
+ calc_feature_importance = True
+
+ return optimize_feature_selection, calc_feature_importance
+
+
+def _set_parameters_and_estimator(estimator, table, metadata, column,
+ n_estimators, n_jobs, cv, random_state,
+ parameter_tuning, classification=True,
+ missing_samples='error'):
+ # specify parameters and distributions to sample from for parameter tuning
+ if estimator in ['AdaBoostClassifier', 'AdaBoostRegressor']:
+ estimator = _train_adaboost_base_estimator(
+ table, metadata, column, n_estimators, n_jobs, cv, random_state,
+ parameter_tuning, classification=classification,
+ missing_samples=missing_samples)
+ parameter_tuning = False
+ param_dist = None
+ else:
+ param_dist, estimator = _select_estimator(
+ estimator, n_jobs, n_estimators, random_state)
+ estimator = Pipeline([('dv', DictVectorizer()), ('est', estimator)])
+ param_dist = _map_params_to_pipeline(param_dist)
+ return estimator, param_dist, parameter_tuning
+
+
+def _warn_feature_selection():
+    warning = (
+        'This estimator does not support recursive feature elimination with '
+        'the parameter settings requested. See documentation or try a '
+        'different estimator model.')
+ warnings.warn(warning, UserWarning)
+
+
+def _warn_zero_test_split():
+ return 'Using test_size = 0.0, you are using your complete dataset for ' \
+ 'fitting the estimator. Hence, any returned model evaluations are ' \
+ 'based on that same training dataset and are not representative of ' \
+ 'your model\'s performance on a previously unseen dataset. Please ' \
+ 'consider evaluating this model on a separate dataset.'
diff --git a/q2_sample_classifier/visuals.py b/q2_sample_classifier/visuals.py
new file mode 100644
index 0000000..6a7f0b1
--- /dev/null
+++ b/q2_sample_classifier/visuals.py
@@ -0,0 +1,388 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from sklearn.metrics import (
+ mean_squared_error, confusion_matrix, accuracy_score, roc_curve, auc)
+from sklearn.preprocessing import label_binarize
+from itertools import cycle
+from numpy import interp
+import pandas as pd
+import numpy as np
+import seaborn as sns
+from scipy.stats import linregress
+import matplotlib.pyplot as plt
+
+
+def _custom_palettes():
+ return {
+ 'YellowOrangeBrown': 'YlOrBr',
+ 'YellowOrangeRed': 'YlOrRd',
+ 'OrangeRed': 'OrRd',
+ 'PurpleRed': 'PuRd',
+ 'RedPurple': 'RdPu',
+ 'BluePurple': 'BuPu',
+ 'GreenBlue': 'GnBu',
+ 'PurpleBlue': 'PuBu',
+ 'YellowGreen': 'YlGn',
+ 'summer': 'summer_r',
+ 'copper': 'copper_r',
+ 'viridis': 'viridis_r',
+ 'cividis': 'cividis_r',
+ 'plasma': 'plasma_r',
+ 'inferno': 'inferno_r',
+ 'magma': 'magma_r',
+ 'sirocco': sns.cubehelix_palette(
+ dark=0.15, light=0.95, as_cmap=True),
+ 'drifting': sns.cubehelix_palette(
+ start=5, rot=0.4, hue=0.8, as_cmap=True),
+ 'melancholy': sns.cubehelix_palette(
+ start=25, rot=0.4, hue=0.8, as_cmap=True),
+ 'enigma': sns.cubehelix_palette(
+ start=2, rot=0.6, gamma=2.0, hue=0.7, dark=0.45, as_cmap=True),
+ 'eros': sns.cubehelix_palette(start=0, rot=0.4, gamma=2.0, hue=2,
+ light=0.95, dark=0.5, as_cmap=True),
+ 'spectre': sns.cubehelix_palette(
+ start=1.2, rot=0.4, gamma=2.0, hue=1, dark=0.4, as_cmap=True),
+ 'ambition': sns.cubehelix_palette(start=2, rot=0.9, gamma=3.0, hue=2,
+ light=0.9, dark=0.5, as_cmap=True),
+ 'mysteriousstains': sns.light_palette(
+ 'baby shit green', input='xkcd', as_cmap=True),
+ 'daydream': sns.blend_palette(
+ ['egg shell', 'dandelion'], input='xkcd', as_cmap=True),
+ 'solano': sns.blend_palette(
+ ['pale gold', 'burnt umber'], input='xkcd', as_cmap=True),
+ 'navarro': sns.blend_palette(
+ ['pale gold', 'sienna', 'pine green'], input='xkcd', as_cmap=True),
+ 'dandelions': sns.blend_palette(
+ ['sage', 'dandelion'], input='xkcd', as_cmap=True),
+ 'deepblue': sns.blend_palette(
+ ['really light blue', 'petrol'], input='xkcd', as_cmap=True),
+ 'verve': sns.cubehelix_palette(
+ start=1.4, rot=0.8, gamma=2.0, hue=1.5, dark=0.4, as_cmap=True),
+ 'greyscale': sns.blend_palette(
+ ['light grey', 'dark grey'], input='xkcd', as_cmap=True)}
+
+
+def _regplot_from_dataframe(x, y, plot_style="whitegrid", arb=True,
+ color="grey"):
+    '''Seaborn regplot; when arb is True, draw a dotted 1:1 line.'''
+ sns.set_style(plot_style)
+ reg = sns.regplot(x=x, y=y, color=color)
+ plt.xlabel('True value')
+ plt.ylabel('Predicted value')
+ if arb is True:
+ x0, x1 = reg.axes.get_xlim()
+ y0, y1 = reg.axes.get_ylim()
+ lims = [min(x0, y0), max(x1, y1)]
+ reg.axes.plot(lims, lims, ':k')
+ return reg
+
+
+def _linear_regress(actual, pred):
+    '''Calculate linear regression on predicted versus expected values.
+
+    actual: pandas.Series
+        Actual y-values for test samples.
+    pred: pandas.Series
+        Predicted y-values for test samples.
+ '''
+ slope, intercept, r_value, p_value, std_err = linregress(actual, pred)
+ mse = mean_squared_error(actual, pred)
+ return pd.DataFrame(
+ [(mse, r_value, r_value**2, p_value, std_err, slope, intercept)],
+ columns=["Mean squared error", "r-value", "r-squared", "P-value",
+ "Std Error", "Slope", "Intercept"],
+ index=[actual.name])
+
+
+def _plot_heatmap_from_confusion_matrix(cm, palette, vmin=None, vmax=None):
+ palette = _custom_palettes()[palette]
+ plt.figure()
+ scaler, labelsize, dpi, cbar_min = 20, 8, 100, .15
+ sns.set(rc={'xtick.labelsize': labelsize, 'ytick.labelsize': labelsize,
+ 'figure.dpi': dpi})
+ fig, (ax, cax) = plt.subplots(ncols=2, constrained_layout=True)
+ heatmap = sns.heatmap(cm, vmin=vmin, vmax=vmax, cmap=palette, ax=ax,
+ cbar_ax=cax, cbar_kws={'label': 'Proportion'},
+ square=True, xticklabels=True, yticklabels=True)
+
+ # Resize the plot dynamically based on number of classes
+ hm_pos = ax.get_position()
+ scale = len(cm) / scaler
+ # prevent cbar from getting unreadably small
+ cbar_height = max(cbar_min, scale)
+ ax.set_position([hm_pos.x0, hm_pos.y0, scale, scale])
+ cax.set_position([hm_pos.x0 + scale * .95, hm_pos.y0, scale / len(cm),
+ cbar_height])
+
+ # Make the heatmap subplot (not the colorbar) the active axis object so
+ # labels apply correctly on return
+ plt.sca(ax)
+ return heatmap
+
+
+def _add_sample_size_to_xtick_labels(ser, classes):
+ '''ser is a pandas series.'''
+ labels = ['{0} (n={1})'.format(c, ser[ser == c].count()) for c in classes]
+ return labels
+
+
+def _plot_confusion_matrix(y_test, y_pred, classes, normalize, palette,
+ vmin=None, vmax=None):
+
+ accuracy = accuracy_score(y_test, pd.DataFrame(y_pred))
+ cm = confusion_matrix(y_test, y_pred)
+ # normalize
+ if normalize:
+ cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
+
+ # fill na values (e.g., true values that were not predicted) otherwise
+ # these will appear as whitespace in plots and results table.
+ cm = np.nan_to_num(cm)
+ _check_vmin_and_vmax(cm, vmin, vmax)
+
+ confusion = _plot_heatmap_from_confusion_matrix(cm, palette, vmin=vmin,
+ vmax=vmax)
+
+ x_tick_labels = _add_sample_size_to_xtick_labels(y_pred, classes)
+ y_tick_labels = _add_sample_size_to_xtick_labels(y_test, classes)
+
+ plt.ylabel('True label')
+ plt.xlabel('Predicted label')
+ confusion.set_xticklabels(x_tick_labels, rotation=90, ha='center')
+ confusion.set_yticklabels(y_tick_labels, rotation=0, ha='right')
+
+ # generate confusion matrix as pd.DataFrame for viewing
+ predictions = pd.DataFrame(cm, index=classes, columns=classes)
+ # add empty row/column to show overall accuracy in bottom right cell
+    # baseline accuracy = accuracy of a classifier that always guesses the
+    # most common class
+    n_samples, n_samples_largest_class, baseline_accuracy, accuracy_ratio = \
+        _calculate_baseline_accuracy(y_test, accuracy)
+    predictions["Overall Accuracy"] = ""
+    predictions.loc["Overall Accuracy"] = ""
+    predictions.loc["Baseline Accuracy"] = ""
+    predictions.loc["Accuracy Ratio"] = ""
+    predictions.loc["Overall Accuracy"]["Overall Accuracy"] = accuracy
+    predictions.loc["Baseline Accuracy"][
+        "Overall Accuracy"] = baseline_accuracy
+    predictions.loc["Accuracy Ratio"]["Overall Accuracy"] = accuracy_ratio
+
+ return predictions, confusion
+
+
+def _check_vmin_and_vmax(cm, vmin, vmax):
+ lowest_frequency = np.amin(cm)
+ highest_frequency = np.amax(cm)
+
+ error = ''
+ if vmin is not None:
+ if vmin > lowest_frequency:
+ error += ('vmin must be less than or equal to the lowest '
+ 'predicted class frequency:\n'
+ f'\t{vmin!r} is greater than {lowest_frequency!r}')
+ if vmax is not None:
+ if vmax < highest_frequency:
+ if error:
+ error += '\n'
+ error += ('vmax must be greater than or equal to the highest '
+ 'predicted class frequency:\n'
+ f'\t{vmax!r} is less than {highest_frequency!r}')
+ if error:
+ raise ValueError(error)
+
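+# Example of the check above (editor's sketch): if cm values span 0.0-1.0,
+# passing vmin=0.2 raises ValueError because 0.2 exceeds the lowest
+# predicted class frequency of 0.0.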
+
+def _calculate_baseline_accuracy(y_test, accuracy):
+ n_samples = len(y_test)
+ n_samples_largest_class = y_test.value_counts().iloc[0]
+    baseline_accuracy = n_samples_largest_class / n_samples
+    accuracy_ratio = accuracy / baseline_accuracy
+    return (n_samples, n_samples_largest_class, baseline_accuracy,
+            accuracy_ratio)
+
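+# Worked example (editor's sketch, assumed values): for a y_test with six
+# 'a' labels and four 'b' labels, baseline accuracy = 6 / 10 = 0.6; an
+# observed accuracy of 0.9 gives accuracy ratio = 0.9 / 0.6 = 1.5.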
+
+def _plot_RFE(x, y):
+ rfe = plt.figure()
+ plt.xlabel("Feature Count")
+ plt.ylabel("Accuracy")
+ plt.plot(x, y, 'grey')
+ return rfe
+
+
+def _binarize_labels(metadata, classes):
+ binarized_targets = label_binarize(metadata, classes=classes)
+ # to generalize downstream steps, we need to coerce binary data into an
+ # array of shape [n_samples, n_classes]
+ if len(classes) == 2:
+ binarized_targets = np.hstack((
+ 1 - binarized_targets, binarized_targets))
+ return binarized_targets
+
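+# Illustrative note (editor's sketch): with classes=['a', 'b'],
+# label_binarize(['a', 'b', 'b'], classes=['a', 'b']) returns one column,
+# [[0], [1], [1]]; the hstack above widens it to [[1, 0], [0, 1], [0, 1]],
+# i.e. one indicator column per class.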
+
+def _generate_roc_plots(metadata, probabilities, palette):
+ '''
+ metadata: pd.Series of target values.
+ probabilities: pd.DataFrame of class probabilities.
+ palette: str specifying sample-classifier colormap name.
+
+    Returns a Receiver Operating Characteristic plot with AUC scores.
+ '''
+ classes = probabilities.columns
+ probabilities = probabilities.values
+
+    # roc_curve only accepts binary inputs, so binarize the target data
+ binarized_targets = _binarize_labels(metadata, classes)
+
+ # Compute ROC curve and ROC area for each class
+ fpr, tpr, roc_auc = _roc_per_class(
+ binarized_targets, probabilities, classes)
+
+ # Compute micro-average ROC curve and ROC area under curve
+ fpr, tpr, roc_auc = _roc_micro_average(
+ binarized_targets, probabilities, fpr, tpr, roc_auc)
+
+ # Compute macro-average ROC curve and ROC area
+ fpr, tpr, roc_auc = _roc_macro_average(fpr, tpr, roc_auc, classes)
+
+ # generate ROC plot
+ colors = _roc_palette(palette, len(classes))
+ return _roc_plot(fpr, tpr, roc_auc, classes, colors)
+
+
+def _roc_palette(palette, n_classes):
+ '''
+ palette: str specifying sample-classifier colormap name.
+ n_classes: int specifying number of classes (== n of colors to select).
+
+ Returns an iterator of colors.
+ '''
+ palette = _custom_palettes()[palette]
+
+ # specify color palette. Use different specification for str palette name
+ # vs. ListedColormap.
+ try:
+ colors = cycle(sns.color_palette(palette, n_colors=n_classes))
+ except TypeError:
+ # if using a continuous ListedColormap, select from normalized
+ # colorspace. We use linspace start=0.1 to avoid light colors at start
+ # of some colormaps.
+ palette = palette(np.linspace(0.1, 1, n_classes))
+ colors = cycle(palette)
+ return colors
+
+
+# adapted from scikit-learn examples
+# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
+def _roc_per_class(binarized_targets, probabilities, classes):
+ '''
+ binarized_targets: array of binarized class labels of dimensions [n, c],
+ where n = number of samples, c = number of classes.
+ probabilities: array of class probabilities of dimensions [n, c],
+ where n = number of samples, c = number of classes.
+ classes: list of classes.
+
+    Returns dicts of False Positive Rate (fpr), True Positive Rate (tpr),
+    and ROC Area Under Curve (roc_auc) for each class.
+ '''
+ fpr = dict()
+ tpr = dict()
+ roc_auc = dict()
+ for i, c in zip(range(len(classes)), classes):
+ fpr[c], tpr[c], _ = roc_curve(
+ binarized_targets[:, i], probabilities[:, i])
+ roc_auc[c] = auc(fpr[c], tpr[c])
+ return fpr, tpr, roc_auc
+
+
+# adapted from scikit-learn examples
+# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
+def _roc_micro_average(binarized_targets, probabilities, fpr, tpr, roc_auc):
+ '''
+ binarized_targets: array of binarized class labels of dimensions [n, c],
+ where n = number of samples, c = number of classes.
+ probabilities: array of class probabilities of dimensions [n, c],
+ where n = number of samples, c = number of classes.
+ fpr: dict of false-positive rates for each class.
+    tpr: dict of true-positive rates for each class.
+    roc_auc: dict of auc scores for each class.
+
+    Returns fpr, tpr, roc_auc with micro-average scores added.
+ '''
+ fpr["micro"], tpr["micro"], _ = roc_curve(
+ binarized_targets.ravel(), probabilities.ravel())
+ roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
+ return fpr, tpr, roc_auc
+
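+# Note (editor's addition): micro-averaging pools the raveled
+# [n_samples, n_classes] indicator and probability arrays, so each
+# (sample, class) pair counts as one binary decision in a single ROC curve.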
+
+# adapted from scikit-learn examples
+# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
+def _roc_macro_average(fpr, tpr, roc_auc, classes):
+ '''
+ fpr: dict of false-positive rates for each class.
+    tpr: dict of true-positive rates for each class.
+    roc_auc: dict of auc scores for each class.
+    classes: list of classes.
+
+    Returns fpr, tpr, roc_auc with macro-average scores added.
+ '''
+ # Aggregate all false positive rates for computing average
+ all_fpr = np.unique(np.concatenate([fpr[c] for c in classes]))
+
+ # Then interpolate all ROC curves at this point
+ mean_tpr = np.zeros_like(all_fpr)
+ for c in classes:
+ mean_tpr += interp(all_fpr, fpr[c], tpr[c])
+
+ # Finally average it and compute AUC
+ mean_tpr /= len(classes)
+
+ fpr["macro"] = all_fpr
+ tpr["macro"] = mean_tpr
+ roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
+ return fpr, tpr, roc_auc
+
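+# Note (editor's addition): the macro average interpolates every per-class
+# curve onto the union of all fpr grid points before averaging, e.g. grids
+# [0, .5, 1] and [0, 1] merge to [0, .5, 1] and the second curve's tpr is
+# linearly interpolated at .5.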
+
+# inspired by scikit-learn examples for multi-class ROC plots
+# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
+def _roc_plot(fpr, tpr, roc_auc, classes, colors):
+ '''
+ fpr: dict of false-positive rates for each class.
+    tpr: dict of true-positive rates for each class.
+ roc_auc: dict of auc scores for each class.
+ classes: list of classes.
+ colors: list of colors.
+ '''
+ fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4), sharey=True)
+ lw = 3
+
+ # plot averages in each panel
+ for i in [0, 1]:
+ axes[i].plot(fpr['micro'], tpr['micro'], color='navy', linestyle=':',
+ lw=lw,
+ label='micro-average (AUC = %0.2f)' % roc_auc['micro'])
+ axes[i].plot(fpr['macro'], tpr['macro'], color='lightblue',
+ linestyle=':', lw=lw,
+ label='macro-average (AUC = %0.2f)' % roc_auc['macro'])
+ # plot 1:1 ratio line
+ axes[i].plot([0, 1], [0, 1], color='grey', lw=lw, linestyle='--',
+ label='Chance')
+ axes[i].set_xlim([0.0, 1.0])
+ axes[i].set_ylim([0.0, 1.05])
+ axes[i].set_xlabel('False Positive Rate')
+
+ # left panel: averages only
+ axes[0].set_ylabel('True Positive Rate')
+ axes[0].set_title('Receiver Operating Characteristic Average Scores')
+ axes[0].legend(loc="lower right")
+
+ # right panel: averages and per-class ROCs
+ axes[1].set_title('Per-Class Receiver Operating Characteristics')
+
+ for c, color in zip(classes, colors):
+        axes[1].plot(fpr[c], tpr[c], color=color, lw=lw,
+                     label='{0} (AUC = {1:0.2f})'.format(c, roc_auc[c]))
+ axes[1].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
+
+ return fig
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..54440cf
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,8 @@
+[versioneer]
+VCS=git
+style=pep440
+versionfile_source = q2_sample_classifier/_version.py
+versionfile_build = q2_sample_classifier/_version.py
+tag_prefix =
+parentdir_prefix = q2-sample-classifier-
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..925f1d4
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,33 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from setuptools import find_packages, setup
+
+import versioneer
+
+
+setup(
+ name='q2-sample-classifier',
+ version=versioneer.get_version(),
+ cmdclass=versioneer.get_cmdclass(),
+ license='BSD-3-Clause',
+ packages=find_packages(),
+ author="Nicholas Bokulich",
+ author_email="nbokulich@gmail.com",
+ description="Machine learning classification and regression tools.",
+ url="https://qiime2.org/",
+ entry_points={
+ 'qiime2.plugins':
+ ['q2-sample-classifier=q2_sample_classifier.plugin_setup:plugin']
+ },
+ package_data={
+ 'q2_sample_classifier.tests': ['data/*'],
+ 'q2_sample_classifier': ['assets/index.html', 'citations.bib']
+ },
+ zip_safe=False,
+)
diff --git a/versioneer.py b/versioneer.py
new file mode 100644
index 0000000..a5e7a20
--- /dev/null
+++ b/versioneer.py
@@ -0,0 +1,1823 @@
+
+# Version: 0.18
+# flake8: noqa
+
+"""The Versioneer - like a rocketeer, but for versions.
+
+The Versioneer
+==============
+
+* like a rocketeer, but for versions!
+* https://github.com/warner/python-versioneer
+* Brian Warner
+* License: Public Domain
+* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy
+* [![Latest Version]
+(https://pypip.in/version/versioneer/badge.svg?style=flat)
+](https://pypi.python.org/pypi/versioneer/)
+* [![Build Status]
+(https://travis-ci.org/warner/python-versioneer.png?branch=master)
+](https://travis-ci.org/warner/python-versioneer)
+
+This is a tool for managing a recorded version number in distutils-based
+python projects. The goal is to remove the tedious and error-prone "update
+the embedded version string" step from your release process. Making a new
+release should be as easy as recording a new tag in your version-control
+system, and maybe making new tarballs.
+
+
+## Quick Install
+
+* `pip install versioneer` to somewhere in your $PATH
+* add a `[versioneer]` section to your setup.cfg (see below)
+* run `versioneer install` in your source tree, commit the results
+
+## Version Identifiers
+
+Source trees come from a variety of places:
+
+* a version-control system checkout (mostly used by developers)
+* a nightly tarball, produced by build automation
+* a snapshot tarball, produced by a web-based VCS browser, like github's
+ "tarball from tag" feature
+* a release tarball, produced by "setup.py sdist", distributed through PyPI
+
+Within each source tree, the version identifier (either a string or a number,
+this tool is format-agnostic) can come from a variety of places:
+
+* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
+ about recent "tags" and an absolute revision-id
+* the name of the directory into which the tarball was unpacked
+* an expanded VCS keyword ($Id$, etc)
+* a `_version.py` created by some earlier build step
+
+For released software, the version identifier is closely related to a VCS
+tag. Some projects use tag names that include more than just the version
+string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
+needs to strip the tag prefix to extract the version identifier. For
+unreleased software (between tags), the version identifier should provide
+enough information to help developers recreate the same tree, while also
+giving them an idea of roughly how old the tree is (after version 1.2, before
+version 1.3). Many VCS systems can report a description that captures this,
+for example `git describe --tags --dirty --always` reports things like
+"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
+0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
+uncommitted changes).
+
+The version identifier is used for multiple purposes:
+
+* to allow the module to self-identify its version: `myproject.__version__`
+* to choose a name and prefix for a 'setup.py sdist' tarball
+
+## Theory of Operation
+
+Versioneer works by adding a special `_version.py` file into your source
+tree, where your `__init__.py` can import it. This `_version.py` knows how to
+dynamically ask the VCS tool for version information at import time.
+
+`_version.py` also contains `$Revision$` markers, and the installation
+process marks `_version.py` to have this marker rewritten with a tag name
+during the `git archive` command. As a result, generated tarballs will
+contain enough information to get the proper version.
+
+To allow `setup.py` to compute a version too, a `versioneer.py` is added to
+the top level of your source tree, next to `setup.py` and the `setup.cfg`
+that configures it. This overrides several distutils/setuptools commands to
+compute the version when invoked, and changes `setup.py build` and `setup.py
+sdist` to replace `_version.py` with a small static file that contains just
+the generated version data.
+
+## Installation
+
+See [INSTALL.md](./INSTALL.md) for detailed installation instructions.
+
+## Version-String Flavors
+
+Code which uses Versioneer can learn about its version string at runtime by
+importing `_version` from your main `__init__.py` file and running the
+`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
+import the top-level `versioneer.py` and run `get_versions()`.
+
+Both functions return a dictionary with different flavors of version
+information:
+
+* `['version']`: A condensed version string, rendered using the selected
+ style. This is the most commonly used value for the project's version
+ string. The default "pep440" style yields strings like `0.11`,
+ `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
+ below for alternative styles.
+
+* `['full-revisionid']`: detailed revision identifier. For Git, this is the
+ full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".
+
+* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the
+ commit date in ISO 8601 format. This will be None if the date is not
+ available.
+
+* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
+ this is only accurate if run in a VCS checkout, otherwise it is likely to
+ be False or None
+
+* `['error']`: if the version string could not be computed, this will be set
+ to a string describing the problem, otherwise it will be None. It may be
+ useful to throw an exception in setup.py if this is set, to avoid e.g.
+ creating tarballs with a version string of "unknown".
+
+Some variants are more useful than others. Including `full-revisionid` in a
+bug report should allow developers to reconstruct the exact code being tested
+(or indicate the presence of local changes that should be shared with the
+developers). `version` is suitable for display in an "about" box or a CLI
+`--version` output: it can be easily compared against release notes and lists
+of bugs fixed in various releases.
+
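+For example, from the project root (an illustrative sketch; exact values
+depend on your checkout state):
+
+    import versioneer
+    info = versioneer.get_versions()
+    print(info['version'])         # e.g. "0.11+2.g1076c97"
+    print(info['full-revisionid'], info['dirty'])
+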
+The installer adds the following text to your `__init__.py` to place a basic
+version in `YOURPROJECT.__version__`:
+
+ from ._version import get_versions
+ __version__ = get_versions()['version']
+ del get_versions
+
+## Styles
+
+The setup.cfg `style=` configuration controls how the VCS information is
+rendered into a version string.
+
+The default style, "pep440", produces a PEP440-compliant string, equal to the
+un-prefixed tag name for actual releases, and containing an additional "local
+version" section with more detail for in-between builds. For Git, this is
+TAG[+DISTANCE.gHEX[.dirty]], using information from `git describe --tags
+--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
+tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
+that this commit is two revisions ("+2") beyond the "0.11" tag. For released
+software (exactly equal to a known tag), the identifier will only contain the
+stripped tag, e.g. "0.11".
+
+Other styles are available. See [details.md](details.md) in the Versioneer
+source tree for descriptions.
+
+## Debugging
+
+Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
+to return a version of "0+unknown". To investigate the problem, run `setup.py
+version`, which will run the version-lookup code in a verbose mode, and will
+display the full contents of `get_versions()` (including the `error` string,
+which may help identify what went wrong).
+
+## Known Limitations
+
+Some situations are known to cause problems for Versioneer. This details the
+most significant ones. More can be found on the GitHub
+[issues page](https://github.com/warner/python-versioneer/issues).
+
+### Subprojects
+
+Versioneer has limited support for source trees in which `setup.py` is not in
+the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There
+are two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+ [Buildbot](https://github.com/buildbot/buildbot), which contains both
+ "master" and "slave" subprojects, each with their own `setup.py`,
+ `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+ distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the
+`setup.cfg`, so subprojects were completely unsupported with those releases.
+
+### Editable installs with setuptools <= 18.5
+
+`setup.py develop` and `pip install --editable .` allow you to install a
+project into a virtualenv once, then continue editing the source code (and
+test) without re-installing after every change.
+
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
+convenient way to specify executable scripts that should be installed along
+with the python package.
+
+These both work as expected when using modern setuptools. When using
+setuptools-18.5 or earlier, however, certain operations will cause
+`pkg_resources.DistributionNotFound` errors when running the entrypoint
+script, which must be resolved by re-installing the package. This happens
+when the install happens with one version, then the egg_info data is
+regenerated while a different version is checked out. Many setup.py commands
+cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
+a different virtualenv), so this can be surprising.
+
+[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes
+this one, but upgrading to a newer version of setuptools should probably
+resolve it.
+
+### Unicode version strings
+
+While Versioneer works (and is continually tested) with both Python 2 and
+Python 3, it is not entirely consistent with bytes-vs-unicode distinctions.
+Newer releases probably generate unicode version strings on py2. It's not
+clear that this is wrong, but it may be surprising for applications when they
+write these strings to a network connection or include them in bytes-oriented
+APIs like cryptographic checksums.
+
+[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates
+this question.
+
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+ indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
+* re-run `versioneer install` in your source tree, to replace
+ `SRC/_version.py`
+* commit any changed files
+
+## Future Directions
+
+This tool is designed to be easily extended to other version-control
+systems: all VCS-specific components are in separate directories like
+src/git/ . The top-level `versioneer.py` script is assembled from these
+components by running make-versioneer.py . In the future, make-versioneer.py
+will take a VCS name as an argument, and will construct a version of
+`versioneer.py` that is specific to the given VCS. It might also take the
+configuration arguments that are currently provided manually during
+installation by editing setup.py . Alternatively, it might go the other
+direction and include code from all supported VCS systems, reducing the
+number of intermediate scripts.
+
+
+## License
+
+To make Versioneer easier to embed, all its code is dedicated to the public
+domain. The `_version.py` that it creates is also in the public domain.
+Specifically, both are released under the Creative Commons "Public Domain
+Dedication" license (CC0-1.0), as described in
+https://creativecommons.org/publicdomain/zero/1.0/ .
+
+"""
+
+from __future__ import print_function
+try:
+ import configparser
+except ImportError:
+ import ConfigParser as configparser
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+
+
+class VersioneerConfig:
+ """Container for Versioneer configuration parameters."""
+
+
+def get_root():
+ """Get the project root directory.
+
+ We require that all commands are run from the project root, i.e. the
+ directory that contains setup.py, setup.cfg, and versioneer.py .
+ """
+ root = os.path.realpath(os.path.abspath(os.getcwd()))
+ setup_py = os.path.join(root, "setup.py")
+ versioneer_py = os.path.join(root, "versioneer.py")
+ if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+ # allow 'python path/to/setup.py COMMAND'
+ root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+ setup_py = os.path.join(root, "setup.py")
+ versioneer_py = os.path.join(root, "versioneer.py")
+ if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+ err = ("Versioneer was unable to run the project root directory. "
+ "Versioneer requires setup.py to be executed from "
+ "its immediate directory (like 'python setup.py COMMAND'), "
+ "or in a way that lets it use sys.argv[0] to find the root "
+ "(like 'python path/to/setup.py COMMAND').")
+ raise VersioneerBadRootError(err)
+ try:
+ # Certain runtime workflows (setup.py install/develop in a setuptools
+ # tree) execute all dependencies in a single python process, so
+ # "versioneer" may be imported multiple times, and python's shared
+ # module-import table will cache the first one. So we can't use
+ # os.path.dirname(__file__), as that will find whichever
+ # versioneer.py was first imported, even in later projects.
+ me = os.path.realpath(os.path.abspath(__file__))
+ me_dir = os.path.normcase(os.path.splitext(me)[0])
+ vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
+ if me_dir != vsr_dir:
+ print("Warning: build in %s is using versioneer.py from %s"
+ % (os.path.dirname(me), versioneer_py))
+ except NameError:
+ pass
+ return root
+
+
+def get_config_from_root(root):
+ """Read the project setup.cfg file to determine Versioneer config."""
+ # This might raise EnvironmentError (if setup.cfg is missing), or
+ # configparser.NoSectionError (if it lacks a [versioneer] section), or
+ # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
+ # the top of versioneer.py for instructions on writing your setup.cfg .
+ setup_cfg = os.path.join(root, "setup.cfg")
+ parser = configparser.SafeConfigParser()
+ with open(setup_cfg, "r") as f:
+ parser.readfp(f)
+ VCS = parser.get("versioneer", "VCS") # mandatory
+
+ def get(parser, name):
+ if parser.has_option("versioneer", name):
+ return parser.get("versioneer", name)
+ return None
+ cfg = VersioneerConfig()
+ cfg.VCS = VCS
+ cfg.style = get(parser, "style") or ""
+ cfg.versionfile_source = get(parser, "versionfile_source")
+ cfg.versionfile_build = get(parser, "versionfile_build")
+ cfg.tag_prefix = get(parser, "tag_prefix")
+ if cfg.tag_prefix in ("''", '""'):
+ cfg.tag_prefix = ""
+ cfg.parentdir_prefix = get(parser, "parentdir_prefix")
+ cfg.verbose = get(parser, "verbose")
+ return cfg
+
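+# Note (editor's addition): with the [versioneer] section in the setup.cfg
+# shown earlier in this import, this yields cfg.VCS == "git",
+# cfg.style == "pep440", cfg.tag_prefix == "" and
+# cfg.parentdir_prefix == "q2-sample-classifier-".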
+
+class NotThisMethod(Exception):
+ """Exception raised if a method is not valid for the current scenario."""
+
+
+# these dictionaries contain VCS-specific tools
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method): # decorator
+ """Decorator to mark a method as the handler for a particular VCS."""
+ def decorate(f):
+ """Store f in HANDLERS[vcs][method]."""
+ if vcs not in HANDLERS:
+ HANDLERS[vcs] = {}
+ HANDLERS[vcs][method] = f
+ return f
+ return decorate
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
+ env=None):
+ """Call the given command(s)."""
+ assert isinstance(commands, list)
+ p = None
+ for c in commands:
+ try:
+ dispcmd = str([c] + args)
+ # remember shell=False, so use git.cmd on windows, not just git
+ p = subprocess.Popen([c] + args, cwd=cwd, env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr
+ else None))
+ break
+ except EnvironmentError:
+ e = sys.exc_info()[1]
+ if e.errno == errno.ENOENT:
+ continue
+ if verbose:
+ print("unable to run %s" % dispcmd)
+ print(e)
+ return None, None
+ else:
+ if verbose:
+ print("unable to find command, tried %s" % (commands,))
+ return None, None
+ stdout = p.communicate()[0].strip()
+ if sys.version_info[0] >= 3:
+ stdout = stdout.decode()
+ if p.returncode != 0:
+ if verbose:
+ print("unable to run %s (error)" % dispcmd)
+ print("stdout was %s" % stdout)
+ return None, p.returncode
+ return stdout, p.returncode
+
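+# Note (editor's addition): run_command returns a (stdout, returncode)
+# tuple, e.g. run_command(["git"], ["rev-parse", "HEAD"]) yields the HEAD
+# sha and 0 on success, or (None, None) if no git executable is found.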
+
+LONG_VERSION_PY['git'] = '''
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by githubs download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.18 (https://github.com/warner/python-versioneer)
+
+"""Git implementation of _version.py."""
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+
+def get_keywords():
+ """Get the keywords needed to look up the version information."""
+ # these strings will be replaced by git during git-archive.
+ # setup.py/versioneer.py will grep for the variable names, so they must
+ # each be defined on a line of their own. _version.py will just call
+ # get_keywords().
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
+ git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s"
+ git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s"
+ keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+ return keywords
+
+
+class VersioneerConfig:
+ """Container for Versioneer configuration parameters."""
+
+
+def get_config():
+ """Create, populate and return the VersioneerConfig() object."""
+ # these strings are filled in when 'setup.py versioneer' creates
+ # _version.py
+ cfg = VersioneerConfig()
+ cfg.VCS = "git"
+ cfg.style = "%(STYLE)s"
+ cfg.tag_prefix = "%(TAG_PREFIX)s"
+ cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s"
+ cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s"
+ cfg.verbose = False
+ return cfg
+
+
+class NotThisMethod(Exception):
+ """Exception raised if a method is not valid for the current scenario."""
+
+
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method): # decorator
+ """Decorator to mark a method as the handler for a particular VCS."""
+ def decorate(f):
+ """Store f in HANDLERS[vcs][method]."""
+ if vcs not in HANDLERS:
+ HANDLERS[vcs] = {}
+ HANDLERS[vcs][method] = f
+ return f
+ return decorate
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
+ env=None):
+ """Call the given command(s)."""
+ assert isinstance(commands, list)
+ p = None
+ for c in commands:
+ try:
+ dispcmd = str([c] + args)
+ # remember shell=False, so use git.cmd on windows, not just git
+ p = subprocess.Popen([c] + args, cwd=cwd, env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr
+ else None))
+ break
+ except EnvironmentError:
+ e = sys.exc_info()[1]
+ if e.errno == errno.ENOENT:
+ continue
+ if verbose:
+ print("unable to run %%s" %% dispcmd)
+ print(e)
+ return None, None
+ else:
+ if verbose:
+ print("unable to find command, tried %%s" %% (commands,))
+ return None, None
+ stdout = p.communicate()[0].strip()
+ if sys.version_info[0] >= 3:
+ stdout = stdout.decode()
+ if p.returncode != 0:
+ if verbose:
+ print("unable to run %%s (error)" %% dispcmd)
+ print("stdout was %%s" %% stdout)
+ return None, p.returncode
+ return stdout, p.returncode
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+ """Try to determine the version from the parent directory name.
+
+ Source tarballs conventionally unpack into a directory that includes both
+ the project name and a version string. We will also support searching up
+ two directory levels for an appropriately named parent directory
+ """
+ rootdirs = []
+
+ for i in range(3):
+ dirname = os.path.basename(root)
+ if dirname.startswith(parentdir_prefix):
+ return {"version": dirname[len(parentdir_prefix):],
+ "full-revisionid": None,
+ "dirty": False, "error": None, "date": None}
+ else:
+ rootdirs.append(root)
+ root = os.path.dirname(root) # up a level
+
+ if verbose:
+ print("Tried directories %%s but none started with prefix %%s" %%
+ (str(rootdirs), parentdir_prefix))
+ raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+ """Extract version information from the given file."""
+ # the code embedded in _version.py can just fetch the value of these
+ # keywords. When used from setup.py, we don't want to import _version.py,
+ # so we do it with a regexp instead. This function is not used from
+ # _version.py.
+ keywords = {}
+ try:
+ f = open(versionfile_abs, "r")
+ for line in f.readlines():
+ if line.strip().startswith("git_refnames ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["refnames"] = mo.group(1)
+ if line.strip().startswith("git_full ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["full"] = mo.group(1)
+ if line.strip().startswith("git_date ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["date"] = mo.group(1)
+ f.close()
+ except EnvironmentError:
+ pass
+ return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+ """Get version information from git keywords."""
+ if not keywords:
+ raise NotThisMethod("no keywords at all, weird")
+ date = keywords.get("date")
+ if date is not None:
+ # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant
+ # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601
+ # -like" string, which we must then edit to make compliant), because
+ # it's been around since git-1.5.3, and it's too difficult to
+ # discover which version we're using, or to work around using an
+ # older one.
+ date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+ refnames = keywords["refnames"].strip()
+ if refnames.startswith("$Format"):
+ if verbose:
+ print("keywords are unexpanded, not using")
+ raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+ refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+ # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+ TAG = "tag: "
+ tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ if not tags:
+ # Either we're using git < 1.8.3, or there really are no tags. We use
+ # a heuristic: assume all version tags have a digit. The old git %%d
+ # expansion behaves like git log --decorate=short and strips out the
+ # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+ # between branches and tags. By ignoring refnames without digits, we
+ # filter out many common branch names like "release" and
+ # "stabilization", as well as "HEAD" and "master".
+ tags = set([r for r in refs if re.search(r'\d', r)])
+ if verbose:
+ print("discarding '%%s', no digits" %% ",".join(refs - tags))
+ if verbose:
+ print("likely tags: %%s" %% ",".join(sorted(tags)))
+ for ref in sorted(tags):
+ # sorting will prefer e.g. "2.0" over "2.0rc1"
+ if ref.startswith(tag_prefix):
+ r = ref[len(tag_prefix):]
+ if verbose:
+ print("picking %%s" %% r)
+ return {"version": r,
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": None,
+ "date": date}
+ # no suitable tags, so version is "0+unknown", but full hex is still there
+ if verbose:
+ print("no suitable tags, using unknown + full revision id")
+ return {"version": "0+unknown",
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": "no suitable tags", "date": None}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+ """Get version from 'git describe' in the root of the source tree.
+
+ This only gets called if the git-archive 'subst' keywords were *not*
+ expanded, and _version.py hasn't already been rewritten with a short
+ version string, meaning we're inside a checked out source tree.
+ """
+ GITS = ["git"]
+ if sys.platform == "win32":
+ GITS = ["git.cmd", "git.exe"]
+
+ out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
+ hide_stderr=True)
+ if rc != 0:
+ if verbose:
+ print("Directory %%s not under git control" %% root)
+ raise NotThisMethod("'git rev-parse --git-dir' returned error")
+
+ # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+ # if there isn't one, this yields HEX[-dirty] (no NUM)
+ describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
+ "--always", "--long",
+ "--match", "%%s*" %% tag_prefix],
+ cwd=root)
+ # --long was added in git-1.5.5
+ if describe_out is None:
+ raise NotThisMethod("'git describe' failed")
+ describe_out = describe_out.strip()
+ full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+ if full_out is None:
+ raise NotThisMethod("'git rev-parse' failed")
+ full_out = full_out.strip()
+
+ pieces = {}
+ pieces["long"] = full_out
+ pieces["short"] = full_out[:7] # maybe improved later
+ pieces["error"] = None
+
+ # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+ # TAG might have hyphens.
+ git_describe = describe_out
+
+ # look for -dirty suffix
+ dirty = git_describe.endswith("-dirty")
+ pieces["dirty"] = dirty
+ if dirty:
+ git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+ # now we have TAG-NUM-gHEX or HEX
+
+ if "-" in git_describe:
+ # TAG-NUM-gHEX
+ mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+ if not mo:
+ # unparseable. Maybe git-describe is misbehaving?
+ pieces["error"] = ("unable to parse git-describe output: '%%s'"
+ %% describe_out)
+ return pieces
+
+ # tag
+ full_tag = mo.group(1)
+ if not full_tag.startswith(tag_prefix):
+ if verbose:
+ fmt = "tag '%%s' doesn't start with prefix '%%s'"
+ print(fmt %% (full_tag, tag_prefix))
+ pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'"
+ %% (full_tag, tag_prefix))
+ return pieces
+ pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+ # distance: number of commits since tag
+ pieces["distance"] = int(mo.group(2))
+
+ # commit: short hex revision ID
+ pieces["short"] = mo.group(3)
+
+ else:
+ # HEX: no tags
+ pieces["closest-tag"] = None
+ count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
+ cwd=root)
+ pieces["distance"] = int(count_out) # total number of commits
+
+ # commit date: see ISO-8601 comment in git_versions_from_keywords()
+ date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"],
+ cwd=root)[0].strip()
+ pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+
+ return pieces
+
+
+def plus_or_dot(pieces):
+ """Return a + if we don't already have one, else return a ."""
+ if "+" in pieces.get("closest-tag", ""):
+ return "."
+ return "+"
+
+
+def render_pep440(pieces):
+ """Build up version string, with post-release "local version identifier".
+
+ Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+ get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+ Exceptions:
+ 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += plus_or_dot(pieces)
+ rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
+
+def render_pep440_pre(pieces):
+ """TAG[.post.devDISTANCE] -- No -dirty.
+
+ Exceptions:
+ 1: no tags. 0.post.devDISTANCE
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"]:
+ rendered += ".post.dev%%d" %% pieces["distance"]
+ else:
+ # exception #1
+ rendered = "0.post.dev%%d" %% pieces["distance"]
+ return rendered
+
+
+def render_pep440_post(pieces):
+ """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+ The ".dev0" means dirty. Note that .dev0 sorts backwards
+ (a dirty tree will appear "older" than the corresponding clean one),
+ but you shouldn't be releasing software with -dirty anyways.
+
+ Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%%d" %% pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "g%%s" %% pieces["short"]
+ else:
+ # exception #1
+ rendered = "0.post%%d" %% pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += "+g%%s" %% pieces["short"]
+ return rendered
+
+
+def render_pep440_old(pieces):
+ """TAG[.postDISTANCE[.dev0]] .
+
+ The ".dev0" means dirty.
+
+    Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%%d" %% pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ else:
+ # exception #1
+ rendered = "0.post%%d" %% pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ return rendered
+
+
+def render_git_describe(pieces):
+ """TAG[-DISTANCE-gHEX][-dirty].
+
+ Like 'git describe --tags --dirty --always'.
+
+ Exceptions:
+ 1: no tags. HEX[-dirty] (note: no 'g' prefix)
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"]:
+ rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
+ else:
+ # exception #1
+ rendered = pieces["short"]
+ if pieces["dirty"]:
+ rendered += "-dirty"
+ return rendered
+
+
+def render_git_describe_long(pieces):
+ """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+ The distance/hash is unconditional.
+
+ Exceptions:
+ 1: no tags. HEX[-dirty] (note: no 'g' prefix)
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
+ else:
+ # exception #1
+ rendered = pieces["short"]
+ if pieces["dirty"]:
+ rendered += "-dirty"
+ return rendered
+
+
+def render(pieces, style):
+ """Render the given version pieces into the requested style."""
+ if pieces["error"]:
+ return {"version": "unknown",
+ "full-revisionid": pieces.get("long"),
+ "dirty": None,
+ "error": pieces["error"],
+ "date": None}
+
+ if not style or style == "default":
+ style = "pep440" # the default
+
+ if style == "pep440":
+ rendered = render_pep440(pieces)
+ elif style == "pep440-pre":
+ rendered = render_pep440_pre(pieces)
+ elif style == "pep440-post":
+ rendered = render_pep440_post(pieces)
+ elif style == "pep440-old":
+ rendered = render_pep440_old(pieces)
+ elif style == "git-describe":
+ rendered = render_git_describe(pieces)
+ elif style == "git-describe-long":
+ rendered = render_git_describe_long(pieces)
+ else:
+ raise ValueError("unknown style '%%s'" %% style)
+
+ return {"version": rendered, "full-revisionid": pieces["long"],
+ "dirty": pieces["dirty"], "error": None,
+ "date": pieces.get("date")}
+
+
+def get_versions():
+ """Get version information or return default if unable to do so."""
+ # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+ # __file__, we can work backwards from there to the root. Some
+ # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+ # case we can only use expanded keywords.
+
+ cfg = get_config()
+ verbose = cfg.verbose
+
+ try:
+ return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
+ verbose)
+ except NotThisMethod:
+ pass
+
+ try:
+ root = os.path.realpath(__file__)
+ # versionfile_source is the relative path from the top of the source
+ # tree (where the .git directory might live) to this file. Invert
+ # this to find the root from __file__.
+ for i in cfg.versionfile_source.split('/'):
+ root = os.path.dirname(root)
+ except NameError:
+ return {"version": "0+unknown", "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to find root of source tree",
+ "date": None}
+
+ try:
+ pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+ return render(pieces, cfg.style)
+ except NotThisMethod:
+ pass
+
+ try:
+ if cfg.parentdir_prefix:
+ return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+ except NotThisMethod:
+ pass
+
+ return {"version": "0+unknown", "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to compute version", "date": None}
+'''
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+ """Extract version information from the given file."""
+ # the code embedded in _version.py can just fetch the value of these
+ # keywords. When used from setup.py, we don't want to import _version.py,
+ # so we do it with a regexp instead. This function is not used from
+ # _version.py.
+ keywords = {}
+ try:
+ f = open(versionfile_abs, "r")
+ for line in f.readlines():
+ if line.strip().startswith("git_refnames ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["refnames"] = mo.group(1)
+ if line.strip().startswith("git_full ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["full"] = mo.group(1)
+ if line.strip().startswith("git_date ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["date"] = mo.group(1)
+ f.close()
+ except EnvironmentError:
+ pass
+ return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+ """Get version information from git keywords."""
+ if not keywords:
+ raise NotThisMethod("no keywords at all, weird")
+ date = keywords.get("date")
+ if date is not None:
+ # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
+ # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
+ # -like" string, which we must then edit to make compliant), because
+ # it's been around since git-1.5.3, and it's too difficult to
+ # discover which version we're using, or to work around using an
+ # older one.
+ date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+ refnames = keywords["refnames"].strip()
+ if refnames.startswith("$Format"):
+ if verbose:
+ print("keywords are unexpanded, not using")
+ raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+ refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+ # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+ TAG = "tag: "
+ tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ if not tags:
+ # Either we're using git < 1.8.3, or there really are no tags. We use
+ # a heuristic: assume all version tags have a digit. The old git %d
+ # expansion behaves like git log --decorate=short and strips out the
+ # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+ # between branches and tags. By ignoring refnames without digits, we
+ # filter out many common branch names like "release" and
+ # "stabilization", as well as "HEAD" and "master".
+ tags = set([r for r in refs if re.search(r'\d', r)])
+ if verbose:
+ print("discarding '%s', no digits" % ",".join(refs - tags))
+ if verbose:
+ print("likely tags: %s" % ",".join(sorted(tags)))
+ for ref in sorted(tags):
+ # sorting will prefer e.g. "2.0" over "2.0rc1"
+ if ref.startswith(tag_prefix):
+ r = ref[len(tag_prefix):]
+ if verbose:
+ print("picking %s" % r)
+ return {"version": r,
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": None,
+ "date": date}
+ # no suitable tags, so version is "0+unknown", but full hex is still there
+ if verbose:
+ print("no suitable tags, using unknown + full revision id")
+ return {"version": "0+unknown",
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": "no suitable tags", "date": None}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+ """Get version from 'git describe' in the root of the source tree.
+
+ This only gets called if the git-archive 'subst' keywords were *not*
+ expanded, and _version.py hasn't already been rewritten with a short
+ version string, meaning we're inside a checked out source tree.
+ """
+ GITS = ["git"]
+ if sys.platform == "win32":
+ GITS = ["git.cmd", "git.exe"]
+
+ out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
+ hide_stderr=True)
+ if rc != 0:
+ if verbose:
+ print("Directory %s not under git control" % root)
+ raise NotThisMethod("'git rev-parse --git-dir' returned error")
+
+ # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+ # if there isn't one, this yields HEX[-dirty] (no NUM)
+ describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
+ "--always", "--long",
+ "--match", "%s*" % tag_prefix],
+ cwd=root)
+ # --long was added in git-1.5.5
+ if describe_out is None:
+ raise NotThisMethod("'git describe' failed")
+ describe_out = describe_out.strip()
+ full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+ if full_out is None:
+ raise NotThisMethod("'git rev-parse' failed")
+ full_out = full_out.strip()
+
+ pieces = {}
+ pieces["long"] = full_out
+ pieces["short"] = full_out[:7] # maybe improved later
+ pieces["error"] = None
+
+ # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+ # TAG might have hyphens.
+ git_describe = describe_out
+
+ # look for -dirty suffix
+ dirty = git_describe.endswith("-dirty")
+ pieces["dirty"] = dirty
+ if dirty:
+ git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+ # now we have TAG-NUM-gHEX or HEX
+
+ if "-" in git_describe:
+ # TAG-NUM-gHEX
+ mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+ if not mo:
+ # unparseable. Maybe git-describe is misbehaving?
+ pieces["error"] = ("unable to parse git-describe output: '%s'"
+ % describe_out)
+ return pieces
+
+ # tag
+ full_tag = mo.group(1)
+ if not full_tag.startswith(tag_prefix):
+ if verbose:
+ fmt = "tag '%s' doesn't start with prefix '%s'"
+ print(fmt % (full_tag, tag_prefix))
+ pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+ % (full_tag, tag_prefix))
+ return pieces
+ pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+ # distance: number of commits since tag
+ pieces["distance"] = int(mo.group(2))
+
+ # commit: short hex revision ID
+ pieces["short"] = mo.group(3)
+
+ else:
+ # HEX: no tags
+ pieces["closest-tag"] = None
+ count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
+ cwd=root)
+ pieces["distance"] = int(count_out) # total number of commits
+
+ # commit date: see ISO-8601 comment in git_versions_from_keywords()
+ date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
+ cwd=root)[0].strip()
+ pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+
+ return pieces
+
+
+def do_vcs_install(manifest_in, versionfile_source, ipy):
+ """Git-specific installation logic for Versioneer.
+
+ For Git, this means creating/changing .gitattributes to mark _version.py
+ for export-subst keyword substitution.
+ """
+ GITS = ["git"]
+ if sys.platform == "win32":
+ GITS = ["git.cmd", "git.exe"]
+ files = [manifest_in, versionfile_source]
+ if ipy:
+ files.append(ipy)
+ try:
+ me = __file__
+ if me.endswith(".pyc") or me.endswith(".pyo"):
+ me = os.path.splitext(me)[0] + ".py"
+ versioneer_file = os.path.relpath(me)
+ except NameError:
+ versioneer_file = "versioneer.py"
+ files.append(versioneer_file)
+ present = False
+ try:
+ f = open(".gitattributes", "r")
+ for line in f.readlines():
+ if line.strip().startswith(versionfile_source):
+ if "export-subst" in line.strip().split()[1:]:
+ present = True
+ f.close()
+ except EnvironmentError:
+ pass
+ if not present:
+ f = open(".gitattributes", "a+")
+ f.write("%s export-subst\n" % versionfile_source)
+ f.close()
+ files.append(".gitattributes")
+ run_command(GITS, ["add", "--"] + files)
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+ """Try to determine the version from the parent directory name.
+
+ Source tarballs conventionally unpack into a directory that includes both
+ the project name and a version string. We will also support searching up
+    two directory levels for an appropriately named parent directory.
+ """
+ rootdirs = []
+
+ for i in range(3):
+ dirname = os.path.basename(root)
+ if dirname.startswith(parentdir_prefix):
+ return {"version": dirname[len(parentdir_prefix):],
+ "full-revisionid": None,
+ "dirty": False, "error": None, "date": None}
+ else:
+ rootdirs.append(root)
+ root = os.path.dirname(root) # up a level
+
+ if verbose:
+ print("Tried directories %s but none started with prefix %s" %
+ (str(rootdirs), parentdir_prefix))
+ raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+
+
+SHORT_VERSION_PY = """
+# This file was generated by 'versioneer.py' (0.18) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
+
+import json
+
+version_json = '''
+%s
+''' # END VERSION_JSON
+
+
+def get_versions():
+ return json.loads(version_json)
+"""
+
+
+def versions_from_file(filename):
+ """Try to determine the version from _version.py if present."""
+ try:
+ with open(filename) as f:
+ contents = f.read()
+ except EnvironmentError:
+ raise NotThisMethod("unable to read _version.py")
+ mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON",
+ contents, re.M | re.S)
+ if not mo:
+ mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON",
+ contents, re.M | re.S)
+ if not mo:
+ raise NotThisMethod("no version_json in _version.py")
+ return json.loads(mo.group(1))
+
+
+def write_to_version_file(filename, versions):
+ """Write the given version number to the given _version.py file."""
+ os.unlink(filename)
+ contents = json.dumps(versions, sort_keys=True,
+ indent=1, separators=(",", ": "))
+ with open(filename, "w") as f:
+ f.write(SHORT_VERSION_PY % contents)
+
+ print("set %s to '%s'" % (filename, versions["version"]))
+
+
+def plus_or_dot(pieces):
+ """Return a + if we don't already have one, else return a ."""
+ if "+" in pieces.get("closest-tag", ""):
+ return "."
+ return "+"
+
+
+def render_pep440(pieces):
+ """Build up version string, with post-release "local version identifier".
+
+ Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+ get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+ Exceptions:
+ 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += plus_or_dot(pieces)
+ rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
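+# Worked example (editor's sketch, assumed values): for
+# pieces = {"closest-tag": "0.11", "distance": 2, "short": "1076c97",
+#           "dirty": True}, render_pep440 returns "0.11+2.g1076c97.dirty";
+# with distance == 0 and a clean tree it returns just "0.11".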
+
+def render_pep440_pre(pieces):
+ """TAG[.post.devDISTANCE] -- No -dirty.
+
+ Exceptions:
+ 1: no tags. 0.post.devDISTANCE
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"]:
+ rendered += ".post.dev%d" % pieces["distance"]
+ else:
+ # exception #1
+ rendered = "0.post.dev%d" % pieces["distance"]
+ return rendered
+
+
+def render_pep440_post(pieces):
+ """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+ The ".dev0" means dirty. Note that .dev0 sorts backwards
+ (a dirty tree will appear "older" than the corresponding clean one),
+ but you shouldn't be releasing software with -dirty anyways.
+
+ Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "g%s" % pieces["short"]
+ else:
+ # exception #1
+ rendered = "0.post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += "+g%s" % pieces["short"]
+ return rendered
+
+
+def render_pep440_old(pieces):
+ """TAG[.postDISTANCE[.dev0]] .
+
+ The ".dev0" means dirty.
+
+    Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ else:
+ # exception #1
+ rendered = "0.post%d" % pieces["distance"]
+ if pieces["dirty"]:
+ rendered += ".dev0"
+ return rendered
+
+
+def render_git_describe(pieces):
+ """TAG[-DISTANCE-gHEX][-dirty].
+
+ Like 'git describe --tags --dirty --always'.
+
+ Exceptions:
+ 1: no tags. HEX[-dirty] (note: no 'g' prefix)
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"]:
+ rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+ else:
+ # exception #1
+ rendered = pieces["short"]
+ if pieces["dirty"]:
+ rendered += "-dirty"
+ return rendered
+
+
+def render_git_describe_long(pieces):
+ """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+ The distance/hash is unconditional.
+
+ Exceptions:
+ 1: no tags. HEX[-dirty] (note: no 'g' prefix)
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+ else:
+ # exception #1
+ rendered = pieces["short"]
+ if pieces["dirty"]:
+ rendered += "-dirty"
+ return rendered
+
+
+def render(pieces, style):
+ """Render the given version pieces into the requested style."""
+ if pieces["error"]:
+ return {"version": "unknown",
+ "full-revisionid": pieces.get("long"),
+ "dirty": None,
+ "error": pieces["error"],
+ "date": None}
+
+ if not style or style == "default":
+ style = "pep440" # the default
+
+ if style == "pep440":
+ rendered = render_pep440(pieces)
+ elif style == "pep440-pre":
+ rendered = render_pep440_pre(pieces)
+ elif style == "pep440-post":
+ rendered = render_pep440_post(pieces)
+ elif style == "pep440-old":
+ rendered = render_pep440_old(pieces)
+ elif style == "git-describe":
+ rendered = render_git_describe(pieces)
+ elif style == "git-describe-long":
+ rendered = render_git_describe_long(pieces)
+ else:
+ raise ValueError("unknown style '%s'" % style)
+
+ return {"version": rendered, "full-revisionid": pieces["long"],
+ "dirty": pieces["dirty"], "error": None,
+ "date": pieces.get("date")}
+
+
+class VersioneerBadRootError(Exception):
+ """The project root directory is unknown or missing key files."""
+
+
+def get_versions(verbose=False):
+ """Get the project version from whatever source is available.
+
+ Returns dict with two keys: 'version' and 'full'.
+ """
+ if "versioneer" in sys.modules:
+ # see the discussion in cmdclass.py:get_cmdclass()
+ del sys.modules["versioneer"]
+
+ root = get_root()
+ cfg = get_config_from_root(root)
+
+ assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
+ handlers = HANDLERS.get(cfg.VCS)
+ assert handlers, "unrecognized VCS '%s'" % cfg.VCS
+ verbose = verbose or cfg.verbose
+ assert cfg.versionfile_source is not None, \
+ "please set versioneer.versionfile_source"
+ assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"
+
+ versionfile_abs = os.path.join(root, cfg.versionfile_source)
+
+ # extract version from the first source that works: expanded keywords
+ # in _version.py, _version.py itself, a VCS command (e.g. 'git
+ # describe'), then the parent directory name. This is meant to work for
+ # developers using a source checkout, for users of a tarball created by
+ # 'setup.py sdist', and for users of a tarball/zipball created by 'git
+ # archive' or github's download-from-tag feature or the equivalent in
+ # other VCSes.
+
+ get_keywords_f = handlers.get("get_keywords")
+ from_keywords_f = handlers.get("keywords")
+ if get_keywords_f and from_keywords_f:
+ try:
+ keywords = get_keywords_f(versionfile_abs)
+ ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+ if verbose:
+ print("got version from expanded keyword %s" % ver)
+ return ver
+ except NotThisMethod:
+ pass
+
+ try:
+ ver = versions_from_file(versionfile_abs)
+ if verbose:
+ print("got version from file %s %s" % (versionfile_abs, ver))
+ return ver
+ except NotThisMethod:
+ pass
+
+ from_vcs_f = handlers.get("pieces_from_vcs")
+ if from_vcs_f:
+ try:
+ pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+ ver = render(pieces, cfg.style)
+ if verbose:
+ print("got version from VCS %s" % ver)
+ return ver
+ except NotThisMethod:
+ pass
+
+ try:
+ if cfg.parentdir_prefix:
+ ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+ if verbose:
+ print("got version from parentdir %s" % ver)
+ return ver
+ except NotThisMethod:
+ pass
+
+ if verbose:
+ print("unable to compute version")
+
+ return {"version": "0+unknown", "full-revisionid": None,
+ "dirty": None, "error": "unable to compute version",
+ "date": None}
+
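+# A successful lookup returns a dict shaped like (values hypothetical):
+#
+#     {"version": "1.2.post3.dev0+gabc1234",
+#      "full-revisionid": "<40-char sha>",
+#      "dirty": True, "error": None, "date": "<commit date>"}
+#
+# while the "0+unknown" dict above is the catch-all when every source fails.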
+
+def get_version():
+ """Get the short version string for this project."""
+ return get_versions()["version"]
+
+
+def get_cmdclass():
+ """Get the custom setuptools/distutils subclasses used by Versioneer."""
+ if "versioneer" in sys.modules:
+ del sys.modules["versioneer"]
+ # this fixes the "python setup.py develop" case (also 'install' and
+ # 'easy_install .'), in which subdependencies of the main project are
+ # built (using setup.py bdist_egg) in the same python process. Assume
+ # a main project A and a dependency B, which use different versions
+ # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+ # sys.modules by the time B's setup.py is executed, causing B to run
+ # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+ # sandbox that restores sys.modules to its pre-build state, so the
+ # parent is protected against the child's "import versioneer". By
+ # removing ourselves from sys.modules here, before the child build
+ # happens, we protect the child from the parent's versioneer too.
+ # Also see https://github.com/warner/python-versioneer/issues/52
+
+ cmds = {}
+
+ # we add "version" to both distutils and setuptools
+ from distutils.core import Command
+
+ class cmd_version(Command):
+ description = "report generated version string"
+ user_options = []
+ boolean_options = []
+
+ def initialize_options(self):
+ pass
+
+ def finalize_options(self):
+ pass
+
+ def run(self):
+ vers = get_versions(verbose=True)
+ print("Version: %s" % vers["version"])
+ print(" full-revisionid: %s" % vers.get("full-revisionid"))
+ print(" dirty: %s" % vers.get("dirty"))
+ print(" date: %s" % vers.get("date"))
+ if vers["error"]:
+ print(" error: %s" % vers["error"])
+ cmds["version"] = cmd_version
+
+ # we override "build_py" in both distutils and setuptools
+ #
+ # most invocation pathways end up running build_py:
+ # distutils/build -> build_py
+ # distutils/install -> distutils/build ->..
+ # setuptools/bdist_wheel -> distutils/install ->..
+ # setuptools/bdist_egg -> distutils/install_lib -> build_py
+ # setuptools/install -> bdist_egg ->..
+ # setuptools/develop -> ?
+ # pip install:
+ # copies source tree to a tempdir before running egg_info/etc
+ # if .git isn't copied too, 'git describe' will fail
+ # then does setup.py bdist_wheel, or sometimes setup.py install
+ # setup.py egg_info -> ?
+
+ # pick whichever "build_py" base class the active environment provides
+ if "setuptools" in sys.modules:
+ from setuptools.command.build_py import build_py as _build_py
+ else:
+ from distutils.command.build_py import build_py as _build_py
+
+ class cmd_build_py(_build_py):
+ def run(self):
+ root = get_root()
+ cfg = get_config_from_root(root)
+ versions = get_versions()
+ _build_py.run(self)
+ # now locate _version.py in the new build/ directory and replace
+ # it with an updated value
+ if cfg.versionfile_build:
+ target_versionfile = os.path.join(self.build_lib,
+ cfg.versionfile_build)
+ print("UPDATING %s" % target_versionfile)
+ write_to_version_file(target_versionfile, versions)
+ cmds["build_py"] = cmd_build_py
+
+ if "cx_Freeze" in sys.modules: # cx_freeze enabled?
+ from cx_Freeze.dist import build_exe as _build_exe
+ # nczeczulin reports that py2exe won't like the pep440-style string
+ # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
+ # setup(console=[{
+ # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION
+ # "product_version": versioneer.get_version(),
+ # ...
+
+ class cmd_build_exe(_build_exe):
+ def run(self):
+ root = get_root()
+ cfg = get_config_from_root(root)
+ versions = get_versions()
+ target_versionfile = cfg.versionfile_source
+ print("UPDATING %s" % target_versionfile)
+ write_to_version_file(target_versionfile, versions)
+
+ _build_exe.run(self)
+ os.unlink(target_versionfile)
+ with open(cfg.versionfile_source, "w") as f:
+ LONG = LONG_VERSION_PY[cfg.VCS]
+ f.write(LONG %
+ {"DOLLAR": "$",
+ "STYLE": cfg.style,
+ "TAG_PREFIX": cfg.tag_prefix,
+ "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+ "VERSIONFILE_SOURCE": cfg.versionfile_source,
+ })
+ cmds["build_exe"] = cmd_build_exe
+ del cmds["build_py"]
+
+ if 'py2exe' in sys.modules: # py2exe enabled?
+ try:
+ from py2exe.distutils_buildexe import py2exe as _py2exe # py3
+ except ImportError:
+ from py2exe.build_exe import py2exe as _py2exe # py2
+
+ class cmd_py2exe(_py2exe):
+ def run(self):
+ root = get_root()
+ cfg = get_config_from_root(root)
+ versions = get_versions()
+ target_versionfile = cfg.versionfile_source
+ print("UPDATING %s" % target_versionfile)
+ write_to_version_file(target_versionfile, versions)
+
+ _py2exe.run(self)
+ os.unlink(target_versionfile)
+ with open(cfg.versionfile_source, "w") as f:
+ LONG = LONG_VERSION_PY[cfg.VCS]
+ f.write(LONG %
+ {"DOLLAR": "$",
+ "STYLE": cfg.style,
+ "TAG_PREFIX": cfg.tag_prefix,
+ "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+ "VERSIONFILE_SOURCE": cfg.versionfile_source,
+ })
+ cmds["py2exe"] = cmd_py2exe
+
+ # pick whichever "sdist" base class the active environment provides
+ if "setuptools" in sys.modules:
+ from setuptools.command.sdist import sdist as _sdist
+ else:
+ from distutils.command.sdist import sdist as _sdist
+
+ class cmd_sdist(_sdist):
+ def run(self):
+ versions = get_versions()
+ self._versioneer_generated_versions = versions
+ # unless we update this, the command will keep using the old
+ # version
+ self.distribution.metadata.version = versions["version"]
+ return _sdist.run(self)
+
+ def make_release_tree(self, base_dir, files):
+ root = get_root()
+ cfg = get_config_from_root(root)
+ _sdist.make_release_tree(self, base_dir, files)
+ # now locate _version.py in the new base_dir directory
+ # (remembering that it may be a hardlink) and replace it with an
+ # updated value
+ target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
+ print("UPDATING %s" % target_versionfile)
+ write_to_version_file(target_versionfile,
+ self._versioneer_generated_versions)
+ cmds["sdist"] = cmd_sdist
+
+ return cmds
+
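+# Usage sketch: in a plain setuptools environment (neither cx_Freeze nor
+# py2exe loaded) the mapping carries the three overridden commands:
+#
+#     >>> sorted(get_cmdclass())
+#     ['build_py', 'sdist', 'version']
+#
+# and is normally passed straight to setup(cmdclass=get_cmdclass()).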
+
+CONFIG_ERROR = """
+setup.cfg is missing the necessary Versioneer configuration. You need
+a section like:
+
+ [versioneer]
+ VCS = git
+ style = pep440
+ versionfile_source = src/myproject/_version.py
+ versionfile_build = myproject/_version.py
+ tag_prefix =
+ parentdir_prefix = myproject-
+
+You will also need to edit your setup.py to use the results:
+
+ import versioneer
+ setup(version=versioneer.get_version(),
+ cmdclass=versioneer.get_cmdclass(), ...)
+
+Please read the docstring in ./versioneer.py for configuration instructions,
+edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
+"""
+
+SAMPLE_CONFIG = """
+# See the docstring in versioneer.py for instructions. Note that you must
+# re-run 'versioneer.py setup' after changing this section, and commit the
+# resulting files.
+
+[versioneer]
+#VCS = git
+#style = pep440
+#versionfile_source =
+#versionfile_build =
+#tag_prefix =
+#parentdir_prefix =
+
+"""
+
+INIT_PY_SNIPPET = """
+from ._version import get_versions
+__version__ = get_versions()['version']
+del get_versions
+"""
+
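+# Once do_setup() below has appended this snippet to a package's
+# __init__.py, the version is available at runtime (package name
+# hypothetical):
+#
+#     >>> import myproject
+#     >>> myproject.__version__
+#     '1.2.post3.dev0+gabc1234'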
+
+def do_setup():
+ """Main VCS-independent setup function for installing Versioneer."""
+ root = get_root()
+ try:
+ cfg = get_config_from_root(root)
+ except (EnvironmentError, configparser.NoSectionError,
+ configparser.NoOptionError) as e:
+ if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
+ print("Adding sample versioneer config to setup.cfg",
+ file=sys.stderr)
+ with open(os.path.join(root, "setup.cfg"), "a") as f:
+ f.write(SAMPLE_CONFIG)
+ print(CONFIG_ERROR, file=sys.stderr)
+ return 1
+
+ print(" creating %s" % cfg.versionfile_source)
+ with open(cfg.versionfile_source, "w") as f:
+ LONG = LONG_VERSION_PY[cfg.VCS]
+ f.write(LONG % {"DOLLAR": "$",
+ "STYLE": cfg.style,
+ "TAG_PREFIX": cfg.tag_prefix,
+ "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+ "VERSIONFILE_SOURCE": cfg.versionfile_source,
+ })
+
+ ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
+ "__init__.py")
+ if os.path.exists(ipy):
+ try:
+ with open(ipy, "r") as f:
+ old = f.read()
+ except EnvironmentError:
+ old = ""
+ if INIT_PY_SNIPPET not in old:
+ print(" appending to %s" % ipy)
+ with open(ipy, "a") as f:
+ f.write(INIT_PY_SNIPPET)
+ else:
+ print(" %s unmodified" % ipy)
+ else:
+ print(" %s doesn't exist, ok" % ipy)
+ ipy = None
+
+ # Make sure both the top-level "versioneer.py" and versionfile_source
+ # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
+ # they'll be copied into source distributions. Pip won't be able to
+ # install the package without this.
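+ # For a project laid out as in CONFIG_ERROR above, the two appended
+ # lines would read:
+ #     include versioneer.py
+ #     include src/myproject/_version.py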
+ manifest_in = os.path.join(root, "MANIFEST.in")
+ simple_includes = set()
+ try:
+ with open(manifest_in, "r") as f:
+ for line in f:
+ if line.startswith("include "):
+ for include in line.split()[1:]:
+ simple_includes.add(include)
+ except EnvironmentError:
+ pass
+ # That doesn't cover everything MANIFEST.in can do
+ # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
+ # it might give some false negatives. Appending redundant 'include'
+ # lines is safe, though.
+ if "versioneer.py" not in simple_includes:
+ print(" appending 'versioneer.py' to MANIFEST.in")
+ with open(manifest_in, "a") as f:
+ f.write("include versioneer.py\n")
+ else:
+ print(" 'versioneer.py' already in MANIFEST.in")
+ if cfg.versionfile_source not in simple_includes:
+ print(" appending versionfile_source ('%s') to MANIFEST.in" %
+ cfg.versionfile_source)
+ with open(manifest_in, "a") as f:
+ f.write("include %s\n" % cfg.versionfile_source)
+ else:
+ print(" versionfile_source already in MANIFEST.in")
+
+ # Make VCS-specific changes. For git, this means creating/changing
+ # .gitattributes to mark _version.py for export-subst keyword
+ # substitution.
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
+ return 0
+
+
+def scan_setup_py():
+ """Validate the contents of setup.py against Versioneer's expectations."""
+ found = set()
+ setters = False
+ errors = 0
+ with open("setup.py", "r") as f:
+ for line in f.readlines():
+ if "import versioneer" in line:
+ found.add("import")
+ if "versioneer.get_cmdclass()" in line:
+ found.add("cmdclass")
+ if "versioneer.get_version()" in line:
+ found.add("get_version")
+ if "versioneer.VCS" in line:
+ setters = True
+ if "versioneer.versionfile_source" in line:
+ setters = True
+ if len(found) != 3:
+ print("")
+ print("Your setup.py appears to be missing some important items")
+ print("(but I might be wrong). Please make sure it has something")
+ print("roughly like the following:")
+ print("")
+ print(" import versioneer")
+ print(" setup( version=versioneer.get_version(),")
+ print(" cmdclass=versioneer.get_cmdclass(), ...)")
+ print("")
+ errors += 1
+ if setters:
+ print("You should remove lines like 'versioneer.VCS = ' and")
+ print("'versioneer.versionfile_source = ' . This configuration")
+ print("now lives in setup.cfg, and should be removed from setup.py")
+ print("")
+ errors += 1
+ return errors
+
+
+if __name__ == "__main__":
+ # invoked as 'python versioneer.py setup'
+ cmd = sys.argv[1] if len(sys.argv) > 1 else None
+ if cmd == "setup":
+ errors = do_setup()
+ errors += scan_setup_py()
+ if errors:
+ sys.exit(1)