summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNilesh Patra <nilesh@debian.org>2021-11-15 16:13:29 +0530
committerNilesh Patra <nilesh@debian.org>2021-11-15 16:13:29 +0530
commit60fc9aec7b5defa616a3fdc2619725be84280997 (patch)
tree99cd32072c86a9c0b8fb3f78a8beb439aa446c46
parent31ee2b4d57b43a56f083a1a4b6a271a58b9a1b15 (diff)
parent7cde033d25322c7fc507baa7abfa048ffb0438b1 (diff)
Update upstream source from tag 'upstream/0.4.24'
Update to upstream version '0.4.24' with Debian dir 7fedc474d8530176e86709ee6f6b8496d5c9ca8b
-rw-r--r--.travis.yml37
-rw-r--r--HISTORY.md28
-rwxr-xr-xdata/examples/annotate/SQL_sample.gff110
-rwxr-xr-xdata/examples/annotate/miRNA_sample_list.txt30
-rwxr-xr-xdata/examples/annotate/query_sample.dbbin0 -> 2174976 bytes
-rw-r--r--docs/sql_usage.md399
-rw-r--r--mirtop/bam/filter.py53
-rw-r--r--mirtop/command_line.py6
-rw-r--r--mirtop/libs/parse.py59
-rw-r--r--mirtop/sql/__init__.py3
-rw-r--r--mirtop/sql/sql.py486
-rw-r--r--setup.py2
-rw-r--r--test/test_automated_analysis.py309
-rw-r--r--test/test_functions.py34
14 files changed, 1482 insertions, 74 deletions
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index ddc968a..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-sudo: required
-dist: trusty
-language: python
-python:
- - "3.6"
- - "2.7"
-notifications:
- email:
- recipients:
- - lorena.pantano@gmail.com
- on_failure: always
-before_install:
-- echo $TRAVIS_PYTHON_VERSION
-- miniconda="Miniconda3-latest-Linux-x86_64.sh"
-- if [[ $TRAVIS_PYTHON_VERSION == "2.7" ]] ; then miniconda="Miniconda2-latest-Linux-x86_64.sh"; fi
-- wget -O miniconda.sh http://repo.continuum.io/miniconda/${miniconda}
-- bash miniconda.sh -b -p ~/install
-install:
-- export PATH=~/install/bin/:$PATH
-- conda install --yes ncurses -c conda-forge
-- conda install --yes -c conda-forge -c bioconda bedtools samtools razers3 pip nose pysam pandas pyyaml pybedtools biopython setuptools codecov -q
-script:
-- python setup.py develop
-- nosetests --with-coverage
-after_success:
- - codecov
-deploy:
- provider: pypi
- user: lpantano
- password:
- secure: cxfwBMREy+KIQl7Y2EeT8fBChASr6F7GvZxkwe7xXhwPS5Ezw51IX5ktHz8s8O6psseSP6GFLOzfc6wUTY2KM9i5syFtQUc8dQ8B4GmPbZKb+r8hcu4Q3VQqyrkMgyOV6DZ3jZ/Dowqd0005c5tW872RB3ABjflEq1xOr3ddOAp1UmjswYoM+cHE6gurT3z7XYyWS8O6jBee27Pf/tZkrc5tQaN6uVyEvEbYgtkomJ46CsKhgXACqQJiW6CKbzc03VypjEs+oUDNu6DsXmTuTXRyDHH/CA9M0zmQc9EOfZuBauwJ7/PgAS7LVICTq9cvq5RZ4sGEdZ1I1FDTMupp4Hybt6kbEU+Pntfj0nWWYhRZkeWN96Gar7p2BEQ9y68zVvyoM3S3fshLCYF1gonI4Dvy2XXjy+zyQuOSiNbqKwR/GjeDZV/U/TVhIVakQ7yQllyv2Fio8pwE28qpgfIhaddRC+Cp2KBhFBbpwM8Mxx5PeayTIQnhD3zDUt8ZdP1djvH4q/S+qLp77l2znD9CwaELYEzXvVbocsyed4jsXbMtD1z092JViA9/YzZPnuQ0PR7Ccs1bsdvMI0gOyoIroTEP+aPkXKyrL5O7O3sF8UX0ES0vxOC9cJwpEPH39sRxYxjJ4K1NpaGWlHiZG0hihLP6QkeBDthMYNEb0Veqcwc=
- skip_existing: true
- on:
- all_branches: true
- condition: $TRAVIS_BRANCH =~ ^master|dev$
- distributions: sdist bdist_wheel
- repo: miRTop/mirtop
diff --git a/HISTORY.md b/HISTORY.md
index 2c01485..a695984 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,17 +1,22 @@
-- 0.4.23
+0.4.24
+
+* [fix bad](https://github.com/miRTop/mirtop/issues/64) annotation when 5 or more T/A at the end of the sequence by @DrHogart
+* Add SQL database creation
+
+0.4.23
* fix empty stats file [#61](https://github.com/miRTop/mirtop/issues/61) by @leontienvdbent
-- 0.4.22
+0.4.22
* fix when reads map halfway on to the edge
* fix edge case where limit==variant_size
-- 0.4.21
+0.4.21
* Missing trimming events since 0.4.19
-- 0.4.20
+0.4.20
* Support export isomiR rawData output
* Support genomic coordinates as output in the gff
@@ -19,12 +24,12 @@
* Implement method to create gff line
* Improve docs
-- 0.4.19
+0.4.19
* Add --version option
* Fix bug that ignore sequences starting at 0 in bam files
-- 0.4.18
+0.4.18
* Cast map object to list to avoid errors in py3.
* Support Manatee output.
@@ -52,7 +57,8 @@
* Add class to parse GFF line as a first move toward isolation
* Add JSON log for stats command.
-- 0.3.17
+0.3.17
+
* Normalize the read of the tool outputs.
* Add docs with autodoc plugin.
* Validator by @Vbarrera.
@@ -78,7 +84,7 @@
* Improve PROST! importer
* Fix output for isomiRs package
-- 0.2.*
+0.2.*
* Make GTF default output
* Add function to get SNPs from Variant attribute
@@ -91,7 +97,7 @@
* Improve isomiR reading from srnabench tool
* Add PROST to supported tools
-- 0.1.7
+0.1.7
* Remove deletion from addition isomiRs
* Support for srnabench output
@@ -100,14 +106,14 @@
* Functin to guess database used from GTF file through --mirna parameter
* Adapt output format to https://github.com/miRTop/incubator/blob/master/format/definition.md
-- 0.1.5
+0.1.5
* add function to check correct annotation
* add test data for SAM parsing
* add script to simulate isomiRs
* parse indels from bam file
-- 0.1.4
+0.1.4
* fix index BAM file command line
* add function to accept indels and test unit
diff --git a/data/examples/annotate/SQL_sample.gff b/data/examples/annotate/SQL_sample.gff
new file mode 100755
index 0000000..2b1b34e
--- /dev/null
+++ b/data/examples/annotate/SQL_sample.gff
@@ -0,0 +1,110 @@
+# GFF3 adapted for miRNA sequencing data
+## VERSION 0.0.1
+## source-ontology: miRBase22
+## COLDATA: SRR333680_1
+hsa-miR-342-3p miRBase22 isomiR 61 85 . + . Read TCTCACAAAGAAATCGCACCCGTTC; UID 0t@TeV#5v2; Name hsa-miR-342-3p; Parent hsa-mir-342; Variant iso_snv,iso_add:+2; Cigar 7MA15MTC; Expression 1; Filter Pass
+hsa-let-7i-5p miRBase22 isomiR 6 28 . + . Read TGAGGTAGTAGTTTGTGCTGTTG; UID 7AwwRIB71; Name hsa-let-7i-5p; Parent hsa-let-7i; Variant iso_3p:+1; Cigar 23M; Expression 1; Filter Pass
+hsa-miR-10400-5p miRBase22 isomiR 2 21 . + . Read GGCGGCGGCGACTCTGGACT; UID MMM80Kr1; Name hsa-miR-10400-5p; Parent hsa-mir-10400; Variant iso_snvcentral_supp,iso_5p:-1; Cigar 10MA6MAMT; Expression 1; Filter Pass
+hsa-let-7i-5p miRBase22 isomiR 6 28 . + . Read TGAGGTAGTAGTTTGTGCTGTTA; UID 7AwwRIBL1; Name hsa-let-7i-5p; Parent hsa-let-7i; Variant iso_add:+1; Cigar 22MA; Expression 27; Filter Pass
+hsa-let-7i-5p miRBase22 isomiR 6 28 . + . Read TGAGGTAGTAGTTTGTGCTGTTC; UID 7AwwRIBU1; Name hsa-let-7i-5p; Parent hsa-let-7i; Variant iso_add:+1; Cigar 22MC; Expression 16; Filter Pass
+hsa-miR-320a-3p miRBase22 isomiR 42 67 . + . Read AAAAGCTGGGTTGAGAGGGCGATAAT; UID @ku54hZgg1; Name hsa-miR-320a-3p; Parent hsa-mir-320a; Variant iso_add:+4; Cigar 22MT2MT; Expression 1; Filter Pass
+hsa-miR-99b-5p miRBase22 isomiR 7 24 . + . Read CACCCGTAGAACCGCCCT; UID tyJfEi; Name hsa-miR-99b-5p; Parent hsa-mir-99b; Variant iso_snv,iso_3p:-4; Cigar 14MC3M; Expression 1; Filter Pass
+hsa-let-7i-5p miRBase22 isomiR 6 28 . + . Read TGAGGTAGTAGTTTGTGCTGTTT; UID 7AwwRIBQ1; Name hsa-let-7i-5p; Parent hsa-let-7i; Variant iso_add:+1; Cigar 22MT; Expression 261; Filter Pass
+hsa-miR-146a-5p miRBase22 isomiR 21 45 . + . Read TGAGAACTGAATTCCATGGGTTGAT; UID 7Tqo9bA7L2; Name hsa-miR-146a-5p; Parent hsa-mir-146a; Variant iso_add:+3; Cigar 23MAT; Expression 3; Filter Pass
+hsa-miR-449c-5p.SNPC miRBase22 isomiR 18 44 . + . Read AGGCAGTGTATTGCTAGCGGCTGGTCG; UID hDBp6kMuP; Name hsa-miR-449c-5p.SNPC; Parent hsa-mir-449c; Variant iso_snv,iso_add:+3,iso_5p:-1; Cigar 23MGMCG; Expression 1; Filter Pass
+hsa-miR-140-3p miRBase22 isomiR 63 86 . + . Read ACCACAGGGTAGAAACACGGATTC; UID al$J@tKO; Name hsa-miR-140-3p; Parent hsa-mir-140; Variant iso_snv,iso_add:+4,iso_5p:-1; Cigar 14MA6MTTC; Expression 1; Filter Pass
+hsa-miR-221-3p miRBase22 isomiR 65 91 . + . Read AGCTACATTGTCTGCTGGGTTTCTTGT; UID kNpYIu50B; Name hsa-miR-221-3p; Parent hsa-mir-221; Variant iso_add:+4; Cigar 23MTTMT; Expression 1; Filter Pass
+hsa-miR-92b-3p miRBase22 isomiR 61 84 . + . Read TATTGCACTCGTCCCGGCCTACAT; UID zIsS#Mrn; Name hsa-miR-92b-3p; Parent hsa-mir-92b; Variant iso_snv,iso_add:+2; Cigar 20MAMAT; Expression 1; Filter Pass
+hsa-miR-1269b.SNPA miRBase22 isomiR 9 27 . + . Read CTGGACTGAGCCATGCTAC; UID q87Xbrv2; Name hsa-miR-1269b.SNPA; Parent hsa-mir-1269b; Variant iso_3p:-3; Cigar 19M; Expression 1; Filter Pass
+hsa-miR-371a-5p miRBase22 isomiR 6 30 . + . Read ACTCAACCTGTGGGGGCACTTTTTC; UID svi3$VH%v2; Name hsa-miR-371a-5p; Parent hsa-mir-371a; Variant iso_snv_central,iso_add:+5; Cigar 6MC15MTMC; Expression 1; Filter Pass
+hsa-miR-142-5p miRBase22 isomiR 16 36 . + . Read CATAAAGTAGAAAGCACTATC; UID n@WTkse; Name hsa-miR-142-5p; Parent hsa-mir-142; Variant iso_snv; Cigar 19MTC; Expression 5; Filter Pass
+hsa-miR-371a-5p miRBase22 isomiR 6 25 . + . Read GCTCAAACTGTGGGGGCACA; UID 6vs3$Vv1; Name hsa-miR-371a-5p; Parent hsa-mir-371a; Variant iso_snv_seed; Cigar G18MA; Expression 1; Filter Pass
+hsa-miR-30e-3p miRBase22 isomiR 59 84 . + . Read CTTTCAGTCGGATGTTTACATCGTAT; UID HUYKBQnSg1; Name hsa-miR-30e-3p; Parent hsa-mir-30e; Variant iso_snv,iso_add:+4; Cigar 20MT2MTAT; Expression 1; Filter Pass
+hsa-miR-6727-5p miRBase22 isomiR 8 26 . + . Read TGGGGCAAGCGGCTGGCTC; UID uMcGq6v2; Name hsa-miR-6727-5p; Parent hsa-mir-6727; Variant iso_snv_central_offset,iso_5p:-2,iso_3p:-2; Cigar T6MA8MCTC; Expression 1; Filter Pass
+hsa-miR-185-5p miRBase22 isomiR 15 37 . + . Read TGGAGAGAAAGGCAGTTCCTGTC; UID umThDOqU1; Name hsa-miR-185-5p; Parent hsa-mir-185; Variant iso_snv,iso_add:+1; Cigar 21MTC; Expression 1; Filter Pass
+hsa-miR-21-5p miRBase22 isomiR 8 24 . + . Read NAGCTTATCAGACTGAT; UID .; Name hsa-miR-21-5p; Parent hsa-mir-21; Variant iso_snv_central_offset,iso_3p:-5; Cigar N16M; Expression 1; Filter Pass
+hsa-miR-371a-5p miRBase22 isomiR 6 25 . + . Read GCTCAAACTGTGGGGGCACT; UID 6vs3$Vr1; Name hsa-miR-371a-5p; Parent hsa-mir-371a; Variant iso_snv_seed; Cigar G19M; Expression 3; Filter Pass
+hsa-miR-372-3p miRBase22 isomiR 42 65 . + . Read AAAGTGCTGCGACATTTGAGCGGT; UID @3qCnRkA; Name hsa-miR-372-3p; Parent hsa-mir-372; Variant iso_snv,iso_add:+1; Cigar 22MGT; Expression 1; Filter Pass
+hsa-miR-371a-5p miRBase22 isomiR 6 28 . + . Read ACTCAAACTGTGGGGGCTTTTCG; UID svs3$6%C1; Name hsa-miR-371a-5p; Parent hsa-mir-371a; Variant iso_snv,iso_add:+3; Cigar 17MTT2MCG; Expression 1; Filter Pass
+hsa-miR-127-3p miRBase22 isomiR 57 83 . + . Read TCGGATCCGTCTGAGCTTGGCTGTCGT; UID PFy04HMBS; Name hsa-miR-127-3p; Parent hsa-mir-127; Variant iso_add:+5; Cigar 23MTCGT; Expression 1; Filter Pass
+hsa-let-7e-5p miRBase22 isomiR 8 32 . + . Read NGAGGTAGGAGGTTGTATAGTTCGT; UID .; Name hsa-let-7e-5p; Parent hsa-let-7e; Variant iso_snv_central_offset,iso_add:+3; Cigar N21MCGT; Expression 4; Filter Pass
+hsa-miR-409-3p miRBase22 isomiR 45 64 . + . Read ACGAATGTTGCTCGGTGAAT; UID do56G7g1; Name hsa-miR-409-3p; Parent hsa-mir-409; Variant iso_snv,iso_5p:+2,iso_3p:-4; Cigar 19MT; Expression 1; Filter Pass
+hsa-miR-509-3-5p miRBase22 isomiR 10 26 . + . Read TACTGCCGACGTGGCAA; UID NICSM@1; Name hsa-miR-509-3-5p; Parent hsa-mir-509-3; Variant iso_snvcentral_supp,iso_3p:-5; Cigar 6MC10M; Expression 1; Filter Pass
+hsa-let-7c-5p miRBase22 isomiR 12 35 . + . Read GAGGTAGTAGGTTGTATGGTTCCG; UID 4WWABb5y; Name hsa-let-7c-5p; Parent hsa-let-7c; Variant iso_add:+3,iso_5p:-1; Cigar 21MCCM; Expression 1; Filter Pass
+hsa-miR-99b-3p miRBase22 isomiR 45 62 . + . Read CAAGCTCGTGTCTGTGGG; UID v6SYB$; Name hsa-miR-99b-3p; Parent hsa-mir-99b; Variant iso_3p:-4; Cigar 18M; Expression 2; Filter Pass
+hsa-miR-30a-5p miRBase22 isomiR 7 28 . + . Read GTAAACATCCTCGACTGGAAGC; UID Wfej8ucv2; Name hsa-miR-30a-5p; Parent hsa-mir-30a; Variant iso_5p:-1,iso_3p:+1; Cigar 22M; Expression 1; Filter Pass
+hsa-miR-140-3p miRBase22 isomiR 62 86 . + . Read TACCACAGGGTAGAACCCCGGAAAT; UID NthWT#G@L2; Name hsa-miR-140-3p; Parent hsa-mir-140; Variant iso_snv,iso_add:+4; Cigar 17MC4MAMT; Expression 1; Filter Pass
+hsa-miR-382-3p miRBase22 isomiR 47 69 . + . Read AATCATTCACGGACAACACTTCG; UID onUGllHC1; Name hsa-miR-382-3p; Parent hsa-mir-382; Variant iso_add:+2; Cigar 21MCG; Expression 2; Filter Pass
+hsa-miR-127-3p miRBase22 isomiR 57 84 . + . Read TCGGATCCGTCTGAGCTTGGCTTTTCGT; UID PFy04HM%PL2; Name hsa-miR-127-3p; Parent hsa-mir-127; Variant iso_add:+6; Cigar 22MTT3MT; Expression 1; Filter Pass
+hsa-miR-372-3p miRBase22 isomiR 43 61 . + . Read AAGTGCTGCGACAGTTGAG; UID cII8w7T2; Name hsa-miR-372-3p; Parent hsa-mir-372; Variant iso_snv,iso_5p:-1,iso_3p:-3; Cigar 13MG5M; Expression 1; Filter Pass
+hsa-let-7f-5p miRBase22 isomiR 7 28 . + . Read TGAGGTAGTAGATTGTATTGTT; UID 7AwmRzBL2; Name hsa-let-7f-5p; Parent hsa-let-7f-1; Variant iso_snv; Cigar 18MT3M; Expression 1; Filter Pass
+hsa-miR-145-5p miRBase22 isomiR 16 40 . + . Read GTCCAGTTTTCCCAGGAATCCCTCG; UID YD%9DT9jT2; Name hsa-miR-145-5p; Parent hsa-mir-145; Variant iso_add:+2; Cigar 23MCG; Expression 2; Filter Pass
+hsa-miR-99b-5p miRBase22 isomiR 7 25 . + . Read CACCCGTAGAACCGACCTT; UID tyJfCiL2; Name hsa-miR-99b-5p; Parent hsa-mir-99b; Variant iso_3p:-3; Cigar 19M; Expression 14; Filter Pass
+hsa-miR-26a-5p miRBase22 isomiR 11 30 . + . Read TCAAGTAATCCAGGATAGGC; UID UwoxKJV1; Name hsa-miR-26a-5p; Parent hsa-mir-26a-1; Variant iso_5p:-1,iso_3p:-1; Cigar 20M; Expression 3; Filter Pass
+hsa-miR-99b-5p miRBase22 isomiR 7 27 . + . Read CACCCGTAGAACCGACCTTGC; UID tyJfCiI; Name hsa-miR-99b-5p; Parent hsa-mir-99b; Variant iso_3p:-1; Cigar 21M; Expression 3; Filter Pass
+hsa-miR-371a-5p miRBase22 isomiR 6 25 . + . Read ACTCAAACTGTGGGGGAACT; UID svs3$Tr1; Name hsa-miR-371a-5p; Parent hsa-mir-371a; Variant iso_snv; Cigar 16MA3M; Expression 26; Filter Pass
+hsa-miR-371a-5p miRBase22 isomiR 6 29 . + . Read ACTAAAACTGTGGGGGCACTTTAG; UID s@s3$VHJ; Name hsa-miR-371a-5p; Parent hsa-mir-371a; Variant iso_snv_central,iso_add:+4; Cigar 3MA18MAG; Expression 1; Filter Pass
+hsa-miR-140-3p miRBase22 isomiR 62 86 . + . Read TACCACAGGGTAGAACCACGGCTCG; UID NthWTxGjT2; Name hsa-miR-140-3p; Parent hsa-mir-140; Variant iso_add:+4; Cigar 21MCTCM; Expression 2; Filter Pass
+hsa-miR-6765-5p miRBase22 isomiR 15 30 . + . Read CGCGGGTCTGTGGCGC; UID E$03Zv2; Name hsa-miR-6765-5p; Parent hsa-mir-6765; Variant iso_snvcentral_supp,iso_add:+5,iso_5p:-14; Cigar CMC4MC7MC; Expression 1; Filter Pass
+hsa-miR-5010-5p miRBase22 isomiR 23 50 . + . Read GGGGATGGCAGAGCAAAATTCATGGCTC; UID $FMmV@Ob6v2; Name hsa-miR-5010-5p; Parent hsa-mir-5010; Variant iso_add:+8,iso_5p:-2; Cigar 26MTC; Expression 1; Filter Pass
+hsa-miR-320a-3p miRBase22 isomiR 42 64 . + . Read AACAGCTGGGTTGAGAGGGCGAT; UID fku54hZg1; Name hsa-miR-320a-3p; Parent hsa-mir-320a; Variant iso_snv,iso_add:+1; Cigar 2MC19MT; Expression 1; Filter Pass
+hsa-miR-182-5p miRBase22 isomiR 23 47 . + . Read TTTGGCAATGGGAGAACTCACACTT; UID %Mo$mstsL2; Name hsa-miR-182-5p; Parent hsa-mir-182; Variant iso_snv,iso_add:+1; Cigar 11MG12MT; Expression 1; Filter Pass
+hsa-miR-221-3p miRBase22 isomiR 65 86 . + . Read GGCTTAATTGTCTGCTGGGTTT; UID MQpYIu5L2; Name hsa-miR-221-3p; Parent hsa-mir-221; Variant iso_snv,iso_3p:-1; Cigar G3MTA16M; Expression 1; Filter Pass
+hsa-miR-191-5p miRBase22 isomiR 16 40 . + . Read CAACGTAATCCCAAAAGCAGCTTCG; UID vSo#@kkOT2; Name hsa-miR-191-5p; Parent hsa-mir-191; Variant iso_snv,iso_add:+2; Cigar 5MT16MTCG; Expression 1; Filter Pass
+hsa-let-7b-5p miRBase22 isomiR 6 30 . + . Read TGAGGTAGTAGCTTGTGTGGTTAGT; UID 7AwkRBAJL2; Name hsa-let-7b-5p; Parent hsa-let-7b; Variant iso_snvcentral_supp,iso_add:+3; Cigar 11MC10MAGT; Expression 1; Filter Pass
+hsa-let-7d-5p miRBase22 isomiR 8 24 . + . Read TGAGGTAGTCGGTTGTA; UID 7AwGRL1; Name hsa-let-7d-5p; Parent hsa-let-7d; Variant iso_snv_central_offset,iso_3p:-5; Cigar T8MC5MTM; Expression 3; Filter Pass
+hsa-miR-140-3p miRBase22 isomiR 63 84 . + . Read ACCACAGGGTAGAACCAAGGTC; UID al$JfvAv2; Name hsa-miR-140-3p; Parent hsa-mir-140; Variant iso_snv,iso_add:+2,iso_5p:-1; Cigar 17MA2MTM; Expression 1; Filter Pass
+hsa-miR-143-3p miRBase22 isomiR 61 81 . + . Read TGAGATGAAGCACTGTAGCTT; UID 7FTVqJH; Name hsa-miR-143-3p; Parent hsa-mir-143; Variant iso_snv; Cigar 20MT; Expression 35; Filter Pass
+hsa-miR-142-5p miRBase22 isomiR 14 32 . + . Read CCCATAAAGTAGAAAGCAT; UID #gcJ@VL2; Name hsa-miR-142-5p; Parent hsa-mir-142; Variant iso_snv,iso_5p:+2,iso_3p:-4; Cigar 18MT; Expression 1; Filter Pass
+hsa-miR-371a-5p miRBase22 isomiR 6 23 . + . Read TCTCAAACTGTGGGGGCA; UID 0vs3$V; Name hsa-miR-371a-5p; Parent hsa-mir-371a; Variant iso_snv_seed,iso_3p:-2; Cigar T17M; Expression 1; Filter Pass
+hsa-miR-1307-3p miRBase22 isomiR 80 95 . + . Read ACTCGGCGTGGCGTCG; UID sGSMYT2; Name hsa-miR-1307-3p; Parent hsa-mir-1307; Variant iso_3p:-6; Cigar 16M; Expression 4; Filter Pass
+hsa-let-7d-5p miRBase22 isomiR 8 33 . + . Read AGAGGTAGTCGGTTGCATAGTTTCGT; UID mAwGRnwOW1; Name hsa-let-7d-5p; Parent hsa-let-7d; Variant iso_snvcentral_supp,iso_add:+4; Cigar 9MC13MCGT; Expression 1; Filter Pass
+hsa-miR-193b-3p miRBase22 isomiR 51 70 . + . Read AACTGGCCCTCAAAGTCCCG; UID fu#Uc9C1; Name hsa-miR-193b-3p; Parent hsa-mir-193b; Variant iso_3p:-2; Cigar 20M; Expression 1; Filter Pass
+hsa-miR-143-3p miRBase22 isomiR 61 81 . + . Read TGAGATGAAGCACTGTAGCTA; UID 7FTVqJr; Name hsa-miR-143-3p; Parent hsa-mir-143; Variant iso_snv; Cigar 20MA; Expression 3; Filter Pass
+hsa-miR-365b-5p miRBase22 isomiR 28 47 . + . Read GAGGGACTTTCAGGGGCAGC; UID 4KHU$VV1; Name hsa-miR-365b-5p; Parent hsa-mir-365b; Variant iso_5p:+1,iso_3p:-3; Cigar 20M; Expression 2; Filter Pass
+hsa-miR-185-5p miRBase22 isomiR 15 38 . + . Read TGGAGAGAAAGGCCGTTCCTGATT; UID umThyOqp; Name hsa-miR-185-5p; Parent hsa-mir-185; Variant iso_snv,iso_add:+2; Cigar 13MC9MT; Expression 1; Filter Pass
+hsa-miR-142-5p miRBase22 isomiR 14 32 . + . Read CCCATAAAGTAGAAAGCAA; UID #gcJ@V@2; Name hsa-miR-142-5p; Parent hsa-mir-142; Variant iso_snv,iso_5p:+2,iso_3p:-4; Cigar 18MA; Expression 1; Filter Pass
+hsa-miR-30a-5p miRBase22 isomiR 6 23 . + . Read TGTAAACATCCTCGACTG; UID B@niCq; Name hsa-miR-30a-5p; Parent hsa-mir-30a; Variant iso_3p:-4; Cigar 18M; Expression 15; Filter Pass
+hsa-miR-142-5p miRBase22 isomiR 14 32 . + . Read CCCATAAAGTAGAAAGCAC; UID #gcJ@Vv2; Name hsa-miR-142-5p; Parent hsa-mir-142; Variant iso_5p:+2,iso_3p:-4; Cigar 19M; Expression 4; Filter Pass
+hsa-miR-130a-3p miRBase22 isomiR 55 71 . + . Read CAGTGCAATGTTAAAAG; UID DIo5@m1; Name hsa-miR-130a-3p; Parent hsa-mir-130a; Variant iso_3p:-5; Cigar 17M; Expression 1; Filter Pass
+hsa-miR-7704 miRBase22 isomiR 0 18 . + . Read CCGGCGGCGGCGGCGACTG; UID yZZZZsT2; Name hsa-miR-7704; Parent hsa-mir-7704; Variant iso_snv,iso_5p:+1,iso_3p:-1; Cigar I3MCMG10MTG; Expression 1; Filter Pass
+hsa-let-7a-5p miRBase22 isomiR 6 28 . + . Read TGAGGTAGTAGGTCGTATAGTTT; UID 7AwhPzwQ1; Name hsa-let-7a-5p; Parent hsa-let-7a-1; Variant iso_snv,iso_3p:+1; Cigar 13MC9M; Expression 1; Filter Pass
+hsa-miR-887-3p miRBase22 isomiR 48 68 . + . Read GTGAACGGGCGCCATCCCGAG; UID 3f$En#4; Name hsa-miR-887-3p; Parent hsa-mir-887; Variant iso_3p:-1; Cigar 21M; Expression 1; Filter Pass
+hsa-miR-125b-5p miRBase22 isomiR 15 38 . + . Read TCCCTGAGACCCTAACTTGTGAAT; UID 9qm#LH3o; Name hsa-miR-125b-5p; Parent hsa-mir-125b-1; Variant iso_add:+2; Cigar 22MAT; Expression 6; Filter Pass
+hsa-let-7c-5p miRBase22 isomiR 11 32 . + . Read TGAGGTAGTAGTTTGTATGGTT; UID 7AwwRzAL2; Name hsa-let-7c-5p; Parent hsa-let-7c; Variant iso_snv; Cigar 11MT10M; Expression 2; Filter Pass
+hsa-miR-372-3p miRBase22 isomiR 43 66 . + . Read AAGTGCTGCGACATTTGAGCTTTC; UID cII8p76O; Name hsa-miR-372-3p; Parent hsa-mir-372; Variant iso_snv,iso_add:+2,iso_5p:-1; Cigar 20MTMTC; Expression 1; Filter Pass
+hsa-miR-372-3p miRBase22 isomiR 42 65 . + . Read NAAGTGCTGCGACATTTGAGCGTT; UID .; Name hsa-miR-372-3p; Parent hsa-mir-372; Variant iso_snv,iso_add:+1; Cigar N22MT; Expression 1; Filter Pass
+hsa-miR-320a-3p miRBase22 isomiR 42 64 . + . Read AAAAGCTGGGTTGAGAGGGAGTT; UID @ku54h4Q1; Name hsa-miR-320a-3p; Parent hsa-mir-320a; Variant iso_snv,iso_add:+1; Cigar 19MAMTT; Expression 1; Filter Pass
+hsa-miR-195-5p miRBase22 isomiR 15 38 . + . Read TAGCAGCACAGAAATATTGGTCGT; UID JDtmopAS; Name hsa-miR-195-5p; Parent hsa-mir-195; Variant iso_snv,iso_add:+3; Cigar 20MTCGT; Expression 1; Filter Pass
+hsa-miR-10400-5p miRBase22 isomiR 4 22 . + . Read CGGCGGCGACTCTGGACGC; UID GGCjudv2; Name hsa-miR-10400-5p; Parent hsa-mir-10400; Variant iso_snvcentral_supp,iso_add:+1,iso_5p:-3; Cigar 8MA6MA2MC; Expression 13; Filter Pass
+hsa-miR-17-3p miRBase22 isomiR 51 74 . + . Read ACTGCAGTGAAGGCACTTGTAGAA; UID sV3cVHWT; Name hsa-miR-17-3p; Parent hsa-mir-17; Variant iso_add:+2; Cigar 22MAM; Expression 1; Filter Pass
+hsa-miR-181a-3p miRBase22 isomiR 63 92 . + . Read AACCATCGACCGTTGATTGTATCGTATGCC; UID fnCyRpWPzX; Name hsa-miR-181a-3p; Parent hsa-mir-181a-1; Variant iso_snv,iso_add:+7,iso_5p:+1; Cigar 21MTMG4MCM; Expression 1; Filter Pass
+hsa-miR-140-3p miRBase22 isomiR 62 80 . + . Read TACCACAGGGTAGAACCCC; UID NthWT#v2; Name hsa-miR-140-3p; Parent hsa-mir-140; Variant iso_snv,iso_3p:-2; Cigar 17MCM; Expression 1; Filter Pass
+hsa-let-7c-5p miRBase22 isomiR 14 32 . + . Read GGTAGTAGGTTGTATGGTC; UID AwhRzAv2; Name hsa-let-7c-5p; Parent hsa-let-7c; Variant iso_snv,iso_5p:-3; Cigar 18MC; Expression 1; Filter Pass
+hsa-let-7b-5p miRBase22 isomiR 6 27 . + . Read TGAGGTAGTAGGTTGTGCGGTT; UID 7AwhRIAL2; Name hsa-let-7b-5p; Parent hsa-let-7b; Variant iso_snv; Cigar 17MC4M; Expression 35; Filter Pass
+hsa-miR-181c-5p miRBase22 isomiR 27 49 . + . Read AACATTCAACCTGTCGGTGAGTT; UID fpviYA4Q1; Name hsa-miR-181c-5p; Parent hsa-mir-181c; Variant iso_3p:+1; Cigar 23M; Expression 1; Filter Pass
+hsa-let-7f-5p miRBase22 isomiR 7 33 . + . Read TGAGGTAGTAGATTGTATAGTTCTTGT; UID 7AwmRzw0B; Name hsa-let-7f-5p; Parent hsa-let-7f-1; Variant iso_add:+5; Cigar 22MCMTMT; Expression 1; Filter Pass
+hsa-let-7c-5p miRBase22 isomiR 11 35 . + . Read TGAGGTAGTAGGTTGTATGGTTCAT; UID 7AwhRzAUL2; Name hsa-let-7c-5p; Parent hsa-let-7c; Variant iso_add:+3; Cigar 22MCMT; Expression 1; Filter Pass
+hsa-let-7b-5p miRBase22 isomiR 6 24 . + . Read TGGGGTTGTAGGTTGTGTG; UID uABhRBT2; Name hsa-let-7b-5p; Parent hsa-let-7b; Variant iso_snv_central_offset,iso_3p:-3; Cigar 2MG3MT12M; Expression 1; Filter Pass
+hsa-let-7b-5p miRBase22 isomiR 6 28 . + . Read TGAGGTAGTAGGTTGTGTGTTTT; UID 7AwhRB5Q1; Name hsa-let-7b-5p; Parent hsa-let-7b; Variant iso_snv,iso_3p:+1; Cigar 19MT3M; Expression 2; Filter Pass
+hsa-let-7i-5p miRBase22 isomiR 6 29 . + . Read TGAGGTAGTAGTTTGTGCTCGTAT; UID 7AwwRIPz; Name hsa-let-7i-5p; Parent hsa-let-7i; Variant iso_snv,iso_add:+2; Cigar 19MCGMAT; Expression 1; Filter Pass
+hsa-let-7f-5p miRBase22 isomiR 7 26 . + . Read TGAGGTAGTAGATTGTATTC; UID 7AwmRzU1; Name hsa-let-7f-5p; Parent hsa-let-7f-1; Variant iso_snv,iso_3p:-2; Cigar 18MTC; Expression 6; Filter Pass
+hsa-miR-382-3p miRBase22 isomiR 47 66 . + . Read AATCATTCACGGACAACACT; UID onUGllr1; Name hsa-miR-382-3p; Parent hsa-mir-382; Variant iso_3p:-1; Cigar 20M; Expression 2; Filter Pass
+hsa-miR-221-3p miRBase22 isomiR 65 89 . + . Read AGCTACATTGTCTGCTGGGTTAATC; UID kNpYIu5ov2; Name hsa-miR-221-3p; Parent hsa-mir-221; Variant iso_snv,iso_add:+2; Cigar 21MAATC; Expression 2; Filter Pass
+hsa-miR-423-5p miRBase22 isomiR 17 37 . + . Read TGAGGGGCAGAGAGCGAGACT; UID 7$V4k4s; Name hsa-miR-423-5p; Parent hsa-mir-423; Variant iso_3p:-2; Cigar 21M; Expression 8; Filter Pass
+hsa-miR-762 miRBase22 isomiR 49 70 . + . Read GGGGCTGGGGCTGGGGCCGGTC; UID $6$6$XAv2; Name hsa-miR-762; Parent hsa-mir-762; Variant iso_snv; Cigar 11MT7MGTM; Expression 1; Filter Pass
+hsa-miR-101-3p miRBase22 isomiR 46 67 . + . Read GTCCAGTACTGTGATAACTGAA; UID YDNBFf7@2; Name hsa-miR-101-3p; Parent hsa-mir-101-1; Variant iso_snv,iso_5p:+1; Cigar 2MC19M; Expression 1; Filter Pass
+hsa-miR-449b-5p.SNPA miRBase22 isomiR 16 34 . + . Read AGGCAGTGTATCGTTAGCT; UID hDBe5kL2; Name hsa-miR-449b-5p.SNPA; Parent hsa-mir-449b; Variant iso_3p:-3; Cigar 19M; Expression 3; Filter Pass
+hsa-miR-3606-3p miRBase22 isomiR 40 57 . + . Read TTAAAATTTCTGTAACCT; UID Q@%qLi; Name hsa-miR-3606-3p; Parent hsa-mir-3606; Variant iso_snv,iso_5p:+2,iso_3p:-5; Cigar 11MGMA2MCT; Expression 1; Filter Pass
+hsa-miR-140-3p miRBase22 isomiR 62 80 . + . Read TACCACAGGGTAGAAACCC; UID NthWTav2; Name hsa-miR-140-3p; Parent hsa-mir-140; Variant iso_snv,iso_3p:-2; Cigar 15MAMCM; Expression 1; Filter Pass
+hsa-miR-199a-3p.SNPC miRBase22 isomiR 47 69 . + . Read ACAGTAGTCTGCAACTTGGTTTC; UID lWYIfR5U1; Name hsa-miR-199a-3p.SNPC; Parent hsa-mir-199a-1; Variant iso_snv,iso_add:+1; Cigar 13MAC6MTC; Expression 1; Filter Pass
+hsa-let-7g-5p miRBase22 isomiR 5 25 . + . Read TGAGGTAGTAGTTTGTACGGT; UID 7AwwRNA; Name hsa-let-7g-5p; Parent hsa-let-7g; Variant iso_snv,iso_3p:-1; Cigar 18MG2M; Expression 2; Filter Pass
+hsa-miR-320a-3p miRBase22 isomiR 42 66 . + . Read AAAAGCTGGGTTGAGATGGCGATCG; UID @ku54bZeT2; Name hsa-miR-320a-3p; Parent hsa-mir-320a; Variant iso_snv,iso_add:+3; Cigar 16MT5MTCG; Expression 1; Filter Pass
+hsa-miR-181b-5p miRBase22 isomiR 37 59 . + . Read ACATTCATTGCTGTCGGTGGTCG; UID lOp6YAAC1; Name hsa-miR-181b-5p; Parent hsa-mir-181b-1; Variant iso_snv,iso_add:+1,iso_5p:-1; Cigar 20MTCG; Expression 1; Filter Pass
+hsa-miR-34a-5p miRBase22 isomiR 22 45 . + . Read TGGCAGTGTCTTAGCTGGTTGTTC; UID uDBHkuRO; Name hsa-miR-34a-5p; Parent hsa-mir-34a; Variant iso_add:+2; Cigar 23MC; Expression 1; Filter Pass
+hsa-miR-145-5p miRBase22 isomiR 16 39 . + . Read GTCCAGTTTTCCCAGGAATCCCAT; UID YD%9DT9n; Name hsa-miR-145-5p; Parent hsa-mir-145; Variant iso_snv,iso_3p:+1; Cigar 22MAM; Expression 1; Filter Pass
+hsa-miR-1301-3p miRBase22 isomiR 48 70 . + . Read TTGCAGCTGCCTGGGAGAGACTT; UID RDqi$m8Q1; Name hsa-miR-1301-3p; Parent hsa-mir-1301; Variant iso_snv,iso_3p:-1; Cigar 17MA5M; Expression 1; Filter Pass
+hsa-miR-3917 miRBase22 isomiR 52 67 . + . Read GCTCGGACTGAGCAGG; UID 6Gs4DT2; Name hsa-miR-3917; Parent hsa-mir-3917; Variant iso_3p:-4; Cigar 16M; Expression 1; Filter Pass
+hsa-miR-99b-5p miRBase22 isomiR 7 22 . + . Read CACCCGTAGAACCGAC; UID tyJfCv2; Name hsa-miR-99b-5p; Parent hsa-mir-99b; Variant iso_3p:-6; Cigar 16M; Expression 2; Filter Pass
+hsa-miR-372-3p miRBase22 isomiR 43 62 . + . Read AAGTGCTGCGACATTTGAGC; UID cII8p7V1; Name hsa-miR-372-3p; Parent hsa-mir-372; Variant iso_5p:-1,iso_3p:-2; Cigar 20M; Expression 105; Filter Pass
+hsa-miR-140-3p miRBase22 isomiR 62 90 . + . Read TACCACAGGGTAGAACCACGGATAGTAAG; UID NthWTxGgWm1; Name hsa-miR-140-3p; Parent hsa-mir-140; Variant iso_add:+8; Cigar 22MT2MTMAG; Expression 1; Filter Pass
diff --git a/data/examples/annotate/miRNA_sample_list.txt b/data/examples/annotate/miRNA_sample_list.txt
new file mode 100755
index 0000000..a761060
--- /dev/null
+++ b/data/examples/annotate/miRNA_sample_list.txt
@@ -0,0 +1,30 @@
+hsa-miR-6727-5p
+hsa-miR-409-3p
+hsa-miR-30a-5p
+hsa-miR-372-3p
+hsa-miR-26a-5p
+hsa-miR-142-5p
+hsa-miR-365b-5p
+hsa-miR-142-5p
+hsa-miR-142-5p
+hsa-miR-7704
+hsa-miR-3606-3p
+hsa-miR-372-3p
+hsa-miR-372-3p
+hsa-let-7a-5p
+hsa-miR-92b-3p
+hsa-miR-140-3p
+hsa-miR-6866-5p
+hsa-miR-3926
+hsa-miR-576-3p
+hsa-miR-10400-5p
+hsa-miR-4655-5p.SNPC
+hsa-miR-23a-3p
+hsa-miR-140-3p
+hsa-miR-103a-3p
+hsa-miR-140-3p
+hsa-miR-664a-5p.SNPC
+hsa-miR-142-5p
+hsa-miR-26a-5p
+hsa-miR-10400-5p
+hsa-miR-192-5p
diff --git a/data/examples/annotate/query_sample.db b/data/examples/annotate/query_sample.db
new file mode 100755
index 0000000..62abb13
--- /dev/null
+++ b/data/examples/annotate/query_sample.db
Binary files differ
diff --git a/docs/sql_usage.md b/docs/sql_usage.md
new file mode 100644
index 0000000..626f777
--- /dev/null
+++ b/docs/sql_usage.md
@@ -0,0 +1,399 @@
+# SQLite Command Options
+
+The `mirtop sql` package allows users to create and query a SQLite database using GFF3 file format as input. This option creates a database with two tables such as summary and data\_sets. The summary table contains the header portion of the gff file and data\_sets contains the body of the gff containing the rest of the information related to miRNAs.
+
+Use `mirtop sql -h` to display help information
+
+```
+mirtop sql -h
+```
+SQL arguments:
+```
+['sql', '-h']
+usage: mirtop sql [-h] [--db] (-c | -q) [--gff] [-o] [-t] [-txto] [-col] [-n]
+ [-miR] [-var] [-f] [-l] [-e] [-d] [-vd]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --db SQL Database name. (default: mirtop.db)
+ -c, --create Creates a SQLite database from GFF
+ -q, --query Query from a SQLite database
+ -d, --debug max verbosity mode
+ -vd, --print_debug print debug messages on terminal
+
+SQL create usage mode:
+ --gff GFF file with precursor and mature position to genome
+ -o , --out Directory of output files
+
+SQL query usage mode:
+ -t , --table Specify table name to use
+ -txto , --txtout Writes the output of the query to a file speficied. Format (-fmt) is a tab-delimited text file by default
+ -col , --columns Select specific columns from the table to display (Default: all columns), or use with -n option to return n-counts. For information of the available columns see 'show-schema' or 'show-columns'. NOTE: option -e select must be applied!
+ -n , --count Returns 'n' counts for the query. Options 'T' for True, if not 'F' (Default: -n F). NOTE: option -e select must be applied and accepts only one column from -col option.
+ -miR , --miRNA Specify the miRNA names to query. For multiple miRNAs use comma(,) as separator; or supply a text file (.txt) separated with a new line character.
+ -pm , --miRNA_prefix (3 digit species code, ex. hsa)
+ Specify the prefix name for miRNAs to query. Example: -pm hsa -miR let-7a-5p results into querying hsa-let-7a-5p.
+ -var , --variant Specify one or more types of variants to query. Use comma(,) as separator
+ The following choices are supported:
+ iso_5p - indicates the shift at the reference 5' miRNA
+ iso_3p - indicates the shift at the reference 3' miRNA
+ iso_add3p - Number of non-template nucleotides added at 3p
+ iso_add5p - Number of non-template nucleotides added at 5p
+ iso_snv_seed - when affected nucleotides are between [2-7]
+ iso_snv_central_offset - when affected nucleotide is at position [8]
+ iso_snv_central - when affected nucleotides are between [9-12]
+ iso_snv_central_supp - when affected nucleotides are between [13-17]
+ iso_snv - anything else
+
+ -f , --filter Specify Filter tag attribute. Options: Pass, Reject. (Default: None)
+ -l , --limit Specify the number of rows to output. (Example: --limit 30, to limit the first 30 rows)
+ -e , --expr Expression is the query that you want to run; (-e "<statement>")
+ Choices supports the following:
+ show-tables - Displays tables in the database (default: mirtop.db)
+ show-schema - Displays the table schema (requires -t)
+ show-columns - Displays available columns in the table
+ describe-gff - Prints out the header information from the GFF file
+ isomirs-per-mirna - Displays the count of isomiRs for miRNA (requires -miR)
+ select - Allows specific query construction.
+ Example: mirtop sql --db tmp_mirtop/SRR333680_revised2.db -qe select -var iso_5p,iso_3p -miR hsa-let-7a-5p,hsa-let-7d-5p -l 30
+ The above expression evaluates to selecting miRNAs in -miR with variants in -var and prints out the first 30 rows in --limit.
+
+
+```
+## Creating Database
+
+The `mirtop sql -c` takes `--gff` gff3 file and creates a database with name `--db`
+
+### From GFF3 to SQLite database
+
+```
+git clone mirtop
+cd mirtop/data
+```
+
+You can use the example data to create a database file from GFF3.
+
+```
+mirtop sql -c --gff examples/annotate/SQL_sample.gff -o examples/annotate/ --db SQL_sample.db
+```
+NOTE-1: If you are re-creating the database with the same name, make sure to delete the existing database in the working directory. As the database from the same set of samples will append the gff rows to the existing database and different set of samples throws an error as shown below.
+
+`sqlite3.OperationalError: table data_sets has xy columns but yz values were supplied`
+Where, xy is the number of columns present in database and yz are the number of column-values sent to append to the database.
+
+
+## Querying a Database
+
+Understanding the database structure helps to quickly query and fetch the results. The following content lists the useful commands to query based on the availabe options. To start with the available options type: `mirtop sql -h`.
+
+NOTE-2: To query from a database, the argument `mirtop sql -q --db <db_name.db>` must always be specified with the correct name of the database along with absolute path (if required). The query will begin with an expression (-e) that is specified by the User. Passing a suitable query is mandatory such as: `mirtop sql -q --db <db_name.db> -e <expression>`. The following expressions allows users the ability to query from the database:
+
+* show-tables - Displays tables in the database (default: mirtop.db)
+* show-schema - Displays the table schema (requires -t)
+* show-columns - Displays available columns in the table
+* describe-gff - Prints out the header information from the GFF file
+* isomirs-per-mirna - Displays the count of isomiRs for miRNA (requires -miR)
+* select - Allows specific query construction
+
+### show-tables:
+Display the contents of the database. The data or information from a gff file is loaded as a table, and the collection of tables make it a database. To see the contents of the database, we should see the available tables in the database.
+
+```
+cd mirtop/data
+```
+
+You can use the example data (query\_sample.db) to query the contents of the database.
+
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e show-tables
+```
+
+OUTPUT:
+
+```
+11/29/2019 01:03:49 INFO Run Convert GFF.
+ +------------------------- +
+ | Tables |
+ +------------------------- +
+ | data_sets |
+ | summary |
+ +------------------------- +
+11/29/2019 01:03:49 INFO It took 0.000 minutes
+```
+
+
+### show-schema:
+Display the schema of a table. The table schema represents the data type of each column and lists available columns such as samples names. Generally this information is essential for developers and can aid in debugging any errors during the query operation. show-schema essentially requires one to specify a table name for which the schema is intended to be displayed.
+
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e show-schema -t summary
+```
+
+OUTPUT:
+```
+11/29/2019 01:13:55 INFO Run Convert GFF.
+ +---------------------------------------------------------+
+ | Sl | Field | Type | NULL | Key |
+ +---------------------------------------------------------+
+ | 0 | version | text | NO | |
+ | 1 | source | text | NO | |
+ | 2 | data_sets | text | NO | |
+ | 3 | tools | text | NO | |
+ | 4 | commands_exec | text | NO | |
+ | 5 | filter_tags | text | NO | |
+ | 6 | citation | text | NO | |
+ | 7 | records | real | NO | |
+ | 8 | date_stamp | text | NO | |
+ +---------------------------------------------------------+
+11/29/2019 01:13:55 INFO It took 0.000 minutes
+```
+
+### show-columns: Query the columns of the table
+
+This is similar to show-schema, however, only columns from the table are listed excluding the data type and other information. This is required for select query options, explained later.
+NOTE-3: This parameter is only available for table data\_sets.
+
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e show-columns
+```
+OUTPUT:
+```
+11/29/2019 01:18:55 INFO Run Convert GFF.
+
+Serial Column names
+ 1 seqID
+ 2 source_file
+ 3 type
+ 4 start
+ 5 end
+ 6 score
+ 7 strand
+ 8 phase
+ 9 UID
+ 10 Read
+ 11 Name
+ 12 Parent
+ 13 Variant
+ 14 iso_5p
+ 15 iso_3p
+ 16 iso_add3p
+ 17 iso_add5p
+ 18 iso_snv
+ 19 iso_snv_seed
+ 20 iso_snv_central
+ 21 iso_snv_central_offset
+ 22 iso_snv_central_supp
+ 23 source
+ 24 cigar
+ 25 hits
+ 26 alias
+ 27 genomic_pos
+ 28 filter
+ 29 seed_fam
+ 30 SRR333680_1
+
+11/29/2019 01:18:55 INFO It took 0.000 minutes
+```
+
+### describe-gff:
+This option prints out the header information of the gff3 (table: summary)
+
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e describe-gff
+```
+OUTPUT:
+```
+11/29/2019 01:21:18 INFO Run Convert GFF.
+
+Serial Column names Description
+ 1 version "--"
+ 2 source "miRBase22"
+ 3 data_sets "SRR333680_1"
+ 4 tools "--"
+ 5 commands_exec "--"
+ 6 filter_tags "--"
+ 7 citation "--"
+ 8 records "11672.0"
+ 9 date_stamp "November 22, 2019 16:41:50"
+
+11/29/2019 01:21:18 INFO It took 0.000 minutes
+```
+
+### isomirs-per-mirna:
+This expression provides a summary of the number of the isomiRs for the query miRNA (`-miR`)
+
+Query miRNA (`-miR` or `--miRNA`) can be a particular miRNA of interest, or list of a few miRNAs (separated by comma) or a file of miRNAs (.txt file). The output can be redirected to a text document with `-txto <text_file.txt>` argument. The users can also chooses the prefix for the miRNAs with `-pm`. For example: A query `-pm hsa -miR let-7a-5p,let-7d-5p` will result into querying the database as `-miR hsa-let-7a-5p,hsa-let-7d-5p`.
+
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e isomirs-per-mirna -miR hsa-let-7a-5p
+ (-- OR --)
+mirtop sql -q --db examples/annotate/query_sample.db -e isomirs-per-mirna -miR hsa-let-7a-5p,hsa-let-7d-5p
+ (-- OR --)
+mirtop sql -q --db examples/annotate/query_sample.db -e isomirs-per-mirna -miR let-7a-5p,let-7d-5p -pm hsa
+ (-- OR --)
+mirtop sql -q --db examples/annotate/query_sample.db -e isomirs-per-mirna -miR examples/annotate/miRNA_sample_list.txt -txto examples/annotate/queryOutput_isomirs.txt
+```
+OUTPUT:
+
+```
+11/29/2019 02:26:44 INFO Run Convert GFF.
+
+OUTPUT:
+1. isomiRs for miRNA hsa-let-7a-5p: 397
+2. isomiRs for miRNA hsa-let-7d-5p: 59
+
+11/29/2019 02:26:44 INFO It took 0.000 minutes
+```
+
+### select:
+This expression represents the SELECT statement used in MySQL database. It offers a lot of query options and can be combined with one or more optional arguments mentioned below.
+
+* limit (`-l or --limit`): Specify the number of rows to output
+
+The following command uses the conventional "SELECT * FROM data\_sets" option to display the contents of the table data\_sets from database query\_sample.db. However using `-l 2 or --limit 2` limits the output to display only the first two rows. If -l is not provided, it prints out all of the rows on the terminal (or) prints to a file if `-txto fileName.txt` is provided.
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e select --limit 2
+```
+OUTPUT:
+```
+11/29/2019 03:04:28 INFO Run Convert GFF.
+seqID source_file type start end score strand phase UID Read Name Parent Variant iso_5p iso_3p iso_add3p iso_add5p iso_snv iso_snv_seed iso_snv_central iso_snv_central_offset iso_snv_central_supp source cigar hits alias genomic_pos filter seed_fam SRR333680_1
+hsa-miR-342-3p miRBase22 isomiR 61.0 85.0 . + . 0t@TeV#5v2 TCTCACAAAGAAATCGCACCCGTTC hsa-miR-342-3p hsa-mir-342 iso_snp,iso_add:+2 None None None None 1.0 0.0 0.0 0.0 0.0 miRBase22 7MA15MTC None None None Pass None 1
+hsa-let-7i-5p miRBase22 isomiR 6.0 28.0 . + . 7AwwRIB71 TGAGGTAGTAGTTTGTGCTGTTG hsa-let-7i-5p hsa-let-7i iso_3p:+1 None 1.0 None None 0.0 0.0 0.0 0.0 0.0 miRBase22 23M None None None Pass None 1
+11/29/2019 03:04:28 INFO It took 0.000 minutes
+```
+
+NOTE-4: In the above query option, we could limit the number of rows to be printed. What about columns? Can we limit them as well? and the answer is Yes.
+
+* columns: Select specific columns from the table to display
+
+The following command use the conventional "SELECT seqID,UID,Read,iso\_5p,iso\_3p,start,end FROM data\_sets LIMIT 2" option to display the contents of the table data\_sets from database query\_sample.db for specific columns. It prints out all the rows on the terminal (or) prints to a file if `-txto fileName.txt` is provided.
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e select -l 2 -col seqID,UID,Read,iso_5p,iso_3p,start,end
+```
+OUTPUT:
+```
+11/29/2019 03:28:40 INFO Run Convert GFF.
+seqID UID Read iso_5p iso_3p start end
+hsa-miR-342-3p 0t@TeV#5v2 TCTCACAAAGAAATCGCACCCGTTC None None 61.0 85.0
+hsa-let-7i-5p 7AwwRIB71 TGAGGTAGTAGTTTGTGCTGTTG None 1.0 6.0 28.0
+11/29/2019 03:28:40 INFO It took 0.000 minutes
+```
+* miRNA: Specify one or more miRNA to query.
+
+The user can specify miRNAs to query from the database. Use comma (miRNA-1,NO-SPACES,miRNA-n) to separate miRNAs while passing as an argument. For large set of query miRNAs use a text-file as input, separated by new line character. If short names are preffered over including the species name every time, then please refer to argument `-pm` or `--miRNA_prefix` to prefix the sepcies name along with the names of miRNAs.
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e select -l 4 -col seqID,UID,Read,iso_5p,iso_3p,start,end -miR hsa-let-7i-5p
+```
+OUTPUT:
+```
+seqID UID Read iso_5p iso_3p start end
+hsa-let-7i-5p 7AwwRIB71 TGAGGTAGTAGTTTGTGCTGTTG None 1.0 6.0 28.0
+hsa-let-7i-5p 7AwwRIBL1 TGAGGTAGTAGTTTGTGCTGTTA None None 6.0 28.0
+hsa-let-7i-5p 7AwwRIBU1 TGAGGTAGTAGTTTGTGCTGTTC None None 6.0 28.0
+hsa-let-7i-5p 7AwwRIBQ1 TGAGGTAGTAGTTTGTGCTGTTT None None 6.0 28.0
+11/29/2019 03:43:21 INFO It took 0.000 minutes
+```
+* variant: Specify the query with one or more variant types. The following variant types can be queried using `-var` argument.
+
+ * iso_5p - indicates the shift at the reference 5' miRNA
+ * iso_3p - indicates the shift at the reference 3' miRNA
+ * iso_add3p - number of non-template nucleotides added at 3p
+ * iso_add5p - number of non-template nucleotides added at 5p
+ * iso_snv_seed - when affected nucleotides are between [2-7]
+ * iso_snv_central_offset - when affected nucleotide is at position [8]
+ * iso_snv_central - when affected nucleotides are between [9-12]
+ * iso_snv_central_supp - when affected nucleotides are between [13-17]
+ * iso_snv - anything else
+
+The conventional query for selecting rows with TRUE values of iso_5p, iso_3p and iso_snv_central_offset would be as "SELECT * FROM data_sets WHERE iso_5p!="None" AND iso_3p!="None" AND iso_snv_central_offset!=0". In `mirtop sql` we can specifiy the same as shown in the example.
+
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e select -var iso_5p,iso_3p,iso_snv_central_offset -l 5
+```
+
+OUTPUT:
+```
+12/02/2019 11:52:39 INFO Run Convert GFF.
+seqID source_file type start end score strand phase UID Read Name Parent Variant iso_5p iso_3p iso_add3p iso_add5p iso_snv iso_snv_seed iso_snv_central iso_snv_central_offset iso_snv_central_supp source cigar hits alias genomic_pos filter seed_fam SRR333680_1
+hsa-miR-6727-5p miRBase22 isomiR 8.0 26.0 . + . uMcGq6v2 TGGGGCAAGCGGCTGGCTC hsa-miR-6727-5p hsa-mir-6727 iso_snv_central_offset,iso_5p:-2,iso_3p:-2 -2.0 -2.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 T6MA8MCTC None None None Pass None 1
+hsa-miR-6809-5p miRBase22 isomiR 8.0 25.0 . + . xh@L$4 CCAAGGAAATAAGGGGAG hsa-miR-6809-5p hsa-mir-6809 iso_snv_central_offset,iso_5p:-2,iso_3p:-2 -2.0 -2.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 C8MT3MG3MG None None None Pass None 1
+hsa-miR-6727-5p miRBase22 isomiR 8.0 24.0 . + . hMGGDx1 AGGGGCCGGCGGCAGCC hsa-miR-6727-5p hsa-mir-6727 iso_snv_central_offset,iso_5p:-2,iso_3p:-4 -2.0 -4.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 A5MC6MAMCC None None None Pass None 1
+hsa-let-7f-5p miRBase22 isomiR 8.0 27.0 . + . . NAGGTAGTAGATTGTATAGT hsa-let-7f-5p hsa-let-7f-1 iso_snv_central_offset,iso_5p:-1,iso_3p:-1 -1.0 -1.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 N19M None None None Pass None 1
+12/02/2019 11:52:39 INFO It took 0.001 minutes
+```
+* filter: Filter attribute lets a user choose data query such that the attribute is 'Pass' for the reads from the miRNA sequencing is OK; 'Reject' if the reads are false positive; 'Reject lowcount'where the miRNA is rejected due to a low count in data. This filter decision is made by the aligner tools and is supplied to the GFF3.
+
+
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e select -var iso_5p,iso_3p,iso_snv_central_offset -l 5 -f Pass
+```
+OUTPUT:
+```
+seqID source_file type start end score strand phase UID Read Name Parent Variant iso_5p iso_3p iso_add3p iso_add5p iso_snv iso_snv_seed iso_snv_central iso_snv_central_offset iso_snv_central_supp source cigar hits alias genomic_pos filter seed_fam SRR333680_1
+hsa-miR-6727-5p miRBase22 isomiR 8.0 26.0 . + . uMcGq6v2 TGGGGCAAGCGGCTGGCTC hsa-miR-6727-5p hsa-mir-6727 iso_snv_central_offset,iso_5p:-2,iso_3p:-2 -2.0 -2.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 T6MA8MCTC None None None Pass None 1
+hsa-miR-6809-5p miRBase22 isomiR 8.0 25.0 . + . xh@L$4 CCAAGGAAATAAGGGGAG hsa-miR-6809-5p hsa-mir-6809 iso_snv_central_offset,iso_5p:-2,iso_3p:-2 -2.0 -2.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 C8MT3MG3MG None None None Pass None 1
+hsa-miR-6727-5p miRBase22 isomiR 8.0 24.0 . + . hMGGDx1 AGGGGCCGGCGGCAGCC hsa-miR-6727-5p hsa-mir-6727 iso_snv_central_offset,iso_5p:-2,iso_3p:-4 -2.0 -4.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 A5MC6MAMCC None None None Pass None 1
+hsa-let-7f-5p miRBase22 isomiR 8.0 27.0 . + . . NAGGTAGTAGATTGTATAGT hsa-let-7f-5p hsa-let-7f-1 iso_snv_central_offset,iso_5p:-1,iso_3p:-1 -1.0 -1.0 None None 0.0 0.0 0.0 1.0 0.0 miRBase22 N19M None None None Pass None 1
+12/02/2019 12:33:28 INFO It took 0.001 minutes
+```
+* count: Count is used to retrieve a summary for a custom query. Generally, `select * from data_sets` or `select columns from data_sets` is used to retrieve the information and a WHERE clause is optionally supplied such as `--variants \| --miRNA` etc. This can be leveraged to get the count for a specific query, such that the query `select count(*) from data_sets` or `select count(columns) from data_sets` will be executed along with any optional WHERE clause. Argument `-n T` where T is True else False (Default False).
+
+For example:
+1) How many miRNA isoforms exists for a variant type iso_snv_central_offset and iso_5p?
+2) How many miRNA isoforms exists for a variant type iso_5p and iso_3p?
+3) How many miRNA isoforms exists for a variant type iso_5p and iso_3p for miRNA hsa-miR-142-5p and hsa-miR-372-3p?
+
+Query in that order of example is below:
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e select -var iso_5p,iso_snv_central_offset -f Pass -n T
+mirtop sql -q --db examples/annotate/query_sample.db -e select -var iso_5p,iso_3p -f Pass -n T
+mirtop sql -q --db examples/annotate/query_sample.db -e select -var iso_5p,iso_3p -f Pass -l 30 -miR hsa-miR-142-5p,hsa-miR-372-3p -n T
+```
+
+OUTPUT:
+
+Example 1:
+
+```
+12/02/2019 01:00:03 INFO Run Convert GFF.
+COUNT(*)
+Unique counts for all rows is: 10
+12/02/2019 01:00:03 INFO It took 0.000 minutes
+
+```
+
+Example 2:
+
+```
+12/02/2019 01:01:59 INFO Run Convert GFF.
+COUNT(*)
+Unique counts for all rows is: 751
+12/02/2019 01:01:59 INFO It took 0.000 minutes
+```
+
+Example 3:
+
+```
+12/02/2019 01:02:40 INFO Run Convert GFF.
+COUNT(*)
+1. hsa-miR-142-5p: 28
+2. hsa-miR-372-3p: 46
+12/02/2019 01:02:40 INFO It took 0.000 minutes
+```
+
+* txtout: Specify the query to redirect the output to a file instead of printing it on the screen. The name of the file must have an extension of (.txt).
+
+Extending from our previous example (3): How many miRNA isoforms exists for a variant type iso_5p and iso_3p for miRNA hsa-miR-142-5p and hsa-miR-372-3p and redirect the output to sample_count.txt
+```
+mirtop sql -q --db examples/annotate/query_sample.db -e select -var iso_5p,iso_3p -f Pass -l 30 -miR hsa-miR-142-5p,hsa-miR-372-3p -n T -txto sample_count.txt
+```
+OUTPUT:
+```
+12/02/2019 01:10:06 INFO Run Convert GFF.
+
+Writing data to file: sample_count.txt
+
+12/02/2019 01:10:06 INFO It took 0.000 minutes
+```
+
diff --git a/mirtop/bam/filter.py b/mirtop/bam/filter.py
index f51fdf0..27c644f 100644
--- a/mirtop/bam/filter.py
+++ b/mirtop/bam/filter.py
@@ -29,6 +29,7 @@ def tune(seq, precursor, start, cigar):
cigar (str): updated cigar
"""
+
end = len(seq)
if start < 0:
end = end + start
@@ -43,41 +44,49 @@ def tune(seq, precursor, start, cigar):
if seq.endswith("-"):
seq = seq[:-1]
logger.debug("TUNE:: %s %s %s" % (cigar, seq, mature))
+ subs, add = [], []
+ if seq == mature:
+ logger.debug("TUNE:: %s %s" % (subs, add))
+ return subs, "".join(add), make_cigar(seq, mature)
+
error = set()
for pos in range(0, len(seq)):
if seq[pos] != mature[pos]:
error.add(pos)
- subs, add = [], []
-
prob = 0
add_position = []
- for e in range(len(seq) - 1, len(seq) - 6, -1):
- if e in error:
- prob = 1
- if prob == 1:
- add.append(seq[e])
- add_position.append(e)
- if e not in error and prob == 0 and seq[e] in ["A", "T"]:
- add.append(seq[e])
- add_position.append(e)
- continue
- if e not in error:
- if add:
- add.pop()
- add_position.pop()
- if prob == 0:
- add = []
- add_position = []
- break
+ positions_to_look = [*range(len(seq) - 1, len(seq) - 6, -1)]
+ is_overlapped = len([value for value in error if value in positions_to_look])
+ if is_overlapped:
+ for e in positions_to_look:
+ if e in error:
+ prob = 1
+ if prob == 1:
+ add.append(seq[e])
+ add_position.append(e)
+ if e not in error and prob == 0 and seq[e] in ["A", "T"]:
+ add.append(seq[e])
+ add_position.append(e)
+ continue
+ if e not in error:
+ if add:
+ add.pop()
+ add_position.pop()
+ if prob == 0:
+ add = []
+ add_position = []
+ break
for e in error:
if e not in add_position:
subs.append([e, seq[e], mature[e]])
-
+ if not error:
+ add = []
+
logger.debug("TUNE:: %s %s" % (subs, add))
-
+
return subs, "".join(add), make_cigar(seq, mature)
diff --git a/mirtop/command_line.py b/mirtop/command_line.py
index e5da56a..556db47 100644
--- a/mirtop/command_line.py
+++ b/mirtop/command_line.py
@@ -13,6 +13,7 @@ from mirtop.exporter import export
from mirtop.gff import validator
from mirtop.libs import spikeins
from mirtop.gff import update
+from mirtop.sql import sql
import mirtop.libs.logger as mylog
import time
@@ -24,7 +25,7 @@ def main(**kwargs):
kwargs['args'].print_debug)
logger = mylog.getLogger(__name__)
start = time.time()
- #logger.warning("This is devel-live changes")
+
if "gff" in kwargs:
logger.info("Run annotation")
reader(kwargs["args"])
@@ -54,4 +55,7 @@ def main(**kwargs):
elif "update" in kwargs:
logger.info("Run update tools")
update.convert(kwargs["args"])
+ elif "sql" in kwargs:
+ logger.info("Run Convert GFF.")
+ sql.sql_options(kwargs["args"])
logger.info('It took %.3f minutes' % ((time.time()-start)/60))
diff --git a/mirtop/libs/parse.py b/mirtop/libs/parse.py
index 5749d24..e21b91e 100644
--- a/mirtop/libs/parse.py
+++ b/mirtop/libs/parse.py
@@ -17,7 +17,8 @@ def parse_cl(in_args):
"export": _add_subparser_export,
"validate": _add_subparser_validator,
"spikein": _add_subparser_spikein,
- "update": _add_subparser_update
+ "update": _add_subparser_update,
+ "sql": _add_subparser_sql
}
parser = argparse.ArgumentParser(description="small RNA analysis")
parser.add_argument("--version", action="store_true",help="show version.")
@@ -195,3 +196,59 @@ def _add_subparser_update(subparsers):
help="folder of output files")
parser = _add_debug_option(parser)
return parser
+
+
+def _add_subparser_sql(subparsers):
+ parser = subparsers.add_parser("sql", help="SQL create or query from GFF.", formatter_class=argparse.RawTextHelpFormatter, )
+ #parser.add_argument("--gff", help="GFF file with precursor and mature position to genome.")
+ parser.add_argument('--db', metavar='', action='store', help='SQL Database name. (default: mirtop.db)')
+
+ group = parser.add_mutually_exclusive_group(required=True)
+ group.add_argument('-c','--create', help="Creates a SQLite database from GFF", action='store_true')
+ group.add_argument('-q', '--query', help="Query from a SQLite database", action='store_true')
+
+ group1 = parser.add_argument_group('SQL create usage mode')
+ group1.add_argument("--gff", metavar='', help="GFF file with precursor and mature position to genome")
+ group1.add_argument("-o", "--out", metavar='', dest="out", default="tmp_mirtop", help="Directory of output files")
+
+ group2 = parser.add_argument_group('SQL query usage mode')
+ group2.add_argument("-t", "--table", metavar='', help="Specify table name to use")
+ #group2.add_argument("-txti", "--txt-in", metavar='', help="Provide the list of miRNA's in the text file as input. NOTE: List of miRNA's should be separated by new line")
+ group2.add_argument("-txto", "--txtout", metavar='', help="Writes the output of the query to a file speficied. Format (-fmt) is a tab-delimited text file by default")
+ #group2.add_argument("-a", "--all", metavar='', help="Selects all the columns from the table")
+ group2.add_argument("-col", "--columns", metavar='', help="Select specific columns from the table to display (Default: all columns), or use with -n option to return n-counts. For information of the available columns see 'show-schema' or 'show-columns'. NOTE: options -e select must be applied!.")
+ group2.add_argument("-n", "--count", metavar='', help="Returns 'n' counts for the query. Options 'T' for True, if not 'F' (Default: -n F). NOTE: options -e select must be applied! and accepts only one column from -col option." )
+ group2.add_argument("-miR", "--miRNA", metavar='', help="Specify the miRNA names to query. For multiple miRNAs use comma(,) as separator; or text file (.txt) separated with new line character")
+ group2.add_argument("-pm", "--miRNA_prefix", metavar='', help="Specify the prefix name for miRNAs to query. Example: -pm hsa -miR let-7a-5p results into querying hsa-let-7a-5p")
+ group2.add_argument("-var", "--variant", metavar='', help="""Specify one or more types of variants to query. Use comma(,) as separator
+ Choices supports the following:
+ iso_5p - indicates the shift at the reference 5' miRNA
+ iso_3p - indicates the shift at the reference 3' miRNA
+ iso_add3p - Number of non-template nucleotides added at 3p
+ iso_add5p - Number of non-template nucleotides added at 5p
+ iso_snv_seed - when affected nucleotides are between [2-7]
+ iso_snv_central_offset - when affected nucleotides is at position [8]
+ iso_snv_central - when affected nucleotides are between [9-12]
+ iso_snv_central_supp - when affected nucleotides are between [13-17]
+ iso_snv - anything else
+ """)
+ group2.add_argument("-f", "--filter", metavar='', help="Specify Filter tag attribute. Options: Pass, Reject. (Default: None)")
+ group2.add_argument("-l", "--limit", metavar='', help="Specify the number of rows to output. (Example: --limit 30, to limit the first 30 rows)")
+ # group2.add_argument("-imiR", "--isomiR", metavar='', help="Specify the miRNA name to query")
+ # group2.add_argument("-s", "--schema", metavar='', help="Show the schema of the select tables; (-s <table_name>)")
+ group2.add_argument("-e", "--expr", metavar='',
+ help="""Expression is the query that you want to run; (-e \"<statement>\")
+ Choices supports the following:
+ show-tables - Displays tables in the database (default: mirtop.db)
+ show-schema - Displays the table schema (requires -t)
+ show-columns - Displays available columns in the table
+ describe-gff - Prints out the header information from the GFF file
+ isomirs-per-mirna - Displays the count of isomiRs for miRNA (requires -miR)
+ select - Allows specific query construction.
+ Example: mirtop sql --db tmp_mirtop/SRR333680_revised2.db -qe select -var iso_5p,iso_3p -miR hsa-let-7a-5p,hsa-let-7d-5p -l 30
+ The above expression evaluates to selecting miRNAs in -miR with variants in -var and prints out first 30 rows in --limit
+ """)
+
+ parser = _add_debug_option(parser)
+ return parser
+
diff --git a/mirtop/sql/__init__.py b/mirtop/sql/__init__.py
new file mode 100644
index 0000000..80e72b1
--- /dev/null
+++ b/mirtop/sql/__init__.py
@@ -0,0 +1,3 @@
+from mirtop.libs import config
+
+__version__ = config.version \ No newline at end of file
diff --git a/mirtop/sql/sql.py b/mirtop/sql/sql.py
new file mode 100644
index 0000000..59608cf
--- /dev/null
+++ b/mirtop/sql/sql.py
@@ -0,0 +1,486 @@
+# import os
+import argparse
+import sqlite3
+import re
+from datetime import datetime
+import time
+import os.path as op
+
+now = datetime.now()
+# dd/mm/YY H:M:S
+d2 = now.strftime("%B %d, %Y %H:%M:%S")
+
+
+def create_table(conn, sample_names):
+ c = conn.cursor()
+ data_columns = ['seqID text', 'source_file text', 'type text', 'start real',
+ 'end real', 'score text', 'strand text', 'phase text', 'UID text', 'Read text', 'Name text', 'Parent text', 'Variant text',
+ 'iso_5p real', 'iso_3p real', 'iso_add3p real', 'iso_add5p real', 'iso_snv real', 'iso_snv_seed real', 'iso_snv_central real', 'iso_snv_central_offset real',
+ 'iso_snv_central_supp real', 'source text', 'cigar text', 'hits real', 'alias text', 'genomic_pos text', 'filter text',
+ 'seed_fam text']
+ complete_headers = data_columns + sample_names
+ q = "CREATE TABLE IF NOT EXISTS data_sets(%s)" % ", ".join(complete_headers)
+ c.execute(q)
+ conn.commit()
+
+
+def gff_insert_values(conn, complete_list):
+ try:
+ conn.execute('INSERT INTO data_sets VALUES(' + ','.join("?" * len(complete_list)) + ')', complete_list)
+ conn.commit()
+ except sqlite3.OperationalError as e:
+ print()
+ print("ERROR:")
+ print("sqlite3.OperationalError: {0}".format(e))
+ print("Help: Make sure to delete any existing database with tables of different schema")
+ exit()
+
+
+# print("date and time =", d2)
+
+def insert_sql(args):
+ if args.db:
+ out_file = op.join(args.out, args.db)
+ conn = sqlite3.connect(out_file)
+ c = conn.cursor()
+ else:
+ out_file = op.join(args.out, "mirtop.db")
+ conn = sqlite3.connect(out_file)
+ c = conn.cursor()
+
+ with open(args.gff, 'r') as f:
+ version = source = data_sets = tools = commands_exec = filter_tags = citation = num_records = ""
+ cnt = 0
+ for text in f:
+ # HEADER INFORMATION
+ if re.search("^## .* VERSION", text): # (R)
+ version = (text.strip().split(' ')[-1])
+ elif re.search("^## source-ontology", text): # (R)
+ source = (text.strip().split(' ')[-1])
+ elif re.search("^## COLDATA", text): # (R)
+ data_sets = (
+ text.strip().split(' ')[-1]) # Might contain more than one data set
+ sample_names = data_sets.split(',')
+ sample_names = [w.replace('-', '_') for w in sample_names]
+ string_text = "text"
+ output_sample_names = ["{} {}".format(i, string_text) for i in sample_names]
+ create_table(conn, output_sample_names)
+ elif re.search("^## TOOLS", text): # (R)
+ tools = (text.strip().split(' ')[-1])
+ elif re.search("^## CMD", text): # (O)
+ commands_exec = (text.strip().split(' ')[-1])
+ elif re.search("^## FILTER", text): # (O)
+ filter_tags = (text.strip().split(' ')[-1])
+ elif re.search("^## REFERENCE", text): # (O)
+ citation = (text.strip().split(' ')[-1])
+ # BODY - INFORMATION
+ elif not re.search("^#", text):
+ cnt += 1
+ lines = text.strip().split('\t')
+ if '=' in lines[-1]:
+ lines_info_array = lines[-1].replace("=", " ")
+ else:
+ lines_info_array = lines[-1]
+
+ info = lines_info_array.split('; ')
+ info_dict = dict()
+ for elements in info:
+ (k, v) = elements.split(' ')
+ info_dict.update([(k, v)])
+ if 'Variant' in k and ":" in v:
+ value_list = v.split(',')
+ for iso_vars in value_list:
+ if ":" in iso_vars:
+ (sub_k, sub_v) = iso_vars.split(':')
+ info_dict.update([(str(sub_k), str(sub_v))])
+ else:
+ ### Exception for miRge format START
+ if iso_vars == "iso_snp":
+ iso_vars = "iso_snv"
+ elif iso_vars == "iso_add":
+ iso_vars = "iso_add3p"
+ ### Exception for miRge format END
+ info_dict['iso_snv'] = "1" if iso_vars == 'iso_snv' else 0
+ info_dict['iso_snv_seed'] = "1" if iso_vars == 'iso_snv_seed' else 0
+ info_dict['iso_snv_central'] = "1" if iso_vars == 'iso_snv_central' else 0
+ info_dict['iso_snv_central_offset'] = "1" if iso_vars == 'iso_snv_central_offset' else 0
+ info_dict['iso_snv_central_supp'] = "1" if iso_vars == 'iso_snv_central_supp' else 0
+
+ prefix_list = [lines[0], lines[1], lines[2], lines[3], lines[4], lines[5], lines[6], lines[7],
+ info_dict.get('UID'), info_dict.get('Read'), info_dict.get('Name'),
+ info_dict.get('Parent'),
+ info_dict.get('Variant'),
+ str(info_dict.setdefault('iso_5p', None)),
+ str(info_dict.setdefault('iso_3p', None)),
+ str(info_dict.setdefault('iso_add3p', None)),
+ str(info_dict.setdefault('iso_add5p', None)),
+ str(info_dict.setdefault('iso_snv', "0")),
+ str(info_dict.setdefault('iso_snv_seed', "0")),
+ str(info_dict.setdefault('iso_snv_central', "0")),
+ str(info_dict.setdefault('iso_snv_central_offset', "0")),
+ str(info_dict.setdefault('iso_snv_central_supp', "0")),
+ source,
+ info_dict.setdefault('Cigar', None),
+ info_dict.setdefault('Hits', None), info_dict.setdefault('Alias', None),
+ info_dict.setdefault('Genomic', None),
+ info_dict.setdefault('Filter', None), info_dict.setdefault('Seed_fam', None)]
+ expression_list = info_dict.get('Expression').split(',')
+ complete_list = prefix_list + expression_list
+ gff_insert_values(conn, complete_list)
+
+ c.execute('''CREATE TABLE IF NOT EXISTS summary(version text, source text, data_sets text, tools text,
+ commands_exec text, filter_tags text, citation text, records real, date_stamp text)''')
+ c.execute("INSERT INTO summary(version, source, data_sets, tools, commands_exec, filter_tags, citation, "
+ "records, date_stamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
+ (version, source, data_sets, tools, commands_exec, filter_tags, citation, cnt, d2))
+
+ # info_dict.setdefault('Sex', None)
+
+ conn.commit()
+
+
+def query_sql(args):
+ #print("Function query is being implemented, will be updated soon!!!")
+ #print(args)
+ if args.db:
+ out_file = op.join(args.db)
+ conn = sqlite3.connect(out_file)
+ c = conn.cursor()
+ else:
+ out_file = op.join("mirtop.db")
+ conn = sqlite3.connect(out_file)
+ c = conn.cursor()
+
+ #c.execute("SELECT name FROM sqlite_master WHERE type = 'table';")
+ #record = c.fetchall()
+ #print(args.db)
+ if args.expr == "show-tables":
+ show_tables(conn)
+ if args.expr == "show-schema":
+ if args.table:
+ show_schema(conn, args.table)
+ else:
+ print("Error: Require table name")
+ print("Usage: mirtop sql --query --db <input_database> -e show-schema -t <table_name>")
+ if args.expr == "show-columns":
+ show_columns(conn, args)
+ if args.expr == "describe-gff":
+ describe_gff_info(conn, args)
+ if args.expr == "isomirs-per-mirna":
+ if args.miRNA:
+ stats_isomiR_per_miRNA(conn, args.miRNA, args)
+ else:
+ print("Error: Require miRNA name")
+ print("Usage: mirtop sql --query --db <input_database> -e isomirs-per-mirna -miR <miRNA>")
+ if args.expr == "select":
+ select_query(conn, args)
+ pass
+
+
+def show_tables(connection):
+ print(" +" + 25 * "-" + " +")
+ print(' | Tables |')
+ print(" +" + 25 * "-" + " +")
+ for (tableName,) in connection.execute(
+ """
+ select NAME from SQLITE_MASTER where TYPE='table' order by NAME;
+ """
+ ):
+ tn_name = len(tableName)
+ req_format_space = 25 - tn_name
+ print(" | {tn}{format_space}|".format(
+ tn=tableName,
+ format_space=req_format_space * " "
+ )) # Table name (for each table)
+ print(" +" + 25 * "-" + " +")
+
+
+def show_schema(connection, table_name):
+ for (tableName,) in connection.execute(
+ """
+ select NAME from SQLITE_MASTER where TYPE='table' order by NAME;
+ """
+ ):
+ # print("{}:".format(tableName)) # Table name (for each table)
+ if tableName == table_name:
+ print(" +" + 57 * "-" + "+")
+ print(' | Sl | Field | Type | NULL | Key |')
+ print(" +" + 57 * "-" + "+")
+ for (
+ columnID, columnName, columnType,
+ columnNotNull, columnDefault, columnPK,
+ ) in connection.execute("pragma table_info('{}');".format(tableName)):
+ name_size = len(columnName)
+ columnID_size = len(str(columnID))
+ required_len = 30 - name_size
+ req_col_size = 2 - columnID_size
+
+ print(" | {colSpace}{id} | {name}{space}| {type} | {null} | {pk} |".format(
+ colSpace=req_col_size * " ",
+ id=columnID,
+ name=columnName,
+ space=required_len * " ",
+ type=columnType if columnType else "NULL",
+ null=" not null" if columnNotNull else " NO ",
+ default=" [{}]".format(columnDefault) if columnDefault else "NULL",
+ pk=" *{}".format(columnPK) if columnPK else " ",
+ ))
+ print(" +" + 57 * "-" + "+")
+
+
+def show_columns(connection, args):
+ cur = connection.cursor()
+ query="SELECT * FROM data_sets LIMIT 2"
+ cur.execute(query)
+ rows = cur.fetchall()
+ col_name_list = [tuple[0] for tuple in cur.description]
+ sl_no=1
+ print("\nSerial\tColumn names")
+ for col in col_name_list:
+ print(" "+str(sl_no)+"\t"+str(col))
+ sl_no+=1
+ print()
+
+
+def describe_gff_info(connection, args):
+ cur = connection.cursor()
+ query="SELECT * FROM summary"
+ cur.execute(query)
+ rows = cur.fetchone()
+ col_name_list = [tuple[0] for tuple in cur.description]
+ sl_no=1
+ print("\nSerial\tColumn names\tDescription")
+ for i, col in enumerate(col_name_list):
+ desc_g = rows[i]
+ if (desc_g ==""):
+ desc_g = "--"
+ print(" "+str(sl_no)+"\t"+str(col)+"\t\""+str(desc_g)+"\"")
+ sl_no+=1
+ print()
+
+
+def stats_isomiR_per_miRNA(connection, miRNA_name, args):
+ cur = connection.cursor()
+ miR_array = add_mirnas(args)
+ #cur.execute('SELECT * FROM data_sets WHERE seqID=?', (miRNA_name,))
+ query="SELECT COUNT(*) FROM data_sets WHERE seqID=? AND type='isomiR' "
+ query = add_filter(query, args)
+ print()
+ stat_counts=0
+ if args.txtout:
+ print("The results are being fetched and formated to be written to "+ args.txtout)
+ #format_results()
+ #with open(args.txtout, 'w') as w_stat:
+ w_stat = open(args.txtout, 'w')
+ w_stat.write("Serial number\tmiRNA\tisomiR Count\n")
+ else:
+ print("OUTPUT:")
+
+ for miRs in miR_array:
+ t=(miRs, )
+ cur.execute(query, t)
+ #cur.execute("SELECT COUNT(*) FROM data_sets WHERE seqID=? AND type='isomiR'", t)
+ rows = cur.fetchall()
+ for row in rows:
+ stat_counts+=1
+ row = row[0]
+ if args.txtout:
+ w_stat.write(str(stat_counts) +"\t"+miRs+"\t"+str(row)+"\n")
+ else:
+ print(str(stat_counts) +". " +"isomiRs for miRNA "+ miRs + ": "+ str(row))
+ print()
+ if args.txtout:
+ w_stat.close()
+ pass
+
+
+def WHERE_CLAUSE(query, args):
+ if "WHERE" in query:
+ query = query + " AND "
+ return(query)
+ else:
+ query = query + " WHERE "
+ return(query)
+
+
+# ALWAYS EXECUTE THIS LIMIT FUNCTION AT THE END
+def add_limit(query, args):
+ if args.limit:
+ query = query + " LIMIT "+ args.limit
+ return query
+ else:
+ return query
+
+
+def add_filter(query, args):
+ if args.filter:
+ query = WHERE_CLAUSE(query, args)
+ query = query + " filter='" + args.filter +"' "
+ return query
+ else:
+ return query
+
+
+def add_variants(query, args):
+ my_var_dict = {'iso_5p': 'iso_5p != "None"', 'iso_3p': 'iso_3p != "None"', 'iso_add3p': 'iso_add3p != "None"', 'iso_add5p': 'iso_add5p != "None"',
+ 'iso_snv_seed':'iso_snv_seed != 0', 'iso_snv_central_offset':'iso_snv_central_offset != 0', 'iso_snv_central':'iso_snv_central != 0',
+ 'iso_snv_central_supp':'iso_snv_central_supp != 0', 'iso_snv':'iso_snv != 0'}
+ user_req_var = args.variant.split(',')
+ values_req_var =[]
+ for eachVar in user_req_var:
+ try:
+ values_req_var.append(my_var_dict[eachVar])
+ except KeyError:
+ print("\nError: \"" + eachVar + "\" does not exist in the choices supported by (-var , --variant)\n")
+ print("use: mirtop sql -qh for more options")
+ exit()
+ #print(values_req_var)
+ insert_betwn = " AND "
+ query_suffix = (insert_betwn.join( values_req_var ))
+ query = WHERE_CLAUSE(query, args)
+ query = query + query_suffix
+ return query
+ #if args.filter:
+ #query = query + " AND " + query_suffix
+ #return query
+ #else:
+ #query = query + "WHERE " + query_suffix
+ #return query
+
+def add_mirnas(args):
+ if args.miRNA.endswith('.txt'):
+ #print("I am called and I am safe here to read from a file")
+ with open(args.miRNA, 'r') as miList:
+ miR_array = miList.read().splitlines()
+ if args.miRNA_prefix:
+ element = str(args.miRNA_prefix) + "-"
+ miR_array = [element + s for s in miR_array]
+ return(miR_array)
+ else:
+ return(miR_array)
+ else:
+ miR_array=args.miRNA.split(',')
+ if args.miRNA_prefix:
+ element = str(args.miRNA_prefix) + "-"
+ miR_array = [element + s for s in miR_array]
+ return(miR_array)
+ else:
+ return(miR_array)
+
+
+def perform_execution(conn, query, args):
+ cur = conn.cursor()
+ #print("QUERY: \n"+ query + "\n")
+ cur.execute(query)
+ rows = cur.fetchall()
+ col_name_list = [tuple[0] for tuple in cur.description]
+ if args.miRNA:
+ return(col_name_list, rows)
+ else:
+ format_results(col_name_list, rows, args)
+
+def format_results(header, output, args):
+ header = '\t'.join(str(col) for col in header)
+ if args.txtout:
+ outList = open(args.txtout, 'w')
+ print("\nWriting data to file: "+ args.txtout + "\n")
+ outList.write(header+"\n")
+ write_to_file(output, args, outList)
+ else:
+ print(header)
+ if args.count:
+ output = list(output[0])
+ if args.columns:
+ print("Unique counts for "+ str(args.columns) + " is: " + str(output[0]))
+ else:
+ print("Unique counts for all rows is: " + str(output[0]))
+ else:
+ for eachrow in output:
+ row_tab = '\t'.join(str(items) for items in eachrow)
+ print(row_tab)
+
+
+def write_to_file(output, args, fileHandler):
+ if args.miRNA:
+ fileHandler.write(output + "\n")
+ elif args.count:
+ output = list(output[0])
+ fileHandler.write("Unique counts for "+ args.columns + " is:\t" + str(output))
+ else:
+ for eachrow in output:
+ row_tab = '\t'.join(str(items) for items in eachrow)
+ fileHandler.write(row_tab+"\n")
+
+
+def select_query(connection, args):
+ if args.columns:
+ if args.count:
+ if args.count == "T":
+ query = "SELECT COUNT(" + args.columns + ") FROM data_sets "
+ else:
+ print("\nERROR: -n is incorrect!. \nPlease use -n T and optionally specify any one column in -col.\nFor more options see mirtop sql -h")
+ exit()
+ else:
+ query = "SELECT " + args.columns + " FROM data_sets "
+ elif args.count:
+ query = "SELECT COUNT(*) FROM data_sets "
+ else:
+ query = "SELECT * FROM data_sets "
+ query = add_filter(query, args)
+ if args.variant:
+ query = add_variants(query, args)
+ if args.miRNA:
+ miR_array = add_mirnas(args)
+ query = WHERE_CLAUSE(query, args)
+ query_series = query + "seqID= "
+ header_var= ""
+ j=0
+ if args.txtout:
+ outList = open(args.txtout, 'w')
+ print("\nWriting data to file: "+ args.txtout + "\n")
+
+ for miRs in miR_array:
+ query = query_series + "\"" + miRs + "\" "
+ query = add_limit(query, args)
+ (header, rows) = perform_execution(connection, query, args)
+ j += 1
+ for i, row in enumerate(rows):
+ if (i == 0):
+ if header_var == "":
+ header_var = '\t'.join(str(col) for col in header)
+ if args.txtout:
+ outList.write(header_var+"\n")
+ else:
+ print(header_var)
+ row_tab = '\t'.join(str(items) for items in row)
+ if args.count:
+ newOut = str(j) + ". "+ miRs + ":\t" + row_tab
+ if args.txtout:
+ write_to_file(newOut, args, outList)
+ else:
+ print(newOut)
+ else:
+ if args.txtout:
+ write_to_file(row_tab, args, outList)
+ else:
+ print(row_tab)
+ else:
+ query = add_limit(query, args)
+ perform_execution(connection, query, args)
+
+
+def sql_options(args):
+ user_options = vars(args)
+ if args.create:
+ if args.gff:
+ insert_sql(args)
+ else:
+ print("Usage: mirtop sql --create --gff <input.gff> --db <new_db_name> \(Default: mirtop.db\)")
+ elif args.query:
+ if args.expr:
+ # print("Usage: mirtop sql --query --db <input_database> -e <user_query>")
+ query_sql(args)
+ else:
+ print("Usage: mirtop sql --query --db <input_database> -e <user_query>")
+ else:
+ print("Usage: mirtop sql -h")
diff --git a/setup.py b/setup.py
index ce4e222..7ee2b42 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
import os
from setuptools import setup, find_packages
-version = '0.4.23'
+version = '0.4.24'
url = 'http://github.com/mirtop/mirtop'
diff --git a/test/test_automated_analysis.py b/test/test_automated_analysis.py
index 7e38d4a..18cfc82 100644
--- a/test/test_automated_analysis.py
+++ b/test/test_automated_analysis.py
@@ -475,4 +475,311 @@ class AutomatedAnalysisTest(unittest.TestCase):
"../../data/examples/gff/correct_file.gff"]
print("")
print(" ".join(clcode))
- subprocess.check_call(clcode) \ No newline at end of file
+ subprocess.check_call(clcode)
+
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_create_1_cmd(self):
+ """Run sql command to incorporate GFF to SQLite
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-c",
+ "--gff",
+ "../../data/examples/annotate/SQL_sample.gff",
+ "-o",
+ "../../data/examples/annotate",
+ "--db",
+ "SQL_sample.db"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_create_2_cmd(self):
+ """Run sql command to incorporate GFF to SQLite
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-c",
+ "--gff",
+ "../../data/examples/annotate/SQL_sample.gff",
+ "-o",
+ "../../data/examples/annotate"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_showTables_cmd(self):
+ """Run sql command to query from a database to show tables using SQLite
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "show-tables"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_showSchema_cmd(self):
+ """Run sql command to query from a database to show schema using SQLite
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "show-schema",
+ "-t",
+ "summary"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_showColumns_cmd(self):
+ """Run sql command to query from a database to show columns using SQLite
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "show-columns"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_descSummary_cmd(self):
+ """Run sql command to query from a database to display the header of the GFF using SQLite
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "describe-gff"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_statIsomirs_cmd(self):
+ """Run sql command to query from a database to summarize isomirs per miRNA
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "isomirs-per-mirna",
+ "-miR",
+ "hsa-let-7a-5p,hsa-let-7d-5p"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_statIsomirsFile_cmd(self):
+ """Run sql command to query from a database to summarize isomirs per miRNA reading from afile
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "isomirs-per-mirna",
+ "-miR",
+ "../../data/examples/annotate/miRNA_sample_list.txt"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_SelectLimit_cmd(self):
+ """Run sql command to query from database using limit option
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "select",
+ "--limit",
+ "2"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_SelectColumns_cmd(self):
+ """Run sql command to query from database using limit option
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "select",
+ "-l",
+ "2",
+ "-col",
+ "seqID,UID,Read,iso_5p,iso_3p,start,end"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_SelectMirna_cmd(self):
+ """Run sql command to query from database for specific miRNAs
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "select",
+ "-l",
+ "4",
+ "-col",
+ "seqID,UID,Read,iso_5p,iso_3p,start,end",
+ "-miR",
+ "hsa-let-7i-5p"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_SelectiVariant_cmd(self):
+ """Run sql command to query from database for specific variant types
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "select",
+ "-l",
+ "5",
+ "-var",
+ "iso_5p,iso_3p,iso_snv_central_offset"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_SelectFilter_cmd(self):
+ """Run sql command to query from database using filters
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e",
+ "select",
+ "-l",
+ "5",
+ "-var",
+ "iso_5p,iso_3p,iso_snv_central_offset",
+ "-f",
+ "Pass"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_SelectCount_cmd(self):
+ """Run sql command to query from database to fetch counts of the return values
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e", "select",
+ "-var", "iso_5p,iso_3p",
+ "-miR", "hsa-miR-142-5p,hsa-miR-372-3p",
+ "-n","T"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
+
+ @attr(complete=True)
+ @attr(cmd_validate=True)
+ @attr(cmd=True)
+ def test_sql_query_SelectTextOut_cmd(self):
+ """Run sql command to query from database and return the output to a text file
+ """
+ with make_workdir():
+ clcode = ["mirtop",
+ "sql",
+ "-q",
+ "--db",
+ "../../data/examples/annotate/query_sample.db",
+ "-e", "select",
+ "-var", "iso_5p,iso_3p",
+ "-miR", "hsa-miR-142-5p,hsa-miR-372-3p",
+ "-n","T",
+ "-txto","sample_count.txt"]
+ print("")
+ print(" ".join(clcode))
+ subprocess.check_call(clcode)
diff --git a/test/test_functions.py b/test/test_functions.py
index 56037da..fb7f8f9 100644
--- a/test/test_functions.py
+++ b/test/test_functions.py
@@ -308,10 +308,12 @@ class FunctionsTest(unittest.TestCase):
if not res:
if res[0][0] != 10:
raise ValueError("Wrong alignment for test 7 %s" % res)
+
res = align_from_variants("AGGTAGTAGGATGTATAGAA", mature,
"iso_5p:+2,iso_3p:-2,iso_add3p:2")
if res:
raise ValueError("Wrong alignment for test 8 %s" % res)
+
@attr(alignment=True)
def test_alignment(self):
@@ -528,3 +530,35 @@ class FunctionsTest(unittest.TestCase):
from mirtop.gff.update import update_file
print("\n")
update_file("data/examples/versions/version1.0.gff", None)
+
+ @attr(sql=True)
+ def test_sql(self):
+ """testing mirtop_sql in sql.py function"""
+ from mirtop.libs import logger
+ from mirtop.sql import sql
+ import argparse
+ logger.initialize_logger("test SQLite Create", True, True)
+ logger = logger.getLogger(__name__)
+ args = argparse.Namespace()
+ args.create = "True"
+ args.db = "SQL_sample.db"
+ args.gff = 'data/examples/annotate/SQL_sample.gff'
+ args.out = 'data/examples/annotate/'
+ sql.sql_options(args)
+ os.remove(os.path.join(args.out, "SQL_sample.db"))
+ return True
+
+ @attr(issue64=True)
+ def test_issue64(self):
+ from mirtop.bam.filter import tune
+ subs, add, cigar = tune("TATCACAGTGGCTGTTCTTTTTT", "CCCCCTATCACAGTGGCTGTTCTTTTTT", 5, None)
+ if add:
+ raise ValueError("Bad annotation in for seqs with 6T/As at the end")
+ @attr(error69=True)
+ def test_error69(self):
+ from mirtop.bam.filter import tune
+ v = tune("CTTATCAGATTGTATTGTAATT",
+ "TACATCGGCCATTATAATACAACCTGATAAGTGTTATAGCACTTATCAGATTGTATTGTAATTGTCTGTGTANNNNNNNNNNNN",
+ 41, [(0, 22)])
+ if v[2] != "22M":
+ raise ValueError("Issue 69 is back. Variantion not detected correctly.")