summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Steffen Moeller <moeller@debian.org> 2019-12-05 11:42:23 +0100
committer Steffen Moeller <moeller@debian.org> 2019-12-05 11:42:23 +0100
commit 073ddf8aa778dc9efea8d0283558f44b6e0f0895 (patch)
tree 7162287aadf59c87890613d177da4874d4b5576c
parent da456c1aee23986cf2bb34e54084060435771ae4 (diff)
parent a1354aa97ae8abef49920fbad458d967c17a900b (diff)
Update upstream source from tag 'upstream/0.4.23'
Update to upstream version '0.4.23' with Debian dir 2619c95f1b8f9b5233ffacf4a06a1688dc85b6cb
-rw-r--r-- HISTORY.md 9
-rw-r--r-- mirtop/bam/bam.py 17
-rw-r--r-- mirtop/command_line.py 2
-rw-r--r-- mirtop/gff/stats.py 2
-rw-r--r-- mirtop/importer/manatee.py 2
-rw-r--r-- mirtop/mirna/realign.py 5
-rw-r--r-- setup.py 3
-rw-r--r-- test/test_automated_analysis.py 18
8 files changed, 32 insertions, 26 deletions
diff --git a/HISTORY.md b/HISTORY.md
index 60dc0d0..2c01485 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,3 +1,12 @@
+- 0.4.23
+
+* fix empty stats file [#61](https://github.com/miRTop/mirtop/issues/61) by @leontienvdbent
+
+- 0.4.22
+
+* fix when reads map halfway on to the edge
+* fix edge case where limit==variant_size
+
- 0.4.21
* Missing trimming events since 0.4.19
diff --git a/mirtop/bam/bam.py b/mirtop/bam/bam.py
index feb43fa..1528b6a 100644
--- a/mirtop/bam/bam.py
+++ b/mirtop/bam/bam.py
@@ -110,6 +110,7 @@ def low_memory_genomic_bam(bam_fn, sample, out_handle, args):
logger.info("Intersecting bed file.")
intersect_fn = intersect(bed_fn, args.gtf)
logger.info("Loading database.")
+ # TODO this'll return conn_reads and conn_counts
conn = _read_lifted_bam_alpha(intersect_fn, bam_fn, args)
rows = sql.select_all_reads(conn)
lines = []
@@ -120,6 +121,8 @@ def low_memory_genomic_bam(bam_fn, sample, out_handle, args):
lines.append(row)
current = row[0]
else:
+ # TODO counts of sequence = conn_counts.query UID
+ # it could be counts only same location UID+chrom+start, or counts all UID
reads = _read_lifted_lines(lines, precursors, database)
ann = annotate(reads, args.matures, args.precursors, quiet=True)
gff_lines = body.create(ann, args.database, sample, args, quiet=True)
@@ -152,15 +155,14 @@ def _analyze_line(line, reads, precursors, handle, args):
if query_name not in reads:
reads[query_name].set_sequence(sequence)
reads[query_name].counts = _get_freq(query_name)
+ # TODO if args.quant set to 0
+ # TODO if args.quant increase by 1
if line.is_reverse and not args.genomic:
logger.debug("READ::Sequence is reverse: %s" % line.query_name)
return reads
chrom = handle.getrname(line.reference_id)
start = line.reference_start
- # If genomic endcode, liftover to precursor position
- # if not start:
- # logger.debug(("READ::not start found %s" % line.reference_start))
- # return reads
+
cigar = line.cigartuples
# if line.cigarstring.find("I") > -1:
# indels_skip += 1
@@ -168,7 +170,7 @@ def _analyze_line(line, reads, precursors, handle, args):
iso.align = line
iso.set_pos(start, len(reads[query_name].sequence))
logger.debug("READ::From BAM start %s end %s at chrom %s" % (iso.start, iso.end, chrom))
- if len(precursors[chrom]) < start + len(reads[query_name].sequence):
+ if len(precursors[chrom].replace("N","")) + 3 < start + len(reads[query_name].sequence):
logger.debug("READ::%s start + %s sequence size are bigger than"
" size precursor %s" % (
line.reference_id,
@@ -198,11 +200,13 @@ def _read_lifted_bam_alpha(bed_fn, bam_fn, args):
conn = sql.create_connection()
key = "name" if args.keep_name else "sequence"
sql.create_reads_table(conn, key)
+ # TODO create counts table sequence and autoincrement or from read
cur = conn.cursor()
counts = 0
seen = set()
for line in bed_fn:
fields = _parse_intersect(line, database, bed=True)
+ # TODO add sequence to count table args.quant on/off name=UID or name=UID+chrom+pos
if fields:
hit = ".".join(fields[:3])
if hit not in seen:
@@ -211,9 +215,10 @@ def _read_lifted_bam_alpha(bed_fn, bam_fn, args):
seen.add(hit)
# if counts == 1000:
# counts = 0
- del(hit)
+ del(seen)
logger.info("Read %s lines that intersected with miRNAs." % counts)
conn.commit()
+ # TODO this'll return conn_reads and conn_counts
return conn
diff --git a/mirtop/command_line.py b/mirtop/command_line.py
index 97cc10a..e5da56a 100644
--- a/mirtop/command_line.py
+++ b/mirtop/command_line.py
@@ -24,7 +24,7 @@ def main(**kwargs):
kwargs['args'].print_debug)
logger = mylog.getLogger(__name__)
start = time.time()
-
+ #logger.warning("This is devel-live changes")
if "gff" in kwargs:
logger.info("Run annotation")
reader(kwargs["args"])
diff --git a/mirtop/gff/stats.py b/mirtop/gff/stats.py
index e2d65ca..ad3e7fe 100644
--- a/mirtop/gff/stats.py
+++ b/mirtop/gff/stats.py
@@ -73,7 +73,7 @@ def _calc_stats(fn):
cols = gff.columns
attr = gff.attributes
logger.debug("## STATS: attribute %s" % attr)
- if ok.match(attr['Filter']):
+ if not ok.match(attr['Filter']):
continue
if "-".join([attr['UID'], attr['Variant'], attr['Name']]) in seen:
continue
diff --git a/mirtop/importer/manatee.py b/mirtop/importer/manatee.py
index a9a5279..32a444d 100644
--- a/mirtop/importer/manatee.py
+++ b/mirtop/importer/manatee.py
@@ -139,7 +139,7 @@ def _bed(handle, bed_fn):
query_sequence = cols[9]
counts = cols[14]
start = int(cols[3])
- strand = cols[1]
+ strand = "-" if cols[1] == "16" else "+"
chrom = cols[2]
# if there no hits
# if the sequence always matching the read, assuming YES now
diff --git a/mirtop/mirna/realign.py b/mirtop/mirna/realign.py
index 7b26d20..f2bad95 100644
--- a/mirtop/mirna/realign.py
+++ b/mirtop/mirna/realign.py
@@ -464,13 +464,14 @@ def align_from_variants(sequence, mature, variants):
if "iso_add3p" in k:
sequence = sequence[:-1 * var_dict["iso_add3p"]]
if "iso_3p" in k:
- mature = mature[:-(7 + (-1 * var_dict["iso_3p"]))]
+ shift = 7 + (-1 * var_dict["iso_3p"])
+ if shift != 0:
+ mature = mature[:-(shift)]
else:
mature = mature[:-7]
logger.debug("realign::align_from_variants::snp %s" % snp)
logger.debug("realign::align_from_variants::sequence %s" % sequence)
logger.debug("realign::align_from_variants::mature %s" % mature)
-
if len(sequence) != len(mature): # in case of indels, align again
a = align(sequence, mature)
sequence = a[0]
diff --git a/setup.py b/setup.py
index ac57ac5..ce4e222 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,8 @@
import os
from setuptools import setup, find_packages
-version = '0.4.21'
+version = '0.4.23'
+
url = 'http://github.com/mirtop/mirtop'
diff --git a/test/test_automated_analysis.py b/test/test_automated_analysis.py
index 890ae75..7e38d4a 100644
--- a/test/test_automated_analysis.py
+++ b/test/test_automated_analysis.py
@@ -307,20 +307,10 @@ class AutomatedAnalysisTest(unittest.TestCase):
print("")
print(" ".join(clcode))
subprocess.check_call(clcode)
-
- @attr(complete=True)
- @attr(cmd_stats=True)
- @attr(cmd=True)
- def test_srnaseq_stats(self):
- """Run stats analysis
- """
- with make_workdir():
- clcode = ["mirtop",
- "stats",
- "../../data/examples/gff/correct_file.gff"]
- print("")
- print(" ".join(clcode))
- subprocess.check_call(clcode)
+ if not os.path.exists("test_out_mirs/mirtop_stats.txt"):
+ raise ValueError("File doesn't exist, something is wrong with stats cmd.")
+ if sum(1 for line in open('test_out_mirs/mirtop_stats.txt')) == 1:
+ raise ValueError("File is empty, something is wrong with stats cmd.")
@attr(complete=True)
@attr(cmd_merge=True)