diff options
author | Steffen Moeller <moeller@debian.org> | 2019-12-05 11:42:23 +0100 |
---|---|---|
committer | Steffen Moeller <moeller@debian.org> | 2019-12-05 11:42:23 +0100 |
commit | 073ddf8aa778dc9efea8d0283558f44b6e0f0895 (patch) | |
tree | 7162287aadf59c87890613d177da4874d4b5576c | |
parent | da456c1aee23986cf2bb34e54084060435771ae4 (diff) | |
parent | a1354aa97ae8abef49920fbad458d967c17a900b (diff) |
Update upstream source from tag 'upstream/0.4.23'
Update to upstream version '0.4.23'
with Debian dir 2619c95f1b8f9b5233ffacf4a06a1688dc85b6cb
-rw-r--r-- | HISTORY.md | 9 | ||||
-rw-r--r-- | mirtop/bam/bam.py | 17 | ||||
-rw-r--r-- | mirtop/command_line.py | 2 | ||||
-rw-r--r-- | mirtop/gff/stats.py | 2 | ||||
-rw-r--r-- | mirtop/importer/manatee.py | 2 | ||||
-rw-r--r-- | mirtop/mirna/realign.py | 5 | ||||
-rw-r--r-- | setup.py | 3 | ||||
-rw-r--r-- | test/test_automated_analysis.py | 18 |
8 files changed, 32 insertions, 26 deletions
```diff
diff --git a/HISTORY.md b/HISTORY.md
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,3 +1,12 @@
+- 0.4.23
+
+* fix empty stats file [#61](https://github.com/miRTop/mirtop/issues/61) by @leontienvdbent
+
+- 0.4.22
+
+* fix when reads map halfway on to the edge
+* fix edge case where limit==variant_size
+
 - 0.4.21
 * Missing trimming events since 0.4.19
diff --git a/mirtop/bam/bam.py b/mirtop/bam/bam.py
index feb43fa..1528b6a 100644
--- a/mirtop/bam/bam.py
+++ b/mirtop/bam/bam.py
@@ -110,6 +110,7 @@ def low_memory_genomic_bam(bam_fn, sample, out_handle, args):
     logger.info("Intersecting bed file.")
     intersect_fn = intersect(bed_fn, args.gtf)
     logger.info("Loading database.")
+    # TODO this'll return conn_reads and conn_counts
     conn = _read_lifted_bam_alpha(intersect_fn, bam_fn, args)
     rows = sql.select_all_reads(conn)
     lines = []
@@ -120,6 +121,8 @@ def low_memory_genomic_bam(bam_fn, sample, out_handle, args):
             lines.append(row)
             current = row[0]
         else:
+            # TODO counts of sequence = conn_counts.query UID
+            # it could be counts only same location UID+chrom+start, or counts all UID
             reads = _read_lifted_lines(lines, precursors, database)
             ann = annotate(reads, args.matures, args.precursors, quiet=True)
             gff_lines = body.create(ann, args.database, sample, args, quiet=True)
@@ -152,15 +155,14 @@ def _analyze_line(line, reads, precursors, handle, args):
     if query_name not in reads:
         reads[query_name].set_sequence(sequence)
         reads[query_name].counts = _get_freq(query_name)
+        # TODO if args.quant set to 0
+    # TODO if args.quant increase by 1
     if line.is_reverse and not args.genomic:
         logger.debug("READ::Sequence is reverse: %s" % line.query_name)
         return reads
     chrom = handle.getrname(line.reference_id)
     start = line.reference_start
-    # If genomic endcode, liftover to precursor position
-    # if not start:
-    #     logger.debug(("READ::not start found %s" % line.reference_start))
-    #     return reads
+
     cigar = line.cigartuples
     # if line.cigarstring.find("I") > -1:
     #     indels_skip += 1
@@ -168,7 +170,7 @@ def _analyze_line(line, reads, precursors, handle, args):
     iso.align = line
     iso.set_pos(start, len(reads[query_name].sequence))
     logger.debug("READ::From BAM start %s end %s at chrom %s" % (iso.start, iso.end, chrom))
-    if len(precursors[chrom]) < start + len(reads[query_name].sequence):
+    if len(precursors[chrom].replace("N","")) + 3 < start + len(reads[query_name].sequence):
         logger.debug("READ::%s start + %s sequence size are bigger than"
                      " size precursor %s" % (
                          line.reference_id,
@@ -198,11 +200,13 @@ def _read_lifted_bam_alpha(bed_fn, bam_fn, args):
     conn = sql.create_connection()
     key = "name" if args.keep_name else "sequence"
     sql.create_reads_table(conn, key)
+    # TODO create counts table sequence and autoincrement or from read
     cur = conn.cursor()
     counts = 0
     seen = set()
     for line in bed_fn:
         fields = _parse_intersect(line, database, bed=True)
+        # TODO add sequence to count table args.quant on/off name=UID or name=UID+chrom+pos
         if fields:
             hit = ".".join(fields[:3])
             if hit not in seen:
@@ -211,9 +215,10 @@ def _read_lifted_bam_alpha(bed_fn, bam_fn, args):
                 seen.add(hit)
         # if counts == 1000:
         #     counts = 0
-    del(hit)
+    del(seen)
     logger.info("Read %s lines that intersected with miRNAs." % counts)
     conn.commit()
+    # TODO this'll return conn_reads and conn_counts
     return conn
diff --git a/mirtop/command_line.py b/mirtop/command_line.py
index 97cc10a..e5da56a 100644
--- a/mirtop/command_line.py
+++ b/mirtop/command_line.py
@@ -24,7 +24,7 @@ def main(**kwargs):
                      kwargs['args'].print_debug)
     logger = mylog.getLogger(__name__)
     start = time.time()
-
+    #logger.warning("This is devel-live changes")
     if "gff" in kwargs:
         logger.info("Run annotation")
         reader(kwargs["args"])
diff --git a/mirtop/gff/stats.py b/mirtop/gff/stats.py
index e2d65ca..ad3e7fe 100644
--- a/mirtop/gff/stats.py
+++ b/mirtop/gff/stats.py
@@ -73,7 +73,7 @@ def _calc_stats(fn):
         cols = gff.columns
         attr = gff.attributes
         logger.debug("## STATS: attribute %s" % attr)
-        if ok.match(attr['Filter']):
+        if not ok.match(attr['Filter']):
             continue
         if "-".join([attr['UID'], attr['Variant'], attr['Name']]) in seen:
             continue
diff --git a/mirtop/importer/manatee.py b/mirtop/importer/manatee.py
index a9a5279..32a444d 100644
--- a/mirtop/importer/manatee.py
+++ b/mirtop/importer/manatee.py
@@ -139,7 +139,7 @@ def _bed(handle, bed_fn):
         query_sequence = cols[9]
         counts = cols[14]
         start = int(cols[3])
-        strand = cols[1]
+        strand = "-" if cols[1] == "16" else "+"
         chrom = cols[2]
         # if there no hits
         # if the sequence always matching the read, assuming YES now
diff --git a/mirtop/mirna/realign.py b/mirtop/mirna/realign.py
index 7b26d20..f2bad95 100644
--- a/mirtop/mirna/realign.py
+++ b/mirtop/mirna/realign.py
@@ -464,13 +464,14 @@ def align_from_variants(sequence, mature, variants):
     if "iso_add3p" in k:
         sequence = sequence[:-1 * var_dict["iso_add3p"]]
     if "iso_3p" in k:
-        mature = mature[:-(7 + (-1 * var_dict["iso_3p"]))]
+        shift = 7 + (-1 * var_dict["iso_3p"])
+        if shift != 0:
+            mature = mature[:-(shift)]
     else:
         mature = mature[:-7]
     logger.debug("realign::align_from_variants::snp %s" % snp)
     logger.debug("realign::align_from_variants::sequence %s" % sequence)
     logger.debug("realign::align_from_variants::mature %s" % mature)
-
     if len(sequence) != len(mature):  # in case of indels, align again
         a = align(sequence, mature)
         sequence = a[0]
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,8 @@ import os
 from setuptools import setup, find_packages
-version = '0.4.21'
+version = '0.4.23'
+
 url = 'http://github.com/mirtop/mirtop'
diff --git a/test/test_automated_analysis.py b/test/test_automated_analysis.py
index 890ae75..7e38d4a 100644
--- a/test/test_automated_analysis.py
+++ b/test/test_automated_analysis.py
@@ -307,20 +307,10 @@ class AutomatedAnalysisTest(unittest.TestCase):
             print("")
             print(" ".join(clcode))
             subprocess.check_call(clcode)
-
-    @attr(complete=True)
-    @attr(cmd_stats=True)
-    @attr(cmd=True)
-    def test_srnaseq_stats(self):
-        """Run stats analysis
-        """
-        with make_workdir():
-            clcode = ["mirtop",
-                      "stats",
-                      "../../data/examples/gff/correct_file.gff"]
-            print("")
-            print(" ".join(clcode))
-            subprocess.check_call(clcode)
+            if not os.path.exists("test_out_mirs/mirtop_stats.txt"):
+                raise ValueError("File doesn't exist, something is wrong with stats cmd.")
+            if sum(1 for line in open('test_out_mirs/mirtop_stats.txt')) == 1:
+                raise ValueError("File is empty, something is wrong with stats cmd.")
     @attr(complete=True)
     @attr(cmd_merge=True)
```