summaryrefslogtreecommitdiff
path: root/bt2_search.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'bt2_search.cpp')
-rw-r--r--bt2_search.cpp59
1 files changed, 49 insertions, 10 deletions
diff --git a/bt2_search.cpp b/bt2_search.cpp
index e5e02c6..7decf9b 100644
--- a/bt2_search.cpp
+++ b/bt2_search.cpp
@@ -660,7 +660,7 @@ static void printUsage(ostream& out) {
tool_name = "bowtie2";
}
out << "Usage: " << endl
- << " " << tool_name.c_str() << " [options]* -x <bt2-idx> {-1 <m1> -2 <m2> | -U <r>} [-S <sam>]" << endl
+ << " " << tool_name.c_str() << " [options]* -x <bt2-idx> {-1 <m1> -2 <m2> | -U <r> | --interleaved <i>} [-S <sam>]" << endl
<< endl
<< " <bt2-idx> Index filename prefix (minus trailing .X." + gEbwt_ext + ")." << endl
<< " NOTE: Bowtie 1 and Bowtie 2 indexes are not compatible." << endl
@@ -676,6 +676,10 @@ static void printUsage(ostream& out) {
if(wrapper == "basic-0") {
out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
}
+ out << " <i> Files with interleaved paired-end FASTQ reads" << endl;
+ if(wrapper == "basic-0") {
+ out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
+ }
out << " <sam> File for SAM output (default: stdout)" << endl
<< endl
<< " <m1>, <m2>, <r> can be comma-separated lists (no whitespace) and can be" << endl
@@ -686,7 +690,6 @@ static void printUsage(ostream& out) {
<< endl
<< " Input:" << endl
<< " -q query input files are FASTQ .fq/.fastq (default)" << endl
- << " --interleaved query input files are interleaved paired-end FASTQ reads" << endl
<< " --tab5 query input files are TAB5 .tab5" << endl
<< " --tab6 query input files are TAB6 .tab6" << endl
<< " --qseq query input files are in Illumina's qseq format" << endl
@@ -767,10 +770,10 @@ static void printUsage(ostream& out) {
//}
out << " -t/--time print wall-clock time taken by search phases" << endl;
if(wrapper == "basic-0") {
- out << " --un <path> write unpaired reads that didn't align to <path>" << endl
- << " --al <path> write unpaired reads that aligned at least once to <path>" << endl
- << " --un-conc <path> write pairs that didn't align concordantly to <path>" << endl
- << " --al-conc <path> write pairs that aligned concordantly at least once to <path>" << endl
+ out << " --un <path> write unpaired reads that didn't align to <path>" << endl
+ << " --al <path> write unpaired reads that aligned at least once to <path>" << endl
+ << " --un-conc <path> write pairs that didn't align concordantly to <path>" << endl
+ << " --al-conc <path> write pairs that aligned concordantly at least once to <path>" << endl
<< " (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g." << endl
<< " --un-gz <path>, to gzip compress output, or add '-bz2' to bzip2 compress output.)" << endl;
}
@@ -787,6 +790,8 @@ static void printUsage(ostream& out) {
<< " --rg <text> add <text> (\"lab:value\") to @RG line of SAM header." << endl
<< " Note: @RG line only printed when --rg-id is set." << endl
<< " --omit-sec-seq put '*' in SEQ and QUAL fields for secondary alignments." << endl
+ << " --sam-noqname-trunc Suppress standard behavior of truncating readname at first whitespace " << endl
+ << " at the expense of generating non-standard SAM." << endl
<< endl
<< " Performance:" << endl
// << " -o/--offrate <int> override offrate of index; must be >= index's offrate" << endl
@@ -3196,7 +3201,7 @@ static void multiseedSearchWorker(void *vp) {
// Whether we're done with mate1 / mate2
bool done[2] = { !filt[0], !filt[1] };
size_t nelt[2] = {0, 0};
-
+
// Find end-to-end exact alignments for each read
if(doExactUpFront) {
for(size_t matei = 0; matei < (paired ? 2:1); matei++) {
@@ -3376,6 +3381,7 @@ static void multiseedSearchWorker(void *vp) {
}
}
}
+
// 1-mismatch
if(do1mmUpFront && !seedSumm) {
for(size_t matei = 0; matei < (paired ? 2:1); matei++) {
@@ -3562,7 +3568,11 @@ static void multiseedSearchWorker(void *vp) {
nrounds[1] = min<size_t>(nrounds[1], interval[1]);
Constraint gc = Constraint::penaltyFuncBased(scoreMin);
size_t seedsTried = 0;
+ size_t seedsTriedMS[] = {0, 0, 0, 0};
size_t nUniqueSeeds = 0, nRepeatSeeds = 0, seedHitTot = 0;
+ size_t nUniqueSeedsMS[] = {0, 0, 0, 0};
+ size_t nRepeatSeedsMS[] = {0, 0, 0, 0};
+ size_t seedHitTotMS[] = {0, 0, 0, 0};
for(size_t roundi = 0; roundi < nSeedRounds; roundi++) {
ca.nextRead(); // Clear cache in preparation for new search
shs[0].clearSeeds();
@@ -3612,6 +3622,7 @@ static void multiseedSearchWorker(void *vp) {
continue;
}
// Instantiate the seeds
+ std::pair<int, int> instFw, instRc;
std::pair<int, int> inst = al.instantiateSeeds(
*seeds[mate], // search seeds
offset, // offset to begin extracting
@@ -3622,7 +3633,9 @@ static void multiseedSearchWorker(void *vp) {
norc[mate], // don't align revcomp read
ca, // holds some seed hits from previous reads
shs[mate], // holds all the seed hits
- sdm); // metrics
+ sdm, // metrics
+ instFw,
+ instRc);
assert(shs[mate].repOk(&ca.current()));
if(inst.first + inst.second == 0) {
// No seed hits! Done with this mate.
@@ -3631,6 +3644,8 @@ static void multiseedSearchWorker(void *vp) {
break;
}
seedsTried += (inst.first + inst.second);
+ seedsTriedMS[mate * 2 + 0] = instFw.first + instFw.second;
+ seedsTriedMS[mate * 2 + 1] = instRc.first + instRc.second;
// Align seeds
al.searchAllSeeds(
*seeds[mate], // search seeds
@@ -3654,8 +3669,14 @@ static void multiseedSearchWorker(void *vp) {
for(size_t mate = 0; mate < 2; mate++) {
if(!shs[mate].empty()) {
nUniqueSeeds += shs[mate].numUniqueSeeds();
+ nUniqueSeedsMS[mate * 2 + 0] += shs[mate].numUniqueSeedsStrand(true);
+ nUniqueSeedsMS[mate * 2 + 1] += shs[mate].numUniqueSeedsStrand(false);
nRepeatSeeds += shs[mate].numRepeatSeeds();
+ nRepeatSeedsMS[mate * 2 + 0] += shs[mate].numRepeatSeedsStrand(true);
+ nRepeatSeedsMS[mate * 2 + 1] += shs[mate].numRepeatSeedsStrand(false);
seedHitTot += shs[mate].numElts();
+ seedHitTotMS[mate * 2 + 0] += shs[mate].numEltsFw();
+ seedHitTotMS[mate * 2 + 1] += shs[mate].numEltsRc();
}
}
double uniqFactor[2] = { 0.0f, 0.0f };
@@ -3819,10 +3840,25 @@ static void multiseedSearchWorker(void *vp) {
}
}
} // end loop over reseeding rounds
- if(seedsTried != 0) {
+ if(seedsTried > 0) {
prm.seedPctUnique = (float)nUniqueSeeds / seedsTried;
prm.seedPctRep = (float)nRepeatSeeds / seedsTried;
prm.seedHitAvg = (float)seedHitTot / seedsTried;
+ } else {
+ prm.seedPctUnique = -1.0f;
+ prm.seedPctRep = -1.0f;
+ prm.seedHitAvg = -1.0f;
+ }
+ for(int i = 0; i < 4; i++) {
+ if(seedsTriedMS[i] > 0) {
+ prm.seedPctUniqueMS[i] = (float)nUniqueSeedsMS[i] / seedsTriedMS[i];
+ prm.seedPctRepMS[i] = (float)nRepeatSeedsMS[i] / seedsTriedMS[i];
+ prm.seedHitAvgMS[i] = (float)seedHitTotMS[i] / seedsTriedMS[i];
+ } else {
+ prm.seedPctUniqueMS[i] = -1.0f;
+ prm.seedPctRepMS[i] = -1.0f;
+ prm.seedHitAvgMS[i] = -1.0f;
+ }
}
size_t totnucs = 0;
for(size_t mate = 0; mate < (paired ? 2:1); mate++) {
@@ -3834,7 +3870,10 @@ static void multiseedSearchWorker(void *vp) {
totnucs += len;
}
}
- prm.seedsPerNuc = (float)seedsTried / totnucs;
+ prm.seedsPerNuc = totnucs > 0 ? ((float)seedsTried / totnucs) : -1;
+ for(int i = 0; i < 4; i++) {
+ prm.seedsPerNucMS[i] = totnucs > 0 ? ((float)seedsTriedMS[i] / totnucs) : -1;
+ }
for(size_t i = 0; i < 2; i++) {
assert_leq(prm.nExIters, mxIter[i]);
assert_leq(prm.nExDps, mxDp[i]);