diff options
author | Andreas Tille <tille@debian.org> | 2019-07-12 21:52:07 +0200 |
---|---|---|
committer | Andreas Tille <tille@debian.org> | 2019-07-12 21:52:07 +0200 |
commit | a87d5923b625c95c335e7664764c07e804107125 (patch) | |
tree | 70efa51a85a111a62a6b288823883171c2805729 | |
parent | 2dfc156de286a9d83e86487a4b372aba0f2ac6f2 (diff) |
New upstream version 1.3.2+dfsg
-rw-r--r-- | EM.cpp | 82 | ||||
-rw-r--r-- | WriteResults.h | 10 | ||||
-rwxr-xr-x | rsem-calculate-expression | 53 |
3 files changed, 91 insertions, 54 deletions
@@ -612,35 +612,59 @@ int main(int argc, char* argv[]) { fin>>N0>>N1>>N2>>N_tot; fin.close(); - general_assert(N1 > 0, "There are no alignable reads!"); - - if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; - - //set model parameters - mparams.M = M; - mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; - mparams.refs = &refs; - - sprintf(mparamsF, "%s.mparams", imdName); - fin.open(mparamsF); - - general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); - - fin>> mparams.minL>> mparams.maxL>> mparams.probF; - int val; // 0 or 1 , for estRSPD - fin>>val; - mparams.estRSPD = (val != 0); - fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; - fin>> mparams.seedLen; - fin.close(); - - //run EM - switch(read_type) { - case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; - case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; - case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; - case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; - default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); + if (N1 == 0) { + printf("Warning: There are no alignable reads!\n"); + theta.resize(M + 1, 0.0); + FILE *fo = NULL; + sprintf(thetaF, "%s.theta", statName); + fo = fopen(thetaF, "w"); + fclose(fo); + sprintf(modelF, "%s.model", statName); + fo = fopen(modelF, "w"); + fclose(fo); + eel.resize(M + 1, 0.0); + for (int i = 1; i <= M; ++i) eel[i] = transcripts.getTranscriptAt(i).getLength(); + double *countv = new double[M + 1]; + memset(countv, 0, sizeof(double) * (M + 1)); + writeResultsEM(M, refName, imdName, transcripts, theta, eel, countv, appendNames); + if (genBamF) { + sprintf(outBamF, "%s.transcript.bam", outName); + char command[1005]; + sprintf(command, "cp %s %s", inpSamF, outBamF); + printf("%s\n", command); + system(command); + } + delete[] countv; + } + else { + if ((READ_INT_TYPE)nThreads > N1) nThreads = N1; + + //set model parameters + mparams.M = M; + mparams.N[0] = N0; mparams.N[1] = N1; mparams.N[2] = N2; + mparams.refs = &refs; + + sprintf(mparamsF, "%s.mparams", imdName); + fin.open(mparamsF); + + general_assert(fin.is_open(), "Cannot open " + cstrtos(mparamsF) + "It may not exist."); + + fin>> mparams.minL>> mparams.maxL>> mparams.probF; + int val; // 0 or 1 , for estRSPD + fin>>val; + mparams.estRSPD = (val != 0); + fin>> mparams.B>> mparams.mate_minL>> mparams.mate_maxL>> mparams.mean>> mparams.sd; + fin>> mparams.seedLen; + fin.close(); + + //run EM + switch(read_type) { + case 0 : EM<SingleRead, SingleHit, SingleModel>(); break; + case 1 : EM<SingleReadQ, SingleHit, SingleQModel>(); break; + case 2 : EM<PairedEndRead, PairedEndHit, PairedEndModel>(); break; + case 3 : EM<PairedEndReadQ, PairedEndHit, PairedEndQModel>(); break; + default : fprintf(stderr, "Unknown Read Type!\n"); exit(-1); + } } time_t b = time(NULL); diff --git a/WriteResults.h b/WriteResults.h index 889d4c2..d3100d5 100644 --- a/WriteResults.h +++ b/WriteResults.h @@ -86,7 +86,8 @@ void calcExpressionValues(int M, const std::vector<double>& theta, const std::ve frac[i] = theta[i]; denom += frac[i]; } - general_assert(denom >= EPSILON, "No alignable reads?!"); + // general_assert(denom >= EPSILON, "No alignable reads?!"); + if (denom < EPSILON) denom = 1.0; for (int i = 1; i <= M; i++) frac[i] /= denom; //calculate FPKM @@ -98,6 +99,7 @@ void calcExpressionValues(int M, const std::vector<double>& theta, const std::ve tpm.assign(M + 1, 0.0); denom = 0.0; for (int i = 1; i <= M; i++) denom += fpkm[i]; + if (denom < EPSILON) denom = 1.0; for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6; } @@ -173,7 +175,7 @@ void writeResultsEM(int M, const char* refName, const char* imdName, Transcripts } else { for (int j = b; j < e; j++) { - isopct[j] = tpm[j] / gene_tpm[i]; + isopct[j] = gene_tpm[i] > EPSILON ? tpm[j] / gene_tpm[i] : 0.0; glens[i] += tlens[j] * isopct[j]; gene_eels[i] += eel[j] * isopct[j]; } @@ -203,7 +205,7 @@ void writeResultsEM(int M, const char* refName, const char* imdName, Transcripts } else { for (int j = b; j < e; j++) { - ta_pct[j] = tpm[j] / trans_tpm[i]; + ta_pct[j] = trans_tpm[i] > EPSILON ? tpm[j] / trans_tpm[i] : 0.0; trans_lens[i] += tlens[j] * ta_pct[j]; trans_eels[i] += eel[j] * ta_pct[j]; } @@ -214,7 +216,7 @@ void writeResultsEM(int M, const char* refName, const char* imdName, Transcripts for (int i = 0; i < m; i++) if (gene_tpm[i] >= EPSILON) { int b = gt.spAt(i), e = gt.spAt(i + 1); - for (int j = b; j < e; j++) gt_pct[j] = trans_tpm[j] / gene_tpm[i]; + for (int j = b; j < e; j++) gt_pct[j] = gene_tpm[i] > EPSILON ? trans_tpm[j] / gene_tpm[i] : 0.0; } } diff --git a/rsem-calculate-expression b/rsem-calculate-expression index e2bf826..4d036ef 100755 --- a/rsem-calculate-expression +++ b/rsem-calculate-expression @@ -417,25 +417,11 @@ if (!$is_alignment) { " --outFileNamePrefix $imdName "; ## -#<<<<<<< HEAD - #if ( $gzipped_read_file ) { - # $command .= ' --readFilesCommand zcat '; - #} elsif ( $bzipped_read_file ) { - # $command .= ' --readFilesCommand bzcat '; - #} - - #if ( $read_type == 0 || $read_type == 1 ) { - # $command .= " --readFilesIn $mate1_list "; - #} else { - # $command .= " --readFilesIn $mate1_list $mate2_list"; - #} -#======= if ( $star_gzipped_read_file ) { $command .= ' --readFilesCommand zcat '; } elsif ( $star_bzipped_read_file ) { $command .= ' --readFilesCommand bzip2 -c '; } -#>>>>>>> master if ( $read_type == 0 || $read_type == 1 ) { $command .= " --readFilesIn $mate1_list "; @@ -552,13 +538,24 @@ if ($quiet) { $command .= " -q"; } &runCommand($command); -$command = "rsem-build-read-index $gap"; -if ($read_type == 0) { $command .= " 0 $quiet $imdName\_alignable.fa"; } -elsif ($read_type == 1) { $command .= " 1 $quiet $imdName\_alignable.fq"; } -elsif ($read_type == 2) { $command .= " 0 $quiet $imdName\_alignable_1.fa $imdName\_alignable_2.fa"; } -elsif ($read_type == 3) { $command .= " 1 $quiet $imdName\_alignable_1.fq $imdName\_alignable_2.fq"; } -else { print "Impossible! read_type is not in [1,2,3,4]!\n"; exit(-1); } -&runCommand($command); +my $inpCntF = "$statName.cnt"; +my $local_status = open(INPUT, $inpCntF); +if ($local_status == 0) { print "Fail to open file $inpF!\n"; exit(-1); } +my $line = <INPUT>; +chomp($line); +my @Ns = split(/ /, $line); +close(INPUT); +my $no_aligned = ($Ns[1] == 0); + +if (!$no_aligned) { + $command = "rsem-build-read-index $gap"; + if ($read_type == 0) { $command .= " 0 $quiet $imdName\_alignable.fa"; } + elsif ($read_type == 1) { $command .= " 1 $quiet $imdName\_alignable.fq"; } + elsif ($read_type == 2) { $command .= " 0 $quiet $imdName\_alignable_1.fa $imdName\_alignable_2.fa"; } + elsif ($read_type == 3) { $command .= " 1 $quiet $imdName\_alignable_1.fq $imdName\_alignable_2.fq"; } + else { print "Impossible! read_type is not in [1,2,3,4]!\n"; exit(-1); } + &runCommand($command); +} my $doesOpen = open(OUTPUT, ">$imdName.mparams"); if ($doesOpen == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); } @@ -628,6 +625,20 @@ if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; } if ($mTime) { $time_start = time(); } +if ($no_aligned) { + print "Since no aligned reads, further steps will not be performed!\n"; + if (!$keep_intermediate_files) { + &runCommand("rm -rf $temp_dir", "Fail to delete the temporary folder!"); + } + if ($mTime) { + open(OUTPUT, ">$sampleName.time"); + print OUTPUT "Aligning reads: $time_alignment s.\n"; + print OUTPUT "Estimating expression levels: $time_rsem s.\n"; + close(OUTPUT); + } + exit(0); +} + if ($calcPME || $calcCI ) { $command = "rsem-run-gibbs $refName $imdName $statName $BURNIN $NCV $SAMPLEGAP"; $command .= " -p $nThreads"; |