summaryrefslogtreecommitdiff
path: root/demo
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-06-17 02:24:46 +0000
committerAaron M. Ucko <ucko@debian.org>2005-06-17 02:24:46 +0000
commitd76c50353c9e74f6915ca6352afb29ae53d45777 (patch)
tree9045ebb0b61573bfa4ff3d8778899c5a2cb76d6c /demo
parent402b112099aa816a02fd502b7f0261a99fe7126a (diff)
Load /tmp/.../ncbi-tools6-6.1.20050605 into
branches/upstream/current.
Diffstat (limited to 'demo')
-rw-r--r--demo/.BLAST_VERSION2
-rw-r--r--demo/bl2seq.c50
-rw-r--r--demo/blast_driver.c136
-rw-r--r--demo/blastall.c11
-rw-r--r--demo/blastpgp.c85
-rw-r--r--demo/copymat.c9
-rw-r--r--demo/fastacmd.c18
-rw-r--r--demo/formatrpsdb.c10
-rw-r--r--demo/megablast.c141
-rw-r--r--demo/nps2gps.c997
-rw-r--r--demo/rpsblast.c271
-rw-r--r--demo/tbl2asn.c119
-rw-r--r--demo/trna2tbl.c238
13 files changed, 1739 insertions, 348 deletions
diff --git a/demo/.BLAST_VERSION b/demo/.BLAST_VERSION
index 0d3ad67a..0b6e4313 100644
--- a/demo/.BLAST_VERSION
+++ b/demo/.BLAST_VERSION
@@ -1 +1 @@
-2.2.10
+2.2.11
diff --git a/demo/bl2seq.c b/demo/bl2seq.c
index a2cc5f1a..e0866598 100644
--- a/demo/bl2seq.c
+++ b/demo/bl2seq.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: bl2seq.c,v 6.75 2005/03/16 00:43:40 dondosha Exp $";
+static char const rcsid[] = "$Id: bl2seq.c,v 6.77 2005/06/02 20:45:32 dondosha Exp $";
/**************************************************************************
* *
@@ -27,6 +27,12 @@ static char const rcsid[] = "$Id: bl2seq.c,v 6.75 2005/03/16 00:43:40 dondosha E
***************************************************************************
*
* $Log: bl2seq.c,v $
+* Revision 6.77 2005/06/02 20:45:32 dondosha
+* Use BlastFormattingInfo structure for formatting
+*
+* Revision 6.76 2005/05/02 17:00:27 coulouri
+* change default to new engine
+*
* Revision 6.75 2005/03/16 00:43:40 dondosha
* Correction to previous commit to make reported deflines the same as before
*
@@ -550,8 +556,8 @@ static Args myargs [] = {
"F", NULL, NULL, TRUE, 'U', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_LCASE */
{ "Input sequences in the form of accession.version",
"F", NULL, NULL, FALSE, 'A', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_ACCN */
- {"Force use of old engine",
- "T", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL} /* ARG_FORCE_OLD */
+ {"Force use of the legacy BLAST engine",
+ "F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL} /* ARG_FORCE_OLD */
};
/**
@@ -807,7 +813,7 @@ Int2 Main_new(void)
BioseqPtr query_bsp=NULL, subject_bsp=NULL;
BioseqPtr bsp1=NULL, bsp2=NULL;
BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL;
- BlastFormattingOptions* format_options;
+ BlastFormattingInfo* format_info = NULL;
BLAST_SummaryOptions* options=NULL;
Blast_SummaryReturn* extra_returns=NULL;
Boolean believe_query= FALSE;
@@ -818,7 +824,7 @@ Int2 Main_new(void)
DbtagPtr dbtagptr;
EBlastProgramType program_number;
Int2 status; /* return value */
- Int4 align_view=0; /* Used for formatting */
+ EAlignView align_view = eAlignViewPairwise; /* Used for formatting */
SeqAlignPtr seqalign=NULL;
SeqEntryPtr sep=NULL, sep1=NULL;
SeqLocPtr slp1, slp2; /* Used for actual search. */
@@ -826,14 +832,16 @@ Int2 Main_new(void)
SeqLocPtr lcase_mask=NULL; /* For lower-case masking info from query FASTA. */
SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
Uint1 strand_option = 0; /* FIXME */
+ SBlastOptions* search_options = NULL; /* Needed for formatting. */
strand_option = (Uint1) myargs[ARG_STRAND].intvalue;
entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
believe_query = (seqannot_output || entrez_lookup);
+ /* Non-zero value for -m option means tabular output. */
if (myargs[ARG_FORMAT].intvalue != 0)
- align_view = 9; /* Means tabular output. */
+ align_view = eAlignViewTabularWithComments;
BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);
@@ -914,17 +922,17 @@ Int2 Main_new(void)
asnout = AsnIoClose(asnout);
}
- if ((status = BlastFormattingOptionsNew(program_number,
- myargs[ARG_OUT].strvalue, 0, 1, align_view,
- &format_options)) != 0)
- return status;
-
- if (myargs[ARG_HTML].intvalue) {
- format_options->html = TRUE;
- format_options->align_options += TXALIGN_HTML;
- format_options->print_options += TXALIGN_HTML;
- }
- format_options->believe_query = believe_query;
+ /* Pass NULL for the database name, since there is no database. */
+ BlastFormattingInfoNewBasic(align_view, options, slp1,
+ myargs[ARG_OUT].strvalue,
+ &search_options, &format_info);
+
+ /* Always show gis in the output, hence pass TRUE for respective
+ argument. */
+ BlastFormattingInfoSetUpOptions(format_info, 0, 1,
+ (Boolean) myargs[ARG_HTML].intvalue,
+ (Boolean) myargs[ARG_USEMEGABLAST].intvalue,
+ TRUE, believe_query);
if (mask_at_hash) {
/* Masking locations in SeqLoc form are no longer needed */
@@ -933,14 +941,14 @@ Int2 Main_new(void)
/* Format the results */
status =
- BLAST_FormatResults(seqalign, NULL, myargs[ARG_PROGRAM].strvalue,
- 1, slp1, filter_loc, format_options, FALSE, NULL,
+ BLAST_FormatResults(seqalign, 1, slp1, filter_loc, format_info,
extra_returns);
- status = Blast_PrintOutputFooter(program_number, format_options, NULL, extra_returns);
+ status = Blast_PrintOutputFooter(format_info, extra_returns);
- format_options = BlastFormattingOptionsFree(format_options);
+ format_info = BlastFormattingInfoFree(format_info);
extra_returns = Blast_SummaryReturnFree(extra_returns);
+ search_options = SBlastOptionsFree(search_options);
if (entrez_lookup) {
BioseqFree(query_bsp);
diff --git a/demo/blast_driver.c b/demo/blast_driver.c
index 48ac0c75..f9b9a759 100644
--- a/demo/blast_driver.c
+++ b/demo/blast_driver.c
@@ -1,4 +1,4 @@
-/* $Id: blast_driver.c,v 1.92 2005/04/27 20:01:00 dondosha Exp $
+/* $Id: blast_driver.c,v 1.96 2005/06/02 20:59:38 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,10 +32,10 @@ Author: Ilya Dondoshansky
Contents: Main function for running BLAST
******************************************************************************
- * $Revision: 1.92 $
+ * $Revision: 1.96 $
* */
-static char const rcsid[] = "$Id: blast_driver.c,v 1.92 2005/04/27 20:01:00 dondosha Exp $";
+static char const rcsid[] = "$Id: blast_driver.c,v 1.96 2005/06/02 20:59:38 dondosha Exp $";
#include <ncbi.h>
#include <sqnutils.h>
@@ -247,7 +247,6 @@ s_FillOptions(SBlastOptions* options)
Boolean greedy_with_ungapped = FALSE;
Boolean is_gapped = FALSE;
EBlastProgramType program_number = options->program;
- Boolean use_pssm = FALSE;
/* The following options are for blastn only */
if (program_number == eBlastTypeBlastn) {
@@ -268,7 +267,7 @@ s_FillOptions(SBlastOptions* options)
BLAST_FillLookupTableOptions(lookup_options, program_number, mb_lookup,
myargs[ARG_THRESHOLD].intvalue, (Int2)myargs[ARG_WORDSIZE].intvalue,
- variable_wordsize, use_pssm);
+ variable_wordsize);
/* Fill the rest of the lookup table options */
lookup_options->mb_template_length =
(Uint1) myargs[ARG_TEMPL_LEN].intvalue;
@@ -391,9 +390,9 @@ Int2 Nlm_Main(void)
Int2 status = 0;
SeqLoc* lcase_mask = NULL;
SeqLoc* query_slp = NULL;
- FILE *infp, *outfp;
+ FILE *infp, *outfp = NULL;
SBlastOptions* options = NULL;
- BlastFormattingOptions* format_options = NULL;
+ BlastFormattingInfo* format_info = NULL;
Int2 ctr = 1;
Int4 num_queries=0;
int tabular_output = FALSE;
@@ -403,6 +402,7 @@ Int2 Nlm_Main(void)
Blast_SummaryReturn* sum_returns = NULL;
Boolean phi_blast = FALSE;
ValNode* phivnps = NULL;
+ Blast_SummaryReturn* full_sum_returns = NULL;
if (! GetArgs (buf, NUMARG, myargs))
return (1);
@@ -439,8 +439,7 @@ Int2 Nlm_Main(void)
program_number == eBlastTypeTblastx ||
program_number == eBlastTypePhiBlastn);
- phi_blast = (program_number == eBlastTypePhiBlastn ||
- program_number == eBlastTypePhiBlastp);
+ phi_blast = Blast_ProgramIsPhiBlast(program_number);
if ((dbname = myargs[ARG_DB].strvalue) == NULL) {
Int4 letters_read;
@@ -475,59 +474,50 @@ Int2 Nlm_Main(void)
s_FillOptions(options);
- tabular_output = myargs[ARG_TABULAR].intvalue;
-
- if (tabular_output) {
- if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
- ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n",
- myargs[ARG_OUT].strvalue);
- return (1);
- }
- } else {
- if ((status = BlastFormattingOptionsNew(program_number,
- myargs[ARG_OUT].strvalue,
- myargs[ARG_DESCRIPTIONS].intvalue,
- myargs[ARG_ALIGNMENTS].intvalue,
- myargs[ARG_FORMAT].intvalue, &format_options)) != 0)
- return status;
-
- if (myargs[ARG_HTML].intvalue) {
- format_options->html = TRUE;
- format_options->align_options += TXALIGN_HTML;
- format_options->print_options += TXALIGN_HTML;
- }
-
- if (myargs[ARG_SHOWGI].intvalue) {
- format_options->align_options += TXALIGN_SHOW_GI;
- format_options->print_options += TXALIGN_SHOW_GI;
- }
-
- if (dbname) {
- BLAST_PrintOutputHeader(format_options,
- (Boolean)myargs[ARG_GREEDY].intvalue, blast_program, dbname, !db_is_na);
- }
-
- if (myargs[ARG_UNGAPPED].intvalue != 0)
- format_options->print_options += TXALIGN_SHOW_NO_OF_SEGS;
-
- }
-
if ((infp = FileOpen(myargs[ARG_QUERY].strvalue, "r")) == NULL) {
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n",
myargs[ARG_QUERY].strvalue);
return (1);
}
- if (tabular_output) {
- believe_defline = TRUE;
- }
-
if (s_ParseIntervalLocationArgument(myargs[ARG_QUERY_LOC].strvalue,
&q_from, &q_to)) {
ErrPostEx(SEV_FATAL, 1, 0, "Invalid query sequence location\n");
return -1;
}
+ tabular_output = myargs[ARG_TABULAR].intvalue;
+
+ /* For on-the-fly tabular and for ASN.1 output, the believe query defline
+ option must be set to true, otherwise it should be false. */
+ believe_defline =
+ (tabular_output || myargs[ARG_FORMAT].intvalue == eAlignViewAsnText ||
+ myargs[ARG_FORMAT].intvalue == eAlignViewAsnBinary);
+
+
+ if (tabular_output) {
+ if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
+ ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n",
+ myargs[ARG_OUT].strvalue);
+ return (1);
+ }
+ } else {
+ BlastFormattingInfoNew(myargs[ARG_FORMAT].intvalue, options,
+ blast_program, dbname,
+ myargs[ARG_OUT].strvalue, &format_info);
+
+ BlastFormattingInfoSetUpOptions(format_info,
+ myargs[ARG_DESCRIPTIONS].intvalue,
+ myargs[ARG_ALIGNMENTS].intvalue,
+ (Boolean) myargs[ARG_HTML].intvalue,
+ (Boolean) myargs[ARG_GREEDY].intvalue,
+ (Boolean) myargs[ARG_SHOWGI].intvalue,
+ believe_defline);
+
+ if (dbname)
+ BLAST_PrintOutputHeader(format_info);
+ }
+
/* Get the query (queries), loop if necessary. */
while (1) {
SeqAlign* seqalign = NULL;
@@ -539,7 +529,7 @@ Int2 Nlm_Main(void)
if ((Boolean)myargs[ARG_LCASE].intvalue) {
letters_read =
BLAST_GetQuerySeqLoc(infp, query_is_na,
- (Uint1)myargs[ARG_STRAND].intvalue, 0, q_from, q_to, &lcase_mask,
+ (Uint1)myargs[ARG_STRAND].intvalue, 10000, q_from, q_to, &lcase_mask,
&query_slp, &ctr, &num_queries, believe_defline);
} else {
letters_read =
@@ -622,9 +612,8 @@ Int2 Nlm_Main(void)
if (!status) {
if (phi_blast) {
status =
- PHIBlastFormatResults(program_number, phivnps, dbname,
- query_slp, format_options,
- sum_returns);
+ PHIBlastFormatResults(phivnps, query_slp, format_info,
+ sum_returns);
phivnps = PHIBlastResultsFree(phivnps);
} else if (!tabular_output) {
if (myargs[ARG_ASNOUT].strvalue) {
@@ -636,40 +625,39 @@ Int2 Nlm_Main(void)
/* Format the results */
status =
- BLAST_FormatResults(seqalign, dbname, blast_program,
- num_queries, query_slp,
- filter_loc, format_options,
- (Boolean)myargs[ARG_FRAMESHIFT].intvalue,
- NULL, sum_returns);
+ BLAST_FormatResults(seqalign, num_queries, query_slp,
+ filter_loc, format_info, sum_returns);
seqalign = SeqAlignSetFree(seqalign);
}
- Blast_PrintOutputFooter(program_number, format_options, dbname,
- sum_returns);
}
- /* Clean the summary returns substructures. */
+ /* Update the cumulative summary returns structure and clean the returns
+ substructures for the current search iteration. */
+ Blast_SummaryReturnUpdate(sum_returns, &full_sum_returns);
Blast_SummaryReturnClean(sum_returns);
filter_loc = Blast_ValNodeMaskListFree(filter_loc);
query_slp = SeqLocSetFree(query_slp);
} /* End loop on sets of queries */
+ if (infp)
+ FileClose(infp);
+
subject_slp = SeqLocSetFree(subject_slp);
- options = SBlastOptionsFree(options);
+
+ /* Print the footer with cumulative summary information. */
+ Blast_PrintOutputFooter(format_info, full_sum_returns);
+
sum_returns = Blast_SummaryReturnFree(sum_returns);
+ full_sum_returns = Blast_SummaryReturnFree(full_sum_returns);
- if (!tabular_output) {
- if(format_options->html && myargs[ARG_FORMAT].intvalue < 7) {
- fprintf(format_options->outfp, "</PRE>\n</BODY>\n</HTML>\n");
- }
- BlastFormattingOptionsFree(format_options);
- } else {
- FileClose(outfp);
- }
+ if (!tabular_output)
+ format_info = BlastFormattingInfoFree(format_info);
+ else
+ FileClose(outfp);
+
+ options = SBlastOptionsFree(options);
- if (infp)
- FileClose(infp);
-
return status;
}
diff --git a/demo/blastall.c b/demo/blastall.c
index 202ff6b3..00fad90f 100644
--- a/demo/blastall.c
+++ b/demo/blastall.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blastall.c,v 6.150 2005/02/07 15:30:39 dondosha Exp $";
+static char const rcsid[] = "$Id: blastall.c,v 6.151 2005/05/05 14:41:32 coulouri Exp $";
-/* $Id: blastall.c,v 6.150 2005/02/07 15:30:39 dondosha Exp $
+/* $Id: blastall.c,v 6.151 2005/05/05 14:41:32 coulouri Exp $
**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -28,6 +28,9 @@ static char const rcsid[] = "$Id: blastall.c,v 6.150 2005/02/07 15:30:39 dondosh
**************************************************************************
*
* $Log: blastall.c,v $
+ * Revision 6.151 2005/05/05 14:41:32 coulouri
+ * plug object manager entity id leak - rt ticket 15084082
+ *
* Revision 6.150 2005/02/07 15:30:39 dondosha
* Removed restriction on the value of longest intron option
*
@@ -1847,8 +1850,6 @@ Int2 Main (void)
}
} /* show text align, loop over seqalign/seqannots for concat */
ObjMgrClearHold();
-
- ObjMgrFreeCache(0);
} /* if outfp */
for (sap_iter=0; sap_iter < num_queries; sap_iter++) {
/* upper bound is num_queries, take care not to do this unless concat */
@@ -1965,6 +1966,8 @@ Int2 Main (void)
if (!options->is_megablast_search)
BlastDeleteUserErrorString(err_ticket);
+
+ ObjMgrFreeCache(0);
} /* while(TRUE) - main loop of the program over all FASTA entries */
#ifdef BLAST_CS_API
diff --git a/demo/blastpgp.c b/demo/blastpgp.c
index c0320b17..2c69bfff 100644
--- a/demo/blastpgp.c
+++ b/demo/blastpgp.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blastpgp.c,v 6.127 2005/04/04 14:57:47 papadopo Exp $";
+static char const rcsid[] = "$Id: blastpgp.c,v 6.130 2005/05/18 17:35:49 papadopo Exp $";
-/* $Id: blastpgp.c,v 6.127 2005/04/04 14:57:47 papadopo Exp $ */
+/* $Id: blastpgp.c,v 6.130 2005/05/18 17:35:49 papadopo Exp $ */
/**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -26,8 +26,18 @@ static char const rcsid[] = "$Id: blastpgp.c,v 6.127 2005/04/04 14:57:47 papadop
* appreciated. *
* *
**************************************************************************
- * $Revision: 6.127 $
+ * $Revision: 6.130 $
* $Log: blastpgp.c,v $
+ * Revision 6.130 2005/05/18 17:35:49 papadopo
+ * add warnings if new composition-based statistics options are selected
+ *
+ * Revision 6.129 2005/05/17 17:51:20 papadopo
+ * make the -t argument a string and handle values of 'T' or 'F' manually (required for backward compatibility)
+ *
+ * Revision 6.128 2005/05/16 17:41:00 papadopo
+ * From Alejandro Schaffer: Added support for compositional adjustment
+ * of matrices via enhanced -t flag, which now is integer to allow 4 options.
+ *
* Revision 6.127 2005/04/04 14:57:47 papadopo
* remove requirement for a fasta format query file if restarting from scoremat
*
@@ -528,6 +538,8 @@ star_callback(Int4 sequence_number, Int4 number_of_positive_hits)
return 0;
}
+#define EVALUE_EXPAND 10
+
#define YES_TO_DECLINE_TO_ALIGN
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
@@ -617,8 +629,13 @@ static Args myargs[] = {
NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0, 0, NULL},
{"Use lower case filtering of FASTA sequence", /* 41 */
"F", NULL,NULL,TRUE,'U',ARG_BOOLEAN, 0.0,0,NULL},
- { "Use composition based statistics", /* 42 */
- "T", NULL, NULL, FALSE, 't', ARG_BOOLEAN, 0.0, 0, NULL},
+ { "Use composition based statistics\n" /* 42 */
+ "0 or F or f: no composition-based statistics\n"
+ "1 or T or t: Composition-based statistics as in NAR 29:2994--3005, 2001\n"
+ "2: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties in round 1\n"
+ "3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally in round 1\n",
+
+ "1", NULL, NULL, FALSE, 't', ARG_STRING, 0.0, 0, NULL},
{ "ASN.1 Scoremat input of checkpoint data:\n"
"0: no scoremat input\n"
"1: Restart is from ASCII scoremat checkpoint file,\n"
@@ -990,7 +1007,33 @@ PGPBlastOptionsPtr PGPReadBlastOptions(void)
if (myargs[34].floatvalue)
options->searchsp_eff = (Nlm_FloatHi) myargs[34].floatvalue;
- options->tweak_parameters = (Boolean) myargs[42].intvalue;
+ switch (myargs[42].strvalue[0]) {
+ case 'F':
+ case 'f':
+ case '0':
+ options->tweak_parameters = NO_COMP_ADJUSTMENT;
+ break;
+ case 'T':
+ case 't':
+ case '1':
+ options->tweak_parameters = COMP_BASED_STATISTICS;
+ break;
+ case '2':
+ ErrPostEx(SEV_WARNING, 1, 0, "this argument for composition-"
+ "based statistics is currently experimental\n");
+ options->tweak_parameters = COMP_MATRIX_ADJUSTMENT;
+ break;
+ case '3':
+ ErrPostEx(SEV_WARNING, 1, 0, "this argument for composition-"
+ "based statistics is currently experimental\n");
+ options->tweak_parameters = COMP_BASED_STATISTICS |
+ COMP_MATRIX_ADJUSTMENT;
+ break;
+ default:
+ ErrPostEx(SEV_FATAL, 1, 0, "invalid argument for composition-"
+ "based statistics; see -t options\n");
+ break;
+ }
options->smith_waterman = (Boolean) myargs[33].intvalue;
if (bop->options->tweak_parameters) {
@@ -998,6 +1041,14 @@ PGPBlastOptionsPtr PGPReadBlastOptions(void)
hitlist_size */
bop->options->original_expect_value = bop->options->expect_value;
bop->options->hitlist_size *= 2;
+ if (bop->options->tweak_parameters > 1) {
+ if ((NULL == myargs[29].strvalue) && (NULL == myargs[39].strvalue)) {
+ /*round 1 and not recovering from checkpoint*/
+ bop->options->expect_value = EVALUE_EXPAND * bop->options->expect_value;
+ }
+ else
+ bop->options->tweak_parameters = 1;
+ }
}
@@ -1089,6 +1140,16 @@ Boolean PGPFormatHeader(PGPBlastOptionsPtr bop)
fprintf(bop->outfp, "\n");
BlastPrintReference(html, 90, bop->outfp);
fprintf(bop->outfp, "\n");
+ if (bop->options->tweak_parameters > 1) {
+ CAdjustmentPrintReference(html, 90, bop->outfp);
+ fprintf(bop->outfp, "\n");
+ }
+ if ((1== bop->options->tweak_parameters) ||
+ ((1 < bop->options->tweak_parameters) && (bop->options->maxNumPasses > 1))) {
+ CBStatisticsPrintReference(html, 90, (1 == bop->options->tweak_parameters),
+ (bop->options->maxNumPasses > 1), bop->outfp);
+ fprintf(bop->outfp, "\n");
+ }
AcknowledgeBlastQuery(bop->query_bsp, 70,
bop->outfp, bop->believe_query, html);
PrintDbInformation(bop->blast_database, TRUE, 70, bop->outfp, html);
@@ -1677,6 +1738,7 @@ Int2 Main (void)
BioseqBlastEngineCore for the second pass. */
bop->options->original_expect_value =
bop->options->expect_value;
+ search->pbp->cutoff_e = bop->options->expect_value;
bop->options->hitlist_size *= 2;
}
@@ -1846,9 +1908,14 @@ Int2 Main (void)
head = SeqAlignSetFree(head);
/* Here we will print out footer of BLAST output */
-
- if(!bop->is_xml_output && !tabular_output) {
- PGPFormatFooter(bop, search);
+
+ /*need to temporarily adjust cutoff_e for printing*/
+ if(!bop->is_xml_output && !tabular_output) {
+ if ((bop->options->tweak_parameters > 1) && (1 == thisPassNum))
+ search->pbp->cutoff_e = EVALUE_EXPAND * bop->options->expect_value;
+ PGPFormatFooter(bop, search);
+ if ((bop->options->tweak_parameters > 1) && (1 == thisPassNum))
+ search->pbp->cutoff_e = bop->options->original_expect_value;
}
/* PGPOneQueryCleanup */
diff --git a/demo/copymat.c b/demo/copymat.c
index b9a5b96b..4f802e39 100644
--- a/demo/copymat.c
+++ b/demo/copymat.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: copymat.c,v 6.41 2005/02/14 14:11:55 camacho Exp $";
+static char const rcsid[] = "$Id: copymat.c,v 6.42 2005/05/20 18:57:51 camacho Exp $";
/*
* ===========================================================================
@@ -36,6 +36,9 @@ Contents: main routines for copymatrices program to convert
score matrices output by makematrices into a single byte-encoded file.
$Log: copymat.c,v $
+Revision 6.42 2005/05/20 18:57:51 camacho
+Update to use new signature to BLAST_FillLookupTableOptions
+
Revision 6.41 2005/02/14 14:11:55 camacho
Changes to use SBlastScoreMatrix
@@ -608,8 +611,8 @@ Boolean RPSCreateLookupFile(ScoreRow *combinedMatrix, Int4 numProfiles,
sbp = BlastScoreBlkNew(BLASTAA_SEQ_CODE, 1);
RPSPsiMatrixAttach(sbp, posMatrix);
LookupTableOptionsNew(eBlastTypeBlastp, &lookup_options);
- BLAST_FillLookupTableOptions(lookup_options, eBlastTypeBlastp, FALSE,
- (Int4) (myargs[3].floatvalue*scalingFactor), myargs[4].intvalue, FALSE, TRUE); /* add last arg for psi-blast?? */
+ BLAST_FillLookupTableOptions(lookup_options, eBlastTypePsiBlast, FALSE,
+ (Int4) (myargs[3].floatvalue*scalingFactor), myargs[4].intvalue, FALSE);
BlastSeqLocNew(&lookup_segment, 0, all_length);
diff --git a/demo/fastacmd.c b/demo/fastacmd.c
index 4387f4f3..87f6d742 100644
--- a/demo/fastacmd.c
+++ b/demo/fastacmd.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: fastacmd.c,v 6.34 2004/12/04 03:39:48 camacho Exp $";
+static char const rcsid[] = "$Id: fastacmd.c,v 6.35 2005/05/05 15:54:06 dondosha Exp $";
-/* $Id: fastacmd.c,v 6.34 2004/12/04 03:39:48 camacho Exp $
+/* $Id: fastacmd.c,v 6.35 2005/05/05 15:54:06 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,12 +31,15 @@ static char const rcsid[] = "$Id: fastacmd.c,v 6.34 2004/12/04 03:39:48 camacho
*
* Initial Version Creation Date: 05/20/1997
*
-* $Revision: 6.34 $
+* $Revision: 6.35 $
*
* File Description:
* FASTA retrievel system using ISAM indexes
*
* $Log: fastacmd.c,v $
+* Revision 6.35 2005/05/05 15:54:06 dondosha
+* Enhanced comment for the -s option and fixed typo in -i option description
+*
* Revision 6.34 2004/12/04 03:39:48 camacho
* Set range of data on -D option
*
@@ -173,11 +176,12 @@ static Args myargs [] = {
" T - protein \n"
" F - nucleotide",
"G", NULL,NULL,TRUE,'p',ARG_STRING,0.0,0,NULL},
- { "Search string: GIs, accessions and loci may be used delimited\n"
- " by comma.", /* 2 */
+ { "Comma-delimited search string(s).\n"
+ " GIs, accessions, loci, or fullSeq-id strings may be used,\n"
+ " e.g. 555, AC147927, 'gnl|dbname|tag'", /* 2 */
NULL, NULL, NULL, TRUE, 's', ARG_STRING, 0.0, 0, NULL},
- { "Input file wilth GIs/accessions/loci for batch\n"
- " retrieval",/* 3 */
+ { "Input file with GIs/accessions/loci for batch\n"
+ " retrieval", /* 3 */
NULL, NULL, NULL, TRUE, 'i', ARG_STRING, 0.0, 0, NULL},
{ "Retrieve duplicate accessions", /* 4 */
"F", NULL, NULL, TRUE, 'a', ARG_BOOLEAN, 0.0, 0, NULL},
diff --git a/demo/formatrpsdb.c b/demo/formatrpsdb.c
index aabda5dd..0a53c78f 100644
--- a/demo/formatrpsdb.c
+++ b/demo/formatrpsdb.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: formatrpsdb.c,v 1.15 2005/02/22 14:17:31 camacho Exp $";
+static char const rcsid[] = "$Id: formatrpsdb.c,v 1.16 2005/05/20 18:57:51 camacho Exp $";
/*****************************************************************************
@@ -38,6 +38,9 @@ static char const rcsid[] = "$Id: formatrpsdb.c,v 1.15 2005/02/22 14:17:31 camac
***************************************************************************
$Log: formatrpsdb.c,v $
+ Revision 1.16 2005/05/20 18:57:51 camacho
+ Update to use new signature to BLAST_FillLookupTableOptions
+
Revision 1.15 2005/02/22 14:17:31 camacho
Fix bioseq data type
@@ -633,12 +636,11 @@ Int2 RPSAddFirstSequence(RPS_DbInfo *info,
}
if (BLAST_FillLookupTableOptions(info->lookup_options,
- eBlastTypeBlastp,
+ eBlastTypePsiBlast,
FALSE, /* no megablast */
threshold, /* neighboring threshold */
BLAST_WORDSIZE_PROT,
- FALSE, /* no variable words */
- TRUE /* use a PSSM */
+ FALSE /* no variable words */
) != 0) {
ErrPostEx(SEV_ERROR, 0, 0, "Cannot set lookup table options");
return 1;
diff --git a/demo/megablast.c b/demo/megablast.c
index dc400340..32e0d167 100644
--- a/demo/megablast.c
+++ b/demo/megablast.c
@@ -1,5 +1,5 @@
-static char const rcsid[] = "$Id: megablast.c,v 6.158 2005/04/27 14:55:09 papadopo Exp $";
-/* $Id: megablast.c,v 6.158 2005/04/27 14:55:09 papadopo Exp $
+static char const rcsid[] = "$Id: megablast.c,v 6.162 2005/06/02 20:45:05 dondosha Exp $";
+/* $Id: megablast.c,v 6.162 2005/06/02 20:45:05 dondosha Exp $
**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -26,6 +26,20 @@ static char const rcsid[] = "$Id: megablast.c,v 6.158 2005/04/27 14:55:09 papado
* *
**************************************************************************
* $Log: megablast.c,v $
+ * Revision 6.162 2005/06/02 20:45:05 dondosha
+ * 1. Print footer only once after the loop over sets of queries;
+ * 2. Format XML output in a valid XML;
+ * 3. Use BlastFormattingInfo structure for formatting.
+ *
+ * Revision 6.161 2005/05/20 18:57:51 camacho
+ * Update to use new signature to BLAST_FillLookupTableOptions
+ *
+ * Revision 6.160 2005/05/02 17:44:07 coulouri
+ * bring default evalue and output type in line with other blast programs
+ *
+ * Revision 6.159 2005/05/02 17:00:28 coulouri
+ * change default to new engine
+ *
* Revision 6.158 2005/04/27 14:55:09 papadopo
* change signature of BlastFillHitSavingOptions
*
@@ -528,6 +542,7 @@ static char const rcsid[] = "$Id: megablast.c,v 6.158 2005/04/27 14:55:09 papado
#include <algo/blast/api/blast_api.h>
#include <algo/blast/api/blast_seq.h>
#include <algo/blast/api/repeats_filter.h>
+#include <algo/blast/core/blast_setup.h>
#endif
#define DEFLINE_BUF 255
@@ -581,7 +596,6 @@ MegaBlastPrintEndpoints(VoidPtr ptr)
BLAST_HSPPtr hsp;
Int2 context;
Char context_sign;
- Uint4 header_index = 0;
Int4 subject_gi, score;
FILE *fp = (FILE *) search->output;
@@ -611,7 +625,7 @@ MegaBlastPrintEndpoints(VoidPtr ptr)
!StringICmp(db_tag->db, "TI")) &&
db_tag->tag->id != 0) {
subject_buffer = (CharPtr) Malloc(16);
- sprintf(subject_buffer, "%ld", db_tag->tag->id);
+ sprintf(subject_buffer, "%ld", (long) db_tag->tag->id);
} else {
subject_buffer = StringTokMT(subject_descr, " \t", &subject_descr);
subject_descr = subject_buffer;
@@ -680,9 +694,8 @@ MegaBlastPrintEndpoints(VoidPtr ptr)
query_buffer = StringTokMT(title, " ", &title);
else {
Int4 query_gi;
- Boolean numeric_query_id =
- GetAccessionFromSeqId(query_bsp->id, &query_gi,
- &query_buffer);
+ GetAccessionFromSeqId(query_bsp->id, &query_gi,
+ &query_buffer);
}
BioseqUnlock(query_bsp);
} else {
@@ -720,7 +733,7 @@ MegaBlastPrintEndpoints(VoidPtr ptr)
hsp->subject.end += s_shift;
if (numeric_sip_type)
- fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", subject_gi,
+ fprintf(fp, "'%ld'=='%c%s' (%d %d %d %d) %d\n", (long) subject_gi,
context_sign, query_buffer, hsp->subject.offset, q_start,
hsp->subject.end, q_end, score);
else
@@ -859,9 +872,8 @@ MegaBlastPrintSegments(VoidPtr ptr)
query_buffer = StringTokMT(title, " ", &title);
else {
Int4 query_gi;
- Boolean numeric_query_id =
- GetAccessionFromSeqId(query_bsp->id, &query_gi,
- &query_buffer);
+ GetAccessionFromSeqId(query_bsp->id, &query_gi,
+ &query_buffer);
}
BioseqUnlock(query_bsp);
} else {
@@ -876,7 +888,7 @@ MegaBlastPrintSegments(VoidPtr ptr)
if (numeric_sip_type)
sprintf(buffer, "\n#'>%ld'=='%c%s' (%d %d %d %d) %d\na {\n s %d\n b %d %d\n e %d %d\n",
- subject_gi, strand, query_buffer,
+ (long) subject_gi, strand, query_buffer,
s_start, q_start, s_end, q_end, score, score,
s_start, q_start, s_end, q_end);
else
@@ -1030,7 +1042,7 @@ static Args myargs [] = {
{ "Query File",
NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_QUERY */
{ "Expectation value",
- "1000000.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL},/* ARG_EVALUE */
+ "10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL},/* ARG_EVALUE */
{ "alignment view options:\n0 = pairwise,\n1 = query-anchored showing identities,\n2 = query-anchored no identities,\n3 = flat query-anchored, show identities,\n4 = flat query-anchored, no identities,\n5 = query-anchored no identities and blunt ends,\n6 = flat query-anchored, no identities and blunt ends,\n7 = XML Blast output,\n8 = tabular, \n9 tabular with comment lines,\n10 ASN, text\n11 ASN, binary",
"0", NULL, NULL, FALSE, 'm', ARG_INT, 0.0, 0, NULL}, /* ARG_FORMAT */
{ "BLAST report Output File",
@@ -1050,7 +1062,7 @@ static Args myargs [] = {
{ "Number of database sequence to show alignments for (B)",
"250", NULL, NULL, FALSE, 'b', ARG_INT, 0.0, 0, NULL}, /* ARG_ALIGNMENTS */
{ "Type of output:\n0 - alignment endpoints and score,\n1 - all ungapped segments endpoints,\n2 - traditional BLAST output,\n3 - tab-delimited one line format",
- "0", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL}, /* ARG_OUTTYPE */
+ "2", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL}, /* ARG_OUTTYPE */
{ "Number of processors to use",
"1", NULL, NULL, FALSE, 'a', ARG_INT, 0.0, 0, NULL}, /* ARG_THREADS */
{ "ASN.1 SeqAlign file; must be used in conjunction with -D2 option",
@@ -1108,8 +1120,8 @@ static Args myargs [] = {
"0", NULL, NULL, FALSE, 'H', ARG_INT, 0.0, 0, NULL}, /* ARG_MAXHSP */
#endif
#if MB_ALLOW_NEW
- {"Force use of old engine",
- "T", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL} /* ARG_FORCE_OLD */
+ {"Force use of the legacy BLAST engine",
+ "F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL} /* ARG_FORCE_OLD */
#endif
};
@@ -1119,7 +1131,7 @@ static Args myargs [] = {
static Int2 Main_old (void)
{
- AsnIoPtr aip, xml_aip;
+ AsnIoPtr aip, xml_aip = NULL;
BioseqPtr query_bsp, PNTR query_bsp_array;
BioSourcePtr source;
BLAST_MatrixPtr matrix;
@@ -1762,11 +1774,9 @@ BLAST_FillOptions(SBlastOptions* options)
BlastExtensionOptions* ext_options = options->ext_options;
BlastHitSavingOptions* hit_options = options->hit_options ;
BlastScoringOptions* score_options = options->score_options;
- BlastEffectiveLengthsOptions* eff_len_options = options->eff_len_options;
Boolean ag_blast = FALSE, variable_wordsize = FALSE, mb_lookup = TRUE;
Boolean greedy=TRUE; /* greedy alignment should be done. */
double lambda=0;
- Int2 status;
const Uint1 program_number = eBlastTypeBlastn;
if (myargs[ARG_DYNAMIC].intvalue != 0)
@@ -1795,7 +1805,7 @@ BLAST_FillOptions(SBlastOptions* options)
}
BLAST_FillLookupTableOptions(lookup_options, program_number, mb_lookup,
- 0, myargs[ARG_WORDSIZE].intvalue, variable_wordsize, FALSE);
+ 0, myargs[ARG_WORDSIZE].intvalue, variable_wordsize);
/* Fill the rest of the lookup table options */
lookup_options->mb_template_length =
(Uint1) myargs[ARG_TEMPL_LEN].intvalue;
@@ -1850,25 +1860,24 @@ BLAST_FillOptions(SBlastOptions* options)
static Int2 Main_new(void)
{
const Boolean query_is_na = TRUE;
- const Boolean db_is_na = TRUE;
Boolean believe_query = FALSE;
char* program_name = "blastn";
Int2 status = 0;
Int4 start=0, end=0; /* start and end of sequence to be searched as specified by ARG_QUERYLOC */
SeqLoc* lcase_mask = NULL;
- SeqLoc* filter_loc=NULL; /* All masking locations */
SeqLoc* query_slp = NULL;
FILE *infp=NULL, *outfp=NULL;
SBlastOptions* options = NULL;
- BlastFormattingOptions* format_options;
Int2 ctr = 1;
Int4 num_queries_total=0; /* total number of queries read. */
Boolean tabular_output = FALSE;
BlastTabularFormatData* tf_data = NULL;
int num_threads;
char* dbname = myargs[ARG_DB].strvalue;
+ BlastFormattingInfo* format_info = NULL;
Blast_SummaryReturn* sum_returns = NULL;
- Int4 align_view;
+ Blast_SummaryReturn* full_sum_returns = NULL;
+ EAlignView align_view = eAlignViewMax;
if (myargs[ARG_OUTTYPE].intvalue == 3)
tabular_output = TRUE;
@@ -1886,19 +1895,19 @@ static Int2 Main_new(void)
BLAST_FillOptions(options);
- align_view = myargs[ARG_FORMAT].intvalue;
+ if (myargs[ARG_FORMAT].intvalue >= eAlignViewPairwise &&
+ myargs[ARG_FORMAT].intvalue < eAlignViewMax)
+ align_view = (EAlignView) myargs[ARG_FORMAT].intvalue;
+ else
+ ErrPostEx(SEV_FATAL, 1, 0, "Unsupported value for the -m option");
- /* If tabular output is requested, set believe_query to TRUE,
- to guarantee meaningful sequence ids in the output. */
- if (tabular_output)
- believe_query = TRUE;
- else
- believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue;
+ believe_query = (Boolean) myargs[ARG_BELIEVEQUERY].intvalue;
/* If ASN.1 output is requested and believe_query is not set to TRUE,
exit with an error. */
if (!believe_query && (myargs[ARG_ASNOUT].strvalue ||
- align_view == 10 || align_view == 11)) {
+ align_view == eAlignViewAsnText ||
+ align_view == eAlignViewAsnBinary)) {
ErrPostEx(SEV_FATAL, 1, 0,
"-J option must be TRUE to produce ASN.1 output; before "
"changing -J to TRUE please also ensure that all query "
@@ -1907,31 +1916,30 @@ static Int2 Main_new(void)
}
if (!tabular_output) {
- if ((status = BlastFormattingOptionsNew(options->program,
- myargs[ARG_OUT].strvalue,
- myargs[ARG_DESCRIPTIONS].intvalue,
- myargs[ARG_ALIGNMENTS].intvalue,
- align_view, &format_options)) != 0)
- return status;
- format_options->html = (Boolean) myargs[ARG_HTML].intvalue;
- if (myargs[ARG_SHOWGIS].intvalue == 0)
- { /* These are default in new api, so we turn off if not requested. */
- format_options->align_options -= TXALIGN_SHOW_GI;
- format_options->print_options -= TXALIGN_SHOW_GI;
- }
-
- if (dbname) {
- BLAST_PrintOutputHeader(format_options, TRUE, program_name, dbname,
- !db_is_na);
- }
- }
- else
- { /* tabular output requires raw FILE*. */
+ BlastFormattingInfoNew(myargs[ARG_FORMAT].intvalue, options,
+ program_name, dbname,
+ myargs[ARG_OUT].strvalue, &format_info);
+
+ /* Pass TRUE for the "is megablast" argument. Since megablast is always
+ gapped, pass FALSE for the "is ungapped" argument. */
+ BlastFormattingInfoSetUpOptions(format_info,
+ myargs[ARG_DESCRIPTIONS].intvalue,
+ myargs[ARG_ALIGNMENTS].intvalue,
+ (Boolean) myargs[ARG_HTML].intvalue,
+ TRUE,
+ (Boolean) myargs[ARG_SHOWGIS].intvalue,
+ believe_query);
+
+ if (dbname)
+ BLAST_PrintOutputHeader(format_info);
+ } else { /* tabular output requires raw FILE*. */
if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n",
myargs[ARG_OUT].strvalue);
return (1);
}
+ /* If tabular output is requested, automatically set believe_query to
+ TRUE, to guarantee meaningful sequence ids in the output. */
believe_query = TRUE;
}
@@ -2027,29 +2035,33 @@ static Int2 Main_new(void)
/* Format the results; note that seqalign and filter locations
are freed inside. */
status =
- BLAST_FormatResults(seqalign, myargs[ARG_DB].strvalue,
- program_name, num_queries, query_slp,
- filter_loc, format_options, FALSE, NULL,
- sum_returns);
+ BLAST_FormatResults(seqalign, num_queries, query_slp,
+ filter_loc, format_info, sum_returns);
seqalign = SeqAlignSetFree(seqalign);
- Blast_PrintOutputFooter(options->program, format_options, dbname,
- sum_returns);
}
+ /* Update the cumulative summary returns structure and clean the returns
+ substructures for the current search iteration. */
+ Blast_SummaryReturnUpdate(sum_returns, &full_sum_returns);
Blast_SummaryReturnClean(sum_returns);
filter_loc = Blast_ValNodeMaskListFree(filter_loc);
query_slp = SeqLocSetFree(query_slp);
} /* End loop on sets of queries */
- options = SBlastOptionsFree(options);
+ if (infp)
+ FileClose(infp);
+
+ Blast_PrintOutputFooter(format_info, full_sum_returns);
+
sum_returns = Blast_SummaryReturnFree(sum_returns);
+ full_sum_returns = Blast_SummaryReturnFree(full_sum_returns);
if (!tabular_output)
{
- if (align_view < 7 && myargs[ARG_LOGINFO].intvalue)
- BlastPrintLogReport(format_options, num_queries_total);
- format_options = BlastFormattingOptionsFree(format_options);
+ if (align_view < eAlignViewXml && myargs[ARG_LOGINFO].intvalue)
+ BlastPrintLogReport(format_info);
+ format_info = BlastFormattingInfoFree(format_info);
}
else
{
@@ -2059,9 +2071,8 @@ static Int2 Main_new(void)
FileClose(outfp);
}
- if (infp)
- FileClose(infp);
-
+ options = SBlastOptionsFree(options);
+
return status;
}
#endif
diff --git a/demo/nps2gps.c b/demo/nps2gps.c
new file mode 100644
index 00000000..e2d9d292
--- /dev/null
+++ b/demo/nps2gps.c
@@ -0,0 +1,997 @@
+/* nps2gps.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: nps2gps.c
+*
+* Author: Jonathan Kans
+*
+* Version Creation Date: 5/12/05
+*
+* $Revision: 1.3 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <objall.h>
+#include <objsset.h>
+#include <objsub.h>
+#include <objfdef.h>
+#include <sqnutils.h>
+#include <seqport.h>
+#include <explore.h>
+#include <subutil.h>
+#include <toasn3.h>
+
+#define NPS2GPSAPP_VER "1.2"
+
+CharPtr NPS2GPSAPPLICATION = NPS2GPSAPP_VER;
+
+typedef struct n2gdata {
+ CharPtr results;
+ CharPtr outfile;
+ Boolean failure;
+} N2GData, PNTR N2GPtr;
+
+typedef struct npsseqs {
+ BioseqPtr nuc;
+ BioseqPtr prot;
+} NpsSeqs, PNTR NpsSeqsPtr;
+
+static void FindNucProtSeqs (
+ BioseqPtr bsp,
+ Pointer userdata
+)
+
+{
+ NpsSeqsPtr nsp;
+
+ if (bsp == NULL) return;
+ nsp = (NpsSeqsPtr) userdata;
+ if (nsp == NULL) return;
+
+ if (ISA_na (bsp->mol)) {
+ nsp->nuc = bsp;
+ } else if (ISA_aa (bsp->mol)) {
+ nsp->prot = bsp;
+ }
+}
+
+static void LclMakeNucProtCDS (
+ BioseqSetPtr bssp,
+ Pointer userdata
+)
+
+{
+ CodeBreakPtr cbp;
+ SeqFeatPtr cds;
+ CdRegionPtr crp;
+ SeqFeatPtr mrna;
+ NpsSeqs ns;
+ Boolean partial5, partial3;
+ SeqFeatPtr sfp;
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+ Int4 start, stop;
+ SeqFeatPtr temp;
+
+ ns.nuc = NULL;
+ ns.prot = NULL;
+ if (VisitBioseqsInSet (bssp, (Pointer) &ns, FindNucProtSeqs) != 2) return;
+ if (ns.nuc == NULL || ns.prot == NULL) return;
+
+ cds = SeqMgrGetCDSgivenProduct (ns.prot, NULL);
+ mrna = SeqMgrGetRNAgivenProduct (ns.nuc, NULL);
+ if (cds == NULL || mrna == NULL) return;
+
+ CheckSeqLocForPartial (cds->location, &partial5, &partial3);
+
+ start = GetOffsetInLoc (cds->location, mrna->location, SEQLOC_START);
+ stop = GetOffsetInLoc (cds->location, mrna->location, SEQLOC_STOP);
+
+ if (start < 0 || start >= ns.nuc->length ||
+ stop < 0 || stop >= ns.nuc->length) return;
+
+ sip = SeqIdFindBest (ns.nuc->id, 0);
+ if (sip == NULL) return;
+
+ /* copy cds feature fields to paste into new cds feature */
+ temp = AsnIoMemCopy (cds,
+ (AsnReadFunc) SeqFeatAsnRead,
+ (AsnWriteFunc) SeqFeatAsnWrite);
+ if (temp == NULL) return;
+
+ sfp = CreateNewFeatureOnBioseq (ns.nuc, SEQFEAT_CDREGION, NULL);
+ if (sfp == NULL) return;
+
+ sfp->location = SeqLocFree (sfp->location);
+ if (StringISearch (cds->except_text, "ribosomal slippage") == NULL &&
+ StringISearch (cds->except_text, "ribosome slippage") == NULL &&
+ StringISearch (cds->except_text, "trans splicing") == NULL &&
+ StringISearch (cds->except_text, "trans-splicing") == NULL &&
+ StringISearch (cds->except_text, "artificial frameshift") == NULL) {
+ sfp->location = AddIntervalToLocation (NULL, sip, start, stop, partial5, partial3);
+ } else {
+ slp = SeqLocFindNext (cds->location, NULL);
+ while (slp != NULL) {
+ start = GetOffsetInLoc (slp, mrna->location, SEQLOC_START);
+ stop = GetOffsetInLoc (slp, mrna->location, SEQLOC_STOP);
+ sfp->location = AddIntervalToLocation (sfp->location, sip, start,
+ stop, partial5, partial3);
+ slp = SeqLocFindNext (cds->location, slp);
+ }
+ sfp->location = SeqLocMergeEx (ns.nuc, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE);
+ }
+ SetSeqFeatProduct (sfp, ns.prot);
+
+ /* paste fields from temp copy of original cds */
+ crp = (CdRegionPtr) temp->data.value.ptrvalue;
+ sfp->data.value.ptrvalue = (Pointer) crp;
+
+ sfp->partial = temp->partial;
+ sfp->excpt = temp->excpt;
+ sfp->comment = temp->comment;
+ sfp->qual = temp->qual;
+ sfp->title = temp->title;
+ sfp->ext = temp->ext;
+ sfp->cit = temp->cit;
+ sfp->exp_ev = temp->exp_ev;
+ sfp->xref = temp->xref;
+ sfp->dbxref = temp->dbxref;
+ sfp->pseudo = temp->pseudo;
+ sfp->except_text = temp->except_text;
+
+ MemFree (temp); /* do not SeqFeatFree */
+
+ /* update code break locations */
+ for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
+ CheckSeqLocForPartial (cbp->loc, &partial5, &partial3);
+ start = GetOffsetInLoc (cbp->loc, mrna->location, SEQLOC_START);
+ stop = GetOffsetInLoc (cbp->loc, mrna->location, SEQLOC_STOP);
+ if (start < 0 || start >= ns.nuc->length ||
+ stop < 0 || stop >= ns.nuc->length) continue;
+ cbp->loc = SeqLocFree (cbp->loc);
+ cbp->loc = AddIntervalToLocation (NULL, sip, start, stop, partial5, partial3);;
+ }
+}
+
+/* copy gene from contig onto nuc-prot, single interval on cdna bioseq */
+
+static void LclCopyGene (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+
+{
+ BioseqPtr bsp;
+ SeqMgrFeatContext gcontext;
+ SeqFeatPtr gene, copy, temp;
+ GeneRefPtr grp, xref;
+ Boolean partial5, partial3;
+
+ /* input mrna features are multi-interval on contig */
+
+ if (sfp->data.choice != SEQFEAT_RNA) return;
+
+ /* find cdna product of mrna */
+
+ bsp = BioseqFindFromSeqLoc (sfp->product);
+ if (bsp == NULL) return;
+
+ /* check for gene xref */
+
+ xref = SeqMgrGetGeneXref (sfp);
+ if (xref != NULL) {
+ if (SeqMgrGeneIsSuppressed (xref)) return;
+
+ /* copy gene xref for new gene feature */
+
+ grp = AsnIoMemCopy (xref,
+ (AsnReadFunc) GeneRefAsnRead,
+ (AsnWriteFunc) GeneRefAsnWrite);
+ if (grp == NULL) return;
+
+ /* make new gene feature on full-length of cdna */
+
+ copy = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, NULL);
+ if (copy == NULL) return;
+
+ copy->data.value.ptrvalue = grp;
+ return;
+ }
+
+ /* overlapping gene should be single interval on contig */
+
+ gene = SeqMgrGetOverlappingGene (sfp->location, &gcontext);
+ if (gene == NULL) return;
+
+ CheckSeqLocForPartial (gene->location, &partial5, &partial3);
+
+ /* copy gene feature fields to paste into new gene feature */
+
+ temp = AsnIoMemCopy (gene,
+ (AsnReadFunc) SeqFeatAsnRead,
+ (AsnWriteFunc) SeqFeatAsnWrite);
+ if (temp == NULL) return;
+
+ /* make new gene feature on full-length of cdna */
+
+ copy = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, NULL);
+ if (copy == NULL) {
+ SeqFeatFree (temp);
+ return;
+ }
+
+ /* paste fields from temp copy of original gene */
+
+ copy->data.value.ptrvalue = temp->data.value.ptrvalue;
+ copy->partial = temp->partial;
+ copy->excpt = temp->excpt;
+ copy->comment = temp->comment;
+ copy->qual = temp->qual;
+ copy->title = temp->title;
+ copy->ext = temp->ext;
+ copy->cit = temp->cit;
+ copy->exp_ev = temp->exp_ev;
+ copy->xref = temp->xref;
+ copy->dbxref = temp->dbxref;
+ copy->pseudo = temp->pseudo;
+ copy->except_text = temp->except_text;
+
+ SetSeqLocPartial (copy->location, partial5, partial3);
+
+ SeqLocFree (temp->location);
+ MemFree (temp); /* do not SeqFeatFree */
+}
+
+static void LclAddMrnaTitles (
+ SeqLocPtr slp,
+ Pointer userdata
+)
+
+{
+ BioseqPtr bsp;
+ SeqMgrFeatContext ccontext;
+ CharPtr cdslabel = NULL;
+ SeqMgrFeatContext gcontext;
+ CharPtr genelabel = NULL;
+ size_t len;
+ CharPtr organism;
+ SeqFeatPtr sfp;
+ CharPtr str;
+
+ if (slp == NULL) return;
+ bsp = BioseqFindFromSeqLoc (slp);
+ if (bsp == NULL) return;
+ if (! ISA_na (bsp->mol)) return;
+ organism = (CharPtr) userdata;
+ if (BioseqGetTitle (bsp) != NULL) return;
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &gcontext);
+ if (sfp != NULL) {
+ genelabel = gcontext.label;
+ if (StringHasNoText (genelabel)) {
+ genelabel = NULL;
+ }
+ }
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &ccontext);
+ if (sfp != NULL) {
+ cdslabel = ccontext.label;
+ if (StringHasNoText (cdslabel)) {
+ cdslabel = NULL;
+ }
+ }
+ len = StringLen (organism) + StringLen (genelabel) + StringLen (cdslabel) +
+ StringLen (" mRNA, complete cds.") + 10;
+ str = (CharPtr) MemNew (len * sizeof (Char));
+ if (str == NULL) return;
+ str [0] = '\0';
+
+ if (! StringHasNoText (organism)) {
+ StringCat (str, organism);
+ }
+ if (cdslabel != NULL) {
+ StringCat (str, " ");
+ StringCat (str, cdslabel);
+ }
+ if (genelabel != NULL) {
+ StringCat (str, " (");
+ StringCat (str, genelabel);
+ StringCat (str, ")");
+ }
+ if (cdslabel != NULL && genelabel != NULL) {
+ if (ccontext.partialL || ccontext.partialR) {
+ StringCat (str, " mRNA, partial cds.");
+ } else {
+ StringCat (str, " mRNA, complete cds.");
+ }
+ } else if (genelabel != NULL) {
+ StringCat (str, " mRNA.");
+ }
+ SeqDescrAddPointer (&(bsp->descr), Seq_descr_title, (Pointer) str);
+}
+
+static Boolean LIBCALLBACK DummyACMProc (
+ SeqFeatPtr sfp,
+ SeqMgrFeatContextPtr context
+)
+
+{
+ return TRUE;
+}
+
+static Boolean AmbiguousCdsMrna (BioseqPtr bsp)
+
+{
+ Int2 count;
+ SeqMgrFeatContext fcontext;
+ SeqFeatPtr sfp;
+
+ if (bsp == NULL) return TRUE;
+
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
+ while (sfp != NULL) {
+ count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_mRNA, NULL, 0,
+ CHECK_INTERVALS, NULL, DummyACMProc);
+ if (count > 1) return TRUE;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext);
+ }
+
+ return FALSE;
+}
+
+static SeqIdPtr MakeIdFromLocusTag (SeqFeatPtr mrna)
+
+{
+ Char buf [64], suffix [8];
+ SeqFeatPtr gene;
+ GeneRefPtr grp;
+
+ if (mrna == NULL) return NULL;
+ suffix [0] = '\0';
+ grp = SeqMgrGetGeneXref (mrna);
+ if (grp != NULL) {
+ if (SeqMgrGeneIsSuppressed (grp)) return NULL;
+ }
+ if (grp == NULL) {
+ gene = SeqMgrGetOverlappingGene (mrna->location, NULL);
+ if (gene != NULL) {
+ grp = (GeneRefPtr) gene->data.value.ptrvalue;
+ if (gene->id.choice == 1) {
+ (gene->id.value.intvalue)++;
+ sprintf (suffix, "_%ld", (long) gene->id.value.intvalue);
+ }
+ }
+ }
+ if (grp != NULL) {
+ if (StringDoesHaveText (grp->locus_tag)) {
+ /* StringCpy (buf, "lcl|"); */
+ StringCpy (buf, "gnl|MTRACK|");
+ StringCat (buf, grp->locus_tag);
+ StringCat (buf, suffix);
+ return MakeSeqID (buf);
+ }
+ }
+ return NULL;
+}
+
+static void InstantiateMrnaIntoProt (SeqFeatPtr cds, SeqFeatPtr mrna, Int2Ptr ctrp, Boolean ambig)
+
+{
+ ByteStorePtr bs;
+ Int4 len;
+ BioseqPtr mbsp, pbsp;
+ MolInfoPtr mip;
+ SeqEntryPtr msep, psep;
+ Boolean partial5, partial3;
+ CharPtr rnaseq;
+ ValNodePtr vnp;
+
+ if (cds == NULL || mrna == NULL) return;
+ if (cds->product == NULL || mrna->product != NULL) return;
+
+ pbsp = BioseqFindFromSeqLoc (cds->product);
+ if (pbsp == NULL) return;
+ psep = SeqMgrGetSeqEntryForData (pbsp);
+ if (psep == NULL) return;
+
+ rnaseq = GetSequenceByFeature (mrna);
+ if (rnaseq == NULL) return;
+ len = (Int4) StringLen (rnaseq);
+
+ bs = BSNew (len + 2);
+ if (bs == NULL) return;
+ BSWrite (bs, (VoidPtr) rnaseq, len);
+ MemFree (rnaseq);
+
+ mbsp = BioseqNew ();
+ if (mbsp == NULL) return;
+ mbsp->repr = Seq_repr_raw;
+ mbsp->mol = Seq_mol_rna;
+ mbsp->seq_data_type = Seq_code_iupacna;
+ mbsp->seq_data = bs;
+ mbsp->length = BSLen (bs);
+ BioseqPack (mbsp);
+
+ /*
+ if (! ambig) {
+ mbsp->id = MakeIdFromLocusTag (mrna);
+ }
+ */
+ /* now adds _# suffix to general Seq-id if ambiguous */
+ mbsp->id = MakeIdFromLocusTag (mrna);
+ if (mbsp->id == NULL) {
+ mbsp->id = MakeNewProteinSeqIdEx (mrna->location, NULL, NULL, ctrp);
+ }
+ CheckSeqLocForPartial (mrna->location, &partial5, &partial3);
+ SeqMgrAddToBioseqIndex (mbsp);
+
+ msep = SeqEntryNew ();
+ if (msep == NULL) return;
+ msep->choice = 1;
+ msep->data.ptrvalue = (Pointer) mbsp;
+ SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) mbsp, msep);
+
+ mip = MolInfoNew ();
+ if (mip == NULL) return;
+ mip->biomol = MOLECULE_TYPE_MRNA;
+ if (partial5 && partial3) {
+ mip->completeness = 5;
+ } else if (partial5) {
+ mip->completeness = 3;
+ } else if (partial3) {
+ mip->completeness = 4;
+ }
+ vnp = CreateNewDescriptor (msep, Seq_descr_molinfo);
+ if (vnp != NULL) {
+ vnp->data.ptrvalue = (Pointer) mip;
+ }
+
+ SetSeqFeatProduct (mrna, mbsp);
+
+ /* add cDNA to protein, promoting to nuc-prot set component of genomic product set */
+
+ AddSeqEntryToSeqEntry (psep, msep, FALSE);
+}
+
+static void RemoveFeatIDs (SeqFeatPtr sfp, Pointer userdata)
+
+{
+ if (sfp == NULL) return;
+ if (sfp->id.choice == 1) {
+ sfp->id.choice = 0;
+ sfp->id.value.intvalue = 0;
+ }
+}
+
+typedef struct loopdata {
+ Boolean ambig;
+ Int2 count;
+ SeqFeatPtr mrna;
+} LoopData, PNTR LoopDataPtr;
+
+static Boolean LIBCALLBACK FindSingleMrnaProc (
+ SeqFeatPtr sfp,
+ SeqMgrFeatContextPtr context
+)
+
+{
+ LoopDataPtr ldp;
+
+ ldp = (LoopDataPtr) context->userdata;
+
+ if (sfp->id.choice == 0) {
+ (ldp->count)++;
+ ldp->mrna = sfp;
+ } else {
+ ldp->ambig = TRUE;
+ }
+
+ return TRUE;
+}
+
+static void LoopThroughCDSs (BioseqPtr bsp, Int2Ptr ctrp, N2GPtr ngp)
+
+{
+ Int2 count;
+ SeqMgrFeatContext fcontext;
+ Boolean goOn;
+ LoopData ld;
+ SeqFeatPtr sfp;
+
+ /* loop through CDS features, finding single unused mRNA partner */
+
+ goOn = TRUE;
+ while (goOn) {
+ goOn = FALSE;
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
+ while (sfp != NULL) {
+ if (sfp->id.choice == 0) {
+ ld.ambig = FALSE;
+ ld.count = 0;
+ ld.mrna = NULL;
+ count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_mRNA, NULL, 0,
+ CHECK_INTERVALS, (Pointer) &ld, FindSingleMrnaProc);
+ if (ld.count == 1 && ld.mrna != NULL) {
+ InstantiateMrnaIntoProt (sfp, ld.mrna, ctrp, ld.ambig);
+ sfp->id.choice = 1;
+ ld.mrna->id.choice = 1;
+ goOn = TRUE;
+ }
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext);
+ }
+ }
+
+ /* if any CDSs were not promoted, record failure */
+
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
+ while (sfp != NULL) {
+ if (sfp->id.choice == 0) {
+ if (ngp != NULL) {
+ ngp->failure = TRUE;
+ }
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext);
+ }
+}
+
+static void NPStoGPS (Uint2 entityID, CharPtr filename, N2GPtr ngp)
+
+{
+ BioSourcePtr biop;
+ BioseqPtr bsp;
+ BioseqSetPtr bssp;
+ Int2 ctr = 1;
+ SeqMgrFeatContext gcontext, mcontext;
+ SeqFeatPtr gene, mrna, sfp, lastsfp;
+ GeneRefPtr grp;
+ SeqEntryPtr old, top;
+ CharPtr organism;
+ OrgRefPtr orp;
+ Uint2 parenttype;
+ Pointer parentptr;
+ SeqAnnotPtr sap, annot, lastsap;
+ SeqDescrPtr sdp;
+
+ top = GetTopSeqEntryForEntityID (entityID);
+ if (top == NULL || top->choice != 2) return;
+ bssp = (BioseqSetPtr) top->data.ptrvalue;
+ if (bssp == NULL || bssp->_class != BioseqseqSet_class_nuc_prot) return;
+
+ if (StringHasNoText (filename)) {
+ filename = "?";
+ }
+
+ bsp = FindNucBioseq (top);
+ if (bsp == NULL) {
+ Message (MSG_OK, "Unable to find nucleotide Bioseq in %s", filename);
+ if (ngp != NULL) {
+ ngp->failure = TRUE;
+ }
+ return;
+ }
+
+ /*
+ if (AmbiguousCdsMrna (bsp)) {
+ Message (MSG_POSTERR,
+ "Unable to proceed because of unresolved CDS-mRNA ambiguity in %s",
+ filename);
+ if (ngp != NULL) {
+ ngp->failure = TRUE;
+ }
+ return;
+ }
+ */
+
+ GetSeqEntryParent (top, &parentptr, &parenttype);
+
+ bssp->_class = BioseqseqSet_class_gen_prod_set;
+
+ /* move feature table from nuc-prot set to nucleotide bioseq */
+
+ sap = bssp->annot;
+ bssp->annot = NULL;
+ annot = bsp->annot;
+ if (annot == NULL) {
+ bsp->annot = sap;
+ } else if (sap->next == NULL && annot->next == NULL &&
+ sap->type == 1 && annot->type == 1 &&
+ sap->data != NULL && annot->data != NULL) {
+ lastsfp = (SeqFeatPtr) annot->data;
+ while (lastsfp->next != NULL) {
+ lastsfp = lastsfp->next;
+ }
+ sfp = (SeqFeatPtr) sap->data;
+ lastsfp->next = sfp;
+ sap->data = NULL;
+ SeqAnnotFree (sap);
+ } else {
+ lastsap = bsp->annot;
+ while (lastsap->next != NULL) {
+ lastsap = lastsap->next;
+ }
+ lastsap->next = sap;
+ }
+
+ VisitFeaturesInSep (top, NULL, RemoveFeatIDs);
+
+ old = SeqEntrySetScope (top);
+
+ ctr = (Int2) VisitBioseqsInSep (top, NULL, NULL) + 1;
+
+ /* count mRNAs per overlapping gene */
+
+ mrna = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_mRNA, &mcontext);
+ while (mrna != NULL) {
+ grp = SeqMgrGetGeneXref (mrna);
+ if (grp == NULL) {
+ gene = SeqMgrGetOverlappingGene (mrna->location, NULL);
+ if (gene != NULL) {
+ if (gene->id.choice == 0) {
+ gene->id.choice = 1;
+ gene->id.value.intvalue = 1;
+ } else if (gene->id.choice == 1) {
+ (gene->id.value.intvalue)++;
+ }
+ }
+ }
+ mrna = SeqMgrGetNextFeature (bsp, mrna, 0, FEATDEF_mRNA, &mcontext);
+ }
+
+ /* only leave genes with multiple mRNAs marked */
+
+ gene = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &gcontext);
+ while (gene != NULL) {
+ if (gene->id.choice == 1) {
+ if (gene->id.value.intvalue == 1) {
+ /* if count was 1, clear id */
+ gene->id.value.intvalue = 0;
+ gene->id.choice = 0;
+ } else {
+ /* if count was > 1, just reset count, do not clear id */
+ gene->id.value.intvalue = 0;
+ }
+ }
+ gene = SeqMgrGetNextFeature (bsp, gene, SEQFEAT_GENE, 0, &gcontext);
+ }
+
+ /*
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
+ while (sfp != NULL) {
+ mrna = SeqMgrGetOverlappingFeature (sfp->location, FEATDEF_mRNA, NULL, 0,
+ NULL, CHECK_INTERVALS, &mcontext);
+ if (mrna != NULL) {
+ InstantiateMrnaIntoProt (sfp, mrna, &ctr);
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext);
+ }
+ */
+
+ /* make cDNA bioseq from mRNA feature, package with protein product */
+
+ LoopThroughCDSs (bsp, &ctr, ngp);
+
+ SeqMgrLinkSeqEntry (top, parenttype, parentptr);
+
+ SeqEntrySetScope (old);
+
+ SeqMgrClearFeatureIndexes (bssp->idx.entityID, NULL);
+
+ VisitFeaturesInSep (top, NULL, RemoveFeatIDs);
+
+ /* need to reindex to get mRNA and CDS features from cDNA and protein */
+ SeqMgrIndexFeatures (bssp->idx.entityID, NULL);
+ VisitSetsInSet (bssp, NULL, LclMakeNucProtCDS);
+
+ /* need to reindex before copying genes, instantiating protein titles */
+ SeqMgrIndexFeatures (bssp->idx.entityID, NULL);
+
+ VisitFeaturesInSep (top, NULL, LclCopyGene);
+
+ /* need to reindex before instantiating mRNA titles */
+ SeqMgrIndexFeatures (bssp->idx.entityID, NULL);
+
+ organism = NULL;
+ for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
+ if (sdp->choice != Seq_descr_source) continue;
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop == NULL) continue;
+ orp = biop->org;
+ if (orp == NULL) continue;
+ if (! StringHasNoText (orp->taxname)) {
+ organism = orp->taxname;
+ }
+ }
+
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_mRNA, &mcontext);
+ while (sfp != NULL) {
+ LclAddMrnaTitles (sfp->product, organism);
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_mRNA, &mcontext);
+ }
+
+ SeqMgrClearFeatureIndexes (bssp->idx.entityID, NULL);
+
+ move_cds (top);
+}
+
+static void ProcessOneRecord (
+ CharPtr filename,
+ Pointer userdata
+)
+
+{
+ AsnIoPtr aip;
+ BioseqPtr bsp;
+ ValNodePtr bsplist;
+ BioseqSetPtr bssp;
+ Pointer dataptr = NULL;
+ Uint2 datatype, entityID = 0;
+ SeqDescrPtr descr1, descr2;
+ Char file [FILENAME_MAX], id [42], path [PATH_MAX];
+ FILE *fp;
+ N2GPtr ngp;
+ SeqEntryPtr nsep, sep;
+ CharPtr ptr, str;
+ SeqIdPtr sip;
+
+ if (StringHasNoText (filename)) return;
+ ngp = (N2GPtr) userdata;
+ if (ngp == NULL) return;
+
+ fp = FileOpen (filename, "r");
+ if (fp == NULL) {
+ Message (MSG_POSTERR, "Failed to open '%s'", filename);
+ return;
+ }
+
+ dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
+
+ FileClose (fp);
+
+ entityID = ObjMgrRegister (datatype, dataptr);
+
+ if (entityID < 1 || dataptr == NULL) {
+ Message (MSG_POSTERR, "Data read failed for input file '%s'", filename);
+ return;
+ }
+
+ if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
+ datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
+
+ sep = GetTopSeqEntryForEntityID (entityID);
+
+ if (sep == NULL) {
+ sep = SeqEntryNew ();
+ if (sep != NULL) {
+ if (datatype == OBJ_BIOSEQ) {
+ bsp = (BioseqPtr) dataptr;
+ sep->choice = 1;
+ sep->data.ptrvalue = bsp;
+ SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
+ } else if (datatype == OBJ_BIOSEQSET) {
+ bssp = (BioseqSetPtr) dataptr;
+ sep->choice = 2;
+ sep->data.ptrvalue = bssp;
+ SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
+ } else {
+ sep = SeqEntryFree (sep);
+ }
+ }
+ sep = GetTopSeqEntryForEntityID (entityID);
+ }
+
+ if (sep != NULL) {
+ bsplist = NULL;
+
+ str = StringRChr (filename, DIRDELIMCHR);
+ if (str != NULL) {
+ str++;
+ } else {
+ str = filename;
+ }
+ StringCpy (file, str);
+
+ bsp = FindNucBioseq (sep);
+ id [0] = '\0';
+ if (bsp != NULL) {
+ sip = SeqIdFindWorst (bsp->id);
+ SeqIdWrite (sip, id, PRINTID_REPORT, sizeof (id));
+ Message (MSG_POST, "Processing %s (%s)", id, file);
+ }
+
+ SeqMgrIndexFeatures (entityID, NULL);
+
+ /* remove pubs and biosources from nuc-prot set and nucleotide bioseq */
+
+ nsep = FindNucSeqEntry (sep);
+ descr1 = ExtractBioSourceAndPubs (sep);
+ descr2 = ExtractBioSourceAndPubs (nsep);
+ if (descr1 == NULL) {
+ descr1 = descr2;
+ } else if (descr2 != NULL) {
+ ValNodeLink (&descr1, descr2);
+ }
+
+ /* convert from nuc-prot set to genomic-product set */
+
+ NPStoGPS (entityID, file, ngp);
+
+ /* put pubs and biosources onto genomic-product set */
+
+ ReplaceBioSourceAndPubs (sep, descr1);
+
+ if (ngp->failure) {
+ Message (MSG_POST, "Not saving %s", file);
+ } else if (StringDoesHaveText (ngp->outfile)) {
+ aip = AsnIoOpen (ngp->outfile, "w");
+ if (aip != NULL) {
+ SeqEntryAsnWrite (sep, aip, NULL);
+ AsnIoClose (aip);
+ }
+ } else {
+ str = StringRChr (filename, DIRDELIMCHR);
+ if (str != NULL) {
+ *str = '\0';
+ str++;
+ } else {
+ str = filename;
+ }
+ if (StringDoesHaveText (ngp->results)) {
+ StringCpy (path, ngp->results);
+ } else {
+ StringCpy (path, filename);
+ }
+ if (str != NULL) {
+ ptr = StringRChr (str, '.');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ StringCpy (file, str);
+ StringCat (file, ".gps");
+ FileBuildPath (path, NULL, file);
+ aip = AsnIoOpen (path, "w");
+ if (aip != NULL) {
+ SeqEntryAsnWrite (sep, aip, NULL);
+ AsnIoClose (aip);
+ }
+ }
+ }
+ }
+
+ } else {
+
+ Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
+ }
+
+ ObjMgrFree (datatype, dataptr);
+}
+
+/* Args structure contains command-line arguments */
+
+#define p_argInputPath 0
+#define r_argOutputPath 1
+#define i_argInputFile 2
+#define o_argOutputFile 3
+#define f_argFilter 4
+#define x_argSuffix 5
+
+
+Args myargs [] = {
+ {"Path to Files", NULL, NULL, NULL,
+ TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
+ {"Path for Results", NULL, NULL, NULL,
+ TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
+ {"Single Input File", "stdin", NULL, NULL,
+ TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Single Output File", "stdout", NULL, NULL,
+ TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
+ {"Substring Filter", NULL, NULL, NULL,
+ TRUE, 'f', ARG_STRING, 0.0, 0, NULL},
+ {"File Selection Suffix", ".ent", NULL, NULL,
+ TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
+};
+
+Int2 Main (void)
+
+{
+ Char app [64];
+ CharPtr directory, filter, infile, outfile, results, suffix;
+ N2GData ngd;
+ N2GPtr ngp;
+
+ /* standard setup */
+
+ ErrSetFatalLevel (SEV_MAX);
+ ErrClearOptFlags (EO_SHOW_USERSTR);
+ ErrSetLogfile ("stderr", ELOG_APPEND);
+ UseLocalAsnloadDataAndErrMsg ();
+ ErrPathReset ();
+
+ if (! AllObjLoad ()) {
+ Message (MSG_FATAL, "AllObjLoad failed");
+ return 1;
+ }
+ if (! SubmitAsnLoad ()) {
+ Message (MSG_FATAL, "SubmitAsnLoad failed");
+ return 1;
+ }
+ if (! FeatDefSetLoad ()) {
+ Message (MSG_FATAL, "FeatDefSetLoad failed");
+ return 1;
+ }
+ if (! SeqCodeSetLoad ()) {
+ Message (MSG_FATAL, "SeqCodeSetLoad failed");
+ return 1;
+ }
+ if (! GeneticCodeTableLoad ()) {
+ Message (MSG_FATAL, "GeneticCodeTableLoad failed");
+ return 1;
+ }
+
+ /* process command line arguments */
+
+ sprintf (app, "nps2gps %s", NPS2GPSAPPLICATION);
+ if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
+ return 0;
+ }
+
+ MemSet ((Pointer) &ngd, 0, sizeof (N2GData));
+ ngp = &ngd;
+ ngd.failure = FALSE;
+
+ directory = (CharPtr) myargs [p_argInputPath].strvalue;
+ results = (CharPtr) myargs [r_argOutputPath].strvalue;
+ if (StringHasNoText (results)) {
+ results = NULL;
+ }
+ infile = (CharPtr) myargs [i_argInputFile].strvalue;
+ outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
+ filter = (CharPtr) myargs [f_argFilter].strvalue;
+ suffix = (CharPtr) myargs [x_argSuffix].strvalue;
+
+ /* process input file */
+
+ if (StringDoesHaveText (directory)) {
+
+ ngd.results = results;
+
+ DirExplore (directory, filter, suffix, ProcessOneRecord, (Pointer) &ngd);
+
+ } else if (StringDoesHaveText (infile) && StringDoesHaveText (outfile)) {
+
+ ngd.outfile = outfile;
+
+ ProcessOneRecord (infile, (Pointer) &ngd);
+ }
+
+ if (ngd.failure) return 1;
+
+ return 0;
+}
+
diff --git a/demo/rpsblast.c b/demo/rpsblast.c
index 07b1bb59..4e134e67 100644
--- a/demo/rpsblast.c
+++ b/demo/rpsblast.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: rpsblast.c,v 6.67 2005/04/27 14:55:09 papadopo Exp $";
+static char const rcsid[] = "$Id: rpsblast.c,v 6.70 2005/06/02 20:43:38 dondosha Exp $";
-/* $Id: rpsblast.c,v 6.67 2005/04/27 14:55:09 papadopo Exp $
+/* $Id: rpsblast.c,v 6.70 2005/06/02 20:43:38 dondosha Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,12 +31,23 @@ static char const rcsid[] = "$Id: rpsblast.c,v 6.67 2005/04/27 14:55:09 papadopo
*
* Initial Version Creation Date: 12/14/1999
*
-* $Revision: 6.67 $
+* $Revision: 6.70 $
*
* File Description:
* Main file for RPS BLAST program
*
* $Log: rpsblast.c,v $
+* Revision 6.70 2005/06/02 20:43:38 dondosha
+* 1. Added loop over concatenated query sets;
+* 2. Removed unused variables and other clean-up;
+* 3. Use BlastFormattingInfo structure for formatting.
+*
+* Revision 6.69 2005/05/20 18:57:51 camacho
+* Update to use new signature to BLAST_FillLookupTableOptions
+*
+* Revision 6.68 2005/05/02 17:00:28 coulouri
+* change default to new engine
+*
* Revision 6.67 2005/04/27 14:55:09 papadopo
* change signature of BlastFillHitSavingOptions
*
@@ -260,7 +271,7 @@ static char const rcsid[] = "$Id: rpsblast.c,v 6.67 2005/04/27 14:55:09 papadopo
#include <algo/blast/api/blast_api.h>
#include <algo/blast/api/blast_seq.h>
-#if PURIFY
+#ifdef PURIFY
#include "/am/purew/solaris2/new/../purify/purify-4.5-solaris2/purify.h"
#endif
@@ -378,8 +389,8 @@ static Args myargs[] = {
" 0 in 'stop' refers to the end of the sequence",
"0,0", NULL, NULL, TRUE, 'L', ARG_STRING, 0.0, 0, NULL},
/* OPT_FORCE_OLD_ENGINE */
- { "Force use of the original BLAST engine",
- "T", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL},
+ { "Force use of the legacy BLAST engine",
+ "F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL},
};
@@ -389,7 +400,7 @@ static Args myargs[] = {
in blast_driver.c; unneeded setup is removed */
static Int2
-BLAST_FillOptions(SBlastOptions* options)
+s_FillOptions(SBlastOptions* options)
{
Int2 status;
EBlastProgramType program_number = options->program;
@@ -406,8 +417,7 @@ BLAST_FillOptions(SBlastOptions* options)
FALSE, /* megablast */
0, /* default threshold */
0, /* default wordsize */
- FALSE, /* no variable wordsize */
- FALSE);/* no PSSM (i.e. not psiblast) */
+ FALSE);/* no variable wordsize */
BLAST_FillQuerySetUpOptions(query_setup_options, program_number,
myargs[OPT_FILTER].strvalue, 0);
@@ -485,32 +495,24 @@ Int2 Main_New(void)
{
Int2 status;
Boolean query_is_na;
- LookupTableOptions* lookup_options;
char* blast_program = NULL;
EBlastProgramType program_number;
- BlastInitialWordOptions* word_options;
- BlastScoringOptions* score_options;
- BlastExtensionOptions* ext_options;
- BlastHitSavingOptions* hit_options;
char* dbname = NULL;
FILE *infp;
Int2 ctr = 1;
Int4 letters_read;
Int4 query_from = 0, query_to = 0;
SBlastOptions* options = NULL;
- QuerySetUpOptions* query_options=NULL;
- BlastEffectiveLengthsOptions* eff_len_options=NULL;
SeqLoc* query_slp = NULL;
SeqAlign* seqalign = NULL;
- BlastFormattingOptions* format_options;
- PSIBlastOptions* psi_options = NULL;
- BlastDatabaseOptions* db_options = NULL;
+ BlastFormattingInfo* format_info = NULL;
Blast_SummaryReturn* sum_returns=Blast_SummaryReturnNew();
- Boolean believe_defline = FALSE;
Int4 num_queries = 0;
SeqLoc* lcase_mask = NULL;
SeqLoc* filter_loc = NULL;
Boolean mask_at_hash;
+ const int kMaxConcatLength = 40000;
+ Blast_SummaryReturn* full_sum_returns = NULL;
/* select protein or nucleotide query, and choose
the appropriate program type */
@@ -537,32 +539,9 @@ Int2 Main_New(void)
dbname = myargs[OPT_DB].strvalue;
- BLAST_FillOptions(options);
+ s_FillOptions(options);
program_number = options->program;
- if ((status = BlastFormattingOptionsNew(program_number,
- myargs[OPT_OUTPUT_FILE].strvalue,
- myargs[OPT_NUM_DESC].intvalue,
- myargs[OPT_NUM_RESULTS].intvalue,
- myargs[OPT_FORMAT].intvalue, &format_options)) != 0) {
- ErrPostEx(SEV_FATAL, 1, 0, "Formatting setup failed");
- }
-
- if (myargs[OPT_HTML].intvalue) {
- format_options->html = TRUE;
- format_options->align_options += TXALIGN_HTML;
- format_options->print_options += TXALIGN_HTML;
- }
- if (myargs[OPT_SHOW_GI].intvalue) {
- format_options->align_options += TXALIGN_SHOW_GI;
- format_options->print_options += TXALIGN_SHOW_GI;
- }
-
- BLAST_PrintOutputHeader(format_options, FALSE, blast_program,
- dbname, TRUE);
-
- believe_defline = myargs[OPT_BELIEVE_QUERY].intvalue;
-
/* Get the query */
if ((infp = FileOpen(myargs[OPT_QUERY_FILE].strvalue, "r")) == NULL) {
@@ -576,88 +555,116 @@ Int2 Main_New(void)
ErrPostEx(SEV_FATAL, 1, 0, "Invalid subject sequence location\n");
return -1;
}
+
if ((query_from != 0 || query_to != 0) &&
(Boolean)myargs[OPT_PROT_QUERY].intvalue == FALSE) {
ErrPostEx(SEV_FATAL, 1, 0, "No query range allowed for nucleotide query");
return -1;
}
- if ((Boolean)myargs[OPT_LCASE].intvalue) {
- letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na, 0, 0,
- query_from, query_to, &lcase_mask,
- &query_slp, &ctr, &num_queries,
- believe_defline);
- } else {
- letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na, 0, 0,
- query_from, query_to, NULL,
- &query_slp, &ctr, &num_queries,
- believe_defline);
- }
- if (letters_read <= 0) {
- ErrPostEx(SEV_FATAL, 1, 0, "Failure reading query\n");
- return -1;
- }
-
- /* Call database search function. Pass NULL for tabular formatting structure
- * pointer, because on-the-fly tabular formatting is not allowed for RPS
- * BLAST.
- */
- status =
- Blast_DatabaseSearch(query_slp, dbname, lcase_mask, options, NULL,
- &seqalign, &filter_loc, &mask_at_hash, sum_returns);
-
- /* Free the lower case mask in SeqLoc form. */
- lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);
-
- /* If masking was done for lookup table only, free the masking locations,
- because they will not be used for formatting. */
- if (mask_at_hash)
+ BlastFormattingInfoNew(myargs[OPT_FORMAT].intvalue, options, blast_program,
+ dbname, myargs[OPT_OUTPUT_FILE].strvalue,
+ &format_info);
+
+ /* This is not megablast, so pass FALSE for the respective arguments. */
+ BlastFormattingInfoSetUpOptions(format_info, myargs[OPT_NUM_DESC].intvalue,
+ myargs[OPT_NUM_RESULTS].intvalue,
+ (Boolean) myargs[OPT_HTML].intvalue,
+ FALSE, (Boolean) myargs[OPT_SHOW_GI].intvalue,
+ (Boolean) myargs[OPT_BELIEVE_QUERY].intvalue);
+
+ BLAST_PrintOutputHeader(format_info);
+
+ /* Loop over sets of queries. */
+ while (1) {
+
+ if ((Boolean)myargs[OPT_LCASE].intvalue) {
+ letters_read =
+ BLAST_GetQuerySeqLoc(infp, query_is_na, 0, kMaxConcatLength,
+ query_from, query_to, &lcase_mask,
+ &query_slp, &ctr, &num_queries,
+ myargs[OPT_BELIEVE_QUERY].intvalue);
+ } else {
+ letters_read =
+ BLAST_GetQuerySeqLoc(infp, query_is_na, 0, kMaxConcatLength,
+ query_from, query_to, NULL,
+ &query_slp, &ctr, &num_queries,
+ myargs[OPT_BELIEVE_QUERY].intvalue);
+ }
+
+ if (letters_read <= 0)
+ break;
+
+ /* Call database search function. Pass NULL for tabular formatting
+ * structure pointer, because on-the-fly tabular formatting is not
+ * allowed for RPS BLAST.
+ */
+ status =
+ Blast_DatabaseSearch(query_slp, dbname, lcase_mask, options, NULL,
+ &seqalign, &filter_loc, &mask_at_hash,
+ sum_returns);
+
+ /* Free the lower case mask in SeqLoc form. */
+ lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);
+
+ /* If masking was done for lookup table only, free the masking locations,
+ because they will not be used for formatting. */
+ if (mask_at_hash)
+ filter_loc = Blast_ValNodeMaskListFree(filter_loc);
+
+ /* Post warning or error messages, no matter what the search status
+ was. */
+ Blast_SummaryReturnsPostError(sum_returns);
+
+ if (status != 0) {
+ ErrPostEx(SEV_FATAL, 1, 0, "BLAST search failed");
+ return status;
+ }
+
+ /* do initial cleanup */
+
+ /* format results */
+
+ if (myargs[OPT_ASNOUT].strvalue) {
+ AsnIoPtr asnout = AsnIoOpen(myargs[OPT_ASNOUT].strvalue, (char*)"w");
+ GenericSeqAlignSetAsnWrite(seqalign, asnout);
+ asnout = AsnIoClose(asnout);
+ }
+
+ status =
+ BLAST_FormatResults(seqalign, num_queries, query_slp, filter_loc,
+ format_info, sum_returns);
+
+ /* finish cleanup */
filter_loc = Blast_ValNodeMaskListFree(filter_loc);
-
- /* Post warning or error messages, no matter what the search status was. */
- Blast_SummaryReturnsPostError(sum_returns);
-
- if (status != 0) {
- ErrPostEx(SEV_FATAL, 1, 0, "BLAST search failed");
- return status;
- }
-
- /* do initial cleanup */
-
- /* format results */
-
- if (myargs[OPT_ASNOUT].strvalue) {
- AsnIoPtr asnout = AsnIoOpen(myargs[OPT_ASNOUT].strvalue, (char*)"w");
- GenericSeqAlignSetAsnWrite(seqalign, asnout);
- asnout = AsnIoClose(asnout);
+ seqalign = SeqAlignSetFree(seqalign);
+ query_slp = SeqLocSetFree(query_slp);
+
+ /* Update the cumulative summary returns structure and clean the returns
+ substructures for the current search iteration. */
+ Blast_SummaryReturnUpdate(sum_returns, &full_sum_returns);
+ Blast_SummaryReturnClean(sum_returns);
}
-
- status = BLAST_FormatResults(seqalign, dbname, blast_program,
- num_queries, query_slp,
- filter_loc, format_options,
- FALSE, NULL, sum_returns);
-
- /* finish cleanup */
- filter_loc = Blast_ValNodeMaskListFree(filter_loc);
- seqalign = SeqAlignSetFree(seqalign);
- query_slp = SeqLocSetFree(query_slp);
- options = SBlastOptionsFree(options);
- /* Print the footer with summary information */
- Blast_PrintOutputFooter(program_number, format_options, dbname, sum_returns);
+ if (infp)
+ FileClose(infp);
+
+ /* Print the footer with summary information. */
+ Blast_PrintOutputFooter(format_info, full_sum_returns);
sum_returns = Blast_SummaryReturnFree(sum_returns);
+ full_sum_returns = Blast_SummaryReturnFree(full_sum_returns);
- if(format_options->html && myargs[OPT_FORMAT].intvalue < 7) {
- fprintf(format_options->outfp, "</PRE>\n</BODY>\n</HTML>\n");
- }
- BlastFormattingOptionsFree(format_options);
+ format_info = BlastFormattingInfoFree(format_info);
+ options = SBlastOptionsFree(options);
+
return status;
}
/*------------------- RPS BLAST USING OLD ENGINE ------------------------*/
-void PGPGetPrintOptions(Boolean gapped, Uint4Ptr align_options_out,
+static void
+s_PGPGetPrintOptions(Boolean gapped, Uint4Ptr align_options_out,
Uint4Ptr print_options_out)
{
Uint4 print_options, align_options;
@@ -708,7 +715,8 @@ void PGPGetPrintOptions(Boolean gapped, Uint4Ptr align_options_out,
return;
}
-void RPSBlastOptionsFree(RPSBlastOptionsPtr rpsbop)
+static void
+s_RPSBlastOptionsFree(RPSBlastOptionsPtr rpsbop)
{
FileClose(rpsbop->outfp);
@@ -723,11 +731,11 @@ void RPSBlastOptionsFree(RPSBlastOptionsPtr rpsbop)
return;
}
-static RPSBlastOptionsPtr RPSReadBlastOptions(void)
+static RPSBlastOptionsPtr
+s_RPSReadBlastOptions(void)
{
RPSBlastOptionsPtr rpsbop;
BLAST_OptionsBlkPtr options;
- static Int4 count=0;
CharPtr location = NULL;
rpsbop = MemNew(sizeof(RPSBlastOptions));
@@ -791,8 +799,8 @@ static RPSBlastOptionsPtr RPSReadBlastOptions(void)
rpsbop->asn1_mode = StringSave("wb");
}
else
- PGPGetPrintOptions(options->gapped_calculation,
- &rpsbop->align_options, &rpsbop->print_options);
+ s_PGPGetPrintOptions(options->gapped_calculation,
+ &rpsbop->align_options, &rpsbop->print_options);
if (!rpsbop->is_xml_output &&
@@ -882,9 +890,9 @@ static RPSBlastOptionsPtr RPSReadBlastOptions(void)
return rpsbop;
}
-static void RPSViewSeqAlign(BioseqPtr query_bsp,
- SeqAlignPtr seqalign, RPSBlastOptionsPtr rpsbop,
- ValNodePtr other_returns)
+static void
+s_RPSViewSeqAlign(BioseqPtr query_bsp, SeqAlignPtr seqalign,
+ RPSBlastOptionsPtr rpsbop, ValNodePtr other_returns)
{
SeqAnnotPtr seqannot;
BlastPruneSapStructPtr prune;
@@ -978,7 +986,8 @@ static void RPSViewSeqAlign(BioseqPtr query_bsp,
return;
}
-Boolean RPSFormatFooter(RPSBlastOptionsPtr rpsbop, ValNodePtr other_returns)
+static Boolean
+s_RPSFormatFooter(RPSBlastOptionsPtr rpsbop, ValNodePtr other_returns)
{
ValNodePtr mask_loc, vnp;
BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
@@ -1044,7 +1053,8 @@ Boolean RPSFormatFooter(RPSBlastOptionsPtr rpsbop, ValNodePtr other_returns)
return TRUE;
}
-static SeqEntryPtr LIBCALLBACK RPSGetNextSeqEntry(SeqLocPtr PNTR slp, VoidPtr data)
+static SeqEntryPtr LIBCALLBACK
+s_RPSGetNextSeqEntry(SeqLocPtr PNTR slp, VoidPtr data)
{
SeqEntryPtr sep;
static TNlmMutex read_mutex;
@@ -1091,11 +1101,10 @@ static SeqEntryPtr LIBCALLBACK RPSGetNextSeqEntry(SeqLocPtr PNTR slp, VoidPtr da
return sep;
}
-static Boolean LIBCALLBACK RPSResultsCallback(BioseqPtr query_bsp,
- RPSBlastOptionsPtr rpsbop,
- SeqAlignPtr seqalign,
- ValNodePtr other_returns,
- ValNodePtr error_returns, VoidPtr data)
+static Boolean LIBCALLBACK
+s_RPSResultsCallback(BioseqPtr query_bsp, RPSBlastOptionsPtr rpsbop,
+ SeqAlignPtr seqalign, ValNodePtr other_returns,
+ ValNodePtr error_returns, VoidPtr data)
{
if(rpsbop->is_xml_output == TRUE) {
AsnIoPtr aip = AsnIoOpen(rpsbop->out_filename, "wx");
@@ -1116,8 +1125,8 @@ static Boolean LIBCALLBACK RPSResultsCallback(BioseqPtr query_bsp,
rpsbop->believe_query, 0, 0, rpsbop->outfp,
FALSE);
} else {
- RPSViewSeqAlign(query_bsp, seqalign, rpsbop, other_returns);
- RPSFormatFooter(rpsbop, other_returns);
+ s_RPSViewSeqAlign(query_bsp, seqalign, rpsbop, other_returns);
+ s_RPSFormatFooter(rpsbop, other_returns);
}
return TRUE;
@@ -1132,7 +1141,7 @@ Int2 Main_Old(void)
if((fd = FileOpen(myargs[OPT_OUTPUT_FILE].strvalue, "w")) != NULL)
FileClose(fd);
- if((rpsbop = RPSReadBlastOptions()) == NULL) {
+ if((rpsbop = s_RPSReadBlastOptions()) == NULL) {
ErrPostEx(SEV_FATAL, 1, 0, "Unable to create RPS Blast options");
return 1;
}
@@ -1140,9 +1149,11 @@ Int2 Main_Old(void)
if (!rpsbop->is_xml_output && !rpsbop->is_tabular && !rpsbop->is_asn1_output)
BlastPrintVersionInfo("RPS-BLAST", rpsbop->html, rpsbop->outfp);
- RPSBlastSearchMT(rpsbop, RPSGetNextSeqEntry, NULL,
- RPSResultsCallback, NULL);
- RPSBlastOptionsFree(rpsbop);
+ RPSBlastSearchMT(rpsbop, s_RPSGetNextSeqEntry, NULL,
+ s_RPSResultsCallback, NULL);
+ s_RPSBlastOptionsFree(rpsbop);
+
+ return 0;
}
/*-------------------------- TOP LEVEL DRIVER -------------------------*/
diff --git a/demo/tbl2asn.c b/demo/tbl2asn.c
index b2164a6e..a5fa5057 100644
--- a/demo/tbl2asn.c
+++ b/demo/tbl2asn.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/5/00
*
-* $Revision: 6.107 $
+* $Revision: 6.111 $
*
* File Description:
*
@@ -64,7 +64,7 @@
#include <pmfapi.h>
#include <tax3api.h>
-#define TBL2ASN_APP_VER "3.6"
+#define TBL2ASN_APP_VER "3.8"
CharPtr TBL2ASN_APPLICATION = TBL2ASN_APP_VER;
@@ -1235,7 +1235,8 @@ static void ProcessOneNuc (
Uint2 entityID,
BioseqPtr bsp,
BioSourcePtr src,
- TblArgsPtr tbl
+ TblArgsPtr tbl,
+ MolInfoPtr template_molinfo
)
{
@@ -1306,6 +1307,10 @@ static void ProcessOneNuc (
if (stp != NULL) {
mip = ParseTitleIntoMolInfo (stp, mip);
}
+ if (mip->biomol == 0 && template_molinfo != NULL)
+ {
+ mip->biomol = template_molinfo->biomol;
+ }
if (mip->biomol == 0) {
mip->biomol = MOLECULE_TYPE_GENOMIC;
}
@@ -1590,7 +1595,8 @@ static Uint2 ProcessOneAsn (
BioSourcePtr src,
TblArgsPtr tbl,
CharPtr localname,
- SeqEntryPtr gsep
+ SeqEntryPtr gsep,
+ MolInfoPtr template_molinfo
)
{
@@ -1621,6 +1627,14 @@ static Uint2 ProcessOneAsn (
} else if (datatype == OBJ_BIOSEQSET) {
bssp->seq_set = SeqMgrGetSeqEntryForData (dataptr);
SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) dataptr, gsep);
+ } else if (datatype == OBJ_SEQENTRY) {
+ sep = (SeqEntryPtr) dataptr;
+ bssp->seq_set = sep;
+ if (IS_Bioseq (sep)) {
+ SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) sep->data.ptrvalue, gsep);
+ } else if (IS_Bioseq_set (sep)) {
+ SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) sep->data.ptrvalue, gsep);
+ } else return 0;
} else return 0;
SeqMgrLinkSeqEntry (gsep, parenttype, parentptr);
@@ -1649,7 +1663,7 @@ static Uint2 ProcessOneAsn (
}
}
- ProcessOneNuc (entityID, bsp, src, tbl);
+ ProcessOneNuc (entityID, bsp, src, tbl, template_molinfo);
return entityID;
}
@@ -1657,7 +1671,8 @@ static Uint2 ProcessOneAsn (
static Uint2 ProcessBulkSet (
FILE* fp,
BioSourcePtr src,
- TblArgsPtr tbl
+ TblArgsPtr tbl,
+ MolInfoPtr template_molinfo
)
{
@@ -1733,7 +1748,7 @@ while ((bsp = ReadDeltaFasta (fp, NULL)) != NULL) {
}
lastsep = sep;
- ProcessOneNuc (entityID, bsp, src, tbl);
+ ProcessOneNuc (entityID, bsp, src, tbl, template_molinfo);
}
SeqMgrLinkSeqEntry (topsep, 0, NULL);
@@ -1776,7 +1791,8 @@ static Uint2 ProcessDeltaSet (
BioSourcePtr src,
TblArgsPtr tbl,
CharPtr localname,
- SeqEntryPtr gsep
+ SeqEntryPtr gsep,
+ MolInfoPtr template_molinfo
)
{
@@ -1847,7 +1863,7 @@ static Uint2 ProcessDeltaSet (
entityID = ObjMgrRegister (OBJ_BIOSEQ, (Pointer) bsp);
}
- ProcessOneNuc (entityID, bsp, src, tbl);
+ ProcessOneNuc (entityID, bsp, src, tbl, template_molinfo);
return entityID;
}
@@ -1962,7 +1978,7 @@ static Uint2 ProcessDeltaSet (
}
}
- ProcessOneNuc (entityID, deltabsp, src, tbl);
+ ProcessOneNuc (entityID, deltabsp, src, tbl, template_molinfo);
return entityID;
}
@@ -2029,7 +2045,8 @@ static void ShowAlignmentNotes
static Uint2 ProcessAlignSet (
FILE *fp,
BioSourcePtr src,
- TblArgsPtr tbl
+ TblArgsPtr tbl,
+ MolInfoPtr template_molinfo
)
{
@@ -2129,7 +2146,7 @@ static Uint2 ProcessAlignSet (
if (IS_Bioseq (sep)) {
bsp = (BioseqPtr) sep->data.ptrvalue;
entityID = ObjMgrRegister (OBJ_BIOSEQ, (Pointer) bsp);
- ProcessOneNuc (entityID, bsp, src, tbl);
+ ProcessOneNuc (entityID, bsp, src, tbl, template_molinfo);
} else if (IS_Bioseq_set (sep)) {
bssp = (BioseqSetPtr) sep->data.ptrvalue;
bssp->_class = BioseqseqSet_class_phy_set;
@@ -2137,7 +2154,7 @@ static Uint2 ProcessAlignSet (
for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
if (IS_Bioseq (tmp)) {
bsp = (BioseqPtr) tmp->data.ptrvalue;
- ProcessOneNuc (entityID, bsp, src, tbl);
+ ProcessOneNuc (entityID, bsp, src, tbl, template_molinfo);
}
}
} else return 0;
@@ -3086,6 +3103,46 @@ static void ProcessSourceTable (
}
}
+static SeqDescrPtr GetDescriptorTypeAlreadyInList (Uint1 descr_choice, SeqDescrPtr list)
+
+{
+ while (list != NULL && list->choice != descr_choice)
+ {
+ list = list->next;
+ }
+ return list;
+}
+
+static void AddTemplateDescriptors
+(
+ SeqDescrPtr PNTR current_list,
+ SeqDescrPtr new_list
+)
+
+{
+ SeqDescrPtr sdp_next, sdp;
+
+ if (current_list == NULL || new_list == NULL) return;
+
+ for (sdp = new_list; sdp != NULL; sdp = sdp->next)
+ {
+ if ((sdp->choice == Seq_descr_molinfo || sdp->choice == Seq_descr_source)
+ && GetDescriptorTypeAlreadyInList (Seq_descr_molinfo, *current_list) != NULL)
+ {
+ continue;
+ }
+ else
+ {
+ sdp_next = sdp->next;
+ sdp->next = NULL;
+ ValNodeLink (current_list, AsnIoMemCopy ((Pointer) sdp,
+ (AsnReadFunc) SeqDescrAsnRead,
+ (AsnWriteFunc) SeqDescrAsnWrite));
+ sdp->next = sdp_next;
+ }
+ }
+}
+
static void ProcessOneRecord (
SubmitBlockPtr sbp,
PubdescPtr pdp,
@@ -3132,6 +3189,7 @@ static void ProcessOneRecord (
SimpleSeqPtr ssp;
CharPtr str;
SeqEntryPtr tmp;
+ MolInfoPtr template_molinfo = NULL;
fp = OpenOneFile (directory, base, suffix);
if (fp == NULL) return;
@@ -3154,16 +3212,25 @@ static void ProcessOneRecord (
localname = base;
}
+ /* find MolInfo from template, if there is any */
+ sdp = sdphead;
+ while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
+ sdp = sdp->next;
+ }
+ if (sdp != NULL) {
+ template_molinfo = (MolInfoPtr) sdp->data.ptrvalue;
+ }
+
/* read one or more ASN.1 or FASTA sequence files */
if (tbl->fastaset) {
- entityID = ProcessBulkSet (fp, src, tbl);
+ entityID = ProcessBulkSet (fp, src, tbl, template_molinfo);
} else if (tbl->deltaset) {
- entityID = ProcessDeltaSet (fp, src, tbl, localname, gsep);
+ entityID = ProcessDeltaSet (fp, src, tbl, localname, gsep, template_molinfo);
} else if (tbl->alignset) {
- entityID = ProcessAlignSet (fp, src, tbl);
+ entityID = ProcessAlignSet (fp, src, tbl, template_molinfo);
} else {
- entityID = ProcessOneAsn (fp, src, tbl, localname, gsep);
+ entityID = ProcessOneAsn (fp, src, tbl, localname, gsep, template_molinfo);
}
FileClose (fp);
@@ -3359,16 +3426,10 @@ static void ProcessOneRecord (
if (sdphead != NULL) {
if (IS_Bioseq (sep)) {
bsp = (BioseqPtr) sep->data.ptrvalue;
- ValNodeLink (&(bsp->descr),
- AsnIoMemCopy ((Pointer) sdphead,
- (AsnReadFunc) SeqDescrAsnRead,
- (AsnWriteFunc) SeqDescrAsnWrite));
+ AddTemplateDescriptors (&(bsp->descr), sdphead);
} else if (IS_Bioseq_set (sep)) {
dssp = (BioseqSetPtr) sep->data.ptrvalue;
- ValNodeLink (&(dssp->descr),
- AsnIoMemCopy ((Pointer) sdphead,
- (AsnReadFunc) SeqDescrAsnRead,
- (AsnWriteFunc) SeqDescrAsnWrite));
+ AddTemplateDescriptors (&(dssp->descr), sdphead);
}
}
dp = DateCurr ();
@@ -3412,11 +3473,9 @@ static void ProcessOneRecord (
VisitFeaturesInSep (sep, NULL, RemoveUnnecGeneXref);
}
- if (SeqMgrFeaturesAreIndexed (entityID)) {
- InstantiateProteinTitles (entityID, NULL);
- } else {
- Message (MSG_POSTERR, "Unable to instantiate protein titles due to dropped index");
- }
+ /* need to reindex so hypothetical protein titles pick up locus_tag */
+ SeqMgrIndexFeatures (entityID, NULL);
+ InstantiateProteinTitles (entityID, NULL);
if (tbl->genprodset) {
/* need to reindex before instantiating mRNA titles */
diff --git a/demo/trna2tbl.c b/demo/trna2tbl.c
new file mode 100644
index 00000000..0270cc82
--- /dev/null
+++ b/demo/trna2tbl.c
@@ -0,0 +1,238 @@
+/* trna2tbl.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: trna2tbl.c
+*
+* Author: Jonathan Kans
+*
+* Version Creation Date: 5/31/05
+*
+* $Revision: 1.1 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+*
+* ==========================================================================
+*/
+
+/*
+* To compile on Linux:
+*
+* gcc -o trna2tbl trna2tbl.c -lm
+*
+* To compile on Solaris:
+*
+* cc -xildoff -o trna2tbl trna2tbl.c -lgen -lm
+*
+* To compile on SGI:
+*
+* cc -mips1 -o trna2tbl trna2tbl.c -lm -lPW -lsun
+*
+* To compile on Darwin:
+*
+* cc -pipe -o trna2tbl trna2tbl.c -lc
+*
+*/
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* convenient defines and typedefs from NCBI toolkit */
+
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef Pointer
+typedef void * Pointer;
+#endif
+
+#ifndef Char
+typedef char Char, * CharPtr;
+#endif
+
+#ifndef Bool
+typedef unsigned char Bool, * BoolPtr;
+#endif
+
+#ifndef TRUE
+#define TRUE ((Bool)1)
+#endif
+
+#ifndef FALSE
+#define FALSE ((Bool)0)
+#endif
+
+#ifndef Int2
+typedef short Int2, * Int2Ptr;
+#endif
+
+#ifndef Int4
+typedef long Int4, * Int4Ptr;
+#endif
+
+#ifndef MIN
+#define MIN(a,b) ((a)>(b)?(b):(a))
+#endif
+
+/* useful portable character macros from NCBI toolkit (assumes ASCII) */
+
+#define IS_DIGIT(c) ('0'<=(c) && (c)<='9')
+#define IS_UPPER(c) ('A'<=(c) && (c)<='Z')
+#define IS_LOWER(c) ('a'<=(c) && (c)<='z')
+#define IS_ALPHA(c) (IS_UPPER(c) || IS_LOWER(c))
+#define TO_LOWER(c) ((Char)(IS_UPPER(c) ? (c)+' ' : (c)))
+#define TO_UPPER(c) ((Char)(IS_LOWER(c) ? (c)-' ' : (c)))
+#define IS_WHITESP(c) (((c) == ' ') || ((c) == '\n') || ((c) == '\r') || ((c) == '\t'))
+#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_DIGIT(c))
+#define IS_PRINT(c) (' '<=(c) && (c)<='~')
+
+#define MAX_FIELDS 9
+
+static void RunTrnaScan (void)
+
+{
+ CharPtr aa;
+ CharPtr beg;
+ Char buf [256];
+ Char cmmd [256];
+ CharPtr end;
+ CharPtr field [MAX_FIELDS];
+ CharPtr id;
+ Int2 idNotSent = TRUE;
+ Int2 inBody = FALSE;
+ CharPtr intronBeg;
+ CharPtr intronEnd;
+ long int intronStart;
+ long int intronStop;
+ Int2 numFields = 0;
+ CharPtr ptr;
+ CharPtr speed;
+ long int start;
+ long int stop;
+ Char str [80];
+
+/* line by line processing of tRNAscan-SE output table */
+
+ while (fgets (buf, sizeof (buf), stdin) != NULL) {
+
+ if (inBody) {
+ memset (field, 0, sizeof (field));
+
+/*
+* parse tab-delimited output line into array of fields, avoiding use of
+* strtok so that empty columns (adjacent tabs) are properly assigned to
+* field array
+*/
+
+ ptr = buf;
+ for (numFields = 0; numFields < MAX_FIELDS && ptr != NULL; numFields++) {
+ field [numFields] = ptr;
+ ptr = strchr (ptr, '\t');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
+ }
+ }
+
+/* interested in ID, start, stop, amino acid, and intron start and stop */
+
+ id = field [0];
+ beg = field [2];
+ end = field [3];
+ aa = field [4];
+ intronBeg = field [6];
+ intronEnd = field [7];
+
+ if (numFields > 7 &&
+ sscanf (beg, "%ld", &start) == 1 &&
+ sscanf (end, "%ld", &stop) == 1 &&
+ sscanf (intronBeg, "%ld", &intronStart) == 1 &&
+ sscanf (intronEnd, "%ld", &intronStop) == 1) {
+
+/* first line of output gives SeqId from FASTA definition line */
+
+ if (idNotSent) {
+ fprintf (stdout, ">Features %s\n", id);
+ fflush (stdout);
+ idNotSent = FALSE;
+ }
+
+/* first line of feature has start (tab) stop (tab) feature key */
+/* multiple intervals would have lines of start (tab) stop */
+
+ if (intronStart == 0 && intronStop == 0) {
+ fprintf (stdout, "%ld\t%ld\ttRNA\n", (long) start, (long) stop);
+ fflush (stdout);
+ } else {
+ fprintf (stdout, "%ld\t%ld\ttRNA\n", (long) start, (long) (intronStart - 1));
+ fprintf (stdout, "%ld\t%ld\t\n", (long) (intronStop + 1), (long) stop);
+ fflush (stdout);
+ }
+
+/* qualifier lines are (tab) (tab) (tab) qualifier key (tab) value */
+
+ if (strstr (aa, "Pseudo") != NULL) {
+ fprintf (stdout, "\t\t\tnote\ttRNA-Pseudo\n");
+ fflush (stdout);
+ } else {
+ fprintf (stdout, "\t\t\tproduct\t%s\n", aa);
+ fflush (stdout);
+ }
+
+/* dash (formerly empty) gene qualifier to suppress /gene (e.g., if tRNA is in an intron) */
+
+ fprintf (stdout, "\t\t\tgene\t-\n");
+ fflush (stdout);
+ }
+ }
+
+/* detect last line of table header, ignoring everything before data section */
+
+ if (strstr (buf, "-----") != NULL) {
+ inBody = TRUE;
+ }
+ }
+
+ if (idNotSent) {
+ fprintf (stdout, ">Message\ntRNAscan-SE found no tRNA genes in this sequence\n");
+ fflush (stdout);
+ }
+}
+
+main (int argc, char *argv[])
+
+{
+ RunTrnaScan ();
+ return 0;
+}
+