summaryrefslogtreecommitdiff
path: root/demo
diff options
context:
space:
mode:
Diffstat (limited to 'demo')
-rw-r--r--demo/.BLAST_VERSION2
-rw-r--r--demo/aceread_tst.c855
-rw-r--r--demo/alint.c218
-rw-r--r--demo/asn2all.c7
-rw-r--r--demo/asn2fsa.c9
-rw-r--r--demo/asn2gb.c212
-rw-r--r--demo/asn2idx.c4
-rw-r--r--demo/asnbarval.c10
-rw-r--r--demo/asndisc.c216
-rwxr-xr-xdemo/asnmacro.c4
-rw-r--r--demo/asnval.c176
-rw-r--r--demo/blastall.c9
-rw-r--r--demo/blastpgp.c14
-rw-r--r--demo/cleanasn.c740
-rw-r--r--demo/copymat.c37
-rw-r--r--demo/cspeedtest.c340
-rw-r--r--demo/entrez2.c4
-rw-r--r--demo/formatrpsdb.c53
-rw-r--r--demo/nps2gps.c49
-rw-r--r--demo/rpsblast.c40
-rw-r--r--demo/scantest.c793
-rwxr-xr-xdemo/src_chk.c398
-rw-r--r--demo/subfuse.c229
-rw-r--r--demo/sugint.c214
-rw-r--r--demo/taxblast_main.c99
-rw-r--r--demo/tbl2asn.c1609
26 files changed, 5464 insertions, 877 deletions
diff --git a/demo/.BLAST_VERSION b/demo/.BLAST_VERSION
index 8c57128f..ef93bccb 100644
--- a/demo/.BLAST_VERSION
+++ b/demo/.BLAST_VERSION
@@ -1 +1 @@
-2.2.18
+2.2.19
diff --git a/demo/aceread_tst.c b/demo/aceread_tst.c
new file mode 100644
index 00000000..0754f0f6
--- /dev/null
+++ b/demo/aceread_tst.c
@@ -0,0 +1,855 @@
+/* aceread_tst.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: aceread_tst.c
+*
+* Author: Colleen Bollin
+*
+* Version Creation Date: 7/22/08
+*
+* $Revision: 1.11 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <objall.h>
+#include <objsset.h>
+#include <objsub.h>
+#include <objfdef.h>
+#include <seqport.h>
+#include <sequtil.h>
+#include <sqnutils.h>
+#include <subutil.h>
+#include <gather.h>
+#include <explore.h>
+#include <lsqfetch.h>
+#include <valid.h>
+#include <pmfapi.h>
+#ifdef INTERNAL_NCBI_ASNDISC
+#include <accpubseq.h>
+#include <tax3api.h>
+#endif
+
+#include "aceread.h"
+#include "acerdapi.h"
+
+typedef enum { /* Indices into the myargs table that follows this enum.  The
+                * enumerator order must stay in step with the order of the
+                * entries in myargs, because Main reads each argument as
+                * myargs [<enumerator>].  The leading letter of each name is
+                * the single-character command-line flag of that entry. */
+ i_argInputFile,
+ o_argOutputFile,
+ f_argFASTA,
+ S_argIDSubstitutionFile,
+ R_argSRRids,
+ L_argSuppressIdLookup,
+ Q_argMakeQualScores,
+ X_argXMLFile,
+ t_argTemplateFile,
+ T_argTSAFields,
+ C_argCenter,
+ F_argFormat,
+ G_argGapString,
+ V_argValidateAgainstAsn1File,
+ q_argReadQualScoresFile,
+ r_argReadFASTAFile,
+ N_argRecalculateConsensus,
+ l_argLimitNumContigs
+} EArgNum;
+
+Args myargs [] = { /* Command-line argument table passed to GetArgs in Main.
+                    * Entries are looked up by EArgNum index, so their order
+                    * must match that enum.  NOTE(review): the meaning of the
+                    * individual fields (prompt, default value, flag letter,
+                    * argument type, ...) comes from the toolkit's Args
+                    * struct - confirm against its declaration. */
+ {"Single Input File", "stdin", NULL, NULL,
+ TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Single Output File", NULL, NULL, NULL,
+ TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
+ {"FASTA Output", "F", NULL, NULL,
+ TRUE, 'f', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"ID Substitution File", "", NULL, NULL,
+ TRUE, 'S', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Replacement IDs are SRR", "F", NULL, NULL,
+ TRUE, 'R', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"Suppress ID Lookup", "F", NULL, NULL,
+ TRUE, 'L', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"Make Qual Scores", "T", NULL, NULL,
+ TRUE, 'Q', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"XML Output File", "", NULL, NULL,
+ TRUE, 'X', ARG_FILE_OUT, 0.0, 0, NULL },
+ {"Template File", "", NULL, NULL,
+ TRUE, 't', ARG_FILE_IN, 0.0, 0, NULL },
+ {"TSA fields", NULL, NULL, NULL,
+ TRUE, 'T', ARG_STRING, 0.0, 0, NULL }, /* parsed by GetTSAFieldsFromString */
+ {"Genome Center Tag", NULL, NULL, NULL,
+ TRUE, 'C', ARG_STRING, 0.0, 0, NULL},
+ {"Assembly Format\n\tM MAQ\n\tE Standalone Eland\n\tA ACE", "A", NULL, NULL,
+ TRUE, 'F', ARG_STRING, 0.0, 0, NULL},
+ {"Gap String", NULL, NULL, NULL,
+ TRUE, 'G', ARG_STRING, 0.0, 0, NULL}, /* only used by TestGapInfoReading */
+ {"ASN.1 File to validate against", NULL, NULL, NULL,
+ TRUE, 'V', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Quality score file for read sequences", NULL, NULL, NULL,
+ TRUE, 'q', ARG_FILE_IN, 0.0, 0, NULL},
+ {"FASTA file for read sequences (to use when trimming read quality scores)", NULL, NULL, NULL,
+ TRUE, 'r', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Recalculate consensus sequence using read data\n\tW Whole Consensus\n\tN Ns Only", "", NULL, NULL,
+ TRUE, 'N', ARG_STRING, 0.0, 0, NULL},
+ {"Limit number of contigs to read", NULL, NULL, NULL,
+ TRUE, 'l', ARG_INT, 0.0, 0, NULL},
+};
+
+
+/* OpenAceFile
+ *
+ * Opens the assembly file named by infile for reading.
+ *
+ * A name ending in ".gz" is read through a decompression pipe (UNIX only):
+ * the program named by the NCBI_UNCOMPRESS_BINARY environment variable is
+ * used if set, otherwise gzcat, otherwise zcat.  Any other name is opened
+ * directly with FileOpen.
+ *
+ * Returns the open stream, or NULL if infile is NULL, the file cannot be
+ * opened, no decompressor is available, or the command line would not fit
+ * in the local buffer.
+ *
+ * NOTE(review): for ".gz" input the stream comes from popen, so strictly it
+ * should be released with pclose rather than an fclose-style call - confirm
+ * what the caller uses.  The file name is pasted into a shell command
+ * unquoted, so names containing shell metacharacters are not safe here.
+ */
+static FILE *OpenAceFile (CharPtr infile)
+{
+  size_t   len;
+#ifdef OS_UNIX
+  Char     cmmd [256];
+  CharPtr  prog;
+  int      ret;
+#endif
+
+  if (infile == NULL) return NULL;
+
+  /* only look at the last three characters when there are that many */
+  len = StringLen (infile);
+  if (len < 3 || StringCmp (infile + len - 3, ".gz") != 0) {
+    return FileOpen (infile, "r");
+  }
+
+#ifdef OS_UNIX
+  prog = getenv ("NCBI_UNCOMPRESS_BINARY");
+  if (prog == NULL) {
+    /* probe for gzcat first, then fall back to zcat */
+    ret = system ("gzcat -h >/dev/null 2>&1");
+    if (ret == 0) {
+      prog = "gzcat";
+    } else if (ret == -1) {
+      Message (MSG_POSTERR, "Unable to fork or exec gzcat in OpenAceFile");
+      return NULL;
+    } else {
+      ret = system ("zcat -h >/dev/null 2>&1");
+      if (ret == 0) {
+        prog = "zcat";
+      } else if (ret == -1) {
+        Message (MSG_POSTERR, "Unable to fork or exec zcat in OpenAceFile");
+        return NULL;
+      } else {
+        Message (MSG_POSTERR, "Unable to find zcat or gzcat in OpenAceFile - please edit your PATH environment variable");
+        return NULL;
+      }
+    }
+  }
+
+  /* "<prog> <infile>" plus terminator must fit in cmmd */
+  if (StringLen (prog) + 1 + len + 1 > sizeof (cmmd)) {
+    Message (MSG_POSTERR, "Decompression command for %s is too long in OpenAceFile", infile);
+    return NULL;
+  }
+  sprintf (cmmd, "%s %s", prog, infile);
+  return popen (cmmd, "r");
+#else
+  Message (MSG_POSTERR, "Unable to read gzipped files when not running in UNIX");
+  return NULL;
+#endif
+}
+
+
+/* ValidateAgainstASNFile
+ *
+ * Reads one object from the file named by filename, locates the Seq-entry
+ * inside it and passes that entry, together with ace_file and has_errors,
+ * to ValidateACEFileAgainstSeqEntry.  The object that was read is freed
+ * before returning.
+ *
+ * Returns FALSE when the file cannot be opened or read; otherwise returns
+ * whatever ValidateACEFileAgainstSeqEntry returned.
+ */
+static Boolean ValidateAgainstASNFile (TACEFilePtr ace_file, CharPtr filename, char *has_errors)
+{
+  FILE          *asn_fp;
+  Pointer       obj;
+  Uint2         obj_type;
+  Boolean       stripped = FALSE;
+  SeqEntryPtr   entry = NULL;
+  SeqSubmitPtr  submit = NULL;
+  Boolean       ok;
+
+  asn_fp = FileOpen (filename, "r");
+  if (asn_fp == NULL) {
+    printf ("Unable to open %s\n", filename);
+    return FALSE;
+  }
+
+  /* a single object is enough; the file is not needed afterwards */
+  obj = ReadAsnFastaOrFlatFileEx (asn_fp, &obj_type, NULL, FALSE, FALSE,
+                                  TRUE, FALSE, &stripped);
+  FileClose (asn_fp);
+  if (obj == NULL) {
+    printf ("Unable to read SeqEntry from %s\n", filename);
+    return FALSE;
+  }
+
+  /* find the Seq-entry that belongs to whatever kind of object was read */
+  switch (obj_type) {
+    case OBJ_SEQENTRY :
+      entry = (SeqEntryPtr) obj;
+      break;
+    case OBJ_BIOSEQ :
+    case OBJ_BIOSEQSET :
+      entry = SeqMgrGetSeqEntryForData (obj);
+      break;
+    case OBJ_SEQSUB :
+      submit = (SeqSubmitPtr) obj;
+      if (submit->datatype == 1) {
+        entry = (SeqEntryPtr) submit->data;
+      }
+      break;
+    default :
+      break;
+  }
+
+  ok = ValidateACEFileAgainstSeqEntry (ace_file, entry, has_errors);
+
+  /* a Seq-submit is freed as a whole; otherwise only the entry is freed */
+  if (submit == NULL) {
+    entry = SeqEntryFree (entry);
+  } else {
+    submit = SeqSubmitFree (submit);
+  }
+  return ok;
+}
+
+
+/* StringNHasNoText
+ *
+ * Returns TRUE when the first n characters of str (or all of str, if it is
+ * shorter than n) consist only of whitespace.  A NULL string and a
+ * non-positive n both yield TRUE.  Returns FALSE at the first
+ * non-whitespace character.
+ */
+static Boolean StringNHasNoText (CharPtr str, Int4 n)
+{
+  Int4  i;
+
+  if (str == NULL) return TRUE;
+
+  for (i = 0; i < n && str [i] != '\0'; i++) {
+    /* isspace is only defined for unsigned char values and EOF */
+    if (!isspace ((unsigned char) str [i])) return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* BracketMatchesLabel
+ *
+ * Tests whether the text starting at cp begins with label and is followed
+ * by nothing but whitespace up to cp_equal.  Returns FALSE if any argument
+ * is NULL.
+ */
+static Boolean BracketMatchesLabel (CharPtr cp, CharPtr cp_equal, CharPtr label)
+{
+  Int4  label_len;
+
+  if (cp == NULL || cp_equal == NULL || label == NULL) return FALSE;
+
+  label_len = StringLen (label);
+  if (StringNCmp (cp, label, label_len) != 0) return FALSE;
+
+  /* only blanks may sit between the label and the equals sign */
+  if (StringNHasNoText (cp + label_len, cp_equal - cp - label_len)) return TRUE;
+  return FALSE;
+}
+
+
+/* GetBracketValue
+ *
+ * Returns a newly allocated copy of the text in [cp, cp_end), with leading
+ * spaces/tabs and trailing whitespace removed.  The caller owns the result
+ * and releases it with MemFree.
+ *
+ * Returns NULL if either pointer is NULL, if cp_end is not after cp, or if
+ * allocation fails.  A span that holds only whitespace gives an empty
+ * string.
+ */
+static CharPtr GetBracketValue (CharPtr cp, CharPtr cp_end)
+{
+  Int4     len;
+  CharPtr  val;
+
+  if (cp == NULL || cp_end == NULL || cp_end <= cp) return NULL;
+
+  /* skip leading blanks, but never walk past the end of the span */
+  cp += StringSpn (cp, " \t");
+  if (cp > cp_end) {
+    cp = cp_end;
+  }
+
+  /* drop trailing whitespace before copying, so the buffer is sized exactly */
+  len = (Int4) (cp_end - cp);
+  while (len > 0 && isspace ((unsigned char) cp [len - 1])) {
+    len--;
+  }
+
+  val = (CharPtr) MemNew (sizeof (Char) * (len + 1));
+  if (val == NULL) return NULL;
+  StringNCpy (val, cp, len);
+  val [len] = '\0';
+  return val;
+}
+
+
+/* GetTSAFieldsFromString
+ *
+ * Parses a string of bracketed assignments of the form
+ *   [subref=...] [archive_id=...] [desc=...]
+ * and hands back newly allocated copies of the values through the three
+ * output pointers.  Any output pointer may be NULL, in which case that
+ * value is discarded.  Outputs that are supplied are always set (to NULL
+ * when the field is absent).
+ *
+ * Returns TRUE for an empty string or a well-formed one.  Returns FALSE
+ * when a bracket has no '=' or ']', brackets overlap, a label is not
+ * recognized, or a label appears twice; in that case nothing is handed
+ * back and all outputs are left NULL.
+ */
+static Boolean
+GetTSAFieldsFromString
+(CharPtr str,
+ CharPtr PNTR p_submitter_reference,
+ CharPtr PNTR p_archive_id,
+ CharPtr PNTR p_description)
+{
+  CharPtr       cp, cp_next, cp_equal, cp_end;
+  CharPtr       subref = NULL, arch_id = NULL, desc = NULL;
+  CharPtr PNTR  slot;
+  Boolean       is_bad = FALSE;
+
+  if (p_submitter_reference != NULL) {
+    *p_submitter_reference = NULL;
+  }
+  if (p_archive_id != NULL) {
+    *p_archive_id = NULL;
+  }
+  if (p_description != NULL) {
+    *p_description = NULL;
+  }
+  if (StringHasNoText (str)) {
+    return TRUE;
+  }
+
+  cp = StringChr (str, '[');
+  while (cp != NULL && !is_bad) {
+    cp++;
+    /* search from cp itself: cp may already be at the terminator */
+    cp_next = StringChr (cp, '[');
+    cp_equal = StringChr (cp, '=');
+    cp_end = StringChr (cp, ']');
+    if (cp_equal == NULL || cp_end == NULL || cp_equal > cp_end) {
+      is_bad = TRUE;
+    } else if (cp_next != NULL && (cp_equal > cp_next || cp_end > cp_next)) {
+      /* the '=' or ']' found belongs to a later bracket */
+      is_bad = TRUE;
+    } else {
+      cp += StringSpn (cp, " \t");
+      if (BracketMatchesLabel (cp, cp_equal, "subref")) {
+        slot = &subref;
+      } else if (BracketMatchesLabel (cp, cp_equal, "archive_id")) {
+        slot = &arch_id;
+      } else if (BracketMatchesLabel (cp, cp_equal, "desc")) {
+        slot = &desc;
+      } else {
+        slot = NULL;
+      }
+      if (slot == NULL || *slot != NULL) {
+        /* unknown label, or the same label given twice */
+        is_bad = TRUE;
+      } else {
+        *slot = GetBracketValue (cp_equal + 1, cp_end);
+      }
+    }
+    cp = cp_next;
+  }
+
+  if (is_bad) {
+    /* do not hand back a partial parse */
+    subref = MemFree (subref);
+    arch_id = MemFree (arch_id);
+    desc = MemFree (desc);
+    return FALSE;
+  }
+
+  if (p_submitter_reference == NULL) {
+    subref = MemFree (subref);
+  } else {
+    *p_submitter_reference = subref;
+  }
+  if (p_archive_id == NULL) {
+    arch_id = MemFree (arch_id);
+  } else {
+    *p_archive_id = arch_id;
+  }
+  if (p_description == NULL) {
+    desc = MemFree (desc);
+  } else {
+    *p_description = desc;
+  }
+  return TRUE;
+}
+
+
+/* PrintTraceGapsXML
+ *
+ * Writes the gap count of gap_info to stdout as an <ntracegaps> element
+ * and, when there is at least one gap, the gap offsets as a comma-separated
+ * list inside a <tracegaps> element.  Does nothing for a NULL gap_info.
+ */
+static void PrintTraceGapsXML (TGapInfoPtr gap_info)
+{
+  Int4  idx, last;
+
+  if (gap_info == NULL) return;
+
+  printf (" <ntracegaps>%d</ntracegaps>\n", gap_info->num_gaps);
+  if (gap_info->num_gaps <= 0) return;
+
+  /* every offset but the last is followed by a comma */
+  last = gap_info->num_gaps - 1;
+  printf (" <tracegaps source=\"INLINE\">");
+  for (idx = 0; idx < last; idx++) {
+    printf ("%d,", gap_info->gap_offsets[idx]);
+  }
+  printf ("%d</tracegaps>\n", gap_info->gap_offsets[last]);
+}
+
+
+/* TestPosConversions
+ *
+ * Self-check for SeqPosFromTilingPos and TilingPosFromSeqPos: for every
+ * tiling position below the sum of (gap offset + 1) over all gaps, converts
+ * to a sequence position and back and prints a "Failed!" line to stdout
+ * whenever a round trip does not reproduce its starting value.  Does
+ * nothing when gap_info is NULL or has no gaps.
+ */
+static void TestPosConversions (TGapInfoPtr gap_info)
+{
+  Int4  k, span = 0;
+  Int4  tiling, seq, tiling_back, seq_back;
+
+  if (gap_info == NULL || gap_info->num_gaps <= 0) return;
+
+  for (k = 0; k < gap_info->num_gaps; k++) {
+    span += gap_info->gap_offsets[k] + 1;
+  }
+
+  for (tiling = 0; tiling < span; tiling++) {
+    /* tiling -> sequence -> tiling must be the identity */
+    seq = SeqPosFromTilingPos (tiling, gap_info);
+    tiling_back = TilingPosFromSeqPos (seq, gap_info);
+    if (tiling_back != tiling) {
+      printf ("Failed! %d -> SeqPosFromTilingPos -> %d -> TilingPosFromSeqPos -> %d\n",
+              tiling, seq, tiling_back);
+    }
+    /* and converting the result once more must give the same sequence position */
+    seq_back = SeqPosFromTilingPos (tiling_back, gap_info);
+    if (seq_back != seq) {
+      printf ("Failed! %d -> TilingPosFromSeqPos -> %d -> SeqPosFromTilingPos -> %d\n",
+              seq, tiling_back, seq_back);
+    }
+  }
+}
+
+
+/* PrintTraceReadXML
+ *
+ * Writes one read to stdout as a <trace> element: its name, gap list,
+ * number of base calls, valid range, tiling direction and range, and
+ * consensus range.  Positions are printed one-based (stored value + 1).
+ * Prints "Bad read" for a NULL read.
+ */
+static void PrintTraceReadXML (TContigReadPtr read)
+{
+  int  num_bases;
+
+  if (read == NULL) {
+    printf ("Bad read\n");
+    return;
+  }
+
+  /* StringLen need not return int, so convert once for the %d formats */
+  num_bases = (int) StringLen (read->read_seq);
+
+  printf ("<trace>\n");
+  printf (" <trace_name>%s</trace_name>\n", read->read_id == NULL ? "" : read->read_id);
+  PrintTraceGapsXML (read->gaps);
+  printf (" <nbasecalls>%d</nbasecalls>\n", num_bases);
+  printf (" <valid>\n");
+  printf (" <start>%d</start>\n", read->read_assem_start + 1);
+  printf (" <stop>%d</stop>\n", read->read_assem_stop + 1);
+  printf (" </valid>\n");
+  printf (" <tiling direction = \"%s\">\n", read->is_complement ? "REVERSE" : "FORWARD");
+  printf (" <start>%d</start>\n", read->cons_start + 1);
+  /* the end of each range is a <stop>, matching the <valid> element above */
+  printf (" <stop>%d</stop>\n", read->cons_start + num_bases + 1);
+  printf (" </tiling>\n");
+  printf (" <consensus>\n");
+  printf (" <start>%d</start>\n", read->cons_start + 1);
+  printf (" <stop>%d</stop>\n", read->cons_start + num_bases + 1);
+  printf (" </consensus>\n");
+  printf ("</trace>\n");
+}
+
+
+
+/* TestGapInfoReading
+ *
+ * Debug aid: builds gap information from gap_string (with "*" passed as the
+ * second argument of GapInfoFromSequenceString), prints it as XML, runs the
+ * position-conversion self-check, and prints one line per value returned by
+ * GetTransitionsFromGapInfo (gap_info, 0, 0, 40).  Does nothing when
+ * gap_string has no text.
+ */
+static void TestGapInfoReading (CharPtr gap_string)
+{
+  TGapInfoPtr  gap_info;
+  ValNodePtr   list, vnp;
+
+  if (StringHasNoText (gap_string)) return;
+
+  gap_info = GapInfoFromSequenceString (gap_string, "*");
+  if (gap_info == NULL) {
+    printf ("error reading");
+  } else {
+    PrintTraceGapsXML (gap_info);
+    TestPosConversions (gap_info);
+    list = GetTransitionsFromGapInfo (gap_info, 0, 0, 40);
+    for (vnp = list; vnp != NULL; vnp = vnp->next) {
+      printf ("%d\n", vnp->data.intvalue);
+    }
+    /* the nodes carry plain integers, so freeing the chain is sufficient */
+    list = ValNodeFree (list);
+  }
+  GapInfoFree (gap_info);
+}
+
+
+/* AddAlignmentToSeqEntry
+ *
+ * Wraps dsp in a new Seq-align inside a new Seq-annot and pushes that
+ * annotation onto the front of the annot list of the Bioseq or Bioseq-set
+ * held by sep.  Does nothing if either argument is NULL.
+ *
+ * NOTE(review): the numeric type/segtype codes are taken as-is; presumably
+ * they select "alignment" annotation and dense-seg segments - confirm
+ * against the object definitions.
+ */
+static void AddAlignmentToSeqEntry (DenseSegPtr dsp, SeqEntryPtr sep)
+{
+  SeqAlignPtr       align;
+  SeqAnnotPtr       annot;
+  SeqAnnotPtr PNTR  chain = NULL;
+
+  if (dsp == NULL || sep == NULL) return;
+
+  annot = SeqAnnotNew ();
+  annot->type = 2;
+
+  align = SeqAlignNew ();
+  align->type = 3;
+  align->segtype = 2;
+  align->segs = (Pointer) dsp;
+  align->dim = dsp->dim;
+  annot->data = (Pointer) align;
+
+  /* pick the annotation chain that belongs to this kind of entry */
+  if (IS_Bioseq (sep)) {
+    chain = &(((BioseqPtr) sep->data.ptrvalue)->annot);
+  } else if (IS_Bioseq_set (sep)) {
+    chain = &(((BioseqSetPtr) sep->data.ptrvalue)->annot);
+  }
+
+  if (chain != NULL) {
+    annot->next = *chain;
+    *chain = annot;
+  }
+}
+
+
+/* AddDescrToNucBioseqCallback
+ *
+ * Per-Bioseq callback: data points to a Seq-descr.  For a nucleotide
+ * Bioseq, a copy of that descriptor is pushed onto the front of the
+ * Bioseq's descriptor list.  Protein Bioseqs, a NULL Bioseq and NULL data
+ * are ignored, as is a failed copy.
+ */
+static void AddDescrToNucBioseqCallback (BioseqPtr bsp, Pointer data)
+{
+  SeqDescrPtr  sdp, sdp_copy;
+
+  if (bsp == NULL || !ISA_na (bsp->mol) || data == NULL) {
+    return;
+  }
+  sdp = (SeqDescrPtr) data;
+  sdp_copy = (SeqDescrPtr) AsnIoMemCopy (sdp, (AsnReadFunc) SeqDescrAsnRead, (AsnWriteFunc) SeqDescrAsnWrite);
+  if (sdp_copy == NULL) {
+    /* copy failed; leave the Bioseq untouched instead of dereferencing NULL */
+    return;
+  }
+  sdp_copy->next = bsp->descr;
+  bsp->descr = sdp_copy;
+}
+
+
+static SeqSubmitPtr AddSeqSubmitFromTemplate (SeqEntryPtr sep, CharPtr filename) /* Builds a
+  * Seq-submit around sep using the objects found in the template file.
+  * Returns NULL when filename has no text or the file cannot be opened;
+  * otherwise always returns a Seq-submit whose data field is sep. */
+{
+ SeqSubmitPtr ssp = NULL;
+ SubmitBlockPtr sbp;
+ CitSubPtr csp;
+ FILE *fp = NULL;
+ Pointer dataptr;
+ Uint2 datatype;
+
+ if (StringHasNoText (filename)) {
+ return NULL;
+ }
+
+ fp = FileOpen (filename, "r");
+ if (fp == NULL) {
+ printf ("Unable to read template file %s\n", filename);
+ return NULL;
+ }
+
+ while ((dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL) { /* one object per pass, until nothing more can be read */
+ if (datatype == OBJ_SEQSUB) { /* whole Seq-submit: adopt it and point it at sep */
+ ssp = (SeqSubmitPtr) dataptr;
+ ssp->datatype = 1;
+ ssp->data = sep;
+ } else if (datatype == OBJ_SUBMIT_BLOCK) { /* bare submit block: wrap it in a new Seq-submit */
+ sbp = (SubmitBlockPtr) dataptr;
+ ssp = SeqSubmitNew ();
+ ssp->datatype = 1;
+ ssp->data = sep;
+ ssp->sub = sbp;
+ } else if (datatype == OBJ_SEQDESC) { /* descriptor: the callback copies it onto nucleotide Bioseqs, so the original is freed here */
+ VisitBioseqsInSep (sep, dataptr, AddDescrToNucBioseqCallback);
+ ObjMgrFree (datatype, dataptr);
+ } else { /* anything else in the template is discarded */
+ ObjMgrFree (datatype, dataptr);
+ }
+ } /* NOTE(review): a second Seq-submit or submit block replaces ssp without freeing the first - confirm templates hold at most one */
+ FileClose (fp);
+ if (ssp == NULL) { /* template had no submit information at all */
+ ssp = SeqSubmitNew ();
+ ssp->datatype = 1;
+ ssp->data = sep;
+ }
+
+ if (ssp->sub == NULL) {
+ ssp->sub = SubmitBlockNew ();
+ }
+
+ ssp->sub->tool = MemFree (ssp->sub->tool); /* overwrite whatever tool name the template carried */
+ ssp->sub->tool = StringSave ("aceread");
+ ssp->sub->hup = FALSE;
+ ssp->sub->reldate = DateFree (ssp->sub->reldate);
+ csp = ssp->sub->cit;
+ if (csp != NULL) { /* stamp the citation with the current date */
+ csp->date = DateFree (csp->date);
+ csp->date = DateCurr ();
+ }
+ return ssp;
+}
+
+
+/* AddReadQualityScores
+ *
+ * Opens the quality score file, and the read FASTA file if one is named,
+ * and passes both to AddReadQualScores together with afp.
+ *
+ * Returns TRUE without doing anything when afp is NULL or qs_filename has
+ * no text.  Returns FALSE when a file cannot be opened.  Otherwise returns
+ * TRUE only if AddReadQualScores returned a value greater than zero.
+ */
+static Boolean AddReadQualityScores (TACEFilePtr afp, CharPtr qs_filename, CharPtr rd_filename)
+{
+  ReadBufferData  qual_buf, fasta_buf;
+  Boolean         have_fasta = FALSE;
+  Boolean         ok;
+
+  if (afp == NULL || StringHasNoText (qs_filename)) {
+    return TRUE;
+  }
+
+  qual_buf.current_data = NULL;
+  fasta_buf.current_data = NULL;
+
+  qual_buf.fp = FileOpen (qs_filename, "r");
+  if (qual_buf.fp == NULL) {
+    printf ("Unable to read quality score file\n");
+    return FALSE;
+  }
+
+  /* the FASTA file is optional; without it no second reader is passed */
+  if (!StringHasNoText (rd_filename)) {
+    fasta_buf.fp = FileOpen (rd_filename, "r");
+    if (fasta_buf.fp == NULL) {
+      printf ("Unable to open read FASTA file\n");
+      FileClose (qual_buf.fp);
+      return FALSE;
+    }
+    have_fasta = TRUE;
+  }
+
+  if (have_fasta) {
+    ok = (Boolean) (AddReadQualScores (afp, AbstractReadFunction, &qual_buf, AbstractReadFunction, &fasta_buf) > 0);
+  } else {
+    ok = (Boolean) (AddReadQualScores (afp, AbstractReadFunction, &qual_buf, NULL, &fasta_buf) > 0);
+  }
+
+  FileClose (qual_buf.fp);
+  if (have_fasta) {
+    FileClose (fasta_buf.fp);
+  }
+  return ok;
+}
+
+
+/* Main
+ *
+ * Entry point of the aceread_tst demo.  Reads an assembly in ACE, MAQ or
+ * standalone Eland format (-F), optionally recalculates the consensus (-N),
+ * limits the number of contigs (-l), substitutes read IDs (-S), validates
+ * against an ASN.1 file (-V), writes trace-assembly XML (-X), and writes
+ * the result either as FASTA (-f) or as ASN.1, wrapped in a Seq-submit when
+ * a template file (-t) is given.
+ *
+ * When no output file is named, the name is derived from the input name by
+ * replacing a trailing ".ace" or ".ace.gz" with ".sqn", or by appending
+ * ".sqn".
+ *
+ * Returns 0 after a normal run and when GetArgs fails, 1 on setup or
+ * argument errors.
+ *
+ * NOTE(review): for gzipped ACE input the stream comes from OpenAceFile,
+ * which uses popen; it is closed here with FileClose - confirm that is
+ * acceptable or switch to pclose.
+ */
+Int2 Main (void)
+
+{
+  CharPtr      infile, outfile, xmlfile;
+
+  ReadBufferData  rbd;
+  TACEFilePtr  afp;
+  Int4         i, len;
+  SeqEntryPtr  sep;
+  AsnIoPtr     aip;
+  FILE         *f = NULL;
+  FILE         *f2;
+  CharPtr      app = "aceread_tst";
+  BioseqSetPtr bssp;
+  SeqEntryPtr  last_sep = NULL;
+  Uint2        entityID;
+  Boolean      make_qual_scores, suppress_lookup, srr_ids, fasta_out;
+  CharPtr      submitter_ref = NULL, archive_id = NULL, description = NULL;
+  CharPtr      center_name = NULL;
+  CharPtr      format = NULL;
+  CharPtr      gap_string;
+  CharPtr      asn_file = NULL;
+  Int4         limit = 0;
+  char         has_errors = 0;
+  Boolean      recalculate_consensus = FALSE, recalculate_only_Ns = FALSE;
+  CharPtr      recalculate_options;
+  SeqSubmitPtr ssp;
+
+  /* standard setup */
+
+  ErrSetFatalLevel (SEV_MAX);
+  ErrSetMessageLevel (SEV_MAX);
+  ErrClearOptFlags (EO_SHOW_USERSTR);
+  ErrSetLogfile ("stderr", ELOG_APPEND);
+  ErrSetOpts (ERR_IGNORE, ERR_LOG_ON);
+
+  UseLocalAsnloadDataAndErrMsg ();
+  ErrPathReset ();
+
+  if (! AllObjLoad ()) {
+    Message (MSG_FATAL, "AllObjLoad failed");
+    return 1;
+  }
+  if (! SubmitAsnLoad ()) {
+    Message (MSG_FATAL, "SubmitAsnLoad failed");
+    return 1;
+  }
+  if (! FeatDefSetLoad ()) {
+    Message (MSG_FATAL, "FeatDefSetLoad failed");
+    return 1;
+  }
+  PubSeqFetchEnable ();
+
+  if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
+    return 0;
+  }
+
+  /* -N: "W" recalculates the whole consensus, "N" only the Ns */
+  recalculate_options = (CharPtr) myargs[N_argRecalculateConsensus].strvalue;
+  if (!StringHasNoText (recalculate_options)) {
+    if (StringCmp (recalculate_options, "W") == 0) {
+      recalculate_consensus = TRUE;
+      recalculate_only_Ns = FALSE;
+    } else if (StringCmp (recalculate_options, "N") == 0) {
+      recalculate_consensus = TRUE;
+      recalculate_only_Ns = TRUE;
+    } else {
+      Message (MSG_FATAL, "Invalid consensus sequence recalculation option");
+      return 1;
+    }
+  }
+
+
+  /* test gap info reading if provided */
+  gap_string = (CharPtr) myargs[G_argGapString].strvalue;
+  TestGapInfoReading (gap_string);
+
+  /* limit number of contigs? for debugging purposes */
+  limit = myargs[l_argLimitNumContigs].intvalue;
+
+  /* select format of input file */
+  format = (CharPtr) myargs[F_argFormat].strvalue;
+  if (StringHasNoText (format)) {
+    format = "A";
+  }
+
+  infile = (CharPtr) myargs [i_argInputFile].strvalue;
+  if (StringHasNoText (infile)) {
+    Message (MSG_FATAL, "Must supply input file!");
+    return 1;
+  }
+  outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
+  xmlfile = (CharPtr) myargs[X_argXMLFile].strvalue;
+  make_qual_scores = (Boolean) myargs [Q_argMakeQualScores].intvalue;
+  center_name = (CharPtr) myargs[C_argCenter].strvalue;
+  suppress_lookup = (Boolean) myargs [L_argSuppressIdLookup].intvalue;
+  srr_ids = (Boolean) myargs[R_argSRRids].intvalue;
+  fasta_out = (Boolean) myargs[f_argFASTA].intvalue;
+
+  /* ASN.1 file to validate against */
+  asn_file = (CharPtr) myargs [V_argValidateAgainstAsn1File].strvalue;
+
+  if (!GetTSAFieldsFromString ((CharPtr) myargs [T_argTSAFields].strvalue,
+                               &submitter_ref,
+                               &archive_id,
+                               &description)) {
+    Message (MSG_FATAL, "Error reading TSA fields");
+    return 1;
+  }
+
+  /* derive the output name from the input name when none was given */
+  len = StringLen (infile);
+  if (StringHasNoText (outfile)) {
+    if (len > 3 && StringCmp (infile + len - 4, ".ace") == 0) {
+      outfile = StringSave (infile);
+      StringCpy (outfile + len - 3, "sqn");
+    } else if (len > 6 && StringCmp (infile + len - 7, ".ace.gz") == 0) {
+      outfile = StringSave (infile);
+      StringCpy (outfile + len - 6, "sqn");
+    } else {
+      outfile = (CharPtr) MemNew (sizeof (Char) * (len + 5));
+      sprintf (outfile, "%s.sqn", infile);
+    }
+  }
+
+  if (!StringHasNoText ((CharPtr) myargs [S_argIDSubstitutionFile].strvalue)) {
+    f = FileOpen (myargs [S_argIDSubstitutionFile].strvalue, "r");
+    if (f == NULL) {
+      Message (MSG_FATAL, "Unable to open %s", myargs [S_argIDSubstitutionFile].strvalue);
+      return 1;
+    }
+  }
+
+  /* the first matching format letter decides which reader is used */
+  if (StringChr (format, 'M') != NULL) {
+    rbd.fp = FileOpen (infile, "r");
+    if (rbd.fp == NULL) {
+      Message (MSG_FATAL, "Unable to open %s", infile);
+      return 1;
+    }
+
+    rbd.current_data = NULL;
+    afp = ReadMAQFile (AbstractReadFunction, &rbd);
+  } else if (StringChr (format, 'E') != NULL) {
+    rbd.fp = FileOpen (infile, "r");
+    if (rbd.fp == NULL) {
+      Message (MSG_FATAL, "Unable to open %s", infile);
+      return 1;
+    }
+
+    rbd.current_data = NULL;
+    afp = ReadElandStandaloneFile (AbstractReadFunction, &rbd);
+  } else if (StringChr (format, 'A') != NULL) {
+    rbd.fp = OpenAceFile (infile);
+    if (rbd.fp == NULL) {
+      Message (MSG_FATAL, "Unable to open %s", infile);
+      return 1;
+    }
+    rbd.current_data = NULL;
+    afp = ReadACEFile ( AbstractReadFunction, &rbd, make_qual_scores, &has_errors);
+  } else {
+    Message (MSG_FATAL, "Unrecognized format: %s\n", format);
+    return 1;
+  }
+  FileClose (rbd.fp);
+  if (afp == NULL) {
+    printf ("<message severity=\"ERROR\" seq-id=\"No ID\" code=\"bad_format\">Unable to read file</message>\n");
+  } else {
+    if (recalculate_consensus) {
+      if (!AddReadQualityScores (afp, (CharPtr) myargs [q_argReadQualScoresFile].strvalue, (CharPtr) myargs [r_argReadFASTAFile].strvalue)) {
+        printf ("<message severity=\"ERROR\" seq-id=\"No ID\" code=\"bad_format\">Failed to add read quality scores</message>\n");
+      } else {
+        RecalculateConsensusSequences (afp, recalculate_only_Ns);
+      }
+    }
+
+    /* only ever shrink the contig count: a limit above the number read
+       must not make later loops walk past the end of afp->contigs */
+    if (limit > 0 && limit < afp->num_contigs) {
+      for (i = limit; i < afp->num_contigs; i++) {
+        ContigFree (afp->contigs[i]);
+        afp->contigs[i] = NULL;
+      }
+      afp->num_contigs = limit;
+    }
+
+    if (f != NULL) {
+      UpdateAceFileIds (afp, f, suppress_lookup, srr_ids, &has_errors);
+      FileClose (f);
+      f = NULL;
+    }
+    ValidateAceFileIds (afp, &has_errors);
+
+    if (asn_file != NULL) {
+      if (ValidateAgainstASNFile (afp, asn_file, &has_errors)) {
+        printf ("Validation against %s succeeded\n", asn_file);
+      }
+    }
+
+    if (!StringHasNoText (xmlfile)) {
+      f2 = FileOpen (xmlfile, "w");
+      if (f2 == NULL) {
+        printf ("Unable to open %s\n", xmlfile);
+      } else {
+        WriteTraceAssemblyFromAceFile (afp, submitter_ref, center_name, 0, description, f2);
+        FileClose (f2);
+      }
+    }
+
+    if (fasta_out) {
+      f2 = FileOpen (outfile, "w");
+      if (f2 == NULL) {
+        printf ("Unable to open %s\n", outfile);
+      } else {
+        WriteFASTAFromAceFile (afp, f2);
+        FileClose (f2);
+      }
+    } else {
+      aip = AsnIoOpen (outfile, "w");
+      if (aip == NULL) {
+        printf ("Unable to open %s\n", outfile);
+      } else {
+        bssp = BioseqSetNew ();
+        bssp->_class = BioseqseqSet_class_genbank;
+
+        /* chain one Seq-entry per contig into the set */
+        for (i = 0; i < afp->num_contigs; i++) {
+          sep = MakeSeqEntryFromContig (afp->contigs[i]);
+          if (sep == NULL) {
+            /* skip a contig that produced nothing, so the chain built so
+               far is not dropped by resetting last_sep to NULL */
+            continue;
+          }
+          if (last_sep == NULL) {
+            bssp->seq_set = sep;
+          } else {
+            last_sep->next = sep;
+          }
+          last_sep = sep;
+        }
+        sep = ValNodeNew (NULL);
+        sep->choice = 2;
+        sep->data.ptrvalue = bssp;
+        bssp->seqentry = sep;
+        SeqMgrLinkSeqEntry (sep, 0, NULL);
+        entityID = ObjMgrGetEntityIDForChoice (sep);
+        AssignIDsInEntityEx (entityID, 0, NULL, NULL);
+        SeqMgrIndexFeatures (entityID, sep);
+        ssp = AddSeqSubmitFromTemplate (sep, (CharPtr) myargs[t_argTemplateFile].strvalue);
+        if (ssp == NULL) {
+          SeqEntryAsnWrite (sep, aip, NULL);
+          sep = SeqEntryFree (sep);
+        } else {
+          /* the Seq-submit holds sep, so freeing it covers both */
+          SeqSubmitAsnWrite (ssp, aip, NULL);
+          ssp = SeqSubmitFree (ssp);
+        }
+        AsnIoClose (aip);
+      }
+    }
+  }
+
+  /* the ID substitution file is still open if the assembly could not be read */
+  if (f != NULL) {
+    FileClose (f);
+    f = NULL;
+  }
+
+  /* NOTE(review): presumably an opening <aceread> tag is printed by
+     whichever library call first sets has_errors - confirm */
+  if (has_errors) {
+    printf ("</aceread>\n");
+  }
+
+  return 0;
+
+}
+
diff --git a/demo/alint.c b/demo/alint.c
new file mode 100644
index 00000000..24d752e3
--- /dev/null
+++ b/demo/alint.c
@@ -0,0 +1,218 @@
+/* alint.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: alint.c
+*
+* Author: Jonathan Kans
+*
+* Version Creation Date: 11/10/08
+*
+* $Revision: 1.1 $
+*
+* File Description:
+*
+* Lint for Alignments in FASTA format - upper cases points of exact match
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <sqnutils.h>
+
+/* GetSequence
+ *
+ * Returns the start of the sequence text within str.  When skiptoken is
+ * FALSE that is str itself.  When it is TRUE, everything up to and
+ * including the first space is skipped; if there is no space the result
+ * points at the string terminator.  Returns NULL for a NULL str.
+ */
+static CharPtr GetSequence (
+  CharPtr str,
+  Boolean skiptoken
+)
+
+{
+  if (str == NULL) return NULL;
+
+  if (skiptoken) {
+    /* step over the leading token ... */
+    while (*str != '\0' && *str != ' ') {
+      str++;
+    }
+    /* ... and the single space that ends it */
+    if (*str == ' ') {
+      str++;
+    }
+  }
+
+  return str;
+}
+
+/* ProcessAlignedFASTA
+ *
+ * Reads every non-blank line from ifp, treating each line as one row of an
+ * alignment (after an optional leading token when skiptoken is TRUE).
+ * Columns are compared over the length of the shortest row: where all rows
+ * hold the same character the column is upper-cased, otherwise it is
+ * lower-cased.  The rewritten lines are written to ofp, followed by a
+ * summary line with match/mismatch counts.
+ *
+ * Nothing is written when there are no rows or the shortest row is empty.
+ */
+static void ProcessAlignedFASTA (
+  FILE *ifp,
+  FILE *ofp,
+  Boolean skiptoken
+)
+
+{
+  CharPtr PNTR  seqs;
+  Char          ch, ch0;
+  FileCache     fc;
+  ValNodePtr    head = NULL, last = NULL, vnp;
+  Int4          i, j, num = 0, len, minlen = INT4_MAX, matches = 0, mismatches = 0;
+  Char          line [4096];
+  Boolean       match;
+  CharPtr       str;
+
+  FileCacheSetup (&fc, ifp);
+
+  /* collect a private copy of every non-blank line */
+  str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
+  while (str != NULL) {
+    TrimSpacesAroundString (str);
+    if (StringDoesHaveText (str)) {
+      vnp = ValNodeCopyStr (&last, 0, str);
+      if (vnp != NULL) {
+        if (head == NULL) {
+          head = vnp;
+        }
+        last = vnp;
+        num++;
+        len = (Int4) StringLen (GetSequence (str, skiptoken));
+        if (minlen > len) {
+          minlen = len;
+        }
+      }
+    }
+    str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
+  }
+
+  if (num < 1 || minlen < 1) {
+    ValNodeFreeData (head);
+    return;
+  }
+
+  seqs = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num + 1));
+  if (seqs == NULL) {
+    ValNodeFreeData (head);
+    return;
+  }
+
+  /* locate the sequence part of each stored line once, not per column */
+  for (vnp = head, i = 0; vnp != NULL; vnp = vnp->next, i++) {
+    seqs [i] = GetSequence ((CharPtr) vnp->data.ptrvalue, skiptoken);
+  }
+
+  for (j = 0; j < minlen; j++) {
+    ch0 = seqs [0] [j];
+    match = TRUE;
+    for (i = 1; i < num && match; i++) {
+      if (seqs [i] [j] != ch0) {
+        match = FALSE;
+      }
+    }
+
+    if (match) {
+      matches++;
+    } else {
+      mismatches++;
+    }
+
+    /* rewrite the column in place inside the stored lines */
+    for (i = 0; i < num; i++) {
+      ch = seqs [i] [j];
+      if (match) {
+        seqs [i] [j] = TO_UPPER (ch);
+      } else {
+        seqs [i] [j] = TO_LOWER (ch);
+      }
+    }
+  }
+
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    str = (CharPtr) vnp->data.ptrvalue;
+    fprintf (ofp, "%s\n", str);
+  }
+
+  fprintf (ofp, "\n%d matches, %d mismatches, length %d, %d percent matching\n",
+           (int) matches, (int) mismatches, (int) minlen,
+           (int) (matches * 100 / minlen));
+
+  MemFree (seqs);
+  ValNodeFreeData (head);
+}
+
+#define i_argInputFile 0 /* the three defines are indices into myargs, in table order */
+#define o_argOutputFile 1
+#define s_argSkipToken 2
+
+Args myargs [] = { /* argument table passed to GetArgs in Main */
+ {"Input File", "stdin", NULL, NULL,
+ FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Output File", "stdout", NULL, NULL,
+ FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
+ {"Skip First Token", "F", NULL, NULL,
+ TRUE, 's', ARG_BOOLEAN, 0.0, 0, NULL}, /* read as a Boolean and passed to ProcessAlignedFASTA */
+};
+
+/* Main
+ *
+ * Entry point of alint: parses the command line, opens the input and
+ * output files and runs ProcessAlignedFASTA over them.
+ *
+ * Returns 0 after a normal run and when GetArgs fails, 1 when either file
+ * cannot be opened.
+ */
+Int2 Main (void)
+
+{
+  FILE     *ifp, *ofp;
+  CharPtr  infile, outfile;
+  Boolean  skiptoken;
+
+  /* standard setup */
+
+  ErrSetFatalLevel (SEV_MAX);
+  ErrClearOptFlags (EO_SHOW_USERSTR);
+  ErrPathReset ();
+
+  if (! GetArgs ("alint", sizeof (myargs) / sizeof (Args), myargs)) {
+    return 0;
+  }
+
+  infile = (CharPtr) myargs [i_argInputFile].strvalue;
+  outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
+  skiptoken = (Boolean) myargs [s_argSkipToken].intvalue;
+
+  ifp = FileOpen (infile, "r");
+  if (ifp == NULL) {
+    Message (MSG_FATAL, "Unable to open input file");
+    return 1;
+  }
+
+  ofp = FileOpen (outfile, "w");
+  if (ofp == NULL) {
+    /* release the input file before reporting the failure */
+    FileClose (ifp);
+    Message (MSG_FATAL, "Unable to open output file");
+    return 1;
+  }
+
+  ProcessAlignedFASTA (ifp, ofp, skiptoken);
+
+  FileClose (ofp);
+  FileClose (ifp);
+
+  return 0;
+}
+
diff --git a/demo/asn2all.c b/demo/asn2all.c
index 9af408e9..5e8c092c 100644
--- a/demo/asn2all.c
+++ b/demo/asn2all.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/26/04
*
-* $Revision: 1.37 $
+* $Revision: 1.45 $
*
* File Description:
*
@@ -53,7 +53,7 @@
#include <pmfapi.h>
#include <lsqfetch.h>
-#define ASN2ALL_APP_VER "2.4"
+#define ASN2ALL_APP_VER "3.2"
CharPtr ASN2ALL_APPLICATION = ASN2ALL_APP_VER;
@@ -342,7 +342,6 @@ static void ProcessSingleRecord (
{
AsnIoPtr aip;
- AsnTypePtr atp = NULL;
BioseqPtr bsp;
ValNodePtr bsplist;
BioseqSetPtr bssp;
@@ -363,7 +362,7 @@ static void ProcessSingleRecord (
return;
}
- dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
+ dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
FileClose (fp);
diff --git a/demo/asn2fsa.c b/demo/asn2fsa.c
index e48aa210..ffb60c20 100644
--- a/demo/asn2fsa.c
+++ b/demo/asn2fsa.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 3/4/04
*
-* $Revision: 1.40 $
+* $Revision: 1.46 $
*
* File Description:
*
@@ -61,7 +61,7 @@
#include <accpubseq.h>
#endif
-#define ASN2FSA_APP_VER "2.2"
+#define ASN2FSA_APP_VER "2.7"
CharPtr ASN2FSA_APPLICATION = ASN2FSA_APP_VER;
@@ -578,7 +578,7 @@ static void ProcessSingleRecord (
return;
}
- dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
+ dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
FileClose (fp);
@@ -715,7 +715,7 @@ static void ProcessMultipleRecord (
AsnTypePtr atp, atp_bss, atp_desc, atp_se;
BioseqPtr bsp;
ValNodePtr bsplist;
- Char buf [64], cmmd [256], file [FILENAME_MAX], path [PATH_MAX], longest [64];
+ Char buf [64], file [FILENAME_MAX], path [PATH_MAX], longest [64];
StreamFlgType flags = STREAM_CORRECT_INVAL;
FILE *fp;
Int4 numrecords = 0;
@@ -723,6 +723,7 @@ static void ProcessMultipleRecord (
ObjMgrPtr omp;
time_t starttime, stoptime, worsttime;
#ifdef OS_UNIX
+ Char cmmd [256];
CharPtr gzcatprog;
int ret;
Boolean usedPopen = FALSE;
diff --git a/demo/asn2gb.c b/demo/asn2gb.c
index ffb209b8..fd5cd000 100644
--- a/demo/asn2gb.c
+++ b/demo/asn2gb.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.103 $
+* $Revision: 6.117 $
*
* File Description: New GenBank flatfile generator application
*
@@ -48,9 +48,13 @@
#include <sequtil.h>
#include <sqnutils.h>
#include <explore.h>
+#include <toasn3.h>
#include <asn2gnbp.h>
-#define ASN2GB_APP_VER "4.4"
+/* asn2gnbi.h needed to test PUBSEQGetAccnVer in accpubseq.c */
+#include <asn2gnbi.h>
+
+#define ASN2GB_APP_VER "5.5"
CharPtr ASN2GB_APPLICATION = ASN2GB_APP_VER;
@@ -190,7 +194,7 @@ static Int2 HandleSingleRecord (
return 1;
}
- dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
+ dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
FileClose (fp);
@@ -529,6 +533,52 @@ static CharPtr ffvew [] = {
NULL
};
+static void ReportDiffs (
+ CharPtr path1,
+ CharPtr path2,
+ CharPtr path3,
+ FILE* fp,
+ CharPtr ffdiff,
+ Boolean useFfdiff
+)
+
+{
+ Char buf [256];
+ Char cmmd [256];
+ size_t ct;
+ FILE *fpo;
+
+ if (useFfdiff) {
+ sprintf (cmmd, "%s -o %s -n %s -d reports", ffdiff, path1, path2);
+ system (cmmd);
+
+ sprintf (cmmd, "rm %s; rm %s", path1, path2);
+ system (cmmd);
+ } else {
+ sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path1, path1, path1);
+ system (cmmd);
+
+ sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path2, path2, path2);
+ system (cmmd);
+
+ sprintf (cmmd, "diff %s.suc %s.suc > %s", path1, path2, path3);
+ system (cmmd);
+
+ sprintf (cmmd, "cat %s", path3);
+ fpo = popen (cmmd, "r");
+ if (fpo != NULL) {
+ while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
+ fwrite (buf, 1, ct, fp);
+ fflush (fp);
+ }
+ pclose (fpo);
+ }
+
+ sprintf (cmmd, "rm %s.suc; rm %s.suc", path1, path2);
+ system (cmmd);
+ }
+}
+
static void CompareFlatFiles (
CharPtr path1,
CharPtr path2,
@@ -580,39 +630,21 @@ static void CompareFlatFiles (
SaveAsn2gnbk (sep, path1, format, SEQUIN_MODE, style, flags, locks, custom);
SaveAsn2gnbk (sep, path2, format, RELEASE_MODE, style, flags, locks, custom);
- if (useFfdiff) {
- sprintf (cmmd, "%s -o %s -n %s -d reports", ffdiff, path1, path2);
- system (cmmd);
-
- sprintf (cmmd, "rm %s; rm %s", path1, path2);
- system (cmmd);
- } else {
- sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path1, path1, path1);
- system (cmmd);
-
- sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path2, path2, path2);
- system (cmmd);
+ ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
- sprintf (cmmd, "diff %s.suc %s.suc > %s", path1, path2, path3);
- system (cmmd);
+ } else if (batch == 3) {
- sprintf (cmmd, "cat %s", path3);
- fpo = popen (cmmd, "r");
- if (fpo != NULL) {
- while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
- fwrite (buf, 1, ct, fp);
- fflush (fp);
- }
- pclose (fpo);
- }
+#ifdef ASN2GNBK_SUPPRESS_UNPUB_AFFIL
+ VisitPubdescsInSep (sep, NULL, FreeUnpubAffil);
+#endif
- sprintf (cmmd, "rm %s.suc; rm %s.suc", path1, path2);
- system (cmmd);
- }
+ SaveAsn2gnbk (sep, path1, format, mode, style, flags, locks, custom);
+ SeriousSeqEntryCleanupBulk (sep);
+ SaveAsn2gnbk (sep, path2, format, mode, style, flags, locks, custom);
- } else if (batch == 3) {
+ ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
- SaveAsn2gnbk (sep, path1, format, mode, style, flags, locks, custom);
+ } else if (batch == 4) {
aip = AsnIoOpen (path3, "w");
if (aip == NULL) return;
@@ -627,43 +659,24 @@ static void CompareFlatFiles (
SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
arguments [0] = '\0';
- sprintf (arguments, "-format %s -mode %s -style %s -view %s",
+ sprintf (arguments, "-format %s -mode %s -style %s -view %s -nocleanup",
fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
- sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path2);
+ sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path1);
system (cmmd);
- if (useFfdiff) {
- sprintf (cmmd, "%s -o %s -n %s -d reports", ffdiff, path1, path2);
- system (cmmd);
-
- sprintf (cmmd, "rm %s; rm %s", path1, path2);
- system (cmmd);
- } else {
- sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path1, path1, path1);
- system (cmmd);
-
- sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path2, path2, path2);
- system (cmmd);
+ arguments [0] = '\0';
+ sprintf (arguments, "-format %s -mode %s -style %s -view %s",
+ fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
- sprintf (cmmd, "diff %s.suc %s.suc > %s", path1, path2, path3);
- system (cmmd);
+ sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path2);
+ system (cmmd);
- sprintf (cmmd, "cat %s", path3);
- fpo = popen (cmmd, "r");
- if (fpo != NULL) {
- while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
- fwrite (buf, 1, ct, fp);
- fflush (fp);
- }
- pclose (fpo);
- }
+ ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
- sprintf (cmmd, "rm %s.suc; rm %s.suc", path1, path2);
- system (cmmd);
- }
+ } else if (batch == 5) {
- } else if (batch == 4) {
+ SaveAsn2gnbk (sep, path1, format, mode, style, flags, locks, custom);
aip = AsnIoOpen (path3, "w");
if (aip == NULL) return;
@@ -678,50 +691,15 @@ static void CompareFlatFiles (
SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
arguments [0] = '\0';
- sprintf (arguments, "-format %s -mode %s -style %s -view %s -nocleanup",
- fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
-
- sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path1);
- system (cmmd);
-
- arguments [0] = '\0';
sprintf (arguments, "-format %s -mode %s -style %s -view %s",
fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path2);
system (cmmd);
- if (useFfdiff) {
- sprintf (cmmd, "%s -o %s -n %s -d reports", ffdiff, path1, path2);
- system (cmmd);
-
- sprintf (cmmd, "rm %s; rm %s", path1, path2);
- system (cmmd);
- } else {
- sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path1, path1, path1);
- system (cmmd);
-
- sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path2, path2, path2);
- system (cmmd);
+ ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
- sprintf (cmmd, "diff %s.suc %s.suc > %s", path1, path2, path3);
- system (cmmd);
-
- sprintf (cmmd, "cat %s", path3);
- fpo = popen (cmmd, "r");
- if (fpo != NULL) {
- while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
- fwrite (buf, 1, ct, fp);
- fflush (fp);
- }
- pclose (fpo);
- }
-
- sprintf (cmmd, "rm %s.suc; rm %s.suc", path1, path2);
- system (cmmd);
- }
-
- } else if (batch == 5) {
+ } else if (batch == 6) {
aip = AsnIoOpen (path3, "w");
if (aip == NULL) return;
@@ -974,7 +952,7 @@ static Int2 HandleMultipleRecords (
return 1;
}
- if ((batch == 1 || batch == 3 || batch == 4 || batch == 5 || format != GENBANK_FMT) && extra == NULL) {
+ if ((batch == 1 || batch == 4 || batch == 5 || format != GENBANK_FMT) && extra == NULL) {
ofp = FileOpen (outputFile, "w");
if (ofp == NULL) {
AsnIoClose (aip);
@@ -1064,7 +1042,7 @@ static Int2 HandleMultipleRecords (
if (batch != 1) {
printf ("%s\n", buf);
fflush (stdout);
- if (batch != 3 && batch != 4 && batch != 5) {
+ if (batch != 4 && batch != 5) {
if (ofp != NULL) {
fprintf (ofp, "%s\n", buf);
fflush (ofp);
@@ -1282,6 +1260,24 @@ static SeqEntryPtr SeqEntryFromAccnOrGi (
TrimSpacesAroundString (accn);
+#ifdef INTERNAL_NCBI_ASN2GB
+ /* temporary code to test PUBSEQGetAccnVer in accpubseq.c */
+
+ if (*accn == '*') {
+ Char buf [64];
+ accn++;
+ if (sscanf (accn, "%ld", &val) == 1) {
+ uid = (Int4) val;
+ if (GetAccnVerFromServer (uid, buf)) {
+ Message (MSG_POST, "GetAccnVerFromServer returned %s", buf);
+ } else {
+ Message (MSG_POST, "GetAccnVerFromServer failed");
+ }
+ }
+ return NULL;
+ }
+#endif
+
alldigits = TRUE;
ptr = accn;
ch = *ptr;
@@ -1368,9 +1364,17 @@ Args myargs [] = {
FALSE, 'h', ARG_INT, 0.0, 0, NULL},
{"Custom Flags (4 HideFeats, 1792 HideRefs, 8192 HideSources, 262144 HideTranslation)", "0", NULL, NULL,
FALSE, 'u', ARG_INT, 0.0, 0, NULL},
- {"ASN.1 Type (a Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Bioseq-set, u Batch Seq-submit)", "a", NULL, NULL,
+ {"ASN.1 Type\n"
+ " Single Record: a Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit\n"
+ " Release File: t Batch Bioseq-set, u Batch Seq-submit\n", "a", NULL, NULL,
TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
- {"Batch (1 Report, 2 Sequin/Release, 3 asn2gb/asn2flat, 4 asn2flat BSEC/nocleanup, 5 oldasn2gb/newasn2gb)", "0", "0", "5",
+ {"Batch\n"
+ " 1 Report\n"
+ " 2 Sequin/Release\n"
+ " 3 asn2gb SSEC/nocleanup\n"
+ " 4 asn2flat BSEC/nocleanup\n"
+ " 5 asn2gb/asn2flat\n"
+ " 6 oldasn2gb/newasn2gb)", "0", "0", "5",
FALSE, 't', ARG_INT, 0.0, 0, NULL},
{"Input File is Binary", "F", NULL, NULL,
TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
@@ -1534,6 +1538,12 @@ Int2 Main (
do_gbseq = TRUE;
format = GENPEPT_FMT;
+ } else if (StringICmp (str, "xz") == 0 || StringICmp (str, "zx") == 0) {
+ do_gbseq = TRUE;
+ do_insdseq = TRUE;
+ format = GENBANK_FMT;
+ altformat = GENPEPT_FMT;
+
} else if (StringICmp (str, "x") == 0) {
do_gbseq = TRUE;
do_insdseq = TRUE;
diff --git a/demo/asn2idx.c b/demo/asn2idx.c
index cee0b4db..e47842c9 100644
--- a/demo/asn2idx.c
+++ b/demo/asn2idx.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 8/2/04
*
-* $Revision: 1.4 $
+* $Revision: 1.5 $
*
* File Description:
*
@@ -181,7 +181,7 @@ Args myargs [] = {
TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
{"File Selection Substring", ".aso", NULL, NULL,
TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
- {"Filter", "gbcon,gbest,gbgss,gbsts", NULL, NULL,
+ {"Filter", "gbcon,gbest,gbgss,gbhtg,gbsts", NULL, NULL,
FALSE, 'f', ARG_STRING, 0.0, 0, NULL},
{"Bioseq-sets are Binary", "F", NULL, NULL,
TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
diff --git a/demo/asnbarval.c b/demo/asnbarval.c
index 7e925556..8c940404 100644
--- a/demo/asnbarval.c
+++ b/demo/asnbarval.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/23/07
*
-* $Revision: 1.3 $
+* $Revision: 1.5 $
*
* File Description:
*
@@ -81,7 +81,6 @@ typedef struct brflags {
Int4 numrecords;
ValNodePtr sep_list;
ValNodePtr bsplist;
- ValNodePtr filename_list;
BarcodeTestConfigData bcd;
} BRFlagData, PNTR BRFlagPtr;
@@ -568,8 +567,6 @@ static void ProcessSeqEntryList (BRFlagPtr drfp, CharPtr filename)
SeqEntrySetScope (NULL);
drfp->sep_list = ValNodeFree (drfp->sep_list);
- drfp->filename_list = FreeFilenameList (drfp->filename_list);
-
drfp->bsplist = UnlockFarComponents (drfp->bsplist);
if (ofp != NULL && need_ofp_close) {
@@ -682,8 +679,6 @@ static void ProcessSingleRecord (
if (sep != NULL) {
ValNodeAddPointer (&(drfp->sep_list), 0, sep);
- ValNodeAddInt (&(drfp->filename_list), FILENAME_LIST_ENTITY_ID_ITEM, (Int4) entityID);
- ValNodeAddPointer (&(drfp->filename_list), FILENAME_LIST_FILENAME_ITEM, StringSave (filename));
}
} else {
@@ -1115,6 +1110,9 @@ Int2 Main (void)
/* minimum length */
dfd.bcd.min_length = myargs[l_argMinLength].intvalue;
+ /* require keyword to be present */
+ dfd.bcd.require_keyword = TRUE;
+
/* set up Barcode Report Configuration */
enabled_list = (CharPtr) myargs [e_argEnableTests].strvalue;
disabled_list = (CharPtr) myargs [d_argDisableTests].strvalue;
diff --git a/demo/asndisc.c b/demo/asndisc.c
index a5da7aef..87bf938d 100644
--- a/demo/asndisc.c
+++ b/demo/asndisc.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/23/07
*
-* $Revision: 1.20 $
+* $Revision: 1.24 $
*
* File Description:
*
@@ -80,10 +80,10 @@ typedef struct drflags {
CharPtr output_dir;
FILE *outfp;
Int4 numrecords;
- DiscReportOutputConfigData ocd;
- DiscrepancyConfigData dcd;
ValNodePtr sep_list;
ValNodePtr bsplist;
+
+ GlobalDiscrepReportPtr global_report;
} DRFlagData, PNTR DRFlagPtr;
#ifdef INTERNAL_NCBI_ASNDISC
@@ -493,67 +493,17 @@ static ValNodePtr DoLockFarComponents (
return rsult;
}
-static void ProcessSeqEntryList (DRFlagPtr drfp, CharPtr filename)
+
+static void ReleaseDiscrepancyReportSeqEntries (DRFlagPtr drfp)
{
- ValNodePtr discrepancy_list, vnp;
- ObjMgrPtr omp;
+ ValNodePtr vnp;
SeqEntryPtr sep;
- FILE *ofp = NULL;
- Boolean need_ofp_close = FALSE;
- Char path [PATH_MAX];
- CharPtr ptr;
-
- if (drfp == NULL || drfp->sep_list == NULL) return;
+ ObjMgrPtr omp;
- if (drfp->outfp == NULL) {
- if (StringDoesHaveText (drfp->output_dir)) {
- if (StringLen (drfp->output_dir) > PATH_MAX) {
- Message (MSG_ERROR, "Unable to generate output file - path name is too long");
- return;
- }
- StringCpy (path, drfp->output_dir);
-#ifdef OS_WINNT
- ptr = StringRChr (filename, '\\');
- if (path[StringLen(path) - 1] != '\\') {
- StringCat (path, "\\");
- }
-#else
- ptr = StringRChr (filename, '/');
- if (path[StringLen(path) - 1] != '/') {
- StringCat (path, "/");
- }
-#endif;
- if (ptr == NULL) {
- StringNCat (path, filename, PATH_MAX - StringLen(path) - 1);
- } else {
- StringNCat (path, ptr + 1, PATH_MAX - StringLen(path) - 1);
- }
- } else {
- StringNCpy_0 (path, filename, sizeof (path));
- }
- ptr = StringRChr (path, '.');
- if (ptr != NULL) {
- *ptr = '\0';
- }
- if (StringDoesHaveText (drfp->output_suffix)) {
- StringNCat (path, drfp->output_suffix, PATH_MAX - StringLen(path) - 1);
- path[PATH_MAX - 1] = 0;
- } else {
- StringCat (path, ".dr");
- }
- if (drfp->outfp == NULL) {
- ofp = FileOpen (path, "w");
- need_ofp_close = TRUE;
- } else {
- ofp = drfp->outfp;
- }
- } else {
- ofp = drfp->outfp;
+ if (drfp == NULL) {
+ return;
}
- discrepancy_list = CollectDiscrepancies (&(drfp->dcd), drfp->sep_list, taxlookup);
- WriteAsnDiscReport (discrepancy_list, ofp, &(drfp->ocd), TRUE);
- discrepancy_list = FreeClickableList (discrepancy_list);
for (vnp = drfp->sep_list; vnp != NULL; vnp = vnp->next) {
sep = vnp->data.ptrvalue;
SeqEntryFree (sep);
@@ -566,13 +516,61 @@ static void ProcessSeqEntryList (DRFlagPtr drfp, CharPtr filename)
SeqEntrySetScope (NULL);
drfp->sep_list = ValNodeFree (drfp->sep_list);
- drfp->ocd.filename_list = FreeFilenameList (drfp->ocd.filename_list);
-
drfp->bsplist = UnlockFarComponents (drfp->bsplist);
+}
+
+
+static void ProcessSeqEntryList (DRFlagPtr drfp, CharPtr filename)
+{
+ ValNodePtr discrepancy_list;
+ FILE *ofp = NULL;
+ Char path [PATH_MAX];
+ CharPtr ptr;
- if (ofp != NULL && need_ofp_close) {
- FileClose (ofp);
+ if (drfp == NULL || drfp->sep_list == NULL) return;
+
+ if (StringDoesHaveText (drfp->output_dir)) {
+ if (StringLen (drfp->output_dir) > PATH_MAX) {
+ Message (MSG_ERROR, "Unable to generate output file - path name is too long");
+ return;
+ }
+ StringCpy (path, drfp->output_dir);
+#ifdef OS_WINNT
+ ptr = StringRChr (filename, '\\');
+ if (path[StringLen(path) - 1] != '\\') {
+ StringCat (path, "\\");
+ }
+#else
+ ptr = StringRChr (filename, '/');
+ if (path[StringLen(path) - 1] != '/') {
+ StringCat (path, "/");
+ }
+#endif;
+ if (ptr == NULL) {
+ StringNCat (path, filename, PATH_MAX - StringLen(path) - 1);
+ } else {
+ StringNCat (path, ptr + 1, PATH_MAX - StringLen(path) - 1);
+ }
+ } else {
+ StringNCpy_0 (path, filename, sizeof (path));
}
+ ptr = StringRChr (path, '.');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ if (StringDoesHaveText (drfp->output_suffix)) {
+ StringNCat (path, drfp->output_suffix, PATH_MAX - StringLen(path) - 1);
+ path[PATH_MAX - 1] = 0;
+ } else {
+ StringCat (path, ".dr");
+ }
+ ofp = FileOpen (path, "w");
+
+ discrepancy_list = CollectDiscrepancies (drfp->global_report->test_config, drfp->sep_list, taxlookup);
+ WriteAsnDiscReport (discrepancy_list, ofp, drfp->global_report->output_config, TRUE);
+ discrepancy_list = FreeClickableList (discrepancy_list);
+
+ FileClose (ofp);
}
@@ -681,8 +679,6 @@ static void ProcessSingleRecord (
if (sep != NULL) {
ValNodeAddPointer (&(drfp->sep_list), 0, sep);
- ValNodeAddInt (&(drfp->ocd.filename_list), FILENAME_LIST_ENTITY_ID_ITEM, (Int4) entityID);
- ValNodeAddPointer (&(drfp->ocd.filename_list), FILENAME_LIST_FILENAME_ITEM, StringSave (filename));
if (drfp->lock) {
bsplist_next = DoLockFarComponents (sep, drfp);
@@ -883,11 +879,23 @@ static void ProcessMultipleRecord (
}
-static void ProcessOneRecord (
- CharPtr filename,
- Pointer userdata
-)
+static void ProcessSeqEntryListWithCollation (GlobalDiscrepReportPtr g, ValNodePtr sep_list, CharPtr filename)
+{
+ ValNodePtr vnp;
+ SeqEntryPtr sep;
+
+ if (g == NULL || sep_list == NULL) return;
+
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ sep = vnp->data.ptrvalue;
+ AddSeqEntryToGlobalDiscrepReport (sep, g, filename);
+ }
+
+}
+
+
+static void ProcessOneRecord (CharPtr filename, Pointer userdata)
{
DRFlagPtr drfp;
@@ -899,11 +907,16 @@ static void ProcessOneRecord (
} else {
ProcessSingleRecord (filename, drfp);
}
+
if (drfp->outfp == NULL) {
ProcessSeqEntryList (drfp, filename);
+ } else {
+ ProcessSeqEntryListWithCollation (drfp->global_report, drfp->sep_list, filename);
}
+ ReleaseDiscrepancyReportSeqEntries (drfp);
}
+
/* Args structure contains command-line arguments */
typedef enum {
@@ -928,6 +941,7 @@ typedef enum {
T_argThreads,
X_argExpandCategories,
S_argSummaryReport,
+ B_argBigSequenceReport,
C_argMaxCount
} DRFlagNum;
@@ -989,6 +1003,8 @@ Args myargs [] = {
TRUE, 'X', ARG_STRING, 0.0, 0, NULL},
{"Summary Report", "F", NULL, NULL,
TRUE, 'S', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"Big Sequence Report", "F", NULL, NULL,
+ TRUE, 'B', ARG_BOOLEAN, 0.0, 0, NULL},
{"Max Count", "0", NULL, NULL,
TRUE, 'C', ARG_INT, 0.0, 0, NULL},
};
@@ -1016,6 +1032,7 @@ static CharPtr GetTestNameList (CharPtr intro)
return text;
}
+
Int2 Main (void)
{
@@ -1024,8 +1041,9 @@ Int2 Main (void)
CharPtr enabled_list, disabled_list, err_msg;
Boolean batch, binary, compressed, dorecurse,
indexed, local, lock, remote, usethreads;
- Int2 type = 0, k;
+ Int2 type = 0;
DRFlagData dfd;
+ Boolean big_sequence_report;
/* standard setup */
@@ -1099,36 +1117,50 @@ Int2 Main (void)
lock = (Boolean) myargs [l_argLockFar].intvalue;
usethreads = (Boolean) myargs [T_argThreads].intvalue;
dfd.farFetchCDSproducts = (Boolean) myargs [Z_argRemoteCDS].intvalue;
- ExpandDiscrepancyReportTestsFromString ((CharPtr) myargs [X_argExpandCategories].strvalue, TRUE, &dfd.ocd);
- dfd.ocd.summary_report = (Boolean) myargs [S_argSummaryReport].intvalue;
/* set up Discrepancy Report Configuration */
+ dfd.global_report = GlobalDiscrepReportNew ();
+ dfd.global_report->test_config = DiscrepancyConfigNew();
+
+ ExpandDiscrepancyReportTestsFromString ((CharPtr) myargs [X_argExpandCategories].strvalue, TRUE, dfd.global_report->output_config);
+ dfd.global_report->output_config->summary_report = (Boolean) myargs [S_argSummaryReport].intvalue;
+
+ big_sequence_report = (Boolean) myargs [B_argBigSequenceReport].intvalue;
+
enabled_list = (CharPtr) myargs [e_argEnableTests].strvalue;
disabled_list = (CharPtr) myargs [d_argDisableTests].strvalue;
+
+#ifdef INTERNAL_NCBI_ASNDISC
+ dfd.global_report->taxlookup = CheckTaxNamesAgainstTaxDatabase;
+#endif
+
err_msg = NULL;
if (StringDoesHaveText (enabled_list) && StringDoesHaveText (disabled_list)) {
err_msg = StringSave ("Cannot specify both -e and -d. Choose -e to enable only a few tests and disable the rest, choose -d to disable only a few tests and enable the rest.");
} else if (StringDoesHaveText (disabled_list)) {
- for (k = 0; k < MAX_DISC_TYPE; k++) {
- dfd.dcd.conf_list[k] = TRUE;
+ if (big_sequence_report) {
+ ConfigureForBigSequence (dfd.global_report->test_config);
+ } else {
+ ConfigureForGenomes (dfd.global_report->test_config);
}
- DisableTRNATests (&(dfd.dcd));
/* now disable tests from string */
- err_msg = SetDiscrepancyReportTestsFromString (disabled_list, FALSE, &(dfd.dcd));
+ err_msg = SetDiscrepancyReportTestsFromString (disabled_list, FALSE, dfd.global_report->test_config);
} else if (StringDoesHaveText (enabled_list)) {
- for (k = 0; k < MAX_DISC_TYPE; k++) {
- dfd.dcd.conf_list[k] = FALSE;
+ if (big_sequence_report) {
+ ConfigureForBigSequence (dfd.global_report->test_config);
+ } else {
+ ConfigureForGenomes (dfd.global_report->test_config);
}
/* now enable tests from string */
- err_msg = SetDiscrepancyReportTestsFromString (enabled_list, TRUE, &(dfd.dcd));
+ err_msg = SetDiscrepancyReportTestsFromString (enabled_list, TRUE, dfd.global_report->test_config);
} else {
- /* enable all tests by default */
- for (k = 0; k < MAX_DISC_TYPE; k++) {
- dfd.dcd.conf_list[k] = TRUE;
+ if (big_sequence_report) {
+ ConfigureForBigSequence (dfd.global_report->test_config);
+ } else {
+ ConfigureForGenomes (dfd.global_report->test_config);
}
- DisableTRNATests (&(dfd.dcd));
}
if (err_msg != NULL) {
Message (MSG_FATAL, err_msg);
@@ -1137,8 +1169,8 @@ Int2 Main (void)
}
if ((Boolean) myargs[f_argUseFT].intvalue) {
- dfd.dcd.use_feature_table_format = TRUE;
- dfd.ocd.use_feature_table_format = TRUE;
+ dfd.global_report->test_config->use_feature_table_format = TRUE;
+ dfd.global_report->output_config->use_feature_table_format = TRUE;
}
dfd.maxcount = (Int4) myargs [C_argMaxCount].intvalue;
@@ -1226,27 +1258,21 @@ Int2 Main (void)
AsnIndexedLibFetchEnable (asnidx, TRUE);
}
- /* recurse through all files within source directory or subdirectories */
-
if (StringDoesHaveText (directory)) {
-
DirExplore (directory, NULL, suffix, dorecurse, ProcessOneRecord, (Pointer) &dfd);
- if (dfd.outfp != NULL) {
- ProcessSeqEntryList (&dfd, NULL);
- }
} else if (StringDoesHaveText (infile)) {
ProcessOneRecord (infile, (Pointer) &dfd);
- if (dfd.outfp != NULL) {
- ProcessSeqEntryList (&dfd, NULL);
- }
}
-
if (dfd.outfp != NULL) {
+ WriteGlobalDiscrepancyReport (dfd.global_report, dfd.outfp);
FileClose (dfd.outfp);
+ dfd.outfp = NULL;
}
+ dfd.global_report = GlobalDiscrepReportFree (dfd.global_report);
+
/* close fetch functions */
if (indexed) {
diff --git a/demo/asnmacro.c b/demo/asnmacro.c
index 7e980c28..54fbd82c 100755
--- a/demo/asnmacro.c
+++ b/demo/asnmacro.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/12/07
*
-* $Revision: 1.3 $
+* $Revision: 1.4 $
*
* File Description:
*
@@ -146,7 +146,7 @@ static SeqAlignPtr LIBCALLBACK GetSeqAlignPiece (SeqLocPtr slp1, SeqLocPtr slp2)
static SeqAlignPtr GlobalAlign2Seq (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp)
{
- return Sqn_GlobalAlign2SeqEx (bsp1, bsp2, revcomp, GetSeqAlign, GetSeqAlignPiece);
+ return Sqn_GlobalAlign2SeqEx (bsp1, bsp2, revcomp, GetSeqAlign, GetSeqAlignPiece, TRUE);
}
diff --git a/demo/asnval.c b/demo/asnval.c
index 4a676193..7fbfcf1c 100644
--- a/demo/asnval.c
+++ b/demo/asnval.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/3/04
*
-* $Revision: 1.56 $
+* $Revision: 1.76 $
*
* File Description:
*
@@ -60,7 +60,7 @@
#include <accpubseq.h>
#endif
-#define ASNVAL_APP_VER "4.8"
+#define ASNVAL_APP_VER "5.9"
CharPtr ASNVAL_APPLICATION = ASNVAL_APP_VER;
@@ -78,17 +78,20 @@ typedef struct valflags {
Boolean farFetchMRNAproducts;
Boolean locusTagGeneralMatch;
Boolean validateIDSet;
+ Boolean seqSubmitParent;
Boolean ignoreExceptions;
Boolean validateExons;
Boolean inferenceAccnCheck;
Boolean testLatLonSubregion;
Boolean strictLatLonCountry;
+ Boolean indexerVersion;
Boolean batch;
Boolean binary;
Boolean compressed;
Boolean lock;
Boolean useThreads;
Boolean usePUBSEQ;
+ Boolean validateBarcode;
Int2 verbosity;
Int2 type;
Int4 skipcount;
@@ -98,6 +101,7 @@ typedef struct valflags {
FILE *logfp;
Int4 num_errors;
Int4 fatal_errors;
+ Boolean has_errors;
Boolean io_failure;
Char longest [64];
time_t worsttime;
@@ -514,7 +518,7 @@ static ValNodePtr DoLockFarComponents (
}
static CharPtr severityLabel [] = {
- "NONE", "INFO", "WARN", "ERROR", "REJECT", "FATAL", "MAX", NULL
+ "NONE", "INFO", "WARNING", "ERROR", "REJECT", "FATAL", "MAX", NULL
};
static CharPtr compatSeverityLabel [] = {
@@ -522,13 +526,64 @@ static CharPtr compatSeverityLabel [] = {
};
typedef struct vcdaa {
- FILE *ofp;
- Int2 verbosity;
- Int2 lowCutoff;
- Int2 highCutoff;
- CharPtr errcode;
+ FILE *ofp;
+ Int2 verbosity;
+ Int2 lowCutoff;
+ Int2 highCutoff;
+ CharPtr errcode;
+ ValFlagPtr vfp;
} VCData, PNTR VCPtr;
+static void XmlEncode (CharPtr dst, CharPtr src)
+
+{
+ Char ch;
+
+ if (dst == NULL || src == NULL) return;
+
+ ch = *src;
+ while (ch != '\0') {
+ if (ch == '<') {
+ *dst = '&';
+ dst++;
+ *dst = 'l';
+ dst++;
+ *dst = 't';
+ dst++;
+ *dst = ';';
+ dst++;
+ } else if (ch == '>') {
+ *dst = '&';
+ dst++;
+ *dst = 'g';
+ dst++;
+ *dst = 't';
+ dst++;
+ *dst = ';';
+ dst++;
+ } else {
+ *dst = ch;
+ dst++;
+ }
+ src++;
+ ch = *src;
+ }
+ *dst = '\0';
+}
+
+
+static CharPtr GetXmlHeaderText (ErrSev cutoff)
+{
+ CharPtr xml_header = NULL;
+ CharPtr xml_4_fmt = "asnval version=\"%s\" severity_cutoff=\"%s\"";
+
+ xml_header = (CharPtr) MemNew (sizeof (Char) * (10 + StringLen (xml_4_fmt) +
+ StringLen (ASNVAL_APPLICATION) + StringLen (severityLabel[cutoff])));
+ sprintf (xml_header, xml_4_fmt, ASNVAL_APPLICATION, severityLabel[cutoff]);
+ return xml_header;
+}
+
+
static void LIBCALLBACK ValidCallback (
ErrSev severity,
int errcode,
@@ -547,15 +602,21 @@ static void LIBCALLBACK ValidCallback (
)
{
- Char buf [256];
- CharPtr catname, errname;
- FILE *fp;
- VCPtr vcp;
+ Char buf [256];
+ CharPtr catname, errname, urlmssg = NULL;
+ ErrSev cutoff;
+ FILE *fp;
+ size_t len;
+ VCPtr vcp;
+ ValFlagPtr vfp;
+ CharPtr xml_header;
vcp = (VCPtr) userdata;
if (vcp == NULL) return;
fp = vcp->ofp;
if (fp == NULL) return;
+ vfp = vcp->vfp;
+ if (vfp == NULL) return;
if (severity < SEV_NONE || severity > SEV_MAX) {
severity = SEV_MAX;
@@ -628,7 +689,32 @@ static void LIBCALLBACK ValidCallback (
accession, severityLabel [severity],
catname, errname);
+ } else if (vcp->verbosity == 4) {
+
+ if (! vfp->has_errors) {
+ cutoff = (ErrSev) vcp->lowCutoff;
+ if (cutoff < SEV_NONE || cutoff > SEV_MAX) {
+ cutoff = SEV_MAX;
+ }
+
+ xml_header = GetXmlHeaderText (cutoff);
+ fprintf (fp, "<%s>\n", xml_header);
+ xml_header = MemFree (xml_header);
+ }
+
+ len = StringLen (message);
+ if (len > 0) {
+ urlmssg = MemNew (len * 3 + 2);
+ if (urlmssg != NULL) {
+ XmlEncode (urlmssg, message);
+ fprintf (fp, " <message severity=\"%s\" seq-id=\"%s\" code=\"%s_%s\">%s</message>\n",
+ severityLabel [severity], accession, catname, errname, urlmssg);
+ MemFree (urlmssg);
+ }
+ }
}
+
+ vfp->has_errors = TRUE;
}
static void DoValidation (
@@ -641,6 +727,8 @@ static void DoValidation (
Int2 i;
VCData vcd;
ValidStructPtr vsp;
+ ErrSev cutoff;
+ CharPtr xml_header = NULL;
if (vfp == NULL) return;
@@ -653,6 +741,7 @@ static void DoValidation (
vsp->cutoff = vfp->lowCutoff;
vsp->validateAlignments = vfp->validateAlignments;
+ vsp->alignFindRemoteBsp = vfp->alignFindRemoteBsp;
vsp->doSeqHistAssembly = vfp->doSeqHistAssembly;
vsp->farIDsInAlignments = vfp->farIDsInAlignments;
vsp->alwaysRequireIsoJTA = vfp->alwaysRequireIsoJTA;
@@ -660,11 +749,13 @@ static void DoValidation (
vsp->farFetchMRNAproducts = vfp->farFetchMRNAproducts;
vsp->locusTagGeneralMatch = vfp->locusTagGeneralMatch;
vsp->validateIDSet = vfp->validateIDSet;
+ vsp->seqSubmitParent = vfp->seqSubmitParent;
vsp->ignoreExceptions = vfp->ignoreExceptions;
vsp->validateExons = vfp->validateExons;
vsp->inferenceAccnCheck = vfp->inferenceAccnCheck;
vsp->testLatLonSubregion = vfp->testLatLonSubregion;
vsp->strictLatLonCountry = vfp->strictLatLonCountry;
+ vsp->indexerVersion = vfp->indexerVersion;
if (ofp == NULL && vfp->outfp != NULL) {
ofp = vfp->outfp;
@@ -675,6 +766,7 @@ static void DoValidation (
vcd.lowCutoff = vfp->lowCutoff;
vcd.highCutoff = vfp->highCutoff;
vcd.errcode = vfp->errcode;
+ vcd.vfp = vfp;
vsp->errfunc = ValidCallback;
vsp->userdata = (Pointer) &vcd;
vsp->convertGiToAccn = FALSE;
@@ -690,6 +782,22 @@ static void DoValidation (
}
ValidStructFree (vsp);
+ if (vfp->validateBarcode) {
+ if (vfp->verbosity == 4 && !vfp->has_errors) {
+ cutoff = (ErrSev) vfp->lowCutoff;
+ if (cutoff < SEV_NONE || cutoff > SEV_MAX) {
+ cutoff = SEV_MAX;
+ }
+ xml_header = GetXmlHeaderText(cutoff);
+ }
+ if (!BarcodeValidateOneSeqEntry (ofp, sep, FALSE,
+ vfp->verbosity == 4,
+ !vfp->has_errors,
+ xml_header)) {
+ vfp->has_errors = TRUE;
+ }
+ xml_header = MemFree (xml_header);
+ }
}
static void ProcessSingleRecord (
@@ -721,7 +829,7 @@ static void ProcessSingleRecord (
return;
}
- dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
+ dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
FileClose (fp);
@@ -840,6 +948,12 @@ static void ProcessSingleRecord (
bsplist = UnlockFarComponents (bsplist);
if (ofp != NULL) {
+ if (vfp->has_errors) {
+ if (vfp->verbosity == 4) {
+ fprintf (ofp, "</asnval>\n");
+ }
+ vfp->has_errors = FALSE;
+ }
FileClose (ofp);
}
@@ -1184,6 +1298,12 @@ static void ProcessMultipleRecord (
}
if (ofp != NULL) {
+ if (vfp->has_errors) {
+ if (vfp->verbosity == 4) {
+ fprintf (ofp, "</asnval>\n");
+ }
+ vfp->has_errors = FALSE;
+ }
FileClose (ofp);
}
@@ -1261,9 +1381,12 @@ static void ProcessOneRecord (
#define T_argThreads 26
#define L_argLogFile 27
#define S_argSkipCount 28
-#define C_argMaxCount 29
+#define B_argBarcodeVal 29
+#define C_argMaxCount 30
#ifdef INTERNAL_NCBI_ASN2VAL
-#define H_argAccessHUP 30
+#define w_argSeqSubParent 31
+#define H_argAccessHUP 32
+#define y_argAIndexer 33
#endif
#define LAT_LON_STATE 1
@@ -1306,7 +1429,7 @@ Args myargs [] = {
TRUE, 'Y', ARG_BOOLEAN, 0.0, 0, NULL},
{"Ignore Transcription/Translation Exceptions", "F", NULL, NULL,
TRUE, 'e', ARG_BOOLEAN, 0.0, 0, NULL},
- {"Verbosity", "0", "0", "3",
+ {"Verbosity", "0", "0", "4",
FALSE, 'v', ARG_INT, 0.0, 0, NULL},
{"ASN.1 Type (a Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Bioseq-set, u Batch Seq-submit)", "a", NULL, NULL,
TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
@@ -1328,11 +1451,17 @@ Args myargs [] = {
TRUE, 'L', ARG_FILE_OUT, 0.0, 0, NULL},
{"Skip Count", "0", NULL, NULL,
TRUE, 'S', ARG_INT, 0.0, 0, NULL},
+ {"Barcode Validate", "F", NULL, NULL,
+ TRUE, 'B', ARG_BOOLEAN, 0.0, 0, NULL},
{"Max Count", "0", NULL, NULL,
TRUE, 'C', ARG_INT, 0.0, 0, NULL},
#ifdef INTERNAL_NCBI_ASN2VAL
+ {"SeqSubmitParent Flag", "F", NULL, NULL,
+ TRUE, 'w', ARG_BOOLEAN, 0.0, 0, NULL},
{"Internal Access to HUP", "F", NULL, NULL,
TRUE, 'H', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"Special Indexer Tests", "F", NULL, NULL,
+ TRUE, 'y', ARG_BOOLEAN, 0.0, 0, NULL},
#endif
};
@@ -1413,6 +1542,7 @@ Int2 Main (void)
vfd.highCutoff = (Int2) myargs [P_argHighCutoff].intvalue;
vfd.errcode = (CharPtr) myargs [E_argOnlyThisErr].strvalue;
vfd.validateAlignments = (Boolean) myargs [A_argAlignments].intvalue;
+ vfd.alignFindRemoteBsp = (Boolean) (vfd.validateAlignments && remote);
vfd.doSeqHistAssembly = (Boolean) myargs [A_argAlignments].intvalue;
vfd.farIDsInAlignments = (Boolean) myargs [A_argAlignments].intvalue;
vfd.alwaysRequireIsoJTA = (Boolean) myargs [J_argIsoJta].intvalue;
@@ -1423,6 +1553,8 @@ Int2 Main (void)
vfd.ignoreExceptions = (Boolean) myargs [e_argIgnoreExcept].intvalue;
vfd.validateExons = (Boolean) myargs [X_argExonSplice].intvalue;
vfd.inferenceAccnCheck = (Boolean) myargs [G_argInfAccns].intvalue;
+ vfd.validateBarcode = (Boolean) myargs[B_argBarcodeVal].intvalue;
+
val = (Int2) myargs [N_argLatLonStrict].intvalue;
vfd.testLatLonSubregion = (Boolean) ((val & LAT_LON_STATE) != 0);
@@ -1436,6 +1568,11 @@ Int2 Main (void)
vfd.maxcount = INT4_MAX;
}
+#ifdef INTERNAL_NCBI_ASN2VAL
+ vfd.seqSubmitParent = (Boolean) myargs [w_argSeqSubParent].intvalue;
+ vfd.indexerVersion = (Boolean) myargs [y_argAIndexer].intvalue;
+#endif
+
batch = FALSE;
binary = (Boolean) myargs [b_argBinary].intvalue;
compressed = (Boolean) myargs [c_argCompressed].intvalue;
@@ -1488,6 +1625,7 @@ Int2 Main (void)
vfd.logfp = NULL;
vfd.num_errors = 0;
vfd.fatal_errors = 0;
+ vfd.has_errors = FALSE;
vfd.io_failure = FALSE;
vfd.longest [0] = '\0';
vfd.worsttime = 0;
@@ -1563,6 +1701,12 @@ Int2 Main (void)
run_time = stop_time - start_time;
if (vfd.outfp != NULL) {
+ if (vfd.has_errors) {
+ if (vfd.verbosity == 4) {
+ fprintf (vfd.outfp, "</asnval>\n");
+ }
+ vfd.has_errors = FALSE;
+ }
FileClose (vfd.outfp);
}
diff --git a/demo/blastall.c b/demo/blastall.c
index 18f6dd4d..2cdfed86 100644
--- a/demo/blastall.c
+++ b/demo/blastall.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blastall.c,v 6.201 2008/01/02 14:02:06 madden Exp $";
+static char const rcsid[] = "$Id: blastall.c,v 6.202 2008/07/01 18:38:14 madden Exp $";
-/* $Id: blastall.c,v 6.201 2008/01/02 14:02:06 madden Exp $
+/* $Id: blastall.c,v 6.202 2008/07/01 18:38:14 madden Exp $
**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -28,6 +28,9 @@ static char const rcsid[] = "$Id: blastall.c,v 6.201 2008/01/02 14:02:06 madden
**************************************************************************
*
* $Log: blastall.c,v $
+ * Revision 6.202 2008/07/01 18:38:14 madden
+ * Correct X3 value for blastn/megablast
+ *
* Revision 6.201 2008/01/02 14:02:06 madden
* Make composition-based score adjustments the default for blastp and tblastn
*
@@ -1058,7 +1061,7 @@ static Args myargs[] = {
"0.0", NULL, NULL, FALSE, 'y', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_XDROP_UNGAPPED */
{ "X dropoff value for final gapped alignment in bits "
"(0.0 invokes default behavior)\n"
- " blastn/megablast 50, tblastx 0, all others 25",
+ " blastn/megablast 100, tblastx 0, all others 25",
"0", NULL, NULL, FALSE, 'Z', ARG_INT, 0.0, 0, NULL}, /* ARG_XDROP_FINAL */
#ifdef BLAST_CS_API
{ "RPS Blast search", /* 34 */
diff --git a/demo/blastpgp.c b/demo/blastpgp.c
index 31ac2210..71ac8afa 100644
--- a/demo/blastpgp.c
+++ b/demo/blastpgp.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: blastpgp.c,v 6.139 2008/01/02 20:16:11 madden Exp $";
+static char const rcsid[] = "$Id: blastpgp.c,v 6.140 2008/03/31 13:35:18 madden Exp $";
-/* $Id: blastpgp.c,v 6.139 2008/01/02 20:16:11 madden Exp $ */
+/* $Id: blastpgp.c,v 6.140 2008/03/31 13:35:18 madden Exp $ */
/**************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -26,8 +26,12 @@ static char const rcsid[] = "$Id: blastpgp.c,v 6.139 2008/01/02 20:16:11 madden
* appreciated. *
* *
**************************************************************************
- * $Revision: 6.139 $
+ * $Revision: 6.140 $
* $Log: blastpgp.c,v $
+ * Revision 6.140 2008/03/31 13:35:18 madden
+ * Change semantics of -c option, so that a new method for effective observations is used always and a new entropy-based method for column-specific PSI-BLAST pseudocounts is used by default. If default is used (-c 0), then all constants are defined in posit.c; if only the new method of effective observations is used, then the value of -c should be set by the user at approximately 30. (Changes
+ * submitted by Alejandro Schaffer).
+ *
* Revision 6.139 2008/01/02 20:16:11 madden
* XML output respects -v and -b option, JIRA SB-30
*
@@ -674,8 +678,8 @@ static Args myargs[] = {
"F", NULL, NULL, FALSE, 'I', ARG_BOOLEAN, 0.0, 0, NULL},
{ "e-value threshold for inclusion in multipass model", /* ARG_EVALUE_INCLUSION_THRESHOLD */
"0.002", NULL, NULL, FALSE, 'h', ARG_FLOAT, 0.0, 0, NULL},
- { "Constant in pseudocounts for multipass version", /* ARG_PSEUDOCOUNT_CONSTANT */
- "9", NULL, NULL, FALSE, 'c', ARG_INT, 0.0, 0, NULL},
+ { "Constant in pseudocounts for multipass version; 0 uses entropy method; otherwise a value near 30 is recommended", /* ARG_PSEUDOCOUNT_CONSTANT */
+ "0", NULL, NULL, FALSE, 'c', ARG_INT, 0.0, 0, NULL},
{ "Maximum number of passes to use in multipass version", /* ARG_MAX_PASSES */
"1", NULL, NULL, FALSE, 'j', ARG_INT, 0.0, 0, NULL},
{ "Believe the query defline", /* ARG_BELIEVEQUERY */
diff --git a/demo/cleanasn.c b/demo/cleanasn.c
index 297ad432..2bf45c84 100644
--- a/demo/cleanasn.c
+++ b/demo/cleanasn.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/19/99
*
-* $Revision: 6.15 $
+* $Revision: 6.29 $
*
* File Description:
*
@@ -48,33 +48,50 @@
#include <objfdef.h>
#include <objsub.h>
#include <sequtil.h>
+#include <gather.h>
#include <sqnutils.h>
#include <explore.h>
#include <tofasta.h>
#include <toasn3.h>
+#include <subutil.h>
+#include <asn2gnbk.h>
#include <pmfapi.h>
#include <tax3api.h>
#ifdef INTERNAL_NCBI_CLEANASN
#include <accpubseq.h>
#endif
-#define CLEANASN_APP_VER "1.6"
+#define CLEANASN_APP_VER "2.2"
CharPtr CLEANASN_APPLICATION = CLEANASN_APP_VER;
typedef struct cleanflags {
+ Char buf [64];
Boolean batch;
Boolean binary;
Boolean compressed;
Int2 type;
CharPtr results;
CharPtr outfile;
+ CharPtr report;
+ CharPtr ffdiff;
+ ModType ffmode;
CharPtr clean;
+ CharPtr modernize;
CharPtr link;
CharPtr feat;
+ CharPtr desc;
CharPtr mods;
Boolean taxon;
Boolean pub;
+ Int4 okay;
+ Int4 bsec;
+ Int4 ssec;
+ Int4 norm;
+ Int4 cumokay;
+ Int4 cumbsec;
+ Int4 cumssec;
+ Int4 cumnorm;
AsnModulePtr amp;
AsnTypePtr atp_bss;
AsnTypePtr atp_bsss;
@@ -169,17 +186,15 @@ static void RemoveUnnecGeneXref (
grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
if (grpx == NULL) return;
- if ((StringDoesHaveText (grp->locus)) &&
- (StringDoesHaveText (grpx->locus))) {
- if ((StringICmp (grp->locus, grpx->locus) != 0)) return;
- } else if (StringDoesHaveText (grp->locus_tag) &&
- StringDoesHaveText (grpx->locus_tag)) {
- if ((StringICmp (grp->locus_tag, grpx->locus_tag) != 0)) return;
+ if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
+ if (StringICmp (grp->locus_tag, grpx->locus_tag) != 0) return;
+ } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
+ if (StringICmp (grp->locus, grpx->locus) != 0) return;
} else if (grp->syn != NULL && grpx->syn != NULL) {
syn1 = (CharPtr) grp->syn->data.ptrvalue;
syn2 = (CharPtr) grpx->syn->data.ptrvalue;
- if ((StringDoesHaveText (syn1)) && (StringDoesHaveText (syn2))) {
- if ((StringICmp (syn1, syn2) != 0)) return;
+ if (StringDoesHaveText (syn1) && StringDoesHaveText (syn2)) {
+ if (StringICmp (syn1, syn2) != 0) return;
}
}
@@ -207,7 +222,24 @@ static void RemoveUnnecGeneXref (
}
}
-static void AddSpTaxnameToList (SeqDescrPtr sdp, Pointer userdata)
+static void MarkTitles (
+ SeqDescrPtr sdp,
+ Pointer userdata
+)
+/* Descriptor visitor: sets idx.deleteme on every extended title descriptor; userdata is not used. */
+{
+ ObjValNodePtr ovn;
+
+ if (sdp == NULL || sdp->choice != Seq_descr_title) return;
+ if (sdp->extended == 0) return; /* the ObjValNode cast below is only applied when extended is non-zero */
+ ovn = (ObjValNodePtr) sdp;
+ ovn->idx.deleteme = TRUE;
+}
+
+static void AddSpTaxnameToList (
+ SeqDescrPtr sdp,
+ Pointer userdata
+)
{
BioSourcePtr biop;
@@ -220,7 +252,10 @@ static void AddSpTaxnameToList (SeqDescrPtr sdp, Pointer userdata)
}
-static Boolean ShouldExcludeSp (SeqEntryPtr sep)
+static Boolean ShouldExcludeSp (
+ SeqEntryPtr sep
+)
+
{
ValNodePtr name_list = NULL, vnp1, vnp2;
Boolean all_diff = TRUE;
@@ -356,6 +391,506 @@ static void LookupPubdesc (
PubmedEntryFree (pep);
}
+static void ModGenes (SeqFeatPtr sfp, Pointer userdata)
+/* Feature visitor adapter: forwards the feature to ModernizeGeneFields; userdata is not used. */
+{
+ ModernizeGeneFields (sfp);
+}
+
+static void ModRNAs (SeqFeatPtr sfp, Pointer userdata)
+/* Feature visitor adapter: forwards the feature to ModernizeRNAFields; userdata is not used. */
+{
+ ModernizeRNAFields (sfp);
+}
+
+static void ModPCRs (BioSourcePtr biop, Pointer userdata)
+/* BioSource visitor adapter: forwards the source to ModernizePCRPrimers; userdata is not used. */
+{
+ ModernizePCRPrimers (biop);
+}
+
+static CharPtr Se2Str (
+  SeqEntryPtr sep
+)
+/* Writes sep through an ASN.1 ByteStore stream (mode "w") and returns the merged string; NULL on failure. Callers release it with MemFree. */
+{
+  AsnIoBSPtr    aibp;
+  ByteStorePtr  bs;
+  CharPtr       str;
+
+  if (sep == NULL) return NULL;
+  bs = BSNew (1000);
+  if (bs == NULL) return NULL;
+  aibp = AsnIoBSOpen ("w", bs);
+  if (aibp == NULL) {
+    BSFree (bs);  /* release the byte store instead of leaking it when the stream cannot be opened */
+    return NULL;
+  }
+
+  SeqEntryAsnWrite (sep, aibp->aip, NULL);
+  AsnIoFlush (aibp->aip);  /* flush before closing so the byte store holds the complete record */
+  AsnIoBSClose (aibp);
+
+  str = BSMerge (bs, NULL);
+  BSFree (bs);
+  return str;
+}
+
+typedef struct chgdata { /* per-record flags and counts filled in by ScoreFeature and ScoreDescriptor */
+ Boolean rubisco; /* a protein name matched IsRubisco */
+ Boolean rbc; /* a protein name matched IsRbc */
+ Boolean its; /* an RNA name or misc_RNA product qualifier matched IsITS */
+ Boolean rnaother; /* a type-255 RNA carried a name other than misc_RNA, ncRNA or tmRNA */
+ Boolean trnanote; /* an RNA feature comment was recognized by FindTrnaAA3 or FindTrnaAA */
+ Boolean oldbiomol; /* molinfo biomol was snRNA, scRNA or snoRNA */
+ Int4 protdesc; /* number of protein features with a non-empty desc */
+ Int4 sfpnote; /* number of features with a non-empty comment */
+ Int4 gbsource; /* number of GenBank descriptors with a non-empty source */
+ Int4 cdsconf; /* number of coding regions with conflict set */
+} ChangeData, PNTR ChangeDataPtr;
+
+static Boolean IsRubisco (
+ CharPtr name
+)
+/* Returns TRUE when StringICmp matches name against either rubisco subunit spelling listed below. */
+{
+ return (StringICmp (name, "rubisco large subunit") == 0 ||
+ StringICmp (name, "rubisco small subunit") == 0);
+}
+
+static Boolean IsRbc (
+ CharPtr name
+)
+/* Returns TRUE when StringICmp matches name against "RbcL" or "RbcS". */
+{
+ return (StringICmp (name, "RbcL") == 0 ||
+ StringICmp (name, "RbcS") == 0);
+}
+
+static Boolean IsITS (
+ CharPtr name
+)
+/* Returns TRUE when StringICmp matches name against any of the ITS 1/2/3 spellings listed below. */
+{
+ return (StringICmp (name, "its1") == 0 ||
+ StringICmp (name, "its 1") == 0 ||
+ StringICmp (name, "its2") == 0 ||
+ StringICmp (name, "its 2") == 0 ||
+ StringICmp (name, "its3") == 0 ||
+ StringICmp (name, "its 3") == 0 ||
+ StringICmp (name, "Ribosomal DNA internal transcribed spacer 1") == 0 ||
+ StringICmp (name, "Ribosomal DNA internal transcribed spacer 2") == 0 ||
+ StringICmp (name, "Ribosomal DNA internal transcribed spacer 3") == 0 ||
+ StringICmp (name, "internal transcribed spacer 1 (ITS1)") == 0 ||
+ StringICmp (name, "internal transcribed spacer 2 (ITS2)") == 0 ||
+ StringICmp (name, "internal transcribed spacer 3 (ITS3)") == 0);
+}
+
+static void ScoreFeature (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+/* Feature visitor: updates the ChangeData passed as userdata with the counts and flags found on this feature. */
+{
+ ChangeDataPtr cdp;
+ CharPtr comment;
+ CdRegionPtr crp;
+ CharPtr desc;
+ GBQualPtr gbq;
+ CharPtr name;
+ ProtRefPtr prp;
+ Uint1 residue;
+ RnaRefPtr rrp;
+ CharPtr str;
+ ValNodePtr vnp;
+
+ if (sfp == NULL) return;
+ cdp = (ChangeDataPtr) userdata;
+ if (cdp == NULL) return;
+
+ comment = sfp->comment;
+ if (StringDoesHaveText (comment)) {
+ (cdp->sfpnote)++;
+ }
+
+ /* skip feature types that do not use data.value.ptrvalue */
+ switch (sfp->data.choice) {
+ case SEQFEAT_COMMENT:
+ case SEQFEAT_BOND:
+ case SEQFEAT_SITE:
+ case SEQFEAT_PSEC_STR:
+ return;
+ default:
+ break;
+ }
+
+ if (sfp->data.value.ptrvalue == NULL) return;
+ /* ptrvalue is non-NULL from here on; inspect the type-specific payload */
+ switch (sfp->data.choice) {
+ case SEQFEAT_CDREGION:
+ crp = (CdRegionPtr) sfp->data.value.ptrvalue;
+ if (crp->conflict) {
+ (cdp->cdsconf)++;
+ }
+ break;
+ case SEQFEAT_PROT:
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ desc = prp->desc;
+ if (StringDoesHaveText (desc)) {
+ (cdp->protdesc)++;
+ }
+ for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ if (IsRubisco (str)) {
+ cdp->rubisco = TRUE;
+ }
+ if (IsRbc (str)) {
+ cdp->rbc = TRUE;
+ }
+ }
+ break;
+ case SEQFEAT_RNA :
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (rrp->type == 255 && rrp->ext.choice == 1) { /* NOTE(review): presumably "other" RNA whose ext is a name string - confirm */
+ name = (CharPtr) rrp->ext.value.ptrvalue;
+ if (StringCmp (name, "misc_RNA") == 0) {
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringCmp (gbq->qual, "product") != 0) continue;
+ name = gbq->val;
+ if (StringHasNoText (name)) continue;
+ if (IsITS (name)) {
+ cdp->its = TRUE;
+ }
+ }
+ } else if (StringCmp (name, "ncRNA") == 0 || StringCmp (name, "tmRNA") == 0) { /* empty branch: these names set no flag */
+ } else {
+ cdp->rnaother = TRUE;
+ if (IsITS (name)) {
+ cdp->its = TRUE;
+ }
+ }
+ } else if (rrp->type == 3 && rrp->ext.choice == 2) { /* NOTE(review): presumably tRNA with a tRNA ext - confirm */
+ if (StringDoesHaveText (comment)) {
+ if (StringNCmp (comment, "aa: ", 4) == 0) {
+ comment += 4; /* step past the "aa: " prefix before parsing */
+ }
+ residue = FindTrnaAA3 (comment);
+ if (residue > 0 && residue != 255) {
+ cdp->trnanote = TRUE;
+ }
+ residue = FindTrnaAA (comment);
+ if (residue > 0 && residue != 255) {
+ cdp->trnanote = TRUE;
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void ScoreDescriptor (
+ SeqDescrPtr sdp,
+ Pointer userdata
+)
+/* Descriptor visitor: counts GenBank blocks with a source string and flags snRNA/scRNA/snoRNA biomol in the ChangeData passed as userdata. */
+{
+ ChangeDataPtr cdp;
+ GBBlockPtr gbp;
+ MolInfoPtr mip;
+
+ if (sdp == NULL) return;
+ cdp = (ChangeDataPtr) userdata;
+ if (cdp == NULL) return;
+
+ switch (sdp->choice) {
+ case Seq_descr_genbank :
+ gbp = (GBBlockPtr) sdp->data.ptrvalue;
+ if (gbp != NULL) {
+ if (StringDoesHaveText (gbp->source)) {
+ (cdp->gbsource)++;
+ }
+ }
+ break;
+ case Seq_descr_molinfo :
+ mip = (MolInfoPtr) sdp->data.ptrvalue;
+ if (mip != NULL) {
+ switch (mip->biomol) {
+ case MOLECULE_TYPE_SNRNA:
+ case MOLECULE_TYPE_SCRNA:
+ case MOLECULE_TYPE_SNORNA:
+ cdp->oldbiomol = TRUE;
+ break;
+ default :
+ break;
+ }
+ }
+ break;
+ default :
+ break;
+ }
+}
+
+static void CheckForChanges (
+ SeqEntryPtr sep,
+ ChangeDataPtr cdp
+)
+/* Runs ScoreFeature over every feature and ScoreDescriptor over every descriptor in sep, accumulating into cdp. */
+{
+ if (sep == NULL || cdp == NULL) return;
+
+ VisitFeaturesInSep (sep, (Pointer) cdp, ScoreFeature);
+ VisitDescriptorsInSep (sep, (Pointer) cdp, ScoreDescriptor);
+}
+
+static void DoASNReport (
+ SeqEntryPtr sep,
+ CleanFlagPtr cfp
+)
+/* Applies normalize, basic and serious cleanup to sep in turn, then logs which stage changed the record plus the ChangeData findings, each tagged with cfp->buf. */
+{
+ Boolean bsec = FALSE, ssec = FALSE, norm = FALSE;
+ ChangeData cdbefore, cdafter;
+ CharPtr str1, str2, str3, str4;
+
+ if (sep == NULL || cfp == NULL) return;
+
+ MemSet ((Pointer) &cdbefore, 0, sizeof (ChangeData));
+ MemSet ((Pointer) &cdafter, 0, sizeof (ChangeData));
+
+ CheckForChanges (sep, &cdbefore);
+ /* serialize after each stage; a difference from the previous string marks that stage as having changed the record */
+ str1 = Se2Str (sep);
+ NormalizeDescriptorOrder (sep);
+ str2 = Se2Str (sep);
+ if (StringCmp (str1, str2) != 0) {
+ norm = TRUE;
+ }
+ BasicSeqEntryCleanup (sep);
+ str3 = Se2Str (sep);
+ if (StringCmp (str2, str3) != 0) {
+ bsec = TRUE;
+ }
+ SeriousSeqEntryCleanup (sep, NULL, NULL);
+ NormalizeDescriptorOrder (sep);
+ str4 = Se2Str (sep);
+ if (StringCmp (str3, str4) != 0) {
+ ssec = TRUE;
+ }
+
+ CheckForChanges (sep, &cdafter);
+ /* exactly one category is counted per record, checked in the order SSEC, BSEC, NORM, else OKAY */
+ if (ssec) {
+ (cfp->ssec)++;
+ (cfp->cumssec)++;
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "SSEC %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ } else if (bsec) {
+ (cfp->bsec)++;
+ (cfp->cumbsec)++;
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "BSEC %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ } else if (norm) {
+ (cfp->norm)++;
+ (cfp->cumnorm)++;
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "NORM %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ } else {
+ (cfp->okay)++;
+ (cfp->cumokay)++;
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "OKAY %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ /* flags below come from the scan taken before any cleanup was applied */
+ if (cdbefore.rubisco) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "RUB %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.rbc) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "RBC %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.its) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "ITS %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.rnaother) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "RNA %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.trnanote) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "TRN %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.oldbiomol) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "MOL %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ /* counts below are reported only when the before and after scans disagree */
+ if (cdbefore.protdesc != cdafter.protdesc) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "PRT %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.sfpnote != cdafter.sfpnote) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "COM %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.gbsource != cdafter.gbsource) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "SRC %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+ if (cdbefore.cdsconf != cdafter.cdsconf) {
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "CNF %s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+ }
+
+ MemFree (str1);
+ MemFree (str2);
+ MemFree (str3);
+ MemFree (str4);
+}
+
+static void DoGBFFReport (
+  SeqEntryPtr sep,
+  CleanFlagPtr cfp
+)
+/* Writes the flatfile before and after SeriousSeqEntryCleanupBulk to two temp files and runs cfp->ffdiff on them; compiled to a no-op unless OS_UNIX. */
+{
+#ifdef OS_UNIX
+  BioseqPtr    bsp;
+  Char         cmmd [3 * PATH_MAX + 64];  /* holds the ffdiff path, both temp paths and the fixed option text */
+  FILE         *fp;
+  SeqEntryPtr  fsep;
+  int          len;
+  Char         path1 [PATH_MAX];
+  Char         path2 [PATH_MAX];
+  CharPtr      rep = "reports";
+  SeqIdPtr     sip;
+
+  if (sep == NULL || cfp == NULL) return;
+  if (cfp->logfp != NULL) {
+    fprintf (cfp->logfp, "%s\n", cfp->buf);
+    fflush (cfp->logfp);
+  }
+
+  /* choose the -d argument from the Seq-id types on the first Bioseq; the last matching id wins */
+  fsep = FindNthBioseq (sep, 1);
+  if (fsep != NULL && fsep->choice == 1) {
+    bsp = (BioseqPtr) fsep->data.ptrvalue;
+    if (bsp != NULL) {
+      for (sip = bsp->id; sip != NULL; sip = sip->next) {
+        switch (sip->choice) {
+          case SEQID_GENBANK :
+            rep = "gbreports";
+            break;
+          case SEQID_EMBL :
+            rep = "ebreports";
+            break;
+          case SEQID_DDBJ :
+            rep = "djreports";
+            break;
+          case SEQID_OTHER :
+            rep = "rfreports";
+            break;
+          default :
+            break;
+        }
+      }
+    }
+  }
+
+  TmpNam (path1);
+  TmpNam (path2);
+
+  fp = FileOpen (path1, "w");
+  if (fp != NULL) {
+    SeqEntryToGnbk (sep, NULL, GENBANK_FMT, cfp->ffmode, NORMAL_STYLE, 0, 0, 0, NULL, fp);
+  }
+  FileClose (fp);
+  SeriousSeqEntryCleanupBulk (sep);
+  fp = FileOpen (path2, "w");
+  if (fp != NULL) {
+    SeqEntryToGnbk (sep, NULL, GENBANK_FMT, cfp->ffmode, NORMAL_STYLE, 0, 0, 0, NULL, fp);
+  }
+  FileClose (fp);
+
+  /* bounded formatting: a command that does not fit is skipped rather than overflowing cmmd or running truncated */
+  len = snprintf (cmmd, sizeof (cmmd), "%s -o %s -n %s -d %s", cfp->ffdiff, path1, path2, rep);
+  if (len > 0 && (size_t) len < sizeof (cmmd)) system (cmmd);
+  FileRemove (path1);  /* remove the temp files directly instead of shelling out to rm */
+  FileRemove (path2);
+#endif
+}
+
+static void DoModernizeReport (
+  SeqEntryPtr sep,
+  CleanFlagPtr cfp
+)
+/* Applies the gene, RNA and PCR-primer modernizers in turn and logs GEN / NCR / PCR with cfp->buf for each step that changed the serialized record. */
+{
+  CharPtr  str1, str2, str3, str4;
+
+  if (sep == NULL || cfp == NULL) return;  /* same guard as DoASNReport and DoGBFFReport; cfp is dereferenced below */
+  str1 = Se2Str (sep);
+  VisitFeaturesInSep (sep, NULL, ModGenes);
+  str2 = Se2Str (sep);
+  if (StringCmp (str1, str2) != 0) {
+    if (cfp->logfp != NULL) {
+      fprintf (cfp->logfp, "GEN %s\n", cfp->buf);
+      fflush (cfp->logfp);
+    }
+  }
+  VisitFeaturesInSep (sep, NULL, ModRNAs);
+  str3 = Se2Str (sep);
+  if (StringCmp (str2, str3) != 0) {
+    if (cfp->logfp != NULL) {
+      fprintf (cfp->logfp, "NCR %s\n", cfp->buf);
+      fflush (cfp->logfp);
+    }
+  }
+  VisitBioSourcesInSep (sep, NULL, ModPCRs);
+  str4 = Se2Str (sep);
+  if (StringCmp (str3, str4) != 0) {
+    if (cfp->logfp != NULL) {
+      fprintf (cfp->logfp, "PCR %s\n", cfp->buf);
+      fflush (cfp->logfp);
+    }
+  }
+  MemFree (str1);
+  MemFree (str2);
+  MemFree (str3);
+  MemFree (str4);
+}
+
static void DoCleanup (
SeqEntryPtr sep,
Uint2 entityID,
@@ -363,14 +898,63 @@ static void DoCleanup (
)
{
+ BioseqPtr bsp;
+ SeqEntryPtr fsep;
+ SeqIdPtr sip, siphead;
+
if (sep == NULL || cfp == NULL) return;
+ StringCpy (cfp->buf, "");
+ fsep = FindNthBioseq (sep, 1);
+ if (fsep != NULL && fsep->choice == 1) {
+ bsp = (BioseqPtr) fsep->data.ptrvalue;
+ if (bsp != NULL) {
+ siphead = SeqIdSetDup (bsp->id);
+ for (sip = siphead; sip != NULL; sip = sip->next) {
+ SeqIdStripLocus (sip);
+ }
+ SeqIdWrite (siphead, cfp->buf, PRINTID_FASTA_LONG, sizeof (cfp->buf));
+ SeqIdSetFree (siphead);
+ }
+ }
+
+ if (StringChr (cfp->report, 'r') != NULL) {
+ DoASNReport (sep, cfp);
+ return;
+ }
+ if (StringChr (cfp->report, 'g') != NULL) {
+ DoGBFFReport (sep, cfp);
+ return;
+ }
+ if (StringChr (cfp->report, 'm') != NULL) {
+ DoModernizeReport (sep, cfp);
+ return;
+ }
+
+ if (cfp->logfp != NULL) {
+ fprintf (cfp->logfp, "%s\n", cfp->buf);
+ fflush (cfp->logfp);
+ }
+
if (StringChr (cfp->clean, 'b') != NULL) {
BasicSeqEntryCleanup (sep);
}
if (StringChr (cfp->clean, 's') != NULL) {
SeriousSeqEntryCleanup (sep, NULL, NULL);
}
+ if (StringChr (cfp->clean, 'n') != NULL) {
+ NormalizeDescriptorOrder (sep);
+ }
+
+ if (StringChr (cfp->modernize, 'g') != NULL) {
+ VisitFeaturesInSep (sep, NULL, ModGenes);
+ }
+ if (StringChr (cfp->modernize, 'r') != NULL) {
+ VisitFeaturesInSep (sep, NULL, ModRNAs);
+ }
+ if (StringChr (cfp->modernize, 'p') != NULL) {
+ VisitBioSourcesInSep (sep, NULL, ModPCRs);
+ }
if (cfp->taxon) {
Taxon3ReplaceOrgInSeqEntry (sep, FALSE);
@@ -407,6 +991,11 @@ static void DoCleanup (
VisitFeaturesInSep (sep, NULL, RemoveUnnecGeneXref);
}
+ if (StringChr (cfp->desc, 't') != NULL) {
+ VisitDescriptorsInSep (sep, NULL, MarkTitles);
+ DeleteMarkedObjects (entityID, 0, NULL);
+ }
+
if (StringChr (cfp->mods, 'd') != NULL) {
SeqMgrIndexFeatures (entityID, 0);
DoAutoDef (sep, entityID);
@@ -420,7 +1009,6 @@ static void CleanupSingleRecord (
{
AsnIoPtr aip, aop;
- AsnTypePtr atp = NULL;
BioseqPtr bsp;
BioseqSetPtr bssp;
Pointer dataptr = NULL;
@@ -566,13 +1154,10 @@ static void CleanupMultipleRecord (
AsnIoPtr aip, aop;
AsnTypePtr atp;
DataVal av;
- BioseqPtr bsp;
- Char buf [41];
Uint2 entityID;
FILE *fp;
- SeqEntryPtr fsep;
size_t len;
- Char longest [41];
+ Char longest [64];
Int4 numrecords;
Char path [PATH_MAX];
CharPtr ptr;
@@ -689,25 +1274,13 @@ static void CleanupMultipleRecord (
entityID = ObjMgrGetEntityIDForChoice (sep);
- fsep = FindNthBioseq (sep, 1);
- if (fsep != NULL && fsep->choice == 1) {
- bsp = (BioseqPtr) fsep->data.ptrvalue;
- if (bsp != NULL) {
- SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
- if (cfp->logfp != NULL) {
- fprintf (cfp->logfp, "%s\n", buf);
- fflush (cfp->logfp);
- }
- }
- }
-
starttime = GetSecs ();
DoCleanup (sep, entityID, cfp);
stoptime = GetSecs ();
if (stoptime - starttime > worsttime) {
worsttime = stoptime - starttime;
- StringCpy (longest, buf);
+ StringCpy (longest, cfp->buf);
}
numrecords++;
@@ -738,10 +1311,16 @@ static void CleanupMultipleRecord (
#else
FileClose (fp);
#endif
- if (cfp->logfp != NULL && (! StringHasNoText (longest))) {
- fprintf (cfp->logfp, "Longest processing time %ld seconds on %s\n",
- (long) worsttime, longest);
+ if (cfp->logfp != NULL) {
fprintf (cfp->logfp, "Total number of records %ld\n", (long) numrecords);
+ if (StringDoesHaveText (longest)) {
+ fprintf (cfp->logfp, "Longest processing time %ld seconds on %s\n",
+ (long) worsttime, longest);
+ }
+ if (cfp->okay > 0 || cfp->norm > 0 || cfp->bsec > 0 || cfp->ssec > 0) {
+ fprintf (cfp->logfp, "%ld OKAY, %ld NORM, %ld BSEC, %ld SSEC\n",
+ (long) cfp->okay, (long) cfp->norm, (long) cfp->bsec, (long) cfp->ssec);
+ }
fflush (cfp->logfp);
}
}
@@ -758,6 +1337,11 @@ static void CleanupOneRecord (
cfp = (CleanFlagPtr) userdata;
if (cfp == NULL) return;
+ cfp->okay = 0;
+ cfp->bsec = 0;
+ cfp->ssec = 0;
+ cfp->norm = 0;
+
if (cfp->batch) {
CleanupMultipleRecord (filename, cfp);
} else {
@@ -778,12 +1362,17 @@ static void CleanupOneRecord (
#define c_argCompressed 8
#define L_argLogFile 9
#define R_argRemote 10
-#define K_argClean 11
-#define N_argLink 12
-#define F_argFeat 13
-#define M_argMods 14
-#define T_argTaxonLookup 15
-#define P_argPubLookup 16
+#define Q_argReport 11
+#define q_argFfDiff 12
+#define m_argFfMode 13
+#define K_argClean 14
+#define U_argModernize 15
+#define N_argLink 16
+#define F_argFeat 17
+#define D_argDesc 18
+#define M_argMods 19
+#define T_argTaxonLookup 20
+#define P_argPubLookup 21
Args myargs [] = {
{"Path to Files", NULL, NULL, NULL,
@@ -814,10 +1403,29 @@ Args myargs [] = {
TRUE, 'L', ARG_FILE_OUT, 0.0, 0, NULL},
{"Remote Fetching from ID", "F", NULL, NULL,
TRUE, 'R', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"Report\n"
+ " r ASN.1 BSEC/SSEC Report\n"
+ " g GenBank SSEC Diff\n"
+ " m Modernize Gene/RNA/PCR", NULL, NULL, NULL,
+ TRUE, 'Q', ARG_STRING, 0.0, 0, NULL},
+ {"Ffdiff Executable", "/netopt/genbank/subtool/bin/ffdiff", NULL, NULL,
+ TRUE, 'q', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Flatfile Mode\n"
+ " r Release\n"
+ " e Entrez\n"
+ " s Sequin\n"
+ " d Dump\n", NULL, NULL, NULL,
+ TRUE, 'm', ARG_STRING, 0.0, 0, NULL},
{"Cleanup\n"
" b BasicSeqEntryCleanup\n"
- " s SeriousSeqEntryCleanup", NULL, NULL, NULL,
+ " s SeriousSeqEntryCleanup\n"
+ " n Normalize Descriptor Order", NULL, NULL, NULL,
TRUE, 'K', ARG_STRING, 0.0, 0, NULL},
+ {"Modernize\n"
+ " g Gene\n"
+ " r RNA\n"
+ " p PCR Primers", NULL, NULL, NULL,
+ TRUE, 'U', ARG_STRING, 0.0, 0, NULL},
{"Link\n"
" o LinkCDSmRNAbyOverlap\n"
" p LinkCDSmRNAbyProduct\n"
@@ -829,6 +1437,9 @@ Args myargs [] = {
" d Remove db_xref\n"
" r Remove Redundant Gene xref", NULL, NULL, NULL,
TRUE, 'F', ARG_STRING, 0.0, 0, NULL},
+ {"Descriptor\n"
+ " t Remove Title", NULL, NULL, NULL,
+ TRUE, 'D', ARG_STRING, 0.0, 0, NULL},
{"Miscellaneous\n"
" d Automatic Definition Line", NULL, NULL, NULL,
TRUE, 'M', ARG_STRING, 0.0, 0, NULL},
@@ -841,7 +1452,7 @@ Args myargs [] = {
Int2 Main (void)
{
- Char app [64], type;
+ Char app [64], mode, type;
CleanFlagData cfd;
CharPtr directory, filter, infile, logfile, outfile, results, str, suffix;
Boolean remote;
@@ -937,9 +1548,41 @@ Int2 Main (void)
remote = (Boolean) myargs [R_argRemote].intvalue;
+ cfd.report = myargs [Q_argReport].strvalue;
+ cfd.ffdiff = myargs [q_argFfDiff].strvalue;
+
+ str = myargs [m_argFfMode].strvalue;
+ TrimSpacesAroundString (str);
+ if (StringDoesHaveText (str)) {
+ mode = str [0];
+ } else {
+ mode = 'e';
+ }
+
+ mode = TO_LOWER (mode);
+ switch (mode) {
+ case 'r' :
+ cfd.ffmode = RELEASE_MODE;
+ break;
+ case 'e' :
+ cfd.ffmode = ENTREZ_MODE;
+ break;
+ case 's' :
+ cfd.ffmode = SEQUIN_MODE;
+ break;
+ case 'd' :
+ cfd.ffmode = DUMP_MODE;
+ break;
+ default :
+ cfd.ffmode = ENTREZ_MODE;
+ break;
+ }
+
cfd.clean = myargs [K_argClean].strvalue;
+ cfd.modernize = myargs [U_argModernize].strvalue;
cfd.link = myargs [N_argLink].strvalue;
cfd.feat = myargs [F_argFeat].strvalue;
+ cfd.desc = myargs [D_argDesc].strvalue;
cfd.mods = myargs [M_argMods].strvalue;
cfd.taxon = (Boolean) myargs [T_argTaxonLookup].intvalue;
cfd.pub = (Boolean) myargs [P_argPubLookup].intvalue;
@@ -974,10 +1617,17 @@ Int2 Main (void)
starttime = GetSecs ();
if (StringDoesHaveText (directory)) {
+ if (StringCmp (directory, results) == 0) {
+ Message (MSG_POSTERR, "-r results path must be different than -p data path");
+ if (cfd.logfp != NULL) {
+ fprintf (cfd.logfp, "-r results path must be different than -p data path\n");
+ }
+ } else {
- cfd.results = results;
+ cfd.results = results;
- DirExplore (directory, NULL, suffix, FALSE, CleanupOneRecord, (Pointer) &cfd);
+ DirExplore (directory, NULL, suffix, FALSE, CleanupOneRecord, (Pointer) &cfd);
+ }
} else if (StringDoesHaveText (infile) && StringDoesHaveText (outfile)) {
@@ -990,6 +1640,10 @@ Int2 Main (void)
runtime = stoptime - starttime;
if (cfd.logfp != NULL) {
fprintf (cfd.logfp, "Finished in %ld seconds\n", (long) runtime);
+ if (cfd.cumokay > 0 || cfd.cumnorm > 0 || cfd.cumbsec > 0 || cfd.cumssec > 0) {
+ fprintf (cfd.logfp, "Cumulative counts - %ld OKAY, %ld NORM, %ld BSEC, %ld SSEC\n",
+ (long) cfd.cumokay, (long) cfd.cumnorm, (long) cfd.cumbsec, (long) cfd.cumssec);
+ }
FileClose (cfd.logfp);
}
diff --git a/demo/copymat.c b/demo/copymat.c
index cc09a1f7..761503f7 100644
--- a/demo/copymat.c
+++ b/demo/copymat.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: copymat.c,v 6.48 2008/02/01 14:04:25 madden Exp $";
+static char const rcsid[] = "$Id: copymat.c,v 6.49 2008/11/04 16:44:38 maning Exp $";
/*
* ===========================================================================
@@ -36,6 +36,9 @@ Contents: main routines for copymatrices program to convert
score matrices output by makematrices into a single byte-encoded file.
$Log: copymat.c,v $
+Revision 6.49 2008/11/04 16:44:38 maning
+add type cast to fix compilation error
+
Revision 6.48 2008/02/01 14:04:25 madden
LookupTableWrapInit prototype change
@@ -448,19 +451,26 @@ static Boolean RPSUpdateOffsets(BlastAaLookupTable *lookup)
Int4 index;
Int4 num_used;
Int4 offset_diff;
+ AaLookupBackboneCell *bbc;
+ Int4 *ovf;
len = lookup->backbone_size;
offset_diff = lookup->word_length - 1;
+ // database assumes backbone type of lookup table
+ ASSERT(lookup->bone_type == eBackbone);
+ bbc = (AaLookupBackboneCell *)(lookup->thick_backbone);
+ ovf = (Int4 *)(lookup->overflow);
+
/* Walk through table, copying info into mod_lt[] */
for(index = 0; index < len; index++) {
- if((num_used=lookup->thick_backbone[index].num_used) <= 3)
+ if((num_used=bbc[index].num_used) <= 3)
{
while (num_used > 0)
{
num_used--;
- lookup->thick_backbone[index].payload.entries[num_used] += offset_diff;
+ bbc[index].payload.entries[num_used] += offset_diff;
}
}
else
@@ -468,7 +478,7 @@ static Boolean RPSUpdateOffsets(BlastAaLookupTable *lookup)
while (num_used > 0)
{
num_used--;
- lookup->overflow [ lookup->thick_backbone[index].payload.overflow_cursor + num_used] += offset_diff;
+ ovf[ bbc[index].payload.overflow_cursor + num_used] += offset_diff;
}
}
}
@@ -493,6 +503,13 @@ Boolean RPSUpdatePointers(BlastAaLookupTable *lookup, Uint4 *new_overflow, Uint4
Uint4 *new_overflow_cursor;
Int4 *src;
Int4 first_hit;
+ AaLookupBackboneCell *bbc;
+ Int4 *ovf;
+
+ // database assumes backbone type of lookup table
+ ASSERT(lookup->bone_type == eBackbone);
+ bbc = (AaLookupBackboneCell *)(lookup->thick_backbone);
+ ovf = (Int4 *)(lookup->overflow);
len = lookup->backbone_size;
@@ -501,22 +518,22 @@ Boolean RPSUpdatePointers(BlastAaLookupTable *lookup, Uint4 *new_overflow, Uint4
/* Walk through table, copying info into mod_lt[] */
for(index = 0; index < len; index++) {
- if(lookup->thick_backbone[index].num_used <= 3)
+ if(bbc[index].num_used <= 3)
continue;
- src = &(lookup->overflow[lookup->thick_backbone[index].payload.overflow_cursor]);
- MemCpy(new_overflow_cursor, &src[1], sizeof(Uint4)*(lookup->thick_backbone[index].num_used-1));
+ src = &(ovf[bbc[index].payload.overflow_cursor]);
+ MemCpy(new_overflow_cursor, &src[1], sizeof(Uint4)*(bbc[index].num_used-1));
mlpp_address = (long) new_overflow_cursor;
- new_overflow_cursor += lookup->thick_backbone[index].num_used-1;
+ new_overflow_cursor += bbc[index].num_used-1;
first_hit = src[0];
mlpp_address -= (long) start_address;
/* Now this is new relative address - usually small */
- lookup->thick_backbone[index].payload.entries[1] = (Int4) mlpp_address;
- lookup->thick_backbone[index].payload.entries[0] = first_hit;
+ bbc[index].payload.entries[1] = (Int4) mlpp_address;
+ bbc[index].payload.entries[0] = first_hit;
}
diff --git a/demo/cspeedtest.c b/demo/cspeedtest.c
index 6157d0c3..5b682ac0 100644
--- a/demo/cspeedtest.c
+++ b/demo/cspeedtest.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/17/07
*
-* $Revision: 1.10 $
+* $Revision: 1.20 $
*
* File Description:
*
@@ -55,8 +55,16 @@
#include <tofasta.h>
#include <asn2gnbk.h>
#include <valid.h>
+#include <suggslp.h>
-#define CSPEEDTEST_APP_VER "1.3"
+NLM_EXTERN CharPtr NewCreateDefLine (
+ ItemInfoPtr iip,
+ BioseqPtr bsp,
+ Boolean ignoreTitle,
+ Boolean extProtTitle
+);
+
+#define CSPEEDTEST_APP_VER "1.9"
CharPtr CSPEEDTEST_APPLICATION = CSPEEDTEST_APP_VER;
@@ -69,11 +77,14 @@ typedef struct cspeedflags {
Int4 maxcount;
CharPtr io;
CharPtr clean;
+ CharPtr skip;
CharPtr index;
CharPtr seq;
CharPtr feat;
CharPtr desc;
CharPtr verify;
+ BioseqPtr nucbsp;
+ Int2 genCode;
AsnModulePtr amp;
AsnTypePtr atp_bss;
AsnTypePtr atp_bsss;
@@ -94,6 +105,111 @@ static void DoVisitFeaturesTest (
/* empty visit callback */
}
+static void DoVisitCodingRegions (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+/* Feature visitor: for each coding region, prints the product id (or "?") and then one start/stop line per location interval to cfp->ofp. */
+{
+ BioseqPtr bsp;
+ CharPtr caret5, caret3;
+ CSpeedFlagPtr cfp;
+ Char id [64];
+ SeqLocPtr loc, slp;
+ Boolean partial5, partial3;
+ SeqIdPtr sip;
+ Int4 start, stop;
+
+ if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;
+ cfp = (CSpeedFlagPtr) userdata;
+ if (cfp == NULL || cfp->ofp == NULL) return;
+
+ loc = sfp->location;
+ bsp = BioseqFindFromSeqLoc (loc);
+ if (bsp == NULL) return;
+
+ StringCpy (id, "?"); /* placeholder kept when the feature has no product id */
+ if (sfp->product != NULL) {
+ sip = SeqLocId (sfp->product);
+ if (sip != NULL) {
+ SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id) - 1);
+ }
+ }
+
+ fprintf (cfp->ofp, "%s\n", id);
+ slp = SeqLocFindNext (loc, NULL);
+ while (slp != NULL) {
+ start = GetOffsetInBioseq (slp, bsp, SEQLOC_START) + 1; /* printed positions are the returned offsets plus one */
+ stop = GetOffsetInBioseq (slp, bsp, SEQLOC_STOP) + 1;
+ caret5 = "";
+ caret3 = "";
+ CheckSeqLocForPartial (slp, &partial5, &partial3);
+ if (partial5) {
+ caret5 = "<";
+ }
+ if (partial3) {
+ caret3 = ">";
+ }
+ fprintf (cfp->ofp, "%s%ld\t%s%ld\n", caret5, (long) start, caret3, (long) stop);
+ slp = SeqLocFindNext (loc, slp);
+ }
+}
+
+static void DoSuggestIntervals (
+ BioseqPtr bsp,
+ Pointer userdata
+)
+/* Bioseq visitor: for each amino acid Bioseq, calls SuggestCodingRegion against cfp->nucbsp and prints the resulting intervals to cfp->ofp. */
+{
+ CharPtr caret5, caret3;
+ CSpeedFlagPtr cfp;
+ Char id [64];
+ SeqLocPtr loc, slp;
+ Boolean partial5, partial3;
+ SeqAnnotPtr sap;
+ SeqFeatPtr sfp;
+ SeqIdPtr sip;
+ Int4 start, stop;
+
+ if (bsp == NULL) return;
+ if (! ISA_aa (bsp->mol)) return;
+ cfp = (CSpeedFlagPtr) userdata;
+ if (cfp == NULL || cfp->ofp == NULL || cfp->nucbsp == NULL) return;
+
+ sip = SeqIdFindBest (bsp->id, 0);
+ if (sip == NULL) return;
+ SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id) - 1);
+
+ sap = SuggestCodingRegion (cfp->nucbsp, bsp, cfp->genCode);
+ if (sap == NULL) return;
+ if (sap->type == 1) { /* NOTE(review): presumably type 1 is a feature table - confirm */
+ sfp = (SeqFeatPtr) sap->data; /* only the first entry of the annotation is examined */
+ if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
+ loc = sfp->location;
+ if (loc != NULL) {
+ fprintf (cfp->ofp, "%s\n", id);
+ slp = SeqLocFindNext (loc, NULL);
+ while (slp != NULL) {
+ start = GetOffsetInBioseq (slp, cfp->nucbsp, SEQLOC_START) + 1;
+ stop = GetOffsetInBioseq (slp, cfp->nucbsp, SEQLOC_STOP) + 1;
+ caret5 = "";
+ caret3 = "";
+ CheckSeqLocForPartial (slp, &partial5, &partial3);
+ if (partial5) {
+ caret5 = "<";
+ }
+ if (partial3) {
+ caret3 = ">";
+ }
+ fprintf (cfp->ofp, "%s%ld\t%s%ld\n", caret5, (long) start, caret3, (long) stop);
+ slp = SeqLocFindNext (loc, slp);
+ }
+ }
+ }
+ }
+ SeqAnnotFree (sap);
+}
+
static void DoGeneOverlapPrintTest (
SeqFeatPtr sfp,
Pointer userdata
@@ -215,6 +331,105 @@ static void DoFastaDefline (
}
}
+/* Bioseq visitor that prints a FASTA-style defline (">id title") whose title
+   comes from NewCreateDefLine.  The skip string in the CSpeedFlag block
+   suppresses Bioseqs whose parent set is a segset or parts set ('s') and
+   virtual Bioseqs ('v').  The title is generated even when no output file
+   is open; "?" is printed when NewCreateDefLine yields no text. */
+
+static void DoNewFastaDefline (
+  BioseqPtr bsp,
+  Pointer userdata
+)
+
+{
+  BioseqSetPtr   bssp;
+  CSpeedFlagPtr  cfp;
+  Char           id [128];
+  CharPtr        title;
+
+  if (bsp == NULL) return;
+  cfp = (CSpeedFlagPtr) userdata;
+  if (cfp == NULL) return;
+
+  if (StringChr (cfp->skip, 's') != NULL) {
+    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
+      bssp = (BioseqSetPtr) bsp->idx.parentptr;
+      if (bssp != NULL) {
+        if (bssp->_class == BioseqseqSet_class_segset ||
+            bssp->_class == BioseqseqSet_class_parts) return;
+      }
+    }
+  }
+  if (StringChr (cfp->skip, 'v') != NULL) {
+    if (bsp->repr == Seq_repr_virtual) return;
+  }
+
+  id [0] = '\0';
+  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
+  title = NewCreateDefLine (NULL, bsp, FALSE, FALSE);
+  if (StringHasNoText (title)) {
+    /* a blank but allocated title must be released before it is
+       replaced, otherwise it leaks on every such Bioseq */
+    MemFree (title);
+    title = StringSave ("?");
+  }
+
+  if (cfp->ofp != NULL) {
+    fprintf (cfp->ofp, ">%s %s\n", id, title);
+  }
+
+  MemFree (title);
+}
+
+/* Builds the defline for bsp twice, once with CreateDefLineExEx and once
+   with NewCreateDefLine, passing ignoreExisting to both.  When the two
+   strings differ, the pair is written as "< id old" / "> id new" to the
+   output file (if open) and to stdout.  An empty NewCreateDefLine result
+   is represented as "?" before the comparison. */
+
+static void DoFastaComp (
+  BioseqPtr bsp,
+  Pointer userdata,
+  Boolean ignoreExisting
+)
+
+{
+  Char           buf [4096];
+  CSpeedFlagPtr  cfp;
+  Char           id [128];
+  CharPtr        title;
+
+  if (bsp == NULL) return;
+  cfp = (CSpeedFlagPtr) userdata;
+  if (cfp == NULL) return;
+
+  id [0] = '\0';
+  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
+  buf [0] = '\0';
+  CreateDefLineExEx (NULL, bsp, buf, sizeof (buf) - 1, 0,
+                     NULL, NULL, ignoreExisting, FALSE);
+  title = NewCreateDefLine (NULL, bsp, ignoreExisting, FALSE);
+  if (StringHasNoText (title)) {
+    /* a blank but allocated title must be released before it is
+       replaced, otherwise it leaks on every such Bioseq */
+    MemFree (title);
+    title = StringSave ("?");
+  }
+
+  if (StringCmp (buf, title) != 0) {
+    if (cfp->ofp != NULL) {
+      fprintf (cfp->ofp, "< %s %s\n", id, buf);
+      fprintf (cfp->ofp, "> %s %s\n", id, title);
+    }
+    printf ("< %s %s\n", id, buf);
+    printf ("> %s %s\n", id, title);
+    fflush (stdout);
+  }
+
+  MemFree (title);
+}
+
+/* Bioseq visitor: defline comparison with ignoreExisting set to FALSE. */
+
+static void DoFastaExist (
+  BioseqPtr bsp,
+  Pointer userdata
+)
+
+{
+  const Boolean ignoreExisting = FALSE;
+
+  DoFastaComp (bsp, userdata, ignoreExisting);
+}
+
+/* Bioseq visitor: defline comparison with ignoreExisting set to TRUE. */
+
+static void DoFastaRegen (
+  BioseqPtr bsp,
+  Pointer userdata
+)
+
+{
+  const Boolean ignoreExisting = TRUE;
+
+  DoFastaComp (bsp, userdata, ignoreExisting);
+}
+
static void DoFastaFeat (
SeqFeatPtr sfp,
Pointer userdata
@@ -344,6 +559,20 @@ static void LIBCALLBACK ValidCallback (
fprintf (fp, "\n");
}
+/* Descriptor visitor: flags title descriptors for deletion.  Only
+   descriptors whose extended field is non-zero are touched, because the
+   deleteme flag is reached by viewing the descriptor as an ObjValNode.
+   userdata is not used. */
+
+static void MarkTitles (
+  SeqDescrPtr sdp,
+  Pointer userdata
+)
+
+{
+  if (sdp != NULL &&
+      sdp->choice == Seq_descr_title &&
+      sdp->extended != 0) {
+    ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
+  }
+}
+
static void DoProcess (
SeqEntryPtr sep,
Uint2 entityID,
@@ -351,11 +580,19 @@ static void DoProcess (
)
{
+ Char id [64];
ErrSev oldErrSev;
ValidStructPtr vsp;
if (sep == NULL || cfp == NULL) return;
+ if (StringChr (cfp->clean, 't') != NULL) {
+ VisitDescriptorsInSep (sep, NULL, MarkTitles);
+ DeleteMarkedObjects (entityID, 0, NULL);
+ }
+ if (StringChr (cfp->clean, 'a') != NULL) {
+ AssignIDsInEntity (entityID, 0, NULL);
+ }
if (StringChr (cfp->clean, 'b') != NULL) {
BasicSeqEntryCleanup (sep);
}
@@ -367,15 +604,49 @@ static void DoProcess (
SeqMgrIndexFeatures (entityID, 0);
}
+ if (StringChr (cfp->seq, 'c') != NULL) {
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaExist);
+ }
+ if (StringChr (cfp->seq, 'C') != NULL) {
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRegen);
+ }
if (StringChr (cfp->seq, 's') != NULL) {
VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
}
+ if (StringChr (cfp->seq, 'S') != NULL) {
+ if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
+ SeqMgrIndexFeatures (entityID, 0);
+ }
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
+ }
if (StringChr (cfp->seq, 'r') != NULL) {
VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRaw);
}
if (StringChr (cfp->seq, 'd') != NULL) {
VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
}
+ if (StringChr (cfp->seq, 'D') != NULL) {
+ if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
+ SeqMgrIndexFeatures (entityID, 0);
+ }
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
+ }
+ if (StringChr (cfp->seq, 'T') != NULL) {
+ VisitDescriptorsInSep (sep, NULL, MarkTitles);
+ DeleteMarkedObjects (entityID, 0, NULL);
+ SeqMgrIndexFeatures (entityID, 0);
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
+ }
+ if (StringChr (cfp->seq, 'x') != NULL) {
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
+ }
+ if (StringChr (cfp->seq, 'X') != NULL) {
+ VisitDescriptorsInSep (sep, NULL, MarkTitles);
+ DeleteMarkedObjects (entityID, 0, NULL);
+ SeqMgrIndexFeatures (entityID, 0);
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
+ }
+
if (StringChr (cfp->seq, 'f') != NULL) {
VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaFeat);
}
@@ -408,6 +679,39 @@ static void DoProcess (
SeqEntryToGnbk (sep, NULL, FTABLE_FMT, SEQUIN_MODE, NORMAL_STYLE,
0, 0, SHOW_PROT_FTABLE, NULL, cfp->ofp);
}
+ if (StringChr (cfp->feat, 's') != NULL) {
+ if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
+ SeqMgrIndexFeatures (entityID, 0);
+ }
+ cfp->nucbsp = FindNucBioseq (sep);
+ if (cfp->nucbsp != NULL) {
+ BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
+ SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
+ fprintf (cfp->ofp, "%s\n", id);
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
+ cfp->nucbsp = NULL;
+ cfp->genCode = 0;
+ }
+ }
+ if (StringChr (cfp->feat, 'S') != NULL) {
+ if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
+ SeqMgrIndexFeatures (entityID, 0);
+ }
+ cfp->nucbsp = FindNucBioseq (sep);
+ if (cfp->nucbsp != NULL) {
+ BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
+ SetBatchSuggestNucleotide (cfp->nucbsp, cfp->genCode);
+ SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
+ fprintf (cfp->ofp, "%s\n", id);
+ VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
+ ClearBatchSuggestNucleotide ();
+ cfp->nucbsp = NULL;
+ cfp->genCode = 0;
+ }
+ }
+ if (StringChr (cfp->feat, 'c') != NULL) {
+ VisitFeaturesInSep (sep, (Pointer) cfp, DoVisitCodingRegions);
+ }
if (StringChr (cfp->desc, 'b') != NULL) {
}
@@ -782,6 +1086,7 @@ static void ProcessMultipleRecord (
}
starttime = GetSecs ();
+
for (x = 0; x < cfp->maxcount; x++) {
DoProcess (sep, entityID, cfp);
}
@@ -856,11 +1161,12 @@ static void ProcessOneRecord (
#define X_argMaxCount 11
#define O_argInOut 12
#define K_argClean 13
-#define I_argIndex 14
-#define S_argSeq 15
-#define F_argFeat 16
-#define D_argDesc 17
-#define V_argVerify 18
+#define P_argSkip 14
+#define I_argIndex 15
+#define S_argSeq 16
+#define F_argFeat 17
+#define D_argDesc 18
+#define V_argVerify 19
Args myargs [] = {
{"Path to Files", NULL, NULL, NULL,
@@ -901,16 +1207,29 @@ Args myargs [] = {
" wb Write Binary ASN.1", NULL, NULL, NULL,
TRUE, 'O', ARG_STRING, 0.0, 0, NULL},
{"Cleanup\n"
+ " t Remove Titles\n"
+ " a AssignIDsInEntity\n"
" b BasicSeqEntryCleanup\n"
" s SeriousSeqEntryCleanup", NULL, NULL, NULL,
TRUE, 'K', ARG_STRING, 0.0, 0, NULL},
+ {"Skip\n"
+ " s Segmented Set Components\n"
+ " v Virtual Bioseqs", NULL, NULL, NULL,
+ TRUE, 'P', ARG_STRING, 0.0, 0, NULL},
{"Index\n"
" f Feature Indexing", NULL, NULL, NULL,
TRUE, 'I', ARG_STRING, 0.0, 0, NULL},
{"Sequence\n"
+ " c Compare FASTA Deflines\n"
+ " C Compare Regenerated FASTA Deflines\n"
" s FASTA of Sequence\n"
+ " S Indexed FASTA\n"
" r Raw FASTA no Defline\n"
" d Just FASTA Defline\n"
+ " D Indexed FASTA Defline\n"
+ " T Regenerate FASTA Titles\n"
+ " x New FASTA Titles\n"
+ " X Regenerate new FASTA Titles\n"
" f FASTA by Feature\n"
" t FASTA of Translation", NULL, NULL, NULL,
TRUE, 'S', ARG_STRING, 0.0, 0, NULL},
@@ -921,7 +1240,10 @@ Args myargs [] = {
" x Gene by Xref\n"
" o Operon by Overlap\n"
" d Feature by ID\n"
- " t Feature Table", NULL, NULL, NULL,
+ " t Feature Table\n"
+ " s Slow Suggest Intervals\n"
+ " S Indexed Suggest Intervals\n"
+ " c Coding Region Intervals", NULL, NULL, NULL,
TRUE, 'F', ARG_STRING, 0.0, 0, NULL},
{"Descriptor\n"
" b BioSource\n"
@@ -1042,6 +1364,7 @@ Int2 Main (void)
cfd.io = myargs [O_argInOut].strvalue;
cfd.clean = myargs [K_argClean].strvalue;
+ cfd.skip = myargs [P_argSkip].strvalue;
cfd.index = myargs [I_argIndex].strvalue;
cfd.seq = myargs [S_argSeq].strvalue;
cfd.feat = myargs [F_argFeat].strvalue;
@@ -1089,6 +1412,7 @@ Int2 Main (void)
fprintf (cfd.logfp, "Finished in %ld seconds\n", (long) runtime);
FileClose (cfd.logfp);
}
+ printf ("Finished in %ld seconds\n", (long) runtime);
if (remote) {
PubSeqFetchDisable ();
diff --git a/demo/entrez2.c b/demo/entrez2.c
index 3977579d..b5e4633e 100644
--- a/demo/entrez2.c
+++ b/demo/entrez2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 06/16/00
*
-* $Revision: 6.28 $
+* $Revision: 6.29 $
*
* File Description:
*
@@ -61,7 +61,7 @@
#include <entrez2.h>
-#define ENTREZ_APP_VERSION "9.5"
+#define ENTREZ_APP_VERSION "9.6"
#define MAX_QUERY_FORMS 256
diff --git a/demo/formatrpsdb.c b/demo/formatrpsdb.c
index f6420407..35276ce0 100644
--- a/demo/formatrpsdb.c
+++ b/demo/formatrpsdb.c
@@ -1,4 +1,4 @@
-static char const rcsid[] = "$Id: formatrpsdb.c,v 1.25 2007/05/07 13:29:11 kans Exp $";
+static char const rcsid[] = "$Id: formatrpsdb.c,v 1.28 2008/11/04 16:26:59 boratyng Exp $";
/*****************************************************************************
@@ -38,6 +38,17 @@ static char const rcsid[] = "$Id: formatrpsdb.c,v 1.25 2007/05/07 13:29:11 kans
***************************************************************************
$Log: formatrpsdb.c,v $
+ Revision 1.28 2008/11/04 16:26:59 boratyng
+ Synchronized with the new BlastAaLookupTable implementation
+
+ Revision 1.27 2008/08/13 13:33:21 ucko
+ Correct previous revision to build even with strict compilers (MSVC, MIPSpro):
+ - In FileWriteInChunks, cast ptr to char* to allow adding to it.
+ - In RPS_DbClose, drop unused mid-block declaration of chunk.
+
+ Revision 1.26 2008/08/12 16:53:12 boratyng
+ Added function that calls FileWrite for chunks of data in order to avoid FileWrite warning: size > SIZE_MAX
+
Revision 1.25 2007/05/07 13:29:11 kans
added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
@@ -93,12 +104,12 @@ static char const rcsid[] = "$Id: formatrpsdb.c,v 1.25 2007/05/07 13:29:11 kans
1. Modify scoremat IO to comply with new scoremat spec
2. Remove check that residue frequencies read from scoremat are <= 1.0
3. Add input argument to specify the underlying score matrix, or to
- use the score matrix specified in the scoremat if present
+ use the score matrix specified in the scoremat if present
Revision 1.8 2004/09/15 18:06:13 papadopo
1. Verify that the scale factor is the same for all input scoremats
2. Do not use the scoremat 'identifier' field to determine the underlying
- score matrix; hardwire to BLOSUM62 temporarily
+ score matrix; hardwire to BLOSUM62 temporarily
3. Use BlastSeqLoc's instead of ListNodes
Revision 1.7 2004/08/25 14:47:50 camacho
@@ -897,6 +908,24 @@ Int2 RPSAddSequence(RPS_DbInfo *info,
return 0;
}
+/* Writes n elements of the given size from ptr to stream, splitting the
+   request into several FileWrite calls so that no single call is asked to
+   handle more than SIZE_MAX bytes.
+
+   Returns the number of elements written.  A value smaller than n means a
+   FileWrite call came up short; the remaining chunks are then not
+   attempted, so the file never receives data after a gap.  A NULL ptr, a
+   zero size or a zero n writes nothing and returns 0. */
+
+size_t FileWriteInChunks(const void* ptr, size_t size, size_t n, FILE* stream)
+{
+    const char* cursor = (const char*)ptr;
+    size_t chunk;
+    size_t remaining = n;
+    size_t count = 0;
+
+    /* nothing to write; this also keeps the division below from
+       dividing by zero */
+    if (ptr == NULL || size == 0 || n == 0)
+        return 0;
+
+    /* largest element count whose byte length still fits in size_t */
+    chunk = SIZE_MAX / size;
+
+    while (remaining > 0) {
+        size_t request = (remaining < chunk ? remaining : chunk);
+        size_t written = FileWrite(cursor, size, request, stream);
+
+        count += written;
+        if (written < request)
+            break;
+
+        cursor += request * size;
+        remaining -= request;
+    }
+
+    return count;
+}
+
+
/* Once all sequences have been processed, perform
final setup on the BLAST lookup table and finish
up the RPS files */
@@ -913,7 +942,7 @@ void RPS_DbClose(RPS_DbInfo *info)
/* Pack the lookup table into its compressed form */
- if (BlastAaLookupFinalize(info->lookup) != 0) {
+ if (BlastAaLookupFinalize(info->lookup, eBackbone) != 0) {
ErrPostEx(SEV_WARNING, 0, 0, "Failed to compress lookup table");
}
else {
@@ -933,7 +962,8 @@ void RPS_DbClose(RPS_DbInfo *info)
/* for each lookup table cell */
for (index = cursor = 0; index < lut->backbone_size; index++) {
- cell = &lut->thick_backbone[index];
+ cell = (AaLookupBackboneCell*)lut->thick_backbone + index;
+
if (cell->num_used == 0)
continue;
@@ -956,11 +986,12 @@ void RPS_DbClose(RPS_DbInfo *info)
offsets as well */
old_cursor = cell->payload.overflow_cursor;
- cell->payload.entries[0] = lut->overflow[old_cursor] +
+ cell->payload.entries[0] = ((Int4*)lut->overflow)[old_cursor] +
BLAST_WORDSIZE_PROT - 1;
cell->payload.entries[1] = cursor * sizeof(Int4);
for (i = 1; i < cell->num_used; i++, cursor++) {
- lut->overflow[cursor] = lut->overflow[old_cursor + i] +
+ ((Int4*)lut->overflow)[cursor]
+ = ((Int4*)lut->overflow)[old_cursor + i] +
BLAST_WORDSIZE_PROT - 1;
}
}
@@ -976,9 +1007,11 @@ void RPS_DbClose(RPS_DbInfo *info)
FileWrite(&header, sizeof(header), 1, info->lookup_fd);
/* write the thick backbone */
-
- FileWrite(lut->thick_backbone, sizeof(RPSBackboneCell),
+
+
+ FileWriteInChunks(lut->thick_backbone, sizeof(RPSBackboneCell),
lut->backbone_size, info->lookup_fd);
+
/* write extra backbone cells */
@@ -989,7 +1022,7 @@ void RPS_DbClose(RPS_DbInfo *info)
/* write the new overflow array */
- FileWrite(lut->overflow, sizeof(Int4), cursor, info->lookup_fd);
+ FileWriteInChunks(lut->overflow, sizeof(Int4), cursor, info->lookup_fd);
}
/* Free data, close files */
diff --git a/demo/nps2gps.c b/demo/nps2gps.c
index d926a85b..6656fac4 100644
--- a/demo/nps2gps.c
+++ b/demo/nps2gps.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/12/05
*
-* $Revision: 1.13 $
+* $Revision: 1.14 $
*
* File Description:
*
@@ -50,7 +50,7 @@
#include <toasn3.h>
#include <pmfapi.h>
-#define NPS2GPSAPP_VER "2.1"
+#define NPS2GPSAPP_VER "2.2"
CharPtr NPS2GPSAPPLICATION = NPS2GPSAPP_VER;
@@ -61,6 +61,7 @@ typedef struct n2gdata {
Boolean lock;
Boolean byFeatID;
Boolean useProtID;
+ Boolean refSeqTitles;
} N2GData, PNTR N2GPtr;
typedef struct npsseqs {
@@ -279,7 +280,8 @@ static void LclCopyGene (
static void LclAddMrnaTitles (
SeqLocPtr slp,
- Pointer userdata
+ CharPtr organism,
+ Boolean refSeqTitles
)
{
@@ -289,7 +291,6 @@ static void LclAddMrnaTitles (
SeqMgrFeatContext gcontext;
CharPtr genelabel = NULL;
size_t len;
- CharPtr organism;
SeqFeatPtr sfp;
CharPtr str;
@@ -297,7 +298,6 @@ static void LclAddMrnaTitles (
bsp = BioseqFindFromSeqLoc (slp);
if (bsp == NULL) return;
if (! ISA_na (bsp->mol)) return;
- organism = (CharPtr) userdata;
if (BioseqGetTitle (bsp) != NULL) return;
sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &gcontext);
if (sfp != NULL) {
@@ -333,9 +333,18 @@ static void LclAddMrnaTitles (
}
if (cdslabel != NULL && genelabel != NULL) {
if (ccontext.partialL || ccontext.partialR) {
- StringCat (str, " mRNA, partial cds.");
+ if (refSeqTitles) {
+ StringCat (str, " partial mRNA.");
+ } else {
+ StringCat (str, " mRNA, partial cds.");
+ }
} else {
- StringCat (str, " mRNA, complete cds.");
+ if (refSeqTitles) {
+ /* requested to make all mRNAs partial in defline */
+ StringCat (str, " partial mRNA.");
+ } else {
+ StringCat (str, " mRNA, complete cds.");
+ }
}
} else if (genelabel != NULL) {
StringCat (str, " mRNA.");
@@ -808,7 +817,7 @@ static void NPStoGPS (
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_mRNA, &mcontext);
while (sfp != NULL) {
- LclAddMrnaTitles (sfp->product, organism);
+ LclAddMrnaTitles (sfp->product, organism, ngp->refSeqTitles);
sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_mRNA, &mcontext);
}
@@ -976,16 +985,17 @@ static void ProcessOneRecord (
/* Args structure contains command-line arguments */
-#define p_argInputPath 0
-#define r_argOutputPath 1
-#define i_argInputFile 2
-#define o_argOutputFile 3
-#define f_argFilter 4
-#define x_argSuffix 5
-#define R_argRemote 6
-#define L_argLockFar 7
-#define F_argUseFeatID 8
-#define P_argUseProtID 9
+#define p_argInputPath 0
+#define r_argOutputPath 1
+#define i_argInputFile 2
+#define o_argOutputFile 3
+#define f_argFilter 4
+#define x_argSuffix 5
+#define R_argRemote 6
+#define L_argLockFar 7
+#define F_argUseFeatID 8
+#define P_argUseProtID 9
+#define D_argRefSeqTitles 10
Args myargs [] = {
@@ -1009,6 +1019,8 @@ Args myargs [] = {
TRUE, 'F', ARG_BOOLEAN, 0.0, 0, NULL},
{"mRNA ID from Protein", "F", NULL, NULL,
TRUE, 'P', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"RefSeq mRNA Titles", "F", NULL, NULL,
+ TRUE, 'D', ARG_BOOLEAN, 0.0, 0, NULL},
};
Int2 Main (void)
@@ -1062,6 +1074,7 @@ Int2 Main (void)
ngd.lock = (Boolean) myargs [L_argLockFar].intvalue;
ngd.byFeatID = (Boolean) myargs [F_argUseFeatID].intvalue;
ngd.useProtID = (Boolean) myargs [P_argUseProtID].intvalue;
+ ngd.refSeqTitles = (Boolean) myargs [D_argRefSeqTitles].intvalue;
directory = (CharPtr) myargs [p_argInputPath].strvalue;
results = (CharPtr) myargs [r_argOutputPath].strvalue;
diff --git a/demo/rpsblast.c b/demo/rpsblast.c
index f476d692..fd85ee1a 100644
--- a/demo/rpsblast.c
+++ b/demo/rpsblast.c
@@ -1,6 +1,6 @@
-static char const rcsid[] = "$Id: rpsblast.c,v 6.92 2007/08/21 20:07:01 kans Exp $";
+static char const rcsid[] = "$Id: rpsblast.c,v 6.93 2008/07/23 14:06:57 madden Exp $";
-/* $Id: rpsblast.c,v 6.92 2007/08/21 20:07:01 kans Exp $
+/* $Id: rpsblast.c,v 6.93 2008/07/23 14:06:57 madden Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,12 +31,15 @@ static char const rcsid[] = "$Id: rpsblast.c,v 6.92 2007/08/21 20:07:01 kans Exp
*
* Initial Version Creation Date: 12/14/1999
*
-* $Revision: 6.92 $
+* $Revision: 6.93 $
*
* File Description:
* Main file for RPS BLAST program
*
* $Log: rpsblast.c,v $
+* Revision 6.93 2008/07/23 14:06:57 madden
+* Fix ASN.1 output (JIRA SB-89)
+*
* Revision 6.92 2007/08/21 20:07:01 kans
* include gencode_singleton.h, cast first argument to BlastFormattingInfoNew to fix CodeWarrior complaint
*
@@ -575,6 +578,7 @@ Int2 Main(void)
Blast_SummaryReturn* full_sum_returns = NULL;
Boolean believe_query = (Boolean) myargs[OPT_BELIEVE_QUERY].intvalue;
Char buf[256] = { '\0' };
+ BlastFormattingInfo* asn_format_info = NULL;
GeneticCodeSingletonInit();
StringCpy(buf, "rpsblast ");
@@ -654,6 +658,20 @@ Int2 Main(void)
believe_query);
BLAST_PrintOutputHeader(format_info);
+ if (myargs[OPT_ASNOUT].strvalue) {
+ /* This just prints out the ASN.1 to a secondary file. */
+ BlastFormattingInfoNew(eAlignViewAsnText, options,
+ blast_program, dbname,
+ myargs[OPT_ASNOUT].strvalue, &asn_format_info);
+
+ BlastFormattingInfoSetUpOptions(asn_format_info,
+ myargs[OPT_NUM_DESC].intvalue,
+ myargs[OPT_NUM_DESC].intvalue,
+ FALSE,
+ FALSE,
+ FALSE,
+ TRUE);
+ }
/* Loop over sets of queries. */
while (1) {
@@ -719,23 +737,9 @@ Int2 Main(void)
/* format results */
if (myargs[OPT_ASNOUT].strvalue) {
- /* This just prints out the ASN.1 to a secondary file. */
- BlastFormattingInfo* asn_format_info = NULL;
- BlastFormattingInfoNew(eAlignViewAsnText, options,
- blast_program, dbname,
- myargs[OPT_ASNOUT].strvalue, &asn_format_info);
-
- BlastFormattingInfoSetUpOptions(asn_format_info,
- myargs[OPT_NUM_DESC].intvalue,
- myargs[OPT_NUM_DESC].intvalue,
- FALSE,
- FALSE,
- FALSE,
- TRUE);
status =
BLAST_FormatResults(seqalign_arr, num_queries, query_slp,
NULL, asn_format_info, sum_returns);
- asn_format_info = BlastFormattingInfoFree(asn_format_info);
}
status =
@@ -756,6 +760,8 @@ Int2 Main(void)
if (infp)
FileClose(infp);
+ if (asn_format_info)
+ asn_format_info = BlastFormattingInfoFree(asn_format_info);
/* Print the footer with summary information. */
Blast_PrintOutputFooter(format_info, full_sum_returns);
diff --git a/demo/scantest.c b/demo/scantest.c
index e3fff721..f7052b48 100644
--- a/demo/scantest.c
+++ b/demo/scantest.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/20/95
*
-* $Revision: 6.4 $
+* $Revision: 6.13 $
*
* File Description:
* template for custom scans of ASN.1 release files
@@ -52,6 +52,7 @@
#include <sequtil.h>
#include <sqnutils.h>
#include <explore.h>
+#include <toasn3.h>
typedef struct appflags {
Boolean binary;
@@ -61,79 +62,765 @@ typedef struct appflags {
Char id [64];
} AppFlagData, PNTR AppFlagPtr;
-static void DoOneUser (UserObjectPtr uop, Pointer userdata)
+/* Writes the SeqEntry with SeqEntryAsnWrite into an in-memory ByteStore
+ and returns the string produced by BSMerge. Returns NULL when sep is
+ NULL or when the ByteStore or the ASN.1 stream cannot be created. */
+static CharPtr Se2Str (
+ SeqEntryPtr sep
+)
{
- AppFlagPtr afp;
- Char buf [128];
- ObjectIdPtr oip;
+ AsnIoBSPtr aibp;
+ ByteStorePtr bs;
+ CharPtr str;
- if (uop == NULL) return;
- afp = (AppFlagPtr) userdata;
- if (afp == NULL) return;
+ if (sep == NULL) return NULL;
+
+ bs = BSNew (1000);
+ if (bs == NULL) return NULL;
+ aibp = AsnIoBSOpen ("w", bs);
+ if (aibp == NULL) {
+ /* the ByteStore is still owned here, release it before bailing out */
+ BSFree (bs);
+ return NULL;
+ }
+
+ SeqEntryAsnWrite (sep, aibp->aip, NULL);
+
+ AsnIoFlush (aibp->aip);
+ AsnIoBSClose (aibp);
+
+ str = BSMerge (bs, NULL);
+ BSFree (bs);
+
+ return str;
+}
+
+typedef struct chgdata { /* tallies filled in by ScoreFeature, ScoreDescriptor and LookForBadAuth */
+ Boolean rubisco; /* a protein name matched IsRubisco */
+ Boolean rbc; /* a protein name matched IsRbc */
+ Boolean its; /* an RNA name or product qualifier matched IsITS */
+ Boolean sgml; /* a gene locus, desc or synonym satisfied HasSgml */
+ Boolean rnaother; /* RNA type 255 named other than misc_RNA, ncRNA or tmRNA */
+ Boolean trnanote; /* comment on an RNA of type 3 was recognized by FindTrnaAA3 or FindTrnaAA */
+ Boolean oldbiomol; /* MolInfo biomol is snRNA, scRNA or snoRNA */
+ Boolean badname; /* an author name field contains a digit */
+ Int4 protdesc; /* number of protein features with a non-blank desc */
+ Int4 sfpnote; /* number of features with a non-blank comment */
+ Int4 gbsource; /* number of GenBank blocks with a non-blank source */
+ Int4 cdsconf; /* number of coding regions with the conflict flag set */
+ AppFlagPtr afp; /* application flags, read by LookForBadAuth for its report */
+} ChangeData, PNTR ChangeDataPtr;
+
+/* Returns TRUE when name equals, ignoring case, one of the two abbreviated
+   rubisco subunit names. */
+
+static Boolean IsRubisco (
+  CharPtr name
+)
+
+{
+  if (StringICmp (name, "rubisco large subunit") == 0) return TRUE;
+  if (StringICmp (name, "rubisco small subunit") == 0) return TRUE;
+  return FALSE;
+}
+
+/* Returns TRUE when name equals, ignoring case, "RbcL" or "RbcS". */
+
+static Boolean IsRbc (
+  CharPtr name
+)
+
+{
+  if (StringICmp (name, "RbcL") == 0) return TRUE;
+  if (StringICmp (name, "RbcS") == 0) return TRUE;
+  return FALSE;
+}
- buf [0] = '\0';
- if (StringDoesHaveText (uop->_class)) {
- StringCat (buf, uop->_class);
- }
- StringCat (buf, " ");
- buf [30] = '\0';
- fprintf (afp->fp, "%s", buf);
-
- buf [0] = '\0';
- oip = uop->type;
- if (oip != NULL) {
- if (StringDoesHaveText (oip->str)) {
- StringCat (buf, oip->str);
- } else if (oip->id > 0) {
- sprintf (buf, "%ld", (long) oip->id);
+/* Returns TRUE when name equals, ignoring case, one of the recognized
+   spellings of internal transcribed spacer 1, 2 or 3. */
+
+static Boolean IsITS (
+  CharPtr name
+)
+
+{
+  static const char * const spellings [] = {
+    "its1",
+    "its 1",
+    "its2",
+    "its 2",
+    "its3",
+    "its 3",
+    "Ribosomal DNA internal transcribed spacer 1",
+    "Ribosomal DNA internal transcribed spacer 2",
+    "Ribosomal DNA internal transcribed spacer 3",
+    "internal transcribed spacer 1 (ITS1)",
+    "internal transcribed spacer 2 (ITS2)",
+    "internal transcribed spacer 3 (ITS3)"
+  };
+  size_t  k;
+
+  /* entries are tried in order and the first match wins */
+  for (k = 0; k < sizeof (spellings) / sizeof (spellings [0]); k++) {
+    if (StringICmp (name, spellings [k]) == 0) return TRUE;
+  }
+
+  return FALSE;
+}
+
+/* Returns TRUE when running str through Sgml2Ascii produces a different
+   string.  Returns FALSE for blank input and for input whose converted
+   length (plus terminator) would not fit the local 1024-byte buffer. */
+
+static Boolean HasSgml (
+  CharPtr str
+)
+
+{
+  Int2  converted_len;
+  Char  plain [1024];
+
+  if (StringHasNoText (str)) return FALSE;
+
+  converted_len = Sgml2AsciiLen (str);
+  if (sizeof (plain) < converted_len + 2) return FALSE;
+
+  Sgml2Ascii (str, plain, converted_len + 1);
+
+  return (Boolean) (StringCmp (str, plain) != 0);
+}
+
+static void ScoreFeature (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+/* Feature visitor that updates the ChangeData tallies passed as userdata.
+ It counts features with a non-blank comment, then inspects gene, coding
+ region, protein and RNA features and sets or increments matching fields. */
+{
+ ChangeDataPtr cdp;
+ CharPtr comment;
+ CdRegionPtr crp;
+ CharPtr desc;
+ GBQualPtr gbq;
+ GeneRefPtr grp;
+ CharPtr name;
+ ProtRefPtr prp;
+ Uint1 residue;
+ RnaRefPtr rrp;
+ CharPtr str;
+ ValNodePtr vnp;
+
+ if (sfp == NULL) return;
+ cdp = (ChangeDataPtr) userdata;
+ if (cdp == NULL) return;
+
+ comment = sfp->comment;
+ if (StringDoesHaveText (comment)) {
+ (cdp->sfpnote)++;
+ }
+
+ /* skip feature types that do not use data.value.ptrvalue */
+ switch (sfp->data.choice) {
+ case SEQFEAT_COMMENT:
+ case SEQFEAT_BOND:
+ case SEQFEAT_SITE:
+ case SEQFEAT_PSEC_STR:
+ return;
+ default:
+ break;
+ }
+
+ if (sfp->data.value.ptrvalue == NULL) return;
+
+ switch (sfp->data.choice) {
+ case SEQFEAT_GENE:
+ grp = (GeneRefPtr) sfp->data.value.ptrvalue;
+ if (HasSgml (grp->locus)) {
+ cdp->sgml = TRUE;
+ }
+ if (HasSgml (grp->desc)) {
+ cdp->sgml = TRUE;
+ }
+ for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ if (HasSgml (str)) {
+ cdp->sgml = TRUE;
+ }
+ }
+ break;
+ case SEQFEAT_CDREGION:
+ crp = (CdRegionPtr) sfp->data.value.ptrvalue;
+ if (crp->conflict) {
+ (cdp->cdsconf)++;
+ }
+ break;
+ case SEQFEAT_PROT:
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ desc = prp->desc;
+ if (StringDoesHaveText (desc)) {
+ (cdp->protdesc)++;
+ }
+ for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ if (IsRubisco (str)) {
+ cdp->rubisco = TRUE;
+ }
+ if (IsRbc (str)) {
+ cdp->rbc = TRUE;
+ }
+ }
+ break;
+ case SEQFEAT_RNA :
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (rrp->type == 255 && rrp->ext.choice == 1) { /* type 255 with ext read as a name string */
+ name = (CharPtr) rrp->ext.value.ptrvalue;
+ if (StringCmp (name, "misc_RNA") == 0) {
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) { /* look for ITS names among product qualifiers */
+ if (StringCmp (gbq->qual, "product") != 0) continue;
+ name = gbq->val;
+ if (StringHasNoText (name)) continue;
+ if (IsITS (name)) {
+ cdp->its = TRUE;
+ }
+ }
+ } else if (StringCmp (name, "ncRNA") == 0 || StringCmp (name, "tmRNA") == 0) { /* deliberately not scored */
+ } else {
+ cdp->rnaother = TRUE;
+ if (IsITS (name)) {
+ cdp->its = TRUE;
+ }
+ }
+ } else if (rrp->type == 3 && rrp->ext.choice == 2) { /* presumably tRNA with a structured ext - TODO confirm */
+ if (StringDoesHaveText (comment)) {
+ if (StringNCmp (comment, "aa: ", 4) == 0) {
+ comment += 4; /* step over the "aa: " prefix before parsing */
+ }
+ residue = FindTrnaAA3 (comment);
+ if (residue > 0 && residue != 255) {
+ cdp->trnanote = TRUE;
+ }
+ residue = FindTrnaAA (comment);
+ if (residue > 0 && residue != 255) {
+ cdp->trnanote = TRUE;
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+/* Descriptor visitor that updates the ChangeData tallies passed as
+   userdata: counts GenBank blocks with a non-blank source string and flags
+   MolInfo descriptors whose biomol is snRNA, scRNA or snoRNA. */
+
+static void ScoreDescriptor (
+  SeqDescrPtr sdp,
+  Pointer userdata
+)
+
+{
+  GBBlockPtr     gbblock;
+  MolInfoPtr     molinfo;
+  ChangeDataPtr  tally;
+
+  if (sdp == NULL) return;
+  tally = (ChangeDataPtr) userdata;
+  if (tally == NULL) return;
+
+  if (sdp->choice == Seq_descr_genbank) {
+
+    gbblock = (GBBlockPtr) sdp->data.ptrvalue;
+    if (gbblock != NULL && StringDoesHaveText (gbblock->source)) {
+      (tally->gbsource)++;
+    }
+
+  } else if (sdp->choice == Seq_descr_molinfo) {
+
+    molinfo = (MolInfoPtr) sdp->data.ptrvalue;
+    if (molinfo != NULL &&
+        (molinfo->biomol == MOLECULE_TYPE_SNRNA ||
+         molinfo->biomol == MOLECULE_TYPE_SCRNA ||
+         molinfo->biomol == MOLECULE_TYPE_SNORNA)) {
+      tally->oldbiomol = TRUE;
+    }
+  }
+}
+
+/* Fills cdp by running ScoreFeature over every feature and ScoreDescriptor
+   over every descriptor in sep.  Does nothing if either argument is NULL. */
+
+static void CheckForChanges (
+  SeqEntryPtr sep,
+  ChangeDataPtr cdp
+)
+
+{
+  if (sep != NULL && cdp != NULL) {
+    VisitFeaturesInSep (sep, (Pointer) cdp, ScoreFeature);
+    VisitDescriptorsInSep (sep, (Pointer) cdp, ScoreDescriptor);
+  }
+}
+
+/* Feature visitor adapter: forwards the feature to ModernizeGeneFields.
+   userdata is not used. */
+
+static void ModGenes (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  ModernizeGeneFields (sfp);
+  return;
+}
+
+/* Feature visitor adapter: forwards the feature to ModernizeRNAFields.
+   userdata is not used. */
+
+static void ModRNAs (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  ModernizeRNAFields (sfp);
+  return;
+}
+
+static void ModPCRs (
+ BioSourcePtr biop,
+ Pointer userdata
+)
+/* BioSource visitor: runs ModernizePCRPrimers on biop. If userdata points
+ to a Boolean, that Boolean is set to TRUE when any PCR reaction lacks a
+ forward or reverse primer list, or has a primer with a name but no
+ sequence text. */
+{
+ BoolPtr namP;
+ PCRPrimerPtr ppp;
+ PCRReactionPtr prp;
+
+ if (biop == NULL) return;
+
+ ModernizePCRPrimers (biop);
+
+ namP = (BoolPtr) userdata;
+ if (namP == NULL) return;
+
+ for (prp = biop->pcr_primers; prp != NULL; prp = prp->next) {
+ if (prp->forward == NULL || prp->reverse == NULL) { /* reaction is missing one side */
+ *namP = TRUE;
+ return;
+ }
+ for (ppp = prp->forward; ppp != NULL; ppp = ppp->next) {
+ if (StringHasNoText (ppp->seq) && StringDoesHaveText (ppp->name)) { /* named primer without sequence */
+ *namP = TRUE;
+ return;
+ }
+ }
+ for (ppp = prp->reverse; ppp != NULL; ppp = ppp->next) {
+ if (StringHasNoText (ppp->seq) && StringDoesHaveText (ppp->name)) { /* named primer without sequence */
+ *namP = TRUE;
+ return;
+ }
}
}
- StringCat (buf, " ");
- buf [30] = '\0';
- fprintf (afp->fp, "%s", buf);
+}
- if (afp->verbose) {
- fprintf (afp->fp, " %s", afp->id);
+static void TestForRubisco (
+ CharPtr str,
+ AppFlagPtr afp,
+ CharPtr prefix,
+ CharPtr remainder
+)
+/* Reports protein names that mention ribulose bisphosphate but are not one
+ of the two accepted full names. Names equal to a listed large-subunit
+ variant (and not containing "methyltransferase") are written with the
+ prefix tag; every other such name is written with the remainder tag.
+ A blank tag is replaced by "?". Verbose output also includes the name. */
+{
+ if (StringHasNoText (str)) return;
+ if (afp == NULL || afp->fp == NULL) return;
+
+ if (StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit") == 0) return; /* already the accepted name */
+ if (StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit") == 0) return; /* already the accepted name */
+ if (StringStr (str, "ribulose") == NULL || StringStr (str, "bisphosphate") == NULL) return; /* needs both words to be of interest */
+
+ if (StringHasNoText (prefix)) {
+ prefix = "?";
}
- fprintf (afp->fp, "\n");
+ if (StringStr (str, "methyltransferase") == NULL) {
+ if (StringICmp (str, "ribulose 1,5-bisphosphate carboxylase/oxygenase large subunit") == 0 ||
+ StringICmp (str, "ribulose 1,5-bisphosphate carboxylase large subunit") == 0 ||
+ StringICmp (str, "ribulose bisphosphate carboxylase large subunit") == 0 ||
+ StringICmp (str, "ribulose-bisphosphate carboxylase large subunit") == 0 ||
+ StringICmp (str, "ribulose-1,5-bisphosphate carboxylase large subunit") == 0 ||
+ StringICmp (str, "ribulose-1,5-bisphosphate carboxylase, large subunit") == 0 ||
+ StringICmp (str, "large subunit of ribulose-1,5-bisphosphate carboxylase/oxygenase") == 0 ||
+ StringICmp (str, "ribulose-1,5-bisphosphate carboxylase oxygenase large subunit") == 0 ||
+ StringICmp (str, "ribulose bisphosphate carboxylase large chain") == 0 ||
+ StringICmp (str, "ribulose 1,5-bisphosphate carboxylase-oxygenase large subunit") == 0 ||
+ StringICmp (str, "ribulose bisphosphate carboxylase oxygenase large subunit") == 0 ||
+ StringICmp (str, "ribulose 1,5 bisphosphate carboxylase large subunit") == 0 ||
+ StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase, large subunit") == 0 ||
+ StringICmp (str, "large subunit of ribulose-1,5-bisphosphate carboxylase/oxgenase") == 0 ||
+ StringICmp (str, "ribulose bisphosphate carboxylase/oxygenase large subunit") == 0 ||
+ StringICmp (str, "ribulose-1,5-bisphosphate carboxylase oxygenase, large subunit") == 0 ||
+ StringICmp (str, "ribulose 5-bisphosphate carboxylase, large subunit") == 0 ||
+ StringICmp (str, "ribulosebisphosphate carboxylase large subunit") == 0 ||
+ StringICmp (str, "ribulose bisphosphate large subunit") == 0 ||
+ StringICmp (str, "ribulose 1,5 bisphosphate carboxylase/oxygenase large subunit") == 0 ||
+ StringICmp (str, "ribulose 1,5-bisphosphate carboxylase/oxygenase large chain") == 0 ||
+ StringICmp (str, "large subunit ribulose-1,5-bisphosphate carboxylase/oxygenase") == 0 ||
+ StringICmp (str, "ribulose-bisphosphate carboxylase, large subunit") == 0 ||
+ StringICmp (str, "ribulose-1, 5-bisphosphate carboxylase/oxygenase large-subunit") == 0) {
+ if (afp->verbose) {
+ fprintf (afp->fp, "%s\t%s\t%s\n", prefix, afp->id, str);
+ } else {
+ fprintf (afp->fp, "%s %s\n", prefix, afp->id);
+ }
+ fflush (afp->fp);
+ return;
+ }
+ }
+
+ if (StringHasNoText (remainder)) {
+ remainder = "?";
+ }
+ if (afp->verbose) { /* anything not matched above is reported under the remainder tag */
+ fprintf (afp->fp, "%s\t%s\t%s\n", remainder, afp->id, str);
+ } else {
+ fprintf (afp->fp, "%s %s\n", remainder, afp->id);
+ }
fflush (afp->fp);
}
-static void DoOneDescriptor (SeqDescrPtr sdp, Pointer userdata)
+static void TrailingCommaFix (
+ CharPtr str,
+ AppFlagPtr afp,
+ CharPtr prefix
+) /* Edits str in place: trailing spaces are skipped and, if the character
+ before them is a comma, the comma becomes '_' and the string is cut
+ right after it. In verbose mode the string up to and including the
+ comma is first written to afp->fp, tagged with prefix ("?" if blank)
+ and afp->id. */
{
- AppFlagPtr afp;
- UserObjectPtr uop;
+ Char ch;
+ size_t len;
+
+ if (StringHasNoText (str)) return;
+ len = StringLen (str);
+ if (len < 1) return;
+ ch = str [len - 1];
+ while (ch == ' ' && len > 2) { /* back up over trailing spaces, never below length 2 */
+ len--;
+ ch = str [len - 1];
+ }
+ if (ch == ',') {
+ if (afp != NULL && afp->verbose && afp->fp != NULL) {
+ str [len] = '\0'; /* drop the skipped spaces so the report ends at the comma */
+ if (StringHasNoText (prefix)) {
+ prefix = "?";
+ }
+ fprintf (afp->fp, "%s\t%s\t%s\n", prefix, afp->id, str);
+ fflush (afp->fp);
+ }
+ str [len - 1] = '_';
+ str [len] = '\0';
+ }
+}
- if (sdp == NULL || sdp->choice != Seq_descr_user) return;
+static void RnaProtCmntTrailingCommaFix (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+/* Feature visitor: applies TrailingCommaFix to the feature comment, to
+ every protein name, and to the name string of an RNA whose ext choice
+ is 1. Protein names are also passed to TestForRubisco.
+ NOTE(review): data.value.ptrvalue is dereferenced without a NULL check
+ for protein and RNA features - confirm it cannot be NULL here. */
+{
+ AppFlagPtr afp;
+ ProtRefPtr prp;
+ RnaRefPtr rrp;
+ CharPtr str;
+ ValNodePtr vnp;
+
+ if (sfp == NULL) return;
afp = (AppFlagPtr) userdata;
if (afp == NULL) return;
- uop = (UserObjectPtr) sdp->data.ptrvalue;
- if (uop == NULL) return;
+ str = sfp->comment;
+ if (StringDoesHaveText (str)) {
+ TrailingCommaFix (str, afp, "SFPCOMM");
+ }
- VisitUserObjectsInUop (uop, (Pointer) afp, DoOneUser);
+ if (sfp->data.choice == SEQFEAT_PROT) {
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ /* turn trailing comma into trailing underscore for validator */
+ for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ TrailingCommaFix (str, afp, "PRTCOMM");
+ TestForRubisco (str, afp, "RIBBIS", "RIBREM");
+ }
+ } else if (sfp->data.choice == SEQFEAT_RNA) {
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ /* turn trailing comma into trailing underscore for validator */
+ if (rrp->ext.choice == 1) {
+ str = rrp->ext.value.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ TrailingCommaFix (str, afp, "RNACOMM");
+ }
+ }
+ }
}
-static void DoOneFeature (SeqFeatPtr sfp, Pointer userdata)
+static void LookForBadAuth (
+  NameStdPtr nsp,
+  Pointer userdata
+)
{
AppFlagPtr afp;
- UserObjectPtr uop;
+  ChangeDataPtr  cdp;
+  Boolean        has_digit = FALSE;
+  Int2           idx;
+  CharPtr        name;
+  CharPtr        ptr;
+  CharPtr        sep = "\t";
+
+  if (nsp == NULL) return;
+  cdp = (ChangeDataPtr) userdata;
+  if (cdp == NULL) return;
+  afp = cdp->afp;
+  if (afp == NULL) return;
- if (sfp == NULL) return;
+  /* flag the author if any of the first six name parts contains a digit */
+  for (idx = 0; idx < 6; idx++) {
+    name = nsp->names [idx];
+    if (StringHasNoText (name)) continue;
+    for (ptr = name; *ptr != '\0'; ptr++) {
+      if (! IS_DIGIT (*ptr)) continue;
+      cdp->badname = TRUE;
+      has_digit = TRUE;
+    }
+  }
+
+  /* the detailed line is written only in verbose mode with an open report file */
+  if (! has_digit) return;
+  if (afp->fp == NULL || ! afp->verbose) return;
+
+  fprintf (afp->fp, "%s\t%s", "AUTHOR", afp->id);
+  for (idx = 0; idx < 6; idx++) {
+    name = nsp->names [idx];
+    if (StringHasNoText (name)) continue;
+    /* first part follows a tab, later parts are joined with " | " */
+    fprintf (afp->fp, "%s%s", sep, name);
+    sep = " | ";
+  }
+  fprintf (afp->fp, "\n");
+  fflush (afp->fp);
+}
+
+static void LookForBadPub (
+ PubdescPtr pdp,
+ Pointer userdata
+)
+/* Pubdesc visitor callback: forwards pdp and userdata unchanged to
+ * VisitAuthorsInPub, with LookForBadAuth as the per-author callback.
+ */
+{
+ VisitAuthorsInPub (pdp, userdata, LookForBadAuth);
+}
+
+static void CommentDescrTrailingCommaFix (
+ SeqDescrPtr sdp,
+ Pointer userdata
+)
+/* Descriptor visitor callback: for a comment descriptor (Seq_descr_comment)
+ * whose string has text, passes that string to TrailingCommaFix with the
+ * tag "DSCCOMM". userdata is cast to AppFlagPtr; a NULL sdp, a different
+ * descriptor type, or a NULL userdata makes the call a no-op.
+ */
+{
+ AppFlagPtr afp;
+ CharPtr str;
+
+ if (sdp == NULL || sdp->choice != Seq_descr_comment) return;
afp = (AppFlagPtr) userdata;
if (afp == NULL) return;
- uop = sfp->ext;
- if (uop != NULL) {
- VisitUserObjectsInUop (uop, (Pointer) afp, DoOneUser);
+ str = (CharPtr) sdp->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ TrailingCommaFix (str, afp, "DSCCOMM");
+ }
+}
+
+static void DoReport (
+ SeqEntryPtr sep,
+ AppFlagPtr afp
+)
+/* Runs a fixed series of cleanup passes over sep and reports which of them
+ * changed it. After each pass the entry is rendered again with Se2Str and
+ * compared (StringCmp) with the previous rendering; a difference raises the
+ * flag for that pass. Afterwards one tagged line per raised flag or recorded
+ * condition is written to afp->fp together with afp->id. Every fprintf in
+ * this function is guarded by afp->fp != NULL.
+ * NOTE(review): Se2Str and CheckForChanges are defined outside this view;
+ * Se2Str presumably returns a freshly allocated text form of the entry
+ * (its result is released with MemFree here) - confirm.
+ */
+{
+ Boolean bsec = FALSE, cma = FALSE, norm = FALSE, ssec = FALSE;
+ Boolean gen = FALSE, ncr = FALSE, pcr = FALSE, nam = FALSE;
+ ChangeData cdbefore, cdafter;
+ CharPtr str = NULL, tmp = NULL;
+
+ if (sep == NULL || afp == NULL) return;
+/* two zeroed snapshots, filled by CheckForChanges before and after the passes */
+ MemSet ((Pointer) &cdbefore, 0, sizeof (ChangeData));
+ MemSet ((Pointer) &cdafter, 0, sizeof (ChangeData));
+
+ cdbefore.afp = afp;
+ cdafter.afp = afp;
+
+ CheckForChanges (sep, &cdbefore);
+/* pass: descriptor order normalization -> norm */
+ str = Se2Str (sep);
+ NormalizeDescriptorOrder (sep);
+ tmp = Se2Str (sep);
+ if (StringCmp (str, tmp) != 0) {
+ norm = TRUE;
+ }
+ MemFree (str);
+ str = tmp;
+/* pass: trailing-comma fixes on features and comment descriptors -> cma */
+ VisitFeaturesInSep (sep, (Pointer) afp, RnaProtCmntTrailingCommaFix);
+ VisitDescriptorsInSep (sep, (Pointer) afp, CommentDescrTrailingCommaFix);
+ tmp = Se2Str (sep);
+ if (StringCmp (str, tmp) != 0) {
+ cma = TRUE;
+ }
+ MemFree (str);
+ str = tmp;
+/* pass: BasicSeqEntryCleanup -> bsec */
+ BasicSeqEntryCleanup (sep);
+ tmp = Se2Str (sep);
+ if (StringCmp (str, tmp) != 0) {
+ bsec = TRUE;
+ }
+ MemFree (str);
+ str = tmp;
+/* author check writes into cdbefore (LookForBadAuth sets badname there) */
+ VisitPubdescsInSep (sep, (Pointer) &cdbefore, LookForBadPub);
+/* pass: ModGenes over all features -> gen */
+ VisitFeaturesInSep (sep, NULL, ModGenes);
+ tmp = Se2Str (sep);
+ if (StringCmp (str, tmp) != 0) {
+ gen = TRUE;
+ }
+ MemFree (str);
+ str = tmp;
+/* pass: ModRNAs over all features -> ncr */
+ VisitFeaturesInSep (sep, NULL, ModRNAs);
+ tmp = Se2Str (sep);
+ if (StringCmp (str, tmp) != 0) {
+ ncr = TRUE;
+ }
+ MemFree (str);
+ str = tmp;
+/* pass: ModPCRs over all BioSources -> pcr; the callback receives &nam as userdata */
+ VisitBioSourcesInSep (sep, (Pointer) &nam, ModPCRs);
+ tmp = Se2Str (sep);
+ if (StringCmp (str, tmp) != 0) {
+ pcr = TRUE;
+ }
+ MemFree (str);
+ str = tmp;
+/* pass: SeriousSeqEntryCleanup -> ssec */
+ SeriousSeqEntryCleanup (sep, NULL, NULL);
+ tmp = Se2Str (sep);
+ if (StringCmp (str, tmp) != 0) {
+ ssec = TRUE;
+ }
+ MemFree (str);
+ str = tmp;
+
+ CheckForChanges (sep, &cdafter);
+
+ MemFree (str);
+/* at most one of SSEC, BSEC, NORM is printed, tested in that order */
+ if (ssec) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "SSEC %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ } else if (bsec) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "BSEC %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ } else if (norm) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "NORM %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ } else {
+ /*
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "OKAY %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ */
+ }
+/* the remaining pass flags are reported independently of each other */
+ if (cma) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "CMA %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+
+ if (gen) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "GEN %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (ncr) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "NCR %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (pcr) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "PCR %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (nam) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "NAM %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+/* conditions read from the first snapshot (cdbefore) */
+ if (cdbefore.rubisco) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "RUB %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.rbc) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "RBC %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.its) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "ITS %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.sgml) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "SGM %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.rnaother) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "RNA %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.trnanote) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "TRN %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.oldbiomol) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "MOL %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.badname) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "AUT %s\n", afp->id);
+ fflush (afp->fp);
+ }
}
- for (uop = sfp->exts; uop != NULL; uop = uop->next) {
- VisitUserObjectsInUop (uop, (Pointer) afp, DoOneUser);
+ if (cdbefore.protdesc != cdafter.protdesc) { /* from here on: the two snapshots disagree */
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "PRT %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.sfpnote != cdafter.sfpnote) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "COM %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.gbsource != cdafter.gbsource) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "SRC %s\n", afp->id);
+ fflush (afp->fp);
+ }
+ }
+ if (cdbefore.cdsconf != cdafter.cdsconf) {
+ if (afp->fp != NULL) {
+ fprintf (afp->fp, "CNF %s\n", afp->id);
+ fflush (afp->fp);
+ }
}
}
@@ -143,6 +830,7 @@ static void DoRecord (SeqEntryPtr sep, Pointer userdata)
AppFlagPtr afp;
BioseqPtr fbsp;
SeqEntryPtr fsep;
+ SeqIdPtr sip, siphead;
if (sep == NULL) return;
afp = (AppFlagPtr) userdata;
@@ -153,10 +841,14 @@ static void DoRecord (SeqEntryPtr sep, Pointer userdata)
fbsp = (BioseqPtr) fsep->data.ptrvalue;
if (fbsp == NULL) return;
- SeqIdWrite (fbsp->id, afp->id, PRINTID_FASTA_LONG, 64);
+ siphead = SeqIdSetDup (fbsp->id);
+ for (sip = siphead; sip != NULL; sip = sip->next) {
+ SeqIdStripLocus (sip);
+ }
+ SeqIdWrite (siphead, afp->id, PRINTID_FASTA_LONG, sizeof (afp->id));
+ SeqIdSetFree (siphead);
- VisitDescriptorsInSep (sep, (Pointer) afp, DoOneDescriptor);
- VisitFeaturesInSep (sep, (Pointer) afp, DoOneFeature);
+ DoReport (sep, afp);
}
static void ProcessOneRecord (
@@ -171,7 +863,8 @@ static void ProcessOneRecord (
afp = (AppFlagPtr) userdata;
if (afp == NULL) return;
- if (StringStr (filename, "gbest") != NULL ||
+ if (StringStr (filename, "gbcon") != NULL ||
+ StringStr (filename, "gbest") != NULL ||
StringStr (filename, "gbgss") != NULL ||
StringStr (filename, "gbhtg") != NULL ||
StringStr (filename, "gbsts") != NULL) {
@@ -218,7 +911,7 @@ Args myargs [] = {
TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
{"Bioseq-set is Compressed", "F", NULL, NULL,
TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
- {"Verbose", "F", NULL, NULL,
+ {"Verbose Output", "F", NULL, NULL,
TRUE, 'v', ARG_BOOLEAN, 0.0, 0, NULL},
};
@@ -274,7 +967,7 @@ extern Int2 Main (void)
dorecurse = (Boolean) myargs [u_argRecurse].intvalue;
afd.binary = (Boolean) myargs [b_argBinary].intvalue;
afd.compressed = (Boolean) myargs [c_argCompressed].intvalue;
- afd.verbose = (Boolean) myargs[v_argVerbose].intvalue;
+ afd.verbose = (Boolean) myargs [v_argVerbose].intvalue;
afd.fp = FileOpen (outfile, "w");
if (afd.fp == NULL) {
diff --git a/demo/src_chk.c b/demo/src_chk.c
new file mode 100755
index 00000000..cb5ffe30
--- /dev/null
+++ b/demo/src_chk.c
@@ -0,0 +1,398 @@
+/* src_chk.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: src_chk.c
+*
+* Author: Colleen Bollin
+*
+* Version Creation Date: 4/12/07
+*
+* $Revision: 1.10 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <objall.h>
+#include <objsset.h>
+#include <objsub.h>
+#include <objfdef.h>
+#include <sequtil.h>
+#include <gather.h>
+#include <sqnutils.h>
+#include <explore.h>
+#include <pmfapi.h>
+#define NLM_GENERATED_CODE_PROTO
+#include <asnmacro.h>
+#include <objmacro.h>
+#include <macroapi.h>
+
+#define SRC_CHK_APP_VER "1.0"
+
+CharPtr SRC_CHK_APPLICATION = SRC_CHK_APP_VER;
+
+
+/* Returns the concatenation of the source-qualifier field lists of every
+ * source descriptor found on bsp (NULL when there are none).
+ */
+static ValNodePtr CollectFieldList(BioseqPtr bsp)
+{
+  SeqMgrDescContext  context;
+  SeqDescrPtr        src_desc;
+  ValNodePtr         collected = NULL;
+
+  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+  while (src_desc != NULL) {
+    /* append this BioSource's fields to the running list */
+    ValNodeLink (&collected,
+                 GetSourceQualFieldListFromBioSource ((BioSourcePtr) src_desc->data.ptrvalue));
+    src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &context);
+  }
+  return collected;
+}
+
+
+/* Writes the tab-separated header row to fp: two empty columns, "TaxID",
+ * then one column per entry of field_list labelled by SummarizeFieldType.
+ * Does nothing when fp or field_list is NULL.
+ */
+static void PrintHeader (FILE *fp, ValNodePtr field_list)
+{
+  CharPtr txt;
+
+  if (fp == NULL || field_list == NULL) {
+    return;
+  }
+  /* first field accession, second field GI, third field tax ID */
+  fprintf (fp, "\t\tTaxID");
+  while (field_list != NULL) {
+    txt = SummarizeFieldType (field_list);
+    /* a NULL label must not reach %s; print an empty column instead,
+       the same way PrintBioSourceLine treats missing values */
+    fprintf (fp, "\t%s", txt == NULL ? "" : txt);
+    txt = MemFree (txt);
+    field_list = field_list->next;
+  }
+  fprintf (fp, "\n");
+}
+
+
+/* Returns the integer id of the first usable "taxon" db cross-reference on
+ * orp, or -1 when orp is NULL or carries no such entry.
+ */
+static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
+{
+  ValNodePtr  vnp;
+  DbtagPtr    d;
+
+  if (orp == NULL) return -1;
+
+  for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
+    d = (DbtagPtr) vnp->data.ptrvalue;
+    /* skip empty list nodes instead of dereferencing them */
+    if (d == NULL) continue;
+    if (StringCmp (d->db, "taxon") != 0) continue;
+    /* a taxon tag without an object id carries no number to report */
+    if (d->tag == NULL) continue;
+    return d->tag->id;
+  }
+  return -1;
+}
+
+
+/* Writes the tax ID column followed by one tab-separated column per entry of
+ * field_list, each holding the matching qualifier value of biop (empty when
+ * absent). No newline is written. Does nothing when any argument is NULL.
+ */
+static void PrintBioSourceLine (FILE *fp, BioSourcePtr biop, ValNodePtr field_list)
+{
+  CharPtr txt;
+
+  if (fp == NULL || biop == NULL || field_list == NULL) {
+    return;
+  }
+
+  /* Int4 is not guaranteed to be int, so widen explicitly to match %ld */
+  fprintf (fp, "\t%ld", (long) GetTaxIdFromOrgRef(biop->org));
+
+  while (field_list != NULL) {
+    txt = GetSourceQualFromBioSource (biop, field_list->data.ptrvalue, NULL);
+    fprintf (fp, "\t%s", txt == NULL ? "" : txt);
+    txt = MemFree (txt);
+    field_list = field_list->next;
+  }
+}
+
+
+/* Writes one output row per source descriptor on bsp: accession column, GI
+ * column, then the columns produced by PrintBioSourceLine, ended by a
+ * newline. When the Bioseq has neither an accession-type id nor a GI, the
+ * id chosen by SeqIdFindBest is printed in the first column instead.
+ * Does nothing when any argument is NULL.
+ */
+static void PrintBioseqLines (FILE *fp, BioseqPtr bsp, ValNodePtr field_list)
+{
+  SeqDescrPtr        sdp;
+  SeqMgrDescContext  dcontext;
+  Char               id_txt[255], id_txt2[255];
+  SeqIdPtr           sip, sip_gi = NULL, sip_gb = NULL;
+
+  if (fp == NULL || bsp == NULL || field_list == NULL) {
+    return;
+  }
+
+  /* start from empty strings so no path can print indeterminate bytes,
+     whatever SeqIdWrite does with a missing id */
+  id_txt[0] = '\0';
+  id_txt2[0] = '\0';
+
+  /* a GenBank id always wins; the other accession types only fill an empty slot */
+  for (sip = bsp->id; sip != NULL; sip = sip->next) {
+    if (sip->choice == SEQID_GENBANK
+        || (sip->choice == SEQID_EMBL && sip_gb == NULL)
+        || (sip->choice == SEQID_SWISSPROT && sip_gb == NULL)
+        || (sip->choice == SEQID_DDBJ && sip_gb == NULL)
+        || (sip->choice == SEQID_PIR && sip_gb == NULL)) {
+      sip_gb = sip;
+    } else if (sip->choice == SEQID_GI) {
+      sip_gi = sip;
+    }
+  }
+
+  if (sip_gb == NULL && sip_gi == NULL) {
+    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
+  } else {
+    if (sip_gb != NULL) {
+      SeqIdWrite (sip_gb, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
+    }
+    if (sip_gi != NULL) {
+      SeqIdWrite (sip_gi, id_txt2, PRINTID_REPORT, sizeof (id_txt2) - 1);
+    }
+  }
+
+  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+       sdp != NULL;
+       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext)) {
+    fprintf (fp, "%s\t%s", id_txt, id_txt2);
+    PrintBioSourceLine (fp, sdp->data.ptrvalue, field_list);
+    fprintf (fp, "\n");
+  }
+}
+
+
+/* Writes a row holding only the id of a sequence that could not be loaded:
+ * a GI goes in the second column, any other id in the first.
+ */
+static void PrintBioseqErrorLine (FILE *fp, SeqIdPtr sip)
+{
+  Char  label [255];
+
+  if (fp == NULL || sip == NULL) return;
+
+  SeqIdWrite (sip, label, PRINTID_REPORT, sizeof (label) - 1);
+
+  if (sip->choice != SEQID_GI) {
+    fprintf (fp, "%s\t\n", label);
+  } else {
+    fprintf (fp, "\t%s\n", label);
+  }
+}
+
+
+/* Returns TRUE when str has text and consists only of decimal digits. */
+static Boolean IsAllDigits (CharPtr str)
+{
+  CharPtr cp;
+
+  if (StringHasNoText (str)) return FALSE;
+
+  for (cp = str; *cp != '\0'; cp++) {
+    /* isdigit is undefined for negative values, which a plain char can
+       hold for bytes above 0x7F; convert to unsigned char first */
+    if (! isdigit ((unsigned char) *cp)) return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* Builds a SeqId from one line of user input. Text containing '|' is passed
+ * to MakeSeqID as is; an all-digit string is read as "gi|<digits>"; anything
+ * else is read as "gb|<text>". Returns NULL for blank input or when the
+ * temporary buffer cannot be allocated.
+ */
+static SeqIdPtr SmartGuessMakeId (CharPtr str)
+{
+  CharPtr   db;
+  CharPtr   id_txt;
+  SeqIdPtr  sip;
+
+  if (StringHasNoText (str)) {
+    return NULL;
+  }
+  if (StringChr (str, '|') != NULL) {
+    return MakeSeqID (str);
+  }
+
+  db = IsAllDigits (str) ? "gi" : "gb";
+
+  /* two-letter tag + '|' + text + terminating NUL */
+  id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4));
+  if (id_txt == NULL) {
+    return NULL;
+  }
+  sprintf (id_txt, "%s|%s", db, str);
+  sip = MakeSeqID (id_txt);
+  id_txt = MemFree (id_txt);
+  return sip;
+}
+
+
+/* Command-line argument table. The two macros below are the row indices Main
+   uses to read each value: -i names the input file, -o the output file. */
+
+#define i_argInputFile 0
+#define o_argOutputFile 1
+
+Args myargs [] = {
+ {"Input File", NULL, NULL, NULL,
+ TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Output File", NULL, NULL, NULL,
+ TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
+};
+
+
+/* Sorts and de-duplicates *field_list via SortUniqueFieldTypeList, then moves
+ * the first taxname text qualifier, if any, to the head of the list.
+ */
+static void SortFieldListForSrcChk (ValNodePtr PNTR field_list)
+{
+  ValNodePtr  curr, qual, before = NULL;
+
+  if (field_list == NULL || *field_list == NULL) return;
+
+  SortUniqueFieldTypeList (field_list);
+
+  /* stop on the first taxname entry, remembering the node in front of it */
+  for (curr = *field_list; curr != NULL; before = curr, curr = curr->next) {
+    if (curr->choice != FieldType_source_qual) continue;
+    qual = curr->data.ptrvalue;
+    if (qual == NULL) continue;
+    if (qual->choice != SourceQualChoice_textqual) continue;
+    if (qual->data.intvalue != Source_qual_taxname) continue;
+    break;
+  }
+
+  /* relink only when it was found and is not already the head */
+  if (curr != NULL && before != NULL) {
+    before->next = curr->next;
+    curr->next = *field_list;
+    *field_list = curr;
+  }
+}
+
+
+Int2 Main(void)
+{ /* Entry point of src_chk.
+   * Pass 1 reads the input file line by line, turns each non-blank line into
+   * a SeqId (SmartGuessMakeId), locks the Bioseq to collect the source
+   * qualifier fields it uses, and remembers the SeqId in bsp_list.
+   * Pass 2 writes a header row plus one or more rows per remembered SeqId to
+   * the output file, locking each Bioseq a second time.
+   * Returns 1 on setup or file-open failure, 0 when GetArgs declines,
+   * otherwise rval.
+   */
+ Char app [64];
+ Int4 rval = 0;
+ CharPtr id_file, line;
+ ReadBufferData rbd;
+ ValNodePtr field_list = NULL;
+ SeqIdPtr sip;
+ ValNodePtr bsp_list = NULL, vnp;
+ BioseqPtr bsp;
+ FILE *fp;
+
+
+ /* standard setup */
+
+ ErrSetFatalLevel (SEV_MAX);
+ ErrClearOptFlags (EO_SHOW_USERSTR);
+ UseLocalAsnloadDataAndErrMsg ();
+ ErrPathReset ();
+
+ /* finish resolving internal connections in ASN.1 parse tables */
+
+ if (! AllObjLoad ()) {
+ Message (MSG_FATAL, "AllObjLoad failed");
+ return 1;
+ }
+ if (! SubmitAsnLoad ()) {
+ Message (MSG_FATAL, "SubmitAsnLoad failed");
+ return 1;
+ }
+ if (! FeatDefSetLoad ()) {
+ Message (MSG_FATAL, "FeatDefSetLoad failed");
+ return 1;
+ }
+ if (! SeqCodeSetLoad ()) {
+ Message (MSG_FATAL, "SeqCodeSetLoad failed");
+ return 1;
+ }
+ if (! GeneticCodeTableLoad ()) {
+ Message (MSG_FATAL, "GeneticCodeTableLoad failed");
+ return 1;
+ }
+
+ PubSeqFetchEnable ();
+
+ /* process command line arguments */
+
+ sprintf (app, "src_chk %s", SRC_CHK_APPLICATION);
+ if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
+ return 0;
+ }
+
+ id_file = (CharPtr) myargs [i_argInputFile].strvalue;
+/* pass 1: read identifiers and gather the union of source fields */
+ rbd.fp = FileOpen (id_file, "r");
+ if (rbd.fp == NULL) {
+ Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [i_argInputFile].strvalue);
+ return 1;
+ }
+ rbd.current_data = NULL;
+ line = AbstractReadFunction (&rbd);
+ while (line != NULL && line[0] != EOF) {
+ if (!StringHasNoText (line)) {
+/* NOTE(review): the SmartGuessMakeId result is not checked; a NULL sip is
+   still passed to BioseqLockById and appended to bsp_list - confirm both
+   tolerate NULL. Failures are reported on stdout, not in the output file. */
+ sip = SmartGuessMakeId (line);
+ bsp = BioseqLockById (sip);
+ if (bsp == NULL) {
+ printf ("Unable to download Bioseq for %s\n", line);
+ } else {
+ ValNodeLink (&field_list, CollectFieldList (bsp));
+ BioseqUnlock (bsp);
+ }
+ ValNodeAddPointer (&bsp_list, 0, sip);
+ }
+ line = MemFree (line);
+ line = AbstractReadFunction (&rbd);
+ }
+
+ FileClose (rbd.fp);
+
+ SortFieldListForSrcChk (&field_list);
+/* pass 2: write the table; each SeqId is freed as soon as its rows are written */
+ fp = FileOpen ((CharPtr) myargs [o_argOutputFile].strvalue, "w");
+ if (fp == NULL) {
+ Message (MSG_ERROR, "Unable to open %s", (CharPtr) myargs [o_argOutputFile].strvalue);
+ rval = 1;
+ } else {
+ PrintHeader (fp, field_list);
+ for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
+ bsp = BioseqLockById (vnp->data.ptrvalue);
+ if (bsp == NULL) {
+ PrintBioseqErrorLine (fp, vnp->data.ptrvalue);
+ } else {
+ PrintBioseqLines (fp, bsp, field_list);
+ }
+ BioseqUnlock (bsp); /* NOTE(review): also reached with bsp == NULL - confirm BioseqUnlock accepts NULL */
+ vnp->data.ptrvalue = SeqIdFree (vnp->data.ptrvalue);
+ }
+ }
+ FileClose (fp); /* NOTE(review): also reached when the open above failed (fp == NULL) - confirm FileClose accepts NULL */
+ bsp_list = ValNodeFree (bsp_list);
+ field_list = FieldTypeListFree (field_list);
+ return rval;
+}
diff --git a/demo/subfuse.c b/demo/subfuse.c
new file mode 100644
index 00000000..a8046690
--- /dev/null
+++ b/demo/subfuse.c
@@ -0,0 +1,229 @@
+/* subfuse.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: subfuse.c
+*
+* Author: Jonathan Kans
+*
+* Version Creation Date: 7/30/01
+*
+* $Revision: 1.2 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <objall.h>
+#include <objsset.h>
+#include <objsub.h>
+#include <objfdef.h>
+
+/* Reads a Seq-submit from the file <directory>/<base><suffix>. NULL base or
+ * suffix is treated as the empty string. Returns NULL when the combined file
+ * name does not fit the local buffer or the file cannot be opened; otherwise
+ * returns whatever SeqSubmitAsnRead produced.
+ */
+static SeqSubmitPtr ReadOneSubmission (
+  CharPtr directory,
+  CharPtr base,
+  CharPtr suffix
+)
+
+{
+  AsnIoPtr      aip;
+  Char          file [FILENAME_MAX], path [PATH_MAX];
+  SeqSubmitPtr  ssp;
+
+  if (base == NULL) {
+    base = "";
+  }
+  if (suffix == NULL) {
+    suffix = "";
+  }
+  /* sprintf below is unbounded, so reject names that would overrun file[] */
+  if (StringLen (base) + StringLen (suffix) >= sizeof (file)) return NULL;
+
+  StringNCpy_0 (path, directory, sizeof (path));
+  sprintf (file, "%s%s", base, suffix);
+  FileBuildPath (path, NULL, file);
+
+  aip = AsnIoOpen (path, "r");
+  if (aip == NULL) return NULL;
+  ssp = SeqSubmitAsnRead (aip, NULL);
+  AsnIoClose (aip);
+
+  return ssp;
+}
+
+/* Writes ssp to the file named by path; silently does nothing when the
+ * output stream cannot be opened.
+ */
+static void WriteOneSubmission (
+  CharPtr path,
+  SeqSubmitPtr ssp
+)
+
+{
+  AsnIoPtr  out = AsnIoOpen (path, "w");
+
+  if (out != NULL) {
+    SeqSubmitAsnWrite (ssp, out, NULL);
+    AsnIoFlush (out);
+    AsnIoClose (out);
+  }
+}
+
+/* Reads the submission <directory>/<base><suffix> and merges it into the
+ * combined output: its Seq-entry chain is appended to bssp->seq_set, and its
+ * submit block is adopted by master if master has none yet. Submissions
+ * whose datatype is not 1 are ignored. The emptied input shell is freed
+ * before returning.
+ */
+static void ProcessOneRecord (
+  SeqSubmitPtr master,
+  BioseqSetPtr bssp,
+  CharPtr directory,
+  CharPtr base,
+  CharPtr suffix
+)
+
+{
+  SeqEntryPtr   sep;
+  SeqSubmitPtr  ssp;
+
+  if (master == NULL || bssp == NULL) return;
+
+  ssp = ReadOneSubmission (directory, base, suffix);
+  if (ssp == NULL) return;
+
+  if (ssp->datatype == 1) {
+    if (master->sub == NULL) {
+      master->sub = ssp->sub;
+      ssp->sub = NULL;
+    }
+
+    /* detach the entries so freeing the shell below cannot touch them */
+    sep = (SeqEntryPtr) ssp->data;
+    ssp->data = NULL;
+
+    ValNodeLink (&(bssp->seq_set), sep);
+  }
+
+  /* release the shell, plus any submit block that was not adopted */
+  SeqSubmitFree (ssp);
+}
+
+/* Command-line argument table. The macros below are the row indices Main uses
+   to read each value: -p path to files, -o output file (default text
+   "stdout"), -x file name suffix (default text ".sqn"). */
+
+#define p_argInputPath 0
+#define o_argOutputFile 1
+#define x_argSuffix 2
+
+Args myargs [] = {
+ {"Path to files", NULL, NULL, NULL,
+ TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
+ {"Output file", "stdout", NULL, NULL,
+ TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
+ {"Suffix", ".sqn", NULL, NULL,
+ TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
+};
+
+/* Entry point of subfuse: fuses every submission file in the -p directory
+ * whose name ends with the -x suffix into one genbank-class Bioseq-set
+ * wrapped in a single Seq-submit, and writes it to the -o file.
+ * Returns 1 when an ASN.1 table fails to load, otherwise 0.
+ */
+Int2 Main (void)
+
+{
+  CharPtr       base, directory, outfile, suffix, ptr;
+  BioseqSetPtr  bssp;
+  ValNodePtr    head, vnp;
+  SeqEntryPtr   sep;
+  SeqSubmitPtr  ssp;
+  size_t        baselen, suflen;
+
+  /* standard setup */
+
+  ErrSetFatalLevel (SEV_MAX);
+  ErrClearOptFlags (EO_SHOW_USERSTR);
+  UseLocalAsnloadDataAndErrMsg ();
+  ErrPathReset ();
+
+  /* finish resolving internal connections in ASN.1 parse tables */
+
+  if (! AllObjLoad ()) {
+    Message (MSG_FATAL, "AllObjLoad failed");
+    return 1;
+  }
+  if (! SubmitAsnLoad ()) {
+    Message (MSG_FATAL, "SubmitAsnLoad failed");
+    return 1;
+  }
+  if (! FeatDefSetLoad ()) {
+    Message (MSG_FATAL, "FeatDefSetLoad failed");
+    return 1;
+  }
+  if (! SeqCodeSetLoad ()) {
+    Message (MSG_FATAL, "SeqCodeSetLoad failed");
+    return 1;
+  }
+  if (! GeneticCodeTableLoad ()) {
+    Message (MSG_FATAL, "GeneticCodeTableLoad failed");
+    return 1;
+  }
+
+  /* process command line arguments */
+
+  if (! GetArgs ("subfuse", sizeof (myargs) / sizeof (Args), myargs)) {
+    return 0;
+  }
+
+  directory = (CharPtr) myargs [p_argInputPath].strvalue;
+  outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
+  suffix = (CharPtr) myargs [x_argSuffix].strvalue;
+
+  /* build the empty container: Seq-submit -> Seq-entry -> Bioseq-set */
+
+  bssp = BioseqSetNew ();
+  if (bssp == NULL) return 0;
+  bssp->_class = BioseqseqSet_class_genbank;
+
+  sep = SeqEntryNew ();
+  if (sep == NULL) return 0;
+  sep->choice = 2;
+  sep->data.ptrvalue = (Pointer) bssp;
+
+  ssp = SeqSubmitNew ();
+  if (ssp == NULL) return 0;
+  ssp->datatype = 1;
+  ssp->data = (Pointer) sep;
+
+  /* get list of all files in source directory */
+
+  head = DirCatalog (directory);
+  suflen = StringLen (suffix);
+
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice != 0) continue;
+    base = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (base)) continue;
+
+    /* the suffix must close the name; matching it anywhere would turn
+       "x.sqn.bak" into a second read of "x.sqn" */
+    baselen = StringLen (base);
+    if (baselen < suflen) continue;
+    ptr = base + (baselen - suflen);
+    if (StringCmp (ptr, suffix) != 0) continue;
+
+    /* cut the suffix off in place; ProcessOneRecord re-appends it */
+    *ptr = '\0';
+    Message (MSG_POST, "Processing %s\n", base);
+    ProcessOneRecord (ssp, bssp, directory, base, suffix);
+  }
+
+  /* clean up file list */
+
+  ValNodeFreeData (head);
+
+  /* write output file */
+
+  WriteOneSubmission (outfile, ssp);
+
+  return 0;
+}
+
diff --git a/demo/sugint.c b/demo/sugint.c
new file mode 100644
index 00000000..b93ce545
--- /dev/null
+++ b/demo/sugint.c
@@ -0,0 +1,214 @@
+/* sugint.c
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information (NCBI)
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government do not place any restriction on its use or reproduction.
+* We would, however, appreciate having the NCBI and the author cited in
+* any work or product based on this material
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* ===========================================================================
+*
+* File Name: sugint.c
+*
+* Author: Jonathan Kans
+*
+* Version Creation Date: 10/31/08
+*
+* $Revision: 1.1 $
+*
+* File Description:
+*
+* Modifications:
+* --------------------------------------------------------------------------
+* Date Name Description of modification
+* ------- ---------- -----------------------------------------------------
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <objall.h>
+#include <objsset.h>
+#include <objsub.h>
+#include <objfdef.h>
+#include <seqport.h>
+#include <sequtil.h>
+#include <sqnutils.h>
+#include <subutil.h>
+#include <tofasta.h>
+#include <gather.h>
+#include <explore.h>
+#include <suggslp.h>
+
+/* Reads the next object from fp with ReadAsnFastaOrFlatFile, registers it
+ * with the object manager, and returns the top Seq-entry of the resulting
+ * entity. Returns NULL when nothing could be read.
+ */
+static SeqEntryPtr ReadSep (
+  FILE *fp,
+  Boolean forceNuc,
+  Boolean forcePrt
+)
+
+{
+  Uint2    objtype;
+  Pointer  obj;
+
+  obj = ReadAsnFastaOrFlatFile (fp, &objtype, NULL, forceNuc, forcePrt, TRUE, FALSE);
+  if (obj == NULL) return NULL;
+
+  return GetTopSeqEntryForEntityID (ObjMgrRegister (objtype, obj));
+}
+
+/* Reads one nucleotide record from nfp and one protein record from pfp, asks
+ * SuggestCodingRegion for an annotation using genetic code gencode, and, if
+ * the first feature is a coding region with a location, writes that location
+ * to ofp. Both entries are freed before returning.
+ */
+static void ProcessSuggest (
+  FILE *nfp,
+  FILE *pfp,
+  AsnIoPtr ofp,
+  Int2 gencode
+)
+
+{
+  SeqAnnotPtr  annot;
+  SeqFeatPtr   cds;
+  SeqEntryPtr  first, nsep, psep;
+  BioseqPtr    nuc = NULL, prot = NULL;
+
+  nsep = ReadSep (nfp, TRUE, FALSE);
+  psep = ReadSep (pfp, FALSE, TRUE);
+
+  /* pick the first Bioseq of each entry, but only when both entries were read */
+  if (nsep != NULL && psep != NULL) {
+    first = FindNthBioseq (nsep, 1);
+    if (first != NULL && IS_Bioseq (first)) {
+      nuc = (BioseqPtr) first->data.ptrvalue;
+    }
+    first = FindNthBioseq (psep, 1);
+    if (first != NULL && IS_Bioseq (first)) {
+      prot = (BioseqPtr) first->data.ptrvalue;
+    }
+  }
+
+  /* molecule types must match the roles before a suggestion is attempted */
+  if (nuc != NULL && prot != NULL && ISA_na (nuc->mol) && ISA_aa (prot->mol)) {
+    annot = SuggestCodingRegion (nuc, prot, gencode);
+
+    cds = NULL;
+    if (annot != NULL && annot->type == 1) {
+      cds = (SeqFeatPtr) annot->data;
+    }
+    if (cds != NULL && cds->data.choice == SEQFEAT_CDREGION && cds->location != NULL) {
+      SeqLocAsnWrite (cds->location, ofp, NULL);
+    }
+
+    SeqAnnotFree (annot);
+  }
+
+  SeqEntryFree (nsep);
+  SeqEntryFree (psep);
+}
+
+#define n_argNucInputFile 0
+#define p_argPrtInputFile 1
+#define o_argOutputFile 2
+#define g_argGeneticCode 3
+/* Command-line argument table; the macros above are the row indices Main uses
+   to read each value: -n nucleotide input, -p protein input, -o output,
+   -g genetic code (default text "1"; "0" and "20" presumably bound the
+   accepted range - confirm against the Args definition). */
+Args myargs [] = {
+ {"Nucleotide Input File", NULL, NULL, NULL,
+ FALSE, 'n', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Protein Input File", NULL, NULL, NULL,
+ FALSE, 'p', ARG_FILE_IN, 0.0, 0, NULL},
+ {"Output File", NULL, NULL, NULL,
+ FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
+ {"Genetic Code", "1", "0", "20",
+ TRUE, 'g', ARG_INT, 0.0, 0, NULL},
+};
+
+/* Entry point of sugint: opens the nucleotide (-n) and protein (-p) input
+ * files and the ASN.1 output (-o), then runs ProcessSuggest with the -g
+ * genetic code. Returns 1 when a parse table fails to load or a file cannot
+ * be opened, 0 otherwise. Streams opened before a failure are closed again.
+ */
+Int2 Main (void)
+
+{
+  Int2      gencode;
+  FILE      *nfp, *pfp;
+  AsnIoPtr  ofp;
+  CharPtr   nucfile, prtfile, outfile;
+
+  /* standard setup */
+
+  ErrSetFatalLevel (SEV_MAX);
+  ErrClearOptFlags (EO_SHOW_USERSTR);
+  UseLocalAsnloadDataAndErrMsg ();
+  ErrPathReset ();
+
+  /* finish resolving internal connections in ASN.1 parse tables */
+
+  if (! AllObjLoad ()) {
+    Message (MSG_FATAL, "AllObjLoad failed");
+    return 1;
+  }
+  if (! SubmitAsnLoad ()) {
+    Message (MSG_FATAL, "SubmitAsnLoad failed");
+    return 1;
+  }
+  if (! FeatDefSetLoad ()) {
+    Message (MSG_FATAL, "FeatDefSetLoad failed");
+    return 1;
+  }
+  if (! SeqCodeSetLoad ()) {
+    Message (MSG_FATAL, "SeqCodeSetLoad failed");
+    return 1;
+  }
+  if (! GeneticCodeTableLoad ()) {
+    Message (MSG_FATAL, "GeneticCodeTableLoad failed");
+    return 1;
+  }
+
+  /* process command line arguments */
+
+  if (! GetArgs ("sugint", sizeof (myargs) / sizeof (Args), myargs)) {
+    return 0;
+  }
+
+  nucfile = (CharPtr) myargs [n_argNucInputFile].strvalue;
+  prtfile = (CharPtr) myargs [p_argPrtInputFile].strvalue;
+  outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
+  gencode = (Int2) myargs [g_argGeneticCode].intvalue;
+
+  nfp = FileOpen (nucfile, "r");
+  if (nfp == NULL) {
+    Message (MSG_FATAL, "Unable to open nucleotide input file");
+    return 1;
+  }
+
+  pfp = FileOpen (prtfile, "r");
+  if (pfp == NULL) {
+    Message (MSG_FATAL, "Unable to open protein input file");
+    /* do not leave the nucleotide stream open on the way out */
+    FileClose (nfp);
+    return 1;
+  }
+
+  ofp = AsnIoOpen (outfile, "w");
+  if (ofp == NULL) {
+    Message (MSG_FATAL, "Unable to open output file");
+    FileClose (pfp);
+    FileClose (nfp);
+    return 1;
+  }
+
+  ProcessSuggest (nfp, pfp, ofp, gencode);
+
+  AsnIoClose (ofp);
+  FileClose (pfp);
+  FileClose (nfp);
+
+  return 0;
+}
+
diff --git a/demo/taxblast_main.c b/demo/taxblast_main.c
new file mode 100644
index 00000000..b4aa65c7
--- /dev/null
+++ b/demo/taxblast_main.c
@@ -0,0 +1,99 @@
+static char const rcsid[] = "$Id: taxblast_main.c,v";
+
+/* $Id: taxblast_main.c,v
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* File Name: $RCSfile: taxblast_main.c,v $
+*
+* Authors: Tom Madden
+*
+* ==========================================================================
+*/
+
+#include <ncbi.h>
+#include <sequtil.h>
+#include <treemgr.h>
+#include <taxext.h>
+#include <txclient.h>
+#include <objseq.h>
+#include <objgen.h>
+#include <taxblast.h>
+
+
+#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
+/* Command-line argument table; NUMARG is its row count. Main reads the rows
+   by the literal index written beside each prompt. */
+static Args myargs [] = {
+ { "Input ASN.1 File (SeqAnnot)", /* 0 */
+ NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL },
+ { "Sequence is DNA", /* 1 */
+ "F", NULL, NULL, TRUE, 'p', ARG_BOOLEAN, 0.0, 0, NULL },
+ { "Database used to get SeqAnnot ASN.1", /* 2 */
+ "nr", NULL, NULL, TRUE, 'd', ARG_STRING, 0.0, 0, NULL },
+ { "Output file name", /* 3 */
+ "stdout", NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL }
+};
+
+/* Entry point of taxblast: reads a Seq-annot from the -i file and writes the
+ * taxonomy HTML report for its alignment data via TXBHtmlReport.
+ * Returns 1 on argument, read, allocation or output-open failure, else 0.
+ * NOTE(review): when -o is anything other than "stdout" its value is not
+ * used and the report goes to "<input>.html" - kept as found; confirm that
+ * this is intended. sap->type is not checked before sap->data is treated as
+ * a SeqAlignPtr - confirm the input is always an alignment annot.
+ */
+Int2 Main (void)
+{
+    AsnIoPtr aip;
+    SeqAnnotPtr sap;
+    Boolean is_na = FALSE;
+    FILE *outfile;
+    CharPtr ofile;
+
+    if (!GetArgs("txblast", NUMARG, myargs)) {
+        return 1;
+    }
+
+    if (myargs[1].intvalue)
+        is_na = TRUE;
+
+    if((aip = AsnIoOpen(myargs[0].strvalue, "r")) == NULL) {
+        ErrPostEx(SEV_FATAL, 1,0, "AsnIoOpen failure\n");
+        return 1;
+    }
+
+    if((sap = SeqAnnotAsnRead (aip, NULL)) == NULL) {
+        /* the call that failed is SeqAnnotAsnRead; name it correctly */
+        ErrPostEx(SEV_FATAL, 1,0,"SeqAnnotAsnRead failure\n");
+        AsnIoClose(aip);
+        return 1;
+    }
+
+    if(StringCmp(myargs[3].strvalue, "stdout")) {
+        /* size the name from the input path (sizeof includes the NUL) so a
+           long path cannot overrun a fixed buffer */
+        ofile = (CharPtr) MemNew (StringLen (myargs[0].strvalue) + sizeof (".html"));
+        if (ofile == NULL) {
+            ErrPostEx(SEV_FATAL, 1,0, "Out of memory\n");
+            AsnIoClose(aip);
+            SeqAnnotFree(sap);
+            return 1;
+        }
+        sprintf (ofile, "%s.html", myargs[0].strvalue);
+        outfile = FileOpen(ofile, "w");
+        MemFree (ofile);
+    } else {
+        outfile = FileOpen(myargs[3].strvalue, "w");
+    }
+
+    if (outfile == NULL) {
+        ErrPostEx(SEV_FATAL, 1,0, "Unable to open output file\n");
+        AsnIoClose(aip);
+        SeqAnnotFree(sap);
+        return 1;
+    }
+
+    TXBHtmlReport((SeqAlignPtr)sap->data, outfile, is_na, is_na,
+                  myargs[2].strvalue, NULL, NULL, FALSE);
+
+    FileClose(outfile);
+
+    AsnIoClose(aip);
+    SeqAnnotFree(sap);
+
+    return (0);
+}
diff --git a/demo/tbl2asn.c b/demo/tbl2asn.c
index 2aae8664..2d999195 100644
--- a/demo/tbl2asn.c
+++ b/demo/tbl2asn.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/5/00
*
-* $Revision: 6.223 $
+* $Revision: 6.271 $
*
* File Description:
*
@@ -67,11 +67,21 @@
#ifdef INTERNAL_NCBI_TBL2ASN
#include <accpubseq.h>
#endif
+#define NLM_GENERATED_CODE_PROTO
+#include <asnmacro.h>
+#include <objmacro.h>
+#include <macroapi.h>
-#define TBL2ASN_APP_VER "10.3"
+#define TBL2ASN_APP_VER "12.7"
CharPtr TBL2ASN_APPLICATION = TBL2ASN_APP_VER;
+/* Cleanup switches, embedded in TblArgs as the cleanup_args member.
+ * NOTE(review): the code that reads these flags is outside this hunk; the
+ * names suggest collection-date reformatting (optionally month-first) and
+ * adding notes to overlapping CDS features - confirm against the users. */
+typedef struct cleanupargs {
+ Boolean collection_dates;
+ Boolean collection_dates_month_first;
+ Boolean add_notes_to_overlapping_cds_without_abc;
+} CleanupArgsData, PNTR CleanupArgsPtr;
+
typedef struct tblargs {
Boolean raw2delt;
Int2 r2dmin;
@@ -105,8 +115,9 @@ typedef struct tblargs {
Boolean conflict;
Boolean validate;
Boolean relaxed;
- Boolean discrepancy;
+ Boolean validate_barcode;
Boolean flatfile;
+ Boolean genereport;
Boolean seqidfromfile;
Boolean smartfeats;
Boolean smarttitle;
@@ -118,17 +129,10 @@ typedef struct tblargs {
CharPtr aln_match;
Boolean aln_is_protein;
Boolean save_bioseq_set;
- ValNodePtr locus_tag_list;
- ValNodePtr missing_locus_tag;
- ValNodePtr cds_product_list;
- ValNodePtr missing_cds_product;
- ValNodePtr mrna_product_list;
- ValNodePtr missing_mrna_product;
- ValNodePtr adjacent_locus_tag_disc_list;
- ValNodePtr missing_gnl_list;
- ValNodePtr gnl_list;
- ValNodePtr discrepancy_list;
- DiscReportOutputConfigData disc_rep_config;
+
+ GlobalDiscrepReportPtr global_report;
+
+ CleanupArgsData cleanup_args;
} TblArgs, PNTR TblArgsPtr;
static FILE* OpenOneFile (
@@ -271,12 +275,15 @@ static void LIBCALLBACK ValidCallback (
fprintf (fp, "\n");
}
+
static void ValidateOneFile (
CharPtr results,
CharPtr base,
CharPtr suffix,
SeqEntryPtr sep,
- Boolean relaxed
+ Boolean standard,
+ Boolean relaxed,
+ Boolean barcode
)
{
@@ -289,24 +296,32 @@ static void ValidateOneFile (
sprintf (file, "%s%s", base, suffix);
FileBuildPath (path, NULL, file);
- vsp = ValidStructNew ();
- if (vsp != NULL) {
- vsp->useSeqMgrIndexes = TRUE;
- vsp->suppressContext = TRUE;
- vsp->seqSubmitParent = TRUE;
- if (! relaxed) {
- vsp->testLatLonSubregion = TRUE;
- }
- oldErrSev = ErrSetMessageLevel (SEV_NONE);
- ofp = FileOpen (path, "w");
- vsp->errfunc = ValidCallback;
- vsp->userdata = (Pointer) ofp;
- /* vsp->convertGiToAccn = FALSE; */
- ValidateSeqEntry (sep, vsp);
- ValidStructFree (vsp);
- FileClose (ofp);
- ErrSetMessageLevel (oldErrSev);
+ ofp = FileOpen (path, "w");
+
+ if (standard) {
+ vsp = ValidStructNew ();
+ if (vsp != NULL) {
+ vsp->useSeqMgrIndexes = TRUE;
+ vsp->suppressContext = TRUE;
+ vsp->seqSubmitParent = TRUE;
+ if (! relaxed) {
+ vsp->testLatLonSubregion = TRUE;
+ }
+ oldErrSev = ErrSetMessageLevel (SEV_NONE);
+ vsp->errfunc = ValidCallback;
+ vsp->userdata = (Pointer) ofp;
+ /* vsp->convertGiToAccn = FALSE; */
+ ValidateSeqEntry (sep, vsp);
+ ValidStructFree (vsp);
+ ErrSetMessageLevel (oldErrSev);
+ }
}
+ /* Barcode results if requested */
+ if (barcode) {
+ BarcodeValidateOneSeqEntry (ofp, sep, TRUE, FALSE, TRUE, NULL);
+ }
+
+ FileClose (ofp);
}
static void FlatfileOneFile (
@@ -989,6 +1004,450 @@ static int LIBCALLBACK SortByGenePtr (
return 0;
}
+/* Writes one column of a gene report line: the value when it contains
+ * text, otherwise the placeholder that marks the column as empty. */
+static void PrintGeneReportField (
+  FILE *fp,
+  CharPtr value,
+  CharPtr placeholder
+)
+
+{
+  if (StringDoesHaveText (value)) {
+    fprintf (fp, "%s", value);
+  } else {
+    fprintf (fp, "%s", placeholder);
+  }
+}
+
+/* Writes one tab-separated gene report line to fp.
+ *
+ * gene      gene feature; supplies locus_tag, locus, desc, FLYBASE db tag,
+ *           old_locus_tag qualifiers and the gene type (may be NULL)
+ * cds       coding region whose product ids are reported (may be NULL)
+ * rna       RNA feature whose type and product ids are reported (may be NULL)
+ * cdslabel  text for the CDS product column (may be NULL)
+ * rnalabel  text for the RNA product column (may be NULL)
+ * fp        destination; nothing is written when NULL
+ *
+ * Column order: locus_tag, locus, desc, FlyBase id, old locus tags
+ * (comma separated), CDS local id, CDS accession, CDS general id,
+ * RNA accession, RNA general id, CDS product, RNA product, gene type,
+ * RNA type.  Every empty column is filled with a "null_..." placeholder.
+ */
+static void PrintOneGeneLine (
+  SeqFeatPtr gene,
+  SeqFeatPtr cds,
+  SeqFeatPtr rna,
+  CharPtr cdslabel,
+  CharPtr rnalabel,
+  FILE *fp
+)
+
+{
+  BioseqPtr     bsp;
+  ValNodePtr    db = NULL, old_locus_tag = NULL, vnp;
+  DbtagPtr      dbt;
+  CharPtr       desc = NULL, locus = NULL, locus_tag = NULL;
+  CharPtr       cdslcl = NULL, cdsaccn = NULL, cdsgnl = NULL;
+  CharPtr       rnaaccn = NULL, rnagnl = NULL, fbgn = NULL;
+  CharPtr       gene_type = NULL, rna_type = NULL, prefix, str;
+  GBQualPtr     gbq;
+  GeneRefPtr    grp;
+  ObjectIdPtr   oip;
+  SeqIdPtr      sip;
+  TextSeqIdPtr  tsip;
+
+  if (fp == NULL) return;
+
+  if (gene != NULL) {
+    gene_type = "gene";
+    if (gene->pseudo) {
+      gene_type = "pseudogene";
+    }
+    grp = (GeneRefPtr) gene->data.value.ptrvalue;
+    if (grp != NULL) {
+      if (grp->pseudo) {
+        gene_type = "pseudogene";
+      }
+      locus = grp->locus;
+      desc = grp->desc;
+      locus_tag = grp->locus_tag;
+      db = grp->db;
+    }
+    /* fall back to the feature-level db xrefs when the gene ref has none */
+    if (db == NULL) {
+      db = gene->dbxref;
+    }
+    /* collect copies of every non-empty old_locus_tag qualifier */
+    for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) {
+      if (StringICmp (gbq->qual, "old_locus_tag") != 0) continue;
+      if (StringHasNoText (gbq->val)) continue;
+      ValNodeCopyStr(&old_locus_tag, 0, gbq->val);
+    }
+    /* the last FLYBASE db tag in the list wins */
+    for (vnp = db; vnp != NULL; vnp = vnp->next) {
+      dbt = (DbtagPtr) vnp->data.ptrvalue;
+      if (dbt == NULL) continue;
+      if (StringICmp (dbt->db, "FLYBASE") != 0) continue;
+      oip = dbt->tag;
+      if (oip == NULL) continue;
+      fbgn = oip->str;
+    }
+  }
+
+  if (cds != NULL) {
+    if (cds->product != NULL) {
+      bsp = BioseqFindFromSeqLoc (cds->product);
+      if (bsp != NULL) {
+        /* "continue" inside the switch advances to the next Seq-id */
+        for (sip = bsp->id; sip != NULL; sip = sip->next) {
+          switch (sip->choice) {
+            case SEQID_LOCAL :
+              oip = (ObjectIdPtr) sip->data.ptrvalue;
+              if (oip == NULL) continue;
+              cdslcl = oip->str;
+              break;
+            case SEQID_GENBANK :
+            case SEQID_TPG :
+              tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+              if (tsip == NULL) continue;
+              cdsaccn = tsip->accession;
+              break;
+            case SEQID_GENERAL :
+              dbt = (DbtagPtr) sip->data.ptrvalue;
+              if (dbt == NULL) continue;
+              if (IsSkippableDbtag (dbt)) continue;
+              oip = dbt->tag;
+              if (oip == NULL) continue;
+              cdsgnl = oip->str;
+              break;
+            default :
+              break;
+          }
+        }
+      }
+    }
+  }
+
+  if (rna != NULL) {
+    switch (rna->idx.subtype) {
+      case FEATDEF_preRNA :
+        rna_type = "precursor RNA";
+        break;
+      case FEATDEF_mRNA :
+        rna_type = "mRNA";
+        break;
+      case FEATDEF_tRNA :
+        rna_type = "tRNA";
+        break;
+      case FEATDEF_rRNA :
+        rna_type = "rRNA";
+        break;
+      case FEATDEF_otherRNA :
+        rna_type = "misc RNA";
+        break;
+      case FEATDEF_ncRNA :
+        rna_type = "ncRNA";
+        /* a non-empty ncRNA_class qualifier replaces the generic label */
+        for (gbq = rna->qual; gbq != NULL; gbq = gbq->next) {
+          if (StringICmp (gbq->qual, "ncRNA_class") != 0) continue;
+          if (StringDoesHaveText (gbq->val)) {
+            rna_type = gbq->val;
+          }
+        }
+        break;
+      case FEATDEF_tmRNA :
+        rna_type = "tmRNA";
+        break;
+      default :
+        break;
+    }
+    /* the pseudo flag overrides whatever the subtype selected */
+    if (rna->pseudo) {
+      rna_type = "pseudo RNA";
+    }
+    if (rna->product != NULL) {
+      bsp = BioseqFindFromSeqLoc (rna->product);
+      if (bsp != NULL) {
+        for (sip = bsp->id; sip != NULL; sip = sip->next) {
+          switch (sip->choice) {
+            case SEQID_GENBANK :
+            case SEQID_TPG :
+              tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+              if (tsip == NULL) continue;
+              rnaaccn = tsip->accession;
+              break;
+            case SEQID_GENERAL :
+              dbt = (DbtagPtr) sip->data.ptrvalue;
+              if (dbt == NULL) continue;
+              if (IsSkippableDbtag (dbt)) continue;
+              oip = dbt->tag;
+              if (oip == NULL) continue;
+              rnagnl = oip->str;
+              break;
+            default :
+              break;
+          }
+        }
+      }
+    }
+  }
+
+  PrintGeneReportField (fp, locus_tag, "null_gene_ltag");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, locus, "null_gene_locus");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, desc, "null_gene_desc");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, fbgn, "null_fbgn");
+
+  fprintf (fp, "\t");
+  if (old_locus_tag != NULL) {
+    prefix = "";
+    for (vnp = old_locus_tag; vnp != NULL; vnp = vnp->next) {
+      str = (CharPtr) vnp->data.ptrvalue;
+      if (StringHasNoText (str)) continue;
+      fprintf (fp, "%s%s", prefix, str);
+      prefix = ",";
+    }
+  } else {
+    fprintf (fp, "null_old_ltag");
+  }
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, cdslcl, "null_cds_lcl");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, cdsaccn, "null_cds_accn");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, cdsgnl, "null_cds_gnl");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, rnaaccn, "null_rna_accn");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, rnagnl, "null_rna_gnl");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, cdslabel, "null_cds_product");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, rnalabel, "null_rna_product");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, gene_type, "null_gene_type");
+
+  fprintf (fp, "\t");
+  PrintGeneReportField (fp, rna_type, "null_rna_type");
+
+  fprintf (fp, "\n");
+
+  /* the old_locus_tag list holds private string copies; release them so
+     that each report line does not leak */
+  ValNodeFreeData (old_locus_tag);
+}
+
+/* Writes the gene report lines for every gene indexed on bsp.
+ *
+ * All gene, CDS and RNA features are gathered into one array, each CDS or
+ * RNA paired with its gene (by gene xref when present and not suppressed,
+ * otherwise by overlap).  The array is sorted with SortByGenePtr so that
+ * entries sharing a gene are adjacent, and each group is printed:
+ *   - one line per CDS when the gene has any CDS (the RNA column comes
+ *     from the feature the CDS points to through a feature-id xref),
+ *   - otherwise one line per RNA,
+ *   - otherwise a single gene-only line.
+ * Entries without a gene are skipped.  Nothing is written when bsp or fp
+ * is NULL or when the Bioseq has no gene features.
+ */
+static void GeneReportOneBsp (
+  BioseqPtr bsp,
+  FILE *fp
+)
+
+{
+  CharPtr            cdslabel, rnalabel;
+  SeqMgrFeatContext  fcontext;
+  GmcDataPtr         gdp, head;
+  GeneRefPtr         grp;
+  /* 32-bit counters: a genome can carry more than 32767 features */
+  Int4               i, j, k, numgene, numcds, numrna, total;
+  SeqFeatPtr         matchsfp, sfp, tmp;
+  SeqFeatXrefPtr     xref;
+
+  if (bsp == NULL || fp == NULL) return;
+
+  numgene = 0;
+  numcds = 0;
+  numrna = 0;
+
+  /* first pass: count, so the array can be sized */
+  sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+  while (sfp != NULL) {
+    switch (sfp->data.choice) {
+      case SEQFEAT_GENE :
+        numgene++;
+        break;
+      case SEQFEAT_CDREGION :
+        numcds++;
+        break;
+      case SEQFEAT_RNA :
+        numrna++;
+        break;
+      default :
+        break;
+    }
+    sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext);
+  }
+
+  if (numgene == 0) return;
+  total = numgene + numcds + numrna;
+  if (total == 0) return;
+
+  /* entries whose gene is never assigned below must read as NULL;
+     presumably MemNew returns zeroed memory - TODO confirm */
+  head = (GmcDataPtr) MemNew (sizeof (GmcData) * (size_t) (total + 1));
+  if (head == NULL) return;
+
+  /* second pass: fill the array */
+  gdp = head;
+  total = 0;
+  sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+  while (sfp != NULL) {
+    if (sfp->data.choice == SEQFEAT_CDREGION || sfp->data.choice == SEQFEAT_RNA) {
+      gdp->feat = sfp;
+      gdp->label = fcontext.label;
+      grp = SeqMgrGetGeneXref (sfp);
+      if (grp == NULL) {
+        gdp->gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+      } else if (! SeqMgrGeneIsSuppressed (grp)) {
+        if (StringDoesHaveText (grp->locus_tag)) {
+          gdp->gene = SeqMgrGetGeneByLocusTag (bsp, grp->locus_tag, NULL);
+        } else if (StringDoesHaveText (grp->locus)) {
+          gdp->gene = SeqMgrGetFeatureByLabel (bsp, grp->locus, SEQFEAT_GENE, 0, NULL);
+        }
+      }
+      gdp++;
+      total++;
+    } else if (sfp->data.choice == SEQFEAT_GENE) {
+      gdp->gene = sfp;
+      gdp++;
+      total++;
+    }
+    sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext);
+  }
+
+  HeapSort (head, (size_t) total, sizeof (GmcData), SortByGenePtr);
+
+  for (i = 0; i < total; i += j) {
+    /* default step of one entry: the loop increment must never see an
+       uninitialised or stale j when a gene-less entry is skipped */
+    j = 1;
+    sfp = head [i].gene;
+    if (sfp == NULL) continue;
+    numcds = 0;
+    numrna = 0;
+    /* j ends as the size of the run of entries that share this gene */
+    for (j = 0; i + j < total && sfp == head [i + j].gene; j++) {
+      tmp = head [i + j].feat;
+      if (tmp == NULL) continue;
+      if (tmp->data.choice == SEQFEAT_CDREGION) {
+        numcds++;
+      } else if (tmp->data.choice == SEQFEAT_RNA) {
+        numrna++;
+      }
+    }
+    if (numcds > 0) {
+      for (k = 0; k < j; k++) {
+        tmp = head [i + k].feat;
+        if (tmp == NULL) continue;
+        if (tmp->data.choice != SEQFEAT_CDREGION) continue;
+        cdslabel = head [i + k].label;
+        /* start every CDS with no RNA so that a label found for an
+           earlier CDS of the same gene is not reported again */
+        rnalabel = NULL;
+        matchsfp = NULL;
+        for (xref = tmp->xref; xref != NULL && matchsfp == NULL; xref = xref->next) {
+          if (xref->id.choice != 0) {
+            matchsfp = SeqMgrGetFeatureByFeatID (tmp->idx.entityID, NULL, NULL, xref, &fcontext);
+            /* only trust the context when the lookup succeeded */
+            if (matchsfp != NULL) {
+              rnalabel = fcontext.label;
+            }
+          }
+        }
+        PrintOneGeneLine (sfp, tmp, matchsfp, cdslabel, rnalabel, fp);
+      }
+    } else if (numrna > 0) {
+      for (k = 0; k < j; k++) {
+        tmp = head [i + k].feat;
+        if (tmp == NULL) continue;
+        if (tmp->data.choice != SEQFEAT_RNA) continue;
+        rnalabel = head [i + k].label;
+        PrintOneGeneLine (sfp, NULL, tmp, NULL, rnalabel, fp);
+      }
+    } else {
+      PrintOneGeneLine (sfp, NULL, NULL, NULL, NULL, fp);
+    }
+  }
+
+  MemFree (head);
+}
+
+/* Bioseq visitor for the gene report: forwards bsp to GeneReportOneBsp
+ * when it is not a protein and its first MolInfo descriptor has a genomic
+ * biomol.  userdata carries the FILE * the report is written to. */
+static void GeneReportGenomicBsp (
+  BioseqPtr bsp,
+  Pointer userdata
+)
+
+{
+  SeqMgrDescContext  ctx;
+  SeqDescrPtr        molinfo_desc;
+  MolInfoPtr         molinfo;
+
+  /* proteins never get a gene report */
+  if (bsp == NULL || ISA_aa (bsp->mol)) return;
+
+  molinfo_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &ctx);
+  molinfo = (molinfo_desc != NULL) ? (MolInfoPtr) molinfo_desc->data.ptrvalue : NULL;
+  if (molinfo == NULL || molinfo->biomol != MOLECULE_TYPE_GENOMIC) return;
+
+  GeneReportOneBsp (bsp, (FILE *) userdata);
+}
+
+/* Writes the gene report for sep into the file "<base><suffix>" inside the
+ * results directory.  Returns silently when the file cannot be opened.
+ * The error message level is set to SEV_MAX while the Bioseqs are visited
+ * and restored afterwards. */
+static void GeneReportOneFile (
+  CharPtr results,
+  CharPtr base,
+  CharPtr suffix,
+  SeqEntryPtr sep
+)
+
+{
+  Char    fullpath [PATH_MAX], leaf [FILENAME_MAX];
+  FILE    *out;
+  ErrSev  saved_level;
+
+  sprintf (leaf, "%s%s", base, suffix);
+  StringNCpy_0 (fullpath, results, sizeof (fullpath));
+  FileBuildPath (fullpath, NULL, leaf);
+
+  out = FileOpen (fullpath, "w");
+  if (out != NULL) {
+    saved_level = ErrSetMessageLevel (SEV_MAX);
+    VisitBioseqsInSep (sep, (Pointer) out, GeneReportGenomicBsp);
+    ErrSetMessageLevel (saved_level);
+
+    FileClose (out);
+  }
+}
+
static void EnhanceOneCDS (
SeqFeatPtr sfp,
Boolean alt_splice
@@ -1032,7 +1491,7 @@ static void EnhanceOneCDS (
if (sip->choice != SEQID_GENERAL) continue;
dbt = (DbtagPtr) sip->data.ptrvalue;
if (dbt == NULL) continue;
- if (IsSkippableDbtag (dbt)) continue;
+ if (IsSkippableDbtag (dbt)) continue;
oip = dbt->tag;
if (oip == NULL) continue;
str = oip->str;
@@ -1100,14 +1559,14 @@ static void EnhanceOneCDS (
}
}
-static void EnhanceOneMrna (
+static void EnhanceOneRna (
SeqFeatPtr sfp,
Boolean alt_splice
)
{
DbtagPtr dbt;
- GBQualPtr gbq;
+ GBQualPtr gbq, nm_gbq;
Char id [64];
SeqIdPtr ids, sip;
size_t len;
@@ -1119,10 +1578,29 @@ static void EnhanceOneMrna (
if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return;
name = NULL;
+ nm_gbq = NULL;
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
- if (rrp != NULL && rrp->type == 2 && rrp->ext.choice == 1) {
- name = rrp->ext.value.ptrvalue;
+ if (rrp != NULL && rrp->ext.choice == 1) {
+ switch (rrp->type) {
+ case 1 : /* precurrsor_RNA */
+ case 2 : /* mRNA */
+ case 4 : /* rRNA */
+ name = rrp->ext.value.ptrvalue;
+ break;
+ case 255 : /* misc_RNA, ncRNA, tmRNA */
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "product") == 0) {
+ nm_gbq = gbq;
+ name = gbq->val;
+ }
+ }
+ break;
+ case 3: /* tRNA */
+ return;
+ default :
+ break;
+ }
}
id [0] = '\0';
@@ -1161,7 +1639,11 @@ static void EnhanceOneMrna (
*/
StringCat (nwstr, " ");
StringCat (nwstr, tmp);
- rrp->ext.value.ptrvalue = (Pointer) nwstr;
+ if (nm_gbq != NULL) {
+ nm_gbq->val = (Pointer) nwstr;
+ } else {
+ rrp->ext.value.ptrvalue = (Pointer) nwstr;
+ }
MemFree (name);
}
} else {
@@ -1177,7 +1659,11 @@ static void EnhanceOneMrna (
*/
StringCat (nwstr, " ");
StringCat (nwstr, str);
- rrp->ext.value.ptrvalue = (Pointer) nwstr;
+ if (nm_gbq != NULL) {
+ nm_gbq->val = (Pointer) nwstr;
+ } else {
+ rrp->ext.value.ptrvalue = (Pointer) nwstr;
+ }
MemFree (name);
}
}
@@ -1210,25 +1696,25 @@ static void EnhanceFeatureAnnotation (
{
GmcDataPtr gdp, head;
GeneRefPtr grp;
- Int2 i, j, k, numgene, numcds, nummrna;
+ Int2 i, j, k, numgene, numcds, numrna;
SeqFeatPtr sfp;
if (features == NULL || bsp == NULL) return;
numgene = 0;
numcds = 0;
- nummrna = 0;
+ numrna = 0;
for (sfp = features; sfp != NULL; sfp = sfp->next) {
- switch (sfp->idx.subtype) {
- case FEATDEF_GENE :
+ switch (sfp->data.choice) {
+ case SEQFEAT_GENE :
numgene++;
break;
- case FEATDEF_CDS :
+ case SEQFEAT_CDREGION :
numcds++;
break;
- case FEATDEF_mRNA :
- nummrna++;
+ case SEQFEAT_RNA :
+ numrna++;
break;
default :
break;
@@ -1269,12 +1755,12 @@ static void EnhanceFeatureAnnotation (
MemFree (head);
}
- if (nummrna > 0) {
- head = (GmcDataPtr) MemNew (sizeof (GmcData) * (size_t) (nummrna + 1));
+ if (numrna > 0) {
+ head = (GmcDataPtr) MemNew (sizeof (GmcData) * (size_t) (numrna + 1));
if (head != NULL) {
gdp = head;
for (sfp = features; sfp != NULL; sfp = sfp->next) {
- if (sfp->idx.subtype == FEATDEF_mRNA) {
+ if (sfp->data.choice == SEQFEAT_RNA) {
gdp->feat = sfp;
grp = SeqMgrGetGeneXref (sfp);
if (grp == NULL || (! SeqMgrGeneIsSuppressed (grp))) {
@@ -1283,17 +1769,17 @@ static void EnhanceFeatureAnnotation (
gdp++;
}
}
- HeapSort (head, (size_t) nummrna, sizeof (GmcData), SortByGenePtr);
- for (i = 0; i < nummrna; i += j) {
+ HeapSort (head, (size_t) numrna, sizeof (GmcData), SortByGenePtr);
+ for (i = 0; i < numrna; i += j) {
sfp = head [i].gene;
- for (j = 1; i + j < nummrna && sfp == head [i + j].gene; j++) continue;
+ for (j = 1; i + j < numrna && sfp == head [i + j].gene; j++) continue;
if (j == 1) {
/* no alt splicing */
- EnhanceOneMrna (head [i].feat, FALSE);
+ EnhanceOneRna (head [i].feat, FALSE);
} else {
/* is alt splicing */
for (k = 0; k < j; k++) {
- EnhanceOneMrna (head [i + k].feat, TRUE);
+ EnhanceOneRna (head [i + k].feat, TRUE);
}
}
}
@@ -1564,10 +2050,11 @@ static void ProcessOneNuc (
SeqHistPtr shp;
SqnTagPtr stp = NULL;
CharPtr str;
- SeqEntryPtr top;
+ CharPtr tmp;
CharPtr ttl = NULL;
UserObjectPtr uop;
ValNodePtr vnp;
+ SeqMgrDescContext dcontext;
if (bsp == NULL) return;
@@ -1582,9 +2069,9 @@ static void ProcessOneNuc (
(AsnReadFunc) BioSourceAsnRead,
(AsnWriteFunc) BioSourceAsnWrite);
} else {
- top = GetTopSeqEntryForEntityID (entityID);
- if (top != NULL) {
- VisitBioSourcesInSep (top, (Pointer) &src, GetFirstBiop);
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+ if (sdp != NULL) {
+ src = sdp->data.ptrvalue;
if (src != NULL) {
addNewBiop = FALSE;
}
@@ -1613,6 +2100,11 @@ static void ProcessOneNuc (
if (stp != NULL) {
biop = ParseTitleIntoBioSource (stp, tbl->organism, src);
ParseTitleIntoBioseq (stp, bsp);
+ str = SqnTagFind (stp, "comment");
+ if (str != NULL) {
+ tmp = StringSave (str);
+ SeqDescrAddPointer (&(bsp->descr), Seq_descr_comment, (Pointer) tmp);
+ }
}
if (biop == NULL) {
biop = ParseTitleIntoBioSource (NULL, tbl->organism, src);
@@ -1757,6 +2249,27 @@ static void ProcessOneNuc (
ValNodeFreeData (vnp);
}
+/* Walks the Seq-entry tree rooted at top_sep and calls ProcessOneNuc, with
+ * the given entityID, src, tbl and template_molinfo, on every Bioseq that
+ * is not a protein.  Bioseq-sets are descended into recursively. */
+static void ProcessNucBioseqs (SeqEntryPtr top_sep, Uint2 entityID, BioSourcePtr src, TblArgsPtr tbl, MolInfoPtr template_molinfo)
+{
+  BioseqPtr     nuc;
+  BioseqSetPtr  set;
+  SeqEntryPtr   member;
+
+  if (top_sep == NULL || top_sep->data.ptrvalue == NULL) return;
+
+  if (IS_Bioseq (top_sep)) {
+    nuc = (BioseqPtr) top_sep->data.ptrvalue;
+    if (ISA_aa (nuc->mol)) return;
+    ProcessOneNuc (entityID, nuc, src, tbl, template_molinfo);
+    return;
+  }
+
+  if (! IS_Bioseq_set (top_sep)) return;
+
+  set = (BioseqSetPtr) top_sep->data.ptrvalue;
+  member = set->seq_set;
+  while (member != NULL) {
+    ProcessNucBioseqs (member, entityID, src, tbl, template_molinfo);
+    member = member->next;
+  }
+}
+
+
static void ProcessOneAnnot (
SeqAnnotPtr sap,
Uint2 entityID,
@@ -1952,11 +2465,14 @@ static void ReplaceOneRNA (
)
{
- ByteStorePtr bs;
- BioseqPtr bsp;
- SeqFeatPtr mrna;
- SeqIdPtr sip;
- CharPtr str, str1, str2;
+ ByteStorePtr bs;
+ BioseqPtr bsp;
+ SeqMgrFeatContext ccontext;
+ SeqFeatPtr cds, mrna;
+ SeqIntPtr sintp;
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+ CharPtr str, str1, str2;
if (ssp == NULL || ssp->numid < 1) return;
@@ -2008,6 +2524,21 @@ static void ReplaceOneRNA (
}
*/
}
+
+ /* make sure CDS in nuc-prot set is not longer than just-replaced RNA */
+
+ cds = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &ccontext);
+ if (cds != NULL) {
+ slp = cds->location;
+ if (slp != NULL && slp->choice == SEQLOC_INT) {
+ sintp = (SeqIntPtr) slp->data.ptrvalue;
+ if (sintp != NULL) {
+ if (sintp->from == 0 && sintp->to > bsp->length - 1) {
+ sintp->to = bsp->length - 1;
+ }
+ }
+ }
+ }
}
MemFree (str1);
@@ -2134,6 +2665,59 @@ static void SuggestOnePeptide (
SeqLocFree (slp);
}
+/* Rewrites, in place, a name that ends in a comma (optionally followed by
+ * spaces): the comma becomes an underscore and the spaces after it are cut
+ * off.  Strings without text are left alone.  The backward scan over
+ * spaces stops once only two characters remain. */
+static void TrailingCommaToUnderscore (CharPtr str)
+
+{
+  Char    ch;
+  size_t  len;
+
+  if (StringHasNoText (str)) return;
+  len = StringLen (str);
+  if (len < 1) return;
+
+  ch = str [len - 1];
+  while (ch == ' ' && len > 2) {
+    len--;
+    ch = str [len - 1];
+  }
+  if (ch == ',') {
+    str [len - 1] = '_';
+    str [len] = '\0';
+  }
+}
+
+/* Feature visitor: turns a trailing comma into a trailing underscore (for
+ * the validator) in every name of a protein feature and in the name of an
+ * RNA feature whose ext.choice is 1 (name stored as a string).  Other
+ * feature types, and features without a data pointer, are ignored.
+ * userdata is unused. */
+static void RnaProtTrailingCommaFix (SeqFeatPtr sfp, Pointer userdata)
+
+{
+  ProtRefPtr  prp;
+  RnaRefPtr   rrp;
+  ValNodePtr  vnp;
+
+  if (sfp == NULL) return;
+
+  if (sfp->data.choice == SEQFEAT_PROT) {
+    prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+    /* a feature may arrive without its ProtRef */
+    if (prp == NULL) return;
+    for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+      TrailingCommaToUnderscore ((CharPtr) vnp->data.ptrvalue);
+    }
+  } else if (sfp->data.choice == SEQFEAT_RNA) {
+    rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+    /* a feature may arrive without its RnaRef */
+    if (rrp == NULL) return;
+    if (rrp->ext.choice == 1) {
+      TrailingCommaToUnderscore ((CharPtr) rrp->ext.value.ptrvalue);
+    }
+  }
+}
+
static Uint2 ProcessOneAsn (
FILE* fp,
BioSourcePtr src,
@@ -2197,6 +2781,8 @@ static Uint2 ProcessOneAsn (
return 0;
}
+ VisitFeaturesInSep (sep, NULL, RnaProtTrailingCommaFix);
+
if (StringDoesHaveText (localname)) {
sip = MakeSeqID (localname);
if (sip != NULL) {
@@ -2207,7 +2793,7 @@ static Uint2 ProcessOneAsn (
}
}
- ProcessOneNuc (entityID, bsp, src, tbl, template_molinfo);
+ ProcessNucBioseqs (sep, entityID, src, tbl, template_molinfo);
return entityID;
}
@@ -2276,6 +2862,8 @@ static Uint2 ProcessRaw2Delt (
return 0;
}
+ VisitFeaturesInSep (sep, NULL, RnaProtTrailingCommaFix);
+
if (StringDoesHaveText (localname)) {
sip = MakeSeqID (localname);
if (sip != NULL) {
@@ -2355,6 +2943,8 @@ static Uint2 ProcessGappedSet (
return 0;
}
+ VisitFeaturesInSep (sep, NULL, RnaProtTrailingCommaFix);
+
ProcessOneNuc (entityID, bsp, src, tbl, template_molinfo);
return entityID;
@@ -2642,38 +3232,38 @@ static CharPtr ReadContigFile (
rescuedcontigs = ValNodeFreeData (rescuedcontigs);
if (sp6_clonep != NULL && *sp6_clonep != NULL) {
- sp6_end = StringChr (*sp6_clonep, ',');
- if (sp6_end != NULL) {
- *sp6_end = '\0';
- sp6_end++;
- if (StringICmp (sp6_end, "left") == 0) {
- sp6_end = "left";
- } else if (StringICmp (sp6_end, "right") == 0) {
- sp6_end = "right";
- } else {
- sp6_end = NULL;
- }
- }
- if (sp6_endp != NULL) {
- *sp6_endp = sp6_end;
- }
+ sp6_end = StringChr (*sp6_clonep, ',');
+ if (sp6_end != NULL) {
+ *sp6_end = '\0';
+ sp6_end++;
+ if (StringICmp (sp6_end, "left") == 0) {
+ sp6_end = "left";
+ } else if (StringICmp (sp6_end, "right") == 0) {
+ sp6_end = "right";
+ } else {
+ sp6_end = NULL;
+ }
+ }
+ if (sp6_endp != NULL) {
+ *sp6_endp = sp6_end;
+ }
}
if (t7_clonep != NULL && *t7_clonep != NULL) {
- t7_end = StringChr (*t7_clonep, ',');
- if (t7_end != NULL) {
- *t7_end = '\0';
- t7_end++;
- if (StringICmp (t7_end, "left") == 0) {
- t7_end = "left";
- } else if (StringICmp (t7_end, "right") == 0) {
- t7_end = "right";
- } else {
- t7_end = NULL;
- }
- }
- if (t7_endp != NULL) {
- *t7_endp = t7_end;
- }
+ t7_end = StringChr (*t7_clonep, ',');
+ if (t7_end != NULL) {
+ *t7_end = '\0';
+ t7_end++;
+ if (StringICmp (t7_end, "left") == 0) {
+ t7_end = "left";
+ } else if (StringICmp (t7_end, "right") == 0) {
+ t7_end = "right";
+ } else {
+ t7_end = NULL;
+ }
+ }
+ if (t7_endp != NULL) {
+ *t7_endp = t7_end;
+ }
}
return pstring;
@@ -3792,6 +4382,64 @@ static void CopyGene (
MemFree (temp); /* do not SeqFeatFree */
}
+/* Feature visitor: for an ncRNA feature whose product Bioseq can be found,
+ * creates a new RNA feature on that product and moves into it the fields
+ * of a deep copy of the original feature.  The partial-end flags of the
+ * original location are applied to the new feature's location.
+ * userdata is unused. */
+static void CopyNcRna (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  BioseqPtr   bsp;
+  SeqFeatPtr  copy, temp;
+  Boolean     partial5, partial3;
+
+  /* guard against a NULL feature, as the other visitors here do */
+  if (sfp == NULL) return;
+  if (sfp->data.choice != SEQFEAT_RNA) return;
+  if (sfp->idx.subtype != FEATDEF_ncRNA) return;
+
+  /* find instantiated product of ncRNA */
+
+  bsp = BioseqFindFromSeqLoc (sfp->product);
+  if (bsp == NULL) return;
+
+  CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+
+  /* copy ncRNA feature fields to paste into new ncRNA feature */
+
+  temp = AsnIoMemCopy (sfp,
+                       (AsnReadFunc) SeqFeatAsnRead,
+                       (AsnWriteFunc) SeqFeatAsnWrite);
+  if (temp == NULL) return;
+
+  /* make new ncRNA feature on full-length of transcript */
+
+  copy = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, NULL);
+  if (copy == NULL) {
+    SeqFeatFree (temp);
+    return;
+  }
+
+  /* paste fields from temp copy of original ncRNA; ownership of every
+     pointer copied here moves from temp to copy */
+
+  copy->data.value.ptrvalue = temp->data.value.ptrvalue;
+  copy->partial = temp->partial;
+  copy->excpt = temp->excpt;
+  copy->comment = temp->comment;
+  copy->qual = temp->qual;
+  copy->title = temp->title;
+  copy->ext = temp->ext;
+  copy->cit = temp->cit;
+  copy->exp_ev = temp->exp_ev;
+  copy->xref = temp->xref;
+  copy->dbxref = temp->dbxref;
+  copy->pseudo = temp->pseudo;
+  copy->except_text = temp->except_text;
+
+  SetSeqLocPartial (copy->location, partial5, partial3);
+
+  /* only the location and product still belong to temp; free them and
+     the shell, but not the members that were handed over above */
+
+  SeqLocFree (temp->location);
+  SeqLocFree (temp->product);
+  MemFree (temp); /* do not SeqFeatFree */
+}
+
static void ClearRnaProducts (
SeqFeatPtr sfp,
Pointer userdata
@@ -3858,6 +4506,35 @@ static void FindDupProtFeats (
}
}
+/* Feature visitor: for region, site, bond and protein features, sets the
+ * strand of every interval in the feature location to unknown.  Other
+ * feature types are left untouched.  userdata is unused. */
+static void ClearProtFeatStrand (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  SeqLocPtr  piece;
+  SeqIntPtr  interval;
+
+  if (sfp == NULL) return;
+
+  switch (sfp->data.choice) {
+    case SEQFEAT_REGION :
+    case SEQFEAT_SITE :
+    case SEQFEAT_BOND :
+    case SEQFEAT_PROT :
+      break;
+    default :
+      return;
+  }
+
+  for (piece = SeqLocFindNext (sfp->location, NULL);
+       piece != NULL;
+       piece = SeqLocFindNext (sfp->location, piece)) {
+    if (piece->choice != SEQLOC_INT) continue;
+    interval = (SeqIntPtr) piece->data.ptrvalue;
+    if (interval != NULL && interval->strand != Seq_strand_unknown) {
+      interval->strand = Seq_strand_unknown;
+    }
+  }
+}
+
static void RemoveDupProtFeats (
BioseqPtr bsp,
Pointer userdata
@@ -3868,10 +4545,11 @@ static void RemoveDupProtFeats (
if (bsp == NULL) return;
if (! ISA_aa (bsp->mol)) return;
+ VisitFeaturesOnBsp (bsp, NULL, ClearProtFeatStrand);
dp.firstprot = NULL;
dp.secondprot = NULL;
VisitFeaturesOnBsp (bsp, (Pointer) &dp, FindDupProtFeats);
- if (dp.firstprot == NULL && dp.secondprot == NULL) return;
+ if (dp.firstprot == NULL || dp.secondprot == NULL) return;
if (AsnIoMemComp ((Pointer) dp.firstprot, (Pointer) dp.secondprot, (AsnWriteFunc) SeqFeatAsnWrite)) {
dp.firstprot->idx.deleteme = TRUE;
}
@@ -4085,7 +4763,7 @@ static CharPtr RnaTypeLabel (
return "RNA";
}
-static void AddMrnaTitles (
+static void AddRnaTitles (
SeqFeatPtr rna,
CharPtr organism
)
@@ -4155,7 +4833,7 @@ static void AddMrnaTitles (
SeqDescrAddPointer (&(bsp->descr), Seq_descr_title, (Pointer) str);
}
-static void MakeOneMrnaTitle (
+static void MakeOneRnaTitle (
SeqFeatPtr rna,
SeqFeatPtr gene,
CharPtr label,
@@ -4215,7 +4893,7 @@ static void MakeOneMrnaTitle (
StringCat (str, grp->locus);
StringCat (str, ")");
}
- StringCat (str, " ");
+ StringCat (str, ", ");
StringCat (str, ptr);
} else {
StringCat (str, lbl);
@@ -4226,24 +4904,16 @@ static void MakeOneMrnaTitle (
}
}
}
- if (cds != NULL) {
- StringCat (str, " ");
- StringCat (str, typ);
- if (ccontext.partialL || ccontext.partialR) {
- StringCat (str, ", partial cds.");
- } else {
- StringCat (str, ", complete cds.");
- }
- } else {
- StringCat (str, " ");
- StringCat (str, typ);
- StringCat (str, ".");
- }
+
+ StringCat (str, ", ");
+ StringCat (str, typ);
+ StringCat (str, ".");
+
SeqDescrAddPointer (&(bsp->descr), Seq_descr_title, (Pointer) str);
MemFree (lbl);
}
-static void MakeSmartMrnaTitles (
+static void MakeSmartRnaTitles (
BioseqPtr bsp,
CharPtr organism
)
@@ -4252,13 +4922,13 @@ static void MakeSmartMrnaTitles (
SeqMgrFeatContext context;
GmcDataPtr gdp, head;
GeneRefPtr grp;
- Int2 i, j, k, numgene, nummrna;
+ Int2 i, j, k, numgene, numrna;
SeqFeatPtr sfp;
if (bsp == NULL) return;
numgene = 0;
- nummrna = 0;
+ numrna = 0;
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
while (sfp != NULL) {
@@ -4267,7 +4937,7 @@ static void MakeSmartMrnaTitles (
numgene++;
break;
case SEQFEAT_RNA :
- nummrna++;
+ numrna++;
break;
default :
break;
@@ -4277,8 +4947,8 @@ static void MakeSmartMrnaTitles (
/* if (numgene == 0) return; */
- if (nummrna > 0) {
- head = (GmcDataPtr) MemNew (sizeof (GmcData) * (size_t) (nummrna + 1));
+ if (numrna > 0) {
+ head = (GmcDataPtr) MemNew (sizeof (GmcData) * (size_t) (numrna + 1));
if (head != NULL) {
gdp = head;
sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, 0, &context);
@@ -4294,17 +4964,17 @@ static void MakeSmartMrnaTitles (
}
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, 0, &context);
}
- HeapSort (head, (size_t) nummrna, sizeof (GmcData), SortByGenePtr);
- for (i = 0; i < nummrna; i += j) {
+ HeapSort (head, (size_t) numrna, sizeof (GmcData), SortByGenePtr);
+ for (i = 0; i < numrna; i += j) {
sfp = head [i].gene;
- for (j = 1; i + j < nummrna && sfp == head [i + j].gene; j++) continue;
+ for (j = 1; i + j < numrna && sfp == head [i + j].gene; j++) continue;
if (j == 1) {
/* no alt splicing */
- MakeOneMrnaTitle (head [i].feat, head [i].gene, head [i].label, organism, FALSE);
+ MakeOneRnaTitle (head [i].feat, head [i].gene, head [i].label, organism, FALSE);
} else {
/* is alt splicing */
for (k = 0; k < j; k++) {
- MakeOneMrnaTitle (head [i + k].feat, head [i + k].gene, head [i + k].label, organism, TRUE);
+ MakeOneRnaTitle (head [i + k].feat, head [i + k].gene, head [i + k].label, organism, TRUE);
}
}
}
@@ -4327,7 +4997,7 @@ static void LookForGo (
Char ch;
GoSearchPtr gsp;
CharPtr ptr;
- Int2 state;
+ Int4 state;
ValNodePtr matches;
if (sfp == NULL || StringHasNoText (sfp->comment)) return;
@@ -4528,6 +5198,7 @@ static SeqEntryPtr PropagateDescsFromGenBankSet (
}
bssp = (BioseqSetPtr) sep->data.ptrvalue;
bssp->descr = SeqDescrFree (bssp->descr);
+ NormalizeDescriptorOrder (sep);
return firstsep;
}
@@ -4752,6 +5423,7 @@ static void AddTemplateDescriptors (
dsc = sdp;
}
ValNodeLink (current_list, (Pointer) dsc);
+ sdp->next = sdp_next;
}
}
@@ -5004,159 +5676,138 @@ static void LookupPubdesc (
}
-typedef struct globaldiscrepancylists {
- ValNodePtr locus_tag_list;
- ValNodePtr missing_locus_tag;
-} GlobalDiscrepancyListsData, PNTR GlobalDiscrepancyListPtr;
-static void CollectGlobalDiscrepancyData (
- SeqFeatPtr sfp,
- Pointer userdata
-)
+#ifdef INTERNAL_NCBI_ASNDISC
+const PerformDiscrepancyTest taxlookup = CheckTaxNamesAgainstTaxDatabase;
+#else
+const PerformDiscrepancyTest taxlookup = NULL;
+#endif
+
+static void CleanupCollectionDatesMonthFirst (BioSourcePtr biop, Pointer data)
{
- GeneRefPtr grp;
- GlobalDiscrepancyListPtr tbl;
+ SubSourcePtr ssp;
+ CharPtr reformatted_date = NULL;
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_GENE) return;
- tbl = (GlobalDiscrepancyListPtr) userdata;
- if (tbl == NULL) return;
+ if (biop == NULL) return;
- grp = (GeneRefPtr) sfp->data.value.ptrvalue;
- if (grp != NULL) {
- if (grp->pseudo) return;
- if (StringDoesHaveText (grp->locus_tag)) {
- ValNodeAddPointer (&(tbl->locus_tag_list), 0,
- GlobalDiscrepancyNew (grp->locus_tag, OBJ_SEQFEAT, sfp));
- } else {
- ValNodeAddPointer (&(tbl->missing_locus_tag), 0,
- GlobalDiscrepancyNew (NULL, OBJ_SEQFEAT, sfp));
+ ssp = biop->subtype;
+ while (ssp != NULL)
+ {
+ if (ssp->subtype == SUBSRC_collection_date)
+ {
+ reformatted_date = ReformatDateStringEx (ssp->name, TRUE, NULL);
+ if (reformatted_date != NULL)
+ {
+ ssp->name = MemFree (ssp->name);
+ ssp->name = reformatted_date;
+ }
}
+ ssp = ssp->next;
}
}
-static void SaveStringsForDiscrepancyItemList (ValNodePtr list, Boolean use_feature_fmt);
-
-static void SaveStringsForDiscrepancyItems (ClickableItemPtr cip, Boolean use_feature_fmt)
+static void CleanupCollectionDatesDayFirst (BioSourcePtr biop, Pointer data)
{
- ValNodePtr vnp, list_copy;
- CharPtr str;
+ SubSourcePtr ssp;
+ CharPtr reformatted_date = NULL;
- if (cip == NULL) return;
- if (use_feature_fmt) {
- list_copy = ReplaceDiscrepancyItemWithFeatureTableStrings (cip->item_list);
- cip->item_list = ValNodeFree (cip->item_list);
- cip->item_list = list_copy;
- } else {
- for (vnp = cip->item_list; vnp != NULL; vnp = vnp->next) {
- str = GetDiscrepancyItemText (vnp);
- vnp->choice = 0;
- vnp->data.ptrvalue = str;
+ if (biop == NULL) return;
+
+ ssp = biop->subtype;
+ while (ssp != NULL)
+ {
+ if (ssp->subtype == SUBSRC_collection_date)
+ {
+ reformatted_date = ReformatDateStringEx (ssp->name, FALSE, NULL);
+ if (reformatted_date != NULL)
+ {
+ ssp->name = MemFree (ssp->name);
+ ssp->name = reformatted_date;
+ }
}
+ ssp = ssp->next;
}
- SaveStringsForDiscrepancyItemList (cip->subcategories, use_feature_fmt);
}
-static void SaveStringsForDiscrepancyItemList (ValNodePtr list, Boolean use_feature_fmt)
+static void ValNodeLinkCopy (ValNodePtr PNTR list1, ValNodePtr list2)
{
- while (list != NULL) {
- SaveStringsForDiscrepancyItems (list->data.ptrvalue, use_feature_fmt);
- list = list->next;
+ if (list1 == NULL) return;
+ while (list2 != NULL)
+ {
+ ValNodeAddPointer (list1, list2->choice, list2->data.ptrvalue);
+ list2 = list2->next;
}
}
+static ValNodePtr FindItemListForClickableItemCategory (ValNodePtr list, CharPtr category_fmt)
+{
+ ClickableItemPtr cip;
+ ValNodePtr vnp;
+ ValNodePtr item_list = NULL;
+ CharPtr cp;
-#ifdef INTERNAL_NCBI_ASNDISC
-const PerformDiscrepancyTest taxlookup = CheckTaxNamesAgainstTaxDatabase;
-#else
-const PerformDiscrepancyTest taxlookup = NULL;
-#endif
+ if (StringLen (category_fmt) < 2) {
+ return NULL;
+ }
+ for (vnp = list; vnp != NULL; vnp = vnp->next) {
+ cip = (ClickableItemPtr) vnp->data.ptrvalue;
+ if (cip != NULL) {
+ if (cip->description != NULL) {
+ /* skip number at beginning of category title */
+ cp = cip->description;
+ while (isdigit (*cp)) {
+ cp++;
+ }
+ if (StringCmp (cp, category_fmt + 2) == 0) {
+ ValNodeLinkCopy (&item_list, cip->item_list);
+ }
+ }
+ ValNodeLink (&item_list, FindItemListForClickableItemCategory (cip->subcategories, category_fmt));
+ }
+ }
+ return item_list;
+}
-static void DiscrepancyReportOneRecord (TblArgsPtr tbl, SeqEntryPtr sep)
+static void DoTbl2AsnCleanup (SeqEntryPtr sep, CleanupArgsPtr c)
{
- ClickableItemPtr adjacent_cip = NULL;
- ValNode sep_list;
- ValNodePtr local_discrepancy_list = NULL;
- Uint2 entityID;
- DiscrepancyConfigData dcd;
- GlobalDiscrepancyListsData lists;
- GenProdSetDiscrepancyListsData gps_lists;
- ProtIdListsData prot_lists;
- Int4 k;
-
- if (tbl == NULL || sep == NULL) return;
-
- entityID = SeqMgrGetEntityIDForSeqEntry (sep);
- if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
- SeqMgrIndexFeatures (entityID, NULL);
- }
+ ValNodePtr sep_list = NULL;
+ ValNodePtr discrepancy_list = NULL, item_list = NULL, vnp;
+ SeqFeatPtr sfp;
- MemSet (&lists, 0, sizeof (GlobalDiscrepancyListsData));
- VisitGenProdSetFeatures (sep, &lists, CollectGlobalDiscrepancyData);
- MemSet (&gps_lists, 0, sizeof (GenProdSetDiscrepancyListsData));
- CheckGenProdSetsInSeqEntry (sep, &gps_lists);
- MemSet (&prot_lists, 0, sizeof (ProtIdListsData));
- VisitBioseqsInSep (sep, &prot_lists, FindProteinIDCallback);
-
- if (lists.locus_tag_list != NULL) {
- /* collect adjacent genes */
- lists.locus_tag_list = ValNodeSort (lists.locus_tag_list, SortVnpByGlobalDiscrepancyString);
- adjacent_cip = FindAdjacentDuplicateLocusTagGenes (lists.locus_tag_list);
- if (adjacent_cip != NULL) {
- SaveStringsForDiscrepancyItems (adjacent_cip, tbl->disc_rep_config.use_feature_table_format);
- ValNodeAddPointer (&(tbl->adjacent_locus_tag_disc_list), 0, adjacent_cip);
- }
- }
-
- /* convert lists to strings and add to global lists */
- ConvertGlobalDiscrepancyListToText (lists.locus_tag_list, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&(tbl->locus_tag_list), lists.locus_tag_list);
- ConvertGlobalDiscrepancyListToText (lists.missing_locus_tag, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&(tbl->missing_locus_tag), lists.missing_locus_tag);
- ConvertGlobalDiscrepancyListToText (gps_lists.cds_product_list, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&(tbl->cds_product_list), gps_lists.cds_product_list);
- ConvertGlobalDiscrepancyListToText (gps_lists.missing_protein_id, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&(tbl->missing_cds_product), gps_lists.missing_protein_id);
- ConvertGlobalDiscrepancyListToText (gps_lists.mrna_product_list, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&(tbl->mrna_product_list), gps_lists.mrna_product_list);
- ConvertGlobalDiscrepancyListToText (gps_lists.missing_mrna_product, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&(tbl->missing_mrna_product), gps_lists.missing_mrna_product);
- ConvertGlobalDiscrepancyListToText (prot_lists.gnl_list, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&tbl->gnl_list, prot_lists.gnl_list);
- ConvertGlobalDiscrepancyListToText (prot_lists.missing_gnl_list, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&tbl->missing_gnl_list, prot_lists.missing_gnl_list);
-
- /* setup discrepancy report config */
- MemSet (&dcd, 0, sizeof (DiscrepancyConfigData));
- /* enable all tests except tRNA */
- for (k = 0; k < MAX_DISC_TYPE; k++) {
- dcd.conf_list[k] = TRUE;
- }
- DisableTRNATests (&(dcd));
- /* disable tests that are global */
- dcd.conf_list[DISC_GENE_MISSING_LOCUS_TAG] = FALSE;
- dcd.conf_list[DISC_GENE_DUPLICATE_LOCUS_TAG] = FALSE;
- dcd.conf_list[DISC_GENE_LOCUS_TAG_BAD_FORMAT] = FALSE;
- dcd.conf_list[DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX] = FALSE;
- dcd.conf_list[DISC_MISSING_GENPRODSET_PROTEIN] = FALSE;
- dcd.conf_list[DISC_DUP_GENPRODSET_PROTEIN] = FALSE;
- dcd.conf_list[DISC_MISSING_GENPRODSET_TRANSCRIPT_ID] = FALSE;
- dcd.conf_list[DISC_DUP_GENPRODSET_TRANSCRIPT_ID] = FALSE;
- dcd.conf_list[DISC_MISSING_PROTEIN_ID] = FALSE;
- dcd.conf_list[DISC_INCONSISTENT_PROTEIN_ID_PREFIX] = FALSE;
-
- sep_list.data.ptrvalue = sep;
- sep_list.next = NULL;
- local_discrepancy_list = CollectDiscrepancies (&dcd, &sep_list, taxlookup);
-
- SaveStringsForDiscrepancyItemList (local_discrepancy_list, tbl->disc_rep_config.use_feature_table_format);
- ValNodeLink (&(tbl->discrepancy_list), local_discrepancy_list);
+ if (sep == NULL || c == NULL) {
+ return;
+ }
+ if (c->collection_dates) {
+ if (c->collection_dates_month_first) {
+ VisitBioSourcesInSep (sep, NULL, CleanupCollectionDatesMonthFirst);
+ } else {
+ VisitBioSourcesInSep (sep, NULL, CleanupCollectionDatesDayFirst);
+ }
+ }
+ if (c->add_notes_to_overlapping_cds_without_abc) {
+ ValNodeAddPointer (&sep_list, 0, sep);
+ SeqMgrIndexFeatures (ObjMgrGetEntityIDForChoice (sep), NULL);
+ AddOverlappingCodingRegionDiscrepancies (&discrepancy_list, sep_list);
+ sep_list = ValNodeFree (sep_list);
+ item_list = FindItemListForClickableItemCategory (discrepancy_list, kOverlappingCDSNeedsNoteFmt);
+ discrepancy_list = FreeClickableList (discrepancy_list);
+ for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == OBJ_SEQFEAT) {
+ sfp = (SeqFeatPtr) vnp->data.ptrvalue;
+ if (sfp != NULL) {
+ SetStringValue (&(sfp->comment), kOverlappingCDSNoteText, ExistingTextOption_append_semi);
+ }
+ }
+ }
+ item_list = ValNodeFree (item_list);
+ }
}
-
+
static void ProcessOneRecord (
SubmitBlockPtr sbp,
@@ -5219,6 +5870,7 @@ static void ProcessOneRecord (
CharPtr tblfile = NULL;
SeqEntryPtr tmp;
MolInfoPtr template_molinfo = NULL;
+ ValNodePtr cmt_errors, vnp;
fp = OpenOneFile (directory, base, suffix);
if (fp == NULL) return;
@@ -5374,6 +6026,20 @@ static void ProcessOneRecord (
FileClose (fp);
}
+ /* read structured comments from .cmt file */
+ fp = OpenOneFile (directory, base, ".cmt");
+ if (fp != NULL) {
+ sep = GetTopSeqEntryForEntityID (entityID);
+ cmt_errors = CreateStructuredCommentsFromFile (fp, sep);
+ FileClose (fp);
+ if (cmt_errors != NULL) {
+ for (vnp = cmt_errors; vnp != NULL; vnp = vnp->next) {
+ Message (MSG_POSTERR, "Error processing structured comment (.cmt) file: %s", vnp->data.ptrvalue);
+ }
+ cmt_errors = ValNodeFreeData (cmt_errors);
+ }
+ }
+
/* read one or more protein sequences from .pep file */
fp = OpenOneFile (directory, base, ".pep");
@@ -5622,8 +6288,12 @@ static void ProcessOneRecord (
if (tbl->genprodset) {
VisitFeaturesInSep (sep, NULL, CopyGene);
}
+ if (tbl->genprodset) {
+ /* currently copying ncRNA feature onto product */
+ VisitFeaturesInSep (sep, NULL, CopyNcRna);
+ }
if (! tbl->genprodset) {
- VisitFeaturesInSep (sep, NULL, ClearRnaProducts);
+ VisitFeaturesInSep (sep, NULL, ClearRnaProducts);
}
if (tbl->removeunnecxref) {
@@ -5648,11 +6318,11 @@ static void ProcessOneRecord (
bsp = FindNucBioseq (sep);
if (tbl->smarttitle) {
- MakeSmartMrnaTitles (bsp, organism);
+ MakeSmartRnaTitles (bsp, organism);
} else {
sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, 0, &context);
while (sfp != NULL) {
- AddMrnaTitles (sfp, organism);
+ AddRnaTitles (sfp, organism);
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, 0, &context);
}
}
@@ -5675,6 +6345,8 @@ static void ProcessOneRecord (
/*
SeriousSeqEntryCleanup (sep, NULL, NULL);
*/
+ ConvertFullLenSourceFeatToDesc (sep);
+ ConvertFullLenPubFeatToDesc (sep);
if (tbl->linkbyoverlap) {
SeqMgrIndexFeatures (entityID, NULL);
LinkCDSmRNAbyOverlap (sep);
@@ -5683,6 +6355,9 @@ static void ProcessOneRecord (
LinkCDSmRNAbyProduct (sep);
}
+ DoTbl2AsnCleanup (sep, &(tbl->cleanup_args));
+ NormalizeDescriptorOrder (sep);
+
if (StringHasNoText (results)) {
results = directory;
}
@@ -5691,9 +6366,10 @@ static void ProcessOneRecord (
atp_bssse = AsnFind ("Bioseq-set.seq-set.E");
if (atp_bssse == NULL) {
Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
- } else if (tbl->fastaset) {
+ } else if (tbl->fastaset && tbl->whichclass == 0) {
/* already has genbank wrapper, write individual components */
tmp = PropagateDescsFromGenBankSet (sep);
+ SeqMgrClearFeatureIndexes (entityID, NULL);
while (tmp != NULL) {
SeqEntryAsnWrite (tmp, aip, atp_bssse);
tmp = tmp->next;
@@ -5702,8 +6378,9 @@ static void ProcessOneRecord (
SeqEntryAsnWrite (sep, aip, atp_bssse);
}
} else {
- if (tbl->fastaset) {
+ if (tbl->fastaset && tbl->whichclass == 0) {
PropagateDescsFromGenBankSet (sep);
+ SeqMgrClearFeatureIndexes (entityID, NULL);
}
WriteOneFile (results, base, ".sqn", outfile, sep, sbp, tbl->save_bioseq_set);
}
@@ -5712,11 +6389,11 @@ static void ProcessOneRecord (
Message (MSG_OK, "Illegal GO term format detected in note - contact database for instructions");
}
- if (tbl->discrepancy) {
- DiscrepancyReportOneRecord (tbl, sep);
+ if (tbl->global_report != NULL) {
+ AddSeqEntryToGlobalDiscrepReport (sep, tbl->global_report, base);
}
- if (tbl->validate || tbl->flatfile) {
+ if (tbl->validate || tbl->flatfile || tbl->genereport || tbl->validate_barcode) {
if (pdp != NULL) {
/* copy in citsub as publication for validator and flatfile */
@@ -5733,9 +6410,12 @@ static void ProcessOneRecord (
Message (MSG_POST, "Flatfile %s\n", base);
FlatfileOneFile (results, base, ".gbf", sep);
}
- if (tbl->validate) {
+ if (tbl->validate || tbl->validate_barcode) {
Message (MSG_POST, "Validating %s\n", base);
- ValidateOneFile (results, base, ".val", sep, tbl->relaxed);
+ ValidateOneFile (results, base, ".val", sep, tbl->validate, tbl->relaxed, tbl->validate_barcode);
+ }
+ if (tbl->genereport) {
+ GeneReportOneFile (results, base, ".t2g", sep);
}
}
}
@@ -5744,126 +6424,6 @@ static void ProcessOneRecord (
}
-static void DoDiscrepancySummary (
- TblArgsPtr tbl,
- FILE *fp
-)
-
-{
- ValNodePtr local_list = NULL;
- ClickableItemPtr cip;
-
- if (tbl == NULL) return;
-
- tbl->locus_tag_list = ValNodeSort (tbl->locus_tag_list, SortVnpByGlobalDiscrepancyString);
- tbl->missing_locus_tag = ValNodeSort (tbl->missing_locus_tag, SortVnpByGlobalDiscrepancyString);
- tbl->cds_product_list = ValNodeSort (tbl->cds_product_list, SortVnpByGlobalDiscrepancyString);
- tbl->missing_cds_product = ValNodeSort (tbl->missing_cds_product, SortVnpByGlobalDiscrepancyString);
- tbl->mrna_product_list = ValNodeSort (tbl->mrna_product_list, SortVnpByGlobalDiscrepancyString);
- tbl->missing_mrna_product = ValNodeSort (tbl->missing_mrna_product, SortVnpByGlobalDiscrepancyString);
-
- if (tbl->locus_tag_list != NULL) {
- if (tbl->missing_locus_tag != NULL) {
- cip = ReportMissingFields (tbl->missing_locus_tag, discReportMissingLocusTags, DISC_GENE_MISSING_LOCUS_TAG);
- if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- }
- }
- CollateDiscrepancyReports (&(tbl->adjacent_locus_tag_disc_list));
- cip = ReportNonUniqueGlobalDiscrepancy (tbl->locus_tag_list,
- discReportDuplicateLocusTagFmt,
- discReportOneDuplicateLocusTagFmt,
- DISC_GENE_DUPLICATE_LOCUS_TAG,
- TRUE);
- if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- if (tbl->adjacent_locus_tag_disc_list != NULL) {
- ValNodeLink (&(cip->subcategories), tbl->adjacent_locus_tag_disc_list);
- }
- } else if (tbl->adjacent_locus_tag_disc_list != NULL) {
- ValNodeLink (&local_list, tbl->adjacent_locus_tag_disc_list);
- }
- tbl->adjacent_locus_tag_disc_list = NULL;
-
- /* inconsistent locus tags */
- ValNodeLink (&local_list,
- ReportInconsistentGlobalDiscrepancyPrefixes (tbl->locus_tag_list,
- discReportInconsistentLocusTagPrefixFmt,
- DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX));
- /* bad formats */
- cip = ReportBadLocusTagFormat (tbl->locus_tag_list);
- if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- }
- }
-
- if (tbl->cds_product_list != NULL) {
- /* report duplicates */
- cip = ReportNonUniqueGlobalDiscrepancy (tbl->cds_product_list,
- discReportDuplicateProteinIDFmt,
- discReportOneDuplicateProteinIDFmt,
- DISC_DUP_GENPRODSET_PROTEIN,
- TRUE);
- if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- }
-
- /* report inconsistent IDs */
- ValNodeLink (&local_list,
- ReportInconsistentGlobalDiscrepancyPrefixes (tbl->cds_product_list,
- discReportInconsistentProteinIDPrefixFmt,
- DISC_INCONSISTENT_PROTEIN_ID_PREFIX));
- }
-
- if (tbl->mrna_product_list != NULL) {
- if (tbl->missing_locus_tag != NULL) {
- cip = ReportMissingFields (tbl->mrna_product_list, discReportMissingTranscriptIDFmt, DISC_MISSING_GENPRODSET_TRANSCRIPT_ID);
- if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- }
- }
-
- cip = ReportNonUniqueGlobalDiscrepancy (tbl->mrna_product_list,
- discReportDuplicateTranscriptIdFmt,
- discReportOneDuplicateTranscriptIdFmt,
- DISC_DUP_GENPRODSET_TRANSCRIPT_ID,
- TRUE);
- if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- }
- }
-
- /* missing gnl protein IDs */
- cip = ReportMissingFields (tbl->missing_gnl_list, discReportBadProteinIdFmt, DISC_MISSING_PROTEIN_ID);
- if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- }
- tbl->gnl_list = ValNodeSort (tbl->gnl_list, SortVnpByGlobalDiscrepancyString);
- ValNodeLink (&local_list,
- ReportInconsistentGlobalDiscrepancyStrings (tbl->gnl_list,
- discReportInconsistentProteinIDPrefixFmt,
- DISC_INCONSISTENT_PROTEIN_ID_PREFIX));
-
-
- tbl->locus_tag_list = FreeGlobalDiscrepancyList (tbl->locus_tag_list);
- tbl->missing_locus_tag = FreeGlobalDiscrepancyList (tbl->missing_locus_tag);
- tbl->cds_product_list = FreeGlobalDiscrepancyList (tbl->cds_product_list);
- tbl->missing_cds_product = FreeGlobalDiscrepancyList (tbl->missing_cds_product);
- tbl->mrna_product_list = FreeGlobalDiscrepancyList (tbl->mrna_product_list);
- tbl->missing_mrna_product = FreeGlobalDiscrepancyList (tbl->missing_mrna_product);
- tbl->missing_gnl_list = FreeGlobalDiscrepancyList (tbl->missing_gnl_list);
- tbl->gnl_list = FreeGlobalDiscrepancyList (tbl->gnl_list);
-
-
- /* group discrepany reports from separate files */
- CollateDiscrepancyReports (&(tbl->discrepancy_list));
-
-
- WriteAsnDiscReport (local_list, fp, &(tbl->disc_rep_config), TRUE);
- local_list = FreeClickableList (local_list);
-
- WriteAsnDiscReport (tbl->discrepancy_list, fp, &(tbl->disc_rep_config), TRUE);
-}
static CharPtr overwriteMsg = "Your template with a .sqn suffix will be overwritten. Do you wish to continue?";
@@ -5912,6 +6472,7 @@ static void FileRecurse (
CharPtr directory,
CharPtr results,
CharPtr suffix,
+ Boolean recurse,
SeqDescrPtr sdphead,
TblArgsPtr tbl,
TextFsaPtr gotags,
@@ -5951,14 +6512,14 @@ static void FileRecurse (
}
}
}
- } else if (vnp->choice == 1) {
+ } else if (vnp->choice == 1 && recurse) {
/* recurse into subdirectory */
StringNCpy_0 (path, directory, sizeof (path));
str = (CharPtr) vnp->data.ptrvalue;
FileBuildPath (path, str, NULL);
- FileRecurse (sbp, pdp, src, path, results, suffix, sdphead, tbl, gotags, aip, outfile);
+ FileRecurse (sbp, pdp, src, path, results, suffix, recurse, sdphead, tbl, gotags, aip, outfile);
}
}
@@ -6050,7 +6611,8 @@ static AsnTypePtr DoFirstPrefix (
}
static AsnTypePtr DoSecondPrefix (
- AsnIoPtr aip
+ AsnIoPtr aip,
+ TblArgsPtr tbl
)
{
@@ -6090,7 +6652,23 @@ static AsnTypePtr DoSecondPrefix (
if (! AsnOpenStruct (aip, bssp_atp, (Pointer) &bs)) return NULL;
- av.intvalue = BioseqseqSet_class_genbank;
+ switch (tbl->whichclass) {
+ case 1 :
+ av.intvalue = BioseqseqSet_class_pop_set;
+ break;
+ case 2 :
+ av.intvalue = BioseqseqSet_class_phy_set;
+ break;
+ case 3 :
+ av.intvalue = BioseqseqSet_class_mut_set;
+ break;
+ case 4 :
+ av.intvalue = BioseqseqSet_class_eco_set;
+ break;
+ default :
+ av.intvalue = BioseqseqSet_class_genbank;
+ break;
+ }
if (! AsnWrite (aip, atp_bsc, &av)) return NULL;
if (! AsnOpenStruct (aip, atp_bsss, (Pointer) &bs.seq_set)) return NULL;
@@ -6275,6 +6853,60 @@ static CharPtr ParseCommaField (
return str;
}
+static DatePtr DateParse (
+ CharPtr str
+)
+
+{
+ Int4 day = -1, month = -1, year = -1;
+ DatePtr dp;
+ CharPtr ptr;
+ Char tmp [64];
+ long int val;
+
+ if (StringHasNoText (str)) return NULL;
+
+ StringNCpy_0 (tmp, str, sizeof (tmp));
+ ptr = StringChr (tmp, '/');
+ if (ptr == NULL) {
+ ptr = StringChr (tmp, '-');
+ }
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
+ if (sscanf (tmp, "%ld", &val) == 1) {
+ month = (Int4) val;
+ }
+ str = StringChr (ptr, '/');
+ if (str == NULL) {
+ str = StringChr (ptr, '-');
+ }
+ if (str != NULL) {
+ *str = '\0';
+ str++;
+ if (sscanf (ptr, "%ld", &val) == 1) {
+ day = (Int4) val;
+ }
+ if (sscanf (str, "%ld", &val) == 1) {
+ year = (Int4) val;
+ }
+ }
+ }
+
+ if (month < 0 || day < 0 || year < 2000) return NULL;
+ if (month > 12 || day > 31 || year > 2099) return NULL;
+
+ dp = DateNew ();
+ if (dp == NULL) return NULL;
+
+ dp->data [0] = 1;
+ dp->data [1] = (Uint1) (year - 1900);
+ dp->data [2] = (Uint1) month;
+ dp->data [3] = (Uint1) day;
+
+ return dp;
+}
+
/* Args structure contains command-line arguments */
#define p_argInputPath 0
@@ -6282,44 +6914,41 @@ static CharPtr ParseCommaField (
#define i_argInputFile 2
#define o_argOutputFile 3
#define x_argSuffix 4
-#define t_argTemplate 5
-#define a_argType 6
-#define s_argFastaSet 7
-#define w_argWhichClass 8
-#define d_argDeltaSet 9
-#define l_argAlignment 10
-#define z_argGapped 11
-#define e_argPhrapAce 12
-#define g_argGenProdSet 13
-#define F_argFeatIdLinks 14
-#define H_argImplicitGaps 15
-#define A_argAccession 16
-#define C_argCenter 17
-#define n_argOrgName 18
-#define j_argSrcQuals 19
-#define y_argComment 20
-#define Y_argCommentFile 21
-#define D_argDescrsFile 22
-#define f_argTableFile 23
-#define k_argCdsFlags 24
-#define c_argFindOrf 25
-#define V_argVerify 26
-#define v_argValidate 27
-#define b_argGenBank 28
-#define q_argFileID 29
-#define u_argUndoGPS 30
-#define h_argGnlToNote 31
-#define G_argGapFields 32
-#define R_argRemote 33
-#define S_argSmartFeats 34
-#define Q_argSmartTitle 35
-#define U_argUnnecXref 36
-#define L_argLocalID 37
-#define T_argTaxLookup 38
-#define P_argPubLookup 39
-#define W_argLogProgress 40
-#define K_argBioseqSet 41
-#define Z_argDiscRepFile 42
+#define E_argRecurse 5
+#define t_argTemplate 6
+#define a_argType 7
+#define s_argFastaSet 8
+#define g_argGenProdSet 9
+#define F_argFeatIdLinks 10
+#define A_argAccession 11
+#define C_argCenter 12
+#define n_argOrgName 13
+#define j_argSrcQuals 14
+#define y_argComment 15
+#define Y_argCommentFile 16
+#define D_argDescrsFile 17
+#define f_argTableFile 18
+#define k_argCdsFlags 19
+#define V_argVerify 20
+#define v_argValidate 21
+#define b_argGenBank 22
+#define q_argFileID 23
+#define u_argUndoGPS 24
+#define h_argGnlToNote 25
+#define G_argGapFields 26
+#define R_argRemote 27
+#define S_argSmartFeats 28
+#define Q_argSmartTitle 29
+#define U_argUnnecXref 30
+#define L_argLocalID 31
+#define T_argTaxLookup 32
+#define P_argPubLookup 33
+#define W_argLogProgress 34
+#define K_argBioseqSet 35
+#define H_argHoldUntilPub 36
+#define Z_argDiscRepFile 37
+#define c_argCleanupOptions 38
+
Args myargs [] = {
{"Path to Files", NULL, NULL, NULL,
@@ -6332,6 +6961,8 @@ Args myargs [] = {
TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
{"Suffix", ".fsa", NULL, NULL,
TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
+ {"Recurse", "F", NULL, NULL,
+ TRUE, 'E', ARG_BOOLEAN, 0.0, 0, NULL},
{"Template File", NULL, NULL, NULL,
TRUE, 't', ARG_FILE_IN, 0.0, 0, NULL},
{"File Type\n"
@@ -6346,22 +6977,10 @@ Args myargs [] = {
TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
{"Read FASTAs as Set", "F", NULL, NULL,
TRUE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
- {"Fasta Set Class (1 Pop, 2 Phy, 3 Mut, 4 Eco) (obsolete: use -a s1-4)", "0", "0", "4",
- FALSE, 'w', ARG_INT, 0.0, 0, NULL},
- {"Read FASTAs as Delta (obsolete: use -a d)", "F", NULL, NULL,
- TRUE, 'd', ARG_BOOLEAN, 0.0, 0, NULL},
- {"Read FASTA+Gap Alignment (obsolete: use -a l)", "F", NULL, NULL,
- TRUE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
- {"Read FASTAs with Gap Lines (obsolete: use -a z)", "F", NULL, NULL,
- TRUE, 'z', ARG_BOOLEAN, 0.0, 0, NULL},
- {"Read PHRAP/ACE Format (obsolete: use -a e)", "F", NULL, NULL,
- TRUE, 'e', ARG_BOOLEAN, 0.0, 0, NULL},
{"Genomic Product Set", "F", NULL, NULL,
TRUE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},
{"Feature ID Links (o by Overlap, p by Product)", NULL, NULL, NULL,
TRUE, 'F', ARG_STRING, 0.0, 0, NULL},
- {"Implicit Gaps (obsolete: use -a di)", "F", NULL, NULL,
- TRUE, 'H', ARG_BOOLEAN, 0.0, 0, NULL},
{"Accession", NULL, NULL, NULL,
TRUE, 'A', ARG_STRING, 0.0, 0, NULL},
{"Genome Center Tag", NULL, NULL, NULL,
@@ -6384,12 +7003,11 @@ Args myargs [] = {
" m Allow Alternative Starts\n"
" k Set Conflict on Mismatch\n", NULL, NULL, NULL,
TRUE, 'k', ARG_STRING, 0.0, 0, NULL},
- {"Annotate Longest ORF (obsolete: use -k c)", "F", NULL, NULL,
- TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
{"Verification (combine any of the following letters)\n"
" v Validate with Normal Stringency\n"
" r Validate without Country Check\n"
- " b Generate GenBank Flatfile\n", NULL, NULL, NULL,
+ " b Generate GenBank Flatfile\n"
+ " g Generate Gene Report\n", NULL, NULL, NULL,
TRUE, 'V', ARG_STRING, 0.0, 0, NULL},
{"Validate (obsolete: use -V v)", "F", NULL, NULL,
TRUE, 'v', ARG_BOOLEAN, 0.0, 0, NULL},
@@ -6424,8 +7042,18 @@ Args myargs [] = {
TRUE, 'W', ARG_BOOLEAN, 0.0, 0, NULL},
{"Save Bioseq-set", "F", NULL, NULL,
TRUE, 'K', ARG_BOOLEAN, 0.0, 0, NULL},
+ {"Hold Until Publish\n"
+ " y Hold for One Year\n"
+ " mm/dd/yyyy\n", NULL, NULL, NULL,
+ TRUE, 'H', ARG_STRING, 0.0, 0, NULL},
{"Discrepancy Report Output File", NULL, NULL, NULL,
TRUE, 'Z', ARG_FILE_OUT, 0.0, 0, NULL},
+ {"Cleanup (combine any of the following letters)\n"
+ " d Correct Collection Dates (assume month first)\n"
+ " D Correct Collection Dates (assume day first)\n"
+ " b Append note to coding regions that overlap other coding regions with similar product names and do not contain 'ABC'",
+ NULL, NULL, NULL,
+ TRUE, 'c', ARG_STRING, 0.0, 0, NULL},
};
Int2 Main (void)
@@ -6440,15 +7068,18 @@ Int2 Main (void)
Uint2 datatype;
CharPtr descrs;
CharPtr directory;
+ DatePtr dp;
FILE *fp;
Char gapstring [128];
TextFsaPtr gotags;
+ CharPtr hold;
CharPtr os;
CharPtr outfile;
Pubdesc pd;
PubdescPtr pdp = NULL;
ValNode pb;
CharPtr ptr;
+ Boolean recurse;
Boolean remote;
CharPtr results;
SubmitBlockPtr sbp = NULL;
@@ -6509,6 +7140,7 @@ Int2 Main (void)
results = NULL;
}
suffix = (CharPtr) myargs [x_argSuffix].strvalue;
+ recurse = (Boolean) myargs [E_argRecurse].intvalue;
base = (CharPtr) myargs [i_argInputFile].strvalue;
outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
if (StringHasNoText (outfile)) {
@@ -6517,6 +7149,8 @@ Int2 Main (void)
tmplate = (CharPtr) myargs [t_argTemplate].strvalue;
descrs = (CharPtr) myargs [D_argDescrsFile].strvalue;
+ hold = (CharPtr) myargs [H_argHoldUntilPub].strvalue;
+
if (StringHasNoText(directory) && StringHasNoText(base)) {
Message (MSG_FATAL, "You must supply either an input file (-i) or an input directory (-p).\nUse -p . to specify the current directory.\n\n");
return 1;
@@ -6525,33 +7159,6 @@ Int2 Main (void)
MemSet ((Pointer) &tbl, 0, sizeof (TblArgs));
- /* process obsolete format arguments first, warn if used */
-
- tbl.whichclass = (Boolean) myargs [w_argWhichClass].intvalue;
- if (tbl.whichclass) {
- Message (MSG_POST, "-w is obsolete, use -a s1-4 instead");
- }
- tbl.deltaset = (Boolean) myargs [d_argDeltaSet].intvalue;
- if (tbl.deltaset) {
- Message (MSG_POST, "-d is obsolete, use -a d instead");
- }
- tbl.alignset = (Boolean) myargs [l_argAlignment].intvalue;
- if (tbl.alignset) {
- Message (MSG_POST, "-l is obsolete, use -a l instead");
- }
- tbl.gapped = (Boolean) myargs [z_argGapped].intvalue;
- if (tbl.gapped) {
- Message (MSG_POST, "-z is obsolete, use -a z instead");
- }
- tbl.phrapace = (Boolean) myargs [e_argPhrapAce].intvalue;
- if (tbl.phrapace) {
- Message (MSG_POST, "-e is obsolete, use -a e instead");
- }
- tbl.implicitgaps = (Boolean) myargs [H_argImplicitGaps].intvalue;
- if (tbl.implicitgaps) {
- Message (MSG_POST, "-H is obsolete, use -a di instead");
- }
-
/* -s is heavily used and will remain as an alternative to -a s */
tbl.fastaset = (Boolean) myargs [s_argFastaSet].intvalue;
@@ -6611,13 +7218,6 @@ Int2 Main (void)
tbl.comment = (CharPtr) myargs [y_argComment].strvalue;
tbl.commentFile = ReadCommentFile ((CharPtr) myargs [Y_argCommentFile].strvalue);
- /* process obsolete findorf argument first, warn if used */
-
- tbl.findorf = (Boolean) myargs [c_argFindOrf].intvalue;
- if (tbl.findorf) {
- Message (MSG_POST, "-c is obsolete, use -k c instead");
- }
-
ptr = myargs [k_argCdsFlags].strvalue;
if (StringChr (ptr, 'c') != NULL) {
tbl.findorf = TRUE;
@@ -6661,7 +7261,12 @@ Int2 Main (void)
if (StringChr (ptr, 'b') != NULL) {
tbl.flatfile = TRUE;
}
-
+ if (StringChr (ptr, 'g') != NULL) {
+ tbl.genereport = TRUE;
+ }
+ if (StringChr (ptr, 'c') != NULL) {
+ tbl.validate_barcode = TRUE;
+ }
tbl.seqidfromfile = (Boolean) myargs [q_argFileID].intvalue;
@@ -6675,8 +7280,18 @@ Int2 Main (void)
tbl.save_bioseq_set = (Boolean) myargs [K_argBioseqSet].intvalue;
disc_rep_file = (CharPtr) myargs [Z_argDiscRepFile].strvalue;
- if (!StringHasNoText (disc_rep_file)) {
- tbl.discrepancy = TRUE;
+ if (StringHasNoText (disc_rep_file)) {
+ tbl.global_report = NULL;
+ } else {
+ tbl.global_report = GlobalDiscrepReportNew();
+ tbl.global_report->test_config = DiscrepancyConfigNew ();
+ ConfigureForGenomes (tbl.global_report->test_config);
+ tbl.global_report->taxlookup = taxlookup;
+ tbl.global_report->output_config->summary_report = FALSE;
+ tbl.global_report->output_config->expand_report_categories[DISC_SUPERFLUOUS_GENE] = TRUE;
+ tbl.global_report->output_config->expand_report_categories[DISC_RNA_CDS_OVERLAP] = TRUE;
+ tbl.global_report->output_config->expand_report_categories[DISC_SUSPECT_PRODUCT_NAME] = TRUE;
+ tbl.global_report->output_config->expand_report_categories[DISC_OVERLAPPING_CDS] = TRUE;
}
@@ -6735,6 +7350,25 @@ Int2 Main (void)
return 1;
}
+ /* arguments for cleanup */
+ MemSet (&(tbl.cleanup_args), 0, sizeof (CleanupArgsData));
+ ptr = (CharPtr) myargs [c_argCleanupOptions].strvalue;
+ if (StringChr (ptr, 'd') != NULL) {
+ if (StringChr (ptr, 'D') != NULL) {
+ Message (MSG_FATAL, "Cannot use both d and D options for cleanup. Choose one.");
+ return 1;
+ }
+ tbl.cleanup_args.collection_dates = TRUE;
+ tbl.cleanup_args.collection_dates_month_first = TRUE;
+ } else if (StringChr (ptr, 'D') != NULL) {
+ tbl.cleanup_args.collection_dates = TRUE;
+ tbl.cleanup_args.collection_dates_month_first = FALSE;
+ }
+
+ if (StringChr (ptr, 'b') != NULL) {
+ tbl.cleanup_args.add_notes_to_overlapping_cds_without_abc = TRUE;
+ }
+
if (StringHasNoText (base) && (StringDoesHaveText (tbl.accn))) {
Message (MSG_FATAL, "Accession can be entered only for a single record");
return 1;
@@ -6790,6 +7424,24 @@ Int2 Main (void)
MemFree (os);
sbp->hup = FALSE;
sbp->reldate = DateFree (sbp->reldate);
+ if (StringDoesHaveText (hold)) {
+ if (StringICmp (hold, "y") == 0) {
+ sbp->hup = TRUE;
+ dp = DateCurr ();
+ sbp->reldate = dp;
+ if (dp != NULL) {
+ if (dp->data [0] == 1) {
+ (dp->data [1])++;
+ }
+ }
+ } else {
+ dp = DateParse (hold);
+ if (dp != NULL) {
+ sbp->hup = TRUE;
+ sbp->reldate = dp;
+ }
+ }
+ }
csp = sbp->cit;
if (csp != NULL) {
csp->date = DateFree (csp->date);
@@ -6870,7 +7522,7 @@ Int2 Main (void)
return 1;
}
ssp_atp = DoFirstPrefix (aip, sbp);
- bssp_atp = DoSecondPrefix (aip);
+ bssp_atp = DoSecondPrefix (aip, &tbl);
}
if (StringDoesHaveText (base)) {
@@ -6885,7 +7537,7 @@ Int2 Main (void)
} else {
- FileRecurse (sbp, pdp, src, directory, results, suffix, sdphead, &tbl, gotags, aip, NULL);
+ FileRecurse (sbp, pdp, src, directory, results, suffix, recurse, sdphead, &tbl, gotags, aip, NULL);
}
if (aip != NULL) {
@@ -6894,14 +7546,11 @@ Int2 Main (void)
AsnIoClose (aip);
}
- if (tbl.discrepancy) {
- tbl.disc_rep_config.summary_report = FALSE;
- tbl.disc_rep_config.expand_report_categories[DISC_SUPERFLUOUS_GENE] = TRUE;
- tbl.disc_rep_config.expand_report_categories[DISC_RNA_CDS_OVERLAP] = TRUE;
- tbl.disc_rep_config.expand_report_categories[DISC_SUSPECT_PRODUCT_NAME] = TRUE;
+ if (tbl.global_report != NULL) {
fp = FileOpen (disc_rep_file, "w");
- DoDiscrepancySummary (&tbl, fp);
+ WriteGlobalDiscrepancyReport (tbl.global_report, fp);
FileClose (fp);
+ tbl.global_report = GlobalDiscrepReportFree (tbl.global_report);
}
if (sbp != NULL) {