diff options
author | Aaron M. Ucko <ucko@debian.org> | 2006-05-26 18:34:14 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2006-05-26 18:34:14 +0000 |
commit | de1d4a4e7eb2f2e1a20a6c3c90f4128f6f344e80 (patch) | |
tree | b98cfcc27d8a53160d6d0d4171f537e667bd9c68 /api | |
parent | 047f9550aeffa40eb05ad53427718889f660e0f4 (diff) |
Load /tmp/.../ncbi-tools6-6.1.20060507 into
branches/upstream/current.
Diffstat (limited to 'api')
-rw-r--r-- | api/asn2gnb1.c | 24 | ||||
-rw-r--r-- | api/asn2gnb2.c | 79 | ||||
-rw-r--r-- | api/asn2gnb4.c | 140 | ||||
-rw-r--r-- | api/asn2gnb5.c | 260 | ||||
-rw-r--r-- | api/asn2gnb6.c | 12 | ||||
-rw-r--r-- | api/asn2gnbi.h | 6 | ||||
-rw-r--r-- | api/asn2gnbk.h | 5 | ||||
-rw-r--r-- | api/edutil.c | 56 | ||||
-rw-r--r-- | api/seqmgr.c | 81 | ||||
-rw-r--r-- | api/seqmgr.h | 8 | ||||
-rw-r--r-- | api/seqport.c | 26 | ||||
-rw-r--r-- | api/sequtil.c | 102 | ||||
-rw-r--r-- | api/sqnutil1.c | 70 | ||||
-rw-r--r-- | api/sqnutil2.c | 196 | ||||
-rw-r--r-- | api/sqnutil3.c | 444 | ||||
-rw-r--r-- | api/sqnutils.h | 27 | ||||
-rw-r--r-- | api/subutil.c | 110 | ||||
-rw-r--r-- | api/subutil.h | 15 | ||||
-rw-r--r-- | api/tofasta.c | 75 | ||||
-rw-r--r-- | api/valid.c | 469 | ||||
-rw-r--r-- | api/valid.h | 6 | ||||
-rw-r--r-- | api/valid.msg | 20 | ||||
-rw-r--r-- | api/validerr.h | 6 |
23 files changed, 1908 insertions, 329 deletions
diff --git a/api/asn2gnb1.c b/api/asn2gnb1.c index 9e55f9b1..8c2cb9a0 100644 --- a/api/asn2gnb1.c +++ b/api/asn2gnb1.c @@ -28,11 +28,11 @@ * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans, * Mati Shomrat * -* $Id: asn2gnb1.c,v 1.97 2006/02/23 16:38:54 kans Exp $ +* $Id: asn2gnb1.c,v 1.101 2006/05/03 18:05:39 kans Exp $ * * Version Creation Date: 10/21/98 * -* $Revision: 1.97 $ +* $Revision: 1.101 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -3320,8 +3320,8 @@ static void MakeGapFeats ( gapvnp = (ValNodePtr PNTR) userdata; sip = SeqIdFindBest (bsp->id, 0); if (sip == NULL) return; - /* suppress on far delta contigs for now */ - if (! DeltaLitOnly (bsp)) return; + /* no longer suppress on far delta contigs */ + /* if (! DeltaLitOnly (bsp)) return; */ for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) { if (vnp->choice == 1) { @@ -3477,6 +3477,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex ( Boolean lockFarProd; Boolean lookupFarComp; Boolean lookupFarHist; + Boolean lookupFarInf; Boolean lookupFarLocs; Boolean lookupFarOthers; Boolean lookupFarProd; @@ -3625,7 +3626,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex ( gapvnp = NULL; if (format != FTABLE_FMT) { - if (isG || isTPG || isOnlyLocal || isRefSeq || (isGeneral && (! isGED))) { + if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || (isGeneral && (! isGED))) { if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) { VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeGapFeats); } @@ -3717,13 +3718,14 @@ static Asn2gbJobPtr asn2gnbk_setup_ex ( lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0); lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0); lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0); + lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0); lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0); - if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarOthers) { + if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) { /* lookukp all far SeqIDs in advance */ - LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarOthers); + LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers); } ajp->showFarTransl = (Boolean) ((flags & FAR_TRANS_MASK) == SHOW_FAR_TRANSLATION); @@ -5694,6 +5696,7 @@ NLM_EXTERN Boolean SeqEntryToGnbk ( Boolean lockFarProd; Boolean lookupFarComp; Boolean lookupFarHist; + Boolean lookupFarInf; Boolean lookupFarLocs; Boolean lookupFarOthers; Boolean lookupFarProd; @@ -5749,11 +5752,12 @@ NLM_EXTERN Boolean SeqEntryToGnbk ( lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0); lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0); lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0); + lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0); lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0); - if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarOthers) { - locks = locks ^ (LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY | LOOKUP_FAR_OTHERS); - LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarOthers); + if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) { + locks = locks ^ (LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY | LOOKUP_FAR_INFERENCE | LOOKUP_FAR_OTHERS); + LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers); } ProfilerSetStatus (TRUE); diff --git a/api/asn2gnb2.c b/api/asn2gnb2.c index c8353c89..9e940a24 100644 --- a/api/asn2gnb2.c +++ b/api/asn2gnb2.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.69 $ +* $Revision: 1.74 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -455,6 +455,7 @@ NLM_EXTERN void AddLocusBlock ( Char mol [30]; Int4 nextGi; BioseqPtr nm = NULL; + BioseqPtr nuc; ObjectIdPtr oip; OrgNamePtr onp; Uint1 origin; @@ -847,6 +848,22 @@ NLM_EXTERN void AddLocusBlock ( StringCpy (div, "PAT"); } + /* if protein is encoded by a patent nucleotide, use PAT division */ + + if (ISA_aa (bsp->mol)) { + cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext); + if (cds != NULL) { + nuc = BioseqFindFromSeqLoc (cds->location); + if (nuc != NULL) { + for (sip = nuc->id; sip != NULL; sip = sip->next) { + if (sip->choice == SEQID_PATENT) { + StringCpy (div, "PAT"); + } + } + } + } + } + /* more complicated code for division, if necessary, goes here */ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext); @@ -1067,6 +1084,9 @@ NLM_EXTERN void AddLocusBlock ( if (topol < 0 || topol > 2) { topol = 0; } + if (topol == 0) { + topol = 1; /* default to displaying linear if not set */ + } gbseq->topology = StringSave (gbseq_top [topol]); for (sip = bsp->id; sip != NULL; sip = sip->next) { @@ -1082,11 +1102,16 @@ NLM_EXTERN void AddLocusBlock ( } if (dp != NULL) { DateToFF (date, dp, FALSE); + if (StringDoesHaveText (date)) { + gbseq->create_date = StringSave (date); + } } + /* if (StringHasNoText (date)) { StringCpy (date, "01-JAN-1900"); } gbseq->create_date = StringSave (date); + */ date [0] = '\0'; dp = NULL; @@ -2016,13 +2041,16 @@ NLM_EXTERN void AddProjectBlock ( { IntAsn2gbJobPtr ajp; + Asn2gbSectPtr asp; BaseBlockPtr bbp; BioseqPtr bsp; Char buf [32]; UserFieldPtr curr; SeqMgrDescContext dcontext; StringItemPtr ffstring; + GBSeqPtr gbseq; UserObjectPtr gpuop = NULL; + ValNodePtr head = NULL; Uint4 itemID; ObjectIdPtr oip; Int4 parentID; @@ -2037,10 +2065,18 @@ NLM_EXTERN void AddProjectBlock ( if (ajp == NULL) return; bsp = awp->bsp; if (bsp == NULL) return; + asp = awp->asp; + if (asp == NULL) return; if (! ISA_na (bsp->mol)) return; if (awp->format != GENBANK_FMT) return; + if (ajp->gbseq) { + gbseq = &asp->gbseq; + } else { + gbseq = NULL; + } + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); while (sdp != NULL) { uop = (UserObjectPtr) sdp->data.ptrvalue; @@ -2087,6 +2123,14 @@ NLM_EXTERN void AddProjectBlock ( /* FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE); */ + if (gbseq != NULL) { + if (head == NULL) { + sprintf (buf, "%ld", (long) projectID); + } else { + sprintf (buf, ", %ld", (long) projectID); + } + ValNodeCopyStr (&head, 0, buf); + } prefix = ","; parentID = 0; } @@ -2110,11 +2154,26 @@ NLM_EXTERN void AddProjectBlock ( /* FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE); */ + if (gbseq != NULL) { + if (head == NULL) { + sprintf (buf, "%ld", (long) projectID); + } else { + sprintf (buf, ", %ld", (long) projectID); + } + ValNodeCopyStr (&head, 0, buf); + } } bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX"); FFRecycleString (ajp, ffstring); + if (gbseq != NULL) { + if (head != NULL) { + gbseq->project = MergeFFValNodeStrs (head); + ValNodeFreeData (head); + } + } + if (awp->afp != NULL) { DoImmediateFormat (awp->afp, bbp); } @@ -2986,6 +3045,7 @@ NLM_EXTERN void AddDbsourceBlock ( SeqIdPtr id; ValNodePtr list = NULL; BioseqPtr nuc; + SeqEntryPtr sep; SeqIdPtr sip; SeqLocPtr slp; CharPtr str; @@ -3084,6 +3144,23 @@ NLM_EXTERN void AddDbsourceBlock ( } } ValNodeFree (list); + } else { + sep = GetTopSeqEntryForEntityID (awp->entityID); + if (sep != NULL && IS_Bioseq (sep)) { + /* special case for coded_by CDS packed on retcode 1 protein */ + id = SeqLocId (cds->location); + if (id != NULL && id->choice == SEQID_GI) { + sip = GetSeqIdForGI (id->data.intvalue); + if (sip == NULL) { + sip = id; + } + } + if (WriteDbsourceID (sip, buf)) { + FF_www_dbsource (ajp, ffstring, buf, TRUE, sip->choice); + FFAddNewLine(ffstring); + unknown = FALSE; + } + } } } else { if (WriteDbsourceID (sip, buf)) { diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c index 2aebe7ad..45f44fd9 100644 --- a/api/asn2gnb4.c +++ b/api/asn2gnb4.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.98 $ +* $Revision: 1.106 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -1495,6 +1495,7 @@ static Int2 ValidateAccnInternal ( if (numAlpha == 2 && numDigits == 6) return 0; if (numAlpha == 3 && numDigits == 5) return 0; if (numAlpha == 4 && numDigits == 8) return 0; + if (numAlpha == 5 && numDigits == 7) return 0; } else if (numUndersc == 1) { if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return -2; if (accession [0] == 'N' || accession [0] == 'X' || accession [0] == 'Z') { @@ -3313,8 +3314,21 @@ static void FormatFeatureBlockQuals ( pmid = (Int4) vnp->data.intvalue; if (pmid > 0) { sprintf (numbuf, "%ld", (long) pmid); + FFAddOneString(ffstring, "/citation=[PUBMED ", FALSE, TRUE, TILDE_TO_SPACES); + if (GetWWW (ajp)) { + + FFAddTextToString(ffstring, "<a href=", link_muid, NULL, FALSE, FALSE, TILDE_IGNORE); + FFAddTextToString(ffstring, NULL, numbuf, ">", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString(ffstring, numbuf, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } else { + FFAddOneString(ffstring, numbuf, FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString(ffstring, "]", FALSE, FALSE, TILDE_IGNORE); + /* FFAddTextToString(ffstring, "/citation=[PUBMED ", numbuf, "]", FALSE, TRUE, TILDE_TO_SPACES); + */ FFAddOneChar(ffstring, '\n', FALSE); } } @@ -3380,7 +3394,7 @@ static void FormatFeatureBlockQuals ( } if (okay) { FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE); - FF_www_db_xref(ajp, ffstring, dbt->db, buf); + FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp); FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); } } @@ -3423,7 +3437,7 @@ static void FormatFeatureBlockQuals ( } sprintf (seqid, "%ld", (long) sip->data.intvalue); FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE); - FF_www_db_xref(ajp, ffstring, "GI", seqid); + FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp); FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); } else if (sip->choice == SEQID_GENERAL) { dbt = (DbtagPtr) sip->data.ptrvalue; @@ -3455,6 +3469,11 @@ static void FormatFeatureBlockQuals ( if (sip->choice == SEQID_GI) { gi = sip->data.intvalue; if (GetAccnVerFromServer (gi, seqid)) { +#ifdef OS_UNIX + if (getenv ("ASN2GB_PSF_DEBUG") != NULL) { + printf ("GetAccnVerFromServer returned %s\n", seqid); + } +#endif if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) { FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"", FALSE, FALSE, TILDE_IGNORE); @@ -3466,6 +3485,11 @@ static void FormatFeatureBlockQuals ( } else { sip = GetSeqIdForGI (gi); if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) { +#ifdef OS_UNIX + if (getenv ("ASN2GB_PSF_DEBUG") != NULL) { + printf ("GetSeqIdForGI returned %s\n", seqid); + } +#endif if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) { FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"", FALSE, FALSE, TILDE_IGNORE); @@ -3487,7 +3511,7 @@ static void FormatFeatureBlockQuals ( sprintf (seqid, "%ld", (long) gi); FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE); - FF_www_db_xref(ajp, ffstring, "GI", seqid); + FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp); FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); } else if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) { if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) { @@ -3503,7 +3527,7 @@ static void FormatFeatureBlockQuals ( if (gi > 0) { sprintf (seqid, "%ld", (long) gi); FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE); - FF_www_db_xref(ajp, ffstring, "GI", seqid); + FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp); FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); } } @@ -4201,11 +4225,10 @@ static void FormatFeatureBlockQuals ( } - static void FF_asn2gb_www_featkey ( StringItemPtr ffstring, CharPtr key, - SeqLocPtr slp, + SeqFeatPtr sfp, Int4 from, Int4 to, Uint1 strand, @@ -4213,13 +4236,21 @@ static void FF_asn2gb_www_featkey ( ) { - BioseqPtr bsp; - Int4 gi = 0; - SeqIdPtr sip; - Boolean is_aa = FALSE; - Char gi_buf[16]; - Char itemID_buf[16]; + BioseqPtr bsp; + Char buf [16]; + Int4 featID = 0; + Int4 ffrom = 0; + Int4 fto = 0; + Int4 gi = 0; + Char gi_buf[16]; + Boolean is_aa = FALSE; + ObjectIdPtr oip; + SeqIntPtr sintp; + SeqIdPtr sip; + SeqLocPtr slp; + if (sfp == NULL) return; + slp = sfp->location; bsp = BioseqFindFromSeqLoc (slp); if (bsp != NULL) { is_aa = ISA_aa (bsp->mol); @@ -4228,19 +4259,47 @@ static void FF_asn2gb_www_featkey ( gi = (Int4) sip->data.intvalue; } } + } else { + if (sfp->id.choice == 3) { + oip = (ObjectIdPtr) sfp->id.value.ptrvalue; + if (oip != NULL && oip->str == NULL) { + featID = oip->id; + } + } + if (slp->choice == SEQLOC_INT) { + sintp = (SeqIntPtr) slp->data.ptrvalue; + if (sintp != NULL) { + ffrom = sintp->from + 1; + fto = sintp->to + 1; + sip = sintp->id; + if (sip->choice == SEQID_GI) { + gi = (Int4) sip->data.intvalue; + } + } + } } - sprintf(gi_buf, "%ld", (long)gi); - sprintf(itemID_buf, "%ld", (long)itemID); - + sprintf (gi_buf, "%ld", (long)gi); FFAddOneString(ffstring, "<a href=", FALSE, FALSE, TILDE_IGNORE); FFAddOneString(ffstring, link_feat, FALSE, FALSE, TILDE_IGNORE); FFAddOneString(ffstring, "val=", FALSE, FALSE, TILDE_IGNORE); FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE); - if (itemID > 0) { + if (featID > 0) { + sprintf (buf, "%ld", (long) featID); + FFAddOneString(ffstring, "&featID=", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE); + } else if (ffrom > 0 && fto > 0) { + sprintf (buf, "%ld", (long) ffrom); + FFAddOneString(ffstring, "&from=", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE); + sprintf (buf, "%ld", (long) fto); + FFAddOneString(ffstring, "&to=", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE); + } else if (itemID > 0) { + sprintf (buf, "%ld", (long) itemID); FFAddOneString(ffstring, "&itemID=", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString(ffstring, itemID_buf, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE); } @@ -4353,8 +4412,11 @@ static void AddIntervalsToGbfeat ( Char accn [41]; SeqLocPtr copy = NULL; Int4 from; + IntFuzzPtr fuzz; GBIntervalPtr gbint; Int4 gi; + Boolean interbp; + Boolean iscomp; GBIntervalPtr last = NULL; Int4 point; SeqIntPtr sint; @@ -4375,6 +4437,8 @@ static void AddIntervalsToGbfeat ( from = 0; to = 0; point = 0; + iscomp = FALSE; + interbp = FALSE; sip = NULL; switch (slp->choice) { case SEQLOC_WHOLE : @@ -4398,6 +4462,9 @@ static void AddIntervalsToGbfeat ( from = to; to = swap; } + if (sint->strand == Seq_strand_minus) { + iscomp = TRUE; + } } break; case SEQLOC_PNT : @@ -4405,6 +4472,25 @@ static void AddIntervalsToGbfeat ( if (spp != NULL) { point = spp->point + 1; sip = spp->id; + if (spp->strand == Seq_strand_minus) { + iscomp = TRUE; + } + fuzz = spp->fuzz; + if (fuzz != NULL) { + if (fuzz->choice == 4) { + if (fuzz->a == 3) { /* space to right */ + from = point; + to = point + 1; + point = 0; + interbp = TRUE; + } else if (fuzz->a == 4 && point > 1) { /* space to left */ + from = point - 1; + to = point; + point = 0; + interbp = TRUE; + } + } + } } break; default : @@ -4431,6 +4517,8 @@ static void AddIntervalsToGbfeat ( gbint->from = from; gbint->to = to; gbint->point = point; + gbint->iscomp = iscomp; + gbint->interbp = interbp; gbint->accession = StringSave (accn); if (gbfeat->intervals == NULL) { gbfeat->intervals = gbint; @@ -4800,7 +4888,7 @@ static CharPtr FormatFeatureBlockEx ( Choice cbaa; CodeBreakPtr cbp; BioseqPtr cdna; - SeqFeatPtr cds; + SeqFeatPtr cds = NULL; Char ch; Uint1 code = Seq_code_ncbieaa; CdRegionPtr crp; @@ -5065,7 +5153,7 @@ static CharPtr FormatFeatureBlockEx ( if (ajp->ajp.slp != NULL) { FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE); } else if ( GetWWW(ajp) && StringICmp (key, "gap") != 0 /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) { - FF_asn2gb_www_featkey (ffstring, key, sfp->location, fcontext->left + 1, fcontext->right + 1, fcontext->strand, itemID); + FF_asn2gb_www_featkey (ffstring, key, sfp, fcontext->left + 1, fcontext->right + 1, fcontext->strand, itemID); } else { FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE); } @@ -5111,6 +5199,15 @@ static CharPtr FormatFeatureBlockEx ( if (gbseq != NULL) { if (gbfeat != NULL) { gbfeat->location = StringSave (str); + if (StringDoesHaveText (str)) { + if (StringStr (str, "join") != NULL) { + gbfeat->operator__ = StringSave ("join"); + } else if (StringStr (str, "order") != NULL) { + gbfeat->operator__ = StringSave ("order"); + } + } + gbfeat->partial5 = fcontext->partialL; + gbfeat->partial3 = fcontext->partialR; if (ajp->masterStyle) { AddIntervalsToGbfeat (gbfeat, location, target); } else { @@ -5229,6 +5326,9 @@ static CharPtr FormatFeatureBlockEx ( gene_for_old_locus_tag = SeqMgrGetFeatureByLabel (bsp_for_old_locus_tag, grp->locus_tag, SEQFEAT_GENE, 0, &gcontext); } } + if (grp == NULL && ifp->mapToNuc && cds != NULL) { + grp = SeqMgrGetGeneXref (cds); + } if (grp == NULL && featdeftype != FEATDEF_primer_bind) { gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene); if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) { diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c index 7abce569..34462b1a 100644 --- a/api/asn2gnb5.c +++ b/api/asn2gnb5.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.54 $ +* $Revision: 1.64 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -281,6 +281,12 @@ static Char link_hprd [MAX_WWWBUF]; static Char link_uspto [MAX_WWWBUF]; #define DEF_LINK_USPTO "http://patft.uspto.gov/netacgi/nph-Parser?patentnumber=" +static Char link_vector [MAX_WWWBUF]; +#define DEF_LINK_VECTOR "http://www.vectorbase.org/Genome/BRCGene/?" + +static Char link_mirbase [MAX_WWWBUF]; +#define DEF_LINK_MIRBASE "http://microrna.sanger.ac.uk/cgi-bin/sequences/mirna_entry.pl?acc=" + /* www utility functions */ @@ -310,8 +316,6 @@ NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp) GetAppParam ("NCBI", "WWWENTREZ", "LINK_ECAMBIG", DEF_LINK_ECAMBIG, ec_ambig, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_FF", DEF_LINK_FF, link_ff, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_MUID", DEF_LINK_MUID, link_muid, MAX_WWWBUF); - GetAppParam ("NCBI", "WWWENTREZ", "LINK_FF", DEF_LINK_FF, link_ff, MAX_WWWBUF); - GetAppParam ("NCBI", "WWWENTREZ", "LINK_MUID", DEF_LINK_MUID, link_muid, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_ACE", DEF_LINK_ACE, link_ace, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_TAX", DEF_LINK_TAX, link_tax, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_CODE", DEF_LINK_CODE, link_code, MAX_WWWBUF); @@ -371,6 +375,8 @@ NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp) GetAppParam ("NCBI", "WWWENTREZ", "LINK_BOLD", DEF_LINK_BOLD, link_bold, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_HPRD", DEF_LINK_HPRD, link_hprd, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_USPTO", DEF_LINK_USPTO, link_uspto, MAX_WWWBUF); + GetAppParam ("NCBI", "WWWENTREZ", "LINK_VECTOR", DEF_LINK_VECTOR, link_vector, MAX_WWWBUF); + GetAppParam ("NCBI", "WWWENTREZ", "LINK_MIRBASE", DEF_LINK_MIRBASE, link_mirbase, MAX_WWWBUF); } @@ -670,6 +676,39 @@ static void FF_www_db_xref_hprd ( FF_www_db_xref_std (ffstring, db, identifier, link); } +static void FF_www_db_xref_vector ( + StringItemPtr ffstring, + CharPtr db, + CharPtr identifier, + BioseqPtr bsp, + CharPtr link +) +{ + Char ch; + Char buf [512], tax [256]; + CharPtr ptr; + + StringCpy (buf, link); + if (bsp != NULL) { + if (BioseqToGeneticCode (bsp, NULL, NULL, NULL, tax, sizeof (tax), NULL)) { + ptr = tax; + ch = *ptr; + while (ch != '\0') { + if (IS_WHITESP (ch)) { + *ptr = '_'; + } + ptr++; + ch = *ptr; + } + StringCat (buf, "org="); + StringCat (buf, tax); + StringCat (buf, "&"); + } + } + StringCat (buf, "gene="); + FF_www_db_xref_std (ffstring, db, identifier, buf); +} + static void FF_www_db_xref_null ( StringItemPtr ffstring, CharPtr db, @@ -688,7 +727,9 @@ static void FF_www_db_xref_null ( static void Do_www_db_xref( IntAsn2gbJobPtr ajp, StringItemPtr ffstring, - CharPtr db, CharPtr identifier + CharPtr db, + CharPtr identifier, + BioseqPtr bsp ) { if ( ffstring == NULL || db == NULL || identifier == NULL ) return; @@ -813,6 +854,10 @@ static void Do_www_db_xref( FF_www_db_xref_null(ffstring, db, identifier, link_bold); } else if ( StringCmp(db , "HPRD") == 0) { FF_www_db_xref_hprd(ffstring, db, identifier, link_hprd); + } else if ( StringCmp(db , "VectorBase") == 0) { + FF_www_db_xref_vector(ffstring, db, identifier, bsp, link_vector); + } else if ( StringCmp(db , "miRBase") == 0) { + FF_www_db_xref_std(ffstring, db, identifier, link_mirbase); } else { /* default: no link just the text */ @@ -823,13 +868,15 @@ static void Do_www_db_xref( NLM_EXTERN void FF_www_db_xref( IntAsn2gbJobPtr ajp, StringItemPtr ffstring, - CharPtr db, CharPtr identifier + CharPtr db, + CharPtr identifier, + BioseqPtr bsp ) { if ( ffstring == NULL || db == NULL || identifier == NULL ) return; if ( GetWWW(ajp) ) { - Do_www_db_xref (ajp, ffstring, db, identifier); + Do_www_db_xref (ajp, ffstring, db, identifier, bsp); } else { /* not in www mode */ if (StringCmp(db , "MGD") == 0 || StringCmp(db , "MGI") == 0) { if (StringNICmp (identifier, "MGI:", 4) == 0) { @@ -885,7 +932,7 @@ NLM_EXTERN CharPtr asn2gnbk_dbxref ( } ajp->www = TRUE; - Do_www_db_xref (ajp, ffstring, dbt->db, buf); + Do_www_db_xref (ajp, ffstring, dbt->db, buf, NULL); ajp->www = FALSE; @@ -1082,6 +1129,7 @@ NLM_EXTERN CharPtr GetAuthorsString ( { AuthorPtr ap; + ValNodePtr clist; ValNodePtr conslist; Int2 count; ValNodePtr head = NULL; @@ -1151,23 +1199,33 @@ NLM_EXTERN CharPtr GetAuthorsString ( prefix = ", "; } + prefix = NULL; + clist = NULL; for (vnp = conslist; vnp != NULL; vnp = vnp->next) { str = NULL; pid = (PersonIdPtr) vnp->data.ptrvalue; if (pid->choice == 5) { - str = MakeSingleAuthorString (format, NULL, (CharPtr) pid->data, NULL, NULL, index, NULL); - if ((! StringHasNoText (str)) && consortP != NULL && *consortP == NULL) { - *consortP = StringSave (str); + str = MakeSingleAuthorString (format, prefix, (CharPtr) pid->data, NULL, NULL, index, NULL); + if (str != NULL) { + ValNodeAddStr (&clist, 0, str); } + prefix = "; "; + } + } + if (clist != NULL) { + str = MergeFFValNodeStrs (clist); + if ((! StringHasNoText (str)) && consortP != NULL && *consortP == NULL) { + *consortP = StringSave (str); + } - /* optionally populate gbseq for XML-ized GenBank format */ - - if (gbref != NULL) { - gbref->consortium = StringSave (str); - } + /* optionally populate gbseq for XML-ized GenBank format */ - str = MemFree (str); + if (gbref != NULL) { + gbref->consortium = StringSave (str); } + + str = MemFree (str); + ValNodeFreeData (clist); } ValNodeFree (pidlist); @@ -3254,15 +3312,25 @@ static CharPtr remarksText [] = { static void AddReferenceToGbseq ( GBSeqPtr gbseq, GBReferencePtr gbref, - CharPtr str + CharPtr str, + RefBlockPtr rbp, + BioseqPtr bsp ) { - CharPtr copy; - CharPtr ptr; - CharPtr ref; - - if (gbseq == NULL || gbref == NULL || StringHasNoText (str)) return; + Char buf [32]; + CharPtr copy; + ValNodePtr head = NULL; + IntRefBlockPtr irp; + SeqLocPtr loc; + CharPtr ptr; + CharPtr ref; + SeqLocPtr slp; + Int4 start; + Int4 stop; + CharPtr tmp; + + if (gbseq == NULL || gbref == NULL || StringHasNoText (str) || rbp == NULL || bsp == NULL) return; copy = StringSave (str); @@ -3277,7 +3345,13 @@ static void AddReferenceToGbseq ( ref = copy + 12; ptr = StringStr (ref, "\n AUTHORS"); if (ptr == NULL) { + ptr = StringStr (ref, "\n CONSRTM"); + } + if (ptr == NULL) { ptr = StringStr (ref, ")\n"); + if (ptr != NULL) { + ptr++; + } } if (ptr != NULL) { *ptr = '\0'; @@ -3300,6 +3374,36 @@ static void AddReferenceToGbseq ( Asn2gnbkCompressSpaces (gbref->journal); MemFree (copy); + + if (rbp->sites == 1 || rbp->sites == 2) { + gbref->position = StringSave ("sites"); + } else if (rbp->sites == 3) { + } else { + irp = (IntRefBlockPtr) rbp; + loc = irp->loc; + if (loc != NULL) { + slp = SeqLocFindNext (loc, NULL); + while (slp != NULL) { + start = SeqLocStart (slp) + 1; + stop = SeqLocStop (slp) + 1; + if (head == NULL) { + sprintf (buf, "%ld..%ld", (long) start, (long) stop); + } else { + sprintf (buf, "; %ld..%ld", (long) start, (long) stop); + } + ValNodeCopyStr (&head, 0, buf); + slp = SeqLocFindNext (loc, slp); + } + tmp = MergeFFValNodeStrs (head); + ValNodeFreeData (head); + gbref->position = tmp; + } else { + start = 1; + stop = bsp->length; + sprintf (buf, "%ld..%ld", (long) start, (long) stop); + gbref->position = StringSave (buf); + } + } } static Boolean IsCitSub ( @@ -3381,6 +3485,8 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( Int4 pmid = 0; CharPtr prefix = NULL; RefBlockPtr rbp; + ValNodePtr remarks = NULL; + CharPtr remprefix = NULL; SubmitBlockPtr sbp; SeqDescrPtr sdp; SeqFeatPtr sfp = NULL; @@ -3631,10 +3737,6 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( /* print author list */ - FFRecycleString(ajp, temp); - temp = FFGetString(ajp); - FFStartPrint(temp, afp->format, 2, 12, "AUTHORS", 12, 5, 5, "RA", FALSE); - str = NULL; consortium = NULL; @@ -3644,36 +3746,42 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( TrimSpacesAroundString (str); } - if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { - suffix = NULL; - trailingPeriod = TRUE; - } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { - trailingPeriod = FALSE; - len = StringLen (str); - if (len > 0 && str [len - 1] != '.') { - suffix = ".;"; - } else { - suffix = ";"; - } - } + if (str != NULL || StringHasNoText (consortium)) { + FFRecycleString(ajp, temp); + temp = FFGetString(ajp); + FFStartPrint(temp, afp->format, 2, 12, "AUTHORS", 12, 5, 5, "RA", FALSE); - /* if no authors were found, period will still be added by this call */ - if (str != NULL) { - FFAddTextToString(temp, NULL, str, suffix, trailingPeriod, FALSE, TILDE_TO_SPACES); - } else { if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { - FFAddOneChar(temp, '.', FALSE); + suffix = NULL; + trailingPeriod = TRUE; } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { - FFAddOneChar(temp, ';', FALSE); - } - } + trailingPeriod = FALSE; + len = StringLen (str); + if (len > 0 && str [len - 1] != '.') { + suffix = ".;"; + } else { + suffix = ";"; + } + } - MemFree (str); - if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { + /* if no authors were found, period will still be added by this call */ + if (str != NULL) { + FFAddTextToString(temp, NULL, str, suffix, trailingPeriod, FALSE, TILDE_TO_SPACES); + } else if (StringHasNoText (consortium)) { + if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { + FFAddOneChar(temp, '.', FALSE); + } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { + FFAddOneChar(temp, ';', FALSE); + } + } + + if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); - } else { + } else { FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA"); + } } + MemFree (str); /* print consortium */ @@ -3857,7 +3965,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( if (gbseq != NULL) { if (gbref != NULL) { - AddReferenceToGbseq (gbseq, gbref, str); + AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp); } } @@ -3890,6 +3998,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFRecycleString(ajp, temp); temp = FFGetString(ajp); + if (remprefix != NULL) { + ValNodeCopyStr (&remarks, 0, remprefix); + } + ValNodeCopyStr (&remarks, 0, pdp->comment); + remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, pdp->comment, FALSE, TRUE, TILDE_EXPAND); /* AddCommentWithURLlinks(ajp, temp, NULL, pdp->comment, NULL); */ @@ -3898,7 +4011,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( if (gbseq != NULL) { if (gbref != NULL) { + /* gbref->remark = StringSave (pdp->comment); + */ } } @@ -3922,6 +4037,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( temp = FFGetString(ajp); sprintf (buf, "GenBank staff at the National Library of Medicine created this entry [NCBI gibbsq %ld] from the original journal article.", (long) gibbsq); + if (remprefix != NULL) { + ValNodeCopyStr (&remarks, 0, remprefix); + } + ValNodeCopyStr (&remarks, 0, buf); + remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND); FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); @@ -3938,6 +4058,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( temp = FFGetString(ajp); sprintf (buf, "This sequence comes from %s", str); + if (remprefix != NULL) { + ValNodeCopyStr (&remarks, 0, remprefix); + } + ValNodeCopyStr (&remarks, 0, buf); + remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND); FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); @@ -3948,7 +4073,12 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFRecycleString(ajp, temp); temp = FFGetString(ajp); - FFStartPrint (temp ,afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); + if (remprefix != NULL) { + ValNodeCopyStr (&remarks, 0, remprefix); + } + ValNodeCopyStr (&remarks, 0, "Polyadenylate residues occurring in the figure were omitted from the sequence."); + remprefix = "; "; + FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, "Polyadenylate residues occurring in the figure were omitted from the sequence.", TRUE, TRUE, TILDE_EXPAND); FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); prefix = NULL; @@ -3963,6 +4093,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( temp = FFGetString(ajp); sprintf (buf, "Map location: %s", str); + if (remprefix != NULL) { + ValNodeCopyStr (&remarks, 0, remprefix); + } + ValNodeCopyStr (&remarks, 0, buf); + remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND); FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL); @@ -3984,6 +4119,17 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFRecycleString(ajp, temp); temp = FFGetString(ajp); + len = StringLen (crp->exp) + 20; + str = MemNew (sizeof (Char) * len); + if (str != NULL) { + sprintf (str, "Erratum:[%s]", crp->exp); + if (remprefix != NULL) { + ValNodeCopyStr (&remarks, 0, remprefix); + } + ValNodeCopyStr (&remarks, 0, str); + remprefix = "; "; + str = MemFree (str); + } FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); FFAddOneString (temp, "Erratum:", FALSE, FALSE, TILDE_TO_SPACES); FFAddTextToString (temp, "[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND); @@ -4000,6 +4146,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( FFRecycleString(ajp, temp); temp = FFGetString(ajp); + if (remprefix != NULL) { + ValNodeCopyStr (&remarks, 0, remprefix); + } + ValNodeCopyStr (&remarks, 0, csp->descr); + remprefix = "; "; FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE); /* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */ AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL); @@ -4016,9 +4167,14 @@ NLM_EXTERN CharPtr FormatReferenceBlock ( if (gbseq != NULL) { if (gbref != NULL) { - AddReferenceToGbseq (gbseq, gbref, str); + if (remarks != NULL) { + gbref->remark = MergeFFValNodeStrs (remarks); + } + + AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp); } } + ValNodeFreeData (remarks); FFRecycleString(ajp, ffstring); FFRecycleString(ajp, temp); diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c index 698d7587..a9d156fd 100644 --- a/api/asn2gnb6.c +++ b/api/asn2gnb6.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 1.69 $ +* $Revision: 1.73 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -446,6 +446,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = { "UniProtKB/TrEMBL", "UniSTS", "VBASE2", + "VectorBase", "WorfDB", "WormBase", "ZFIN", @@ -458,6 +459,7 @@ NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = { "ECOCYC", "HPRD", "REBASE", + "miRBase", NULL }; @@ -3195,7 +3197,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock ( } if (! StringHasNoText (buf)) { FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE); - FF_www_db_xref(ajp, ffstring, dbt->db, buf); + FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp); FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); } } @@ -4087,7 +4089,7 @@ static Int2 ProcessGapSpecialFormat ( ) { - Char fmt_buf [32]; + Char fmt_buf [64]; Char gapbuf [80]; Int4 gi; Char gi_buf [16]; @@ -4129,6 +4131,9 @@ static Int2 ProcessGapSpecialFormat ( if (gi > 0) { sprintf(gi_buf, "%ld", (long) gi); sprintf(fmt_buf, "&fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY); + if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) { + StringCat (fmt_buf, "&view=gbwithparts"); + } FFAddOneString (ffstring, " <a href=", FALSE, FALSE, TILDE_IGNORE); FFAddOneString (ffstring, link_featc, FALSE, FALSE, TILDE_IGNORE); FFAddOneString (ffstring, "val=", FALSE, FALSE, TILDE_IGNORE); @@ -4511,6 +4516,7 @@ NLM_EXTERN CharPtr FormatSlashBlock ( is.accession_version = gbseq->accession_version; is.other_seqids = gbseq->other_seqids; is.secondary_accessions = gbseq->secondary_accessions; + is.project = gbseq->project; is.keywords = gbseq->keywords; is.segment = gbseq->segment; is.source = gbseq->source; diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h index 1c4e4ebd..2c966b2f 100644 --- a/api/asn2gnbi.h +++ b/api/asn2gnbi.h @@ -29,7 +29,7 @@ * * Version Creation Date: 12/30/03 * -* $Revision: 1.61 $ +* $Revision: 1.62 $ * * File Description: New GenBank flatfile generator, internal header * @@ -705,7 +705,9 @@ NLM_EXTERN Char link_sp [MAX_WWWBUF]; NLM_EXTERN void FF_www_db_xref( IntAsn2gbJobPtr ajp, StringItemPtr ffstring, - CharPtr db, CharPtr identifier + CharPtr db, + CharPtr identifier, + BioseqPtr bsp ); NLM_EXTERN Boolean StringIsJustQuotes ( diff --git a/api/asn2gnbk.h b/api/asn2gnbk.h index eb181c3c..7db3d786 100644 --- a/api/asn2gnbk.h +++ b/api/asn2gnbk.h @@ -29,7 +29,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 6.69 $ +* $Revision: 6.70 $ * * File Description: New GenBank flatfile generator * @@ -131,7 +131,8 @@ typedef unsigned long LckType; #define LOOKUP_FAR_LOCATIONS 32 #define LOOKUP_FAR_PRODUCTS 64 #define LOOKUP_FAR_HISTORY 128 -#define LOOKUP_FAR_OTHERS 256 +#define LOOKUP_FAR_INFERENCE 256 +#define LOOKUP_FAR_OTHERS 512 /* bit flags for unusual customized reports */ diff --git a/api/edutil.c b/api/edutil.c index 4a288a79..5e08e2b9 100644 --- a/api/edutil.c +++ b/api/edutil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/4/94 * -* $Revision: 6.54 $ +* $Revision: 6.56 $ * * File Description: Sequence editing utilities * @@ -39,6 +39,12 @@ * ------- ---------- ----------------------------------------------------- * * $Log: edutil.c,v $ +* Revision 6.56 2006/04/04 18:00:47 kans +* SeqLocAddEx properly returns value to &last argument, makes SeqLocMix from DeltaSeqsToSeqLocs +* +* Revision 6.55 2006/03/30 19:50:15 kans +* DeltaSeqsToSeqLocs calls SeqLocAddEx for efficient list usage +* * Revision 6.54 2006/02/07 13:41:29 bollin * added function AdjustFeatureForGapChange, which changes a feature to accommodate * a change in the length of a gap @@ -370,15 +376,16 @@ NLM_EXTERN SeqLocPtr LIBCALL SeqLocPackage (SeqLocPtr head) * if incoming is merged, deletes the incoming SeqLoc * *****************************************************************************/ -NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy) +static SeqLocPtr LIBCALL SeqLocAddEx (SeqLocPtr PNTR head, SeqLocPtr PNTR lastp, SeqLocPtr slp, Boolean merge, Boolean do_copy) { - SeqLocPtr tmp, last, retval = NULL; + SeqLocPtr tmp, last = NULL, retval = NULL; Boolean merged = FALSE; /* intervals were merged */ if (slp == NULL) return NULL; - last = NULL; - if (* head != NULL) + if (lastp != NULL) { + last = *lastp; + } else if (head != NULL && *head != NULL) { for (tmp = *head; tmp != NULL; tmp = tmp->next) { @@ -501,12 +508,16 @@ ret: else tmp = slp; - tmp->next = NULL; + if (tmp != NULL) { + tmp->next = NULL; + } - if (last != NULL) + if (last != NULL) { last->next = tmp; - else + } else if (head != NULL) { *head = tmp; + } + last = tmp; retval = tmp; } else @@ -515,10 +526,30 @@ ret: if (! do_copy) /* got to free it here */ SeqLocFree(slp); } + if (lastp != NULL) { + *lastp = last; + } return retval; } +NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy) +{ + SeqLocPtr tmp, last; + + if (slp == NULL) return NULL; + + last = NULL; + if (* head != NULL) + { + for (tmp = *head; tmp != NULL; tmp = tmp->next) + { + last = tmp; + } + } + return SeqLocAddEx (head, &last, slp, merge, do_copy); +} + /***************************************************************************** * * SegLocToParts(BioseqPtr seg, SeqLocPtr slp) @@ -652,7 +683,7 @@ NLM_EXTERN Int4 LIBCALL ISADeltaSeqsToSeqLoc (SeqLocPtr slp) *****************************************************************************/ NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp) { - SeqLocPtr head = NULL, thead=NULL; + SeqLocPtr head = NULL, thead = NULL, last = NULL; DeltaSeqPtr curr; SeqInt si; Dbtag db; @@ -674,14 +705,15 @@ NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp) oi.id = 1; + for (curr = dsp; curr != NULL; curr = curr->next) { if (curr->choice == 1) /* a SeqLoc */ - SeqLocAdd(&thead, (SeqLocPtr)(curr->data.ptrvalue), TRUE, TRUE); + SeqLocAddEx (&thead, &last, (SeqLocPtr)(curr->data.ptrvalue), TRUE, TRUE); else { - si.to = ((SeqLitPtr)(curr->data.ptrvalue))->length - 1; - SeqLocAdd(&thead, &vn, TRUE, TRUE); + si.to = ((SeqLitPtr) (curr->data.ptrvalue))->length - 1; + SeqLocAddEx (&thead, &last, &vn, TRUE, TRUE); } oi.id++; } diff --git a/api/seqmgr.c b/api/seqmgr.c index ecefefa8..d3e0e41e 100644 --- a/api/seqmgr.c +++ b/api/seqmgr.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/94 * -* $Revision: 6.259 $ +* $Revision: 6.263 $ * * File Description: Manager for Bioseqs and BioseqSets * @@ -39,6 +39,18 @@ * ------- ---------- ----------------------------------------------------- * * $Log: seqmgr.c,v $ +* Revision 6.263 2006/04/13 20:02:15 kans +* LookupFarSeqIDs takes inference parameter +* +* Revision 6.262 2006/04/05 17:18:23 kans +* IndexSegmentedParts uses Int4 for numsegs to avoid overflow to negative number, failure to MemNew +* +* Revision 6.261 2006/03/21 15:32:13 kans +* set ignore flag on generated gaps in IndexRecordedFeatures, not as side effect of sorting callback +* +* Revision 6.260 2006/03/20 22:53:44 kans +* sort flatfile-generated gap feature last, set ignore flag +* * Revision 6.259 2006/02/17 19:05:05 kans * special case coded_by only for CDS feature on isolated protein bioseq * @@ -7123,6 +7135,16 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2) return 1; } + /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */ + + if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) { + if (sp1->itemID > sp2->itemID) { + return 1; + } else if (sp1->itemID < sp2->itemID) { + return -1; + } + } + /* if identical cds ranges, compare codon_start */ if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) { @@ -7295,6 +7317,16 @@ static int LIBCALLBACK SortFeatItemListByRev (VoidPtr vp1, VoidPtr vp2) return 1; } + /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */ + + if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) { + if (sp1->itemID > sp2->itemID) { + return 1; + } else if (sp1->itemID < sp2->itemID) { + return -1; + } + } + /* if identical cds ranges, compare codon_start */ if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) { @@ -7431,8 +7463,8 @@ static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp) BioseqPtr bsp; BioseqExtraPtr bspextra; BioseqSetPtr bssp; - Int2 i; - Int2 numsegs = 0; + Int4 i; + Int4 numsegs = 0; ObjMgrDataPtr omdp; SMSeqIdxPtr PNTR partsByLoc; SMSeqIdxPtr PNTR partsBySeqId; @@ -7534,7 +7566,7 @@ static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp) * *****************************************************************************/ -static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats) +static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats, Uint4 baseItemID) { BioseqPtr bsp; @@ -7553,6 +7585,7 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats) Int4 i; Int4 j; SMFeatItemPtr item; + SMFeatItemPtr last; BioseqPtr nuc; Int4 numfeats; Int4 numgenes; @@ -7568,7 +7601,7 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats) bssp = (BioseqSetPtr) sep->data.ptrvalue; if (bssp == NULL) return; for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { - IndexRecordedFeatures (sep, dorevfeats); + IndexRecordedFeatures (sep, dorevfeats, baseItemID); } return; } @@ -7643,6 +7676,25 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats) } } + /* gap feature in record overrides flatfile-generated feature */ + + if (baseItemID > 0) { + last = featsByPos [0]; + for (i = 1; i < numfeats; i++) { + item = featsByPos [i]; + if (item != NULL && last != NULL) { + if (last->subtype == FEATDEF_gap && item->subtype == FEATDEF_gap) { + if (last->left == item->left && last->right == item->right) { + if (item->itemID >= baseItemID) { + item->ignore = TRUE; + } + } + } + } + last = item; + } + } + /* build arrays of sorted gene, mRNA, CDS, publication, and biosource features for lookup by overlap */ bspextra->genesByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numgenes), 0, FEATDEF_GENE); @@ -8338,6 +8390,7 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx ( { AdpBspPtr abp; AnnotDescPtr PNTR annotDescByID; + Uint4 baseItemID = 0; BioseqPtr bsp; BioseqExtraPtr bspextra; Int4 count; @@ -8414,6 +8467,19 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx ( AssignIDsInEntityEx (entityID, 0, NULL, extra); + /* get first feature itemID in remote feature tables (including generated gaps) */ + + for (vnp = extra; vnp != NULL && baseItemID == 0; vnp = vnp->next) { + bsp = (BioseqPtr) vnp->data.ptrvalue; + if (bsp == NULL) continue; + for (sap = bsp->annot; sap != NULL && baseItemID == 0; sap = sap->next) { + if (sap->type != 1) continue; + for (sfp = (SeqFeatPtr) sap->data; sfp != NULL && baseItemID == 0; sfp = sfp->next) { + baseItemID = sfp->idx.itemID; + } + } + } + /* set scope for FindAppropriateBioseq, FindFirstLocalBioseq */ oldscope = SeqEntrySetScope (sep); @@ -8481,7 +8547,7 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx ( /* finish building array of sorted features on each indexed bioseq */ - IndexRecordedFeatures (sep, dorevfeats); + IndexRecordedFeatures (sep, dorevfeats, baseItemID); /* set best protein feature for segmented protein bioseqs and their parts */ @@ -11163,6 +11229,7 @@ NLM_EXTERN Int4 LookupFarSeqIDs ( Boolean products, Boolean alignments, Boolean history, + Boolean inference, Boolean others ) @@ -11175,7 +11242,7 @@ NLM_EXTERN Int4 LookupFarSeqIDs ( func = smp->seq_id_precache_func; SeqMgrUnlock (); if (func == NULL) return 0; - return (*func) (sep, components, locations, products, alignments, history, others); + return (*func) (sep, components, locations, products, alignments, history, inference, others); } /***************************************************************************** diff --git a/api/seqmgr.h b/api/seqmgr.h index c80ab55e..89ff1bbc 100644 --- a/api/seqmgr.h +++ b/api/seqmgr.h @@ -29,7 +29,7 @@ * * Version Creation Date: 9/94 * -* $Revision: 6.59 $ +* $Revision: 6.60 $ * * File Description: Manager for Bioseqs and BioseqSets * @@ -40,6 +40,9 @@ * * * $Log: seqmgr.h,v $ +* Revision 6.60 2006/04/13 20:02:15 kans +* LookupFarSeqIDs takes inference parameter +* * Revision 6.59 2006/02/16 20:24:32 kans * added bad_order and mixed_strand fields to feature index - to be used for get best gene overlap function in cases of trans-splicing * @@ -316,7 +319,7 @@ typedef BioseqPtr (LIBCALLBACK * BSFetchTop) typedef BioseqPtr (LIBCALLBACK * BSFetch) PROTO((SeqIdPtr sip, Pointer data)); -typedef Int4 (LIBCALLBACK * SIDPreCacheFunc) (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, Boolean alignments, Boolean history, Boolean others); +typedef Int4 (LIBCALLBACK * SIDPreCacheFunc) (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, Boolean alignments, Boolean history, Boolean inference, Boolean others); typedef Int4 (LIBCALLBACK * SeqLenLookupFunc) (Int4 gi); typedef CharPtr (LIBCALLBACK * AccnVerLookupFunc) (Int4 gi); typedef SeqIdPtr (LIBCALLBACK * SeqIdSetLookupFunc) (Int4 gi); @@ -1118,6 +1121,7 @@ NLM_EXTERN Int4 LookupFarSeqIDs ( Boolean products, Boolean alignments, Boolean history, + Boolean inference, Boolean others ); diff --git a/api/seqport.c b/api/seqport.c index 89550a8a..b0e77145 100644 --- a/api/seqport.c +++ b/api/seqport.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/13/91 * -* $Revision: 6.147 $ +* $Revision: 6.150 $ * * File Description: Ports onto Bioseqs * @@ -39,6 +39,15 @@ * ------- ---------- ----------------------------------------------------- * * $Log: seqport.c,v $ +* Revision 6.150 2006/03/22 15:31:32 kans +* SeqPortStreamSeqLoc gives unique message when bailing on gi 0 as opposed to failure after trying to load +* +* Revision 6.149 2006/03/07 21:34:28 kans +* checks for gi 0 now also check for negative value +* +* Revision 6.148 2006/03/07 20:02:01 kans +* SeqPortStreamSeqLoc immediately treats gi 0 as an error +* * Revision 6.147 2006/01/23 13:01:41 bollin * when converting sequences from raw to delta, adjust any alignments that the * sequence may be part of. @@ -2978,6 +2987,21 @@ static Int4 SeqPortStreamSeqLoc ( sip = SeqLocId (slp); if (sip == NULL) return 0; + if (sip->choice == SEQID_GI && sip->data.intvalue <= 0) { + + /* gi 0 or negative is always a data error, just report and bail */ + + SeqIdWrite (sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1); + if (parentID != NULL) { + SeqIdWrite (parentID, pid, PRINTID_FASTA_LONG, sizeof (pid) - 1); + ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream ignoring Bioseq %s component of %s", buf, pid); + } else { + ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream ignoring Bioseq %s", buf); + } + sdp->failed = TRUE; + return 0; + } + bsp = BioseqLockById (sip); #ifdef OS_UNIX diff --git a/api/sequtil.c b/api/sequtil.c index c7d8d53e..07cb6305 100644 --- a/api/sequtil.c +++ b/api/sequtil.c @@ -29,13 +29,32 @@ * * Version Creation Date: 4/1/91 * -* $Revision: 6.190 $ +* $Revision: 6.196 $ * * File Description: Sequence Utilities for objseq and objsset * * Modifications: * -------------------------------------------------------------------------- * $Log: sequtil.c,v $ +* Revision 6.196 2006/04/06 15:41:19 kans +* added DG to WHICH_db_accession +* +* Revision 6.195 2006/04/05 16:45:01 bollin +* special left-right end handling for circular topology in GetThePointForOffset +* +* Revision 6.194 2006/03/30 17:04:53 kans +* DF is DDBJ CON accession prefix +* +* Revision 6.193 2006/03/23 18:31:32 kans +* added EB as NCBI EST +* +* Revision 6.192 2006/03/10 17:27:14 bollin +* make sure parentptr is BioseqSet in GetEarlierSeqIdPtr +* +* Revision 6.191 2006/03/10 17:13:45 bollin +* changes to GetEarlierSeqIdPtr to handle the situation where one of the Bioseqs +* has not been indexed. Fixes bug reported by Serge Bazhin +* * Revision 6.190 2006/02/16 17:19:14 kans * better handling of trans splicing in GetThePointForOffset, SeqLocStart (CB) * @@ -847,7 +866,7 @@ static char *this_file = __FILE__; #include <seqport.h> #include <sqnutils.h> /* prototype for SeqIdFindWorst */ #include <edutil.h> - +#include <subutil.h> /**** Static variables used for randomized sequence conversions ****/ @@ -6625,7 +6644,7 @@ NLM_EXTERN Int4 CheckPointInBioseq (SeqPntPtr sp, BioseqPtr in) static SeqIdPtr GetEarlierSeqIdPtr (SeqIdPtr sip1, SeqIdPtr sip2) { BioseqPtr bsp1, bsp2; - BioseqSetPtr bssp; + BioseqSetPtr bssp = NULL; SeqEntryPtr sep; if (sip1 == NULL && sip2 != NULL) @@ -6655,25 +6674,31 @@ static SeqIdPtr GetEarlierSeqIdPtr (SeqIdPtr sip1, SeqIdPtr sip2) { return sip1; } + + if (bsp1->idx.parentptr != NULL && bsp2->idx.parentptr != 0 && bsp1->idx.parentptr != bsp2->idx.parentptr) + { + return NULL; + } + if (bsp1->idx.parentptr != NULL && bsp1->idx.parenttype == OBJ_BIOSEQSET) { + bssp = bsp1->idx.parentptr; + } else if (bsp2->idx.parentptr != NULL && bsp2->idx.parenttype == OBJ_BIOSEQSET) { + bssp = bsp2->idx.parentptr; + } + + if (bssp == NULL) return NULL; - if (bsp1->idx.parenttype == OBJ_BIOSEQSET - && bsp2->idx.parenttype == OBJ_BIOSEQSET - && bsp1->idx.parentptr == bsp2->idx.parentptr) + for (sep = bssp->seq_set; sep != NULL; sep = sep->next) { - bssp = (BioseqSetPtr) bsp1->idx.parentptr; - for (sep = bssp->seq_set; sep != NULL; sep = sep->next) + if (sep->data.ptrvalue == bsp1) { - if (sep->data.ptrvalue == bsp1) - { - return sip1; - } - else if (sep->data.ptrvalue == bsp2) - { - return sip2; - } + return sip1; + } + else if (sep->data.ptrvalue == bsp2) + { + return sip2; } } - return sip1; + return NULL; } /***************************************************************************** @@ -6689,8 +6714,16 @@ Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end) Int4 lowest = -1, highest = 0, tmp; SeqIdPtr low_sip = NULL, high_sip = NULL, first_sip = NULL, last_sip = NULL; Boolean id_same; + BioseqPtr bsp; + Boolean is_circular = FALSE; pnt = NULL; /* get first or last single span type in "of"*/ + + bsp = BioseqFind (SeqLocId(of)); + if (bsp != NULL && bsp->topology == TOPOLOGY_CIRCULAR) { + is_circular = TRUE; + } + while ((pnt = SeqLocFindNext(of, pnt)) != NULL) { last_strand = SeqLocStrand (pnt); @@ -6751,12 +6784,32 @@ Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end) switch (which_end) { case SEQLOC_LEFT_END: - target->point = lowest; - target->id = low_sip; + if (is_circular) { + if (all_minus) { + target->point = SeqLocStart (last); + target->id = last_sip; + } else { + target->point = SeqLocStart (first); + target->id = first_sip; + } + } else { + target->point = lowest; + target->id = low_sip; + } break; case SEQLOC_RIGHT_END: - target->point = highest; - target->id = high_sip; + if (is_circular) { + if (all_minus) { + target->point = SeqLocStop (first); + target->id = first_sip; + } else { + target->point = SeqLocStop (last); + target->id = last_sip; + } + } else { + target->point = highest; + target->id = high_sip; + } break; case SEQLOC_START: if (all_minus) @@ -9325,7 +9378,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"DT") == 0) || (StringICmp(temp,"DV") == 0) || (StringICmp(temp,"DW") == 0) || - (StringICmp(temp,"DY") == 0) ) { /* NCBI EST */ + (StringICmp(temp,"DY") == 0) || + (StringICmp(temp,"EB") == 0) ) { /* NCBI EST */ retcode = ACCN_NCBI_EST; } else if ((StringICmp(temp,"BV") == 0)) { /* NCBI STS */ retcode = ACCN_NCBI_STS; @@ -9414,7 +9468,9 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) retcode = ACCN_DDBJ_GENOME; } else if ((StringICmp(temp,"AK") == 0)) { /* DDBJ HTGS */ retcode = ACCN_DDBJ_HTGS; - } else if ((StringICmp(temp,"BA") == 0)) { /* DDBJ CON division */ + } else if ((StringICmp(temp,"BA") == 0) || + (StringICmp(temp,"DF") == 0) || + (StringICmp(temp,"DG") == 0)) { /* DDBJ CON division */ retcode = ACCN_DDBJ_CON; } else if ((StringICmp(temp,"BD") == 0) || (StringICmp(temp,"DD") == 0)) { /* DDBJ patent division */ diff --git a/api/sqnutil1.c b/api/sqnutil1.c index e44797e5..432b130c 100644 --- a/api/sqnutil1.c +++ b/api/sqnutil1.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.369 $ +* $Revision: 6.376 $ * * File Description: * @@ -2685,9 +2685,7 @@ static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq) Int2 choice = 0; GeneRefPtr grp; - if (StringICmp (gbq->qual, "pseudo") == 0) { - choice = 1; - } else if (StringICmp (gbq->qual, "map") == 0) { + if (StringICmp (gbq->qual, "map") == 0) { choice = 2; } else if (StringICmp (gbq->qual, "allele") == 0) { choice = 3; @@ -2698,9 +2696,6 @@ static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq) grp = (GeneRefPtr) sfp->data.value.ptrvalue; if (grp == NULL) return FALSE; switch (choice) { - case 1 : - grp->pseudo = TRUE; - break; case 2 : if (grp->maploc != NULL) return FALSE; if (StringHasNoText (gbq->val)) return FALSE; @@ -2785,6 +2780,7 @@ extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset) { Int4 diff; + Int2 j; Boolean locmap; int num_errs; CharPtr pos; @@ -2809,6 +2805,12 @@ extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset) rrp->ext.choice = 2; trp = (tRNAPtr) MemNew (sizeof (tRNA)); rrp->ext.value.ptrvalue = (Pointer) trp; + if (trp != NULL) { + trp->aatype = 2; + for (j = 0; j < 6; j++) { + trp->codon [j] = 255; + } + } } if (rrp->ext.choice != 2) return FALSE; @@ -5201,7 +5203,8 @@ static void FixOldDbxrefs (ValNodePtr vnp) dbt->db = MemFree (dbt->db); dbt->db = StringSave ("SubtiList"); } - if (StringICmp (dbt->db, "Swiss-Prot") == 0) { + if (StringICmp (dbt->db, "Swiss-Prot") == 0 || + StringICmp (dbt->db, "SWISSPROT") == 0) { dbt->db = MemFree (dbt->db); dbt->db = StringSave ("UniProt/Swiss-Prot"); } else if (StringICmp (dbt->db, "TrEMBL") == 0) { @@ -7658,6 +7661,7 @@ NLM_EXTERN void CleanUpSeqFeat ( BioseqPtr bsp; CodeBreakPtr cbp; CdRegionPtr crp; + GeneRefPtr grp; Boolean hasNulls; SeqIdPtr id; ImpFeatPtr ifp; @@ -7774,7 +7778,15 @@ NLM_EXTERN void CleanUpSeqFeat ( CleanupSeqLoc (sfp->location); strand = SeqLocStrand (sfp->location); id = SeqLocId (sfp->location); - if (sfp->data.choice == SEQFEAT_CDREGION) { + if (sfp->data.choice == SEQFEAT_GENE) { + grp = (GeneRefPtr) sfp->data.value.ptrvalue; + if (grp != NULL) { + if (grp->pseudo) { + sfp->pseudo = TRUE; + grp->pseudo = FALSE; + } + } + } else if (sfp->data.choice == SEQFEAT_CDREGION) { crp = (CdRegionPtr) sfp->data.value.ptrvalue; if (crp != NULL) { crp->code_break = SortCodeBreaks (sfp, crp->code_break); @@ -7797,6 +7809,12 @@ NLM_EXTERN void CleanUpSeqFeat ( } } else if (sfp->data.choice == SEQFEAT_RNA) { rrp = (RnaRefPtr) sfp->data.value.ptrvalue; + if (rrp != NULL) { + if (rrp->pseudo) { + sfp->pseudo = TRUE; + rrp->pseudo = FALSE; + } + } if (rrp != NULL && rrp->ext.choice == 2) { trp = (tRNAPtr) rrp->ext.value.ptrvalue; if (trp != NULL && trp->anticodon != NULL) { @@ -9827,6 +9845,40 @@ NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, Visi return index; } +typedef struct uopdata { + UserObjectPtr rsult; + CharPtr tag; +} UopData, PNTR UopDataPtr; + +static void FindUopProc ( + UserObjectPtr uop, + Pointer userdata +) + +{ + ObjectIdPtr oip; + UopDataPtr udp; + + if (uop == NULL || userdata == NULL) return; + oip = uop->type; + if (oip == NULL) return; + udp = (UopDataPtr) userdata; + if (StringICmp (oip->str, udp->tag) != 0) return; + udp->rsult = uop; +} + +NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag) + +{ + UopData ud; + + if (top == NULL || StringHasNoText (tag)) return NULL; + ud.rsult = NULL; + ud.tag = tag; + VisitUserObjectsInUop (top, (Pointer) &ud, FindUopProc); + return ud.rsult; +} + NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop) { diff --git a/api/sqnutil2.c b/api/sqnutil2.c index f5d24d2b..7ef549ec 100644 --- a/api/sqnutil2.c +++ b/api/sqnutil2.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.244 $ +* $Revision: 6.251 $ * * File Description: * @@ -1562,18 +1562,19 @@ NLM_EXTERN SqnTagPtr SqnTagFree (SqnTagPtr stp) return MemFree (stp); } -static Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2) +extern Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2) { Char ch1, ch2; + if (StringHasNoText (str1) && StringHasNoText (str2)) return TRUE; if (StringHasNoText (str1) || StringHasNoText (str2)) return FALSE; ch1 = *str1; ch2 = *str2; while (ch1 != '\0' && ch2 != '\0') { if (TO_LOWER (ch1) != TO_LOWER (ch2)) { - if ((ch1 != '-' && ch1 != '_') || (ch2 != '_' && ch2 != '-')) return FALSE; + if ((ch1 != '-' && ch1 != '_' && ch1 != ' ') || (ch2 != '_' && ch2 != '-' && ch2 != ' ')) return FALSE; } str1++; str2++; @@ -1582,7 +1583,7 @@ static Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2) } if (TO_LOWER (ch1) != TO_LOWER (ch2)) { - if ((ch1 != '-' && ch1 != '_') || (ch2 != '_' && ch2 != '-')) return FALSE; + if ((ch1 != '-' && ch1 != '_' && ch1 != ' ') || (ch2 != '_' && ch2 != '-' && ch2 != ' ')) return FALSE; } return TRUE; @@ -2436,6 +2437,74 @@ NLM_EXTERN UserObjectPtr ParseTitleIntoTpaAssembly ( return uop; } +NLM_EXTERN UserObjectPtr ParseStringIntoStructuredComment ( + UserObjectPtr uop, + CharPtr str, + CharPtr prefix, + CharPtr suffix +) + +{ + Char ch; + CharPtr field; + CharPtr item; + CharPtr last; + CharPtr ptr; + CharPtr tmp; + + if (uop == NULL) { + uop = CreateStructuredCommentUserObject (); + if (uop == NULL) return uop; + } + if (str == NULL) return uop; + + tmp = StringSave (str); + if (tmp == NULL) return uop; + + last = tmp; + if (StringDoesHaveText (prefix)) { + ptr = StringStr (last, prefix); + if (ptr != NULL) { + last = ptr + StringLen (prefix); + } + } + if (StringDoesHaveText (suffix)) { + ptr = StringStr (last, suffix); + if (ptr != NULL) { + *ptr = '\0'; + } + } + + ptr = last; + ch = *ptr; + while (ch != '\0') { + field = last; + ptr = StringChr (last, '='); + if (ptr != NULL) { + *ptr = '\0'; + ptr++; + item = ptr; + last = StringChr (ptr, ';'); + if (last != NULL) { + *last = '\0'; + last++; + ch = *last; + } else { + ch = '\0'; + } + TrimSpacesAroundString (field); + TrimSpacesAroundString (item); + AddItemStructuredCommentUserObject (uop, field, item); + } else { + ch = '\0'; + } + } + + MemFree (tmp); + + return uop; +} + /* PHRAP file reading functions */ static Boolean HasNoText (CharPtr str) @@ -4934,6 +5003,63 @@ static Boolean InvalidInference (CharPtr str) return TRUE; } +static void ParseCodonRecognized (CharPtr val, tRNAPtr trp) + +{ + Char buf [256]; + Char codon [16]; + ValNodePtr head = NULL; + Int2 i; + Int2 j; + CharPtr ptr; + CharPtr str; + tRNA tr; + ValNodePtr vnp; + + if (trp == NULL) return; + for (j = 0; j < 6; j++) { + trp->codon [j] = 255; + } + if (StringHasNoText (val)) return; + + MemSet ((Pointer) &tr, 0, sizeof (tRNA)); + + StringNCpy_0 (buf, val, sizeof (buf)); + str = buf; + while (StringDoesHaveText (str)) { + ptr = StringChr (str, ','); + if (ptr != NULL) { + *ptr = '\0'; + ptr++; + } + TrimSpacesAroundString (str); + if (StringDoesHaveText (str)) { + for (j = 0; j < 6; j++) { + tr.codon [j] = 255; + } + StringCpy (codon, str); + for (i = 0; i < 3; i++) { + if (codon [i] == 'U') { + codon [i] = 'T'; + } + } + ParseDegenerateCodon (&tr, (Uint1Ptr) codon); + for (i = 0; i < 6; i++) { + if (tr.codon [i] == 255) continue; + ValNodeAddInt (&head, 0, (long) tr.codon [i]); + } + } + str = ptr; + } + if (head == NULL) return; + + head = ValNodeSort (head, SortByIntvalue); + head = UniqueIntValNode (head); + for (vnp = head, j = 0; vnp != NULL && j < 6; vnp = vnp->next, j++) { + trp->codon [j] = (Uint1) vnp->data.intvalue; + } +} + static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, Int4 offset) { @@ -5061,6 +5187,20 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, (StringCmp (qual, "codon_recognized") == 0 || StringCmp (qual, "codons_recognized") == 0)) { rrp = (RnaRefPtr) sfp->data.value.ptrvalue; if (rrp != NULL && rrp->type == 3) { + if (rrp->ext.choice == 0 && rrp->ext.value.ptrvalue == NULL) { + rrp->ext.choice = 2; + trna = (tRNAPtr) MemNew (sizeof (tRNA)); + rrp->ext.value.ptrvalue = (Pointer) trna; + if (trna != NULL) { + trna->aatype = 2; + for (j = 0; j < 6; j++) { + trna->codon [j] = 255; + } + } + } + trna = (tRNAPtr) rrp->ext.value.ptrvalue; + ParseCodonRecognized (val, trna); + /* StringNCpy_0 ((CharPtr) codon, val, sizeof (codon)); if (StringLen ((CharPtr) codon) == 3) { for (j = 0; j < 3; j++) { @@ -5068,11 +5208,11 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, codon [j] = 'T'; } } - trna = (tRNAPtr) rrp->ext.value.ptrvalue; if (trna != NULL) { ParseDegenerateCodon (trna, (Uint1Ptr) codon); } } + */ } } else if (ifp != NULL && StringICmp (ifp->key, "variation") == 0 && ParseQualIntoSnpUserObject (sfp, qual, val)) { } else if (ifp != NULL && StringICmp (ifp->key, "STS") == 0 && ParseQualIntoStsUserObject (sfp, qual, val)) { @@ -5107,6 +5247,9 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, sfp->comment = str; } return; + } else if (qnum == GBQUAL_pseudo) { + sfp->pseudo = TRUE; + return; } else if ((qnum == GBQUAL_gene || qnum == GBQUAL_locus_tag) && sfp->data.choice != SEQFEAT_GENE) { if (StringCmp (val, "-") == 0) { val = NULL; @@ -5203,7 +5346,7 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, } } else if (sfp->data.choice == SEQFEAT_GENE) { - if (qnum == GBQUAL_gene || qnum == GBQUAL_pseudo || qnum == GBQUAL_allele || qnum == GBQUAL_map || qnum == GBQUAL_locus_tag) { + if (qnum == GBQUAL_gene || qnum == GBQUAL_allele || qnum == GBQUAL_map || qnum == GBQUAL_locus_tag) { if (qnum == GBQUAL_gene) { grp = (GeneRefPtr) sfp->data.value.ptrvalue; if (grp != NULL) { @@ -5219,11 +5362,6 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, ValNodeCopyStr (&(grp->syn), 0, val); } } - } else if (qnum == GBQUAL_pseudo) { - grp = (GeneRefPtr) sfp->data.value.ptrvalue; - if (grp != NULL) { - grp->pseudo = TRUE; - } } else if (qnum == GBQUAL_allele) { grp = (GeneRefPtr) sfp->data.value.ptrvalue; if (grp != NULL) { @@ -5280,8 +5418,6 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, } } return; - } else if (qnum == GBQUAL_pseudo) { - sfp->pseudo = TRUE; } } else if (sfp->data.choice == SEQFEAT_PROT) { if (qnum == GBQUAL_function || qnum == GBQUAL_EC_number || qnum == GBQUAL_product) { @@ -5304,14 +5440,19 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, if (rrp->type == 3) { aa = ParseTRnaString (val, &justTrnaText, codon, FALSE); if (aa != 0) { - rrp->ext.choice = 2; - trna = (tRNAPtr) MemNew (sizeof (tRNA)); - rrp->ext.value.ptrvalue = (Pointer) trna; - if (trna != NULL) { - trna->aatype = 2; - for (j = 0; j < 6; j++) { - trna->codon [j] = 255; + if (rrp->ext.choice == 0 && rrp->ext.value.ptrvalue == NULL) { + rrp->ext.choice = 2; + trna = (tRNAPtr) MemNew (sizeof (tRNA)); + rrp->ext.value.ptrvalue = (Pointer) trna; + if (trna != NULL) { + trna->aatype = 2; + for (j = 0; j < 6; j++) { + trna->codon [j] = 255; + } } + } + trna = (tRNAPtr) rrp->ext.value.ptrvalue; + if (trna != NULL) { if (justTrnaText) { for (j = 0; j < 6; j++) { trna->codon [j] = codon [j]; @@ -5357,9 +5498,6 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, return; } else if (qnum == GBQUAL_anticodon) { if (ParseAnticodon (sfp, val, offset)) return; - } else if (qnum == GBQUAL_pseudo) { - sfp->pseudo = TRUE; - return; } } else if (sfp->data.choice == SEQFEAT_BIOSRC) { if (ParseQualIntoBioSource (sfp, qual, val)) return; @@ -8555,6 +8693,8 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le Int2 bs_max = 0, bs_min = 0; Int4 new_pos, old_pos; Int2 val; + Int4 loc_stop; + Boolean changed = FALSE; if (sgp == NULL || num_to_trim < 1) { @@ -8605,6 +8745,7 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le sgp->numval = new_len; sgp->max.realvalue = fhmax; sgp->min.realvalue = fhmin; + changed = TRUE; } else if (sgp->flags[2] == 2) { @@ -8640,6 +8781,7 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le sgp->numval = new_len; sgp->max.intvalue = intmax; sgp->min.intvalue = intmin; + changed = TRUE; } else if (sgp->flags[2] == 3) { @@ -8680,6 +8822,14 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le sgp->numval = new_len; sgp->max.intvalue = bs_max; sgp->min.intvalue = bs_min; + changed = TRUE; + } + if (changed) + { + loc_stop = SeqLocStop (sgp->loc); + sgp->loc = SeqLocDelete (sgp->loc, SeqLocId (sgp->loc), + loc_stop - num_to_trim + 1, + loc_stop, FALSE, &changed); } } diff --git a/api/sqnutil3.c b/api/sqnutil3.c index ef2a21bf..007ce12f 100644 --- a/api/sqnutil3.c +++ b/api/sqnutil3.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/7/00 * -* $Revision: 6.69 $ +* $Revision: 6.74 $ * * File Description: * @@ -115,7 +115,7 @@ static void SfpClearFeatIDs ( ClearFeatIDXrefs (sfp); } -NLM_EXTERN void ClearCDSmRNAfeatureIDs ( +NLM_EXTERN void ClearFeatureIDs ( SeqEntryPtr sep ) @@ -123,11 +123,20 @@ NLM_EXTERN void ClearCDSmRNAfeatureIDs ( VisitFeaturesInSep (sep, NULL, SfpClearFeatIDs); } +typedef struct idpair { + Int4 before; + Int4 after; +} IdPairData, PNTR IdPairPtr; + typedef struct fiddata { - Int4 highestID; + Int4 highestID; + Int4 highestRef; + Int4 offset; + Int4 count; + IdPairPtr pairs; } FidData, PNTR FidDataPtr; -static void FindHighestFeatureID ( +static void FindHighestFeatID ( SeqFeatPtr sfp, Pointer userdata ) @@ -138,7 +147,6 @@ static void FindHighestFeatureID ( SeqFeatXrefPtr xref; if (sfp == NULL) return; - if (sfp->idx.subtype != FEATDEF_CDS && sfp->idx.subtype != FEATDEF_mRNA) return; fip = (FidDataPtr) userdata; if (fip == NULL) return; @@ -147,7 +155,7 @@ static void FindHighestFeatureID ( if (oip != NULL) { if (oip->str == NULL) { if (oip->id >= fip->highestID) { - fip->highestID = oip->id + 1; + fip->highestID = oip->id; } } } @@ -158,15 +166,29 @@ static void FindHighestFeatureID ( oip = (ObjectIdPtr) xref->id.value.ptrvalue; if (oip != NULL) { if (oip->str == NULL) { - if (oip->id >= fip->highestID) { - fip->highestID = oip->id + 1; + if (oip->id >= fip->highestRef) { + fip->highestRef = oip->id; } } } } } -static void SfpAssignCDSmRNAfeatureIDs ( +NLM_EXTERN Int4 FindHighestFeatureID ( + SeqEntryPtr sep +) + +{ + FidData fd; + + MemSet ((Pointer) &fd, 0, sizeof (FidData)); + fd.highestID = 0; + fd.highestRef = 0; + VisitFeaturesInSep (sep, (Pointer) &fd, FindHighestFeatID); + return fd.highestID; +} + +static void SfpAssignFeatIDs ( SeqFeatPtr sfp, Pointer userdata ) @@ -176,32 +198,241 @@ static void SfpAssignCDSmRNAfeatureIDs ( ObjectIdPtr oip; if (sfp == NULL) return; - if (sfp->idx.subtype != FEATDEF_CDS && sfp->idx.subtype != FEATDEF_mRNA) return; fip = (FidDataPtr) userdata; if (fip == NULL) return; if (sfp->id.choice == 3) return; oip = ObjectIdNew (); if (oip == NULL) return; + + (fip->highestID)++; oip->id = fip->highestID; sfp->id.value.ptrvalue = (Pointer) oip; sfp->id.choice = 3; +} + +NLM_EXTERN void AssignFeatureIDs ( + SeqEntryPtr sep +) + +{ + FidData fd; + + MemSet ((Pointer) &fd, 0, sizeof (FidData)); + fd.highestID = 0; + fd.highestRef = 0; + VisitFeaturesInSep (sep, (Pointer) &fd, FindHighestFeatID); + VisitFeaturesInSep (sep, (Pointer) &fd, SfpAssignFeatIDs); +} + +static void SfpOffsetFeatIDs ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + FidDataPtr fip; + ObjectIdPtr oip; + + if (sfp == NULL) return; + fip = (FidDataPtr) userdata; + if (fip == NULL) return; + + if (sfp->id.choice == 3) { + oip = (ObjectIdPtr) sfp->id.value.ptrvalue; + if (oip != NULL) { + if (oip->str == NULL) { + oip->id += fip->offset; + } + } + } +} + +NLM_EXTERN void OffsetFeatureIDs ( + SeqEntryPtr sep, + Int4 offset +) + +{ + FidData fd; + + MemSet ((Pointer) &fd, 0, sizeof (FidData)); + fd.offset = offset; + VisitFeaturesInSep (sep, (Pointer) &fd, SfpOffsetFeatIDs); +} + +static void SfpOffsetFeatIDXrefs ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + FidDataPtr fip; + ObjectIdPtr oip; + SeqFeatXrefPtr xref; + + if (sfp == NULL) return; + fip = (FidDataPtr) userdata; + if (fip == NULL) return; + + for (xref = sfp->xref; xref != NULL; xref = xref->next) { + if (xref->id.choice != 3) continue; + oip = (ObjectIdPtr) xref->id.value.ptrvalue; + if (oip != NULL) { + if (oip->str == NULL) { + oip->id += fip->offset; + } + } + } +} + +NLM_EXTERN void OffsetFeatureIDXrefs ( + SeqEntryPtr sep, + Int4 offset +) + +{ + FidData fd; + + MemSet ((Pointer) &fd, 0, sizeof (FidData)); + fd.offset = offset; + VisitFeaturesInSep (sep, (Pointer) &fd, SfpOffsetFeatIDXrefs); +} + +static void SfpMakePairList ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + FidDataPtr fip; + Int4 idx; + IdPairPtr ipp; + ObjectIdPtr oip; + + if (sfp == NULL) return; + fip = (FidDataPtr) userdata; + if (fip == NULL) return; + if (fip->pairs == NULL) return; + + if (sfp->id.choice != 3) return; + oip = (ObjectIdPtr) sfp->id.value.ptrvalue; + if (oip == NULL) return; + + idx = fip->highestID; + ipp = &(fip->pairs [idx]); (fip->highestID)++; + ipp->before = oip->id; + ipp->after = fip->highestID; +} + +static int LIBCALLBACK SortPairList (VoidPtr ptr1, VoidPtr ptr2) + +{ + IdPairPtr ipp1 = (IdPairPtr) ptr1; + IdPairPtr ipp2 = (IdPairPtr) ptr2; + + if (ipp1 == NULL || ipp2 == NULL) return 0; + if (ipp1->before > ipp2->before) return 1; + if (ipp1->before < ipp2->before) return -1; + return 0; +} + +static Int4 LookupNewFeatID ( + FidDataPtr fip, + Int4 before +) + +{ + IdPairPtr ipp; + Int4 L; + Int4 mid; + Int4 R; + + if (fip == NULL || fip->pairs == NULL || fip->count < 1) return 0; + + L = 0; + R = fip->count - 1; + while (L < R) { + mid = (L + R) / 2; + ipp = &(fip->pairs [mid]); + if (ipp->before < before) { + L = mid + 1; + } else { + R = mid; + } + } + + if (R < fip->count) { + ipp = &(fip->pairs [R]); + if (ipp->before == before) return ipp->after; + } + + return 0; +} + +static void SfpReassignPairList ( + SeqFeatPtr sfp, + Pointer userdata +) + +{ + FidDataPtr fip; + ObjectIdPtr oip; + SeqFeatXrefPtr xref; + + if (sfp == NULL) return; + fip = (FidDataPtr) userdata; + if (fip == NULL) return; + if (fip->pairs == NULL) return; + + if (sfp->id.choice == 3) { + oip = (ObjectIdPtr) sfp->id.value.ptrvalue; + if (oip != NULL) { + if (oip->str == NULL) { + oip->id = LookupNewFeatID (fip, oip->id); + } + } + } + + for (xref = sfp->xref; xref != NULL; xref = xref->next) { + if (xref->id.choice != 3) continue; + oip = (ObjectIdPtr) xref->id.value.ptrvalue; + if (oip != NULL) { + if (oip->str == NULL) { + oip->id = LookupNewFeatID (fip, oip->id); + } + } + } } -NLM_EXTERN void AssignCDSmRNAfeatureIDs ( +NLM_EXTERN void ReassignFeatureIDs ( SeqEntryPtr sep ) { + Int4 count; FidData fd; + count = VisitFeaturesInSep (sep, NULL, NULL); + if (count < 1) return; + MemSet ((Pointer) &fd, 0, sizeof (FidData)); - fd.highestID = 1; - VisitFeaturesInSep (sep, (Pointer) &fd, FindHighestFeatureID); - VisitFeaturesInSep (sep, (Pointer) &fd, SfpAssignCDSmRNAfeatureIDs); + fd.highestID = 0; + fd.highestRef = 0; + fd.count = count; + fd.pairs = (IdPairPtr) MemNew (sizeof (IdPairData) * (count + 1)); + if (fd.pairs == NULL) return; + + VisitFeaturesInSep (sep, (Pointer) &fd, SfpMakePairList); + + HeapSort (fd.pairs, (size_t) count, sizeof (IdPairData), SortPairList); + + VisitFeaturesInSep (sep, (Pointer) &fd, SfpReassignPairList); + + MemFree (fd.pairs); } typedef struct vcmdata { @@ -368,7 +599,7 @@ NLM_EXTERN void LinkCDSmRNAbyOverlap ( ) { - AssignCDSmRNAfeatureIDs (sep); + AssignFeatureIDs (sep); VisitBioseqsInSep (sep, NULL, BspLinkCDSmRNAbyOverlap); } @@ -515,97 +746,100 @@ static void BspLinkCDSmRNAbyProduct ( if (cdna->idx.parenttype == OBJ_BIOSEQSET) { bssp = (BioseqSetPtr) cdna->idx.parentptr; if (bssp == NULL) continue; - if (bssp->_class != BioseqseqSet_class_nuc_prot) continue; - prot = NULL; - if (VisitBioseqsInSet (bssp, (Pointer) &prot, FindProtBsp) != 2) continue; - for (sip = prot->id; sip != NULL; sip = sip->next) { - MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1); - - /* binary search */ - - L = 0; - R = numcds - 1; - while (L < R) { - mid = (L + R) / 2; - odp = cdsarray [mid]; - compare = StringCmp (odp->revstr, buf); - if (compare < 0) { - L = mid + 1; - } else { - R = mid; - } - } - odp = cdsarray [R]; - if (odp != NULL && StringCmp (odp->revstr, buf) == 0) { - cds = odp->sfp; - if (cds == NULL) continue; - - /* make reciprocal feature ID xrefs */ - - if (cds->id.choice == 3) { - oip = (ObjectIdPtr) cds->id.value.ptrvalue; - if (oip != NULL && oip->str == NULL) { - id = oip->id; - if (id > 0) { - for (xref = mrna->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue; - if (xref != NULL) { - oip = (ObjectIdPtr) xref->id.value.ptrvalue; - if (oip != NULL) { - if (oip->str != NULL) { - oip->str = MemFree (oip->str); - } - oip->id = id; - } - } else { - xref = SeqFeatXrefNew (); - if (xref != NULL) { - oip = ObjectIdNew (); - if (oip != NULL) { - oip->id = id; - xref->id.choice = 3; - xref->id.value.ptrvalue = (Pointer) oip; - xref->next = mrna->xref; - mrna->xref = xref; - } - } - } - } - } - } - - if (mrna->id.choice == 3) { - oip = (ObjectIdPtr) mrna->id.value.ptrvalue; - if (oip != NULL && oip->str == NULL) { - id = oip->id; - if (id > 0) { - for (xref = cds->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue; - if (xref != NULL) { - oip = (ObjectIdPtr) xref->id.value.ptrvalue; - if (oip != NULL) { - if (oip->str != NULL) { - oip->str = MemFree (oip->str); - } - oip->id = id; - } - } else { - xref = SeqFeatXrefNew (); - if (xref != NULL) { - oip = ObjectIdNew (); - if (oip != NULL) { - oip->id = id; - xref->id.choice = 3; - xref->id.value.ptrvalue = (Pointer) oip; - xref->next = cds->xref; - cds->xref = xref; - } - } - } - } - } - } - } + if (bssp->_class == BioseqseqSet_class_nuc_prot) { + prot = NULL; + if (VisitBioseqsInSet (bssp, (Pointer) &prot, FindProtBsp) == 2) { + for (sip = prot->id; sip != NULL; sip = sip->next) { + MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1); + + /* binary search */ + + L = 0; + R = numcds - 1; + while (L < R) { + mid = (L + R) / 2; + odp = cdsarray [mid]; + compare = StringCmp (odp->revstr, buf); + if (compare < 0) { + L = mid + 1; + } else { + R = mid; + } + } + odp = cdsarray [R]; + if (odp != NULL && StringCmp (odp->revstr, buf) == 0) { + cds = odp->sfp; + if (cds == NULL) continue; + + /* make reciprocal feature ID xrefs */ + + if (cds->id.choice == 3) { + oip = (ObjectIdPtr) cds->id.value.ptrvalue; + if (oip != NULL && oip->str == NULL) { + id = oip->id; + if (id > 0) { + for (xref = mrna->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue; + if (xref != NULL) { + oip = (ObjectIdPtr) xref->id.value.ptrvalue; + if (oip != NULL) { + if (oip->str != NULL) { + oip->str = MemFree (oip->str); + } + oip->id = id; + } + } else { + xref = SeqFeatXrefNew (); + if (xref != NULL) { + oip = ObjectIdNew (); + if (oip != NULL) { + oip->id = id; + xref->id.choice = 3; + xref->id.value.ptrvalue = (Pointer) oip; + xref->next = mrna->xref; + mrna->xref = xref; + } + } + } + } + } + } + + if (mrna->id.choice == 3) { + oip = (ObjectIdPtr) mrna->id.value.ptrvalue; + if (oip != NULL && oip->str == NULL) { + id = oip->id; + if (id > 0) { + for (xref = cds->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue; + if (xref != NULL) { + oip = (ObjectIdPtr) xref->id.value.ptrvalue; + if (oip != NULL) { + if (oip->str != NULL) { + oip->str = MemFree (oip->str); + } + oip->id = id; + } + } else { + xref = SeqFeatXrefNew (); + if (xref != NULL) { + oip = ObjectIdNew (); + if (oip != NULL) { + oip->id = id; + xref->id.choice = 3; + xref->id.value.ptrvalue = (Pointer) oip; + xref->next = cds->xref; + cds->xref = xref; + } + } + } + } + } + } + } + } + } } } + BioseqUnlock (cdna); } } @@ -626,7 +860,7 @@ NLM_EXTERN void LinkCDSmRNAbyProduct ( ) { - AssignCDSmRNAfeatureIDs (sep); + AssignFeatureIDs (sep); VisitBioseqsInSep (sep, NULL, BspLinkCDSmRNAbyProduct); } diff --git a/api/sqnutils.h b/api/sqnutils.h index 279fa9c1..472da29a 100644 --- a/api/sqnutils.h +++ b/api/sqnutils.h @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.134 $ +* $Revision: 6.140 $ * * File Description: * @@ -233,9 +233,15 @@ NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata); NLM_EXTERN void ClearFeatIDs (SeqFeatPtr sfp); NLM_EXTERN void ClearFeatIDXrefs (SeqFeatPtr sfp); -NLM_EXTERN void ClearCDSmRNAfeatureIDs (SeqEntryPtr sep); +NLM_EXTERN void ClearFeatureIDs (SeqEntryPtr sep); +NLM_EXTERN Int4 FindHighestFeatureID (SeqEntryPtr sep); -NLM_EXTERN void AssignCDSmRNAfeatureIDs (SeqEntryPtr sep); +NLM_EXTERN void AssignFeatureIDs (SeqEntryPtr sep); + +NLM_EXTERN void OffsetFeatureIDs (SeqEntryPtr sep, Int4 offset); +NLM_EXTERN void OffsetFeatureIDXrefs (SeqEntryPtr sep, Int4 offset); + +NLM_EXTERN void ReassignFeatureIDs (SeqEntryPtr sep); NLM_EXTERN void LinkCDSmRNAbyOverlap (SeqEntryPtr sep); @@ -264,6 +270,8 @@ NLM_EXTERN CharPtr SqnTagFind (SqnTagPtr stp, CharPtr tag); NLM_EXTERN void ReadTechFromString (CharPtr str, MolInfoPtr mip); NLM_EXTERN void ReadCompletenessFromString (CharPtr str, MolInfoPtr mip); +extern Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2); + /* functions to extract BioSource, MolInfo, and Bioseq information from parsed titles */ NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource ( @@ -312,6 +320,15 @@ NLM_EXTERN UserObjectPtr ParseTitleIntoTpaAssembly ( UserObjectPtr uop ); +/* structured comment user object for flatfile presentation */ + +NLM_EXTERN UserObjectPtr ParseStringIntoStructuredComment ( + UserObjectPtr uop, + CharPtr str, + CharPtr prefix, + CharPtr suffix +); + /* UseLocalAsnloadDataAndErrMsg transiently sets paths to asnload, data, and errmsg if they are packaged in the same directory as the executing program. */ @@ -624,6 +641,10 @@ NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, Visit typedef void (*VisitUserObjectFunc) (UserObjectPtr uop, Pointer userdata); NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback); +/* explores sub UserObjects including "CombinedFeatureUserObjects" and finds by label */ + +NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag); + /* creates "CombinedFeatureUserObjects" sfp->ext to combine two user objects */ NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop); diff --git a/api/subutil.c b/api/subutil.c index 07588e4d..890f700f 100644 --- a/api/subutil.c +++ b/api/subutil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 11/3/93 * -* $Revision: 6.62 $ +* $Revision: 6.64 $ * * File Description: Utilities for creating ASN.1 submissions * @@ -40,6 +40,12 @@ * * * $Log: subutil.c,v $ +* Revision 6.64 2006/05/05 19:49:40 kans +* added StructuredComment user object creation functions +* +* Revision 6.63 2006/03/23 19:35:55 kans +* expand keywords with semicolons in AddGenBankBlockToEntry, not BSEC - already done in ParseTitleIntoGenBank +* * Revision 6.62 2006/02/06 19:00:15 kans * added CreateFeatureFetchPolicyUserObject * @@ -2543,6 +2549,51 @@ NLM_EXTERN Boolean SetGeneticCodeForEntry ( return TRUE; } +static void SubExpandSemicolonedKeyword (ValNodePtr vnp) + +{ + Char ch; + ValNodePtr lastvnp; + ValNodePtr newvnp; + ValNodePtr nextvnp; + CharPtr ptr; + CharPtr str; + CharPtr tmp; + + if (vnp == NULL) return; + str = (CharPtr) vnp->data.ptrvalue; + if (StringHasNoText (str)) return; + if (StringChr (str, ';') == NULL && StringChr (str, ',') == NULL) return; + + lastvnp = vnp; + nextvnp = vnp->next; + + tmp = StringSave (str); + str = tmp; + vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue); + while (StringDoesHaveText (str)) { + ptr = str; + ch = *ptr; + while (ch != '\0' && ch != ',' && ch != ';') { + ptr++; + ch = *ptr; + } + if (ptr != NULL && *ptr != '\0') { + *ptr = '\0'; + ptr++; + } + TrimSpacesAroundString (str); + newvnp = ValNodeCopyStr (NULL, 0, str); + if (newvnp != NULL) { + newvnp->next = nextvnp; + lastvnp->next = newvnp; + lastvnp = newvnp; + } + str = ptr; + } + MemFree (tmp); +} + NLM_EXTERN Boolean AddGenBankBlockToEntry ( NCBISubPtr submission, SeqEntryPtr entry , @@ -2552,7 +2603,7 @@ NLM_EXTERN Boolean AddGenBankBlockToEntry ( CharPtr keyword2 , CharPtr keyword3 ) { - ValNodePtr vnp; + ValNodePtr vnp, tmp; GBBlockPtr gbp; if ((submission == NULL) || (entry == NULL)) @@ -2570,6 +2621,10 @@ NLM_EXTERN Boolean AddGenBankBlockToEntry ( ValNodeCopyStr(&gbp->keywords, 0, keyword2); ValNodeCopyStr(&gbp->keywords, 0, keyword3); + for (tmp = gbp->keywords; tmp != NULL; tmp = tmp->next) { + SubExpandSemicolonedKeyword (tmp); + } + return TRUE; } @@ -5399,3 +5454,54 @@ NLM_EXTERN UserObjectPtr CreateFeatureFetchPolicyUserObject ( return uop; } +/* structured comment user object for flatfile presentation */ + +NLM_EXTERN UserObjectPtr CreateStructuredCommentUserObject (void) + +{ + ObjectIdPtr oip; + UserObjectPtr uop; + + uop = UserObjectNew (); + oip = ObjectIdNew (); + oip->str = StringSave ("StructuredComment"); + uop->type = oip; + + return uop; +} + +NLM_EXTERN void AddItemStructuredCommentUserObject ( + UserObjectPtr uop, + CharPtr field, + CharPtr str +) + +{ + UserFieldPtr curr; + ObjectIdPtr oip; + UserFieldPtr prev = NULL; + + if (uop == NULL || StringHasNoText (field) || StringHasNoText (str)) return; + oip = uop->type; + if (oip == NULL || StringICmp (oip->str, "StructuredComment") != 0) return; + + for (curr = uop->data; curr != NULL; curr = curr->next) { + prev = curr; + } + + curr = UserFieldNew (); + oip = ObjectIdNew (); + oip->str = StringSave (field); + curr->label = oip; + curr->choice = 1; /* visible string */ + curr->data.ptrvalue = (Pointer) StringSave (str); + + /* link curator at end of list */ + + if (prev != NULL) { + prev->next = curr; + } else { + uop->data = curr; + } +} + diff --git a/api/subutil.h b/api/subutil.h index eaba6f79..b4f1ba9d 100644 --- a/api/subutil.h +++ b/api/subutil.h @@ -31,7 +31,7 @@ * * Version Creation Date: 11/3/93 * -* $Revision: 6.53 $ +* $Revision: 6.54 $ * * File Description: Utilities for creating ASN.1 submissions * @@ -42,6 +42,9 @@ * * * $Log: subutil.h,v $ +* Revision 6.54 2006/05/05 19:49:40 kans +* added StructuredComment user object creation functions +* * Revision 6.53 2006/02/06 19:00:15 kans * added CreateFeatureFetchPolicyUserObject * @@ -1708,6 +1711,16 @@ NLM_EXTERN UserObjectPtr CreateFeatureFetchPolicyUserObject ( CharPtr policy ); +/* structured comment user object for flatfile presentation */ + +NLM_EXTERN UserObjectPtr CreateStructuredCommentUserObject (void); + +NLM_EXTERN void AddItemStructuredCommentUserObject ( + UserObjectPtr uop, + CharPtr field, + CharPtr str +); + #ifdef __cplusplus } diff --git a/api/tofasta.c b/api/tofasta.c index dcbbb7d3..19dd1daf 100644 --- a/api/tofasta.c +++ b/api/tofasta.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/12/91 * -* $Revision: 6.150 $ +* $Revision: 6.152 $ * * File Description: various sequence objects to fasta output * @@ -39,6 +39,12 @@ * ------- ---------- ----------------------------------------------------- * * $Log: tofasta.c,v $ +* Revision 6.152 2006/03/29 16:04:47 kans +* in AddNcTitles, do not clear mip->completeness - cannot determine why this was done in the past +* +* Revision 6.151 2006/03/08 21:29:47 kans +* FindNMDefLine calls ReplaceFlyDashPwithDashR if Drosophila melanogaster curated RefSeq +* * Revision 6.150 2006/01/10 22:19:29 kans * CreateDefLine calls DoTpaPrefix to handle TPA_exp and TPA_inf * @@ -3336,6 +3342,65 @@ static void FindNMFeats (SeqFeatPtr sfp, Pointer userdata) } } +static Boolean IsFlyCG (CharPtr str) + +{ + Char ch; + + if (StringHasNoText (str)) return FALSE; + + ch = *str; + if (ch != 'C') return FALSE; + str++; + ch = *str; + if (ch != 'G') return FALSE; + str++; + ch = *str; + while (IS_DIGIT (ch)) { + str++; + ch = *str; + } + if (ch != '-') return FALSE; + str++; + ch = *str; + if (ch != 'P') return FALSE; + str++; + ch = *str; + if (IS_ALPHA (ch)) { + str++; + ch = *str; + if (ch == '\0' || ch == ' ' || ch == ',' || ch == ';') return TRUE; + } + + return FALSE; +} + +static void ReplaceFlyDashPwithDashR (CharPtr str) + +{ + Char ch; + CharPtr ptr; + + while (StringDoesHaveText (str)) { + ch = *str; + while (IS_WHITESP (ch)) { + str++; + ch = *str; + } + if (IsFlyCG (str)) { + ptr = StringStr (str, "-P"); + if (ptr != NULL) { + ptr [1] = 'R'; + return; + } + } + while (ch != '\0' && (! IS_WHITESP (ch))) { + str++; + ch = *str; + } + } +} + static CharPtr FindNMDefLine (BioseqPtr bsp) { @@ -3377,6 +3442,10 @@ static CharPtr FindNMDefLine (BioseqPtr bsp) } } if (is_refseq) { + /* special case Drosophila RefSeq NM titles */ + if (StringICmp (orp->taxname, "Drosophila melanogaster") == 0) { + ReplaceFlyDashPwithDashR (buf); + } ptr = StringStr (buf, "isoform "); if (ptr != NULL) { *ptr = '\0'; @@ -4928,8 +4997,10 @@ static Boolean AddNcTitles (GatherObjectPtr gop) BioseqPtr bsp; Char buf [512]; Boolean is_nc; + /* MolInfoPtr mip; SeqDescrPtr sdp; + */ SeqIdPtr sip; CharPtr str; TextSeqIdPtr tsip; @@ -4962,6 +5033,7 @@ static Boolean AddNcTitles (GatherObjectPtr gop) } } + /* for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) { if (sdp->choice == Seq_descr_molinfo) { mip = (MolInfoPtr) sdp->data.ptrvalue; @@ -4972,6 +5044,7 @@ static Boolean AddNcTitles (GatherObjectPtr gop) } } } + */ return TRUE; } diff --git a/api/valid.c b/api/valid.c index 0a8ecae1..66191353 100644 --- a/api/valid.c +++ b/api/valid.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/1/94 * -* $Revision: 6.726 $ +* $Revision: 6.755 $ * * File Description: Sequence editing utilities * @@ -39,6 +39,93 @@ * ------- ---------- ----------------------------------------------------- * * $Log: valid.c,v $ +* Revision 6.755 2006/05/04 20:31:46 kans +* ERR_SEQ_FEAT_ErroneousException test for unclassified transcription discrepancy that should be mismatches in transcription +* +* Revision 6.754 2006/05/04 20:13:15 kans +* added ERR_SEQ_FEAT_ErroneousException - used for unclassified translation discrepancy that should be mismatches in translation +* +* Revision 6.753 2006/04/24 16:28:10 kans +* InvalidInferenceValue dropped to warning +* +* Revision 6.752 2006/04/21 17:59:18 kans +* added ignoreExceptions flag to vsp - for MrnaTransCheck and CdTransCheck +* +* Revision 6.751 2006/04/17 18:05:09 kans +* added ERR_SEQ_DESCR_BadPunctuation +* +* Revision 6.750 2006/04/14 20:35:40 kans +* ERR_SEQ_FEAT_UnindexedFeature also reports bioseq identifiers +* +* Revision 6.749 2006/04/14 20:18:23 kans +* CDSmRNAmismatch, CDSwithMultipleMRNAs and CDSwithNoMRNAOverlap tests in ValidateCDSmRNAmatch suppressed if genbank +* +* Revision 6.748 2006/04/14 17:06:00 kans +* ERR_SEQ_FEAT_FeatureLocationIsGi0 prints IDs of Bioseq +* +* Revision 6.747 2006/04/14 16:25:35 kans +* added ERR_SEQ_FEAT_PseudoCdsHasProtXref +* +* Revision 6.746 2006/04/12 14:24:02 kans +* CDSmRNArange and mRNAgeneRange reduced to WARNING level +* +* Revision 6.745 2006/04/11 18:04:08 kans +* IdXrefsNotReciprocal also tests MrnaProteinLink user object against cds->product +* +* Revision 6.744 2006/04/11 16:16:57 kans +* raised obsolete experimental evidence qualifier set message to error +* +* Revision 6.743 2006/04/10 15:23:24 kans +* CDSsLinkedToDifferentMRNAs used to suppress duplicate feature message on otherwise identical CDSs +* +* Revision 6.742 2006/04/10 14:56:57 kans +* report obsolete sfp->exp_ev only if no gi present +* +* Revision 6.741 2006/04/07 19:49:06 kans +* allow 5 letter + 7 digit accessions +* +* Revision 6.740 2006/04/06 18:43:50 kans +* report ERR_SEQ_FEAT_InvalidInferenceValue for new records with only sfp->exp_ev set +* +* Revision 6.739 2006/04/05 15:04:11 kans +* ERR_SEQ_FEAT_BadTrnaAA not reported for pseudo tRNA +* +* Revision 6.738 2006/03/30 19:24:56 kans +* made CheckDeltaForReuse more efficient +* +* Revision 6.737 2006/03/27 18:27:01 kans +* biop->origin == ORG_MUT no longer sets bvsp->is_artificial +* +* Revision 6.736 2006/03/27 14:52:34 kans +* ribosomal rna abutting test now handles same rRNA subtype split across segmented parts +* +* Revision 6.735 2006/03/24 18:57:26 kans +* if estimated_length unknown do not complain about different dash count +* +* Revision 6.734 2006/03/24 18:48:05 kans +* added ERR_SEQ_FEAT_GapFeatureProblem +* +* Revision 6.733 2006/03/17 16:50:41 kans +* added INTERNAL_SPACER_X to ITS-rRNA adjacency tests +* +* Revision 6.732 2006/03/10 13:38:48 kans +* raised DeltaComponentIsGi0 and FeatureLocationIsGi0 from ERROR to REJECT +* +* Revision 6.731 2006/03/09 13:55:35 kans +* replace qualifier can have * for termination in protein sequence +* +* Revision 6.730 2006/03/08 19:42:27 kans +* do not call BioseqLockById on 0 or negative gi number +* +* Revision 6.729 2006/03/07 21:34:28 kans +* checks for gi 0 now also check for negative value +* +* Revision 6.728 2006/03/07 21:15:59 kans +* added ERR_SEQ_INST_DeltaComponentIsGi0 and ERR_SEQ_FEAT_FeatureLocationIsGi0 +* +* Revision 6.727 2006/03/07 17:00:36 kans +* ERR_SEQ_FEAT_PseudoCdsViaGeneHasProduct and ERR_SEQ_FEAT_PseudoCdsHasProduct are now ERROR +* * Revision 6.726 2006/02/27 17:49:34 kans * added adjusted for low-quality genome exception for RefSeq models * @@ -2464,6 +2551,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) Boolean validateIDSet; Boolean seqSubmitParent; Boolean justShowAccession; + Boolean ignoreExceptions; Int2 validationLimit; ValidErrorFunc errfunc; Pointer userdata; @@ -2498,6 +2586,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) validateIDSet = vsp->validateIDSet; seqSubmitParent = vsp->seqSubmitParent; justShowAccession = vsp->justShowAccession; + ignoreExceptions = vsp->ignoreExceptions; validationLimit = vsp->validationLimit; errfunc = vsp->errfunc; userdata = vsp->userdata; @@ -2529,6 +2618,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp) vsp->validateIDSet = validateIDSet; vsp->seqSubmitParent = seqSubmitParent; vsp->justShowAccession = justShowAccession; + vsp->ignoreExceptions = ignoreExceptions; vsp->validationLimit = validationLimit; vsp->errfunc = errfunc; vsp->userdata = userdata; @@ -2810,7 +2900,8 @@ static CharPtr err1Label [] = { "LeadingX", "InternalNsInSeqRaw", "InternalNsAdjacentToGap", - "CaseDifferenceInSeqID" + "CaseDifferenceInSeqID", + "DeltaComponentIsGi0" }; static CharPtr err2Label [] = { @@ -2850,7 +2941,8 @@ static CharPtr err2Label [] = { "FastaBracketTitle", "MissingText", "BadCollectionDate", - "BadPCRPrimerSequence" + "BadPCRPrimerSequence", + "BadPunctuation" }; static CharPtr err3Label [] = { @@ -3005,7 +3097,11 @@ static CharPtr err5Label [] = { "FeatureRefersToAccession", "SelfReferentialProduct", "ITSdoesNotAbutRRNA", - "FeatureSeqIDCaseDifference" + "FeatureSeqIDCaseDifference", + "FeatureLocationIsGi0", + "GapFeatureProblem", + "PseudoCdsHasProtXref", + "ErroneousException" }; static CharPtr err6Label [] = { @@ -3670,7 +3766,12 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp) } if (vsp->useSeqMgrIndexes) { if (SeqMgrGetDesiredFeature (gcp->entityID, NULL, 0, 0, sfp, &context) == NULL) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_UnindexedFeature, "Feature is not indexed"); + StringCpy (buf, "?"); + bsp = vsp->bsp; + if (bsp != NULL) { + SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1); + } + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_UnindexedFeature, "Feature is not indexed on Bioseq %s", buf); } else { bsp = BioseqFindFromSeqLoc (sfp->location); if (bsp != NULL) { @@ -6058,6 +6159,7 @@ static void CheckDeltaForReuse (ValidStructPtr vsp, GatherContextPtr gcp, Bioseq { Char buf [80]; ValNodePtr head = NULL; + ValNodePtr last = NULL; ReuseDataPtr lastrdp = NULL; ReuseDataPtr rdp; SeqIntPtr sintp; @@ -6082,7 +6184,11 @@ static void CheckDeltaForReuse (ValidStructPtr vsp, GatherContextPtr gcp, Bioseq rdp->seqidstr = StringSave (buf); rdp->from = sintp->from; rdp->to = sintp->to; - ValNodeAddPointer (&head, 0, (Pointer) rdp); + vnp = ValNodeAddPointer (&last, 0, (Pointer) rdp); + if (head == NULL) { + head = vnp; + } + last = vnp; } if (head == NULL) return; @@ -6307,6 +6413,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp) (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) { } else if (numletters == 4 && numdigits == 9 && ISA_na (bsp->mol) && (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) { + } else if (numletters == 5 && numdigits == 7 && ISA_na (bsp->mol) && + (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) { } else { ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession); } @@ -7047,6 +7155,10 @@ static void ValidateBioseqInst (GatherContextPtr gcp) switch (vnp->choice) { case 1: /* SeqLocPtr */ slp = (SeqLocPtr) (vnp->data.ptrvalue); + sip3 = SeqLocId (slp); + if (sip3 != NULL && sip3->choice == SEQID_GI && sip3->data.intvalue <= 0) { + ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_DeltaComponentIsGi0, "Delta component is gi|0"); + } len2 = SeqLocLen (slp); if (len2 < 0) ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_SeqDataLenWrong, "-1 length on seq-loc of delta seq_ext"); @@ -9384,7 +9496,7 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V ValNodePtr vnp, vnp2; OrgRefPtr this_org = NULL, that_org = NULL; int tmpval; - Char buf1[20], buf2[20]; + Char buf1[20], buf2[20], ch; EMBLBlockPtr ebp; GBBlockPtr gbp; ValNodePtr keywords = NULL; @@ -9400,6 +9512,7 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V Boolean tpa_inf; BioseqPtr bsp; DatePtr dp; + size_t len; SeqMgrFeatContext fcontext; static char *badmod = "Inconsistent GIBB-mod [%d] and [%d]"; @@ -9660,6 +9773,21 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_MultipleTitles, "Undesired multiple title descriptors"); } } + len = StringLen (str); + if (len > 4) { + ch = str [len - 1]; + while (ch == ' ' && len > 4) { + len--; + ch = str [len - 1]; + } + if (ch == '.' && len > 4) { + len--; + ch = str [len - 1]; + } + if (ch == '.' || ch == ',' || ch == ';' || ch == ':') { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPunctuation, "Title descriptor ends in bad punctuation"); + } + } break; case Seq_descr_name: str = (CharPtr) vnp->data.ptrvalue; @@ -10413,33 +10541,68 @@ static Boolean IdXrefsAreReciprocal ( return FALSE; } -static Boolean IdXrefsNotReciprocal ( +static Int2 IdXrefsNotReciprocal ( SeqFeatPtr cds, SeqFeatPtr mrna ) { - SeqFeatXrefPtr xref; + Int4 giu = 0, gip = 0; SeqFeatPtr matchsfp; + ObjectIdPtr oip; + SeqIdPtr sip; + CharPtr tmp; + UserFieldPtr ufp; + UserObjectPtr uop; + SeqFeatXrefPtr xref; - if (cds == NULL || mrna == NULL) return FALSE; - if (cds->id.choice != 3 || mrna->id.choice != 3) return FALSE; + if (cds == NULL || mrna == NULL) return 0; + if (cds->id.choice != 3 || mrna->id.choice != 3) return 0; for (xref = cds->xref; xref != NULL; xref = xref->next) { if (xref->id.choice != 0) { matchsfp = SeqMgrGetFeatureByFeatID (cds->idx.entityID, NULL, NULL, xref, NULL); - if (matchsfp != mrna) return TRUE; + if (matchsfp != mrna) return 1; } } for (xref = mrna->xref; xref != NULL; xref = xref->next) { if (xref->id.choice != 0) { matchsfp = SeqMgrGetFeatureByFeatID (mrna->idx.entityID, NULL, NULL, xref, NULL); - if (matchsfp != cds) return TRUE; + if (matchsfp != cds) return 1; } } - return FALSE; + if (cds->product == NULL) return 0; + if (mrna->ext == NULL) return 0; + uop = FindUopByTag (mrna->ext, "MrnaProteinLink"); + if (uop == NULL) return 0; + sip = SeqLocId (cds->product); + if (sip == NULL) return 0; + if (sip->choice == SEQID_GI) { + gip = (Int4) sip->data.intvalue; + } else { + gip = GetGIForSeqId (sip); + } + if (gip == 0) return 0; + ufp = uop->data; + if (ufp == NULL || ufp->choice != 1) return 0; + oip = ufp->label; + if (oip == NULL || StringICmp (oip->str, "protein seqID") != 0) return 0; + tmp = (CharPtr) ufp->data.ptrvalue; + if (StringHasNoText (tmp)) return 0; + sip = MakeSeqID (tmp); + if (sip == NULL) return 0; + if (sip->choice == SEQID_GI) { + giu = (Int4) sip->data.intvalue; + } else { + giu = GetGIForSeqId (sip); + } + SeqIdFree (sip); + if (giu == 0) return 0; + if (gip != giu) return 2; + + return 0; } static Boolean LIBCALLBACK FindSingleMrnaProc ( @@ -10538,10 +10701,12 @@ static void ValidateCDSmRNAmatch ( Boolean goOn, pseudo; GeneRefPtr grp; Int2 i, j, k, numfeats, tmpnumcds, tmpnummrna, count; + Boolean is_genbank = FALSE; LpData ld; Int4 num_repeat_regions; Uint2 olditemtype = 0; Uint2 olditemid = 0; + Int2 recip; VoidPtr repeat_region_array; SeqFeatPtr rpt_region; ErrSev sev = /* SEV_INFO */ SEV_WARNING; @@ -10564,14 +10729,16 @@ static void ValidateCDSmRNAmatch ( } */ - repeat_region_array = SeqMgrBuildFeatureIndex (bsp, &num_repeat_regions, 0, FEATDEF_repeat_region); - for (sip = bsp->id; sip != NULL; sip = sip->next) { if (sip->choice == SEQID_OTHER) { sev = SEV_WARNING; + } else if (sip->choice == SEQID_GENBANK) { + is_genbank = TRUE; } } + repeat_region_array = SeqMgrBuildFeatureIndex (bsp, &num_repeat_regions, 0, FEATDEF_repeat_region); + if (numgene > 0 && numcds > 0 && nummrna > 0) { numfeats = numcds + nummrna; head = (GmcDataPtr) MemNew (sizeof (GmcData) * (size_t) (numfeats + 1)); @@ -10613,7 +10780,7 @@ static void ValidateCDSmRNAmatch ( tmpnummrna++; } } - if (tmpnumcds > 0 && tmpnummrna > 1 && tmpnumcds != tmpnummrna) { + if (tmpnumcds > 0 && tmpnummrna > 1 && tmpnumcds != tmpnummrna && (! is_genbank)) { if (gcp != NULL) { gcp->itemID = gene->idx.itemID; @@ -10677,7 +10844,8 @@ static void ValidateCDSmRNAmatch ( if (vdp != NULL) { vdp->accounted_for = TRUE; goOn = TRUE; - if (IdXrefsNotReciprocal (sfp, ld.mrna)) { + recip = IdXrefsNotReciprocal (sfp, ld.mrna); + if (recip == 1) { if (gcp != NULL) { gcp->itemID = sfp->idx.itemID; gcp->thistype = OBJ_SEQFEAT; @@ -10685,6 +10853,14 @@ static void ValidateCDSmRNAmatch ( vsp->descr = NULL; vsp->sfp = sfp; ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "CDS/mRNA unambiguous pair have erroneous cross-references"); + } else if (recip == 2) { + if (gcp != NULL) { + gcp->itemID = ld.mrna->idx.itemID; + gcp->thistype = OBJ_SEQFEAT; + } + vsp->descr = NULL; + vsp->sfp = ld.mrna; + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "MrnaProteinLink inconsistent with feature ID cross-references"); } } } else { @@ -10698,7 +10874,7 @@ static void ValidateCDSmRNAmatch ( } sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext); - while (sfp != NULL) { + while (sfp != NULL && (! is_genbank)) { vdp = (VvmDataPtr) sfp->idx.scratch; if (vdp != NULL) { count = vdp->num_mrnas; @@ -10857,6 +11033,38 @@ static Int2 WhichRNA (SeqFeatPtr sfp) return 0; } +static Boolean CDSsLinkedToDifferentMRNAs (SeqFeatPtr sfp, SeqFeatPtr last) + +{ + SeqFeatPtr mrna1 = NULL, mrna2 = NULL; + SeqFeatXrefPtr xref; + + if (sfp == NULL || last == NULL) return FALSE; + if (sfp->idx.subtype != FEATDEF_CDS || last->idx.subtype != FEATDEF_CDS) return FALSE; + + for (xref = sfp->xref; xref != NULL && mrna1 == NULL; xref = xref->next) { + if (xref->id.choice != 0) { + mrna1 = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL); + if (mrna1 != NULL && mrna1->idx.subtype != FEATDEF_mRNA) { + mrna1 = NULL; + } + } + } + + for (xref = last->xref; xref != NULL && mrna2 == NULL; xref = xref->next) { + if (xref->id.choice != 0) { + mrna2 = SeqMgrGetFeatureByFeatID (last->idx.entityID, NULL, NULL, xref, NULL); + if (mrna2 != NULL && mrna2->idx.subtype != FEATDEF_mRNA) { + mrna2 = NULL; + } + } + } + + if (mrna1 != NULL && mrna2 != NULL && mrna1 != mrna2) return TRUE; + + return FALSE; +} + static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bvsp) { @@ -10924,6 +11132,15 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv CharPtr except_text = NULL; ValNodePtr vnp, cds_prod_head = NULL, mrna_prod_head = NULL, lastcdsprod = NULL, lastmrnaprod = NULL; + StreamCache sc; + Int2 res; + Int4 dashes; + Int4 Ns; + Int4 realBases; + Int4 estimated_length; + Int4 loclen; + GBQualPtr gbq; + long int val; gcp = bvsp->gcp; vsp = bvsp->vsp; @@ -11222,6 +11439,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv /* do not report if both have dbxrefs and they are different */ } else if (featdeftype == FEATDEF_variation && ReplaceQualsDiffer (sfp->qual, last->qual)) { /* do not report if both have replace quals and they are different */ + } else if (CDSsLinkedToDifferentMRNAs (sfp, last)) { + /* do not report if CDSs are linked to two different mRNAs */ } else if (fcontext.sap == sap) { if (samelabel) { ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature"); @@ -11507,6 +11726,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv left = fcontext.left; right = fcontext.right; strand = fcontext.strand; + partialL = fcontext.partialL; + partialR = fcontext.partialR; sfp = SeqMgrGetNextFeature (bsp, last, SEQFEAT_RNA, 0, &fcontext); while (sfp != NULL) { thisrnatype = WhichRNA (sfp); @@ -11518,10 +11739,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) || (lastrnatype == TRANSFER_RNA && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) { /* okay in mitochondria */ - } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) || + } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_2 || thisrnatype == INTERNAL_SPACER_X)) || (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) || (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) || - (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) { + (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT) || + (lastrnatype == INTERNAL_SPACER_X && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) { if (gcp != NULL) { gcp->itemID = fcontext.itemID; gcp->thistype = OBJ_SEQFEAT; @@ -11534,10 +11756,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) || (lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) { /* okay in mitochondria */ - } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) || + } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_1 || thisrnatype == INTERNAL_SPACER_X)) || (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) || (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) || - (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) { + (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT) || + (lastrnatype == INTERNAL_SPACER_X && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) { if (gcp != NULL) { gcp->itemID = fcontext.itemID; gcp->thistype = OBJ_SEQFEAT; @@ -11559,10 +11782,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv vsp->descr = NULL; vsp->sfp = sfp; ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "tRNA overlaps adjacent rRNA component"); - } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) || + } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_2 || thisrnatype == INTERNAL_SPACER_X)) || (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) || (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) || - (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) { + (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT) || + (lastrnatype == INTERNAL_SPACER_X && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) { if (gcp != NULL) { gcp->itemID = fcontext.itemID; gcp->thistype = OBJ_SEQFEAT; @@ -11575,10 +11799,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) || (lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) { /* okay in mitochondria */ - } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) || + } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_1 || thisrnatype == INTERNAL_SPACER_X)) || (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) || (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) || - (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) { + (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT) || + (lastrnatype == INTERNAL_SPACER_X && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) { if (gcp != NULL) { gcp->itemID = fcontext.itemID; gcp->thistype = OBJ_SEQFEAT; @@ -11591,13 +11816,16 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv } else { /* abuts */ if (strand == Seq_strand_minus) { - if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) || + if (lastrnatype == thisrnatype && partialL && fcontext.partialR && bsp->repr == Seq_repr_seg) { + /* okay in segmented set */ + } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) || (lastrnatype == TRANSFER_RNA && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) { /* okay in mitochondria */ - } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_2) || + } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && (thisrnatype != INTERNAL_SPACER_2 && thisrnatype != INTERNAL_SPACER_X)) || (lastrnatype == INTERNAL_SPACER_2 && thisrnatype != MIDDLE_RIBOSOMAL_SUBUNIT) || (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_1) || - (lastrnatype == INTERNAL_SPACER_1 && thisrnatype != SMALL_RIBOSOMAL_SUBUNIT)) { + (lastrnatype == INTERNAL_SPACER_1 && thisrnatype != SMALL_RIBOSOMAL_SUBUNIT) || + (lastrnatype == INTERNAL_SPACER_X && thisrnatype != SMALL_RIBOSOMAL_SUBUNIT)) { if (gcp != NULL) { gcp->itemID = fcontext.itemID; gcp->thistype = OBJ_SEQFEAT; @@ -11607,13 +11835,16 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "Problem with order of abutting rRNA components"); } } else { - if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) || + if (lastrnatype == thisrnatype && partialR && fcontext.partialL && bsp->repr == Seq_repr_seg) { + /* okay in segmented set */ + } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) || (lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) { /* okay in mitochondria */ - } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_1) || + } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && (thisrnatype != INTERNAL_SPACER_1 && thisrnatype != INTERNAL_SPACER_X)) || (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_2) || (lastrnatype == INTERNAL_SPACER_1 && thisrnatype != MIDDLE_RIBOSOMAL_SUBUNIT) || - (lastrnatype == INTERNAL_SPACER_2 && thisrnatype != LARGE_RIBOSOMAL_SUBUNIT)) { + (lastrnatype == INTERNAL_SPACER_2 && thisrnatype != LARGE_RIBOSOMAL_SUBUNIT) || + (lastrnatype == INTERNAL_SPACER_X && thisrnatype != LARGE_RIBOSOMAL_SUBUNIT)) { if (gcp != NULL) { gcp->itemID = fcontext.itemID; gcp->thistype = OBJ_SEQFEAT; @@ -11630,6 +11861,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv left = fcontext.left; right = fcontext.right; strand = fcontext.strand; + partialL = fcontext.partialL; + partialR = fcontext.partialR; lastrnatype = thisrnatype; sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, 0, &fcontext); } @@ -11677,6 +11910,65 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv } } + if (ISA_na (bsp->mol)) { + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_gap, &fcontext); + while (sfp != NULL) { + estimated_length = 0; + for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) { + if (StringICmp (gbq->qual, "estimated_length") != 0) continue; + if (StringHasNoText (gbq->val)) continue; + if (StringICmp (gbq->val, "unknown") == 0) continue; + if (sscanf (gbq->val, "%ld", &val) == 1) { + estimated_length = val; + } + } + if (StreamCacheSetup (NULL, sfp->location, EXPAND_GAPS_TO_DASHES, &sc)) { + dashes = 0; + Ns = 0; + realBases = 0; + while ((res = StreamCacheGetResidue (&sc)) != '\0') { + if (IS_LOWER (res)) { + res = TO_UPPER (res); + } + if (res == '-') { + dashes++; + } else if (res == 'N') { + Ns++; + } else { + realBases++; + } + } + if (gcp != NULL) { + gcp->itemID = fcontext.itemID; + gcp->thistype = OBJ_SEQFEAT; + } + vsp->descr = NULL; + vsp->sfp = sfp; + loclen = SeqLocLen (sfp->location); + if (estimated_length > 0 && estimated_length != loclen) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature estimated_length %ld does not match %ld feature length", + (long) estimated_length, (long) loclen); + } else if (realBases > 0 && Ns > 0) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature over %ld real bases and %ld Ns", (long) realBases, (long) Ns); + } else if (realBases > 0) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature over %ld real bases", (long) realBases); + } else if (Ns > 0) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature over %ld Ns", (long) Ns); + } else if (estimated_length > 0 && dashes != estimated_length) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature estimated_length %ld does not match %ld gap characters", + (long) estimated_length, (long) dashes); + } + } + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_gap, &fcontext); + } + } + if (gcp != NULL) { + gcp->itemID = olditemid; + gcp->thistype = olditemtype; + } + vsp->descr = NULL; + vsp->sfp = NULL; + lastbiop = NULL; lastsfp = NULL; numBadFullSource = 0; @@ -11972,7 +12264,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp) bvs.is_syn_constr = TRUE; } } - if (biop->origin == ORG_ARTIFICIAL || biop->origin == ORG_MUT || biop->origin == ORG_SYNTHETIC) { + if (biop->origin == ORG_ARTIFICIAL || biop->origin == ORG_SYNTHETIC) { bvs.is_artificial = TRUE; } for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) { @@ -12538,13 +12830,13 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt } else if (ISA_aa (bsp->mol)) { just_prt_letters = TRUE; for (ptr = gbqual->val, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) { - if (StringChr ("acdefghiklmnpqrstuvwy", ch) == NULL) { + if (StringChr ("acdefghiklmnpqrstuvwy*", ch) == NULL) { just_prt_letters = FALSE; } } if (!just_prt_letters) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, - "%s is not a legal value for qualifier %s - should only be composed of acdefghiklmnpqrstuvwy amino acids", + "%s is not a legal value for qualifier %s - should only be composed of acdefghiklmnpqrstuvwy* amino acids", gbqual->val, gbqual->qual); } } @@ -12929,7 +13221,10 @@ static Boolean PartialAtSpliceSiteOrGap (SeqLocPtr head, Uint2 slpTag, BoolPtr i return FALSE; acceptor = SeqLocStart (slp); donor = SeqLocStop (slp); - bsp = BioseqLockById (sip); + bsp = NULL; + if (sip != NULL && (sip->choice != SEQID_GI || sip->data.intvalue > 0)) { + bsp = BioseqLockById (sip); + } if (bsp == NULL) return FALSE; len = bsp->length; @@ -13112,6 +13407,8 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt } } + if (sfp->pseudo) return; + if (aa > 0 && aa != 255) { /* - no gaps now that O and J are added if (aa <= 74) { @@ -13133,7 +13430,7 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid"); } } else { - ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid"); + ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Missing tRNA amino acid"); } } @@ -13439,7 +13736,7 @@ static void CheckForBadGeneOverlap (ValidStructPtr vsp, SeqFeatPtr sfp) SeqMgrFeatContext fcontext; SeqFeatPtr gene, operon; GeneRefPtr grp; - ErrSev sev = SEV_ERROR; + ErrSev sev = /* SEV_ERROR */ SEV_WARNING; if (sfp == NULL) return; @@ -13469,7 +13766,7 @@ static void CheckForBadMRNAOverlap (ValidStructPtr vsp, SeqFeatPtr sfp) { SeqMgrFeatContext fcontext; SeqFeatPtr mrna; - ErrSev sev = SEV_ERROR; + ErrSev sev = /* SEV_ERROR */ SEV_WARNING; if (sfp == NULL) return; @@ -14630,6 +14927,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) Boolean bypassGeneTest; Boolean dicistronic = FALSE; Int2 inferenceCode; + Boolean hasInference = FALSE; + Boolean hasExperiment = FALSE; Boolean accn_seqid; @@ -14950,11 +15249,14 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } if (pseudo && sfp->product != NULL) { if (ovgenepseudo) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PseudoCdsViaGeneHasProduct, "A coding region overlapped by a pseudogene should not have a product"); + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PseudoCdsViaGeneHasProduct, "A coding region overlapped by a pseudogene should not have a product"); } else { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PseudoCdsHasProduct, "A pseudo coding region should not have a product"); + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PseudoCdsHasProtXref, "A pseudo coding region should not have a product"); } } + if (pseudo && SeqMgrGetProtXref (sfp) != NULL) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PseudoCdsHasProduct, "A pseudo coding region should not have a protein xref"); + } if (codonqual) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_CodonQualifierUsed, "Use the proper genetic code, if available, or set transl_excepts on specific codons"); } @@ -15344,6 +15646,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } } if (StringICmp (gbq->qual, "inference") == 0) { + hasInference = TRUE; inferenceCode = ValidateInferenceQualifier (gbq->val, TRUE); if (inferenceCode != VALID_INFERENCE) { if (inferenceCode < VALID_INFERENCE || inferenceCode > ACC_VERSION_NOT_PUBLIC) { @@ -15351,8 +15654,14 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidInferenceValue, "Inference qualifier problem - %s", infMessage [(int) inferenceCode]); } + } else if (StringICmp (gbq->val, "experiment") == 0) { + hasExperiment = TRUE; } } + if (sfp->exp_ev > 0 && (! hasInference) && (! hasExperiment) && (! vsp->feat_loc_has_gi)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidInferenceValue, + "Inference or experiment qualifier missing but obsolete experimental evidence qualifier set"); + } if (sfp->product != NULL) { sip = SeqLocId (sfp->product); @@ -15622,7 +15931,8 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) CharPtr farstr = ""; ErrSev fetchsev; GatherContextPtr gcp; - Boolean has_errors = FALSE, unclassified_except = FALSE, mismatch_except = FALSE; + Boolean has_errors = FALSE, unclassified_except = FALSE, + mismatch_except = FALSE, other_than_mismatch = FALSE; Int2 i; Boolean is_refseq = FALSE; Int4 mismatch, total; @@ -15643,7 +15953,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) if (sfp->product == NULL) return; - if (sfp->excpt && (! StringHasNoText (sfp->except_text))) { + if (sfp->excpt && (! vsp->ignoreExceptions) && (! StringHasNoText (sfp->except_text))) { for (i = 0; bypass_mrna_trans_check [i] != NULL; i++) { if (StringISearch (sfp->except_text, bypass_mrna_trans_check [i]) != NULL) { report_errors = FALSE; /* biological exception */ @@ -15690,7 +16000,9 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } } if (bsp == NULL && vsp->farFetchMRNAproducts) { - bsp = BioseqLockById (sip); + if (sip != NULL && (sip->choice != SEQID_GI || sip->data.intvalue > 0)) { + bsp = BioseqLockById (sip); + } if (bsp != NULL) { unlockProd = TRUE; farstr = "(far) "; @@ -15720,6 +16032,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) pdseq = GetSequenceByFeature (&sf); if (pdseq == NULL) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors || unclassified_except) { fetchsev = SEV_ERROR; if (sip->choice != SEQID_GI) { @@ -15748,18 +16061,21 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } if (counta < 19 * countnona) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] less than %sproduct length [%ld], and tail < 95%s polyA", (long) mlen, farstr, (long) plen, "%"); } plen = mlen; /* even if it fails polyA test, allow base-by-base comparison on common length */ } else if (counta > 0 && countnona == 0) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail is 100%s polyA", (long) mlen, farstr, (long) plen, "%"); } plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */ } else { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail >= 95%s polyA", (long) mlen, farstr, (long) plen, "%"); } @@ -15767,6 +16083,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } } else { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] greater than %sproduct length [%ld]", (long) mlen, farstr, (long) plen); } @@ -15807,6 +16124,8 @@ erret: if (! report_errors) { if (! has_errors) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "mRNA has exception but passes transcription test"); + } else if (unclassified_except && (! other_than_mismatch)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ErroneousException, "mRNA has unclassified exception but only difference is mismatches"); } } } @@ -15952,7 +16271,8 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) Boolean transl_except = FALSE, prot_ok = TRUE, is_nc = FALSE, has_errors = FALSE, report_errors = TRUE, unclassified_except = FALSE, mismatch_except = FALSE, - frameshift_except = FALSE, rearrange_except = FALSE; + frameshift_except = FALSE, rearrange_except = FALSE, + other_than_mismatch = FALSE; CharPtr nuclocstr, farstr = ""; CodeBreakPtr cbp; Int4 pos1, pos2, pos; @@ -15973,7 +16293,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) if (sfp == NULL) return; - if (sfp->excpt && (! StringHasNoText (sfp->except_text))) { + if (sfp->excpt && (! vsp->ignoreExceptions) && (! StringHasNoText (sfp->except_text))) { for (i = 0; bypass_cds_trans_check [i] != NULL; i++) { if (StringISearch (sfp->except_text, bypass_cds_trans_check [i]) != NULL) { report_errors = FALSE; /* biological exception */ @@ -16037,6 +16357,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) newprot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, &alt_start); /* include stop codons, do not remove trailing X/B/Z */ if (newprot == NULL) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors || unclassified_except) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_CdTransFail, "Unable to translate"); } @@ -16058,6 +16379,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } if (sev > SEV_NONE) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, sev, ERR_SEQ_FEAT_AltStartCodon, "Alternative start codon used"); } @@ -16127,6 +16449,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) pos = 0; if ((pos1 % 3) != pos) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExceptPhase, "transl_except qual out of frame."); } @@ -16144,6 +16467,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) sev = SEV_ERROR; } has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem, "Suspicious CDS location - frame > 1 but not 5' partial"); } @@ -16153,6 +16477,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) sev = SEV_ERROR; } has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem, "Suspicious CDS location - frame > 1 and not at consensus splice site"); } @@ -16199,6 +16524,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) if (stop_count > 0) { if (got_dash) { has_errors = TRUE; + other_than_mismatch = TRUE; sev = SEV_ERROR; if (unclassified_except) { sev = SEV_WARNING; @@ -16209,6 +16535,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } } else { has_errors = TRUE; + other_than_mismatch = TRUE; sev = SEV_ERROR; if (unclassified_except) { sev = SEV_WARNING; @@ -16248,6 +16575,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) goto erret; } else if (got_dash) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode); } @@ -16259,7 +16587,9 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) if (protid != NULL) { prot1seq = BioseqFind (protid); if (prot1seq == NULL && vsp->farFetchCDSproducts) { - prot1seq = BioseqLockById (protid); + if (protid != NULL && (protid->choice != SEQID_GI || protid->data.intvalue > 0)) { + prot1seq = BioseqLockById (protid); + } if (prot1seq != NULL) { unlockProd = TRUE; farstr = "(Far) "; @@ -16292,6 +16622,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } if (sev != SEV_NONE) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, sev, ERR_SEQ_FEAT_NoProtein, "No protein Bioseq given"); } @@ -16377,11 +16708,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) } else if (i == 0) { if ((sfp->partial) && (!no_beg) && (!no_end)) { /* ok, it's partial */ has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "Start of location should probably be partial"); } } else if (residue1 == '-') { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode); } @@ -16427,6 +16760,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) */ } else { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_TransLen, "Given protein length [%ld] does not match %stranslation length [%ld]", prot1len, farstr, len); } @@ -16436,11 +16770,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) if ((!no_beg) && (!no_end)) { /* just didn't label */ if (!got_stop) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial"); } } else { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial"); } @@ -16458,16 +16794,19 @@ erret: if (show_stop) { if ((!got_stop) && (!no_end)) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_NoStop, "Missing stop codon"); } } else if ((got_stop) && (no_end)) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "Got stop codon, but 3'end is labeled partial"); } } else if ((got_stop) && (!no_end) && (ragged)) { has_errors = TRUE; + other_than_mismatch = TRUE; sev = SEV_ERROR; if (unclassified_except) { sev = SEV_WARNING; @@ -16481,6 +16820,7 @@ erret: if (!prot_ok) { if (transl_except) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept, "Unparsed transl_except qual. Skipped"); } @@ -16488,6 +16828,7 @@ erret: } else { if (transl_except) { has_errors = TRUE; + other_than_mismatch = TRUE; if (report_errors) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept, "Unparsed transl_except qual (but protein is okay). Skipped"); } @@ -16508,6 +16849,8 @@ erret: if ((! frameshift_except) && (! rearrange_except)) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "CDS has exception but passes translation test"); } + } else if (unclassified_except && (! other_than_mismatch)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ErroneousException, "CDS has unclassified exception but only difference is mismatches"); } } } @@ -16653,7 +16996,10 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll) if ((ctr == 1) || (!SeqIdMatch (sip, last_sip))) { /* spp = SeqPortFree (spp); */ - bsp = BioseqLockById (sip); + bsp = NULL; + if (sip != NULL && (sip->choice != SEQID_GI || sip->data.intvalue > 0)) { + bsp = BioseqLockById (sip); + } if (bsp == NULL) break; len = bsp->length; @@ -16931,12 +17277,37 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi SeqIdPtr id1 = NULL, id2; BioseqPtr bsp; SeqFeatPtr sfp = NULL; + Int2 zeroGi = 0; + Char buf [32]; + SeqIdPtr sip; if (slp == NULL) return; sfp = vsp->sfp; + tmp = NULL; + while ((tmp = SeqLocFindNext (slp, tmp)) != NULL) { + sip = SeqLocId (tmp); + if (sip != NULL && sip->choice == SEQID_GI && sip->data.intvalue <= 0) { + zeroGi++; + } + } + if (zeroGi > 0) { + StringCpy (buf, "?"); + bsp = vsp->bsp; + if (bsp != NULL) { + SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1); + } + if (zeroGi > 1) { + ValidErr (vsp, SEV_REJECT, ERR_SEQ_FEAT_FeatureLocationIsGi0, "Feature has %d gi|0 locations on Bioseq %s", + (int) zeroGi, buf); + } else if (zeroGi > 0) { + ValidErr (vsp, SEV_REJECT, ERR_SEQ_FEAT_FeatureLocationIsGi0, "Feature has %d gi|0 location on Bioseq %s", + (int) zeroGi, buf); + } + } + bsp = BioseqFindFromSeqLoc (slp); if (bsp != NULL && bsp->topology == 2) { circular = TRUE; diff --git a/api/valid.h b/api/valid.h index 33af92a7..aa0eaa87 100644 --- a/api/valid.h +++ b/api/valid.h @@ -29,7 +29,7 @@ * * Version Creation Date: 1/1/94 * -* $Revision: 6.22 $ +* $Revision: 6.23 $ * * File Description: Sequence editing utilities * @@ -39,6 +39,9 @@ * ------- ---------- ----------------------------------------------------- * * $Log: valid.h,v $ +* Revision 6.23 2006/04/21 17:59:18 kans +* added ignoreExceptions flag to vsp - for MrnaTransCheck and CdTransCheck +* * Revision 6.22 2006/02/16 19:34:47 kans * use vsp->is_smupd_in_sep to suppress ERR_SEQ_FEAT_FeatureRefersToAccession * @@ -233,6 +236,7 @@ typedef struct validstruct { Boolean validateIDSet; /* look for gain or loss of general IDs on sequence update */ Boolean seqSubmitParent; /* flag from tbl2asn to suppress no pub message */ Boolean justShowAccession; /* extremely terse output with accession and error type */ + Boolean ignoreExceptions; /* report translation and transcription problems even if exception set */ Int2 validationLimit; /* limit validation to major classes in Valid1GatherProc */ /* this section used for finer error reporting callback */ ValidErrorFunc errfunc; diff --git a/api/valid.msg b/api/valid.msg index 97bc4de9..0616b3e3 100644 --- a/api/valid.msg +++ b/api/valid.msg @@ -204,6 +204,9 @@ $^ CaseDifferenceInSeqID, 57 Multiple Bioseqs have the same Seq-id except for capitalization. Sequence identifiers must be unique in a case-insensitive manner within a record. +$^ DeltaComponentIsGi0, 58 +Delta component refers to gi 0. This indicates an error in database processing of this record. + $$ SEQ_DESCR, 2 $^ BioSourceMissing, 1 @@ -347,6 +350,9 @@ The collection date is not in the required format. $^ BadPCRPrimerSequence, 36 The PCR primer sequence has illegal characters or non-IUPAC nucleotides. +$^ BadPunctuation, 37 +The title ends with incorrect punctuation marks. + $$ GENERIC, 3 $^ NonAsciiAsn, 1 @@ -911,6 +917,20 @@ $^ FeatureSeqIDCaseDifference, 117 Feature location and referenced Bioseq have the same Seq-id except for capitalization. Sequence identifiers must be unique in a case-insensitive manner within a record. +$^ FeatureLocationIsGi0, 118 +Feature location refers to gi 0. This indicates an error in database processing of this record. + +$^ GapFeatureProblem, 119 +Gap features must only cover gaps in the sequence, not actual bases. + +$^ PseudoCdsHasProtXref, 120 +A coding region flagged as pseudo has a protein cross reference. There should be no +protein product bioseq or protein cross reference on a pseudo CDS. + +$^ ErroneousException, 121 +The feature is marked with a specific exception qualifier, but validation indicates +that a different exception should be used. + $$ SEQ_ALIGN, 6 $^ SeqIdProblem, 1 diff --git a/api/validerr.h b/api/validerr.h index cfad0e33..5e2ee13d 100644 --- a/api/validerr.h +++ b/api/validerr.h @@ -59,6 +59,7 @@ #define ERR_SEQ_INST_InternalNsInSeqRaw 1,55 #define ERR_SEQ_INST_InternalNsAdjacentToGap 1,56 #define ERR_SEQ_INST_CaseDifferenceInSeqID 1,57 +#define ERR_SEQ_INST_DeltaComponentIsGi0 1,58 #define ERR_SEQ_DESCR 2,0 #define ERR_SEQ_DESCR_BioSourceMissing 2,1 #define ERR_SEQ_DESCR_InvalidForType 2,2 @@ -96,6 +97,7 @@ #define ERR_SEQ_DESCR_MissingText 2,34 #define ERR_SEQ_DESCR_BadCollectionDate 2,35 #define ERR_SEQ_DESCR_BadPCRPrimerSequence 2,36 +#define ERR_SEQ_DESCR_BadPunctuation 2,37 #define ERR_GENERIC 3,0 #define ERR_GENERIC_NonAsciiAsn 3,1 #define ERR_GENERIC_Spell 3,2 @@ -242,6 +244,10 @@ #define ERR_SEQ_FEAT_SelfReferentialProduct 5,115 #define ERR_SEQ_FEAT_ITSdoesNotAbutRRNA 5,116 #define ERR_SEQ_FEAT_FeatureSeqIDCaseDifference 5,117 +#define ERR_SEQ_FEAT_FeatureLocationIsGi0 5,118 +#define ERR_SEQ_FEAT_GapFeatureProblem 5,119 +#define ERR_SEQ_FEAT_PseudoCdsHasProtXref 5,120 +#define ERR_SEQ_FEAT_ErroneousException 5,121 #define ERR_SEQ_ALIGN 6,0 #define ERR_SEQ_ALIGN_SeqIdProblem 6,1 #define ERR_SEQ_ALIGN_StrandRev 6,2 |