summary refs log tree commit diff
path: root/api
diff options
context:
space:
mode:
author Aaron M. Ucko <ucko@debian.org> 2006-05-26 18:34:14 +0000
committer Aaron M. Ucko <ucko@debian.org> 2006-05-26 18:34:14 +0000
commit de1d4a4e7eb2f2e1a20a6c3c90f4128f6f344e80 (patch)
tree b98cfcc27d8a53160d6d0d4171f537e667bd9c68 /api
parent 047f9550aeffa40eb05ad53427718889f660e0f4 (diff)
Load /tmp/.../ncbi-tools6-6.1.20060507 into
branches/upstream/current.
Diffstat (limited to 'api')
-rw-r--r-- api/asn2gnb1.c 24
-rw-r--r-- api/asn2gnb2.c 79
-rw-r--r-- api/asn2gnb4.c 140
-rw-r--r-- api/asn2gnb5.c 260
-rw-r--r-- api/asn2gnb6.c 12
-rw-r--r-- api/asn2gnbi.h 6
-rw-r--r-- api/asn2gnbk.h 5
-rw-r--r-- api/edutil.c 56
-rw-r--r-- api/seqmgr.c 81
-rw-r--r-- api/seqmgr.h 8
-rw-r--r-- api/seqport.c 26
-rw-r--r-- api/sequtil.c 102
-rw-r--r-- api/sqnutil1.c 70
-rw-r--r-- api/sqnutil2.c 196
-rw-r--r-- api/sqnutil3.c 444
-rw-r--r-- api/sqnutils.h 27
-rw-r--r-- api/subutil.c 110
-rw-r--r-- api/subutil.h 15
-rw-r--r-- api/tofasta.c 75
-rw-r--r-- api/valid.c 469
-rw-r--r-- api/valid.h 6
-rw-r--r-- api/valid.msg 20
-rw-r--r-- api/validerr.h 6
23 files changed, 1908 insertions, 329 deletions
diff --git a/api/asn2gnb1.c b/api/asn2gnb1.c
index 9e55f9b1..8c2cb9a0 100644
--- a/api/asn2gnb1.c
+++ b/api/asn2gnb1.c
@@ -28,11 +28,11 @@
* Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
* Mati Shomrat
*
-* $Id: asn2gnb1.c,v 1.97 2006/02/23 16:38:54 kans Exp $
+* $Id: asn2gnb1.c,v 1.101 2006/05/03 18:05:39 kans Exp $
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.97 $
+* $Revision: 1.101 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -3320,8 +3320,8 @@ static void MakeGapFeats (
gapvnp = (ValNodePtr PNTR) userdata;
sip = SeqIdFindBest (bsp->id, 0);
if (sip == NULL) return;
- /* suppress on far delta contigs for now */
- if (! DeltaLitOnly (bsp)) return;
+ /* no longer suppress on far delta contigs */
+ /* if (! DeltaLitOnly (bsp)) return; */
for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
if (vnp->choice == 1) {
@@ -3477,6 +3477,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
Boolean lockFarProd;
Boolean lookupFarComp;
Boolean lookupFarHist;
+ Boolean lookupFarInf;
Boolean lookupFarLocs;
Boolean lookupFarOthers;
Boolean lookupFarProd;
@@ -3625,7 +3626,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
gapvnp = NULL;
if (format != FTABLE_FMT) {
- if (isG || isTPG || isOnlyLocal || isRefSeq || (isGeneral && (! isGED))) {
+ if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || (isGeneral && (! isGED))) {
if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeGapFeats);
}
@@ -3717,13 +3718,14 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
+ lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
- if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarOthers) {
+ if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
/* lookukp all far SeqIDs in advance */
- LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarOthers);
+ LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
}
ajp->showFarTransl = (Boolean) ((flags & FAR_TRANS_MASK) == SHOW_FAR_TRANSLATION);
@@ -5694,6 +5696,7 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
Boolean lockFarProd;
Boolean lookupFarComp;
Boolean lookupFarHist;
+ Boolean lookupFarInf;
Boolean lookupFarLocs;
Boolean lookupFarOthers;
Boolean lookupFarProd;
@@ -5749,11 +5752,12 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
+ lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
- if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarOthers) {
- locks = locks ^ (LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY | LOOKUP_FAR_OTHERS);
- LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarOthers);
+ if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
+ locks = locks ^ (LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY | LOOKUP_FAR_INFERENCE | LOOKUP_FAR_OTHERS);
+ LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
}
ProfilerSetStatus (TRUE);
diff --git a/api/asn2gnb2.c b/api/asn2gnb2.c
index c8353c89..9e940a24 100644
--- a/api/asn2gnb2.c
+++ b/api/asn2gnb2.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.69 $
+* $Revision: 1.74 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -455,6 +455,7 @@ NLM_EXTERN void AddLocusBlock (
Char mol [30];
Int4 nextGi;
BioseqPtr nm = NULL;
+ BioseqPtr nuc;
ObjectIdPtr oip;
OrgNamePtr onp;
Uint1 origin;
@@ -847,6 +848,22 @@ NLM_EXTERN void AddLocusBlock (
StringCpy (div, "PAT");
}
+ /* if protein is encoded by a patent nucleotide, use PAT division */
+
+ if (ISA_aa (bsp->mol)) {
+ cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
+ if (cds != NULL) {
+ nuc = BioseqFindFromSeqLoc (cds->location);
+ if (nuc != NULL) {
+ for (sip = nuc->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_PATENT) {
+ StringCpy (div, "PAT");
+ }
+ }
+ }
+ }
+ }
+
/* more complicated code for division, if necessary, goes here */
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
@@ -1067,6 +1084,9 @@ NLM_EXTERN void AddLocusBlock (
if (topol < 0 || topol > 2) {
topol = 0;
}
+ if (topol == 0) {
+ topol = 1; /* default to displaying linear if not set */
+ }
gbseq->topology = StringSave (gbseq_top [topol]);
for (sip = bsp->id; sip != NULL; sip = sip->next) {
@@ -1082,11 +1102,16 @@ NLM_EXTERN void AddLocusBlock (
}
if (dp != NULL) {
DateToFF (date, dp, FALSE);
+ if (StringDoesHaveText (date)) {
+ gbseq->create_date = StringSave (date);
+ }
}
+ /*
if (StringHasNoText (date)) {
StringCpy (date, "01-JAN-1900");
}
gbseq->create_date = StringSave (date);
+ */
date [0] = '\0';
dp = NULL;
@@ -2016,13 +2041,16 @@ NLM_EXTERN void AddProjectBlock (
{
IntAsn2gbJobPtr ajp;
+ Asn2gbSectPtr asp;
BaseBlockPtr bbp;
BioseqPtr bsp;
Char buf [32];
UserFieldPtr curr;
SeqMgrDescContext dcontext;
StringItemPtr ffstring;
+ GBSeqPtr gbseq;
UserObjectPtr gpuop = NULL;
+ ValNodePtr head = NULL;
Uint4 itemID;
ObjectIdPtr oip;
Int4 parentID;
@@ -2037,10 +2065,18 @@ NLM_EXTERN void AddProjectBlock (
if (ajp == NULL) return;
bsp = awp->bsp;
if (bsp == NULL) return;
+ asp = awp->asp;
+ if (asp == NULL) return;
if (! ISA_na (bsp->mol)) return;
if (awp->format != GENBANK_FMT) return;
+ if (ajp->gbseq) {
+ gbseq = &asp->gbseq;
+ } else {
+ gbseq = NULL;
+ }
+
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
while (sdp != NULL) {
uop = (UserObjectPtr) sdp->data.ptrvalue;
@@ -2087,6 +2123,14 @@ NLM_EXTERN void AddProjectBlock (
/*
FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
*/
+ if (gbseq != NULL) {
+ if (head == NULL) {
+ sprintf (buf, "%ld", (long) projectID);
+ } else {
+ sprintf (buf, ", %ld", (long) projectID);
+ }
+ ValNodeCopyStr (&head, 0, buf);
+ }
prefix = ",";
parentID = 0;
}
@@ -2110,11 +2154,26 @@ NLM_EXTERN void AddProjectBlock (
/*
FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
*/
+ if (gbseq != NULL) {
+ if (head == NULL) {
+ sprintf (buf, "%ld", (long) projectID);
+ } else {
+ sprintf (buf, ", %ld", (long) projectID);
+ }
+ ValNodeCopyStr (&head, 0, buf);
+ }
}
bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
FFRecycleString (ajp, ffstring);
+ if (gbseq != NULL) {
+ if (head != NULL) {
+ gbseq->project = MergeFFValNodeStrs (head);
+ ValNodeFreeData (head);
+ }
+ }
+
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, bbp);
}
@@ -2986,6 +3045,7 @@ NLM_EXTERN void AddDbsourceBlock (
SeqIdPtr id;
ValNodePtr list = NULL;
BioseqPtr nuc;
+ SeqEntryPtr sep;
SeqIdPtr sip;
SeqLocPtr slp;
CharPtr str;
@@ -3084,6 +3144,23 @@ NLM_EXTERN void AddDbsourceBlock (
}
}
ValNodeFree (list);
+ } else {
+ sep = GetTopSeqEntryForEntityID (awp->entityID);
+ if (sep != NULL && IS_Bioseq (sep)) {
+ /* special case for coded_by CDS packed on retcode 1 protein */
+ id = SeqLocId (cds->location);
+ if (id != NULL && id->choice == SEQID_GI) {
+ sip = GetSeqIdForGI (id->data.intvalue);
+ if (sip == NULL) {
+ sip = id;
+ }
+ }
+ if (WriteDbsourceID (sip, buf)) {
+ FF_www_dbsource (ajp, ffstring, buf, TRUE, sip->choice);
+ FFAddNewLine(ffstring);
+ unknown = FALSE;
+ }
+ }
}
} else {
if (WriteDbsourceID (sip, buf)) {
diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c
index 2aebe7ad..45f44fd9 100644
--- a/api/asn2gnb4.c
+++ b/api/asn2gnb4.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.98 $
+* $Revision: 1.106 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -1495,6 +1495,7 @@ static Int2 ValidateAccnInternal (
if (numAlpha == 2 && numDigits == 6) return 0;
if (numAlpha == 3 && numDigits == 5) return 0;
if (numAlpha == 4 && numDigits == 8) return 0;
+ if (numAlpha == 5 && numDigits == 7) return 0;
} else if (numUndersc == 1) {
if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return -2;
if (accession [0] == 'N' || accession [0] == 'X' || accession [0] == 'Z') {
@@ -3313,8 +3314,21 @@ static void FormatFeatureBlockQuals (
pmid = (Int4) vnp->data.intvalue;
if (pmid > 0) {
sprintf (numbuf, "%ld", (long) pmid);
+ FFAddOneString(ffstring, "/citation=[PUBMED ", FALSE, TRUE, TILDE_TO_SPACES);
+ if (GetWWW (ajp)) {
+
+ FFAddTextToString(ffstring, "<a href=", link_muid, NULL, FALSE, FALSE, TILDE_IGNORE);
+ FFAddTextToString(ffstring, NULL, numbuf, ">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, numbuf, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString(ffstring, numbuf, FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString(ffstring, "]", FALSE, FALSE, TILDE_IGNORE);
+ /*
FFAddTextToString(ffstring, "/citation=[PUBMED ", numbuf, "]",
FALSE, TRUE, TILDE_TO_SPACES);
+ */
FFAddOneChar(ffstring, '\n', FALSE);
}
}
@@ -3380,7 +3394,7 @@ static void FormatFeatureBlockQuals (
}
if (okay) {
FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, dbt->db, buf);
+ FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp);
FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
}
}
@@ -3423,7 +3437,7 @@ static void FormatFeatureBlockQuals (
}
sprintf (seqid, "%ld", (long) sip->data.intvalue);
FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, "GI", seqid);
+ FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
} else if (sip->choice == SEQID_GENERAL) {
dbt = (DbtagPtr) sip->data.ptrvalue;
@@ -3455,6 +3469,11 @@ static void FormatFeatureBlockQuals (
if (sip->choice == SEQID_GI) {
gi = sip->data.intvalue;
if (GetAccnVerFromServer (gi, seqid)) {
+#ifdef OS_UNIX
+ if (getenv ("ASN2GB_PSF_DEBUG") != NULL) {
+ printf ("GetAccnVerFromServer returned %s\n", seqid);
+ }
+#endif
if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
FALSE, FALSE, TILDE_IGNORE);
@@ -3466,6 +3485,11 @@ static void FormatFeatureBlockQuals (
} else {
sip = GetSeqIdForGI (gi);
if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
+#ifdef OS_UNIX
+ if (getenv ("ASN2GB_PSF_DEBUG") != NULL) {
+ printf ("GetSeqIdForGI returned %s\n", seqid);
+ }
+#endif
if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
FALSE, FALSE, TILDE_IGNORE);
@@ -3487,7 +3511,7 @@ static void FormatFeatureBlockQuals (
sprintf (seqid, "%ld", (long) gi);
FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, "GI", seqid);
+ FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
} else if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
@@ -3503,7 +3527,7 @@ static void FormatFeatureBlockQuals (
if (gi > 0) {
sprintf (seqid, "%ld", (long) gi);
FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, "GI", seqid);
+ FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
}
}
@@ -4201,11 +4225,10 @@ static void FormatFeatureBlockQuals (
}
-
static void FF_asn2gb_www_featkey (
StringItemPtr ffstring,
CharPtr key,
- SeqLocPtr slp,
+ SeqFeatPtr sfp,
Int4 from,
Int4 to,
Uint1 strand,
@@ -4213,13 +4236,21 @@ static void FF_asn2gb_www_featkey (
)
{
- BioseqPtr bsp;
- Int4 gi = 0;
- SeqIdPtr sip;
- Boolean is_aa = FALSE;
- Char gi_buf[16];
- Char itemID_buf[16];
+ BioseqPtr bsp;
+ Char buf [16];
+ Int4 featID = 0;
+ Int4 ffrom = 0;
+ Int4 fto = 0;
+ Int4 gi = 0;
+ Char gi_buf[16];
+ Boolean is_aa = FALSE;
+ ObjectIdPtr oip;
+ SeqIntPtr sintp;
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+ if (sfp == NULL) return;
+ slp = sfp->location;
bsp = BioseqFindFromSeqLoc (slp);
if (bsp != NULL) {
is_aa = ISA_aa (bsp->mol);
@@ -4228,19 +4259,47 @@ static void FF_asn2gb_www_featkey (
gi = (Int4) sip->data.intvalue;
}
}
+ } else {
+ if (sfp->id.choice == 3) {
+ oip = (ObjectIdPtr) sfp->id.value.ptrvalue;
+ if (oip != NULL && oip->str == NULL) {
+ featID = oip->id;
+ }
+ }
+ if (slp->choice == SEQLOC_INT) {
+ sintp = (SeqIntPtr) slp->data.ptrvalue;
+ if (sintp != NULL) {
+ ffrom = sintp->from + 1;
+ fto = sintp->to + 1;
+ sip = sintp->id;
+ if (sip->choice == SEQID_GI) {
+ gi = (Int4) sip->data.intvalue;
+ }
+ }
+ }
}
- sprintf(gi_buf, "%ld", (long)gi);
- sprintf(itemID_buf, "%ld", (long)itemID);
-
+ sprintf (gi_buf, "%ld", (long)gi);
FFAddOneString(ffstring, "<a href=", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, link_feat, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, "val=", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
- if (itemID > 0) {
+ if (featID > 0) {
+ sprintf (buf, "%ld", (long) featID);
+ FFAddOneString(ffstring, "&featID=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ } else if (ffrom > 0 && fto > 0) {
+ sprintf (buf, "%ld", (long) ffrom);
+ FFAddOneString(ffstring, "&from=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%ld", (long) fto);
+ FFAddOneString(ffstring, "&to=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ } else if (itemID > 0) {
+ sprintf (buf, "%ld", (long) itemID);
FFAddOneString(ffstring, "&itemID=", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString(ffstring, itemID_buf, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
}
@@ -4353,8 +4412,11 @@ static void AddIntervalsToGbfeat (
Char accn [41];
SeqLocPtr copy = NULL;
Int4 from;
+ IntFuzzPtr fuzz;
GBIntervalPtr gbint;
Int4 gi;
+ Boolean interbp;
+ Boolean iscomp;
GBIntervalPtr last = NULL;
Int4 point;
SeqIntPtr sint;
@@ -4375,6 +4437,8 @@ static void AddIntervalsToGbfeat (
from = 0;
to = 0;
point = 0;
+ iscomp = FALSE;
+ interbp = FALSE;
sip = NULL;
switch (slp->choice) {
case SEQLOC_WHOLE :
@@ -4398,6 +4462,9 @@ static void AddIntervalsToGbfeat (
from = to;
to = swap;
}
+ if (sint->strand == Seq_strand_minus) {
+ iscomp = TRUE;
+ }
}
break;
case SEQLOC_PNT :
@@ -4405,6 +4472,25 @@ static void AddIntervalsToGbfeat (
if (spp != NULL) {
point = spp->point + 1;
sip = spp->id;
+ if (spp->strand == Seq_strand_minus) {
+ iscomp = TRUE;
+ }
+ fuzz = spp->fuzz;
+ if (fuzz != NULL) {
+ if (fuzz->choice == 4) {
+ if (fuzz->a == 3) { /* space to right */
+ from = point;
+ to = point + 1;
+ point = 0;
+ interbp = TRUE;
+ } else if (fuzz->a == 4 && point > 1) { /* space to left */
+ from = point - 1;
+ to = point;
+ point = 0;
+ interbp = TRUE;
+ }
+ }
+ }
}
break;
default :
@@ -4431,6 +4517,8 @@ static void AddIntervalsToGbfeat (
gbint->from = from;
gbint->to = to;
gbint->point = point;
+ gbint->iscomp = iscomp;
+ gbint->interbp = interbp;
gbint->accession = StringSave (accn);
if (gbfeat->intervals == NULL) {
gbfeat->intervals = gbint;
@@ -4800,7 +4888,7 @@ static CharPtr FormatFeatureBlockEx (
Choice cbaa;
CodeBreakPtr cbp;
BioseqPtr cdna;
- SeqFeatPtr cds;
+ SeqFeatPtr cds = NULL;
Char ch;
Uint1 code = Seq_code_ncbieaa;
CdRegionPtr crp;
@@ -5065,7 +5153,7 @@ static CharPtr FormatFeatureBlockEx (
if (ajp->ajp.slp != NULL) {
FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
} else if ( GetWWW(ajp) && StringICmp (key, "gap") != 0 /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) {
- FF_asn2gb_www_featkey (ffstring, key, sfp->location, fcontext->left + 1, fcontext->right + 1, fcontext->strand, itemID);
+ FF_asn2gb_www_featkey (ffstring, key, sfp, fcontext->left + 1, fcontext->right + 1, fcontext->strand, itemID);
} else {
FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
}
@@ -5111,6 +5199,15 @@ static CharPtr FormatFeatureBlockEx (
if (gbseq != NULL) {
if (gbfeat != NULL) {
gbfeat->location = StringSave (str);
+ if (StringDoesHaveText (str)) {
+ if (StringStr (str, "join") != NULL) {
+ gbfeat->operator__ = StringSave ("join");
+ } else if (StringStr (str, "order") != NULL) {
+ gbfeat->operator__ = StringSave ("order");
+ }
+ }
+ gbfeat->partial5 = fcontext->partialL;
+ gbfeat->partial3 = fcontext->partialR;
if (ajp->masterStyle) {
AddIntervalsToGbfeat (gbfeat, location, target);
} else {
@@ -5229,6 +5326,9 @@ static CharPtr FormatFeatureBlockEx (
gene_for_old_locus_tag = SeqMgrGetFeatureByLabel (bsp_for_old_locus_tag, grp->locus_tag, SEQFEAT_GENE, 0, &gcontext);
}
}
+ if (grp == NULL && ifp->mapToNuc && cds != NULL) {
+ grp = SeqMgrGetGeneXref (cds);
+ }
if (grp == NULL && featdeftype != FEATDEF_primer_bind) {
gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene);
if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) {
diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c
index 7abce569..34462b1a 100644
--- a/api/asn2gnb5.c
+++ b/api/asn2gnb5.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.54 $
+* $Revision: 1.64 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -281,6 +281,12 @@ static Char link_hprd [MAX_WWWBUF];
static Char link_uspto [MAX_WWWBUF];
#define DEF_LINK_USPTO "http://patft.uspto.gov/netacgi/nph-Parser?patentnumber="
+static Char link_vector [MAX_WWWBUF];
+#define DEF_LINK_VECTOR "http://www.vectorbase.org/Genome/BRCGene/?"
+
+static Char link_mirbase [MAX_WWWBUF];
+#define DEF_LINK_MIRBASE "http://microrna.sanger.ac.uk/cgi-bin/sequences/mirna_entry.pl?acc="
+
/* www utility functions */
@@ -310,8 +316,6 @@ NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp)
GetAppParam ("NCBI", "WWWENTREZ", "LINK_ECAMBIG", DEF_LINK_ECAMBIG, ec_ambig, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_FF", DEF_LINK_FF, link_ff, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_MUID", DEF_LINK_MUID, link_muid, MAX_WWWBUF);
- GetAppParam ("NCBI", "WWWENTREZ", "LINK_FF", DEF_LINK_FF, link_ff, MAX_WWWBUF);
- GetAppParam ("NCBI", "WWWENTREZ", "LINK_MUID", DEF_LINK_MUID, link_muid, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_ACE", DEF_LINK_ACE, link_ace, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_TAX", DEF_LINK_TAX, link_tax, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_CODE", DEF_LINK_CODE, link_code, MAX_WWWBUF);
@@ -371,6 +375,8 @@ NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp)
GetAppParam ("NCBI", "WWWENTREZ", "LINK_BOLD", DEF_LINK_BOLD, link_bold, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_HPRD", DEF_LINK_HPRD, link_hprd, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_USPTO", DEF_LINK_USPTO, link_uspto, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_VECTOR", DEF_LINK_VECTOR, link_vector, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_MIRBASE", DEF_LINK_MIRBASE, link_mirbase, MAX_WWWBUF);
}
@@ -670,6 +676,39 @@ static void FF_www_db_xref_hprd (
FF_www_db_xref_std (ffstring, db, identifier, link);
}
+static void FF_www_db_xref_vector (
+ StringItemPtr ffstring,
+ CharPtr db,
+ CharPtr identifier,
+ BioseqPtr bsp,
+ CharPtr link
+)
+{
+ Char ch;
+ Char buf [512], tax [256];
+ CharPtr ptr;
+
+ StringCpy (buf, link);
+ if (bsp != NULL) {
+ if (BioseqToGeneticCode (bsp, NULL, NULL, NULL, tax, sizeof (tax), NULL)) {
+ ptr = tax;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (IS_WHITESP (ch)) {
+ *ptr = '_';
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ StringCat (buf, "org=");
+ StringCat (buf, tax);
+ StringCat (buf, "&");
+ }
+ }
+ StringCat (buf, "gene=");
+ FF_www_db_xref_std (ffstring, db, identifier, buf);
+}
+
static void FF_www_db_xref_null (
StringItemPtr ffstring,
CharPtr db,
@@ -688,7 +727,9 @@ static void FF_www_db_xref_null (
static void Do_www_db_xref(
IntAsn2gbJobPtr ajp,
StringItemPtr ffstring,
- CharPtr db, CharPtr identifier
+ CharPtr db,
+ CharPtr identifier,
+ BioseqPtr bsp
)
{
if ( ffstring == NULL || db == NULL || identifier == NULL ) return;
@@ -813,6 +854,10 @@ static void Do_www_db_xref(
FF_www_db_xref_null(ffstring, db, identifier, link_bold);
} else if ( StringCmp(db , "HPRD") == 0) {
FF_www_db_xref_hprd(ffstring, db, identifier, link_hprd);
+ } else if ( StringCmp(db , "VectorBase") == 0) {
+ FF_www_db_xref_vector(ffstring, db, identifier, bsp, link_vector);
+ } else if ( StringCmp(db , "miRBase") == 0) {
+ FF_www_db_xref_std(ffstring, db, identifier, link_mirbase);
} else {
/* default: no link just the text */
@@ -823,13 +868,15 @@ static void Do_www_db_xref(
NLM_EXTERN void FF_www_db_xref(
IntAsn2gbJobPtr ajp,
StringItemPtr ffstring,
- CharPtr db, CharPtr identifier
+ CharPtr db,
+ CharPtr identifier,
+ BioseqPtr bsp
)
{
if ( ffstring == NULL || db == NULL || identifier == NULL ) return;
if ( GetWWW(ajp) ) {
- Do_www_db_xref (ajp, ffstring, db, identifier);
+ Do_www_db_xref (ajp, ffstring, db, identifier, bsp);
} else { /* not in www mode */
if (StringCmp(db , "MGD") == 0 || StringCmp(db , "MGI") == 0) {
if (StringNICmp (identifier, "MGI:", 4) == 0) {
@@ -885,7 +932,7 @@ NLM_EXTERN CharPtr asn2gnbk_dbxref (
}
ajp->www = TRUE;
- Do_www_db_xref (ajp, ffstring, dbt->db, buf);
+ Do_www_db_xref (ajp, ffstring, dbt->db, buf, NULL);
ajp->www = FALSE;
@@ -1082,6 +1129,7 @@ NLM_EXTERN CharPtr GetAuthorsString (
{
AuthorPtr ap;
+ ValNodePtr clist;
ValNodePtr conslist;
Int2 count;
ValNodePtr head = NULL;
@@ -1151,23 +1199,33 @@ NLM_EXTERN CharPtr GetAuthorsString (
prefix = ", ";
}
+ prefix = NULL;
+ clist = NULL;
for (vnp = conslist; vnp != NULL; vnp = vnp->next) {
str = NULL;
pid = (PersonIdPtr) vnp->data.ptrvalue;
if (pid->choice == 5) {
- str = MakeSingleAuthorString (format, NULL, (CharPtr) pid->data, NULL, NULL, index, NULL);
- if ((! StringHasNoText (str)) && consortP != NULL && *consortP == NULL) {
- *consortP = StringSave (str);
+ str = MakeSingleAuthorString (format, prefix, (CharPtr) pid->data, NULL, NULL, index, NULL);
+ if (str != NULL) {
+ ValNodeAddStr (&clist, 0, str);
}
+ prefix = "; ";
+ }
+ }
+ if (clist != NULL) {
+ str = MergeFFValNodeStrs (clist);
+ if ((! StringHasNoText (str)) && consortP != NULL && *consortP == NULL) {
+ *consortP = StringSave (str);
+ }
- /* optionally populate gbseq for XML-ized GenBank format */
-
- if (gbref != NULL) {
- gbref->consortium = StringSave (str);
- }
+ /* optionally populate gbseq for XML-ized GenBank format */
- str = MemFree (str);
+ if (gbref != NULL) {
+ gbref->consortium = StringSave (str);
}
+
+ str = MemFree (str);
+ ValNodeFreeData (clist);
}
ValNodeFree (pidlist);
@@ -3254,15 +3312,25 @@ static CharPtr remarksText [] = {
static void AddReferenceToGbseq (
GBSeqPtr gbseq,
GBReferencePtr gbref,
- CharPtr str
+ CharPtr str,
+ RefBlockPtr rbp,
+ BioseqPtr bsp
)
{
- CharPtr copy;
- CharPtr ptr;
- CharPtr ref;
-
- if (gbseq == NULL || gbref == NULL || StringHasNoText (str)) return;
+ Char buf [32];
+ CharPtr copy;
+ ValNodePtr head = NULL;
+ IntRefBlockPtr irp;
+ SeqLocPtr loc;
+ CharPtr ptr;
+ CharPtr ref;
+ SeqLocPtr slp;
+ Int4 start;
+ Int4 stop;
+ CharPtr tmp;
+
+ if (gbseq == NULL || gbref == NULL || StringHasNoText (str) || rbp == NULL || bsp == NULL) return;
copy = StringSave (str);
@@ -3277,7 +3345,13 @@ static void AddReferenceToGbseq (
ref = copy + 12;
ptr = StringStr (ref, "\n AUTHORS");
if (ptr == NULL) {
+ ptr = StringStr (ref, "\n CONSRTM");
+ }
+ if (ptr == NULL) {
ptr = StringStr (ref, ")\n");
+ if (ptr != NULL) {
+ ptr++;
+ }
}
if (ptr != NULL) {
*ptr = '\0';
@@ -3300,6 +3374,36 @@ static void AddReferenceToGbseq (
Asn2gnbkCompressSpaces (gbref->journal);
MemFree (copy);
+
+ if (rbp->sites == 1 || rbp->sites == 2) {
+ gbref->position = StringSave ("sites");
+ } else if (rbp->sites == 3) {
+ } else {
+ irp = (IntRefBlockPtr) rbp;
+ loc = irp->loc;
+ if (loc != NULL) {
+ slp = SeqLocFindNext (loc, NULL);
+ while (slp != NULL) {
+ start = SeqLocStart (slp) + 1;
+ stop = SeqLocStop (slp) + 1;
+ if (head == NULL) {
+ sprintf (buf, "%ld..%ld", (long) start, (long) stop);
+ } else {
+ sprintf (buf, "; %ld..%ld", (long) start, (long) stop);
+ }
+ ValNodeCopyStr (&head, 0, buf);
+ slp = SeqLocFindNext (loc, slp);
+ }
+ tmp = MergeFFValNodeStrs (head);
+ ValNodeFreeData (head);
+ gbref->position = tmp;
+ } else {
+ start = 1;
+ stop = bsp->length;
+ sprintf (buf, "%ld..%ld", (long) start, (long) stop);
+ gbref->position = StringSave (buf);
+ }
+ }
}
static Boolean IsCitSub (
@@ -3381,6 +3485,8 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
Int4 pmid = 0;
CharPtr prefix = NULL;
RefBlockPtr rbp;
+ ValNodePtr remarks = NULL;
+ CharPtr remprefix = NULL;
SubmitBlockPtr sbp;
SeqDescrPtr sdp;
SeqFeatPtr sfp = NULL;
@@ -3631,10 +3737,6 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
/* print author list */
- FFRecycleString(ajp, temp);
- temp = FFGetString(ajp);
- FFStartPrint(temp, afp->format, 2, 12, "AUTHORS", 12, 5, 5, "RA", FALSE);
-
str = NULL;
consortium = NULL;
@@ -3644,36 +3746,42 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
TrimSpacesAroundString (str);
}
- if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
- suffix = NULL;
- trailingPeriod = TRUE;
- } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
- trailingPeriod = FALSE;
- len = StringLen (str);
- if (len > 0 && str [len - 1] != '.') {
- suffix = ".;";
- } else {
- suffix = ";";
- }
- }
+ if (str != NULL || StringHasNoText (consortium)) {
+ FFRecycleString(ajp, temp);
+ temp = FFGetString(ajp);
+ FFStartPrint(temp, afp->format, 2, 12, "AUTHORS", 12, 5, 5, "RA", FALSE);
- /* if no authors were found, period will still be added by this call */
- if (str != NULL) {
- FFAddTextToString(temp, NULL, str, suffix, trailingPeriod, FALSE, TILDE_TO_SPACES);
- } else {
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
- FFAddOneChar(temp, '.', FALSE);
+ suffix = NULL;
+ trailingPeriod = TRUE;
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
- FFAddOneChar(temp, ';', FALSE);
- }
- }
+ trailingPeriod = FALSE;
+ len = StringLen (str);
+ if (len > 0 && str [len - 1] != '.') {
+ suffix = ".;";
+ } else {
+ suffix = ";";
+ }
+ }
- MemFree (str);
- if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
+ /* if no authors were found, period will still be added by this call */
+ if (str != NULL) {
+ FFAddTextToString(temp, NULL, str, suffix, trailingPeriod, FALSE, TILDE_TO_SPACES);
+ } else if (StringHasNoText (consortium)) {
+ if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
+ FFAddOneChar(temp, '.', FALSE);
+ } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
+ FFAddOneChar(temp, ';', FALSE);
+ }
+ }
+
+ if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
- } else {
+ } else {
FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA");
+ }
}
+ MemFree (str);
/* print consortium */
@@ -3857,7 +3965,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (gbseq != NULL) {
if (gbref != NULL) {
- AddReferenceToGbseq (gbseq, gbref, str);
+ AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp);
}
}
@@ -3890,6 +3998,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFRecycleString(ajp, temp);
temp = FFGetString(ajp);
+ if (remprefix != NULL) {
+ ValNodeCopyStr (&remarks, 0, remprefix);
+ }
+ ValNodeCopyStr (&remarks, 0, pdp->comment);
+ remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, pdp->comment, FALSE, TRUE, TILDE_EXPAND);
/* AddCommentWithURLlinks(ajp, temp, NULL, pdp->comment, NULL); */
@@ -3898,7 +4011,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (gbseq != NULL) {
if (gbref != NULL) {
+ /*
gbref->remark = StringSave (pdp->comment);
+ */
}
}
@@ -3922,6 +4037,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
temp = FFGetString(ajp);
sprintf (buf, "GenBank staff at the National Library of Medicine created this entry [NCBI gibbsq %ld] from the original journal article.", (long) gibbsq);
+ if (remprefix != NULL) {
+ ValNodeCopyStr (&remarks, 0, remprefix);
+ }
+ ValNodeCopyStr (&remarks, 0, buf);
+ remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
@@ -3938,6 +4058,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
temp = FFGetString(ajp);
sprintf (buf, "This sequence comes from %s", str);
+ if (remprefix != NULL) {
+ ValNodeCopyStr (&remarks, 0, remprefix);
+ }
+ ValNodeCopyStr (&remarks, 0, buf);
+ remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
@@ -3948,7 +4073,12 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFRecycleString(ajp, temp);
temp = FFGetString(ajp);
- FFStartPrint (temp ,afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
+ if (remprefix != NULL) {
+ ValNodeCopyStr (&remarks, 0, remprefix);
+ }
+ ValNodeCopyStr (&remarks, 0, "Polyadenylate residues occurring in the figure were omitted from the sequence.");
+ remprefix = "; ";
+ FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, "Polyadenylate residues occurring in the figure were omitted from the sequence.", TRUE, TRUE, TILDE_EXPAND);
FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
@@ -3963,6 +4093,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
temp = FFGetString(ajp);
sprintf (buf, "Map location: %s", str);
+ if (remprefix != NULL) {
+ ValNodeCopyStr (&remarks, 0, remprefix);
+ }
+ ValNodeCopyStr (&remarks, 0, buf);
+ remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
@@ -3984,6 +4119,17 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFRecycleString(ajp, temp);
temp = FFGetString(ajp);
+ len = StringLen (crp->exp) + 20;
+ str = MemNew (sizeof (Char) * len);
+ if (str != NULL) {
+ sprintf (str, "Erratum:[%s]", crp->exp);
+ if (remprefix != NULL) {
+ ValNodeCopyStr (&remarks, 0, remprefix);
+ }
+ ValNodeCopyStr (&remarks, 0, str);
+ remprefix = "; ";
+ str = MemFree (str);
+ }
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, "Erratum:", FALSE, FALSE, TILDE_TO_SPACES);
FFAddTextToString (temp, "[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
@@ -4000,6 +4146,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFRecycleString(ajp, temp);
temp = FFGetString(ajp);
+ if (remprefix != NULL) {
+ ValNodeCopyStr (&remarks, 0, remprefix);
+ }
+ ValNodeCopyStr (&remarks, 0, csp->descr);
+ remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
/* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */
AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL);
@@ -4016,9 +4167,14 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (gbseq != NULL) {
if (gbref != NULL) {
- AddReferenceToGbseq (gbseq, gbref, str);
+ if (remarks != NULL) {
+ gbref->remark = MergeFFValNodeStrs (remarks);
+ }
+
+ AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp);
}
}
+ ValNodeFreeData (remarks);
FFRecycleString(ajp, ffstring);
FFRecycleString(ajp, temp);
diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c
index 698d7587..a9d156fd 100644
--- a/api/asn2gnb6.c
+++ b/api/asn2gnb6.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.69 $
+* $Revision: 1.73 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -446,6 +446,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"UniProtKB/TrEMBL",
"UniSTS",
"VBASE2",
+ "VectorBase",
"WorfDB",
"WormBase",
"ZFIN",
@@ -458,6 +459,7 @@ NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
"ECOCYC",
"HPRD",
"REBASE",
+ "miRBase",
NULL
};
@@ -3195,7 +3197,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
}
if (! StringHasNoText (buf)) {
FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, dbt->db, buf);
+ FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp);
FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
}
}
@@ -4087,7 +4089,7 @@ static Int2 ProcessGapSpecialFormat (
)
{
- Char fmt_buf [32];
+ Char fmt_buf [64];
Char gapbuf [80];
Int4 gi;
Char gi_buf [16];
@@ -4129,6 +4131,9 @@ static Int2 ProcessGapSpecialFormat (
if (gi > 0) {
sprintf(gi_buf, "%ld", (long) gi);
sprintf(fmt_buf, "&fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY);
+ if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
+ StringCat (fmt_buf, "&view=gbwithparts");
+ }
FFAddOneString (ffstring, " <a href=", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, link_featc, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, "val=", FALSE, FALSE, TILDE_IGNORE);
@@ -4511,6 +4516,7 @@ NLM_EXTERN CharPtr FormatSlashBlock (
is.accession_version = gbseq->accession_version;
is.other_seqids = gbseq->other_seqids;
is.secondary_accessions = gbseq->secondary_accessions;
+ is.project = gbseq->project;
is.keywords = gbseq->keywords;
is.segment = gbseq->segment;
is.source = gbseq->source;
diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h
index 1c4e4ebd..2c966b2f 100644
--- a/api/asn2gnbi.h
+++ b/api/asn2gnbi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/30/03
*
-* $Revision: 1.61 $
+* $Revision: 1.62 $
*
* File Description: New GenBank flatfile generator, internal header
*
@@ -705,7 +705,9 @@ NLM_EXTERN Char link_sp [MAX_WWWBUF];
NLM_EXTERN void FF_www_db_xref(
IntAsn2gbJobPtr ajp,
StringItemPtr ffstring,
- CharPtr db, CharPtr identifier
+ CharPtr db,
+ CharPtr identifier,
+ BioseqPtr bsp
);
NLM_EXTERN Boolean StringIsJustQuotes (
diff --git a/api/asn2gnbk.h b/api/asn2gnbk.h
index eb181c3c..7db3d786 100644
--- a/api/asn2gnbk.h
+++ b/api/asn2gnbk.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.69 $
+* $Revision: 6.70 $
*
* File Description: New GenBank flatfile generator
*
@@ -131,7 +131,8 @@ typedef unsigned long LckType;
#define LOOKUP_FAR_LOCATIONS 32
#define LOOKUP_FAR_PRODUCTS 64
#define LOOKUP_FAR_HISTORY 128
-#define LOOKUP_FAR_OTHERS 256
+#define LOOKUP_FAR_INFERENCE 256
+#define LOOKUP_FAR_OTHERS 512
/* bit flags for unusual customized reports */
diff --git a/api/edutil.c b/api/edutil.c
index 4a288a79..5e08e2b9 100644
--- a/api/edutil.c
+++ b/api/edutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/4/94
*
-* $Revision: 6.54 $
+* $Revision: 6.56 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,12 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: edutil.c,v $
+* Revision 6.56 2006/04/04 18:00:47 kans
+* SeqLocAddEx properly returns value to &last argument, makes SeqLocMix from DeltaSeqsToSeqLocs
+*
+* Revision 6.55 2006/03/30 19:50:15 kans
+* DeltaSeqsToSeqLocs calls SeqLocAddEx for efficient list usage
+*
* Revision 6.54 2006/02/07 13:41:29 bollin
* added function AdjustFeatureForGapChange, which changes a feature to accommodate
* a change in the length of a gap
@@ -370,15 +376,16 @@ NLM_EXTERN SeqLocPtr LIBCALL SeqLocPackage (SeqLocPtr head)
* if incoming is merged, deletes the incoming SeqLoc
*
*****************************************************************************/
-NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy)
+static SeqLocPtr LIBCALL SeqLocAddEx (SeqLocPtr PNTR head, SeqLocPtr PNTR lastp, SeqLocPtr slp, Boolean merge, Boolean do_copy)
{
- SeqLocPtr tmp, last, retval = NULL;
+ SeqLocPtr tmp, last = NULL, retval = NULL;
Boolean merged = FALSE; /* intervals were merged */
if (slp == NULL) return NULL;
- last = NULL;
- if (* head != NULL)
+ if (lastp != NULL) {
+ last = *lastp;
+ } else if (head != NULL && *head != NULL)
{
for (tmp = *head; tmp != NULL; tmp = tmp->next)
{
@@ -501,12 +508,16 @@ ret:
else
tmp = slp;
- tmp->next = NULL;
+ if (tmp != NULL) {
+ tmp->next = NULL;
+ }
- if (last != NULL)
+ if (last != NULL) {
last->next = tmp;
- else
+ } else if (head != NULL) {
*head = tmp;
+ }
+ last = tmp;
retval = tmp;
}
else
@@ -515,10 +526,30 @@ ret:
if (! do_copy) /* got to free it here */
SeqLocFree(slp);
}
+ if (lastp != NULL) {
+ *lastp = last;
+ }
return retval;
}
+/* public entry point: finds the current tail of the chain at *head and */
+/* passes it to SeqLocAddEx; returns NULL at once if slp is NULL */
+NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy)
+{
+  SeqLocPtr  curr;
+  SeqLocPtr  tail = NULL;
+
+  if (slp == NULL) return NULL;
+
+  /* an empty chain leaves tail as NULL */
+  for (curr = *head; curr != NULL; curr = curr->next) {
+    tail = curr;
+  }
+
+  return SeqLocAddEx (head, &tail, slp, merge, do_copy);
+}
+
/*****************************************************************************
*
* SegLocToParts(BioseqPtr seg, SeqLocPtr slp)
@@ -652,7 +683,7 @@ NLM_EXTERN Int4 LIBCALL ISADeltaSeqsToSeqLoc (SeqLocPtr slp)
*****************************************************************************/
NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp)
{
- SeqLocPtr head = NULL, thead=NULL;
+ SeqLocPtr head = NULL, thead = NULL, last = NULL;
DeltaSeqPtr curr;
SeqInt si;
Dbtag db;
@@ -674,14 +705,15 @@ NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp)
oi.id = 1;
+
for (curr = dsp; curr != NULL; curr = curr->next)
{
if (curr->choice == 1) /* a SeqLoc */
- SeqLocAdd(&thead, (SeqLocPtr)(curr->data.ptrvalue), TRUE, TRUE);
+ SeqLocAddEx (&thead, &last, (SeqLocPtr)(curr->data.ptrvalue), TRUE, TRUE);
else
{
- si.to = ((SeqLitPtr)(curr->data.ptrvalue))->length - 1;
- SeqLocAdd(&thead, &vn, TRUE, TRUE);
+ si.to = ((SeqLitPtr) (curr->data.ptrvalue))->length - 1;
+ SeqLocAddEx (&thead, &last, &vn, TRUE, TRUE);
}
oi.id++;
}
diff --git a/api/seqmgr.c b/api/seqmgr.c
index ecefefa8..d3e0e41e 100644
--- a/api/seqmgr.c
+++ b/api/seqmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.259 $
+* $Revision: 6.263 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -39,6 +39,18 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqmgr.c,v $
+* Revision 6.263 2006/04/13 20:02:15 kans
+* LookupFarSeqIDs takes inference parameter
+*
+* Revision 6.262 2006/04/05 17:18:23 kans
+* IndexSegmentedParts uses Int4 for numsegs to avoid overflow to negative number, failure to MemNew
+*
+* Revision 6.261 2006/03/21 15:32:13 kans
+* set ignore flag on generated gaps in IndexRecordedFeatures, not as side effect of sorting callback
+*
+* Revision 6.260 2006/03/20 22:53:44 kans
+* sort flatfile-generated gap feature last, set ignore flag
+*
* Revision 6.259 2006/02/17 19:05:05 kans
* special case coded_by only for CDS feature on isolated protein bioseq
*
@@ -7123,6 +7135,16 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
return 1;
}
+ /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */
+
+ if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) {
+ if (sp1->itemID > sp2->itemID) {
+ return 1;
+ } else if (sp1->itemID < sp2->itemID) {
+ return -1;
+ }
+ }
+
/* if identical cds ranges, compare codon_start */
if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) {
@@ -7295,6 +7317,16 @@ static int LIBCALLBACK SortFeatItemListByRev (VoidPtr vp1, VoidPtr vp2)
return 1;
}
+ /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */
+
+ if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) {
+ if (sp1->itemID > sp2->itemID) {
+ return 1;
+ } else if (sp1->itemID < sp2->itemID) {
+ return -1;
+ }
+ }
+
/* if identical cds ranges, compare codon_start */
if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) {
@@ -7431,8 +7463,8 @@ static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp)
BioseqPtr bsp;
BioseqExtraPtr bspextra;
BioseqSetPtr bssp;
- Int2 i;
- Int2 numsegs = 0;
+ Int4 i;
+ Int4 numsegs = 0;
ObjMgrDataPtr omdp;
SMSeqIdxPtr PNTR partsByLoc;
SMSeqIdxPtr PNTR partsBySeqId;
@@ -7534,7 +7566,7 @@ static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp)
*
*****************************************************************************/
-static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats)
+static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats, Uint4 baseItemID)
{
BioseqPtr bsp;
@@ -7553,6 +7585,7 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats)
Int4 i;
Int4 j;
SMFeatItemPtr item;
+ SMFeatItemPtr last;
BioseqPtr nuc;
Int4 numfeats;
Int4 numgenes;
@@ -7568,7 +7601,7 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats)
bssp = (BioseqSetPtr) sep->data.ptrvalue;
if (bssp == NULL) return;
for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
- IndexRecordedFeatures (sep, dorevfeats);
+ IndexRecordedFeatures (sep, dorevfeats, baseItemID);
}
return;
}
@@ -7643,6 +7676,25 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats)
}
}
+ /* gap feature in record overrides flatfile-generated feature */
+
+ if (baseItemID > 0) {
+ last = featsByPos [0];
+ for (i = 1; i < numfeats; i++) {
+ item = featsByPos [i];
+ if (item != NULL && last != NULL) {
+ if (last->subtype == FEATDEF_gap && item->subtype == FEATDEF_gap) {
+ if (last->left == item->left && last->right == item->right) {
+ if (item->itemID >= baseItemID) {
+ item->ignore = TRUE;
+ }
+ }
+ }
+ }
+ last = item;
+ }
+ }
+
/* build arrays of sorted gene, mRNA, CDS, publication, and biosource features for lookup by overlap */
bspextra->genesByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numgenes), 0, FEATDEF_GENE);
@@ -8338,6 +8390,7 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx (
{
AdpBspPtr abp;
AnnotDescPtr PNTR annotDescByID;
+ Uint4 baseItemID = 0;
BioseqPtr bsp;
BioseqExtraPtr bspextra;
Int4 count;
@@ -8414,6 +8467,19 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx (
AssignIDsInEntityEx (entityID, 0, NULL, extra);
+ /* get first feature itemID in remote feature tables (including generated gaps) */
+
+ for (vnp = extra; vnp != NULL && baseItemID == 0; vnp = vnp->next) {
+ bsp = (BioseqPtr) vnp->data.ptrvalue;
+ if (bsp == NULL) continue;
+ for (sap = bsp->annot; sap != NULL && baseItemID == 0; sap = sap->next) {
+ if (sap->type != 1) continue;
+ for (sfp = (SeqFeatPtr) sap->data; sfp != NULL && baseItemID == 0; sfp = sfp->next) {
+ baseItemID = sfp->idx.itemID;
+ }
+ }
+ }
+
/* set scope for FindAppropriateBioseq, FindFirstLocalBioseq */
oldscope = SeqEntrySetScope (sep);
@@ -8481,7 +8547,7 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx (
/* finish building array of sorted features on each indexed bioseq */
- IndexRecordedFeatures (sep, dorevfeats);
+ IndexRecordedFeatures (sep, dorevfeats, baseItemID);
/* set best protein feature for segmented protein bioseqs and their parts */
@@ -11163,6 +11229,7 @@ NLM_EXTERN Int4 LookupFarSeqIDs (
Boolean products,
Boolean alignments,
Boolean history,
+ Boolean inference,
Boolean others
)
@@ -11175,7 +11242,7 @@ NLM_EXTERN Int4 LookupFarSeqIDs (
func = smp->seq_id_precache_func;
SeqMgrUnlock ();
if (func == NULL) return 0;
- return (*func) (sep, components, locations, products, alignments, history, others);
+ return (*func) (sep, components, locations, products, alignments, history, inference, others);
}
/*****************************************************************************
diff --git a/api/seqmgr.h b/api/seqmgr.h
index c80ab55e..89ff1bbc 100644
--- a/api/seqmgr.h
+++ b/api/seqmgr.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.59 $
+* $Revision: 6.60 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -40,6 +40,9 @@
*
*
* $Log: seqmgr.h,v $
+* Revision 6.60 2006/04/13 20:02:15 kans
+* LookupFarSeqIDs takes inference parameter
+*
* Revision 6.59 2006/02/16 20:24:32 kans
* added bad_order and mixed_strand fields to feature index - to be used for get best gene overlap function in cases of trans-splicing
*
@@ -316,7 +319,7 @@ typedef BioseqPtr (LIBCALLBACK * BSFetchTop)
typedef BioseqPtr (LIBCALLBACK * BSFetch) PROTO((SeqIdPtr sip, Pointer data));
-typedef Int4 (LIBCALLBACK * SIDPreCacheFunc) (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, Boolean alignments, Boolean history, Boolean others);
+typedef Int4 (LIBCALLBACK * SIDPreCacheFunc) (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, Boolean alignments, Boolean history, Boolean inference, Boolean others);
typedef Int4 (LIBCALLBACK * SeqLenLookupFunc) (Int4 gi);
typedef CharPtr (LIBCALLBACK * AccnVerLookupFunc) (Int4 gi);
typedef SeqIdPtr (LIBCALLBACK * SeqIdSetLookupFunc) (Int4 gi);
@@ -1118,6 +1121,7 @@ NLM_EXTERN Int4 LookupFarSeqIDs (
Boolean products,
Boolean alignments,
Boolean history,
+ Boolean inference,
Boolean others
);
diff --git a/api/seqport.c b/api/seqport.c
index 89550a8a..b0e77145 100644
--- a/api/seqport.c
+++ b/api/seqport.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.147 $
+* $Revision: 6.150 $
*
* File Description: Ports onto Bioseqs
*
@@ -39,6 +39,15 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqport.c,v $
+* Revision 6.150 2006/03/22 15:31:32 kans
+* SeqPortStreamSeqLoc gives unique message when bailing on gi 0 as opposed to failure after trying to load
+*
+* Revision 6.149 2006/03/07 21:34:28 kans
+* checks for gi 0 now also check for negative value
+*
+* Revision 6.148 2006/03/07 20:02:01 kans
+* SeqPortStreamSeqLoc immediately treats gi 0 as an error
+*
* Revision 6.147 2006/01/23 13:01:41 bollin
* when converting sequences from raw to delta, adjust any alignments that the
* sequence may be part of.
@@ -2978,6 +2987,21 @@ static Int4 SeqPortStreamSeqLoc (
sip = SeqLocId (slp);
if (sip == NULL) return 0;
+ if (sip->choice == SEQID_GI && sip->data.intvalue <= 0) {
+
+ /* gi 0 or negative is always a data error, just report and bail */
+
+ SeqIdWrite (sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
+ if (parentID != NULL) {
+ SeqIdWrite (parentID, pid, PRINTID_FASTA_LONG, sizeof (pid) - 1);
+ ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream ignoring Bioseq %s component of %s", buf, pid);
+ } else {
+ ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream ignoring Bioseq %s", buf);
+ }
+ sdp->failed = TRUE;
+ return 0;
+ }
+
bsp = BioseqLockById (sip);
#ifdef OS_UNIX
diff --git a/api/sequtil.c b/api/sequtil.c
index c7d8d53e..07cb6305 100644
--- a/api/sequtil.c
+++ b/api/sequtil.c
@@ -29,13 +29,32 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.190 $
+* $Revision: 6.196 $
*
* File Description: Sequence Utilities for objseq and objsset
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: sequtil.c,v $
+* Revision 6.196 2006/04/06 15:41:19 kans
+* added DG to WHICH_db_accession
+*
+* Revision 6.195 2006/04/05 16:45:01 bollin
+* special left-right end handling for circular topology in GetThePointForOffset
+*
+* Revision 6.194 2006/03/30 17:04:53 kans
+* DF is DDBJ CON accession prefix
+*
+* Revision 6.193 2006/03/23 18:31:32 kans
+* added EB as NCBI EST
+*
+* Revision 6.192 2006/03/10 17:27:14 bollin
+* make sure parentptr is BioseqSet in GetEarlierSeqIdPtr
+*
+* Revision 6.191 2006/03/10 17:13:45 bollin
+* changes to GetEarlierSeqIdPtr to handle the situation where one of the Bioseqs
+* has not been indexed. Fixes bug reported by Serge Bazhin
+*
* Revision 6.190 2006/02/16 17:19:14 kans
* better handling of trans splicing in GetThePointForOffset, SeqLocStart (CB)
*
@@ -847,7 +866,7 @@ static char *this_file = __FILE__;
#include <seqport.h>
#include <sqnutils.h> /* prototype for SeqIdFindWorst */
#include <edutil.h>
-
+#include <subutil.h>
/**** Static variables used for randomized sequence conversions ****/
@@ -6625,7 +6644,7 @@ NLM_EXTERN Int4 CheckPointInBioseq (SeqPntPtr sp, BioseqPtr in)
static SeqIdPtr GetEarlierSeqIdPtr (SeqIdPtr sip1, SeqIdPtr sip2)
{
BioseqPtr bsp1, bsp2;
- BioseqSetPtr bssp;
+ BioseqSetPtr bssp = NULL;
SeqEntryPtr sep;
if (sip1 == NULL && sip2 != NULL)
@@ -6655,25 +6674,31 @@ static SeqIdPtr GetEarlierSeqIdPtr (SeqIdPtr sip1, SeqIdPtr sip2)
{
return sip1;
}
+
+ if (bsp1->idx.parentptr != NULL && bsp2->idx.parentptr != 0 && bsp1->idx.parentptr != bsp2->idx.parentptr)
+ {
+ return NULL;
+ }
+ if (bsp1->idx.parentptr != NULL && bsp1->idx.parenttype == OBJ_BIOSEQSET) {
+ bssp = bsp1->idx.parentptr;
+ } else if (bsp2->idx.parentptr != NULL && bsp2->idx.parenttype == OBJ_BIOSEQSET) {
+ bssp = bsp2->idx.parentptr;
+ }
+
+ if (bssp == NULL) return NULL;
- if (bsp1->idx.parenttype == OBJ_BIOSEQSET
- && bsp2->idx.parenttype == OBJ_BIOSEQSET
- && bsp1->idx.parentptr == bsp2->idx.parentptr)
+ for (sep = bssp->seq_set; sep != NULL; sep = sep->next)
{
- bssp = (BioseqSetPtr) bsp1->idx.parentptr;
- for (sep = bssp->seq_set; sep != NULL; sep = sep->next)
+ if (sep->data.ptrvalue == bsp1)
{
- if (sep->data.ptrvalue == bsp1)
- {
- return sip1;
- }
- else if (sep->data.ptrvalue == bsp2)
- {
- return sip2;
- }
+ return sip1;
+ }
+ else if (sep->data.ptrvalue == bsp2)
+ {
+ return sip2;
}
}
- return sip1;
+ return NULL;
}
/*****************************************************************************
@@ -6689,8 +6714,16 @@ Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end)
Int4 lowest = -1, highest = 0, tmp;
SeqIdPtr low_sip = NULL, high_sip = NULL, first_sip = NULL, last_sip = NULL;
Boolean id_same;
+ BioseqPtr bsp;
+ Boolean is_circular = FALSE;
pnt = NULL; /* get first or last single span type in "of"*/
+
+ bsp = BioseqFind (SeqLocId(of));
+ if (bsp != NULL && bsp->topology == TOPOLOGY_CIRCULAR) {
+ is_circular = TRUE;
+ }
+
while ((pnt = SeqLocFindNext(of, pnt)) != NULL)
{
last_strand = SeqLocStrand (pnt);
@@ -6751,12 +6784,32 @@ Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end)
switch (which_end)
{
case SEQLOC_LEFT_END:
- target->point = lowest;
- target->id = low_sip;
+ if (is_circular) {
+ if (all_minus) {
+ target->point = SeqLocStart (last);
+ target->id = last_sip;
+ } else {
+ target->point = SeqLocStart (first);
+ target->id = first_sip;
+ }
+ } else {
+ target->point = lowest;
+ target->id = low_sip;
+ }
break;
case SEQLOC_RIGHT_END:
- target->point = highest;
- target->id = high_sip;
+ if (is_circular) {
+ if (all_minus) {
+ target->point = SeqLocStop (first);
+ target->id = first_sip;
+ } else {
+ target->point = SeqLocStop (last);
+ target->id = last_sip;
+ }
+ } else {
+ target->point = highest;
+ target->id = high_sip;
+ }
break;
case SEQLOC_START:
if (all_minus)
@@ -9325,7 +9378,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"DT") == 0) ||
(StringICmp(temp,"DV") == 0) ||
(StringICmp(temp,"DW") == 0) ||
- (StringICmp(temp,"DY") == 0) ) { /* NCBI EST */
+ (StringICmp(temp,"DY") == 0) ||
+ (StringICmp(temp,"EB") == 0) ) { /* NCBI EST */
retcode = ACCN_NCBI_EST;
} else if ((StringICmp(temp,"BV") == 0)) { /* NCBI STS */
retcode = ACCN_NCBI_STS;
@@ -9414,7 +9468,9 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
retcode = ACCN_DDBJ_GENOME;
} else if ((StringICmp(temp,"AK") == 0)) { /* DDBJ HTGS */
retcode = ACCN_DDBJ_HTGS;
- } else if ((StringICmp(temp,"BA") == 0)) { /* DDBJ CON division */
+ } else if ((StringICmp(temp,"BA") == 0) ||
+ (StringICmp(temp,"DF") == 0) ||
+ (StringICmp(temp,"DG") == 0)) { /* DDBJ CON division */
retcode = ACCN_DDBJ_CON;
} else if ((StringICmp(temp,"BD") == 0) ||
(StringICmp(temp,"DD") == 0)) { /* DDBJ patent division */
diff --git a/api/sqnutil1.c b/api/sqnutil1.c
index e44797e5..432b130c 100644
--- a/api/sqnutil1.c
+++ b/api/sqnutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.369 $
+* $Revision: 6.376 $
*
* File Description:
*
@@ -2685,9 +2685,7 @@ static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq)
Int2 choice = 0;
GeneRefPtr grp;
- if (StringICmp (gbq->qual, "pseudo") == 0) {
- choice = 1;
- } else if (StringICmp (gbq->qual, "map") == 0) {
+ if (StringICmp (gbq->qual, "map") == 0) {
choice = 2;
} else if (StringICmp (gbq->qual, "allele") == 0) {
choice = 3;
@@ -2698,9 +2696,6 @@ static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq)
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
if (grp == NULL) return FALSE;
switch (choice) {
- case 1 :
- grp->pseudo = TRUE;
- break;
case 2 :
if (grp->maploc != NULL) return FALSE;
if (StringHasNoText (gbq->val)) return FALSE;
@@ -2785,6 +2780,7 @@ extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset)
{
Int4 diff;
+ Int2 j;
Boolean locmap;
int num_errs;
CharPtr pos;
@@ -2809,6 +2805,12 @@ extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset)
rrp->ext.choice = 2;
trp = (tRNAPtr) MemNew (sizeof (tRNA));
rrp->ext.value.ptrvalue = (Pointer) trp;
+ if (trp != NULL) {
+ trp->aatype = 2;
+ for (j = 0; j < 6; j++) {
+ trp->codon [j] = 255;
+ }
+ }
}
if (rrp->ext.choice != 2) return FALSE;
@@ -5201,7 +5203,8 @@ static void FixOldDbxrefs (ValNodePtr vnp)
dbt->db = MemFree (dbt->db);
dbt->db = StringSave ("SubtiList");
}
- if (StringICmp (dbt->db, "Swiss-Prot") == 0) {
+ if (StringICmp (dbt->db, "Swiss-Prot") == 0 ||
+ StringICmp (dbt->db, "SWISSPROT") == 0) {
dbt->db = MemFree (dbt->db);
dbt->db = StringSave ("UniProt/Swiss-Prot");
} else if (StringICmp (dbt->db, "TrEMBL") == 0) {
@@ -7658,6 +7661,7 @@ NLM_EXTERN void CleanUpSeqFeat (
BioseqPtr bsp;
CodeBreakPtr cbp;
CdRegionPtr crp;
+ GeneRefPtr grp;
Boolean hasNulls;
SeqIdPtr id;
ImpFeatPtr ifp;
@@ -7774,7 +7778,15 @@ NLM_EXTERN void CleanUpSeqFeat (
CleanupSeqLoc (sfp->location);
strand = SeqLocStrand (sfp->location);
id = SeqLocId (sfp->location);
- if (sfp->data.choice == SEQFEAT_CDREGION) {
+ if (sfp->data.choice == SEQFEAT_GENE) {
+ grp = (GeneRefPtr) sfp->data.value.ptrvalue;
+ if (grp != NULL) {
+ if (grp->pseudo) {
+ sfp->pseudo = TRUE;
+ grp->pseudo = FALSE;
+ }
+ }
+ } else if (sfp->data.choice == SEQFEAT_CDREGION) {
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
if (crp != NULL) {
crp->code_break = SortCodeBreaks (sfp, crp->code_break);
@@ -7797,6 +7809,12 @@ NLM_EXTERN void CleanUpSeqFeat (
}
} else if (sfp->data.choice == SEQFEAT_RNA) {
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (rrp != NULL) {
+ if (rrp->pseudo) {
+ sfp->pseudo = TRUE;
+ rrp->pseudo = FALSE;
+ }
+ }
if (rrp != NULL && rrp->ext.choice == 2) {
trp = (tRNAPtr) rrp->ext.value.ptrvalue;
if (trp != NULL && trp->anticodon != NULL) {
@@ -9827,6 +9845,40 @@ NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, Visi
return index;
}
+typedef struct uopdata { /* search state passed to FindUopProc through the callback's userdata pointer */
+ UserObjectPtr rsult; /* user object stored by FindUopProc when its type string matches tag */
+ CharPtr tag; /* type string being searched for, compared with StringICmp */
+} UopData, PNTR UopDataPtr;
+
+/* callback for VisitUserObjectsInUop: stores uop in the UopData passed as */
+/* userdata when the object's type string equals the wanted tag, ignoring */
+/* case; a later match overwrites an earlier one */
+static void FindUopProc (
+  UserObjectPtr uop,
+  Pointer userdata
+)
+
+{
+  UopDataPtr   search;
+  ObjectIdPtr  type;
+
+  if (uop == NULL || userdata == NULL) return;
+
+  type = uop->type;
+  if (type == NULL) return;
+
+  search = (UopDataPtr) userdata;
+  if (StringICmp (type->str, search->tag) == 0) {
+    search->rsult = uop;
+  }
+}
+
+/* runs FindUopProc over top via VisitUserObjectsInUop and returns the user */
+/* object it recorded for tag; returns NULL if top is NULL, tag has no text, */
+/* or nothing matched */
+NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag)
+
+{
+  UopData  search;
+
+  if (top == NULL) return NULL;
+  if (StringHasNoText (tag)) return NULL;
+
+  search.tag = tag;
+  search.rsult = NULL;
+  VisitUserObjectsInUop (top, (Pointer) &search, FindUopProc);
+
+  return search.rsult;
+}
+
NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop)
{
diff --git a/api/sqnutil2.c b/api/sqnutil2.c
index f5d24d2b..7ef549ec 100644
--- a/api/sqnutil2.c
+++ b/api/sqnutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.244 $
+* $Revision: 6.251 $
*
* File Description:
*
@@ -1562,18 +1562,19 @@ NLM_EXTERN SqnTagPtr SqnTagFree (SqnTagPtr stp)
return MemFree (stp);
}
-static Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2)
+extern Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2)
{
Char ch1, ch2;
+ if (StringHasNoText (str1) && StringHasNoText (str2)) return TRUE;
if (StringHasNoText (str1) || StringHasNoText (str2)) return FALSE;
ch1 = *str1;
ch2 = *str2;
while (ch1 != '\0' && ch2 != '\0') {
if (TO_LOWER (ch1) != TO_LOWER (ch2)) {
- if ((ch1 != '-' && ch1 != '_') || (ch2 != '_' && ch2 != '-')) return FALSE;
+ if ((ch1 != '-' && ch1 != '_' && ch1 != ' ') || (ch2 != '_' && ch2 != '-' && ch2 != ' ')) return FALSE;
}
str1++;
str2++;
@@ -1582,7 +1583,7 @@ static Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2)
}
if (TO_LOWER (ch1) != TO_LOWER (ch2)) {
- if ((ch1 != '-' && ch1 != '_') || (ch2 != '_' && ch2 != '-')) return FALSE;
+ if ((ch1 != '-' && ch1 != '_' && ch1 != ' ') || (ch2 != '_' && ch2 != '-' && ch2 != ' ')) return FALSE;
}
return TRUE;
@@ -2436,6 +2437,74 @@ NLM_EXTERN UserObjectPtr ParseTitleIntoTpaAssembly (
return uop;
}
+/* Splits "field=item;field=item;..." text into pairs and adds each pair to a */
+/* structured comment user object with AddItemStructuredCommentUserObject. */
+/*   uop    - object to add to; if NULL, one is obtained from */
+/*            CreateStructuredCommentUserObject */
+/*   str    - text to parse; a StringSave copy is scanned, str is not altered */
+/*   prefix - if it has text and is found, scanning starts just past it */
+/*   suffix - if it has text and is found after that point, scanning ends there */
+/* Returns uop (the passed-in or newly created object, possibly NULL). */
+NLM_EXTERN UserObjectPtr ParseStringIntoStructuredComment (
+  UserObjectPtr uop,
+  CharPtr str,
+  CharPtr prefix,
+  CharPtr suffix
+)
+
+{
+  CharPtr  copy;
+  CharPtr  eq;
+  CharPtr  field;
+  CharPtr  hit;
+  CharPtr  item;
+  CharPtr  next;
+
+  if (uop == NULL) {
+    uop = CreateStructuredCommentUserObject ();
+    if (uop == NULL) return uop;
+  }
+  if (str == NULL) return uop;
+
+  copy = StringSave (str);
+  if (copy == NULL) return uop;
+
+  /* narrow the scan window to the text between prefix and suffix */
+  field = copy;
+  if (StringDoesHaveText (prefix)) {
+    hit = StringStr (field, prefix);
+    if (hit != NULL) {
+      field = hit + StringLen (prefix);
+    }
+  }
+  if (StringDoesHaveText (suffix)) {
+    hit = StringStr (field, suffix);
+    if (hit != NULL) {
+      *hit = '\0';
+    }
+  }
+
+  /* each pass consumes one "field=item" pair; pairs are separated by ';' */
+  while (field != NULL && *field != '\0') {
+    eq = StringChr (field, '=');
+    if (eq == NULL) break;
+    *eq = '\0';
+    item = eq + 1;
+
+    next = StringChr (item, ';');
+    if (next != NULL) {
+      *next = '\0';
+      next++;
+    }
+
+    TrimSpacesAroundString (field);
+    TrimSpacesAroundString (item);
+    AddItemStructuredCommentUserObject (uop, field, item);
+
+    /* NULL here means no ';' followed the item, which ends the scan */
+    field = next;
+  }
+
+  MemFree (copy);
+
+  return uop;
+}
+
/* PHRAP file reading functions */
static Boolean HasNoText (CharPtr str)
@@ -4934,6 +5003,63 @@ static Boolean InvalidInference (CharPtr str)
return TRUE;
}
+/* Fills trp->codon from a comma-separated list of codons in val. */
+/* All six slots are first reset to 255; if val has no text they stay that way. */
+/* For each token, an upper case U in the first three characters becomes T, */
+/* the token is expanded with ParseDegenerateCodon into a scratch tRNA, and */
+/* every slot not equal to 255 is collected.  The collected values are sorted, */
+/* made unique, and the first six are stored in trp->codon. */
+static void ParseCodonRecognized (CharPtr val, tRNAPtr trp)
+
+{
+  Char        buf [256];
+  Char        codon [16];
+  ValNodePtr  head = NULL;
+  Int2        i;
+  Int2        j;
+  CharPtr     ptr;
+  CharPtr     str;
+  tRNA        tr;
+  ValNodePtr  vnp;
+
+  if (trp == NULL) return;
+  for (j = 0; j < 6; j++) {
+    trp->codon [j] = 255;
+  }
+  if (StringHasNoText (val)) return;
+
+  MemSet ((Pointer) &tr, 0, sizeof (tRNA));
+
+  StringNCpy_0 (buf, val, sizeof (buf));
+  str = buf;
+  while (StringDoesHaveText (str)) {
+    /* cut off the current token; ptr is the next token or NULL at the end */
+    ptr = StringChr (str, ',');
+    if (ptr != NULL) {
+      *ptr = '\0';
+      ptr++;
+    }
+    TrimSpacesAroundString (str);
+    if (StringDoesHaveText (str)) {
+      for (j = 0; j < 6; j++) {
+        tr.codon [j] = 255;
+      }
+      /* bounded copy: a token may be as long as buf, codon holds 16 chars */
+      StringNCpy_0 (codon, str, sizeof (codon));
+      for (i = 0; i < 3; i++) {
+        if (codon [i] == 'U') {
+          codon [i] = 'T';
+        }
+      }
+      ParseDegenerateCodon (&tr, (Uint1Ptr) codon);
+      for (i = 0; i < 6; i++) {
+        if (tr.codon [i] == 255) continue;
+        ValNodeAddInt (&head, 0, (long) tr.codon [i]);
+      }
+    }
+    str = ptr;
+  }
+  if (head == NULL) return;
+
+  head = ValNodeSort (head, SortByIntvalue);
+  head = UniqueIntValNode (head);
+  for (vnp = head, j = 0; vnp != NULL && j < 6; vnp = vnp->next, j++) {
+    trp->codon [j] = (Uint1) vnp->data.intvalue;
+  }
+
+  /* the list was only a scratch accumulator; free it so it does not leak */
+  ValNodeFree (head);
+}
+
static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val, Int4 offset)
{
@@ -5061,6 +5187,20 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
(StringCmp (qual, "codon_recognized") == 0 || StringCmp (qual, "codons_recognized") == 0)) {
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
if (rrp != NULL && rrp->type == 3) {
+ if (rrp->ext.choice == 0 && rrp->ext.value.ptrvalue == NULL) {
+ rrp->ext.choice = 2;
+ trna = (tRNAPtr) MemNew (sizeof (tRNA));
+ rrp->ext.value.ptrvalue = (Pointer) trna;
+ if (trna != NULL) {
+ trna->aatype = 2;
+ for (j = 0; j < 6; j++) {
+ trna->codon [j] = 255;
+ }
+ }
+ }
+ trna = (tRNAPtr) rrp->ext.value.ptrvalue;
+ ParseCodonRecognized (val, trna);
+ /*
StringNCpy_0 ((CharPtr) codon, val, sizeof (codon));
if (StringLen ((CharPtr) codon) == 3) {
for (j = 0; j < 3; j++) {
@@ -5068,11 +5208,11 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
codon [j] = 'T';
}
}
- trna = (tRNAPtr) rrp->ext.value.ptrvalue;
if (trna != NULL) {
ParseDegenerateCodon (trna, (Uint1Ptr) codon);
}
}
+ */
}
} else if (ifp != NULL && StringICmp (ifp->key, "variation") == 0 && ParseQualIntoSnpUserObject (sfp, qual, val)) {
} else if (ifp != NULL && StringICmp (ifp->key, "STS") == 0 && ParseQualIntoStsUserObject (sfp, qual, val)) {
@@ -5107,6 +5247,9 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
sfp->comment = str;
}
return;
+ } else if (qnum == GBQUAL_pseudo) {
+ sfp->pseudo = TRUE;
+ return;
} else if ((qnum == GBQUAL_gene || qnum == GBQUAL_locus_tag) && sfp->data.choice != SEQFEAT_GENE) {
if (StringCmp (val, "-") == 0) {
val = NULL;
@@ -5203,7 +5346,7 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
}
} else if (sfp->data.choice == SEQFEAT_GENE) {
- if (qnum == GBQUAL_gene || qnum == GBQUAL_pseudo || qnum == GBQUAL_allele || qnum == GBQUAL_map || qnum == GBQUAL_locus_tag) {
+ if (qnum == GBQUAL_gene || qnum == GBQUAL_allele || qnum == GBQUAL_map || qnum == GBQUAL_locus_tag) {
if (qnum == GBQUAL_gene) {
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
if (grp != NULL) {
@@ -5219,11 +5362,6 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
ValNodeCopyStr (&(grp->syn), 0, val);
}
}
- } else if (qnum == GBQUAL_pseudo) {
- grp = (GeneRefPtr) sfp->data.value.ptrvalue;
- if (grp != NULL) {
- grp->pseudo = TRUE;
- }
} else if (qnum == GBQUAL_allele) {
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
if (grp != NULL) {
@@ -5280,8 +5418,6 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
}
}
return;
- } else if (qnum == GBQUAL_pseudo) {
- sfp->pseudo = TRUE;
}
} else if (sfp->data.choice == SEQFEAT_PROT) {
if (qnum == GBQUAL_function || qnum == GBQUAL_EC_number || qnum == GBQUAL_product) {
@@ -5304,14 +5440,19 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
if (rrp->type == 3) {
aa = ParseTRnaString (val, &justTrnaText, codon, FALSE);
if (aa != 0) {
- rrp->ext.choice = 2;
- trna = (tRNAPtr) MemNew (sizeof (tRNA));
- rrp->ext.value.ptrvalue = (Pointer) trna;
- if (trna != NULL) {
- trna->aatype = 2;
- for (j = 0; j < 6; j++) {
- trna->codon [j] = 255;
+ if (rrp->ext.choice == 0 && rrp->ext.value.ptrvalue == NULL) {
+ rrp->ext.choice = 2;
+ trna = (tRNAPtr) MemNew (sizeof (tRNA));
+ rrp->ext.value.ptrvalue = (Pointer) trna;
+ if (trna != NULL) {
+ trna->aatype = 2;
+ for (j = 0; j < 6; j++) {
+ trna->codon [j] = 255;
+ }
}
+ }
+ trna = (tRNAPtr) rrp->ext.value.ptrvalue;
+ if (trna != NULL) {
if (justTrnaText) {
for (j = 0; j < 6; j++) {
trna->codon [j] = codon [j];
@@ -5357,9 +5498,6 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
return;
} else if (qnum == GBQUAL_anticodon) {
if (ParseAnticodon (sfp, val, offset)) return;
- } else if (qnum == GBQUAL_pseudo) {
- sfp->pseudo = TRUE;
- return;
}
} else if (sfp->data.choice == SEQFEAT_BIOSRC) {
if (ParseQualIntoBioSource (sfp, qual, val)) return;
@@ -8555,6 +8693,8 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le
Int2 bs_max = 0, bs_min = 0;
Int4 new_pos, old_pos;
Int2 val;
+ Int4 loc_stop;
+ Boolean changed = FALSE;
if (sgp == NULL || num_to_trim < 1)
{
@@ -8605,6 +8745,7 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le
sgp->numval = new_len;
sgp->max.realvalue = fhmax;
sgp->min.realvalue = fhmin;
+ changed = TRUE;
}
else if (sgp->flags[2] == 2)
{
@@ -8640,6 +8781,7 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le
sgp->numval = new_len;
sgp->max.intvalue = intmax;
sgp->min.intvalue = intmin;
+ changed = TRUE;
}
else if (sgp->flags[2] == 3)
{
@@ -8680,6 +8822,14 @@ NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_le
sgp->numval = new_len;
sgp->max.intvalue = bs_max;
sgp->min.intvalue = bs_min;
+ changed = TRUE;
+ }
+ if (changed)
+ {
+ loc_stop = SeqLocStop (sgp->loc);
+ sgp->loc = SeqLocDelete (sgp->loc, SeqLocId (sgp->loc),
+ loc_stop - num_to_trim + 1,
+ loc_stop, FALSE, &changed);
}
}
diff --git a/api/sqnutil3.c b/api/sqnutil3.c
index ef2a21bf..007ce12f 100644
--- a/api/sqnutil3.c
+++ b/api/sqnutil3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/7/00
*
-* $Revision: 6.69 $
+* $Revision: 6.74 $
*
* File Description:
*
@@ -115,7 +115,7 @@ static void SfpClearFeatIDs (
ClearFeatIDXrefs (sfp);
}
-NLM_EXTERN void ClearCDSmRNAfeatureIDs (
+NLM_EXTERN void ClearFeatureIDs (
SeqEntryPtr sep
)
@@ -123,11 +123,20 @@ NLM_EXTERN void ClearCDSmRNAfeatureIDs (
VisitFeaturesInSep (sep, NULL, SfpClearFeatIDs);
}
+typedef struct idpair { /* one old-ID to new-ID entry in the table built when feature IDs are renumbered */
+ Int4 before; /* numeric ID the feature carried when the table was built (copied from oip->id) */
+ Int4 after; /* replacement ID, taken from the running highestID counter */
+} IdPairData, PNTR IdPairPtr;
+
typedef struct fiddata {
- Int4 highestID;
+ Int4 highestID;
+ Int4 highestRef;
+ Int4 offset;
+ Int4 count;
+ IdPairPtr pairs;
} FidData, PNTR FidDataPtr;
-static void FindHighestFeatureID (
+static void FindHighestFeatID (
SeqFeatPtr sfp,
Pointer userdata
)
@@ -138,7 +147,6 @@ static void FindHighestFeatureID (
SeqFeatXrefPtr xref;
if (sfp == NULL) return;
- if (sfp->idx.subtype != FEATDEF_CDS && sfp->idx.subtype != FEATDEF_mRNA) return;
fip = (FidDataPtr) userdata;
if (fip == NULL) return;
@@ -147,7 +155,7 @@ static void FindHighestFeatureID (
if (oip != NULL) {
if (oip->str == NULL) {
if (oip->id >= fip->highestID) {
- fip->highestID = oip->id + 1;
+ fip->highestID = oip->id;
}
}
}
@@ -158,15 +166,29 @@ static void FindHighestFeatureID (
oip = (ObjectIdPtr) xref->id.value.ptrvalue;
if (oip != NULL) {
if (oip->str == NULL) {
- if (oip->id >= fip->highestID) {
- fip->highestID = oip->id + 1;
+ if (oip->id >= fip->highestRef) {
+ fip->highestRef = oip->id;
}
}
}
}
}
-static void SfpAssignCDSmRNAfeatureIDs (
+/* Visits every feature in the entry with FindHighestFeatID and returns the
+   highestID value that callback accumulated (0 if it never raised it). */
+NLM_EXTERN Int4 FindHighestFeatureID (
+  SeqEntryPtr sep
+)
+
+{
+  FidData  scan;
+
+  /* zero-fill starts highestID and highestRef, like every other field, at 0 */
+  MemSet ((Pointer) &scan, 0, sizeof (FidData));
+
+  VisitFeaturesInSep (sep, (Pointer) &scan, FindHighestFeatID);
+
+  return scan.highestID;
+}
+
+static void SfpAssignFeatIDs (
SeqFeatPtr sfp,
Pointer userdata
)
@@ -176,32 +198,241 @@ static void SfpAssignCDSmRNAfeatureIDs (
ObjectIdPtr oip;
if (sfp == NULL) return;
- if (sfp->idx.subtype != FEATDEF_CDS && sfp->idx.subtype != FEATDEF_mRNA) return;
fip = (FidDataPtr) userdata;
if (fip == NULL) return;
if (sfp->id.choice == 3) return;
oip = ObjectIdNew ();
if (oip == NULL) return;
+
+ (fip->highestID)++;
oip->id = fip->highestID;
sfp->id.value.ptrvalue = (Pointer) oip;
sfp->id.choice = 3;
+}
+
+/* Two passes over all features in the entry: FindHighestFeatID records the
+   numeric IDs already in use, then SfpAssignFeatIDs gives each feature that
+   has no choice-3 ID the next number above the recorded maximum. */
+NLM_EXTERN void AssignFeatureIDs (
+  SeqEntryPtr sep
+)
+
+{
+  FidData  state;
+
+  /* zero-fill starts highestID and highestRef, like every other field, at 0 */
+  MemSet ((Pointer) &state, 0, sizeof (FidData));
+
+  VisitFeaturesInSep (sep, (Pointer) &state, FindHighestFeatID);
+  VisitFeaturesInSep (sep, (Pointer) &state, SfpAssignFeatIDs);
+}
+
+/* Feature-visit callback: adds the offset carried in userdata (a FidData)
+   to the feature's own numeric ID.  Features without a choice-3 ID, and IDs
+   that are strings rather than numbers, are left alone. */
+static void SfpOffsetFeatIDs (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  FidDataPtr   ctx;
+  ObjectIdPtr  idp;
+
+  ctx = (FidDataPtr) userdata;
+  if (sfp == NULL || ctx == NULL) return;
+  if (sfp->id.choice != 3) return;
+
+  idp = (ObjectIdPtr) sfp->id.value.ptrvalue;
+  if (idp == NULL || idp->str != NULL) return;
+
+  idp->id += ctx->offset;
+}
+
+/* Adds offset to the numeric feature ID of every feature in the entry
+   (the per-feature work is done by SfpOffsetFeatIDs). */
+NLM_EXTERN void OffsetFeatureIDs (
+  SeqEntryPtr sep,
+  Int4 offset
+)
+
+{
+  FidData  shift;
+
+  MemSet ((Pointer) &shift, 0, sizeof (FidData));
+  shift.offset = offset;
+
+  VisitFeaturesInSep (sep, (Pointer) &shift, SfpOffsetFeatIDs);
+}
+
+/* Feature-visit callback: walks the feature's xref list and adds the offset
+   carried in userdata (a FidData) to every choice-3 xref whose ID is
+   numeric.  String IDs and other xref kinds are skipped. */
+static void SfpOffsetFeatIDXrefs (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  FidDataPtr      ctx;
+  ObjectIdPtr     idp;
+  SeqFeatXrefPtr  link;
+
+  ctx = (FidDataPtr) userdata;
+  if (sfp == NULL || ctx == NULL) return;
+
+  link = sfp->xref;
+  while (link != NULL) {
+    if (link->id.choice == 3) {
+      idp = (ObjectIdPtr) link->id.value.ptrvalue;
+      if (idp != NULL && idp->str == NULL) {
+        idp->id += ctx->offset;
+      }
+    }
+    link = link->next;
+  }
+}
+
+/* Adds offset to every numeric feature-ID xref on every feature in the
+   entry (the per-feature work is done by SfpOffsetFeatIDXrefs). */
+NLM_EXTERN void OffsetFeatureIDXrefs (
+  SeqEntryPtr sep,
+  Int4 offset
+)
+
+{
+  FidData  shift;
+
+  MemSet ((Pointer) &shift, 0, sizeof (FidData));
+  shift.offset = offset;
+
+  VisitFeaturesInSep (sep, (Pointer) &shift, SfpOffsetFeatIDXrefs);
+}
+
+static void SfpMakePairList (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+
+{
+ FidDataPtr fip;
+ Int4 idx;
+ IdPairPtr ipp;
+ ObjectIdPtr oip;
+
+ if (sfp == NULL) return;
+ fip = (FidDataPtr) userdata;
+ if (fip == NULL) return;
+ if (fip->pairs == NULL) return;
+
+ if (sfp->id.choice != 3) return;
+ oip = (ObjectIdPtr) sfp->id.value.ptrvalue;
+ if (oip == NULL) return;
+
+ idx = fip->highestID;
+ ipp = &(fip->pairs [idx]);
(fip->highestID)++;
+ ipp->before = oip->id;
+ ipp->after = fip->highestID;
+}
+
+/* Sort comparator for IdPairData elements: orders by the "before" field,
+   ascending.  NULL arguments compare as equal. */
+static int LIBCALLBACK SortPairList (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  IdPairPtr  lhs;
+  IdPairPtr  rhs;
+
+  lhs = (IdPairPtr) ptr1;
+  rhs = (IdPairPtr) ptr2;
+  if (lhs == NULL || rhs == NULL) return 0;
+
+  if (lhs->before == rhs->before) return 0;
+  return (lhs->before > rhs->before) ? 1 : -1;
+}
+
+/* Binary-searches fip->pairs (fip->count entries, expected to be sorted
+   ascending by "before") for the given old ID and returns the matching
+   "after" value.  Returns 0 when the table is missing or empty, or when no
+   entry has that "before" value. */
+static Int4 LookupNewFeatID (
+  FidDataPtr fip,
+  Int4 before
+)
+
+{
+  Int4       hi;
+  Int4       lo;
+  Int4       probe;
+  IdPairPtr  table;
+
+  if (fip == NULL) return 0;
+  table = fip->pairs;
+  if (table == NULL || fip->count < 1) return 0;
+
+  /* lower-bound search: narrow to the first entry whose before >= target */
+  lo = 0;
+  hi = fip->count - 1;
+  while (lo < hi) {
+    probe = (lo + hi) / 2;
+    if (table [probe].before < before) {
+      lo = probe + 1;
+    } else {
+      hi = probe;
+    }
+  }
+
+  if (hi < fip->count && table [hi].before == before) {
+    return table [hi].after;
+  }
+
+  return 0;
+}
+
+static void SfpReassignPairList (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+
+{
+ FidDataPtr fip;
+ ObjectIdPtr oip;
+ SeqFeatXrefPtr xref;
+
+ if (sfp == NULL) return;
+ fip = (FidDataPtr) userdata;
+ if (fip == NULL) return;
+ if (fip->pairs == NULL) return;
+
+ if (sfp->id.choice == 3) {
+ oip = (ObjectIdPtr) sfp->id.value.ptrvalue;
+ if (oip != NULL) {
+ if (oip->str == NULL) {
+ oip->id = LookupNewFeatID (fip, oip->id);
+ }
+ }
+ }
+
+ for (xref = sfp->xref; xref != NULL; xref = xref->next) {
+ if (xref->id.choice != 3) continue;
+ oip = (ObjectIdPtr) xref->id.value.ptrvalue;
+ if (oip != NULL) {
+ if (oip->str == NULL) {
+ oip->id = LookupNewFeatID (fip, oip->id);
+ }
+ }
+ }
}
-NLM_EXTERN void AssignCDSmRNAfeatureIDs (
+NLM_EXTERN void ReassignFeatureIDs (
SeqEntryPtr sep
)
{
+ Int4 count;
FidData fd;
+ count = VisitFeaturesInSep (sep, NULL, NULL);
+ if (count < 1) return;
+
MemSet ((Pointer) &fd, 0, sizeof (FidData));
- fd.highestID = 1;
- VisitFeaturesInSep (sep, (Pointer) &fd, FindHighestFeatureID);
- VisitFeaturesInSep (sep, (Pointer) &fd, SfpAssignCDSmRNAfeatureIDs);
+ fd.highestID = 0;
+ fd.highestRef = 0;
+ fd.count = count;
+ fd.pairs = (IdPairPtr) MemNew (sizeof (IdPairData) * (count + 1));
+ if (fd.pairs == NULL) return;
+
+ VisitFeaturesInSep (sep, (Pointer) &fd, SfpMakePairList);
+
+ HeapSort (fd.pairs, (size_t) count, sizeof (IdPairData), SortPairList);
+
+ VisitFeaturesInSep (sep, (Pointer) &fd, SfpReassignPairList);
+
+ MemFree (fd.pairs);
}
typedef struct vcmdata {
@@ -368,7 +599,7 @@ NLM_EXTERN void LinkCDSmRNAbyOverlap (
)
{
- AssignCDSmRNAfeatureIDs (sep);
+ AssignFeatureIDs (sep);
VisitBioseqsInSep (sep, NULL, BspLinkCDSmRNAbyOverlap);
}
@@ -515,97 +746,100 @@ static void BspLinkCDSmRNAbyProduct (
if (cdna->idx.parenttype == OBJ_BIOSEQSET) {
bssp = (BioseqSetPtr) cdna->idx.parentptr;
if (bssp == NULL) continue;
- if (bssp->_class != BioseqseqSet_class_nuc_prot) continue;
- prot = NULL;
- if (VisitBioseqsInSet (bssp, (Pointer) &prot, FindProtBsp) != 2) continue;
- for (sip = prot->id; sip != NULL; sip = sip->next) {
- MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1);
-
- /* binary search */
-
- L = 0;
- R = numcds - 1;
- while (L < R) {
- mid = (L + R) / 2;
- odp = cdsarray [mid];
- compare = StringCmp (odp->revstr, buf);
- if (compare < 0) {
- L = mid + 1;
- } else {
- R = mid;
- }
- }
- odp = cdsarray [R];
- if (odp != NULL && StringCmp (odp->revstr, buf) == 0) {
- cds = odp->sfp;
- if (cds == NULL) continue;
-
- /* make reciprocal feature ID xrefs */
-
- if (cds->id.choice == 3) {
- oip = (ObjectIdPtr) cds->id.value.ptrvalue;
- if (oip != NULL && oip->str == NULL) {
- id = oip->id;
- if (id > 0) {
- for (xref = mrna->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue;
- if (xref != NULL) {
- oip = (ObjectIdPtr) xref->id.value.ptrvalue;
- if (oip != NULL) {
- if (oip->str != NULL) {
- oip->str = MemFree (oip->str);
- }
- oip->id = id;
- }
- } else {
- xref = SeqFeatXrefNew ();
- if (xref != NULL) {
- oip = ObjectIdNew ();
- if (oip != NULL) {
- oip->id = id;
- xref->id.choice = 3;
- xref->id.value.ptrvalue = (Pointer) oip;
- xref->next = mrna->xref;
- mrna->xref = xref;
- }
- }
- }
- }
- }
- }
-
- if (mrna->id.choice == 3) {
- oip = (ObjectIdPtr) mrna->id.value.ptrvalue;
- if (oip != NULL && oip->str == NULL) {
- id = oip->id;
- if (id > 0) {
- for (xref = cds->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue;
- if (xref != NULL) {
- oip = (ObjectIdPtr) xref->id.value.ptrvalue;
- if (oip != NULL) {
- if (oip->str != NULL) {
- oip->str = MemFree (oip->str);
- }
- oip->id = id;
- }
- } else {
- xref = SeqFeatXrefNew ();
- if (xref != NULL) {
- oip = ObjectIdNew ();
- if (oip != NULL) {
- oip->id = id;
- xref->id.choice = 3;
- xref->id.value.ptrvalue = (Pointer) oip;
- xref->next = cds->xref;
- cds->xref = xref;
- }
- }
- }
- }
- }
- }
- }
+ if (bssp->_class == BioseqseqSet_class_nuc_prot) {
+ prot = NULL;
+ if (VisitBioseqsInSet (bssp, (Pointer) &prot, FindProtBsp) == 2) {
+ for (sip = prot->id; sip != NULL; sip = sip->next) {
+ MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1);
+
+ /* binary search */
+
+ L = 0;
+ R = numcds - 1;
+ while (L < R) {
+ mid = (L + R) / 2;
+ odp = cdsarray [mid];
+ compare = StringCmp (odp->revstr, buf);
+ if (compare < 0) {
+ L = mid + 1;
+ } else {
+ R = mid;
+ }
+ }
+ odp = cdsarray [R];
+ if (odp != NULL && StringCmp (odp->revstr, buf) == 0) {
+ cds = odp->sfp;
+ if (cds == NULL) continue;
+
+ /* make reciprocal feature ID xrefs */
+
+ if (cds->id.choice == 3) {
+ oip = (ObjectIdPtr) cds->id.value.ptrvalue;
+ if (oip != NULL && oip->str == NULL) {
+ id = oip->id;
+ if (id > 0) {
+ for (xref = mrna->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue;
+ if (xref != NULL) {
+ oip = (ObjectIdPtr) xref->id.value.ptrvalue;
+ if (oip != NULL) {
+ if (oip->str != NULL) {
+ oip->str = MemFree (oip->str);
+ }
+ oip->id = id;
+ }
+ } else {
+ xref = SeqFeatXrefNew ();
+ if (xref != NULL) {
+ oip = ObjectIdNew ();
+ if (oip != NULL) {
+ oip->id = id;
+ xref->id.choice = 3;
+ xref->id.value.ptrvalue = (Pointer) oip;
+ xref->next = mrna->xref;
+ mrna->xref = xref;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (mrna->id.choice == 3) {
+ oip = (ObjectIdPtr) mrna->id.value.ptrvalue;
+ if (oip != NULL && oip->str == NULL) {
+ id = oip->id;
+ if (id > 0) {
+ for (xref = cds->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue;
+ if (xref != NULL) {
+ oip = (ObjectIdPtr) xref->id.value.ptrvalue;
+ if (oip != NULL) {
+ if (oip->str != NULL) {
+ oip->str = MemFree (oip->str);
+ }
+ oip->id = id;
+ }
+ } else {
+ xref = SeqFeatXrefNew ();
+ if (xref != NULL) {
+ oip = ObjectIdNew ();
+ if (oip != NULL) {
+ oip->id = id;
+ xref->id.choice = 3;
+ xref->id.value.ptrvalue = (Pointer) oip;
+ xref->next = cds->xref;
+ cds->xref = xref;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
}
}
+ BioseqUnlock (cdna);
}
}
@@ -626,7 +860,7 @@ NLM_EXTERN void LinkCDSmRNAbyProduct (
)
{
- AssignCDSmRNAfeatureIDs (sep);
+ AssignFeatureIDs (sep);
VisitBioseqsInSep (sep, NULL, BspLinkCDSmRNAbyProduct);
}
diff --git a/api/sqnutils.h b/api/sqnutils.h
index 279fa9c1..472da29a 100644
--- a/api/sqnutils.h
+++ b/api/sqnutils.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.134 $
+* $Revision: 6.140 $
*
* File Description:
*
@@ -233,9 +233,15 @@ NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata);
NLM_EXTERN void ClearFeatIDs (SeqFeatPtr sfp);
NLM_EXTERN void ClearFeatIDXrefs (SeqFeatPtr sfp);
-NLM_EXTERN void ClearCDSmRNAfeatureIDs (SeqEntryPtr sep);
+NLM_EXTERN void ClearFeatureIDs (SeqEntryPtr sep);
+NLM_EXTERN Int4 FindHighestFeatureID (SeqEntryPtr sep);
-NLM_EXTERN void AssignCDSmRNAfeatureIDs (SeqEntryPtr sep);
+NLM_EXTERN void AssignFeatureIDs (SeqEntryPtr sep);
+
+NLM_EXTERN void OffsetFeatureIDs (SeqEntryPtr sep, Int4 offset);
+NLM_EXTERN void OffsetFeatureIDXrefs (SeqEntryPtr sep, Int4 offset);
+
+NLM_EXTERN void ReassignFeatureIDs (SeqEntryPtr sep);
NLM_EXTERN void LinkCDSmRNAbyOverlap (SeqEntryPtr sep);
@@ -264,6 +270,8 @@ NLM_EXTERN CharPtr SqnTagFind (SqnTagPtr stp, CharPtr tag);
NLM_EXTERN void ReadTechFromString (CharPtr str, MolInfoPtr mip);
NLM_EXTERN void ReadCompletenessFromString (CharPtr str, MolInfoPtr mip);
+extern Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2);
+
/* functions to extract BioSource, MolInfo, and Bioseq information from parsed titles */
NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource (
@@ -312,6 +320,15 @@ NLM_EXTERN UserObjectPtr ParseTitleIntoTpaAssembly (
UserObjectPtr uop
);
+/* structured comment user object for flatfile presentation */
+
+NLM_EXTERN UserObjectPtr ParseStringIntoStructuredComment (
+ UserObjectPtr uop,
+ CharPtr str,
+ CharPtr prefix,
+ CharPtr suffix
+);
+
/* UseLocalAsnloadDataAndErrMsg transiently sets paths to asnload, data, and errmsg
if they are packaged in the same directory as the executing program. */
@@ -624,6 +641,10 @@ NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, Visit
typedef void (*VisitUserObjectFunc) (UserObjectPtr uop, Pointer userdata);
NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback);
+/* explores sub UserObjects including "CombinedFeatureUserObjects" and finds by label */
+
+NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag);
+
/* creates "CombinedFeatureUserObjects" sfp->ext to combine two user objects */
NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop);
diff --git a/api/subutil.c b/api/subutil.c
index 07588e4d..890f700f 100644
--- a/api/subutil.c
+++ b/api/subutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.62 $
+* $Revision: 6.64 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -40,6 +40,12 @@
*
*
* $Log: subutil.c,v $
+* Revision 6.64 2006/05/05 19:49:40 kans
+* added StructuredComment user object creation functions
+*
+* Revision 6.63 2006/03/23 19:35:55 kans
+* expand keywords with semicolons in AddGenBankBlockToEntry, not BSEC - already done in ParseTitleIntoGenBank
+*
* Revision 6.62 2006/02/06 19:00:15 kans
* added CreateFeatureFetchPolicyUserObject
*
@@ -2543,6 +2549,51 @@ NLM_EXTERN Boolean SetGeneticCodeForEntry (
return TRUE;
}
+/* Splits a keyword node whose string contains ';' or ',' into one node per
+   piece.  Each piece is trimmed of surrounding spaces.  The first non-empty
+   piece is stored back into vnp itself; every further non-empty piece goes
+   into a new node inserted, in order, between vnp and the node that
+   originally followed it.  Empty pieces (as produced by "a;;b" or "a; ;b")
+   are dropped instead of becoming empty keywords.  If no piece has text,
+   vnp is left with NULL data.  Nodes without a delimiter are untouched. */
+static void SubExpandSemicolonedKeyword (ValNodePtr vnp)
+
+{
+  Char        ch;
+  Boolean     first = TRUE;
+  ValNodePtr  lastvnp;
+  ValNodePtr  newvnp;
+  ValNodePtr  nextvnp;
+  CharPtr     ptr;
+  CharPtr     str;
+  CharPtr     tmp;
+
+  if (vnp == NULL) return;
+  str = (CharPtr) vnp->data.ptrvalue;
+  if (StringHasNoText (str)) return;
+  if (StringChr (str, ';') == NULL && StringChr (str, ',') == NULL) return;
+
+  /* split a private copy; if it cannot be made, keep the keyword as it is */
+  tmp = StringSave (str);
+  if (tmp == NULL) return;
+
+  lastvnp = vnp;
+  nextvnp = vnp->next;
+
+  vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+  str = tmp;
+  while (StringDoesHaveText (str)) {
+    /* find the end of the current piece */
+    ptr = str;
+    ch = *ptr;
+    while (ch != '\0' && ch != ',' && ch != ';') {
+      ptr++;
+      ch = *ptr;
+    }
+    /* terminate the piece at the delimiter and step past it */
+    if (*ptr != '\0') {
+      *ptr = '\0';
+      ptr++;
+    }
+    TrimSpacesAroundString (str);
+    if (StringDoesHaveText (str)) {
+      if (first) {
+        /* reuse the original node so no data-less node stays in the list */
+        vnp->data.ptrvalue = (Pointer) StringSave (str);
+        first = FALSE;
+      } else {
+        newvnp = ValNodeCopyStr (NULL, 0, str);
+        if (newvnp != NULL) {
+          newvnp->next = nextvnp;
+          lastvnp->next = newvnp;
+          lastvnp = newvnp;
+        }
+      }
+    }
+    str = ptr;
+  }
+  MemFree (tmp);
+}
+
NLM_EXTERN Boolean AddGenBankBlockToEntry (
NCBISubPtr submission,
SeqEntryPtr entry ,
@@ -2552,7 +2603,7 @@ NLM_EXTERN Boolean AddGenBankBlockToEntry (
CharPtr keyword2 ,
CharPtr keyword3 )
{
- ValNodePtr vnp;
+ ValNodePtr vnp, tmp;
GBBlockPtr gbp;
if ((submission == NULL) || (entry == NULL))
@@ -2570,6 +2621,10 @@ NLM_EXTERN Boolean AddGenBankBlockToEntry (
ValNodeCopyStr(&gbp->keywords, 0, keyword2);
ValNodeCopyStr(&gbp->keywords, 0, keyword3);
+ for (tmp = gbp->keywords; tmp != NULL; tmp = tmp->next) {
+ SubExpandSemicolonedKeyword (tmp);
+ }
+
return TRUE;
}
@@ -5399,3 +5454,54 @@ NLM_EXTERN UserObjectPtr CreateFeatureFetchPolicyUserObject (
return uop;
}
+/* structured comment user object for flatfile presentation */
+
+/* Allocates a UserObject whose type string is "StructuredComment".
+   Returns the new object, or NULL if either allocation fails. */
+NLM_EXTERN UserObjectPtr CreateStructuredCommentUserObject (void)
+
+{
+  ObjectIdPtr    oip;
+  UserObjectPtr  uop;
+
+  uop = UserObjectNew ();
+  if (uop == NULL) return NULL;
+
+  oip = ObjectIdNew ();
+  if (oip == NULL) {
+    /* never hand back an untyped object; UserObjectFree returns NULL */
+    return UserObjectFree (uop);
+  }
+
+  oip->str = StringSave ("StructuredComment");
+  uop->type = oip;
+
+  return uop;
+}
+
+/* Appends one visible-string field (label = field, value = str) to the end
+   of the field list of a "StructuredComment" user object.  Does nothing when
+   uop is NULL or not typed "StructuredComment" (case-insensitive compare),
+   when field or str has no text, or when an allocation fails. */
+NLM_EXTERN void AddItemStructuredCommentUserObject (
+  UserObjectPtr uop,
+  CharPtr field,
+  CharPtr str
+)
+
+{
+  UserFieldPtr  curr;
+  ObjectIdPtr   oip;
+  UserFieldPtr  prev = NULL;
+
+  if (uop == NULL || StringHasNoText (field) || StringHasNoText (str)) return;
+  oip = uop->type;
+  if (oip == NULL || StringICmp (oip->str, "StructuredComment") != 0) return;
+
+  /* build the new field completely before touching the list */
+
+  curr = UserFieldNew ();
+  if (curr == NULL) return;
+  oip = ObjectIdNew ();
+  if (oip == NULL) {
+    UserFieldFree (curr);
+    return;
+  }
+  oip->str = StringSave (field);
+  curr->label = oip;
+  curr->choice = 1; /* visible string */
+  curr->data.ptrvalue = (Pointer) StringSave (str);
+
+  /* find the current last field, if any */
+
+  for (prev = uop->data; prev != NULL && prev->next != NULL; prev = prev->next) continue;
+
+  /* link new field at end of list */
+
+  if (prev != NULL) {
+    prev->next = curr;
+  } else {
+    uop->data = curr;
+  }
+}
+
diff --git a/api/subutil.h b/api/subutil.h
index eaba6f79..b4f1ba9d 100644
--- a/api/subutil.h
+++ b/api/subutil.h
@@ -31,7 +31,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.53 $
+* $Revision: 6.54 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -42,6 +42,9 @@
*
*
* $Log: subutil.h,v $
+* Revision 6.54 2006/05/05 19:49:40 kans
+* added StructuredComment user object creation functions
+*
* Revision 6.53 2006/02/06 19:00:15 kans
* added CreateFeatureFetchPolicyUserObject
*
@@ -1708,6 +1711,16 @@ NLM_EXTERN UserObjectPtr CreateFeatureFetchPolicyUserObject (
CharPtr policy
);
+/* structured comment user object for flatfile presentation */
+
+NLM_EXTERN UserObjectPtr CreateStructuredCommentUserObject (void);
+
+NLM_EXTERN void AddItemStructuredCommentUserObject (
+ UserObjectPtr uop,
+ CharPtr field,
+ CharPtr str
+);
+
#ifdef __cplusplus
}
diff --git a/api/tofasta.c b/api/tofasta.c
index dcbbb7d3..19dd1daf 100644
--- a/api/tofasta.c
+++ b/api/tofasta.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.150 $
+* $Revision: 6.152 $
*
* File Description: various sequence objects to fasta output
*
@@ -39,6 +39,12 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: tofasta.c,v $
+* Revision 6.152 2006/03/29 16:04:47 kans
+* in AddNcTitles, do not clear mip->completeness - cannot determine why this was done in the past
+*
+* Revision 6.151 2006/03/08 21:29:47 kans
+* FindNMDefLine calls ReplaceFlyDashPwithDashR if Drosophila melanogaster curated RefSeq
+*
* Revision 6.150 2006/01/10 22:19:29 kans
* CreateDefLine calls DoTpaPrefix to handle TPA_exp and TPA_inf
*
@@ -3336,6 +3342,65 @@ static void FindNMFeats (SeqFeatPtr sfp, Pointer userdata)
}
}
+/* Returns TRUE when str begins with a token of the form
+   "CG" + zero or more digits + "-P" + one letter, and that token is
+   followed by end of string, a space, a comma or a semicolon. */
+static Boolean IsFlyCG (CharPtr str)
+
+{
+  CharPtr  p;
+
+  if (StringHasNoText (str)) return FALSE;
+
+  p = str;
+  if (*p != 'C') return FALSE;
+  p++;
+  if (*p != 'G') return FALSE;
+  p++;
+
+  /* numeric part */
+  while (IS_DIGIT (*p)) {
+    p++;
+  }
+
+  if (*p != '-') return FALSE;
+  p++;
+  if (*p != 'P') return FALSE;
+  p++;
+
+  /* exactly one letter, then a terminator */
+  if (! IS_ALPHA (*p)) return FALSE;
+  p++;
+
+  if (*p == '\0' || *p == ' ' || *p == ',' || *p == ';') return TRUE;
+
+  return FALSE;
+}
+
+/* Scans str one whitespace-separated token at a time.  At the first token
+   accepted by IsFlyCG, the 'P' of its "-P" is overwritten in place with 'R'
+   and the function returns; later tokens are not examined. */
+static void ReplaceFlyDashPwithDashR (CharPtr str)
+
+{
+  CharPtr  hit;
+
+  while (StringDoesHaveText (str)) {
+    /* move to the start of the next token */
+    while (IS_WHITESP (*str)) {
+      str++;
+    }
+
+    if (IsFlyCG (str)) {
+      hit = StringStr (str, "-P");
+      if (hit != NULL) {
+        hit [1] = 'R';
+        return;
+      }
+    }
+
+    /* not a match: skip the rest of this token */
+    while (*str != '\0' && (! IS_WHITESP (*str))) {
+      str++;
+    }
+  }
+}
+
static CharPtr FindNMDefLine (BioseqPtr bsp)
{
@@ -3377,6 +3442,10 @@ static CharPtr FindNMDefLine (BioseqPtr bsp)
}
}
if (is_refseq) {
+ /* special case Drosophila RefSeq NM titles */
+ if (StringICmp (orp->taxname, "Drosophila melanogaster") == 0) {
+ ReplaceFlyDashPwithDashR (buf);
+ }
ptr = StringStr (buf, "isoform ");
if (ptr != NULL) {
*ptr = '\0';
@@ -4928,8 +4997,10 @@ static Boolean AddNcTitles (GatherObjectPtr gop)
BioseqPtr bsp;
Char buf [512];
Boolean is_nc;
+ /*
MolInfoPtr mip;
SeqDescrPtr sdp;
+ */
SeqIdPtr sip;
CharPtr str;
TextSeqIdPtr tsip;
@@ -4962,6 +5033,7 @@ static Boolean AddNcTitles (GatherObjectPtr gop)
}
}
+ /*
for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_molinfo) {
mip = (MolInfoPtr) sdp->data.ptrvalue;
@@ -4972,6 +5044,7 @@ static Boolean AddNcTitles (GatherObjectPtr gop)
}
}
}
+ */
return TRUE;
}
diff --git a/api/valid.c b/api/valid.c
index 0a8ecae1..66191353 100644
--- a/api/valid.c
+++ b/api/valid.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.726 $
+* $Revision: 6.755 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,93 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: valid.c,v $
+* Revision 6.755 2006/05/04 20:31:46 kans
+* ERR_SEQ_FEAT_ErroneousException test for unclassified transcription discrepancy that should be mismatches in transcription
+*
+* Revision 6.754 2006/05/04 20:13:15 kans
+* added ERR_SEQ_FEAT_ErroneousException - used for unclassified translation discrepancy that should be mismatches in translation
+*
+* Revision 6.753 2006/04/24 16:28:10 kans
+* InvalidInferenceValue dropped to warning
+*
+* Revision 6.752 2006/04/21 17:59:18 kans
+* added ignoreExceptions flag to vsp - for MrnaTransCheck and CdTransCheck
+*
+* Revision 6.751 2006/04/17 18:05:09 kans
+* added ERR_SEQ_DESCR_BadPunctuation
+*
+* Revision 6.750 2006/04/14 20:35:40 kans
+* ERR_SEQ_FEAT_UnindexedFeature also reports bioseq identifiers
+*
+* Revision 6.749 2006/04/14 20:18:23 kans
+* CDSmRNAmismatch, CDSwithMultipleMRNAs and CDSwithNoMRNAOverlap tests in ValidateCDSmRNAmatch suppressed if genbank
+*
+* Revision 6.748 2006/04/14 17:06:00 kans
+* ERR_SEQ_FEAT_FeatureLocationIsGi0 prints IDs of Bioseq
+*
+* Revision 6.747 2006/04/14 16:25:35 kans
+* added ERR_SEQ_FEAT_PseudoCdsHasProtXref
+*
+* Revision 6.746 2006/04/12 14:24:02 kans
+* CDSmRNArange and mRNAgeneRange reduced to WARNING level
+*
+* Revision 6.745 2006/04/11 18:04:08 kans
+* IdXrefsNotReciprocal also tests MrnaProteinLink user object against cds->product
+*
+* Revision 6.744 2006/04/11 16:16:57 kans
+* raised obsolete experimental evidence qualifier set message to error
+*
+* Revision 6.743 2006/04/10 15:23:24 kans
+* CDSsLinkedToDifferentMRNAs used to suppress duplicate feature message on otherwise identical CDSs
+*
+* Revision 6.742 2006/04/10 14:56:57 kans
+* report obsolete sfp->exp_ev only if no gi present
+*
+* Revision 6.741 2006/04/07 19:49:06 kans
+* allow 5 letter + 7 digit accessions
+*
+* Revision 6.740 2006/04/06 18:43:50 kans
+* report ERR_SEQ_FEAT_InvalidInferenceValue for new records with only sfp->exp_ev set
+*
+* Revision 6.739 2006/04/05 15:04:11 kans
+* ERR_SEQ_FEAT_BadTrnaAA not reported for pseudo tRNA
+*
+* Revision 6.738 2006/03/30 19:24:56 kans
+* made CheckDeltaForReuse more efficient
+*
+* Revision 6.737 2006/03/27 18:27:01 kans
+* biop->origin == ORG_MUT no longer sets bvsp->is_artificial
+*
+* Revision 6.736 2006/03/27 14:52:34 kans
+* ribosomal rna abutting test now handles same rRNA subtype split across segmented parts
+*
+* Revision 6.735 2006/03/24 18:57:26 kans
+* if estimated_length unknown do not complain about different dash count
+*
+* Revision 6.734 2006/03/24 18:48:05 kans
+* added ERR_SEQ_FEAT_GapFeatureProblem
+*
+* Revision 6.733 2006/03/17 16:50:41 kans
+* added INTERNAL_SPACER_X to ITS-rRNA adjacency tests
+*
+* Revision 6.732 2006/03/10 13:38:48 kans
+* raised DeltaComponentIsGi0 and FeatureLocationIsGi0 from ERROR to REJECT
+*
+* Revision 6.731 2006/03/09 13:55:35 kans
+* replace qualifier can have * for termination in protein sequence
+*
+* Revision 6.730 2006/03/08 19:42:27 kans
+* do not call BioseqLockById on 0 or negative gi number
+*
+* Revision 6.729 2006/03/07 21:34:28 kans
+* checks for gi 0 now also check for negative value
+*
+* Revision 6.728 2006/03/07 21:15:59 kans
+* added ERR_SEQ_INST_DeltaComponentIsGi0 and ERR_SEQ_FEAT_FeatureLocationIsGi0
+*
+* Revision 6.727 2006/03/07 17:00:36 kans
+* ERR_SEQ_FEAT_PseudoCdsViaGeneHasProduct and ERR_SEQ_FEAT_PseudoCdsHasProduct are now ERROR
+*
* Revision 6.726 2006/02/27 17:49:34 kans
* added adjusted for low-quality genome exception for RefSeq models
*
@@ -2464,6 +2551,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
Boolean validateIDSet;
Boolean seqSubmitParent;
Boolean justShowAccession;
+ Boolean ignoreExceptions;
Int2 validationLimit;
ValidErrorFunc errfunc;
Pointer userdata;
@@ -2498,6 +2586,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
validateIDSet = vsp->validateIDSet;
seqSubmitParent = vsp->seqSubmitParent;
justShowAccession = vsp->justShowAccession;
+ ignoreExceptions = vsp->ignoreExceptions;
validationLimit = vsp->validationLimit;
errfunc = vsp->errfunc;
userdata = vsp->userdata;
@@ -2529,6 +2618,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->validateIDSet = validateIDSet;
vsp->seqSubmitParent = seqSubmitParent;
vsp->justShowAccession = justShowAccession;
+ vsp->ignoreExceptions = ignoreExceptions;
vsp->validationLimit = validationLimit;
vsp->errfunc = errfunc;
vsp->userdata = userdata;
@@ -2810,7 +2900,8 @@ static CharPtr err1Label [] = {
"LeadingX",
"InternalNsInSeqRaw",
"InternalNsAdjacentToGap",
- "CaseDifferenceInSeqID"
+ "CaseDifferenceInSeqID",
+ "DeltaComponentIsGi0"
};
static CharPtr err2Label [] = {
@@ -2850,7 +2941,8 @@ static CharPtr err2Label [] = {
"FastaBracketTitle",
"MissingText",
"BadCollectionDate",
- "BadPCRPrimerSequence"
+ "BadPCRPrimerSequence",
+ "BadPunctuation"
};
static CharPtr err3Label [] = {
@@ -3005,7 +3097,11 @@ static CharPtr err5Label [] = {
"FeatureRefersToAccession",
"SelfReferentialProduct",
"ITSdoesNotAbutRRNA",
- "FeatureSeqIDCaseDifference"
+ "FeatureSeqIDCaseDifference",
+ "FeatureLocationIsGi0",
+ "GapFeatureProblem",
+ "PseudoCdsHasProtXref",
+ "ErroneousException"
};
static CharPtr err6Label [] = {
@@ -3670,7 +3766,12 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
}
if (vsp->useSeqMgrIndexes) {
if (SeqMgrGetDesiredFeature (gcp->entityID, NULL, 0, 0, sfp, &context) == NULL) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_UnindexedFeature, "Feature is not indexed");
+ StringCpy (buf, "?");
+ bsp = vsp->bsp;
+ if (bsp != NULL) {
+ SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
+ }
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_UnindexedFeature, "Feature is not indexed on Bioseq %s", buf);
} else {
bsp = BioseqFindFromSeqLoc (sfp->location);
if (bsp != NULL) {
@@ -6058,6 +6159,7 @@ static void CheckDeltaForReuse (ValidStructPtr vsp, GatherContextPtr gcp, Bioseq
{
Char buf [80];
ValNodePtr head = NULL;
+ ValNodePtr last = NULL;
ReuseDataPtr lastrdp = NULL;
ReuseDataPtr rdp;
SeqIntPtr sintp;
@@ -6082,7 +6184,11 @@ static void CheckDeltaForReuse (ValidStructPtr vsp, GatherContextPtr gcp, Bioseq
rdp->seqidstr = StringSave (buf);
rdp->from = sintp->from;
rdp->to = sintp->to;
- ValNodeAddPointer (&head, 0, (Pointer) rdp);
+ vnp = ValNodeAddPointer (&last, 0, (Pointer) rdp);
+ if (head == NULL) {
+ head = vnp;
+ }
+ last = vnp;
}
if (head == NULL) return;
@@ -6307,6 +6413,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
(sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) {
} else if (numletters == 4 && numdigits == 9 && ISA_na (bsp->mol) &&
(sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) {
+ } else if (numletters == 5 && numdigits == 7 && ISA_na (bsp->mol) &&
+ (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) {
} else {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession);
}
@@ -7047,6 +7155,10 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
switch (vnp->choice) {
case 1: /* SeqLocPtr */
slp = (SeqLocPtr) (vnp->data.ptrvalue);
+ sip3 = SeqLocId (slp);
+ if (sip3 != NULL && sip3->choice == SEQID_GI && sip3->data.intvalue <= 0) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_DeltaComponentIsGi0, "Delta component is gi|0");
+ }
len2 = SeqLocLen (slp);
if (len2 < 0)
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_SeqDataLenWrong, "-1 length on seq-loc of delta seq_ext");
@@ -9384,7 +9496,7 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
ValNodePtr vnp, vnp2;
OrgRefPtr this_org = NULL, that_org = NULL;
int tmpval;
- Char buf1[20], buf2[20];
+ Char buf1[20], buf2[20], ch;
EMBLBlockPtr ebp;
GBBlockPtr gbp;
ValNodePtr keywords = NULL;
@@ -9400,6 +9512,7 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
Boolean tpa_inf;
BioseqPtr bsp;
DatePtr dp;
+ size_t len;
SeqMgrFeatContext fcontext;
static char *badmod = "Inconsistent GIBB-mod [%d] and [%d]";
@@ -9660,6 +9773,21 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_MultipleTitles, "Undesired multiple title descriptors");
}
}
+ len = StringLen (str);
+ if (len > 4) {
+ ch = str [len - 1];
+ while (ch == ' ' && len > 4) {
+ len--;
+ ch = str [len - 1];
+ }
+ if (ch == '.' && len > 4) {
+ len--;
+ ch = str [len - 1];
+ }
+ if (ch == '.' || ch == ',' || ch == ';' || ch == ':') {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPunctuation, "Title descriptor ends in bad punctuation");
+ }
+ }
break;
case Seq_descr_name:
str = (CharPtr) vnp->data.ptrvalue;
@@ -10413,33 +10541,68 @@ static Boolean IdXrefsAreReciprocal (
return FALSE;
}
-static Boolean IdXrefsNotReciprocal (
+static Int2 IdXrefsNotReciprocal ( /* Checks the cross-references between a CDS feature and an mRNA feature.
+ * Returns 0 when nothing is reported, including every early exit where a needed piece is absent.
+ * Returns 1 when an xref that carries an ID resolves to a feature other than the partner.
+ * Returns 2 when the MrnaProteinLink user object on the mRNA resolves to a different gi
+ * than the CDS product.
+ * The result is an Int2 rather than a Boolean so the caller can tell code 1 from code 2. */
SeqFeatPtr cds,
SeqFeatPtr mrna
)
{
- SeqFeatXrefPtr xref;
+ Int4 giu = 0, gip = 0; /* giu: gi taken from the user object; gip: gi taken from the CDS product */
SeqFeatPtr matchsfp;
+ ObjectIdPtr oip;
+ SeqIdPtr sip;
+ CharPtr tmp;
+ UserFieldPtr ufp;
+ UserObjectPtr uop;
+ SeqFeatXrefPtr xref;
- if (cds == NULL || mrna == NULL) return FALSE;
- if (cds->id.choice != 3 || mrna->id.choice != 3) return FALSE;
+ if (cds == NULL || mrna == NULL) return 0;
+ if (cds->id.choice != 3 || mrna->id.choice != 3) return 0; /* both features must have id.choice 3 - presumably a local feature ID; TODO confirm */
for (xref = cds->xref; xref != NULL; xref = xref->next) {
if (xref->id.choice != 0) {
matchsfp = SeqMgrGetFeatureByFeatID (cds->idx.entityID, NULL, NULL, xref, NULL);
- if (matchsfp != mrna) return TRUE;
+ if (matchsfp != mrna) return 1; /* CDS xref does not resolve to this mRNA; a NULL lookup result also takes this branch */
}
}
for (xref = mrna->xref; xref != NULL; xref = xref->next) {
if (xref->id.choice != 0) {
matchsfp = SeqMgrGetFeatureByFeatID (mrna->idx.entityID, NULL, NULL, xref, NULL);
- if (matchsfp != cds) return TRUE;
+ if (matchsfp != cds) return 1; /* mRNA xref does not resolve to this CDS; a NULL lookup result also takes this branch */
}
}
- return FALSE;
+ if (cds->product == NULL) return 0; /* from here on: compare the CDS product gi with the gi named by MrnaProteinLink */
+ if (mrna->ext == NULL) return 0;
+ uop = FindUopByTag (mrna->ext, "MrnaProteinLink");
+ if (uop == NULL) return 0;
+ sip = SeqLocId (cds->product);
+ if (sip == NULL) return 0;
+ if (sip->choice == SEQID_GI) {
+ gip = (Int4) sip->data.intvalue;
+ } else {
+ gip = GetGIForSeqId (sip); /* NOTE(review): may need an ID lookup outside the record - confirm cost with callers */
+ }
+ if (gip == 0) return 0; /* no gi for the product, so there is nothing to compare */
+ ufp = uop->data; /* only the first field of the user object is examined */
+ if (ufp == NULL || ufp->choice != 1) return 0; /* choice 1 presumably marks a string-valued field; TODO confirm */
+ oip = ufp->label;
+ if (oip == NULL || StringICmp (oip->str, "protein seqID") != 0) return 0;
+ tmp = (CharPtr) ufp->data.ptrvalue;
+ if (StringHasNoText (tmp)) return 0;
+ sip = MakeSeqID (tmp); /* sip is reused: it now holds a SeqId built from the field text */
+ if (sip == NULL) return 0;
+ if (sip->choice == SEQID_GI) {
+ giu = (Int4) sip->data.intvalue;
+ } else {
+ giu = GetGIForSeqId (sip);
+ }
+ SeqIdFree (sip); /* frees the SeqId returned by MakeSeqID above, not the one obtained from cds->product */
+ if (giu == 0) return 0;
+ if (gip != giu) return 2; /* the two gi values disagree */
+
+ return 0;
}
static Boolean LIBCALLBACK FindSingleMrnaProc (
@@ -10538,10 +10701,12 @@ static void ValidateCDSmRNAmatch (
Boolean goOn, pseudo;
GeneRefPtr grp;
Int2 i, j, k, numfeats, tmpnumcds, tmpnummrna, count;
+ Boolean is_genbank = FALSE;
LpData ld;
Int4 num_repeat_regions;
Uint2 olditemtype = 0;
Uint2 olditemid = 0;
+ Int2 recip;
VoidPtr repeat_region_array;
SeqFeatPtr rpt_region;
ErrSev sev = /* SEV_INFO */ SEV_WARNING;
@@ -10564,14 +10729,16 @@ static void ValidateCDSmRNAmatch (
}
*/
- repeat_region_array = SeqMgrBuildFeatureIndex (bsp, &num_repeat_regions, 0, FEATDEF_repeat_region);
-
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_OTHER) {
sev = SEV_WARNING;
+ } else if (sip->choice == SEQID_GENBANK) {
+ is_genbank = TRUE;
}
}
+ repeat_region_array = SeqMgrBuildFeatureIndex (bsp, &num_repeat_regions, 0, FEATDEF_repeat_region);
+
if (numgene > 0 && numcds > 0 && nummrna > 0) {
numfeats = numcds + nummrna;
head = (GmcDataPtr) MemNew (sizeof (GmcData) * (size_t) (numfeats + 1));
@@ -10613,7 +10780,7 @@ static void ValidateCDSmRNAmatch (
tmpnummrna++;
}
}
- if (tmpnumcds > 0 && tmpnummrna > 1 && tmpnumcds != tmpnummrna) {
+ if (tmpnumcds > 0 && tmpnummrna > 1 && tmpnumcds != tmpnummrna && (! is_genbank)) {
if (gcp != NULL) {
gcp->itemID = gene->idx.itemID;
@@ -10677,7 +10844,8 @@ static void ValidateCDSmRNAmatch (
if (vdp != NULL) {
vdp->accounted_for = TRUE;
goOn = TRUE;
- if (IdXrefsNotReciprocal (sfp, ld.mrna)) {
+ recip = IdXrefsNotReciprocal (sfp, ld.mrna);
+ if (recip == 1) {
if (gcp != NULL) {
gcp->itemID = sfp->idx.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -10685,6 +10853,14 @@ static void ValidateCDSmRNAmatch (
vsp->descr = NULL;
vsp->sfp = sfp;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "CDS/mRNA unambiguous pair have erroneous cross-references");
+ } else if (recip == 2) {
+ if (gcp != NULL) {
+ gcp->itemID = ld.mrna->idx.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = ld.mrna;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "MrnaProteinLink inconsistent with feature ID cross-references");
}
}
} else {
@@ -10698,7 +10874,7 @@ static void ValidateCDSmRNAmatch (
}
sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
- while (sfp != NULL) {
+ while (sfp != NULL && (! is_genbank)) {
vdp = (VvmDataPtr) sfp->idx.scratch;
if (vdp != NULL) {
count = vdp->num_mrnas;
@@ -10857,6 +11033,38 @@ static Int2 WhichRNA (SeqFeatPtr sfp)
return 0;
}
+static Boolean CDSsLinkedToDifferentMRNAs (SeqFeatPtr sfp, SeqFeatPtr last)
+ /*
+  * Reports whether two CDS features are cross-referenced to two different mRNA features.
+  * sfp and last are the two features to compare.
+  * Returns TRUE only when each one has an xref that resolves to an mRNA feature and the
+  * two mRNA features are not the same object; returns FALSE in every other case.
+  */
+{
+ SeqFeatPtr mrna1 = NULL, mrna2 = NULL;
+ SeqFeatXrefPtr xref;
+ /* both arguments must be non-NULL features whose indexed subtype is FEATDEF_CDS */
+ if (sfp == NULL || last == NULL) return FALSE;
+ if (sfp->idx.subtype != FEATDEF_CDS || last->idx.subtype != FEATDEF_CDS) return FALSE;
+ /* walk the xrefs of sfp; the loop condition stops at the first one that resolves to an mRNA */
+ for (xref = sfp->xref; xref != NULL && mrna1 == NULL; xref = xref->next) {
+ if (xref->id.choice != 0) {
+ mrna1 = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL);
+ if (mrna1 != NULL && mrna1->idx.subtype != FEATDEF_mRNA) {
+ mrna1 = NULL; /* resolved to a feature that is not an mRNA - discard it and keep scanning */
+ }
+ }
+ }
+ /* same scan for the xrefs of last */
+ for (xref = last->xref; xref != NULL && mrna2 == NULL; xref = xref->next) {
+ if (xref->id.choice != 0) {
+ mrna2 = SeqMgrGetFeatureByFeatID (last->idx.entityID, NULL, NULL, xref, NULL);
+ if (mrna2 != NULL && mrna2->idx.subtype != FEATDEF_mRNA) {
+ mrna2 = NULL; /* resolved to a feature that is not an mRNA - discard it and keep scanning */
+ }
+ }
+ }
+ /* pointer comparison: the two lookups must have produced distinct feature objects */
+ if (mrna1 != NULL && mrna2 != NULL && mrna1 != mrna2) return TRUE;
+ /* one or both CDSs have no mRNA xref, or both point at the same mRNA */
+ return FALSE;
+}
+
static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bvsp)
{
@@ -10924,6 +11132,15 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
CharPtr except_text = NULL;
ValNodePtr vnp, cds_prod_head = NULL, mrna_prod_head = NULL,
lastcdsprod = NULL, lastmrnaprod = NULL;
+ StreamCache sc;
+ Int2 res;
+ Int4 dashes;
+ Int4 Ns;
+ Int4 realBases;
+ Int4 estimated_length;
+ Int4 loclen;
+ GBQualPtr gbq;
+ long int val;
gcp = bvsp->gcp;
vsp = bvsp->vsp;
@@ -11222,6 +11439,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
/* do not report if both have dbxrefs and they are different */
} else if (featdeftype == FEATDEF_variation && ReplaceQualsDiffer (sfp->qual, last->qual)) {
/* do not report if both have replace quals and they are different */
+ } else if (CDSsLinkedToDifferentMRNAs (sfp, last)) {
+ /* do not report if CDSs are linked to two different mRNAs */
} else if (fcontext.sap == sap) {
if (samelabel) {
ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature");
@@ -11507,6 +11726,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
left = fcontext.left;
right = fcontext.right;
strand = fcontext.strand;
+ partialL = fcontext.partialL;
+ partialR = fcontext.partialR;
sfp = SeqMgrGetNextFeature (bsp, last, SEQFEAT_RNA, 0, &fcontext);
while (sfp != NULL) {
thisrnatype = WhichRNA (sfp);
@@ -11518,10 +11739,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
(lastrnatype == TRANSFER_RNA && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
/* okay in mitochondria */
- } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
+ } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_2 || thisrnatype == INTERNAL_SPACER_X)) ||
(lastrnatype == INTERNAL_SPACER_2 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
(lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
- (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_X && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
if (gcp != NULL) {
gcp->itemID = fcontext.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -11534,10 +11756,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
(lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
/* okay in mitochondria */
- } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
+ } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_1 || thisrnatype == INTERNAL_SPACER_X)) ||
(lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
(lastrnatype == INTERNAL_SPACER_1 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
- (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_X && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
if (gcp != NULL) {
gcp->itemID = fcontext.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -11559,10 +11782,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
vsp->descr = NULL;
vsp->sfp = sfp;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "tRNA overlaps adjacent rRNA component");
- } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
+ } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_2 || thisrnatype == INTERNAL_SPACER_X)) ||
(lastrnatype == INTERNAL_SPACER_2 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
(lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
- (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_X && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
if (gcp != NULL) {
gcp->itemID = fcontext.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -11575,10 +11799,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
(lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
/* okay in mitochondria */
- } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
+ } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && (thisrnatype == INTERNAL_SPACER_1 || thisrnatype == INTERNAL_SPACER_X)) ||
(lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
(lastrnatype == INTERNAL_SPACER_1 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
- (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_X && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
if (gcp != NULL) {
gcp->itemID = fcontext.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -11591,13 +11816,16 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
} else {
/* abuts */
if (strand == Seq_strand_minus) {
- if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ if (lastrnatype == thisrnatype && partialL && fcontext.partialR && bsp->repr == Seq_repr_seg) {
+ /* okay in segmented set */
+ } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
(lastrnatype == TRANSFER_RNA && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
/* okay in mitochondria */
- } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_2) ||
+ } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && (thisrnatype != INTERNAL_SPACER_2 && thisrnatype != INTERNAL_SPACER_X)) ||
(lastrnatype == INTERNAL_SPACER_2 && thisrnatype != MIDDLE_RIBOSOMAL_SUBUNIT) ||
(lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_1) ||
- (lastrnatype == INTERNAL_SPACER_1 && thisrnatype != SMALL_RIBOSOMAL_SUBUNIT)) {
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype != SMALL_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_X && thisrnatype != SMALL_RIBOSOMAL_SUBUNIT)) {
if (gcp != NULL) {
gcp->itemID = fcontext.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -11607,13 +11835,16 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "Problem with order of abutting rRNA components");
}
} else {
- if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ if (lastrnatype == thisrnatype && partialR && fcontext.partialL && bsp->repr == Seq_repr_seg) {
+ /* okay in segmented set */
+ } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
(lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
/* okay in mitochondria */
- } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_1) ||
+ } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && (thisrnatype != INTERNAL_SPACER_1 && thisrnatype != INTERNAL_SPACER_X)) ||
(lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_2) ||
(lastrnatype == INTERNAL_SPACER_1 && thisrnatype != MIDDLE_RIBOSOMAL_SUBUNIT) ||
- (lastrnatype == INTERNAL_SPACER_2 && thisrnatype != LARGE_RIBOSOMAL_SUBUNIT)) {
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype != LARGE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_X && thisrnatype != LARGE_RIBOSOMAL_SUBUNIT)) {
if (gcp != NULL) {
gcp->itemID = fcontext.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -11630,6 +11861,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
left = fcontext.left;
right = fcontext.right;
strand = fcontext.strand;
+ partialL = fcontext.partialL;
+ partialR = fcontext.partialR;
lastrnatype = thisrnatype;
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, 0, &fcontext);
}
@@ -11677,6 +11910,65 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
}
}
+ if (ISA_na (bsp->mol)) {
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_gap, &fcontext);
+ while (sfp != NULL) {
+ estimated_length = 0;
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "estimated_length") != 0) continue;
+ if (StringHasNoText (gbq->val)) continue;
+ if (StringICmp (gbq->val, "unknown") == 0) continue;
+ if (sscanf (gbq->val, "%ld", &val) == 1) {
+ estimated_length = val;
+ }
+ }
+ if (StreamCacheSetup (NULL, sfp->location, EXPAND_GAPS_TO_DASHES, &sc)) {
+ dashes = 0;
+ Ns = 0;
+ realBases = 0;
+ while ((res = StreamCacheGetResidue (&sc)) != '\0') {
+ if (IS_LOWER (res)) {
+ res = TO_UPPER (res);
+ }
+ if (res == '-') {
+ dashes++;
+ } else if (res == 'N') {
+ Ns++;
+ } else {
+ realBases++;
+ }
+ }
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ loclen = SeqLocLen (sfp->location);
+ if (estimated_length > 0 && estimated_length != loclen) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature estimated_length %ld does not match %ld feature length",
+ (long) estimated_length, (long) loclen);
+ } else if (realBases > 0 && Ns > 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature over %ld real bases and %ld Ns", (long) realBases, (long) Ns);
+ } else if (realBases > 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature over %ld real bases", (long) realBases);
+ } else if (Ns > 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature over %ld Ns", (long) Ns);
+ } else if (estimated_length > 0 && dashes != estimated_length) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "Gap feature estimated_length %ld does not match %ld gap characters",
+ (long) estimated_length, (long) dashes);
+ }
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_gap, &fcontext);
+ }
+ }
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = NULL;
+
lastbiop = NULL;
lastsfp = NULL;
numBadFullSource = 0;
@@ -11972,7 +12264,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
bvs.is_syn_constr = TRUE;
}
}
- if (biop->origin == ORG_ARTIFICIAL || biop->origin == ORG_MUT || biop->origin == ORG_SYNTHETIC) {
+ if (biop->origin == ORG_ARTIFICIAL || biop->origin == ORG_SYNTHETIC) {
bvs.is_artificial = TRUE;
}
for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
@@ -12538,13 +12830,13 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
} else if (ISA_aa (bsp->mol)) {
just_prt_letters = TRUE;
for (ptr = gbqual->val, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
- if (StringChr ("acdefghiklmnpqrstuvwy", ch) == NULL) {
+ if (StringChr ("acdefghiklmnpqrstuvwy*", ch) == NULL) {
just_prt_letters = FALSE;
}
}
if (!just_prt_letters) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue,
- "%s is not a legal value for qualifier %s - should only be composed of acdefghiklmnpqrstuvwy amino acids",
+ "%s is not a legal value for qualifier %s - should only be composed of acdefghiklmnpqrstuvwy* amino acids",
gbqual->val, gbqual->qual);
}
}
@@ -12929,7 +13221,10 @@ static Boolean PartialAtSpliceSiteOrGap (SeqLocPtr head, Uint2 slpTag, BoolPtr i
return FALSE;
acceptor = SeqLocStart (slp);
donor = SeqLocStop (slp);
- bsp = BioseqLockById (sip);
+ bsp = NULL;
+ if (sip != NULL && (sip->choice != SEQID_GI || sip->data.intvalue > 0)) {
+ bsp = BioseqLockById (sip);
+ }
if (bsp == NULL)
return FALSE;
len = bsp->length;
@@ -13112,6 +13407,8 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
}
}
+ if (sfp->pseudo) return;
+
if (aa > 0 && aa != 255) {
/* - no gaps now that O and J are added
if (aa <= 74) {
@@ -13133,7 +13430,7 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid");
}
} else {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid");
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Missing tRNA amino acid");
}
}
@@ -13439,7 +13736,7 @@ static void CheckForBadGeneOverlap (ValidStructPtr vsp, SeqFeatPtr sfp)
SeqMgrFeatContext fcontext;
SeqFeatPtr gene, operon;
GeneRefPtr grp;
- ErrSev sev = SEV_ERROR;
+ ErrSev sev = /* SEV_ERROR */ SEV_WARNING;
if (sfp == NULL)
return;
@@ -13469,7 +13766,7 @@ static void CheckForBadMRNAOverlap (ValidStructPtr vsp, SeqFeatPtr sfp)
{
SeqMgrFeatContext fcontext;
SeqFeatPtr mrna;
- ErrSev sev = SEV_ERROR;
+ ErrSev sev = /* SEV_ERROR */ SEV_WARNING;
if (sfp == NULL)
return;
@@ -14630,6 +14927,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
Boolean bypassGeneTest;
Boolean dicistronic = FALSE;
Int2 inferenceCode;
+ Boolean hasInference = FALSE;
+ Boolean hasExperiment = FALSE;
Boolean accn_seqid;
@@ -14950,11 +15249,14 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
if (pseudo && sfp->product != NULL) {
if (ovgenepseudo) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PseudoCdsViaGeneHasProduct, "A coding region overlapped by a pseudogene should not have a product");
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PseudoCdsViaGeneHasProduct, "A coding region overlapped by a pseudogene should not have a product");
} else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PseudoCdsHasProduct, "A pseudo coding region should not have a product");
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PseudoCdsHasProtXref, "A pseudo coding region should not have a product");
}
}
+ if (pseudo && SeqMgrGetProtXref (sfp) != NULL) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PseudoCdsHasProduct, "A pseudo coding region should not have a protein xref");
+ }
if (codonqual) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_CodonQualifierUsed, "Use the proper genetic code, if available, or set transl_excepts on specific codons");
}
@@ -15344,6 +15646,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
}
if (StringICmp (gbq->qual, "inference") == 0) {
+ hasInference = TRUE;
inferenceCode = ValidateInferenceQualifier (gbq->val, TRUE);
if (inferenceCode != VALID_INFERENCE) {
if (inferenceCode < VALID_INFERENCE || inferenceCode > ACC_VERSION_NOT_PUBLIC) {
@@ -15351,8 +15654,14 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidInferenceValue, "Inference qualifier problem - %s", infMessage [(int) inferenceCode]);
}
+ } else if (StringICmp (gbq->val, "experiment") == 0) {
+ hasExperiment = TRUE;
}
}
+ if (sfp->exp_ev > 0 && (! hasInference) && (! hasExperiment) && (! vsp->feat_loc_has_gi)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidInferenceValue,
+ "Inference or experiment qualifier missing but obsolete experimental evidence qualifier set");
+ }
if (sfp->product != NULL) {
sip = SeqLocId (sfp->product);
@@ -15622,7 +15931,8 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
CharPtr farstr = "";
ErrSev fetchsev;
GatherContextPtr gcp;
- Boolean has_errors = FALSE, unclassified_except = FALSE, mismatch_except = FALSE;
+ Boolean has_errors = FALSE, unclassified_except = FALSE,
+ mismatch_except = FALSE, other_than_mismatch = FALSE;
Int2 i;
Boolean is_refseq = FALSE;
Int4 mismatch, total;
@@ -15643,7 +15953,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if (sfp->product == NULL)
return;
- if (sfp->excpt && (! StringHasNoText (sfp->except_text))) {
+ if (sfp->excpt && (! vsp->ignoreExceptions) && (! StringHasNoText (sfp->except_text))) {
for (i = 0; bypass_mrna_trans_check [i] != NULL; i++) {
if (StringISearch (sfp->except_text, bypass_mrna_trans_check [i]) != NULL) {
report_errors = FALSE; /* biological exception */
@@ -15690,7 +16000,9 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
}
if (bsp == NULL && vsp->farFetchMRNAproducts) {
- bsp = BioseqLockById (sip);
+ if (sip != NULL && (sip->choice != SEQID_GI || sip->data.intvalue > 0)) {
+ bsp = BioseqLockById (sip);
+ }
if (bsp != NULL) {
unlockProd = TRUE;
farstr = "(far) ";
@@ -15720,6 +16032,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
pdseq = GetSequenceByFeature (&sf);
if (pdseq == NULL) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors || unclassified_except) {
fetchsev = SEV_ERROR;
if (sip->choice != SEQID_GI) {
@@ -15748,18 +16061,21 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
if (counta < 19 * countnona) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] less than %sproduct length [%ld], and tail < 95%s polyA", (long) mlen, farstr, (long) plen, "%");
}
plen = mlen; /* even if it fails polyA test, allow base-by-base comparison on common length */
} else if (counta > 0 && countnona == 0) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail is 100%s polyA", (long) mlen, farstr, (long) plen, "%");
}
plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */
} else {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail >= 95%s polyA", (long) mlen, farstr, (long) plen, "%");
}
@@ -15767,6 +16083,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
} else {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] greater than %sproduct length [%ld]", (long) mlen, farstr, (long) plen);
}
@@ -15807,6 +16124,8 @@ erret:
if (! report_errors) {
if (! has_errors) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "mRNA has exception but passes transcription test");
+ } else if (unclassified_except && (! other_than_mismatch)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ErroneousException, "mRNA has unclassified exception but only difference is mismatches");
}
}
}
@@ -15952,7 +16271,8 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
Boolean transl_except = FALSE, prot_ok = TRUE, is_nc = FALSE,
has_errors = FALSE, report_errors = TRUE,
unclassified_except = FALSE, mismatch_except = FALSE,
- frameshift_except = FALSE, rearrange_except = FALSE;
+ frameshift_except = FALSE, rearrange_except = FALSE,
+ other_than_mismatch = FALSE;
CharPtr nuclocstr, farstr = "";
CodeBreakPtr cbp;
Int4 pos1, pos2, pos;
@@ -15973,7 +16293,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if (sfp == NULL)
return;
- if (sfp->excpt && (! StringHasNoText (sfp->except_text))) {
+ if (sfp->excpt && (! vsp->ignoreExceptions) && (! StringHasNoText (sfp->except_text))) {
for (i = 0; bypass_cds_trans_check [i] != NULL; i++) {
if (StringISearch (sfp->except_text, bypass_cds_trans_check [i]) != NULL) {
report_errors = FALSE; /* biological exception */
@@ -16037,6 +16357,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
newprot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, &alt_start); /* include stop codons, do not remove trailing X/B/Z */
if (newprot == NULL) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors || unclassified_except) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_CdTransFail, "Unable to translate");
}
@@ -16058,6 +16379,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
if (sev > SEV_NONE) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_AltStartCodon, "Alternative start codon used");
}
@@ -16127,6 +16449,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
pos = 0;
if ((pos1 % 3) != pos) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExceptPhase, "transl_except qual out of frame.");
}
@@ -16144,6 +16467,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
sev = SEV_ERROR;
}
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem, "Suspicious CDS location - frame > 1 but not 5' partial");
}
@@ -16153,6 +16477,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
sev = SEV_ERROR;
}
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem, "Suspicious CDS location - frame > 1 and not at consensus splice site");
}
@@ -16199,6 +16524,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if (stop_count > 0) {
if (got_dash) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
sev = SEV_ERROR;
if (unclassified_except) {
sev = SEV_WARNING;
@@ -16209,6 +16535,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
} else {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
sev = SEV_ERROR;
if (unclassified_except) {
sev = SEV_WARNING;
@@ -16248,6 +16575,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
goto erret;
} else if (got_dash) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
}
@@ -16259,7 +16587,9 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if (protid != NULL) {
prot1seq = BioseqFind (protid);
if (prot1seq == NULL && vsp->farFetchCDSproducts) {
- prot1seq = BioseqLockById (protid);
+ if (protid != NULL && (protid->choice != SEQID_GI || protid->data.intvalue > 0)) {
+ prot1seq = BioseqLockById (protid);
+ }
if (prot1seq != NULL) {
unlockProd = TRUE;
farstr = "(Far) ";
@@ -16292,6 +16622,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
if (sev != SEV_NONE) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_NoProtein, "No protein Bioseq given");
}
@@ -16377,11 +16708,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
} else if (i == 0) {
if ((sfp->partial) && (!no_beg) && (!no_end)) { /* ok, it's partial */
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "Start of location should probably be partial");
}
} else if (residue1 == '-') {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
}
@@ -16427,6 +16760,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
*/
} else {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_TransLen, "Given protein length [%ld] does not match %stranslation length [%ld]", prot1len, farstr, len);
}
@@ -16436,11 +16770,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if ((!no_beg) && (!no_end)) { /* just didn't label */
if (!got_stop) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial");
}
} else {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial");
}
@@ -16458,16 +16794,19 @@ erret:
if (show_stop) {
if ((!got_stop) && (!no_end)) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_NoStop, "Missing stop codon");
}
} else if ((got_stop) && (no_end)) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "Got stop codon, but 3'end is labeled partial");
}
} else if ((got_stop) && (!no_end) && (ragged)) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
sev = SEV_ERROR;
if (unclassified_except) {
sev = SEV_WARNING;
@@ -16481,6 +16820,7 @@ erret:
if (!prot_ok) {
if (transl_except) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept, "Unparsed transl_except qual. Skipped");
}
@@ -16488,6 +16828,7 @@ erret:
} else {
if (transl_except) {
has_errors = TRUE;
+ other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept, "Unparsed transl_except qual (but protein is okay). Skipped");
}
@@ -16508,6 +16849,8 @@ erret:
if ((! frameshift_except) && (! rearrange_except)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "CDS has exception but passes translation test");
}
+ } else if (unclassified_except && (! other_than_mismatch)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ErroneousException, "CDS has unclassified exception but only difference is mismatches");
}
}
}
@@ -16653,7 +16996,10 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
if ((ctr == 1) || (!SeqIdMatch (sip, last_sip))) {
/* spp = SeqPortFree (spp); */
- bsp = BioseqLockById (sip);
+ bsp = NULL;
+ if (sip != NULL && (sip->choice != SEQID_GI || sip->data.intvalue > 0)) {
+ bsp = BioseqLockById (sip);
+ }
if (bsp == NULL)
break;
len = bsp->length;
@@ -16931,12 +17277,37 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
SeqIdPtr id1 = NULL, id2;
BioseqPtr bsp;
SeqFeatPtr sfp = NULL;
+ Int2 zeroGi = 0;
+ Char buf [32];
+ SeqIdPtr sip;
if (slp == NULL)
return;
sfp = vsp->sfp;
+ tmp = NULL;
+ while ((tmp = SeqLocFindNext (slp, tmp)) != NULL) {
+ sip = SeqLocId (tmp);
+ if (sip != NULL && sip->choice == SEQID_GI && sip->data.intvalue <= 0) {
+ zeroGi++;
+ }
+ }
+ if (zeroGi > 0) {
+ StringCpy (buf, "?");
+ bsp = vsp->bsp;
+ if (bsp != NULL) {
+ SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
+ }
+ if (zeroGi > 1) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_FEAT_FeatureLocationIsGi0, "Feature has %d gi|0 locations on Bioseq %s",
+ (int) zeroGi, buf);
+ } else if (zeroGi > 0) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_FEAT_FeatureLocationIsGi0, "Feature has %d gi|0 location on Bioseq %s",
+ (int) zeroGi, buf);
+ }
+ }
+
bsp = BioseqFindFromSeqLoc (slp);
if (bsp != NULL && bsp->topology == 2) {
circular = TRUE;
diff --git a/api/valid.h b/api/valid.h
index 33af92a7..aa0eaa87 100644
--- a/api/valid.h
+++ b/api/valid.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.22 $
+* $Revision: 6.23 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,9 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: valid.h,v $
+* Revision 6.23 2006/04/21 17:59:18 kans
+* added ignoreExceptions flag to vsp - for MrnaTransCheck and CdTransCheck
+*
* Revision 6.22 2006/02/16 19:34:47 kans
* use vsp->is_smupd_in_sep to suppress ERR_SEQ_FEAT_FeatureRefersToAccession
*
@@ -233,6 +236,7 @@ typedef struct validstruct {
Boolean validateIDSet; /* look for gain or loss of general IDs on sequence update */
Boolean seqSubmitParent; /* flag from tbl2asn to suppress no pub message */
Boolean justShowAccession; /* extremely terse output with accession and error type */
+ Boolean ignoreExceptions; /* report translation and transcription problems even if exception set */
Int2 validationLimit; /* limit validation to major classes in Valid1GatherProc */
/* this section used for finer error reporting callback */
ValidErrorFunc errfunc;
diff --git a/api/valid.msg b/api/valid.msg
index 97bc4de9..0616b3e3 100644
--- a/api/valid.msg
+++ b/api/valid.msg
@@ -204,6 +204,9 @@ $^ CaseDifferenceInSeqID, 57
Multiple Bioseqs have the same Seq-id except for capitalization. Sequence
identifiers must be unique in a case-insensitive manner within a record.
+$^ DeltaComponentIsGi0, 58
+Delta component refers to gi 0. This indicates an error in database processing of this record.
+
$$ SEQ_DESCR, 2
$^ BioSourceMissing, 1
@@ -347,6 +350,9 @@ The collection date is not in the required format.
$^ BadPCRPrimerSequence, 36
The PCR primer sequence has illegal characters or non-IUPAC nucleotides.
+$^ BadPunctuation, 37
+The title ends with incorrect punctuation marks.
+
$$ GENERIC, 3
$^ NonAsciiAsn, 1
@@ -911,6 +917,20 @@ $^ FeatureSeqIDCaseDifference, 117
Feature location and referenced Bioseq have the same Seq-id except for capitalization.
Sequence identifiers must be unique in a case-insensitive manner within a record.
+$^ FeatureLocationIsGi0, 118
+Feature location refers to gi 0. This indicates an error in database processing of this record.
+
+$^ GapFeatureProblem, 119
+Gap features must only cover gaps in the sequence, not actual bases.
+
+$^ PseudoCdsHasProtXref, 120
+A coding region flagged as pseudo has a protein cross reference. There should be no
+protein product bioseq or protein cross reference on a pseudo CDS.
+
+$^ ErroneousException, 121
+The feature is marked with a specific exception qualifier, but validation indicates
+that a different exception should be used.
+
$$ SEQ_ALIGN, 6
$^ SeqIdProblem, 1
diff --git a/api/validerr.h b/api/validerr.h
index cfad0e33..5e2ee13d 100644
--- a/api/validerr.h
+++ b/api/validerr.h
@@ -59,6 +59,7 @@
#define ERR_SEQ_INST_InternalNsInSeqRaw 1,55
#define ERR_SEQ_INST_InternalNsAdjacentToGap 1,56
#define ERR_SEQ_INST_CaseDifferenceInSeqID 1,57
+#define ERR_SEQ_INST_DeltaComponentIsGi0 1,58
#define ERR_SEQ_DESCR 2,0
#define ERR_SEQ_DESCR_BioSourceMissing 2,1
#define ERR_SEQ_DESCR_InvalidForType 2,2
@@ -96,6 +97,7 @@
#define ERR_SEQ_DESCR_MissingText 2,34
#define ERR_SEQ_DESCR_BadCollectionDate 2,35
#define ERR_SEQ_DESCR_BadPCRPrimerSequence 2,36
+#define ERR_SEQ_DESCR_BadPunctuation 2,37
#define ERR_GENERIC 3,0
#define ERR_GENERIC_NonAsciiAsn 3,1
#define ERR_GENERIC_Spell 3,2
@@ -242,6 +244,10 @@
#define ERR_SEQ_FEAT_SelfReferentialProduct 5,115
#define ERR_SEQ_FEAT_ITSdoesNotAbutRRNA 5,116
#define ERR_SEQ_FEAT_FeatureSeqIDCaseDifference 5,117
+#define ERR_SEQ_FEAT_FeatureLocationIsGi0 5,118
+#define ERR_SEQ_FEAT_GapFeatureProblem 5,119
+#define ERR_SEQ_FEAT_PseudoCdsHasProtXref 5,120
+#define ERR_SEQ_FEAT_ErroneousException 5,121
#define ERR_SEQ_ALIGN 6,0
#define ERR_SEQ_ALIGN_SeqIdProblem 6,1
#define ERR_SEQ_ALIGN_StrandRev 6,2