summary refs log tree commit diff
path: root/api
diff options
context:
space:
mode:
Diffstat (limited to 'api')
-rw-r--r-- api/alignval.c 8
-rw-r--r-- api/asn2gnb1.c 135
-rw-r--r-- api/asn2gnb2.c 263
-rw-r--r-- api/asn2gnb3.c 141
-rw-r--r-- api/asn2gnb4.c 257
-rw-r--r-- api/asn2gnb5.c 60
-rw-r--r-- api/asn2gnb6.c 104
-rw-r--r-- api/asn2gnbi.h 11
-rw-r--r-- api/asn2gnbk.h 4
-rw-r--r-- api/edutil.c 68
-rw-r--r-- api/edutil.h 12
-rw-r--r-- api/explore.h 4
-rw-r--r-- api/findrepl.c 966
-rw-r--r-- api/findrepl.h 27
-rw-r--r-- api/salsap.c 646
-rw-r--r-- api/salsap.h 8
-rw-r--r-- api/seqmgr.c 234
-rw-r--r-- api/seqmgr.h 43
-rw-r--r-- api/seqport.c 300
-rw-r--r-- api/sequtil.c 379
-rw-r--r-- api/sequtil.h 9
-rw-r--r-- api/sqnutil1.c 262
-rw-r--r-- api/sqnutil2.c 194
-rw-r--r-- api/sqnutil3.c 114
-rw-r--r-- api/sqnutils.h 32
-rw-r--r-- api/subutil.c 142
-rw-r--r-- api/subutil.h 94
-rw-r--r-- api/tofasta.c 148
-rw-r--r-- api/txalign.c 16
-rw-r--r-- api/utilpars.c 13
-rw-r--r-- api/utilpars.h 5
-rw-r--r-- api/valid.c 1423
-rw-r--r-- api/valid.h 11
-rw-r--r-- api/valid.msg 48
-rw-r--r-- api/validerr.h 11
35 files changed, 5180 insertions, 1012 deletions
diff --git a/api/alignval.c b/api/alignval.c
index 141b366d..753eeb34 100644
--- a/api/alignval.c
+++ b/api/alignval.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/3/99
*
-* $Revision: 6.45 $
+* $Revision: 6.46 $
*
* File Description: To validate sequence alignment.
*
@@ -2093,9 +2093,9 @@ static void ValidateSeqAlignInHist (SeqHistPtr hist, SaValPtr svp)
SeqAlignPtr salp;
if (hist == NULL) return;
- for (salp = hist->assembly; salp != NULL; salp = salp->next) {
- ValidateSeqAlign (salp, svp->entityID, svp->message, svp->msg_success, svp->find_remote_bsp, svp->delete_bsp, svp->delete_salp, &svp->dirty);
- }
+ salp = hist->assembly;
+ /* ValidateSeqAlign will validate the entire chain */
+ ValidateSeqAlign (salp, svp->entityID, svp->message, svp->msg_success, svp->find_remote_bsp, svp->delete_bsp, svp->delete_salp, &svp->dirty);
}
static void ValidateSeqAlignCallback (SeqEntryPtr sep, Pointer mydata,
diff --git a/api/asn2gnb1.c b/api/asn2gnb1.c
index 3ef4f95d..9e55f9b1 100644
--- a/api/asn2gnb1.c
+++ b/api/asn2gnb1.c
@@ -28,11 +28,11 @@
* Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
* Mati Shomrat
*
-* $Id: asn2gnb1.c,v 1.85 2005/12/01 20:09:32 kans Exp $
+* $Id: asn2gnb1.c,v 1.97 2006/02/23 16:38:54 kans Exp $
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.85 $
+* $Revision: 1.97 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -367,7 +367,7 @@ NLM_EXTERN CharPtr DateToFF (
}
if (day < 1) {
- sprintf (buf, "??-%s-%ld",
+ sprintf (buf, "\?\?-%s-%ld",
month_names [month-1], (long) year);
} else if (day < 10) {
sprintf (buf, "0%ld-%s-%ld",
@@ -1313,7 +1313,7 @@ NLM_EXTERN void FFLineWrap (
FFSavePosition(dest, &line_start, &line_pos);
- // for EMBL 'XX' lines
+ /* for EMBL 'XX' lines */
if (eb_line_prefix != NULL) {
cont = FALSE;
if (break_pos > 1) {
@@ -2998,7 +2998,7 @@ static Boolean IsSepRefseq (
}
typedef struct modeflags {
- Boolean flags [27];
+ Boolean flags [29];
} ModeFlags, PNTR ModeFlagsPtr;
static ModeFlags flagTable [] = {
@@ -3009,7 +3009,7 @@ static ModeFlags flagTable [] = {
TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE,
- TRUE, TRUE},
+ TRUE, TRUE, TRUE, TRUE},
/* ENTREZ_MODE */
{FALSE, TRUE, TRUE, TRUE, TRUE,
@@ -3017,7 +3017,7 @@ static ModeFlags flagTable [] = {
TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE,
- TRUE, FALSE},
+ TRUE, TRUE, TRUE, FALSE},
/* SEQUIN_MODE */
{FALSE, FALSE, FALSE, FALSE, FALSE,
@@ -3025,7 +3025,7 @@ static ModeFlags flagTable [] = {
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
- TRUE, FALSE},
+ FALSE, TRUE, FALSE, FALSE},
/* DUMP_MODE */
{FALSE, FALSE, FALSE, FALSE, FALSE,
@@ -3033,7 +3033,7 @@ static ModeFlags flagTable [] = {
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE}
+ FALSE, FALSE, FALSE, FALSE}
};
static void SetFlagsFromMode (
@@ -3083,7 +3083,9 @@ static void SetFlagsFromMode (
ajp->flags.refSeqQualsToNote = *(bp++);
ajp->flags.selenocysteineToNote = *(bp++);
+ ajp->flags.pyrrolysineToNote = *(bp++);
ajp->flags.extraProductsToNote = *(bp++);
+ ajp->flags.codonRecognizedToNote = *(bp++);
ajp->flags.forGbRelease = *(bp++);
/* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML */
@@ -3109,6 +3111,7 @@ static void SetFlagsFromMode (
/* selenocysteine always a separate qualifier for RefSeq */
ajp->flags.selenocysteineToNote = FALSE;
+ ajp->flags.pyrrolysineToNote = FALSE;
} else {
@@ -3128,6 +3131,7 @@ static void SetFlagsFromMode (
/* selenocysteine always a separate qualifier for RefSeq */
ajp->flags.selenocysteineToNote = FALSE;
+ ajp->flags.pyrrolysineToNote = FALSE;
}
}
@@ -3363,19 +3367,57 @@ static void MakeGapFeats (
}
}
+/*
+* Descriptor visitor callback.  Sets the caller's Boolean to TRUE when the
+* descriptor is a user object of type "FeatureFetchPolicy" that carries a
+* "Policy" field whose value equals "OnlyNearFeatures" (the value is
+* compared without regard to case; the type and label are case-sensitive).
+*
+*   sdp       descriptor being visited; anything but Seq_descr_user is ignored
+*   userdata  BoolPtr to the flag; it is only ever set to TRUE, never cleared
+*/
+static void LookForFeatFetchPolicy (
+  SeqDescrPtr sdp,
+  Pointer userdata
+)
+
+{
+  UserFieldPtr   field;
+  ObjectIdPtr    label;
+  BoolPtr        onlyNearP;
+  UserObjectPtr  policy;
+
+  onlyNearP = (BoolPtr) userdata;
+  if (sdp == NULL || sdp->choice != Seq_descr_user || onlyNearP == NULL) return;
+
+  policy = (UserObjectPtr) sdp->data.ptrvalue;
+  if (policy == NULL || policy->type == NULL) return;
+  if (StringCmp (policy->type->str, "FeatureFetchPolicy") != 0) return;
+
+  for (field = policy->data; field != NULL; field = field->next) {
+    label = field->label;
+    if (label == NULL) continue;
+    if (field->data.ptrvalue == NULL) continue;
+    if (StringCmp (label->str, "Policy") != 0) continue;
+    /* NOTE(review): the field's choice is not checked before treating the value as a string */
+    if (StringICmp ((CharPtr) field->data.ptrvalue, "OnlyNearFeatures") != 0) continue;
+    *onlyNearP = TRUE;
+  }
+}
+
+static CharPtr bad_html_strings [] = { /* NULL-terminated list of HTML fragments; each entry is added to ajp->bad_html_fsa during job setup */
+ "<script", "<object", "<applet", "<embed", "<form", "javascript:", NULL
+};
+
static CharPtr defHead = "\
-Content-type: text/html\n\n\
-<HTML>\n\
-<HEAD><TITLE>GenBank entry</TITLE></HEAD>\n\
-<BODY>\n\
-<hr>\n\
+<html>\n\
+<head>\n\
+<meta http-equiv=\"Content-Type\" content=\"text/html; charset=us-ascii\" />\
+<title>GenBank entry</title>\n\
+</head>\n\
+<body>\n\
+<hr />\n\
<pre>";
static CharPtr defTail = "\
</pre>\n\
-<hr>\n\
-</BODY>\n\
-</HTML>\n";
+<hr />\n\
+</body>\n\
+</html>\n";
#define FAR_TRANS_MASK (SHOW_FAR_TRANSLATION | TRANSLATE_IF_NO_PRODUCT | ALWAYS_TRANSLATE_CDS)
#define FEAT_FETCH_MASK (ONLY_NEAR_FEATURES | FAR_FEATURES_SUPPRESS | NEAR_FEATURES_SUPPRESS)
@@ -3411,6 +3453,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
CharPtr ffhead = NULL;
CharPtr fftail = NULL;
Asn2gbWriteFunc ffwrite = NULL;
+ Boolean forceOnlyNearFeats = FALSE;
ValNodePtr gapvnp = NULL;
GBSeqPtr gbseq = NULL;
Int4 i;
@@ -3450,6 +3493,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
BaseBlockPtr PNTR paragraphByIDs;
BioseqPtr parent = NULL;
Int4 prevGi = 0;
+ Int2 q;
Pointer remotedata = NULL;
Asn2gbFreeFunc remotefree = NULL;
Asn2gbLockFunc remotelock = NULL;
@@ -3577,6 +3621,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
ajp = (IntAsn2gbJobPtr) MemNew (sizeof (IntAsn2gbJob));
if (ajp == NULL) return NULL;
+ VisitDescriptorsInSep (sep, (Pointer) &forceOnlyNearFeats, LookForFeatFetchPolicy);
+
gapvnp = NULL;
if (format != FTABLE_FMT) {
if (isG || isTPG || isOnlyLocal || isRefSeq || (isGeneral && (! isGED))) {
@@ -3782,6 +3828,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
aw.onlyNearFeats = TRUE;
+ } else if (forceOnlyNearFeats) {
+ aw.onlyNearFeats = TRUE;
} else {
aw.nearFeatsSuppress = TRUE;
}
@@ -3794,11 +3842,17 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
aw.onlyNearFeats = TRUE;
+ } else if (forceOnlyNearFeats) {
+ aw.onlyNearFeats = TRUE;
} else {
aw.nearFeatsSuppress = TRUE;
}
ajp->showFarTransl = TRUE;
+ } else if (forceOnlyNearFeats) {
+
+ aw.onlyNearFeats = TRUE;
+
} else {
aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
@@ -3861,6 +3915,9 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
if (mode == SEQUIN_MODE || mode == DUMP_MODE) {
aw.showBaseCount = TRUE;
}
+ aw.forcePrimaryBlock = (Boolean) ((flags & FORCE_PRIMARY_BLOCK) != 0);
+
+ aw.localFeatCount = VisitFeaturesInSep (sep, NULL, NULL);
aw.hup = FALSE;
aw.ssp = NULL;
@@ -3879,6 +3936,12 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
}
}
+ ajp->bad_html_fsa = TextFsaNew ();
+
+ for (q = 0; bad_html_strings [q] != NULL; q++) {
+ TextFsaAdd (ajp->bad_html_fsa, bad_html_strings [q]);
+ }
+
oldscope = SeqEntrySetScope (sep);
if (stream) {
@@ -3896,6 +3959,9 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
if (ffwrite != NULL) {
ffwrite (ffhead, userdata, HEAD_BLOCK);
}
+ if (is_html) {
+ DoQuickLinkFormat (aw.afp, "<div class=\"sequence\">");
+ }
}
/* if Web Entrez, set awp->sectionMax to decide when Next hyperlink is needed */
@@ -3927,6 +3993,10 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
}
if (stream) {
+ if (is_html) {
+ DoQuickLinkFormat (aw.afp, "</div>");
+ }
+
/* send optional tail string */
if (fftail == NULL && is_html) {
@@ -4686,16 +4756,16 @@ static void PrintBioSourceFtableEntry (
sprintf (str, "\t\t\tidentified_by\t");
break;
case SUBSRC_fwd_primer_seq :
- sprintf (str, "\t\t\tleft_primer\t");
+ sprintf (str, "\t\t\tfwd_pcr_primer_seq\t");
break;
case SUBSRC_rev_primer_seq :
- sprintf (str, "\t\t\tright_primer\t");
+ sprintf (str, "\t\t\trev_pcr_primer_seq\t");
break;
case SUBSRC_fwd_primer_name :
- sprintf (str, "\t\t\tleft_primer\t");
+ sprintf (str, "\t\t\tfwd_pcr_primer_name\t");
break;
case SUBSRC_rev_primer_name :
- sprintf (str, "\t\t\tright_primer\t");
+ sprintf (str, "\t\t\trev_pcr_primer_name\t");
break;
case SUBSRC_other :
sprintf (str, "\t\t\tnote\t");
@@ -5266,16 +5336,21 @@ NLM_EXTERN void DoImmediateFormat (
)
{
- BlockType blocktype;
- BioseqPtr bsp;
- FormatProc fmt;
- size_t max;
- SeqEntryPtr oldscope;
- QualValPtr qv = NULL;
- SeqEntryPtr sep;
- CharPtr str = NULL;
+ IntAsn2gbJobPtr ajp;
+ BlockType blocktype;
+ BioseqPtr bsp;
+ FormatProc fmt;
+ Boolean is_www;
+ size_t max;
+ SeqEntryPtr oldscope;
+ QualValPtr qv = NULL;
+ SeqEntryPtr sep;
+ CharPtr str = NULL;
if (afp == NULL || bbp == NULL) return;
+ ajp = afp->ajp;
+ if (ajp == NULL) return;
+ is_www = GetWWW (ajp);
blocktype = bbp->blocktype;
if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return;
@@ -5557,6 +5632,8 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
}
}
+ TextFsaFree (iajp->bad_html_fsa);
+
ValNodeFree (iajp->gihead);
free_buff ();
diff --git a/api/asn2gnb2.c b/api/asn2gnb2.c
index d0a0f1bb..c8353c89 100644
--- a/api/asn2gnb2.c
+++ b/api/asn2gnb2.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.61 $
+* $Revision: 1.69 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -359,6 +359,45 @@ static Boolean LocusHasBadChars (
return FALSE;
}
+static CharPtr gbseq_strd [4] = { /* GBSeq strandedness text, indexed by bsp->strand; AddLocusBlock maps out-of-range values to index 0 */
+ NULL, "single", "double", "mixed"
+};
+
+static CharPtr gbseq_mol [10] = { /* GBSeq moltype text, indexed by imolToMoltype [imol]; AddLocusBlock maps out-of-range values to index 0 */
+ "?", "DNA", "RNA", "tRNA", "rRNA", "mRNA", "uRNA", "snRNA", "snoRNA", "AA"
+};
+
+static CharPtr gbseq_top [3] = { /* GBSeq topology text, indexed by topology; AddLocusBlock maps out-of-range values to index 0 */
+ NULL, "linear", "circular"
+};
+
+/*
+* Fills seqid with the text shown in a navigation link title for gi.
+*
+* Tries, in order:
+*   1. GetAccnVerFromServer (gi, seqid)
+*   2. GetSeqIdForGI + SeqIdWrite in PRINTID_TEXTID_ACC_VER form
+*   3. the caller-supplied default string
+*
+*   gi      GI to look up; values <= 0 skip straight to the default
+*   seqid   output buffer; always starts out as an empty string
+*   len     size of seqid in bytes, including the terminator
+*   dfault  fallback text, or NULL to leave seqid as the lookups left it
+*
+* The fallback copy is bounded by len, so a long default can no longer
+* run past the end of the caller's buffer.
+*/
+static void LookupAccnForNavLink (
+  Int4 gi,
+  CharPtr seqid,
+  size_t len,
+  CharPtr dfault
+)
+
+{
+  SeqIdPtr  sip;
+  Boolean   written;
+
+  /* a zero-length buffer cannot even hold the terminator */
+  if (seqid == NULL || len < 1) return;
+  *seqid = '\0';
+
+  if (gi > 0) {
+    /* NOTE(review): GetAccnVerFromServer is not told the buffer size - confirm seqid is large enough for it */
+    if (GetAccnVerFromServer (gi, seqid)) return;
+
+    sip = GetSeqIdForGI (gi);
+    if (sip != NULL) {
+      written = (Boolean) (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, len) != NULL);
+      SeqIdFree (sip);
+      if (written) return;
+    }
+  }
+
+  if (dfault == NULL) return;
+  /* StringNCpy_0 copies at most len - 1 characters and always terminates */
+  StringNCpy_0 (seqid, dfault, len);
+}
+
NLM_EXTERN void AddLocusBlock (
Asn2gbWorkPtr awp,
Boolean willshowwgs,
@@ -376,7 +415,7 @@ NLM_EXTERN void AddLocusBlock (
BioSourcePtr biop;
Int2 bmol = 0;
BioseqPtr bsp;
- Char buf [512];
+ Char buf [1024];
SeqFeatPtr cds;
Int4 currGi;
Char date [40];
@@ -396,6 +435,7 @@ NLM_EXTERN void AddLocusBlock (
ValNodePtr gilistpos;
Char gi_buf [16];
SeqIdPtr gpp = NULL;
+ Boolean has_next_pref_ul = FALSE;
Boolean hasComment;
Char id [41];
Int2 imol = 0;
@@ -424,6 +464,8 @@ NLM_EXTERN void AddLocusBlock (
Int4 prevGi;
SeqDescrPtr sdp;
Char sect [128];
+ Char seg [32];
+ Char seqid [128];
SeqFeatPtr sfp;
SeqHistPtr hist;
SeqIdPtr sip;
@@ -435,6 +477,7 @@ NLM_EXTERN void AddLocusBlock (
UserObjectPtr uop;
ValNodePtr vnp;
Boolean wgsmaster = FALSE;
+ Int2 moltype, strandedness, topol;
if (awp == NULL) return;
ajp = awp->ajp;
@@ -1005,9 +1048,26 @@ NLM_EXTERN void AddLocusBlock (
gbseq->locus = StringSave (locus);
gbseq->length = length;
gbseq->division = StringSave (div);
+ /*
gbseq->strandedness = bsp->strand;
gbseq->moltype = imolToMoltype [imol];
gbseq->topology = topology;
+ */
+ strandedness = (Int2) bsp->strand;
+ if (strandedness < 0 || strandedness > 3) {
+ strandedness = 0;
+ }
+ gbseq->strandedness = StringSave (gbseq_strd [strandedness]);
+ moltype = (Int2) imolToMoltype [imol];
+ if (moltype < 0 || moltype > 9) {
+ moltype = 0;
+ }
+ gbseq->moltype = StringSave (gbseq_mol [moltype]);
+ topol = (Int2) topology;
+ if (topol < 0 || topol > 2) {
+ topol = 0;
+ }
+ gbseq->topology = StringSave (gbseq_top [topol]);
for (sip = bsp->id; sip != NULL; sip = sip->next) {
SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id));
@@ -1095,7 +1155,6 @@ NLM_EXTERN void AddLocusBlock (
DoQuickLinkFormat (awp->afp, buf);
buf [0] = '\0';
- prefix = NULL;
hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext) != NULL);
if (! hasComment) {
hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_region, &dcontext) != NULL);
@@ -1134,43 +1193,33 @@ NLM_EXTERN void AddLocusBlock (
}
buf [0] = '\0';
- StringCpy (buf, "<div class=\"localnav\"><ul class=\"locallinks\">");
+ StringCpy (buf, "<div class=\"localnav\"><ul class=\"locals\">");
if (hasComment) {
- sprintf (sect, "<li><a href=\"#comment_%ld\">Comment</a></li>", (long) awp->currGi);
- StringCat (buf, prefix);
- prefix = " ";
+ sprintf (sect, "<li><a href=\"#comment_%ld\" title=\"Jump to the comment section of this record\">Comment</a></li>", (long) awp->currGi);
StringCat (buf, sect);
}
- sprintf (sect, "<li><a href=\"#feature_%ld\">Features</a></li>", (long) awp->currGi);
- StringCat (buf, prefix);
- prefix = " ";
+ sprintf (sect, "<li><a href=\"#feature_%ld\" title=\"Jump to the feature table of this record\">Features</a></li>", (long) awp->currGi);
StringCat (buf, sect);
if (willshowwgs) {
- sprintf (sect, "<li><a href=\"#wgs_%ld\">WGS</a></li>", (long) awp->currGi);
- StringCat (buf, prefix);
- prefix = " ";
+ sprintf (sect, "<li><a href=\"#wgs_%ld\" title=\"Jump to WGS section of this record\">WGS</a></li>", (long) awp->currGi);
StringCat (buf, sect);
}
if (willshowgenome) {
- sprintf (sect, "<li><a href=\"#genome_%ld\">Genome</a></li>", (long) awp->currGi);
- StringCat (buf, prefix);
- prefix = " ";
+ sprintf (sect, "<li><a href=\"#genome_%ld\" title=\"Jump to the genome section of this record\">Genome</a></li>", (long) awp->currGi);
StringCat (buf, sect);
}
if (willshowcontig) {
- sprintf (sect, "<li><a href=\"#contig_%ld\">Contig</a></li>", (long) awp->currGi);
- StringCat (buf, prefix);
- prefix = " ";
+ sprintf (sect, "<li><a href=\"#contig_%ld\" title=\"Jump to the contig section of this record\">Contig</a></li>", (long) awp->currGi);
StringCat (buf, sect);
}
if (willshowsequence) {
- sprintf (sect, "<li><a href=\"#sequence_%ld\">Sequence</a></li>", (long) awp->currGi);
- StringCat (buf, prefix);
- prefix = " ";
+ sprintf (sect, "<li><a href=\"#sequence_%ld\" title=\"Jump to the sequence of this record\">Sequence</a></li>", (long) awp->currGi);
StringCat (buf, sect);
}
+ StringCat (buf, "</ul>");
+
prevGi = 0;
currGi = 0;
nextGi = 0;
@@ -1194,30 +1243,55 @@ NLM_EXTERN void AddLocusBlock (
}
} while (gilistpos != NULL && currGi != awp->currGi);
+ has_next_pref_ul = FALSE;
if (currGi == awp->currGi && nextGi > 0 && awp->sectionCount < awp->sectionMax) {
- sprintf (sect, "<li class=\"localnext\"><a href=\"#locus_%ld\">Next</a></li>", (long) nextGi);
- StringCat (buf, prefix);
- prefix = " ";
+ if (! has_next_pref_ul) {
+ StringCat (buf, "<ul class=\"nextprevlinks\">");
+ has_next_pref_ul = TRUE;
+ }
+ LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
+ if (awp->seg + 1 > 0 && awp->numsegs > 0 && awp->seg + 1 <= awp->numsegs) {
+ sprintf (seg, " (segment %d of %ld)", (int) (awp->seg + 1), (long) awp->numsegs);
+ StringCat (seqid, seg);
+ }
+ sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) nextGi, seqid);
StringCat (buf, sect);
} else if (awp->nextGi > 0) {
- sprintf (sect, "<li class=\"localnext\"><a href=\"#locus_%ld\">Next</a></li>", (long) awp->nextGi);
- StringCat (buf, prefix);
- prefix = " ";
+ if (! has_next_pref_ul) {
+ StringCat (buf, "<ul class=\"nextprevlinks\">");
+ has_next_pref_ul = TRUE;
+ }
+ LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
+ sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) awp->nextGi, seqid);
StringCat (buf, sect);
}
if (currGi == awp->currGi && prevGi > 0 && awp->sectionCount > 1) {
- sprintf (sect, "<li class=\"localprev\"><a href=\"#locus_%ld\">Previous</a></li>", (long) prevGi);
- StringCat (buf, prefix);
- prefix = " ";
+ if (! has_next_pref_ul) {
+ StringCat (buf, "<ul class=\"nextprevlinks\">");
+ has_next_pref_ul = TRUE;
+ }
+ LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
+ if (awp->seg - 1 > 0 && awp->numsegs > 0 && awp->seg - 1 <= awp->numsegs) {
+ sprintf (seg, " (segment %d of %ld)", (int) (awp->seg - 1), (long) awp->numsegs);
+ StringCat (seqid, seg);
+ }
+ sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) prevGi, seqid);
StringCat (buf, sect);
} else if (awp->prevGi > 0) {
- sprintf (sect, "<li class=\"localprev\"><a href=\"#locus_%ld\">Previous</a></li>", (long) awp->prevGi);
- StringCat (buf, prefix);
- prefix = " ";
+ if (! has_next_pref_ul) {
+ StringCat (buf, "<ul class=\"nextprevlinks\">");
+ has_next_pref_ul = TRUE;
+ }
+ LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
+ sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) awp->prevGi, seqid);
StringCat (buf, sect);
}
- StringCat (buf, "</ul></div>\n");
+ if (has_next_pref_ul) {
+ StringCat (buf, "</ul>");
+ }
+ StringCat (buf, "</div>\n");
+ StringCat (buf, "<pre class=\"genbank\">");
DoQuickLinkFormat (awp->afp, buf);
}
@@ -1922,11 +1996,128 @@ NLM_EXTERN void AddVersionBlock (
}
}
+/*
+* Appends an HTML anchor for a genome project ID to ffstring.  The href is
+* link_projid followed by projID, and projID is also the visible link text.
+* Every piece is added with the same FFAddOneString settings
+* (FALSE, FALSE, TILDE_IGNORE).
+*/
+static void FF_asn2gb_www_projID (
+  StringItemPtr ffstring,
+  CharPtr projID
+)
+
+{
+  Int2     i;
+  CharPtr  pieces [6];
+
+  pieces [0] = "<a href=";
+  pieces [1] = link_projid;
+  pieces [2] = projID;
+  pieces [3] = ">";
+  pieces [4] = projID;
+  pieces [5] = "</a>";
+
+  for (i = 0; i < 6; i++) {
+    FFAddOneString (ffstring, pieces [i], FALSE, FALSE, TILDE_IGNORE);
+  }
+}
+
+/*
+* Appends one project ID to the PROJECT line, preceded by prefix.
+* When GetWWW (ajp) is true the ID is wrapped in a hyperlink.
+*/
+static void AddOneProjectID (
+  IntAsn2gbJobPtr ajp,
+  StringItemPtr ffstring,
+  CharPtr prefix,
+  Int4 projectID
+)
+
+{
+  Char  buf [32];
+
+  sprintf (buf, "%ld", (long) projectID);
+  FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
+  if (GetWWW (ajp)) {
+    FF_asn2gb_www_projID (ffstring, buf);
+  } else {
+    FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+  }
+}
+
+/*
+* Adds a PROJECT block listing the ProjectID values found in the last
+* "GenomeProjectsDB" user object descriptor on the bioseq.
+*
+* Nothing is added unless the bioseq is nucleotide, the format is
+* GENBANK_FMT, and such a descriptor exists.  The first ID is prefixed
+* with "GenomeProject:" and later ones with ",".  Only integer fields
+* (choice 2) with a value > 0 are printed.  "ParentID" fields are
+* recognized by the data but deliberately not displayed.
+*/
NLM_EXTERN void AddProjectBlock (
Asn2gbWorkPtr awp
)
{
+  IntAsn2gbJobPtr    ajp;
+  BaseBlockPtr       bbp;
+  BioseqPtr          bsp;
+  UserFieldPtr       curr;
+  SeqMgrDescContext  dcontext;
+  StringItemPtr      ffstring;
+  UserObjectPtr      gpuop = NULL;
+  Uint4              itemID = 0;
+  ObjectIdPtr        oip;
+  CharPtr            prefix;
+  Int4               projectID;
+  SeqDescrPtr        sdp;
+  UserObjectPtr      uop;
+  Int4               val;
+
+  if (awp == NULL) return;
+  ajp = awp->ajp;
+  if (ajp == NULL) return;
+  bsp = awp->bsp;
+  if (bsp == NULL) return;
+
+  if (! ISA_na (bsp->mol)) return;
+  if (awp->format != GENBANK_FMT) return;
+
+  /* keep the last matching descriptor and remember its itemID for the block */
+  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+  while (sdp != NULL) {
+    uop = (UserObjectPtr) sdp->data.ptrvalue;
+    if (uop != NULL) {
+      oip = uop->type;
+      if (oip != NULL && StringICmp (oip->str, "GenomeProjectsDB") == 0) {
+        gpuop = uop;
+        itemID = dcontext.itemID;
+      }
+    }
+    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
+  }
+  if (gpuop == NULL) return;
+
+  ffstring = FFGetString (ajp);
+  if (ffstring == NULL) return;
+
+  bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
+  if (bbp == NULL) {
+    /* hand the string back so it is not lost when the block cannot be added */
+    FFRecycleString (ajp, ffstring);
+    return;
+  }
+
+  bbp->entityID = awp->entityID;
+  bbp->itemID = itemID;
+  bbp->itemtype = OBJ_SEQDESC;
+
+  FFStartPrint (ffstring, awp->format, 0, 12, "PROJECT", 12, 5, 5, "XX", TRUE);
+
+  /* each ID is written one step late: when the next ProjectID arrives, or after the loop */
+  prefix = "GenomeProject:";
+  projectID = 0;
+  for (curr = gpuop->data; curr != NULL; curr = curr->next) {
+    oip = curr->label;
+    if (oip == NULL) continue;
+    if (StringICmp (oip->str, "ProjectID") != 0) continue;
+    if (curr->choice != 2) continue;
+    val = (Int4) curr->data.intvalue;
+    if (projectID > 0) {
+      AddOneProjectID (ajp, ffstring, prefix, projectID);
+      prefix = ",";
+    }
+    projectID = val;
+  }
+  if (projectID > 0) {
+    AddOneProjectID (ajp, ffstring, prefix, projectID);
+  }
+
+  bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
+  FFRecycleString (ajp, ffstring);
+
+  if (awp->afp != NULL) {
+    DoImmediateFormat (awp->afp, bbp);
+  }
}
/* only displaying PID in GenPept format */
@@ -5355,7 +5546,7 @@ NLM_EXTERN void AddSlashBlock (
if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
(awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
- sprintf (buf, "//\n<a name=\"slash_%ld\"></a>", (long) awp->currGi);
+ sprintf (buf, "//</pre>\n<a name=\"slash_%ld\"></a>", (long) awp->currGi);
str = StringSave (buf);
} else {
str = MemNew(sizeof(Char) * 4);
diff --git a/api/asn2gnb3.c b/api/asn2gnb3.c
index 177e02bf..fc3948f8 100644
--- a/api/asn2gnb3.c
+++ b/api/asn2gnb3.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.48 $
+* $Revision: 1.55 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -705,12 +705,14 @@ static Boolean DoGetAnnotationComment (
)
{
+ Int2 ce = 0, cm = 0;
SeqMgrDescContext dcontext;
CharPtr method = NULL;
UserObjectPtr moduop;
CharPtr name = NULL;
ObjectIdPtr oip;
SeqDescrPtr sdp;
+ UserFieldPtr u;
UserFieldPtr ufp;
UserObjectPtr uop;
@@ -726,14 +728,32 @@ static Boolean DoGetAnnotationComment (
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
if (oip == NULL) continue;
- if (StringCmp(oip->str, "Contig Name") == 0) {
+ if (StringCmp (oip->str, "Contig Name") == 0) {
name = (CharPtr) ufp->data.ptrvalue;
- } else if (StringCmp(oip->str, "Method") == 0) {
+ } else if (StringCmp (oip->str, "Method") == 0) {
method = (CharPtr) ufp->data.ptrvalue;
- } else if (StringCmp(oip->str, "mRNA") == 0) {
+ } else if (StringCmp (oip->str, "mRNA") == 0) {
*mrnaEv = TRUE;
- } else if (StringCmp(oip->str, "EST") == 0) {
+ } else if (StringCmp (oip->str, "EST") == 0) {
*estEv = TRUE;
+ } else if (StringCmp (oip->str, "Counts") == 0) {
+ for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
+ if (u->data.ptrvalue == NULL) continue;
+ if (u->choice != 2) continue;
+ oip = u->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "mRNA") == 0) {
+ cm = (Int2) u->data.intvalue;
+ if (cm > 0) {
+ *mrnaEv = TRUE;
+ }
+ } else if (StringCmp (oip->str, "EST") == 0) {
+ ce = (Int2) u->data.intvalue;
+ if (ce > 0) {
+ *estEv = TRUE;
+ }
+ }
+ }
}
}
}
@@ -999,6 +1019,10 @@ static CharPtr GetPrimaryStrForDelta (
} else {
id = GetSeqIdForGI (gi);
}
+ if (id == NULL) {
+ sprintf (buf, "%ld", (long) gi);
+ accn = TRUE;
+ }
} else {
id = SeqIdDup (sip);
}
@@ -1368,10 +1392,8 @@ NLM_EXTERN void AddPrimaryBlock (
hist = bsp->hist;
if ((! IsTpa (bsp, has_tpa_assembly, &isRefSeq)) ||
hist == NULL || hist->assembly == NULL) {
- if (awp->contig) {
- /*
+ if (awp->forcePrimaryBlock) {
AddAltPrimaryBlock (awp);
- */
}
return;
}
@@ -1499,10 +1521,9 @@ NLM_EXTERN void AddCommentBlock (
{
size_t acclen;
- /*
SeqMgrAndContext acontext;
AnnotDescPtr adp;
- */
+ Boolean annotDescCommentToComment;
IntAsn2gbJobPtr ajp;
BioseqPtr bsp;
Char buf [1024];
@@ -1548,6 +1569,7 @@ NLM_EXTERN void AddCommentBlock (
CharPtr str;
Char taxID [32];
TextSeqIdPtr tsip;
+ UserFieldPtr ufp;
UserObjectPtr uop;
CharPtr wgsaccn = NULL;
CharPtr wgsname = NULL;
@@ -2582,38 +2604,61 @@ NLM_EXTERN void AddCommentBlock (
/* look for Seq-annot.desc.comment on annots packaged on current bioseq */
- /*
- adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_comment, &acontext);
+ annotDescCommentToComment = FALSE;
+ adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_user, &acontext);
while (adp != NULL) {
- str = (CharPtr) adp->data.ptrvalue;
- if (StringDoesHaveText (str)) {
- cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
- if (cbp != NULL) {
+ uop = (UserObjectPtr) adp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL) {
+ if (StringCmp (oip->str, "AnnotDescCommentPolicy") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
+ if (StringCmp (oip->str, "Policy") == 0) {
+ if (StringICmp ((CharPtr) ufp->data.ptrvalue, "ShowInComment") == 0) {
+ annotDescCommentToComment = TRUE;
+ }
+ }
+ }
+ }
+ }
+ }
+ adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_user, &acontext);
+ }
- cbp->entityID = awp->entityID;
- cbp->first = first;
- first = FALSE;
+ if (annotDescCommentToComment) {
+ adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_comment, &acontext);
+ while (adp != NULL) {
+ str = (CharPtr) adp->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
- if (cbp->first) {
- FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
- } else {
- FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
- }
+ cbp->entityID = awp->entityID;
+ cbp->first = first;
+ first = FALSE;
- FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_EXPAND);
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
- cbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
- FFRecycleString (ajp, ffstring);
- ffstring = FFGetString (ajp);
+ FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_EXPAND);
- if (awp->afp != NULL) {
- DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ cbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString (ajp, ffstring);
+ ffstring = FFGetString (ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
+ adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_comment, &acontext);
}
- adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_comment, &acontext);
}
- */
FFRecycleString(ajp, ffstring);
}
@@ -4546,7 +4591,10 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
ifp->mapToPep = FALSE;
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
- awp->featseen = TRUE;
+ /* this allows remote SNP, CDD, MGC, etc., not to be treated as local annotation */
+ if (awp->entityID != fbp->entityID || fbp->itemID <= awp->localFeatCount) {
+ awp->featseen = TRUE;
+ }
awp->featjustseen = TRUE;
if (fcontext->seqfeattype == SEQFEAT_PROT) {
@@ -4999,7 +5047,7 @@ NLM_EXTERN void AddFeatureBlock (
if (awp->format == GENPEPT_FMT && ISA_aa (bsp->mol)) {
cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
if (cds != NULL && cds->data.choice == SEQFEAT_CDREGION) {
- /* if protein bioseq and cds feature but no nucleotide, cannot index cds, so skip */
+
if (fcontext.entityID > 0 && fcontext.itemID > 0) {
fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
@@ -5023,6 +5071,31 @@ NLM_EXTERN void AddFeatureBlock (
DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
}
}
+ } else if (cds->idx.entityID > 0 && cds->idx.itemID > 0) {
+
+ /* if protein bioseq and cds feature but no nucleotide, handle as special case */
+
+ fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntCdsBlock));
+ if (fbp != NULL) {
+
+ fbp->entityID = cds->idx.entityID;
+ fbp->itemID = cds->idx.itemID;
+ fbp->itemtype = OBJ_SEQFEAT;
+ fbp->featdeftype = FEATDEF_CDS;
+ ifp = (IntFeatBlockPtr) fbp;
+ ifp->mapToNuc = FALSE;
+ ifp->mapToProt = TRUE;
+ ifp->mapToGen = FALSE;
+ ifp->mapToMrna = FALSE;
+ ifp->mapToPep = FALSE;
+ ifp->isCDS = TRUE;
+ ifp->firstfeat = awp->firstfeat;
+ awp->firstfeat = FALSE;
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) fbp);
+ }
+ }
}
}
prot = SeqMgrGetPROTgivenProduct (bsp, &fcontext);
diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c
index f43ff114..2aebe7ad 100644
--- a/api/asn2gnb4.c
+++ b/api/asn2gnb4.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.85 $
+* $Revision: 1.98 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -110,10 +110,12 @@ static FtQualType feat_qual_order [] = {
FTQUAL_pseudo,
FTQUAL_selenocysteine,
+ FTQUAL_pyrrolysine,
FTQUAL_codon_start,
FTQUAL_anticodon,
+ FTQUAL_trna_codons,
FTQUAL_bound_moiety,
FTQUAL_clone,
FTQUAL_compare,
@@ -179,7 +181,7 @@ static FtQualType feat_note_order [] = {
FTQUAL_transcript_id_note, /* !!! remove October 15, 2003 !!! */
FTQUAL_gene_desc,
FTQUAL_gene_syn,
- FTQUAL_trna_codons,
+ FTQUAL_trna_codons_note,
FTQUAL_encodes,
FTQUAL_prot_desc,
FTQUAL_prot_note,
@@ -194,6 +196,7 @@ static FtQualType feat_note_order [] = {
FTQUAL_exception_note,
FTQUAL_region,
FTQUAL_selenocysteine_note,
+ FTQUAL_pyrrolysine_note,
FTQUAL_prot_names,
FTQUAL_bond,
FTQUAL_site,
@@ -294,6 +297,8 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "prot_names", Qual_class_protnames },
{ "protein_id", Qual_class_seq_id },
{ "pseudo", Qual_class_boolean },
+ { "pyrrolysine", Qual_class_boolean },
+ { "pyrrolysine", Qual_class_string },
{ "region", Qual_class_region },
{ "region_name", Qual_class_string },
{ "replace", Qual_class_replace },
@@ -321,6 +326,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "transposon", Qual_class_quote },
{ "trans_splicing", Qual_class_boolean },
{ "trna_aa", Qual_class_ignore },
+ { "codon_recognized", Qual_class_trna_codons },
{ "trna_codons", Qual_class_trna_codons },
{ "usedin", Qual_class_usedin },
{ "xtra_products", Qual_class_xtraprds }
@@ -456,10 +462,12 @@ static CharPtr trnaList [] = {
"tRNA-Gly",
"tRNA-His",
"tRNA-Ile",
+ "tRNA-Xle",
"tRNA-Lys",
"tRNA-Leu",
"tRNA-Met",
"tRNA-Asn",
+ "tRNA-Pyl",
"tRNA-Pro",
"tRNA-Gln",
"tRNA-Arg",
@@ -1578,9 +1586,10 @@ static void GetStrFormRNAEvidence (
)
{
+ Int2 ce = 0, cm = 0, cp = 0, ne = 0, nm = 0, np = 0;
+ Boolean has_counts = FALSE;
size_t len;
CharPtr method = NULL, prefix = NULL;
- Int2 ne = 0, nm = 0, np = 0;
ObjectIdPtr oip;
CharPtr str = NULL;
CharPtr PNTR strp;
@@ -1598,8 +1607,7 @@ static void GetStrFormRNAEvidence (
if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
if (StringCmp (oip->str, "Method") == 0) {
method = StringSaveNoNull ((CharPtr) ufp->data.ptrvalue);
- }
- if (StringCmp (oip->str, "mRNA") == 0) {
+ } else if (StringCmp (oip->str, "mRNA") == 0) {
for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
if (u->data.ptrvalue == NULL) continue;
for (uu = (UserFieldPtr) u->data.ptrvalue; uu != NULL; uu = uu->next) {
@@ -1632,9 +1640,30 @@ static void GetStrFormRNAEvidence (
}
}
}
+ } else if (StringCmp (oip->str, "Counts") == 0) {
+ has_counts = TRUE;
+ for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
+ if (u->data.ptrvalue == NULL) continue;
+ if (u->choice != 2) continue;
+ oip = u->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "mRNA") == 0) {
+ cm = (Int2) u->data.intvalue;
+ } else if (StringCmp (oip->str, "EST") == 0) {
+ ce = (Int2) u->data.intvalue;
+ } else if (StringCmp (oip->str, "Protein") == 0) {
+ cp = (Int2) u->data.intvalue;
+ }
+ }
}
}
+ if (has_counts) {
+ nm = cm;
+ ne = ce;
+ np = cp;
+ }
+
len = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 80;
str = (CharPtr) MemNew (len);
if (str == NULL) return;
@@ -2330,6 +2359,24 @@ static FloatHi MolWtForProtFeat (
return MolWtForLoc (sfp->location);
}
+static void ChangeOToX (CharPtr str)
+/* Rewrites str in place: every 'O' becomes 'X' and every 'o' becomes 'x'.
+   A NULL str is ignored.  The callers visible in this patch apply it to
+   /translation text when ajp->mode is RELEASE_MODE or ENTREZ_MODE; elsewhere
+   in this patch residue 'O' is reported as pyrrolysine. */
+{
+ Char ch;
+
+ if (str == NULL) return;
+ ch = *str;
+ while (ch != '\0') { /* walk to the terminating NUL */
+ if (ch == 'O') {
+ *str = 'X';
+ } else if (ch == 'o') {
+ *str = 'x';
+ }
+ str++;
+ ch = *str;
+ }
+}
+
static void FormatFeatureBlockQuals (
StringItemPtr ffstring,
IntAsn2gbJobPtr ajp,
@@ -3134,6 +3181,11 @@ static void FormatFeatureBlockQuals (
if (str != NULL) {
residue = cbaa.value.intvalue;
ptr = Get3LetterSymbol (ajp, seqcode, sctp, residue);
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ if (StringICmp (ptr, "Pyl") == 0 || StringICmp (ptr, "Xle") == 0) {
+ ptr = "OTHER";
+ }
+ }
if (ptr == NULL) {
ptr = "OTHER";
}
@@ -3200,6 +3252,21 @@ static void FormatFeatureBlockQuals (
}
break;
+ case Qual_class_trna_codons :
+ trna = qvp [idx].trp;
+ if (trna) {
+ numcodons = ComposeCodonsRecognizedString (trna, numbuf, sizeof (numbuf));
+ if (numcodons < 1 || StringHasNoText (numbuf)) {
+ } else {
+ FFAddTextToString(ffstring, "/", "codon_recognized", "=\"",
+ FALSE, TRUE, TILDE_IGNORE);
+ FFAddOneString(ffstring, numbuf, FALSE, TRUE, TILDE_TO_SPACES);
+ FFAddOneChar(ffstring, '\"', FALSE);
+ FFAddOneChar(ffstring, '\n', FALSE);
+ }
+ }
+ break;
+
case Qual_class_codon :
gbq = qvp [idx].gbq;
if (gbq == NULL || (ajp->flags.dropIllegalQuals && (! AllowedValQual (featdeftype, idx)))) break;
@@ -3483,6 +3550,9 @@ static void FormatFeatureBlockQuals (
}
}
if (! StringHasNoText (str)) {
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ ChangeOToX (str);
+ }
FFAddTextToString(ffstring, "/translation=\"", str, "\"",
FALSE, TRUE, TILDE_TO_SPACES);
FFAddOneChar(ffstring, '\n', FALSE);
@@ -3509,6 +3579,9 @@ static void FormatFeatureBlockQuals (
*/
SeqPortStreamLoc (sfp->product, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &protein_seq, SaveGBSeqSequence);
if (! StringHasNoText (str)) {
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ ChangeOToX (str);
+ }
FFAddTextToString(ffstring, "/translation=\"", str, "\"",
FALSE, TRUE, TILDE_TO_SPACES);
FFAddOneChar(ffstring, '\n', FALSE);
@@ -4387,10 +4460,12 @@ static CharPtr validRefSeqExceptionString [] = {
"nonconsensus splice site",
"modified codon recognition",
"alternative start codon",
+ "dicistronic gene",
"unclassified transcription discrepancy",
"unclassified translation discrepancy",
"mismatches in transcription",
"mismatches in translation",
+ "adjusted for low-quality genome",
NULL
};
@@ -4591,6 +4666,51 @@ static void ParseInference (
*bad_inferenceP = bad;
}
+typedef struct geneprot { /* search state shared between FindGeneOnIsolatedProtein and its visitor callback */
+ SeqFeatPtr gene; /* first gene feature accepted by the callback, NULL until one is found */
+ SeqFeatPtr cds; /* the CDS whose location is being tested against each gene */
+ Boolean failed; /* set TRUE when a second gene is accepted, i.e. the match is ambiguous */
+} GeneProtData, PNTR GeneProtPtr;
+
+static void CheckGeneOnIsolatedProtein (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+/* Feature visitor passed to VisitFeaturesInSep by FindGeneOnIsolatedProtein.
+   sfp is the feature being visited; userdata is expected to be a GeneProtPtr
+   whose cds field is already set (cds is dereferenced without a NULL check).
+   Non-gene features are ignored.  The first gene that passes the SeqLocAinB
+   test is stored in gpp->gene; any later one sets gpp->failed. */
+{
+ GeneProtPtr gpp;
+
+ if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
+ gpp = (GeneProtPtr) userdata;
+ if (gpp == NULL) return;
+
+ if (SeqLocAinB (gpp->cds->location, sfp->location) < 0) return; /* presumably negative means the CDS location is not contained in the gene location - TODO confirm */
+ if (gpp->gene != NULL) {
+ gpp->failed = TRUE; /* a gene was already recorded, so this is a second candidate */
+ } else {
+ gpp->gene = sfp;
+ }
+}
+
+static SeqFeatPtr FindGeneOnIsolatedProtein (
+ SeqEntryPtr sep,
+ SeqFeatPtr cds
+)
+/* Visits every feature in sep with CheckGeneOnIsolatedProtein, looking for a
+   gene feature that matches the location of cds.
+   Returns the single gene accepted by the callback.  Returns NULL when sep or
+   cds is NULL, when no gene was accepted, or when more than one gene was
+   accepted (gpd.failed). */
+{
+ GeneProtData gpd;
+
+ if (sep == NULL || cds == NULL) return NULL;
+
+ MemSet ((Pointer) &gpd, 0, sizeof (GeneProtData)); /* gene = NULL, failed = FALSE */
+ gpd.cds = cds;
+ VisitFeaturesInSep (sep, (Pointer) &gpd, CheckGeneOnIsolatedProtein);
+
+ if (gpd.failed) return NULL; /* ambiguous: do not pick one of several genes */
+
+ return gpd.gene;
+}
+
static SeqFeatPtr GetOverlappingGeneInEntity (
Uint2 entityID,
SeqMgrFeatContextPtr fcontext,
@@ -4645,7 +4765,11 @@ static SeqFeatPtr GetOverlappingGeneInEntity (
}
}
} else {
- gene = SeqMgrGetOverlappingGene (locforgene, gcontext);
+ if (fcontext->bad_order || fcontext->mixed_strand) {
+ gene = SeqMgrGetOverlappingFeature (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext);
+ } else {
+ gene = SeqMgrGetOverlappingGene (locforgene, gcontext);
+ }
}
SeqEntrySetScope (oldscope);
return gene;
@@ -4668,6 +4792,7 @@ static CharPtr FormatFeatureBlockEx (
{
Uint1 aa;
AnnotDescrPtr adp;
+ Boolean annotDescCommentToComment;
ValNodePtr bad_inference = NULL;
Int2 bondidx;
BioseqPtr bsp_for_old_locus_tag = NULL;
@@ -4718,6 +4843,7 @@ static CharPtr FormatFeatureBlockEx (
Boolean noLeft;
Boolean noRight;
SeqMgrFeatContext ocontext;
+ ObjectIdPtr oip;
SeqEntryPtr oldscope;
SeqFeatPtr operon = NULL;
Uint2 partial;
@@ -4732,6 +4858,7 @@ static CharPtr FormatFeatureBlockEx (
ProtRefPtr prpxref;
Boolean pseudo = FALSE;
CharPtr ptr;
+ Uint2 pEID;
Int2 qualclass;
Uint1 residue;
Boolean riboSlippage = FALSE;
@@ -4742,7 +4869,7 @@ static CharPtr FormatFeatureBlockEx (
SeqDescrPtr sdp;
SeqEntryPtr sep;
Uint1 seqcode;
- Uint1 shift;
+ Uint1 seqfeattype;
SeqIdPtr sip;
Int2 siteidx;
SeqMapTablePtr smtp;
@@ -4752,7 +4879,9 @@ static CharPtr FormatFeatureBlockEx (
CharPtr tmp;
Boolean transSplice = FALSE;
tRNAPtr trna;
+ UserFieldPtr ufp;
BioseqPtr unlockme = NULL;
+ UserObjectPtr uop;
ValNodePtr vnp;
StringItemPtr ffstring;
@@ -4884,12 +5013,21 @@ static CharPtr FormatFeatureBlockEx (
is_other = TRUE;
}
- featdeftype = fcontext->featdeftype;
+ featdeftype = fcontext->featdeftype;
+
if (featdeftype < FEATDEF_GENE || featdeftype >= FEATDEF_MAX) {
featdeftype = FEATDEF_BAD;
}
+ if (featdeftype == 0) {
+ featdeftype = sfp->idx.subtype;
+ }
key = FindKeyFromFeatDefType (featdeftype, TRUE);
+ seqfeattype = fcontext->seqfeattype;
+ if (seqfeattype == 0) {
+ seqfeattype = sfp->data.choice;
+ }
+
if (format == GENPEPT_FMT && isProt) {
if (featdeftype == FEATDEF_REGION) {
key = "Region";
@@ -4916,7 +5054,7 @@ static CharPtr FormatFeatureBlockEx (
/* deal with unmappable impfeats */
- if (featdeftype == FEATDEF_BAD && fcontext->seqfeattype == SEQFEAT_IMP) {
+ if (featdeftype == FEATDEF_BAD && seqfeattype == SEQFEAT_IMP) {
imp = (ImpFeatPtr) sfp->data.value.ptrvalue;
if (imp != NULL) {
key = imp->key;
@@ -4926,7 +5064,7 @@ static CharPtr FormatFeatureBlockEx (
FFStartPrint(ffstring, format, 5, 21, NULL, 0, 5, 21, "FT", /* ifp->firstfeat */ FALSE);
if (ajp->ajp.slp != NULL) {
FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
- } else if ( GetWWW(ajp) /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) {
+ } else if ( GetWWW(ajp) && StringICmp (key, "gap") != 0 /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) {
FF_asn2gb_www_featkey (ffstring, key, sfp->location, fcontext->left + 1, fcontext->right + 1, fcontext->strand, itemID);
} else {
FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
@@ -5038,7 +5176,7 @@ static CharPtr FormatFeatureBlockEx (
pseudo = TRUE;
}
- if (fcontext->seqfeattype == SEQFEAT_GENE) {
+ if (seqfeattype == SEQFEAT_GENE) {
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
if (grp != NULL) {
if (! StringHasNoText (grp->locus)) {
@@ -5079,7 +5217,7 @@ static CharPtr FormatFeatureBlockEx (
}
}
- } else if (fcontext->featdeftype != FEATDEF_operon && fcontext->featdeftype != FEATDEF_gap) {
+ } else if (featdeftype != FEATDEF_operon && featdeftype != FEATDEF_gap) {
grp = SeqMgrGetGeneXref (sfp);
if (grp != NULL) {
@@ -5091,11 +5229,23 @@ static CharPtr FormatFeatureBlockEx (
gene_for_old_locus_tag = SeqMgrGetFeatureByLabel (bsp_for_old_locus_tag, grp->locus_tag, SEQFEAT_GENE, 0, &gcontext);
}
}
- if (grp == NULL && fcontext->featdeftype != FEATDEF_primer_bind) {
+ if (grp == NULL && featdeftype != FEATDEF_primer_bind) {
gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene);
if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) {
gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locforgene);
}
+
+ /* special case to get gene by overlap for coded_by cds on isolated protein bioseq */
+ if (ifp->mapToProt && seqfeattype == SEQFEAT_CDREGION) {
+ sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
+ if (sep != NULL && IS_Bioseq (sep)) {
+ bsp = (BioseqPtr) sep->data.ptrvalue;
+ if (bsp != NULL && ISA_aa (bsp->mol)) {
+ gene = FindGeneOnIsolatedProtein (sep, sfp);
+ }
+ }
+ }
+
gene_for_old_locus_tag = gene;
if (gene != NULL) {
qvp [FTQUAL_gene_note].str = gene->comment;
@@ -5114,7 +5264,7 @@ static CharPtr FormatFeatureBlockEx (
pseudo = TRUE;
}
if (grp != NULL && (! SeqMgrGeneIsSuppressed (grp)) &&
- (fcontext->featdeftype != FEATDEF_repeat_region || gene == NULL)) {
+ (featdeftype != FEATDEF_repeat_region || gene == NULL)) {
if (! StringHasNoText (grp->locus)) {
qvp [FTQUAL_gene].str = grp->locus;
qvp [FTQUAL_locus_tag].str = grp->locus_tag;
@@ -5133,25 +5283,25 @@ static CharPtr FormatFeatureBlockEx (
}
}
if (grp != NULL &&
- fcontext->featdeftype != FEATDEF_variation &&
- fcontext->featdeftype != FEATDEF_repeat_region) {
+ featdeftype != FEATDEF_variation &&
+ featdeftype != FEATDEF_repeat_region) {
qvp [FTQUAL_gene_allele].str = grp->allele; /* now propagating /allele */
}
- if (gene_for_old_locus_tag != NULL && fcontext->featdeftype != FEATDEF_repeat_region) {
+ if (gene_for_old_locus_tag != NULL && featdeftype != FEATDEF_repeat_region) {
/* now propagate old_locus_tag to almost any underlying feature */
for (gbq = gene_for_old_locus_tag->qual; gbq != NULL; gbq = gbq->next) {
if (StringHasNoText (gbq->val)) continue;
idx = GbqualToFeaturIndex (gbq->qual);
if (idx == FTQUAL_old_locus_tag) {
qvp [FTQUAL_old_locus_tag].gbq = gbq;
+ break; /* record first old_locus_tag gbqual to display all */
}
}
}
- if (fcontext->seqfeattype != SEQFEAT_CDREGION &&
- fcontext->seqfeattype != SEQFEAT_RNA) {
+ if (seqfeattype != SEQFEAT_CDREGION && seqfeattype != SEQFEAT_RNA) {
qvp [FTQUAL_gene_xref].vnp = NULL;
}
- if (fcontext->featdeftype != FEATDEF_operon) {
+ if (featdeftype != FEATDEF_operon) {
grp = SeqMgrGetGeneXref (sfp);
if (grp == NULL || (! SeqMgrGeneIsSuppressed (grp))) {
operon = SeqMgrGetOverlappingOperon (locforgene, &ocontext);
@@ -5167,7 +5317,7 @@ static CharPtr FormatFeatureBlockEx (
/* specific fields set here */
- switch (fcontext->seqfeattype) {
+ switch (seqfeattype) {
case SEQFEAT_CDREGION :
if (! ifp->mapToProt) {
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
@@ -5210,6 +5360,12 @@ static CharPtr FormatFeatureBlockEx (
} else {
qvp [FTQUAL_selenocysteine].ble = TRUE;
}
+ } else if (residue == 'O') {
+ if (ajp->flags.pyrrolysineToNote) {
+ qvp [FTQUAL_pyrrolysine_note].str = "pyrrolysine";
+ } else {
+ qvp [FTQUAL_pyrrolysine].ble = TRUE;
+ }
}
}
}
@@ -5293,6 +5449,12 @@ static CharPtr FormatFeatureBlockEx (
}
}
}
+ pEID = ObjMgrGetEntityIDForPointer (prod);
+ if (pEID != 0 && pEID != ajp->ajp.entityID &&
+ SeqMgrFeaturesAreIndexed (pEID) == 0) {
+ /* index far record so SeqMgrGetBestProteinFeature can work */
+ SeqMgrIndexFeatures (pEID, NULL);
+ }
prot = SeqMgrGetBestProteinFeature (prod, &pcontext);
if (prot != NULL) {
prp = (ProtRefPtr) prot->data.value.ptrvalue;
@@ -5395,6 +5557,12 @@ static CharPtr FormatFeatureBlockEx (
} else {
qvp [FTQUAL_selenocysteine].ble = TRUE;
}
+ } else if (residue == 'O') {
+ if (ajp->flags.pyrrolysineToNote) {
+ qvp [FTQUAL_pyrrolysine_note].str = "pyrrolysine";
+ } else {
+ qvp [FTQUAL_pyrrolysine].ble = TRUE;
+ }
}
}
}
@@ -5567,6 +5735,12 @@ static CharPtr FormatFeatureBlockEx (
}
}
if (aa > 0 && aa != 255) {
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ if (aa == 79 || aa == 74) { /* O or J quarantined */
+ aa = 88; /* X */
+ }
+ }
+ /* - no gaps now that O and J are added
if (aa <= 74) {
shift = 0;
} else if (aa > 79) {
@@ -5574,12 +5748,13 @@ static CharPtr FormatFeatureBlockEx (
} else {
shift = 1;
}
+ */
if (aa != '*') {
- idx = aa - (64 + shift);
+ idx = aa - (64 /* + shift */);
} else {
idx = 25;
}
- if (idx > 0 && idx < 26) {
+ if (idx > 0 && idx < 28) {
str = trnaList [idx];
qvp [FTQUAL_product].str = str;
if (StringNICmp (str, "tRNA-", 5) == 0) {
@@ -5588,7 +5763,11 @@ static CharPtr FormatFeatureBlockEx (
}
}
qvp [FTQUAL_anticodon].slp = trna->anticodon;
- qvp [FTQUAL_trna_codons].trp = trna;
+ if (ajp->flags.codonRecognizedToNote) {
+ qvp [FTQUAL_trna_codons_note].trp = trna;
+ } else {
+ qvp [FTQUAL_trna_codons].trp = trna;
+ }
}
}
} else {
@@ -5673,7 +5852,7 @@ static CharPtr FormatFeatureBlockEx (
qvp [FTQUAL_go_function].ufp = NULL;
}
- if (fcontext->featdeftype == FEATDEF_repeat_region) {
+ if (featdeftype == FEATDEF_repeat_region) {
pseudo = FALSE;
}
@@ -5683,19 +5862,39 @@ static CharPtr FormatFeatureBlockEx (
sap = fcontext->sap;
if (sap != NULL) {
+ annotDescCommentToComment = FALSE;
for (adp = sap->desc; adp != NULL; adp = adp->next) {
if (adp->choice == Annot_descr_comment) {
if (StringDoesHaveText ((CharPtr) adp->data.ptrvalue)) {
qvp [FTQUAL_seqannot_note].str = (CharPtr) adp->data.ptrvalue;
}
+ } else if (adp->choice == Annot_descr_user) {
+ uop = (UserObjectPtr) adp->data.ptrvalue;
+ if (uop == NULL) continue;
+ oip = uop->type;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "AnnotDescCommentPolicy") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
+ if (StringCmp (oip->str, "Policy") == 0) {
+ if (StringICmp ((CharPtr) ufp->data.ptrvalue, "ShowInComment") == 0) {
+ annotDescCommentToComment = TRUE;
+ }
+ }
+ }
+ }
}
}
+ if (annotDescCommentToComment) {
+ qvp [FTQUAL_seqannot_note].str = NULL;
+ }
}
/* if RELEASE_MODE, check list of features that can have /pseudo */
if (ajp->flags.dropIllegalQuals && pseudo &&
- (fcontext->seqfeattype == SEQFEAT_RNA || fcontext->seqfeattype == SEQFEAT_IMP) ) {
+ (seqfeattype == SEQFEAT_RNA || seqfeattype == SEQFEAT_IMP) ) {
switch (featdeftype) {
case FEATDEF_allele:
@@ -5993,6 +6192,12 @@ static CharPtr FormatFeatureBlockEx (
qvp [FTQUAL_selenocysteine_note].str = NULL;
}
+ /* suppress pyrrolysine note if already in comment */
+
+ if (StringStr (sfp->comment, "pyrrolysine") != NULL) {
+ qvp [FTQUAL_pyrrolysine_note].str = NULL;
+ }
+
/* if /allele inherited from gene, suppress allele gbqual on feature */
if (qvp [FTQUAL_gene_allele].str != NULL) {
diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c
index 8ec9b4b7..7abce569 100644
--- a/api/asn2gnb5.c
+++ b/api/asn2gnb5.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.48 $
+* $Revision: 1.54 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -74,6 +74,9 @@ NLM_EXTERN Char link_featc [MAX_WWWBUF];
NLM_EXTERN Char link_seq [MAX_WWWBUF];
#define DEF_LINK_SEQ "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?"
+NLM_EXTERN Char link_projid [MAX_WWWBUF];
+#define DEF_LINK_PROJID "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids="
+
NLM_EXTERN Char link_wgs [MAX_WWWBUF];
#define DEF_LINK_WGS "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?"
@@ -204,7 +207,7 @@ static Char link_gabi [MAX_WWWBUF];
#define DEF_LINK_GABI "https://gabi.rzpd.de/cgi-bin-protected/GreenCards.pl.cgi?Mode=ShowBioObject&BioObjectName="
static Char link_fantom [MAX_WWWBUF];
-#define DEF_LINK_FANTOM "http://fantom.gsc.riken.go.jp/db/view/main.cgi?masterid="
+#define DEF_LINK_FANTOM "http://fantom.gsc.riken.jp/db/annotate/main.cgi?masterid="
static Char link_interpro [MAX_WWWBUF];
#define DEF_LINK_INTERPRO "http://www.ebi.ac.uk/interpro/ISearch?mode=ipr&query="
@@ -213,7 +216,7 @@ static Char link_genedb [MAX_WWWBUF];
#define DEF_LINK_GENEDB "http://www.genedb.org/genedb/Dispatcher?formType=navBar&submit=Search+for&organism=All%3Apombe%3Acerevisiae%3Adicty%3Aasp%3Atryp%3Aleish%3Amalaria%3Astyphi%3Aglossina&desc=yes&ohmr=%2F&name="
static Char link_geneid [MAX_WWWBUF];
-#define DEF_LINK_GENEID "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=retrieve&dopt=graphics&list_uids="
+#define DEF_LINK_GENEID "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=full_report&list_uids="
static Char link_zfin [MAX_WWWBUF];
#define DEF_LINK_ZFIN "http://zfin.org/cgi-bin/webdriver?MIval=aa-markerview.apg&OID="
@@ -296,6 +299,7 @@ NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp)
GetAppParam ("NCBI", "WWWENTREZ", "LINK_FEAT", DEF_LINK_FEAT, link_feat, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_FEATC", DEF_LINK_FEATC, link_featc, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_SEQ", DEF_LINK_SEQ, link_seq, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_PROJID", DEF_LINK_PROJID, link_projid, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_WGS", DEF_LINK_WGS, link_wgs, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_OMIM", DEF_LINK_OMIM, link_omim, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_REF", DEF_LINK_REF, ref_link, MAX_WWWBUF);
@@ -2504,18 +2508,21 @@ static CharPtr FormatCitArt (
}
static CharPtr FormatCitPat (
- FmtType format,
+ FmtType format,
+ ModType mode,
CitPatPtr cpp,
- SeqIdPtr seqidp,
+ SeqIdPtr seqidp,
IntAsn2gbJobPtr ajp
)
{
AffilPtr afp;
AuthListPtr alp;
+ IdPatPtr cit;
CharPtr consortium = NULL;
Char date [40];
ValNodePtr head = NULL;
+ Boolean is_us_pre_grant = FALSE;
CharPtr prefix = NULL;
CharPtr rsult = NULL;
SeqIdPtr sip;
@@ -2527,9 +2534,30 @@ static CharPtr FormatCitPat (
if (cpp == NULL) return NULL;
+ if (StringHasNoText (cpp->number) &&
+ StringDoesHaveText (cpp->app_number) &&
+ StringCmp (cpp->country, "US") == 0 &&
+ mode != RELEASE_MODE) {
+ for (sip = seqidp; sip != NULL; sip = sip->next) {
+ if (sip->choice != SEQID_PATENT) continue;
+ psip = (PatentSeqIdPtr) sip->data.ptrvalue;
+ if (psip == NULL) continue;
+ cit = psip->cit;
+ if (cit == NULL) continue;
+ if (StringDoesHaveText (cit->app_number)) {
+ is_us_pre_grant = TRUE;
+ }
+ }
+ }
+
if (format == GENBANK_FMT || format == GENPEPT_FMT) {
- ValNodeCopyStr (&head, 0, "Patent: ");
- suffix = " ";
+ if (is_us_pre_grant) {
+ ValNodeCopyStr (&head, 0, "Pre-Grant Patent: ");
+ suffix = " ";
+ } else {
+ ValNodeCopyStr (&head, 0, "Patent: ");
+ suffix = " ";
+ }
} else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
ValNodeCopyStr (&head, 0, "Patent number ");
}
@@ -2550,7 +2578,11 @@ static CharPtr FormatCitPat (
ValNodeCopyStr (&head, 0, cpp->number);
}
} else if (! StringHasNoText (cpp->app_number)) {
- AddValNodeString (&head, "(", cpp->app_number, ")");
+ if (is_us_pre_grant) {
+ AddValNodeString (&head, NULL, cpp->app_number, NULL);
+ } else {
+ AddValNodeString (&head, "(", cpp->app_number, ")");
+ }
}
if (! StringHasNoText (cpp->doc_type)) {
@@ -2922,12 +2954,13 @@ static CharPtr FormatCitSub (
static CharPtr GetPubJournal (
FmtType format,
+ ModType mode,
Boolean dropBadCitGens,
Boolean noAffilOnUnpub,
Boolean citArtIsoJta,
PubdescPtr pdp,
CitSubPtr csp,
- SeqIdPtr seqidp,
+ SeqIdPtr seqidp,
IndxPtr index,
IntAsn2gbJobPtr ajp
)
@@ -2996,7 +3029,7 @@ static CharPtr GetPubJournal (
case PUB_Patent :
cpp = (CitPatPtr) vnp->data.ptrvalue;
if (cpp != NULL) {
- journal = FormatCitPat (format, cpp, seqidp, ajp);
+ journal = FormatCitPat (format, mode, cpp, seqidp, ajp);
}
break;
default :
@@ -3730,9 +3763,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
citArtIsoJta = FALSE;
}
- str = GetPubJournal (afp->format, ajp->flags.dropBadCitGens,
- ajp->flags.noAffilOnUnpub, citArtIsoJta,
- pdp, csp, bsp->id, index, ajp);
+ str = GetPubJournal (afp->format, ajp->mode, ajp->flags.dropBadCitGens,
+ ajp->flags.noAffilOnUnpub, citArtIsoJta, pdp, csp,
+ bsp->id, index, ajp);
if (str == NULL) {
str = StringSave ("Unpublished");
}
@@ -3815,7 +3848,6 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (gbseq != NULL) {
if (gbref != NULL) {
- gbref->medline = muid;
gbref->pubmed = pmid;
}
}
diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c
index 773e03f0..698d7587 100644
--- a/api/asn2gnb6.c
+++ b/api/asn2gnb6.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.63 $
+* $Revision: 1.69 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -990,6 +990,34 @@ static CharPtr FindUrlEnding(CharPtr str) {
return ptr;
}
+static Boolean CommentHasSuspiciousHtml (
+ IntAsn2gbJobPtr ajp,
+ CharPtr searchString
+)
+/* Scans searchString one character at a time through the state machine held
+   in ajp->bad_html_fsa.
+   Returns TRUE as soon as TextFsaNext reports a non-NULL match list, FALSE if
+   the string has no text or the end is reached without a match.
+   ajp is dereferenced without a NULL check.  The caller visible in this patch
+   (AddCommentWithURLlinks) uses a TRUE result to emit the comment without
+   turning URLs into links. */
+{
+ Char ch;
+ CharPtr ptr;
+ Int2 state;
+ ValNodePtr matches;
+
+ if (StringHasNoText (searchString)) return FALSE;
+
+ state = 0; /* state value used before any character has been fed in */
+ ptr = searchString;
+ ch = *ptr;
+
+ while (ch != '\0') {
+ matches = NULL; /* reset so a stale value cannot be mistaken for a hit */
+ state = TextFsaNext (ajp->bad_html_fsa, state, ch, &matches);
+ if (matches != NULL) return TRUE;
+ ptr++;
+ ch = *ptr;
+ }
+
+ return FALSE;
+}
+
NLM_EXTERN void AddCommentWithURLlinks (
IntAsn2gbJobPtr ajp,
StringItemPtr ffstring,
@@ -1002,6 +1030,17 @@ NLM_EXTERN void AddCommentWithURLlinks (
Char ch;
CharPtr ptr;
+ if (GetWWW (ajp) && CommentHasSuspiciousHtml (ajp, str)) {
+ if (prefix != NULL) {
+ FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
+ }
+ AddCommentStringWithTildes (ffstring, str);
+ if (suffix != NULL) {
+ FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
+ }
+ return;
+ }
+
while (! StringHasNoText (str)) {
ptr = StringStr (str, "http://");
if (ptr == NULL) {
@@ -4111,7 +4150,11 @@ static Int2 ProcessGapSpecialFormat (
FixGapAtEnd (buf, ' ');
ajp->seqGapCurrLen += endgap;
} else if (endgap > 0) {
+ /*
FixGapAtEnd (buf, pad);
+ */
+ FixGapAtEnd (buf, ' ');
+ ajp->seqGapCurrLen += endgap;
}
FixRemainingGaps (buf, pad);
@@ -4119,6 +4162,24 @@ static Int2 ProcessGapSpecialFormat (
return startgapgap;
}
+static void ChangeoTox (CharPtr str)
+/* Rewrites str in place: every 'O' becomes 'X' and every 'o' becomes 'x'.
+   A NULL str is ignored.  The callers visible in this patch (in
+   FormatSequenceBlock) apply it to protein sequence text when ajp->mode is
+   RELEASE_MODE or ENTREZ_MODE.
+   NOTE(review): same logic as the static ChangeOToX added to asn2gnb4.c in
+   this patch. */
+{
+ Char ch;
+
+ if (str == NULL) return;
+ ch = *str;
+ while (ch != '\0') { /* walk to the terminating NUL */
+ if (ch == 'O') {
+ *str = 'X';
+ } else if (ch == 'o') {
+ *str = 'x';
+ }
+ str++;
+ ch = *str;
+ }
+}
+
NLM_EXTERN CharPtr FormatSequenceBlock (
Asn2gbFormatPtr afp,
BaseBlockPtr bbp
@@ -4181,6 +4242,11 @@ NLM_EXTERN CharPtr FormatSequenceBlock (
} else {
SeqPortStream (bsp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
}
+ if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ ChangeoTox (str);
+ }
+ }
gbseq->sequence = StringSave (str);
tmp = gbseq->sequence;
@@ -4239,6 +4305,11 @@ NLM_EXTERN CharPtr FormatSequenceBlock (
} else {
SeqPortStreamInt (bsp, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
}
+ if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ ChangeoTox (str);
+ }
+ }
sbp->bases = str;
}
}
@@ -4306,6 +4377,7 @@ NLM_EXTERN CharPtr FormatSequenceBlock (
return str;
}
+/*
static CharPtr insd_strd [4] = {
NULL, "single", "double", "mixed"
};
@@ -4317,6 +4389,7 @@ static CharPtr insd_mol [10] = {
static CharPtr insd_top [3] = {
NULL, "linear", "circular"
};
+*/
NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
@@ -4326,14 +4399,16 @@ NLM_EXTERN CharPtr FormatSlashBlock (
)
{
- IntAsn2gbJobPtr ajp;
- Asn2gbSectPtr asp;
- GBFeaturePtr currf, headf, nextf;
- GBReferencePtr currr, headr, nextr;
- GBSeqPtr gbseq, gbtmp;
- IndxPtr index;
- INSDSeq is;
- Int2 moltype, strandedness, topology;
+ IntAsn2gbJobPtr ajp;
+ Asn2gbSectPtr asp;
+ GBFeaturePtr currf, headf, nextf;
+ GBReferencePtr currr, headr, nextr;
+ GBSeqPtr gbseq, gbtmp;
+ IndxPtr index;
+ INSDSeq is;
+ /*
+ Int2 moltype, strandedness, topology;
+ */
if (afp == NULL || bbp == NULL) return NULL;
ajp = afp->ajp;
@@ -4405,21 +4480,26 @@ NLM_EXTERN CharPtr FormatSlashBlock (
is.OBbits__ = gbseq->OBbits__;
is.locus = gbseq->locus;
is.length = gbseq->length;
+ is.strandedness = gbseq->strandedness;
+ is.moltype = gbseq->moltype;
+ is.topology = gbseq->topology;
+ /*
strandedness = (Int2) gbseq->strandedness;
if (strandedness < 0 || strandedness > 3) {
strandedness = 0;
}
- is.strandedness = insd_strd [strandedness];
+ is.strandedness = StringSave (insd_strd [strandedness]);
moltype = (Int2) gbseq->moltype;
if (moltype < 0 || moltype > 9) {
moltype = 0;
}
- is.moltype = insd_mol [moltype];
+ is.moltype = StringSave (insd_mol [moltype]);
topology = (Int2) gbseq->topology;
if (topology < 0 || topology > 2) {
topology = 0;
}
- is.topology = insd_top [topology];
+ is.topology = StringSave (insd_top [topology]);
+ */
is.division = gbseq->division;
is.update_date = gbseq->update_date;
is.create_date = gbseq->create_date;
diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h
index 72d69a81..1c4e4ebd 100644
--- a/api/asn2gnbi.h
+++ b/api/asn2gnbi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/30/03
*
-* $Revision: 1.55 $
+* $Revision: 1.61 $
*
* File Description: New GenBank flatfile generator, internal header
*
@@ -95,7 +95,9 @@ typedef struct asn2gbflags {
Boolean geneSynsToNote;
Boolean refSeqQualsToNote;
Boolean selenocysteineToNote;
+ Boolean pyrrolysineToNote;
Boolean extraProductsToNote;
+ Boolean codonRecognizedToNote;
Boolean forGbRelease;
} Asn2gbFlags, PNTR Asn2gbFlagsPtr;
@@ -153,6 +155,7 @@ typedef struct int_asn2gb_job {
Int4 seqGapCurrLen;
ValNodePtr gihead;
ValNodePtr gitail;
+ TextFsaPtr bad_html_fsa;
} IntAsn2gbJob, PNTR IntAsn2gbJobPtr;
/* array for assigning biosource and feature data fields to qualifiers */
@@ -262,6 +265,7 @@ typedef struct asn2gbwork {
Boolean hideGeneFeats;
Boolean newLocusLine;
Boolean showBaseCount;
+ Boolean forcePrimaryBlock;
Boolean hideImpFeats;
Boolean hideRemImpFeats;
@@ -300,6 +304,7 @@ typedef struct asn2gbwork {
Boolean firstfeat;
Boolean featseen;
Boolean featjustseen;
+ Int4 localFeatCount;
ValNodePtr wgsaccnlist;
Boolean has_mat_peptide;
@@ -640,6 +645,8 @@ typedef enum {
FTQUAL_prot_names,
FTQUAL_protein_id,
FTQUAL_pseudo,
+ FTQUAL_pyrrolysine,
+ FTQUAL_pyrrolysine_note,
FTQUAL_region,
FTQUAL_region_name,
FTQUAL_replace,
@@ -668,6 +675,7 @@ typedef enum {
FTQUAL_trans_splicing,
FTQUAL_trna_aa,
FTQUAL_trna_codons,
+ FTQUAL_trna_codons_note,
FTQUAL_usedin,
FTQUAL_xtra_prod_quals,
ASN2GNBK_TOTAL_FEATUR
@@ -678,6 +686,7 @@ typedef enum {
NLM_EXTERN Char link_feat [MAX_WWWBUF];
NLM_EXTERN Char link_featc [MAX_WWWBUF];
NLM_EXTERN Char link_seq [MAX_WWWBUF];
+NLM_EXTERN Char link_projid [MAX_WWWBUF];
NLM_EXTERN Char link_wgs [MAX_WWWBUF];
NLM_EXTERN Char link_omim [MAX_WWWBUF];
NLM_EXTERN Char ref_link [MAX_WWWBUF];
diff --git a/api/asn2gnbk.h b/api/asn2gnbk.h
index a49171d5..eb181c3c 100644
--- a/api/asn2gnbk.h
+++ b/api/asn2gnbk.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.68 $
+* $Revision: 6.69 $
*
* File Description: New GenBank flatfile generator
*
@@ -118,6 +118,8 @@ typedef unsigned long FlgType;
#define SPECIAL_GAP_DISPLAY 65536
+#define FORCE_PRIMARY_BLOCK 131072
+
/* locking behavior for system performance */
typedef unsigned long LckType;
diff --git a/api/edutil.c b/api/edutil.c
index e11edefe..4a288a79 100644
--- a/api/edutil.c
+++ b/api/edutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/4/94
*
-* $Revision: 6.52 $
+* $Revision: 6.54 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,14 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: edutil.c,v $
+* Revision 6.54 2006/02/07 13:41:29 bollin
+* added function AdjustFeatureForGapChange, which changes a feature to accommodate
+* a change in the length of a gap
+*
+* Revision 6.53 2005/12/12 14:12:54 bollin
+* BioseqCopyEx was not correctly handling copying the data contents of a
+* delta sequence
+*
* Revision 6.52 2005/09/22 19:21:34 bollin
* In the sequence editor, if the user inserts Ns into a gap of known length,
* the gap length will be increased instead of creating two gaps on either side
@@ -1829,6 +1837,29 @@ NLM_EXTERN Boolean LIBCALL SeqEntryDelFeat (SeqEntryPtr sep, SeqIdPtr sip, Int4
*
*****************************************************************************/
+static DeltaSeqPtr CopyDeltaSeqPtrChain (DeltaSeqPtr dsp) /* returns a new chain holding a copy of each choice-1 and choice-2 node of dsp; NULL if none */
+{
+ DeltaSeqPtr new_chain = NULL;
+ SeqLocPtr slp_orig, slp_new;
+ SeqLitPtr slip_orig, slip_new;
+
+ while (dsp != NULL) {
+ if (dsp->choice == 1) { /* payload is handled as a SeqLoc */
+ slp_orig = (SeqLocPtr) dsp->data.ptrvalue;
+ slp_new = AsnIoMemCopy (slp_orig, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite); /* result is not checked for NULL before being added */
+ ValNodeAddPointer (&new_chain, 1, slp_new);
+ }
+ else if (dsp->choice ==2) /* payload is handled as a SeqLit */
+ {
+ slip_orig = (SeqLitPtr) dsp->data.ptrvalue;
+ slip_new = AsnIoMemCopy(slip_orig, (AsnReadFunc) SeqLitAsnRead, (AsnWriteFunc) SeqLitAsnWrite); /* result is not checked for NULL before being added */
+ ValNodeAddPointer (&new_chain, 2, slip_new);
+ }
+ dsp = dsp->next; /* nodes with any other choice value are skipped, not copied */
+ }
+
+ return new_chain;
+}
/*****************************************************************************
*
@@ -1947,9 +1978,8 @@ NLM_EXTERN BioseqPtr LIBCALL BioseqCopyEx (SeqIdPtr newid, BioseqPtr oldbsp, Int
else if (newbsp->repr == Seq_repr_delta)
{
dsp = (DeltaSeqPtr)(oldbsp->seq_ext); /* real data is here */
- the_segs = DeltaSeqsToSeqLocs(dsp);
- head = SeqLocCopyPart (the_segs, from, to, strand, FALSE, NULL, NULL);
- SeqLocFree (the_segs);
+
+ head = CopyDeltaSeqPtrChain (dsp);
}
newbsp->seq_ext = (Pointer)head;
@@ -6839,6 +6869,36 @@ NLM_EXTERN void SeqEdFeatureAdjust
}
+NLM_EXTERN void /* Adjusts feature sfp on bsp for a gap length change of len_diff at position offset. */
+AdjustFeatureForGapChange
+(SeqFeatPtr sfp,
+ BioseqPtr bsp,
+ Int4 offset,
+ Int4 len_diff) /* NOTE(review): positive len_diff takes the delete path, negative the insert path - presumably len_diff is old length minus new length; confirm against callers */
+{
+ if (sfp == NULL || bsp == NULL || offset < 0 || len_diff == 0) /* nothing to do on missing input, negative offset, or no change */
+ {
+ return;
+ }
+
+ if (len_diff > 0)
+ {
+ SeqEdSeqFeatDelete (sfp, bsp, offset, offset + len_diff - 1, TRUE); /* delete the inclusive range of len_diff positions starting at offset */
+ }
+ else
+ {
+ sfp->location = SeqEdSeqLocInsert (sfp->location, bsp, offset, -len_diff, FALSE, NULL); /* -len_diff is the positive number of positions inserted */
+ if (sfp->data.choice == SEQFEAT_CDREGION) /* coding regions get an extra type-specific adjustment */
+ {
+ SeqEdInsertAdjustCdRgn (sfp, bsp, offset, -len_diff, FALSE);
+ }
+ else if (sfp->data.choice == SEQFEAT_RNA) /* so do RNA features */
+ {
+ SeqEdInsertAdjustRNA (sfp, bsp, offset, -len_diff, FALSE);
+ }
+ }
+}
+
diff --git a/api/edutil.h b/api/edutil.h
index 58158b83..8a7f0059 100644
--- a/api/edutil.h
+++ b/api/edutil.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/2/94
*
-* $Revision: 6.16 $
+* $Revision: 6.17 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,10 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: edutil.h,v $
+* Revision 6.17 2006/02/07 13:41:29 bollin
+* added function AdjustFeatureForGapChange, which changes a feature to accommodate
+* a change in the length of a gap
+*
* Revision 6.16 2005/05/02 14:21:15 bollin
* removed function prototypes for PlayJournal and UnplayJournal, since these
* functions live in desktop/seqpanel.c and are only used there
@@ -729,6 +733,12 @@ NLM_EXTERN void SeqEdReindexAffectedFeatures (Int4 shift_start, Int4 shift_amt,
NLM_EXTERN void SeqEdReindexFeature (SeqFeatPtr sfp, BioseqPtr bsp);
NLM_EXTERN Boolean SeqEdDeleteFromBsp (SeqEdJournalPtr sejp, BoolPtr pfeats_deleted);
+NLM_EXTERN void /* Changes a feature to accommodate a change in the length of a gap (see the Revision 6.17 log entry). */
+AdjustFeatureForGapChange
+(SeqFeatPtr sfp,
+ BioseqPtr bsp,
+ Int4 offset,
+ Int4 len_diff);
#ifdef __cplusplus
}
diff --git a/api/explore.h b/api/explore.h
index beea3747..eee7aa9b 100644
--- a/api/explore.h
+++ b/api/explore.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/30/98
*
-* $Revision: 6.53 $
+* $Revision: 6.54 $
*
* File Description: Reengineered and optimized exploration functions
* to be used for future code
@@ -122,6 +122,8 @@ typedef struct seqmgrfeatcontext {
Boolean partialL;
Boolean partialR;
Boolean farloc;
+ Boolean bad_order;
+ Boolean mixed_strand;
Uint1 strand;
Uint1 seqfeattype;
Uint1 featdeftype;
diff --git a/api/findrepl.c b/api/findrepl.c
index d555165d..e91104ea 100644
--- a/api/findrepl.c
+++ b/api/findrepl.c
@@ -44,6 +44,28 @@
* RCS Modification History:
* -------------------------
* $Log: findrepl.c,v $
+* Revision 6.21 2006/01/17 17:50:01 bollin
+* allow FindReplaceInEntity to search for a string made up of whitespace, as
+* long as whole_word is not specified
+*
+* Revision 6.20 2006/01/10 18:13:56 kans
+* FindReplAligns does not have case for SAS_DISC, since visit function recursively presents these components separately
+*
+* Revision 6.19 2006/01/09 21:15:03 bollin
+* allow punctuation to terminate a word in find replace
+*
+* Revision 6.18 2006/01/04 21:26:57 kans
+* FSA hit does not need code from validator unstructured source test, cleaned up variable names
+*
+* Revision 6.17 2006/01/04 20:39:41 kans
+* added FindStringsInEntity using finite state machine, general cleanup of code
+*
+* Revision 6.16 2005/12/29 21:42:06 kans
+* only call callback if text was found or replaced
+*
+* Revision 6.15 2005/12/29 20:54:41 kans
+* FindReplaceInEntity takes callback and userdata
+*
* Revision 6.14 2005/09/21 14:39:09 bollin
* fixed bug in FindReplace where if the whole-word flag was specified but
* the substring was found in a not-whole-word context earlier in the string
@@ -83,7 +105,9 @@
* using NUM_SEQID, added TPA ids to arrays
*
* Revision 6.2 2000/11/03 20:36:00 kans
-* FindReplaceInEntity replaces FindInEntity and FindInEntityX - complete redesign, no longer using AsnExpOptExplore because of the difficulty of replacing with a larger string (TF + JK)
+* FindReplaceInEntity replaces FindInEntity and FindInEntityX - complete redesign,
+* no longer using AsnExpOptExplore because of the difficulty of replacing with a
+* larger string (TF + JK)
*
* Revision 6.1 1999/03/05 23:31:07 kans
* FindInEntityX was not initializing flen, replen
@@ -98,31 +122,31 @@
* added whole_word parameter to FindInEntity and FindInEntityX, and protected
* against multiple ObjMgrAlsoSelects on a single itemID
*
- * Revision 5.1 1996/09/06 20:20:41 kans
- * keeps going even if ObjMgrTypeFind returns NULL (e.g., on OBJ_BIOSEQ_SEG),
- * and adds a case_counts parameter for case sensitive/insensitive searches.
- *
- * Revision 5.0 1996/05/28 13:23:23 ostell
- * Set to revision 5.0
- *
- * Revision 1.7 1996/02/28 04:53:06 ostell
- * fix to prevernt recursion on substring replaces
- *
- * Revision 1.6 1996/02/26 20:24:05 kans
- * replace needs MemCopy instead of StringMove (JO), and set dirty flag
- *
- * Revision 1.5 1996/01/03 23:06:32 ostell
- * support for longer replaces, controlled updating
- *
- * Revision 1.3 1996/01/02 18:40:07 ostell
- * simplified code.
- *
- * Revision 1.2 1996/01/01 00:05:14 kans
- * replaced StringStr with StringISearch to ignore case
- *
- * Revision 1.1 1995/12/31 18:13:14 kans
- * Initial revision
- *
+* Revision 5.1 1996/09/06 20:20:41 kans
+* keeps going even if ObjMgrTypeFind returns NULL (e.g., on OBJ_BIOSEQ_SEG),
+* and adds a case_counts parameter for case sensitive/insensitive searches.
+*
+* Revision 5.0 1996/05/28 13:23:23 ostell
+* Set to revision 5.0
+*
+* Revision 1.7 1996/02/28 04:53:06 ostell
+* fix to prevernt recursion on substring replaces
+*
+* Revision 1.6 1996/02/26 20:24:05 kans
+* replace needs MemCopy instead of StringMove (JO), and set dirty flag
+*
+* Revision 1.5 1996/01/03 23:06:32 ostell
+* support for longer replaces, controlled updating
+*
+* Revision 1.3 1996/01/02 18:40:07 ostell
+* simplified code.
+*
+* Revision 1.2 1996/01/01 00:05:14 kans
+* replaced StringStr with StringISearch to ignore case
+*
+* Revision 1.1 1995/12/31 18:13:14 kans
+* Initial revision
+*
* Revision 1.1.1.1 1995/10/19 18:42:10 sad
* Initial version
*
@@ -137,47 +161,43 @@
#include <subutil.h>
#include <findrepl.h>
-/* internal structure passed to callbacks */
+/* callback type for search/replace functions */
-typedef struct findstruct {
- Uint2 entityID;
- CharPtr find_string;
- CharPtr replace_string;
- Boolean case_counts;
- Boolean whole_word;
- Boolean do_replace;
- Boolean select_item;
- Int2 send_update;
- Boolean did_find;
- Boolean did_replace;
- Boolean dirty;
- Boolean descFilter [SEQDESCR_MAX];
- Boolean featFilter [FEATDEF_MAX];
- Boolean seqidFilter [NUM_SEQID];
- int d [256];
- size_t subLen;
-} FindStruct, PNTR FindStructPtr;
+typedef void (*FindReplFunc) (CharPtr PNTR strp, Pointer fspdata);
-#define PID_NOTSET 0
-#define PID_DBTAG 1
-#define PID_NAME 2
-#define PID_ML 3
-#define PID_STR 4
+/* internal data structure */
-#define NAMESTD_LAST 0
-#define NAMESTD_FIRST 1
-#define NAMESTD_MIDDLE 2
-#define NAMESTD_FULL 3
-#define NAMESTD_INITIALS 4
-#define NAMESTD_SUFFIX 5
-#define NAMESTD_TITLE 6
+typedef struct findstruct {
+ Uint2 entityID;
+ FindReplFunc func;
+ FindReplProc callback;
+ Pointer userdata;
+
+ CharPtr find_string;
+ CharPtr replace_string;
+ Boolean case_counts;
+ Boolean whole_word;
+ Int4 findLen;
+ Int4 replaceLen;
+
+ Boolean select_item;
+ Int2 send_update;
+ Boolean did_find;
+ Boolean did_replace;
+ Boolean dirty;
+
+ Boolean descFilter [SEQDESCR_MAX];
+ Boolean featFilter [FEATDEF_MAX];
+ Boolean seqidFilter [NUM_SEQID];
+
+ int d [256];
+ TextFsaPtr fsa;
+} FindStruct, PNTR FindStructPtr;
-#define AUTHLIST_STRUCTURED 1
-#define AUTHLIST_ML 2
-#define AUTHLIST_STRING 3
-
#define FINDREPL_BUFFER_MAX 1000000
+/* BOYER-MOORE SEARCH FUNCTIONS */
+
/* StringSearch and StringISearch use the Boyer-Moore algorithm, as described
in Niklaus Wirth, Algorithms and Data Structures, Prentice- Hall, Inc.,
Englewood Cliffs, NJ., 1986, p. 69. The original had an error, where
@@ -226,8 +246,8 @@ static CharPtr FindSubString (
return NULL;
}
-/* passed subLen and d array to avoid repeated initialization of the Boyer-Moore
- displacement table */
+/* passed subLen and d array to avoid repeated initialization
+ of the Boyer-Moore displacement table */
static CharPtr SearchForString (
CharPtr str,
@@ -251,86 +271,70 @@ static CharPtr SearchForString (
ptr = FindSubString (str, sub, case_counts, strLen, subLen, d);
if (ptr == NULL) return NULL;
- if (whole_word) {
- while (keep_looking && ptr != NULL)
- {
- keep_looking = FALSE;
- if (ptr > str) {
- tmp = ptr - 1;
- if (! IS_WHITESP (*tmp))
- {
- keep_looking = TRUE;
- }
- }
- if (!keep_looking)
- {
- tmp = ptr + StringLen (sub);
- if (*tmp != '\0' && (! IS_WHITESP (*tmp)))
- {
- keep_looking = TRUE;
- }
+ if (! whole_word) return ptr;
+
+ while (keep_looking && ptr != NULL) {
+ keep_looking = FALSE;
+ if (ptr > str) {
+ tmp = ptr - 1;
+ if (! IS_WHITESP (*tmp)) {
+ keep_looking = TRUE;
}
- if (keep_looking)
- {
- ptr = FindSubString (ptr + subLen, sub, case_counts, strLen, subLen, d);
+ }
+ if (! keep_looking) {
+ tmp = ptr + StringLen (sub);
+ if (*tmp != '\0' && (! IS_WHITESP (*tmp)) && (! ispunct (*tmp))) {
+ keep_looking = TRUE;
}
}
+ if (keep_looking) {
+ ptr = FindSubString (ptr + subLen, sub, case_counts, strLen, subLen, d);
+ }
}
return ptr;
}
-/*=======================================================================*/
-/* */
-/* FindReplString () - Does a search and replace in a given string. */
-/* */
-/* Main Parameters: */
-/* */
-/* strp : The string to operate on. Passed as a pointer to */
-/* a string so that it can be replaced by the */
-/* resulting string. */
-/* */
-/* fsp->find_string : The substring that is being replaced */
-/* in strp. */
-/* */
-/* fsp->replace_string : The substring that is replacing */
-/* find_string in strp. */
-/* */
-/*=======================================================================*/
-
-static Boolean FindReplString (
+static void BoyerMooreFindString (
CharPtr PNTR strp,
- FindStructPtr fsp
+ Pointer userdata
)
{
- Boolean wasChanged;
- Int4 replaceLen;
- Int4 findLen;
- Int4 searchLen;
- Int4 buffSize;
- CharPtr workingBuffer;
- CharPtr searchString;
- CharPtr substringPtr;
-
- if (strp == NULL || fsp == NULL) return FALSE;
+ FindStructPtr fsp;
+ CharPtr searchString;
- replaceLen = StringLen (fsp->replace_string);
- findLen = StringLen (fsp->find_string);
- searchLen = StringLen (*strp);
+ if (strp == NULL || userdata == NULL) return;
+ fsp = (FindStructPtr) userdata;
searchString = *strp;
- wasChanged = FALSE;
+ if (SearchForString (searchString, fsp->find_string, fsp->case_counts,
+ fsp->whole_word, fsp->findLen, fsp->d) != NULL) {
+ fsp->did_find = TRUE;
+ }
+}
- if (! fsp->do_replace) {
- if (SearchForString (searchString, fsp->find_string,
- fsp->case_counts, fsp->whole_word,
- findLen, fsp->d) != NULL) {
+static void BoyerMooreReplaceString (
+ CharPtr PNTR strp,
+ Pointer userdata
+)
- fsp->did_find = TRUE;
- }
- return TRUE;
- }
+{
+ Int4 buffSize;
+ FindStructPtr fsp;
+ Int4 searchLen;
+ CharPtr searchString;
+ CharPtr substringPtr;
+ Boolean wasChanged;
+ CharPtr workingBuffer;
+
+ if (strp == NULL || userdata == NULL) return;
+ fsp = (FindStructPtr) userdata;
+
+ searchString = *strp;
+ searchLen = StringLen (searchString);
+
+ wasChanged = FALSE;
/*------------------------------------------------*/
/* Make a guess of how big a working buffer we'll */
@@ -347,18 +351,17 @@ static Boolean FindReplString (
/* */
/*------------------------------------------------*/
- if (replaceLen > findLen)
- {
- buffSize = searchLen + ((searchLen/findLen) * (replaceLen - findLen));
- if (buffSize > FINDREPL_BUFFER_MAX)
+ if (fsp->replaceLen > fsp->findLen) {
+ buffSize = searchLen + ((searchLen/fsp->findLen) * (fsp->replaceLen - fsp->findLen));
+ if (buffSize > FINDREPL_BUFFER_MAX) {
buffSize = FINDREPL_BUFFER_MAX;
- }
- else
+ }
+ } else {
buffSize = searchLen;
+ }
workingBuffer = (CharPtr) MemNew (buffSize + 2);
- if (workingBuffer == NULL)
- return FALSE;
+ if (workingBuffer == NULL) return;
workingBuffer[0] = '\0';
@@ -369,45 +372,104 @@ static Boolean FindReplString (
/*----------------------------------------*/
while ((substringPtr = SearchForString (searchString, fsp->find_string,
- fsp->case_counts, fsp->whole_word,
- findLen, fsp->d)) != NULL)
- {
- wasChanged = TRUE;
- substringPtr[0] = '\0';
+ fsp->case_counts, fsp->whole_word, fsp->findLen, fsp->d)) != NULL) {
+ wasChanged = TRUE;
+ substringPtr [0] = '\0';
- if (StringLen (workingBuffer) + StringLen (searchString) > buffSize)
- return FALSE;
+ if (StringLen (workingBuffer) + StringLen (searchString) > buffSize) return;
- StringCat (workingBuffer, searchString);
- StringCat (workingBuffer, fsp->replace_string);
- substringPtr[0] = 'x';
- searchString = substringPtr + findLen;
- }
+ StringCat (workingBuffer, searchString);
+ StringCat (workingBuffer, fsp->replace_string);
+ substringPtr [0] = 'x';
+ searchString = substringPtr + fsp->findLen;
+ }
- if (searchString != NULL)
+ if (searchString != NULL) {
StringCat (workingBuffer, searchString);
+ }
/*-------------------------------------*/
/* If any replacements were made, then */
/* swap in the new string for the old. */
/*-------------------------------------*/
- if (wasChanged)
- {
- MemFree (*strp);
- (*strp) = workingBuffer;
+ if (wasChanged) {
+ MemFree (*strp);
+ (*strp) = workingBuffer;
- fsp->did_replace = TRUE;
- fsp->dirty = TRUE;
- }
- else
+ fsp->did_replace = TRUE;
+ fsp->dirty = TRUE;
+ } else {
MemFree (workingBuffer);
+ }
+}
+
+/* FINITE-STATE AUTOMATON SEARCH FUNCTION */
+
+static void FSAFindStrings (
+ CharPtr PNTR strp,
+ Pointer userdata
+)
+
+{
+ Char ch;
+ FindStructPtr fsp;
+ CharPtr ptr;
+ CharPtr searchString;
+ Int2 state;
+ ValNodePtr matches;
+
+ if (strp == NULL || userdata == NULL) return;
+ fsp = (FindStructPtr) userdata;
+
+ searchString = *strp;
+ if (searchString == NULL) return;
- /*---------------------*/
- /* Return successfully */
- /*---------------------*/
+ state = 0;
+ ptr = searchString;
+ ch = *ptr;
- return TRUE;
+ while (ch != '\0') {
+ matches = NULL;
+ state = TextFsaNext (fsp->fsa, state, ch, &matches);
+ if (matches != NULL) {
+ fsp->did_find = TRUE;
+ return;
+ }
+ ptr++;
+ ch = *ptr;
+ }
+}
+
+/* MASTER SEARCH FUNCTION CALLS DESIGNATED FUNC CALLBACK */
+
+/*=======================================================================*/
+/* */
+/* FindReplString () - Does a search and replace in a given string. */
+/* */
+/* Main Parameters: */
+/* */
+/* strp : The string to operate on. Passed as a pointer to */
+/* a string so that it can be replaced by the */
+/* resulting string. */
+/* */
+/* fsp->find_string : The substring that is being replaced */
+/* in strp. */
+/* */
+/* fsp->replace_string : The substring that is replacing */
+/* find_string in strp. */
+/* */
+/*=======================================================================*/
+
+static void FindReplString (
+ CharPtr PNTR strp,
+ FindStructPtr fsp
+)
+
+{
+ if (strp == NULL || fsp == NULL || fsp->func == NULL) return;
+
+ fsp->func (strp, (Pointer) fsp);
}
/*=======================================================================*/
@@ -464,26 +526,25 @@ static void FindReplDbxrefs (
static void FindReplAffil (
AffilPtr pAffil,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- if (pAffil == NULL)
- return;
+ if (pAffil == NULL) return;
if (pAffil->choice == 1) {
- FindReplString (&(pAffil->affil) , pFindStruct);
+ FindReplString (&(pAffil->affil) , fsp);
} else {
- FindReplString (&(pAffil->affil) , pFindStruct);
- FindReplString (&(pAffil->div) , pFindStruct);
- FindReplString (&(pAffil->city) , pFindStruct);
- FindReplString (&(pAffil->sub) , pFindStruct);
- FindReplString (&(pAffil->country) , pFindStruct);
- FindReplString (&(pAffil->street) , pFindStruct);
- FindReplString (&(pAffil->email) , pFindStruct);
- FindReplString (&(pAffil->fax) , pFindStruct);
- FindReplString (&(pAffil->phone) , pFindStruct);
- FindReplString (&(pAffil->postal_code), pFindStruct);
+ FindReplString (&(pAffil->affil) , fsp);
+ FindReplString (&(pAffil->div) , fsp);
+ FindReplString (&(pAffil->city) , fsp);
+ FindReplString (&(pAffil->sub) , fsp);
+ FindReplString (&(pAffil->country) , fsp);
+ FindReplString (&(pAffil->street) , fsp);
+ FindReplString (&(pAffil->email) , fsp);
+ FindReplString (&(pAffil->fax) , fsp);
+ FindReplString (&(pAffil->phone) , fsp);
+ FindReplString (&(pAffil->postal_code), fsp);
}
}
@@ -493,9 +554,23 @@ static void FindReplAffil (
/* */
/*=======================================================================*/
+#define NAMESTD_LAST 0
+#define NAMESTD_FIRST 1
+#define NAMESTD_MIDDLE 2
+#define NAMESTD_FULL 3
+#define NAMESTD_INITIALS 4
+#define NAMESTD_SUFFIX 5
+#define NAMESTD_TITLE 6
+
+#define PID_NOTSET 0
+#define PID_DBTAG 1
+#define PID_NAME 2
+#define PID_ML 3
+#define PID_STR 4
+
static void FindReplAuthor (
AuthorPtr pAuthor,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
@@ -503,36 +578,33 @@ static void FindReplAuthor (
CharPtr pNameStr;
ValNodePtr pDbxref;
- if (pAuthor == NULL)
- return;
+ if (pAuthor == NULL) return;
- FindReplAffil (pAuthor->affil, pFindStruct);
+ FindReplAffil (pAuthor->affil, fsp);
- switch (pAuthor->name->choice)
- {
+ switch (pAuthor->name->choice) {
case PID_NOTSET :
break;
case PID_DBTAG :
pDbxref = pAuthor->name->data;
- FindReplDbxrefs (pDbxref, pFindStruct);
+ FindReplDbxrefs (pDbxref, fsp);
break;
case PID_NAME :
pNameStandard = pAuthor->name->data;
- if (pNameStandard != NULL)
- {
- FindReplString (&(pNameStandard->names [NAMESTD_LAST]) , pFindStruct);
- FindReplString (&(pNameStandard->names [NAMESTD_FIRST]) , pFindStruct);
- FindReplString (&(pNameStandard->names [NAMESTD_MIDDLE]) , pFindStruct);
- FindReplString (&(pNameStandard->names [NAMESTD_FULL]) , pFindStruct);
- FindReplString (&(pNameStandard->names [NAMESTD_INITIALS]), pFindStruct);
- FindReplString (&(pNameStandard->names [NAMESTD_SUFFIX]) , pFindStruct);
- FindReplString (&(pNameStandard->names [NAMESTD_TITLE]) , pFindStruct);
+ if (pNameStandard != NULL) {
+ FindReplString (&(pNameStandard->names [NAMESTD_LAST]) , fsp);
+ FindReplString (&(pNameStandard->names [NAMESTD_FIRST]) , fsp);
+ FindReplString (&(pNameStandard->names [NAMESTD_MIDDLE]) , fsp);
+ FindReplString (&(pNameStandard->names [NAMESTD_FULL]) , fsp);
+ FindReplString (&(pNameStandard->names [NAMESTD_INITIALS]), fsp);
+ FindReplString (&(pNameStandard->names [NAMESTD_SUFFIX]) , fsp);
+ FindReplString (&(pNameStandard->names [NAMESTD_TITLE]) , fsp);
}
break;
case PID_ML :
case PID_STR :
pNameStr = pAuthor->name->data;
- FindReplString (&pNameStr, pFindStruct);
+ FindReplString (&pNameStr, fsp);
break;
default:
break;
@@ -545,6 +617,10 @@ static void FindReplAuthor (
/* */
/*=======================================================================*/
+#define AUTHLIST_STRUCTURED 1
+#define AUTHLIST_ML 2
+#define AUTHLIST_STRING 3
+
static void FindReplAuthlist (
AuthListPtr alp,
FindStructPtr fsp
@@ -555,30 +631,25 @@ static void FindReplAuthlist (
CharPtr szAuthor;
AuthorPtr pAuthor;
- if (alp == NULL)
- return;
+ if (alp == NULL) return;
FindReplAffil (alp->affil, fsp);
vnpNames = alp->names;
- while (vnpNames != NULL)
- {
- if (alp->choice == AUTHLIST_STRUCTURED)
- {
- pAuthor = (AuthorPtr) vnpNames->data.ptrvalue;
- if (pAuthor != NULL)
- FindReplAuthor (pAuthor, fsp);
+ while (vnpNames != NULL) {
+ if (alp->choice == AUTHLIST_STRUCTURED) {
+ pAuthor = (AuthorPtr) vnpNames->data.ptrvalue;
+ if (pAuthor != NULL) {
+ FindReplAuthor (pAuthor, fsp);
}
- else
- {
- szAuthor = (CharPtr) vnpNames->data.ptrvalue;
- if (szAuthor != NULL)
- {
- FindReplString (&szAuthor, fsp);
- vnpNames->data.ptrvalue = szAuthor;
- }
+ } else {
+ szAuthor = (CharPtr) vnpNames->data.ptrvalue;
+ if (szAuthor != NULL) {
+ FindReplString (&szAuthor, fsp);
+ vnpNames->data.ptrvalue = szAuthor;
}
- vnpNames = vnpNames->next;
}
+ vnpNames = vnpNames->next;
+ }
}
/*=======================================================================*/
@@ -589,14 +660,13 @@ static void FindReplAuthlist (
static void FindReplCitRetract (
CitRetractPtr pCitRetract,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- if (pCitRetract == NULL)
- return;
+ if (pCitRetract == NULL) return;
- FindReplString (&(pCitRetract->exp), pFindStruct);
+ FindReplString (&(pCitRetract->exp), fsp);
}
/*=======================================================================*/
@@ -607,32 +677,22 @@ static void FindReplCitRetract (
static void FindReplImprint (
ImprintPtr pImprint,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
-
- /*------------------*/
- /* Check parameters */
- /*------------------*/
-
- if (pImprint == NULL)
- return;
-
- /*-------------------------*/
- /* Do the find and replace */
- /*-------------------------*/
-
- FindReplString (&(pImprint->volume) , pFindStruct);
- FindReplString (&(pImprint->issue) , pFindStruct);
- FindReplString (&(pImprint->pages) , pFindStruct);
- FindReplString (&(pImprint->section) , pFindStruct);
- FindReplString (&(pImprint->part_sup) , pFindStruct);
- FindReplString (&(pImprint->language) , pFindStruct);
- FindReplString (&(pImprint->part_supi), pFindStruct);
-
- FindReplAffil (pImprint->pub, pFindStruct);
- FindReplCitRetract (pImprint->retract, pFindStruct);
+ if (pImprint == NULL) return;
+
+ FindReplString (&(pImprint->volume) , fsp);
+ FindReplString (&(pImprint->issue) , fsp);
+ FindReplString (&(pImprint->pages) , fsp);
+ FindReplString (&(pImprint->section) , fsp);
+ FindReplString (&(pImprint->part_sup) , fsp);
+ FindReplString (&(pImprint->language) , fsp);
+ FindReplString (&(pImprint->part_supi), fsp);
+
+ FindReplAffil (pImprint->pub, fsp);
+ FindReplCitRetract (pImprint->retract, fsp);
}
/*=======================================================================*/
@@ -643,31 +703,31 @@ static void FindReplImprint (
static void FindReplCitBook (
CitBookPtr pCitBook,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
AffilPtr afp;
- ValNodePtr vnp;
CharPtr tmpStr;
+ ValNodePtr vnp;
if (pCitBook == NULL) return;
- FindReplStringList (pCitBook->title, pFindStruct);
- FindReplImprint (pCitBook->imp, pFindStruct);
- FindReplAuthlist (pCitBook->authors, pFindStruct);
- FindReplStringList (pCitBook->title, pFindStruct);
- FindReplStringList (pCitBook->coll, pFindStruct);
+ FindReplStringList (pCitBook->title, fsp);
+ FindReplImprint (pCitBook->imp, fsp);
+ FindReplAuthlist (pCitBook->authors, fsp);
+ FindReplStringList (pCitBook->title, fsp);
+ FindReplStringList (pCitBook->coll, fsp);
if (pCitBook->othertype == 1) {
for (vnp = (ValNodePtr) pCitBook->otherdata; vnp != NULL; vnp = vnp->next) {
switch (vnp->choice) {
case 1 :
- FindReplString ((CharPtr PNTR) &(vnp->data.ptrvalue), pFindStruct);
+ FindReplString ((CharPtr PNTR) &(vnp->data.ptrvalue), fsp);
break;
case 3 :
afp = (AffilPtr) vnp->data.ptrvalue;
- FindReplAffil (afp, pFindStruct);
+ FindReplAffil (afp, fsp);
break;
default :
break;
@@ -675,35 +735,34 @@ static void FindReplCitBook (
}
} else if (pCitBook->othertype == 2) {
tmpStr = (CharPtr) pCitBook->otherdata;
- FindReplString (&tmpStr, pFindStruct);
+ FindReplString (&tmpStr, fsp);
pCitBook->otherdata = tmpStr;
}
}
static void FindReplCitArt (
CitArtPtr pCitArt,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- CitJourPtr pCitJournal;
- CitBookPtr pCitBook;
+ CitBookPtr pCitBook;
+ CitJourPtr pCitJournal;
- if (pCitArt == NULL)
- return;
+ if (pCitArt == NULL) return;
- FindReplAuthlist (pCitArt->authors, pFindStruct);
+ FindReplAuthlist (pCitArt->authors, fsp);
if (pCitArt->fromptr != NULL) {
switch (pCitArt->from) {
case 1 :
pCitJournal = (CitJourPtr) pCitArt->fromptr;
- FindReplStringList (pCitArt->title, pFindStruct);
- FindReplImprint (pCitJournal->imp, pFindStruct);
+ FindReplStringList (pCitArt->title, fsp);
+ FindReplImprint (pCitJournal->imp, fsp);
break;
case 2 :
case 3 :
pCitBook = (CitBookPtr) pCitArt->fromptr;
- FindReplCitBook (pCitBook, pFindStruct);
+ FindReplCitBook (pCitBook, fsp);
break;
default :
break;
@@ -719,7 +778,7 @@ static void FindReplCitArt (
static void FindReplMedlineEntry (
MedlineEntryPtr pMedlineEntry,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
@@ -728,61 +787,53 @@ static void FindReplMedlineEntry (
MedlineRnPtr pRn;
CharPtr tmpStr;
- if (pMedlineEntry == NULL)
- return;
+ if (pMedlineEntry == NULL) return;
- FindReplCitArt(pMedlineEntry->cit, pFindStruct);
- FindReplString (&(pMedlineEntry->abstract), pFindStruct);
+ FindReplCitArt(pMedlineEntry->cit, fsp);
+ FindReplString (&(pMedlineEntry->abstract), fsp);
pRn = pMedlineEntry->substance;
- while (pRn != NULL)
- {
- FindReplString (&(pRn->cit), pFindStruct);
- FindReplString (&(pRn->name), pFindStruct);
- pRn = pRn->next;
- }
+ while (pRn != NULL) {
+ FindReplString (&(pRn->cit), fsp);
+ FindReplString (&(pRn->name), fsp);
+ pRn = pRn->next;
+ }
pMesh = pMedlineEntry->mesh;
- while (pMesh != NULL)
- {
- FindReplString (&(pMesh->term), pFindStruct);
- pMesh = pMesh->next;
- }
+ while (pMesh != NULL) {
+ FindReplString (&(pMesh->term), fsp);
+ pMesh = pMesh->next;
+ }
- if (pMedlineEntry->xref != NULL)
- {
- tmpStr = (CharPtr) pMedlineEntry->xref->data.ptrvalue;
- FindReplString (&tmpStr, pFindStruct);
- pMedlineEntry->xref->data.ptrvalue = tmpStr;
- }
+ if (pMedlineEntry->xref != NULL) {
+ tmpStr = (CharPtr) pMedlineEntry->xref->data.ptrvalue;
+ FindReplString (&tmpStr, fsp);
+ pMedlineEntry->xref->data.ptrvalue = tmpStr;
+ }
- if (pMedlineEntry->idnum != NULL)
- {
- tmpStr = (CharPtr) pMedlineEntry->idnum->data.ptrvalue;
- FindReplString (&tmpStr, pFindStruct);
- pMedlineEntry->idnum->data.ptrvalue = tmpStr;
- }
+ if (pMedlineEntry->idnum != NULL) {
+ tmpStr = (CharPtr) pMedlineEntry->idnum->data.ptrvalue;
+ FindReplString (&tmpStr, fsp);
+ pMedlineEntry->idnum->data.ptrvalue = tmpStr;
+ }
- if (pMedlineEntry->pub_type != NULL)
- {
- tmpStr = (CharPtr) pMedlineEntry->pub_type->data.ptrvalue;
- FindReplString (&tmpStr, pFindStruct);
- pMedlineEntry->pub_type->data.ptrvalue = tmpStr;
- }
+ if (pMedlineEntry->pub_type != NULL) {
+ tmpStr = (CharPtr) pMedlineEntry->pub_type->data.ptrvalue;
+ FindReplString (&tmpStr, fsp);
+ pMedlineEntry->pub_type->data.ptrvalue = tmpStr;
+ }
- if (pMedlineEntry->gene != NULL)
- {
- tmpStr = (CharPtr) pMedlineEntry->gene->data.ptrvalue;
- FindReplString (&tmpStr, pFindStruct);
- pMedlineEntry->gene->data.ptrvalue = tmpStr;
- }
+ if (pMedlineEntry->gene != NULL) {
+ tmpStr = (CharPtr) pMedlineEntry->gene->data.ptrvalue;
+ FindReplString (&tmpStr, fsp);
+ pMedlineEntry->gene->data.ptrvalue = tmpStr;
+ }
pField = pMedlineEntry->mlfield;
- while (pField != NULL)
- {
- FindReplString (&(pField->str), pFindStruct);
- pField = pField->next;
- }
+ while (pField != NULL) {
+ FindReplString (&(pField->str), fsp);
+ pField = pField->next;
+ }
}
/*=======================================================================*/
@@ -822,8 +873,7 @@ static void FindReplPub (
}
if (vnp->data.ptrvalue == NULL) return;
- switch (vnp->choice)
- {
+ switch (vnp->choice) {
case PUB_Gen :
cgp = (CitGenPtr) vnp->data.ptrvalue;
FindReplAuthlist (cgp->authors, fsp);
@@ -832,8 +882,7 @@ static void FindReplPub (
FindReplString (&(cgp->issue), fsp);
FindReplString (&(cgp->pages), fsp);
FindReplString (&(cgp->title), fsp);
- if (cgp->journal != NULL)
- {
+ if (cgp->journal != NULL) {
tmpStr = (CharPtr) cgp->journal->data.ptrvalue;
FindReplString (&tmpStr, fsp);
cgp->journal->data.ptrvalue = tmpStr;
@@ -854,8 +903,7 @@ static void FindReplPub (
break;
case PUB_Journal :
cjp = (CitJourPtr) vnp->data.ptrvalue;
- if (cjp->title != NULL)
- {
+ if (cjp->title != NULL) {
tmpStr = (CharPtr) cjp->title->data.ptrvalue;
FindReplString (&tmpStr, fsp);
cjp->title->data.ptrvalue = tmpStr;
@@ -869,16 +917,14 @@ static void FindReplPub (
case PUB_Proc :
cbp = (CitBookPtr) vnp->data.ptrvalue;
cpvnp = cbp->otherdata;
- while (cpvnp != NULL)
- {
- if (cpvnp->choice == 1)
- {
- tmpStr = (CharPtr) cpvnp->data.ptrvalue;
- FindReplString (&tmpStr, fsp);
- cpvnp->data.ptrvalue = tmpStr;
- }
- else if (cpvnp->choice == 3)
+ while (cpvnp != NULL) {
+ if (cpvnp->choice == 1) {
+ tmpStr = (CharPtr) cpvnp->data.ptrvalue;
+ FindReplString (&tmpStr, fsp);
+ cpvnp->data.ptrvalue = tmpStr;
+ } else if (cpvnp->choice == 3) {
FindReplAffil((AffilPtr) cpvnp->data.ptrvalue, fsp);
+ }
cpvnp = cpvnp->next;
}
break;
@@ -1021,20 +1067,17 @@ static void FindReplBioSource (
static void FindReplPatentSeqId (
PatentSeqIdPtr pPatentSeqId,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- if (pPatentSeqId == NULL)
- return;
-
- if (pPatentSeqId->cit == NULL)
- return;
+ if (pPatentSeqId == NULL) return;
+ if (pPatentSeqId->cit == NULL) return;
- FindReplString (&(pPatentSeqId->cit->country), pFindStruct);
- FindReplString (&(pPatentSeqId->cit->number), pFindStruct);
- FindReplString (&(pPatentSeqId->cit->app_number), pFindStruct);
- FindReplString (&(pPatentSeqId->cit->doc_type), pFindStruct);
+ FindReplString (&(pPatentSeqId->cit->country), fsp);
+ FindReplString (&(pPatentSeqId->cit->number), fsp);
+ FindReplString (&(pPatentSeqId->cit->app_number), fsp);
+ FindReplString (&(pPatentSeqId->cit->doc_type), fsp);
}
/*=======================================================================*/
@@ -1045,16 +1088,15 @@ static void FindReplPatentSeqId (
static void FindReplTextSeqId (
TextSeqIdPtr pTextSeqId,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- if (pTextSeqId == NULL)
- return;
+ if (pTextSeqId == NULL) return;
- FindReplString (&(pTextSeqId->name), pFindStruct);
- FindReplString (&(pTextSeqId->accession), pFindStruct);
- FindReplString (&(pTextSeqId->release), pFindStruct);
+ FindReplString (&(pTextSeqId->name), fsp);
+ FindReplString (&(pTextSeqId->accession), fsp);
+ FindReplString (&(pTextSeqId->release), fsp);
}
/*=======================================================================*/
@@ -1065,15 +1107,14 @@ static void FindReplTextSeqId (
static void FindReplGiim (
GiimPtr pGiim,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- if (pGiim == NULL)
- return;
+ if (pGiim == NULL) return;
- FindReplString (&(pGiim->db), pFindStruct);
- FindReplString (&(pGiim->release), pFindStruct);
+ FindReplString (&(pGiim->db), fsp);
+ FindReplString (&(pGiim->release), fsp);
}
/*=======================================================================*/
@@ -1084,14 +1125,13 @@ static void FindReplGiim (
static void FindReplPDBSeqId (
PDBSeqIdPtr pPDBSeqId,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- if (pPDBSeqId == NULL)
- return;
+ if (pPDBSeqId == NULL) return;
- FindReplString (&(pPDBSeqId->mol), pFindStruct);
+ FindReplString (&(pPDBSeqId->mol), fsp);
}
/*=======================================================================*/
@@ -1102,14 +1142,13 @@ static void FindReplPDBSeqId (
static void FindReplObjectId (
ObjectIdPtr pObjectId,
- FindStructPtr pFindStruct
+ FindStructPtr fsp
)
{
- if (pObjectId == NULL)
- return;
+ if (pObjectId == NULL) return;
- FindReplString (&(pObjectId->str), pFindStruct);
+ FindReplString (&(pObjectId->str), fsp);
}
/*=======================================================================*/
@@ -1127,35 +1166,16 @@ static void FindReplSeqId (
FindStructPtr fsp;
Uint1 subtype;
- /*------------------*/
- /* Check parameters */
- /*------------------*/
-
- if (sip == NULL)
- return;
-
+ if (sip == NULL) return;
fsp = (FindStructPtr) userdata;
- if (fsp == NULL)
- return;
- /*-----------------------------------*/
- /* Check to see if we're supposed to */
- /* process this subtype or not. */
- /*-----------------------------------*/
+ /* check subtype against filter */
subtype = sip->choice;
- if (subtype >= NUM_SEQID)
- return;
- if (! fsp->seqidFilter [subtype])
- return;
-
- /*------------------------------*/
- /* Do search/replace on all the */
- /* different SeqId types. */
- /*------------------------------*/
+ if (subtype >= NUM_SEQID) return;
+ if (! fsp->seqidFilter [subtype]) return;
- switch (subtype)
- {
+ switch (subtype) {
case SEQID_NOT_SET :
break;
case SEQID_LOCAL :
@@ -1216,6 +1236,9 @@ static void FindReplSendMessages (
if (fsp->select_item && (fsp->did_find || fsp->did_replace)) {
ObjMgrAlsoSelect (fsp->entityID, itemID, itemtype, 0, NULL);
}
+ if (fsp->callback != NULL && (fsp->did_find || fsp->did_replace)) {
+ fsp->callback (fsp->entityID, itemID, itemtype, fsp->userdata);
+ }
}
/*=======================================================================*/
@@ -1233,6 +1256,8 @@ static void FindReplBioseqs (
FindStructPtr fsp;
SeqIdPtr sip;
+ if (bsp == NULL) return;
+
fsp = (FindStructPtr) userdata;
fsp->did_find = FALSE;
fsp->did_replace = FALSE;
@@ -1241,7 +1266,9 @@ static void FindReplBioseqs (
FindReplSeqId (sip, userdata);
}
- SeqMgrReplaceInBioseqIndex(bsp);
+ if (fsp->did_replace) {
+ SeqMgrReplaceInBioseqIndex (bsp);
+ }
FindReplSendMessages (fsp, bsp->idx.itemID, bsp->idx.itemtype);
}
@@ -1265,6 +1292,8 @@ static void FindReplAligns (
SeqLocPtr slp;
StdSegPtr ssp;
+ if (sap == NULL) return;
+
fsp = (FindStructPtr) userdata;
fsp->did_find = FALSE;
fsp->did_replace = FALSE;
@@ -1274,6 +1303,8 @@ static void FindReplAligns (
if (sap->segs == NULL) return;
+ /* SAS_DISC recursively presented by visit function, so removed here */
+
switch (sap->segtype) {
case SAS_DENDIAG :
ddp = (DenseDiagPtr) sap->segs;
@@ -1297,12 +1328,6 @@ static void FindReplAligns (
}
}
break;
- case SAS_DISC :
- /* recursive */
- for (sap = (SeqAlignPtr) sap->segs; sap != NULL; sap = sap->next) {
- FindReplAligns (sap, userdata);
- }
- break;
default :
break;
}
@@ -1324,6 +1349,8 @@ static void FindReplGraphs (
{
FindStructPtr fsp;
+ if (sgp == NULL) return;
+
fsp = (FindStructPtr) userdata;
fsp->did_find = FALSE;
fsp->did_replace = FALSE;
@@ -1359,6 +1386,8 @@ static void FindReplFeats (
Uint1 subtype;
tRNAPtr trp;
+ if (sfp == NULL) return;
+
fsp = (FindStructPtr) userdata;
fsp->did_find = FALSE;
fsp->did_replace = FALSE;
@@ -1498,6 +1527,8 @@ static void FindReplDescs (
PubdescPtr pdp;
Uint1 subtype;
+ if (sdp == NULL) return;
+
fsp = (FindStructPtr) userdata;
fsp->did_find = FALSE;
fsp->did_replace = FALSE;
@@ -1613,6 +1644,7 @@ static void FindReplSubmitBlock (
if (ssp == NULL) return;
sub = ssp->sub;
if (sub == NULL) return;
+
fsp->did_find = FALSE;
fsp->did_replace = FALSE;
@@ -1644,6 +1676,8 @@ static void FindReplSubmitBlock (
FindReplSendMessages (fsp, ssp->idx.itemID, ssp->idx.itemtype);
}
+/* EXTERNAL FIND-REPLACE FUNCTIONS */
+
/*=======================================================================*/
/* */
/* FindReplaceInEntity() - New find/replace function. */
@@ -1662,7 +1696,9 @@ NLM_EXTERN void FindReplaceInEntity (
BoolPtr descFilter,
BoolPtr featFilter,
BoolPtr seqidFilter,
- Boolean do_seqid_local
+ Boolean do_seqid_local,
+ FindReplProc callback,
+ Pointer userdata
)
{
@@ -1672,8 +1708,10 @@ NLM_EXTERN void FindReplaceInEntity (
ObjMgrDataPtr omdp;
SeqEntryPtr sep = NULL;
SeqSubmitPtr ssp = NULL;
+ size_t subLen;
- if (entityID == 0 || StringHasNoText (find_string)) return;
+ if (entityID == 0 || find_string == NULL
+ || (whole_word && StringHasNoText (find_string))) return;
omdp = ObjMgrGetData (entityID);
if (omdp != NULL) {
@@ -1698,29 +1736,38 @@ NLM_EXTERN void FindReplaceInEntity (
MemSet ((Pointer) &fs, 0, sizeof (FindStruct));
fs.entityID = entityID;
+ if (do_replace) {
+ fs.func = BoyerMooreReplaceString;
+ } else {
+ fs.func = BoyerMooreFindString;
+ }
+ fs.callback = callback;
+ fs.userdata = userdata;
+
fs.find_string = find_string;
fs.replace_string = replace_string;
fs.case_counts = case_counts;
fs.whole_word = whole_word;
- fs.do_replace = do_replace;
+ fs.findLen = StringLen (find_string);
+ fs.replaceLen = StringLen (replace_string);
+
fs.select_item = select_item;
fs.send_update = send_update;
-
fs.did_find = FALSE;
fs.did_replace = FALSE;
fs.dirty = FALSE;
/* build Boyer-Moore displacement array in advance */
- fs.subLen = StringLen (find_string);
+ subLen = StringLen (find_string);
for (ch = 0; ch < 256; ch++) {
- fs.d [ch] = fs.subLen;
+ fs.d [ch] = subLen;
}
- for (j = 0; j < (int) (fs.subLen - 1); j++) {
+ for (j = 0; j < (int) (subLen - 1); j++) {
ch = (int) (case_counts ? find_string [j] : TO_UPPER (find_string [j]));
if (ch >= 0 && ch <= 255) {
- fs.d [ch] = fs.subLen - j - 1;
+ fs.d [ch] = subLen - j - 1;
}
}
@@ -1779,6 +1826,141 @@ NLM_EXTERN void FindReplaceInEntity (
/*=======================================================================*/
/* */
+/* FindStringsInEntity() - Multi-string find function. */
+/* */
+/*=======================================================================*/
+
+NLM_EXTERN void FindStringsInEntity (
+  Uint2 entityID,
+  CharPtr PNTR find_strings,
+  Boolean case_counts,
+  Boolean whole_word,
+  Boolean select_item,
+  Int2 send_update,
+  BoolPtr descFilter,
+  BoolPtr featFilter,
+  BoolPtr seqidFilter,
+  Boolean do_seqid_local,
+  FindReplProc callback,
+  Pointer userdata
+)
+
+{
+  FindStruct     fs;
+  int            idx;
+  ObjMgrDataPtr  omdp;
+  SeqEntryPtr    sep = NULL;
+  SeqSubmitPtr   ssp = NULL;
+
+  /* find_strings is a NULL-terminated array of strings to search for */
+
+  if (entityID == 0 || find_strings == NULL) return;
+
+  /* locate the top SeqEntry (and SeqSubmit, if any) registered for the entity */
+
+  omdp = ObjMgrGetData (entityID);
+  if (omdp != NULL) {
+    if (omdp->datatype == OBJ_SEQSUB) {
+      ssp = (SeqSubmitPtr) omdp->dataptr;
+      if (ssp != NULL && ssp->datatype == 1) {
+        sep = (SeqEntryPtr) ssp->data;
+      }
+    } else if (omdp->datatype == OBJ_BIOSEQ || omdp->datatype == OBJ_BIOSEQSET) {
+      sep = (SeqEntryPtr) omdp->choice;
+    }
+  }
+  if (sep == NULL) return;
+
+  /* set up search state - this entry point only finds, it never replaces */
+
+  MemSet ((Pointer) &fs, 0, sizeof (FindStruct));
+
+  fs.entityID = entityID;
+  fs.func = FSAFindStrings;
+  fs.callback = callback;
+  fs.userdata = userdata;
+
+  fs.find_string = NULL;
+  fs.replace_string = NULL;
+  fs.case_counts = case_counts;
+  fs.whole_word = whole_word;
+  fs.findLen = 0;
+  fs.replaceLen = 0;
+
+  fs.select_item = select_item;
+  fs.send_update = send_update;
+  fs.did_find = FALSE;
+  fs.did_replace = FALSE;
+  fs.dirty = FALSE;
+
+  /* build the finite state machine from all search strings in advance */
+
+  fs.fsa = TextFsaNew ();
+
+  idx = 0;
+  while (find_strings [idx] != NULL) {
+    TextFsaAdd (fs.fsa, find_strings [idx]);
+    idx++;
+  }
+
+  /* descriptor and feature filters default to all TRUE when not supplied */
+
+  if (descFilter == NULL) {
+    MemSet ((Pointer) &fs.descFilter, (int) TRUE, sizeof (fs.descFilter));
+  } else {
+    MemCopy ((Pointer) &fs.descFilter, (Pointer) descFilter, sizeof (fs.descFilter));
+  }
+
+  if (featFilter == NULL) {
+    MemSet ((Pointer) &fs.featFilter, (int) TRUE, sizeof (fs.featFilter));
+  } else {
+    MemCopy ((Pointer) &fs.featFilter, (Pointer) featFilter, sizeof (fs.featFilter));
+  }
+
+  /* seqid filter defaults to all FALSE, optionally enabling local IDs only */
+
+  if (seqidFilter == NULL) {
+    MemSet ((Pointer) &fs.seqidFilter, (int) FALSE, sizeof (fs.seqidFilter));
+    if (do_seqid_local) {
+      fs.seqidFilter [SEQID_LOCAL] = TRUE;
+    }
+  } else {
+    MemCopy ((Pointer) &fs.seqidFilter, (Pointer) seqidFilter, sizeof (fs.seqidFilter));
+  }
+
+  /* ensure feature subtype is set in sfp->idx block */
+
+  AssignIDsInEntity (entityID, 0, NULL);
+
+  /* run the per-object visitors over every searchable component */
+
+  VisitBioseqsInSep (sep, (Pointer) &fs, FindReplBioseqs);
+  VisitFeaturesInSep (sep, (Pointer) &fs, FindReplFeats);
+  VisitAlignmentsInSep (sep, (Pointer) &fs, FindReplAligns);
+  VisitGraphsInSep (sep, (Pointer) &fs, FindReplGraphs);
+  VisitDescriptorsInSep (sep, (Pointer) &fs, FindReplDescs);
+
+  if (ssp != NULL) {
+    FindReplSubmitBlock (ssp, &fs);
+  }
+
+  /* clean up finite state machine */
+
+  TextFsaFree (fs.fsa);
+
+  /* send a single update message for the entity if anything was marked dirty */
+
+  if (fs.send_update == UPDATE_ONCE && fs.dirty) {
+    ObjMgrSetDirtyFlag (entityID, TRUE);
+    ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
+  }
+}
+
+/*=======================================================================*/
+/* */
/* FindReplaceString() - find/replace just one string. */
/* */
/*=======================================================================*/
@@ -1795,35 +1977,41 @@ NLM_EXTERN void FindReplaceString (
int ch;
FindStruct fs;
int j;
+ size_t subLen;
if (strp == NULL || StringHasNoText (find_string)) return;
MemSet ((Pointer) &fs, 0, sizeof (FindStruct));
fs.entityID = 0;
+ fs.func = BoyerMooreReplaceString;
+ fs.callback = NULL;
+ fs.userdata = NULL;
+
fs.find_string = find_string;
fs.replace_string = replace_string;
fs.case_counts = case_counts;
fs.whole_word = whole_word;
- fs.do_replace = TRUE;
- fs.select_item = FALSE;
- fs.send_update = FALSE;
+ fs.findLen = StringLen (find_string);
+ fs.replaceLen = StringLen (replace_string);
+ fs.select_item = FALSE;
+ fs.send_update = UPDATE_NEVER;
fs.did_find = FALSE;
fs.did_replace = FALSE;
fs.dirty = FALSE;
/* build Boyer-Moore displacement array in advance */
- fs.subLen = StringLen (find_string);
+ subLen = StringLen (find_string);
for (ch = 0; ch < 256; ch++) {
- fs.d [ch] = fs.subLen;
+ fs.d [ch] = subLen;
}
- for (j = 0; j < (int) (fs.subLen - 1); j++) {
+ for (j = 0; j < (int) (subLen - 1); j++) {
ch = (int) (case_counts ? find_string [j] : TO_UPPER (find_string [j]));
if (ch >= 0 && ch <= 255) {
- fs.d [ch] = fs.subLen - j - 1;
+ fs.d [ch] = subLen - j - 1;
}
}
diff --git a/api/findrepl.h b/api/findrepl.h
index 20df8b12..a23ffad4 100644
--- a/api/findrepl.h
+++ b/api/findrepl.h
@@ -44,6 +44,12 @@
* RCS Modification History:
* -------------------------
* $Log: findrepl.h,v $
+* Revision 6.5 2006/01/04 20:39:41 kans
+* added FindStringsInEntity using finite state machine, general cleanup of code
+*
+* Revision 6.4 2005/12/29 20:54:41 kans
+* FindReplaceInEntity takes callback and userdata
+*
* Revision 6.3 2003/07/31 20:54:54 kans
* FindReplaceString does not need do_replace argument
*
@@ -103,6 +109,8 @@ extern "C" {
#define UPDATE_EACH 1 /* send it on each replace */
#define UPDATE_ONCE 2 /* send once for whole entityID, if any replacements occur */
+typedef void (*FindReplProc) (Uint2 entityID, Uint2 itemID, Uint2 itemtype, Pointer userdata);
+
NLM_EXTERN void FindReplaceInEntity (
Uint2 entityID,
CharPtr find_string,
@@ -115,7 +123,9 @@ NLM_EXTERN void FindReplaceInEntity (
BoolPtr descFilter,
BoolPtr featFilter,
BoolPtr seqidFilter,
- Boolean do_seqid_local
+ Boolean do_seqid_local,
+ FindReplProc callback,
+ Pointer userdata
);
NLM_EXTERN void FindReplaceString (
@@ -126,6 +136,21 @@ NLM_EXTERN void FindReplaceString (
Boolean whole_word
);
+NLM_EXTERN void FindStringsInEntity (
+ Uint2 entityID,
+ CharPtr PNTR find_strings,
+ Boolean case_counts,
+ Boolean whole_word,
+ Boolean select_item,
+ Int2 send_update,
+ BoolPtr descFilter,
+ BoolPtr featFilter,
+ BoolPtr seqidFilter,
+ Boolean do_seqid_local,
+ FindReplProc callback,
+ Pointer userdata
+);
+
#ifdef __cplusplus
extern "C" }
diff --git a/api/salsap.c b/api/salsap.c
index 1be3b1b7..30f5dcd5 100644
--- a/api/salsap.c
+++ b/api/salsap.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/27/96
*
-* $Revision: 6.11 $
+* $Revision: 6.13 $
*
* File Description:
*
@@ -2250,6 +2250,650 @@ NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDeleteByLoc (SeqLocPtr slp, SeqAlignPtr s
return salp;
}
+/* Checks that, for every row, the non-gap segments in the run of num
+ * segments beginning at segment start are contiguous: each non-gap start
+ * must equal the previous non-gap start plus that segment's length.
+ * Rows treated as plus strand (no strands array, or Seq_strand_plus) are
+ * walked first-to-last; all other rows are walked last-to-first.
+ * Returns FALSE on bad arguments or on the first discontinuity.
+ */
+static Boolean AreDenseSegSegmentsValid (DenseSegPtr dsp, Int4 start, Int4 num)
+{
+  Int4 row, seg, step, remaining;
+  Int4 seg_start, expected;
+
+  if (dsp == NULL || start < 0 || num < 1) {
+    return FALSE;
+  }
+
+  for (row = 0; row < dsp->dim; row++) {
+    /* choose the walking direction for this row */
+    if (dsp->strands == NULL || dsp->strands[row] == Seq_strand_plus) {
+      seg = start;
+      step = 1;
+    } else {
+      seg = start + num - 1;
+      step = -1;
+    }
+
+    /* expected start of the next non-gap segment, or -1 if not yet known */
+    seg_start = dsp->starts [dsp->dim * seg + row];
+    if (seg_start > -1) {
+      expected = seg_start + dsp->lens [seg];
+    } else {
+      expected = -1;
+    }
+
+    for (remaining = num - 1; remaining > 0; remaining--) {
+      seg += step;
+      seg_start = dsp->starts [dsp->dim * seg + row];
+      if (seg_start == -1) {
+        continue; /* gap in this row, nothing to compare */
+      }
+      if (expected != -1 && seg_start != expected) {
+        return FALSE;
+      }
+      expected = seg_start + dsp->lens [seg];
+    }
+  }
+
+  return TRUE;
+}
+
+
+/* Writes segment A (the part of the original segment that precedes the
+ * insertion, when the insert row is not on the minus strand) into dsp_new
+ * at index *this_seg and advances *this_seg.  Every row copies its start
+ * from the original segment; non-gap minus strand rows are moved up by
+ * second_len.  Does nothing if any argument is out of range or first_len is 0.
+ */
+static void
+FillInPlusStrandInsertionSegmentA
+(DenseSegPtr dsp_orig,
+ DenseSegPtr dsp_new,
+ Int4        insert_start,
+ Int4        insert_len,
+ Int4        insert_row,
+ Int4        first_len,
+ Int4        second_len,
+ Int4        orig_segment,
+ Int4Ptr     this_seg)
+{
+  Int4 row, src, dst;
+
+  if (dsp_orig == NULL || dsp_new == NULL) return;
+  if (insert_start < 0 || insert_len < 0 || first_len < 0 || second_len < 0) return;
+  if (orig_segment < 0 || orig_segment >= dsp_orig->numseg) return;
+  if (this_seg == NULL || *this_seg < 0 || *this_seg >= dsp_new->numseg) return;
+
+  /* insertion falls at the very start of the segment - there is no A part */
+  if (first_len == 0) return;
+
+  for (row = 0; row < dsp_orig->dim; row++) {
+    src = orig_segment * dsp_orig->dim + row;
+    dst = (*this_seg) * dsp_new->dim + row;
+
+    dsp_new->starts[dst] = dsp_orig->starts[src];
+    if (dsp_orig->strands == NULL) continue;
+
+    dsp_new->strands[dst] = dsp_orig->strands[src];
+    if (dsp_orig->strands[src] == Seq_strand_minus && dsp_new->starts[dst] > -1) {
+      dsp_new->starts[dst] += second_len;
+    }
+  }
+
+  dsp_new->lens[*this_seg] = first_len;
+  (*this_seg)++;
+}
+
+
+/* Writes segment B (the gap segment for the inserted residues) into
+ * dsp_new at index *this_seg and advances *this_seg.  Every row is a gap
+ * (-1) except insert_row, which starts at insert_start; the segment length
+ * is insert_len.  Strands are copied from the original segment.
+ * Does nothing if any checked argument is out of range or insert_len is 0.
+ */
+static void
+FillInInsertionSegmentB
+(DenseSegPtr dsp_orig,
+ DenseSegPtr dsp_new,
+ Int4        insert_start,
+ Int4        insert_len,
+ Int4        insert_row,
+ Int4        first_len,
+ Int4        second_len,
+ Int4        orig_segment,
+ Int4Ptr     this_seg)
+{
+  Int4 row, src, dst;
+
+  if (dsp_orig == NULL || dsp_new == NULL) return;
+  if (insert_start < 0 || insert_len < 0 || first_len < 0 || second_len < 0) return;
+  if (orig_segment < 0 || orig_segment >= dsp_orig->numseg) return;
+  if (this_seg == NULL || *this_seg < 0 || *this_seg >= dsp_new->numseg) return;
+
+  /* nothing inserted - no gap segment needed */
+  if (insert_len == 0) return;
+
+  for (row = 0; row < dsp_orig->dim; row++) {
+    src = orig_segment * dsp_orig->dim + row;
+    dst = (*this_seg) * dsp_new->dim + row;
+
+    dsp_new->starts[dst] = -1;
+    if (dsp_orig->strands != NULL) {
+      dsp_new->strands[dst] = dsp_orig->strands[src];
+    }
+  }
+
+  /* only the row that received the insertion is present in this segment */
+  dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] = insert_start;
+
+  dsp_new->lens[*this_seg] = insert_len;
+  (*this_seg)++;
+}
+
+/* Writes segment C (the part of the original segment that follows the
+ * insertion, when the insert row is not on the minus strand) into dsp_new
+ * at index *this_seg and advances *this_seg.
+ *   - non-gap rows that are not minus strand start at original start + first_len
+ *   - minus strand rows and gap rows keep the original start
+ *   - insert_row is additionally shifted by insert_len
+ * Does nothing if any checked argument is out of range or second_len is 0.
+ */
+static void FillInPlusStrandInsertionSegmentC
+(DenseSegPtr dsp_orig,
+ DenseSegPtr dsp_new,
+ Int4        insert_start,
+ Int4        insert_len,
+ Int4        insert_row,
+ Int4        first_len,
+ Int4        second_len,
+ Int4        orig_segment,
+ Int4Ptr     this_seg)
+{
+  Int4 k, src, dst;
+
+  if (dsp_orig == NULL || dsp_new == NULL
+      || insert_start < 0 || insert_len < 0
+      || first_len < 0
+      || second_len < 0
+      || orig_segment < 0 || orig_segment >= dsp_orig->numseg
+      || this_seg == NULL
+      || *this_seg < 0 || *this_seg >= dsp_new->numseg)
+  {
+    return;
+  }
+
+  /* insertion falls at the very end of the segment - there is no C part */
+  if (second_len == 0)
+  {
+    return;
+  }
+
+  for (k = 0; k < dsp_orig->dim; k++)
+  {
+    src = orig_segment * dsp_orig->dim + k;
+    dst = (*this_seg) * dsp_new->dim + k;
+
+    dsp_new->starts[dst] = dsp_orig->starts[src];
+
+    /* The gap test must look at the ORIGINAL start: the destination slot
+     * has not been written yet, so testing it would treat every row as
+     * non-gap and turn a -1 gap marker into -1 + first_len.
+     */
+    if ((dsp_orig->strands == NULL
+         || dsp_orig->strands[src] != Seq_strand_minus)
+        && dsp_orig->starts[src] > -1)
+    {
+      dsp_new->starts[dst] += first_len;
+    }
+
+    if (dsp_orig->strands != NULL)
+    {
+      dsp_new->strands[dst] = dsp_orig->strands[src];
+    }
+  }
+
+  /* the insert row also skips over the inserted residues */
+  dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] += insert_len;
+
+  dsp_new->lens[*this_seg] = second_len;
+  (*this_seg)++;
+}
+
+
+/* Writes segment A for an insertion into a minus strand row: the new
+ * segment goes into dsp_new at index *this_seg, and *this_seg is advanced.
+ * Every row copies its start from the original segment; non-gap minus
+ * strand rows are moved up by second_len, and insert_row is additionally
+ * shifted by insert_len.  The segment length is first_len.
+ * Does nothing if any checked argument is out of range or first_len is 0.
+ */
+static void
+FillInMinusStrandInsertionSegmentA
+(DenseSegPtr dsp_orig,
+ DenseSegPtr dsp_new,
+ Int4        insert_start,
+ Int4        insert_len,
+ Int4        insert_row,
+ Int4        first_len,
+ Int4        second_len,
+ Int4        orig_segment,
+ Int4Ptr     this_seg)
+{
+  Int4 row, src, dst;
+
+  if (dsp_orig == NULL || dsp_new == NULL) return;
+  if (insert_start < 0 || insert_len < 0 || first_len < 0 || second_len < 0) return;
+  if (orig_segment < 0 || orig_segment >= dsp_orig->numseg) return;
+  if (this_seg == NULL || *this_seg < 0 || *this_seg >= dsp_new->numseg) return;
+
+  if (first_len == 0) return;
+
+  for (row = 0; row < dsp_orig->dim; row++) {
+    src = orig_segment * dsp_orig->dim + row;
+    dst = (*this_seg) * dsp_new->dim + row;
+
+    dsp_new->starts[dst] = dsp_orig->starts[src];
+    if (dsp_orig->strands == NULL) continue;
+
+    dsp_new->strands[dst] = dsp_orig->strands[src];
+    if (dsp_orig->strands[src] == Seq_strand_minus && dsp_new->starts[dst] != -1) {
+      dsp_new->starts[dst] += second_len;
+    }
+  }
+
+  /* the insert row also skips over the inserted residues */
+  dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] += insert_len;
+
+  dsp_new->lens[*this_seg] = first_len;
+  (*this_seg)++;
+}
+
+/* Writes segment C for an insertion into a minus strand row: the new
+ * segment goes into dsp_new at index *this_seg, and *this_seg is advanced.
+ * Every row copies its start from the original segment; non-gap rows that
+ * are not on the minus strand are moved up by first_len.  The segment
+ * length is second_len.
+ * Does nothing if any checked argument is out of range or second_len is 0.
+ */
+static void FillInMinusStrandInsertionSegmentC
+(DenseSegPtr dsp_orig,
+ DenseSegPtr dsp_new,
+ Int4        insert_start,
+ Int4        insert_len,
+ Int4        insert_row,
+ Int4        first_len,
+ Int4        second_len,
+ Int4        orig_segment,
+ Int4Ptr     this_seg)
+{
+  Int4    row, src, dst;
+  Boolean not_minus;
+
+  if (dsp_orig == NULL || dsp_new == NULL) return;
+  if (insert_start < 0 || insert_len < 0 || first_len < 0 || second_len < 0) return;
+  if (orig_segment < 0 || orig_segment >= dsp_orig->numseg) return;
+  if (this_seg == NULL || *this_seg < 0 || *this_seg >= dsp_new->numseg) return;
+
+  if (second_len == 0) return;
+
+  for (row = 0; row < dsp_orig->dim; row++) {
+    src = orig_segment * dsp_orig->dim + row;
+    dst = (*this_seg) * dsp_new->dim + row;
+
+    dsp_new->starts[dst] = dsp_orig->starts[src];
+
+    not_minus = (Boolean) (dsp_orig->strands == NULL
+                           || dsp_orig->strands[src] != Seq_strand_minus);
+    if (not_minus && dsp_new->starts[dst] != -1) {
+      dsp_new->starts[dst] += first_len;
+    }
+
+    if (dsp_orig->strands != NULL) {
+      dsp_new->strands[dst] = dsp_orig->strands[src];
+    }
+  }
+
+  dsp_new->lens[*this_seg] = second_len;
+  (*this_seg)++;
+}
+
+static void
+InsertInSegment
+(DenseSegPtr dsp_orig,
+ DenseSegPtr dsp_new,
+ Int4 insert_start,
+ Int4 insert_len,
+ Int4 insert_row,
+ Int4 orig_segment,
+ Int4Ptr this_segment)
+{
+ /* Replaces segment orig_segment of dsp_orig with up to three segments
+ * (A, B, C) written into dsp_new starting at index *this_segment;
+ * *this_segment is advanced past each segment actually written.
+ *
+ * The original segment needs to be replaced by either one or two segments
+ * in the new alignment.
+ * Call segment O the segment that contains insert_start.
+ * If insert_start == the start of segment O, only one additional segment
+ * will be needed, otherwise allocate space for two extra segments.
+ * If insert_row is a plus row:
+ * If insert_start == the start of segment O,
+ * the gap segment will be inserted immediately before
+ * segment O. For insert_row, all starts
+ * for segment O and beyond will be increased by insert_len.
+ * Otherwise, segment O will be replaced by segment (A) (a truncated
+ * version of segment O), a gap segment (B) will be
+ * inserted after A, and a third segment (C) will be inserted
+ * after segment B.
+ * Call first_len = insert_start - start of segment O on insert_row
+ * Call second_len = length of segment O on insert_row - first_len
+ * The length of the segment A will be first_len.
+ * The start of segment A for all plus strand rows will be
+ * the start of segment O.
+ * The start of segment A for all minus strand rows will be
+ * the start of segment O + second_len.
+ * The start of segment B for insert_row will be insert_start,
+ * The start of segment B for all other rows in the gap segment will be -1.
+ * The length of segment B will be insert_len.
+ * For insert_row, the start of segment C will be
+ * the start of segment O + first_len + insert_len.
+ * For all remaining plus strand rows, the start of segment C
+ * will be the start of segment O + first_len.
+ * For all minus strand rows, the start of segment C will be
+ * the start of segment O.
+ * The length of segment C will be second_len.
+ * If insert_row is a minus row:
+ * If insert_start == the start of segment O,
+ * the gap segment will be inserted immediately after
+ * segment O and all of the starts for insert_row
+ * before segment O will be increased by insert_len.
+ * Otherwise, segment O will be replaced by segment A, a gap segment (B),
+ * and segment C.
+ * Call first_len = start of segment O on insert_row + length of segment O on insert_row - insert_start
+ * Call second_len = insert_start - start of segment O on insert_row
+ * The length of segment A will be first_len.
+ * For every plus strand row, the start of segment A will be the start of
+ * segment O.
+ * For insert_row, the start of segment A will be the start of segment O + second_len + insert_len.
+ * For every other minus strand row, the start of segment A will be
+ * the start of segment O + second_len.
+ * The length of segment B will be insert_len.
+ * For insert_row, the start of segment B will be insert_start.
+ * For every other row, the start of segment B will be -1.
+ * The length of segment C will be second_len.
+ * For every minus row, the start of segment C will be the start of segment O.
+ * For every plus row, the start of segment C will be the start of segment O + first_len.
+ * For insert_row, the start of every segment prior to segment O will be increased
+ * by insert_len.
+ */
+
+ Int4 first_len, second_len;
+
+ /* the strand of insert_row is read from the first segment's strand entries */
+ if (dsp_orig->strands != NULL && dsp_orig->strands[insert_row] == Seq_strand_minus)
+ {
+ first_len = dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row]
+ + dsp_orig->lens [orig_segment] - insert_start;
+ second_len = insert_start - dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row];
+ /* each helper writes its segment only when the corresponding length is non-zero */
+ FillInMinusStrandInsertionSegmentA(dsp_orig, dsp_new, insert_start, insert_len,
+ insert_row, first_len, second_len,
+ orig_segment, this_segment);
+ FillInInsertionSegmentB(dsp_orig, dsp_new, insert_start, insert_len,
+ insert_row, first_len, second_len,
+ orig_segment, this_segment);
+ FillInMinusStrandInsertionSegmentC(dsp_orig, dsp_new, insert_start, insert_len,
+ insert_row, first_len, second_len,
+ orig_segment, this_segment);
+ }
+ else
+ {
+ /* no strands array, or any strand other than minus, takes the plus strand path */
+ first_len = insert_start - dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row];
+ second_len = dsp_orig->lens[orig_segment] - first_len;
+ FillInPlusStrandInsertionSegmentA(dsp_orig, dsp_new, insert_start, insert_len,
+ insert_row, first_len, second_len,
+ orig_segment, this_segment);
+ FillInInsertionSegmentB(dsp_orig, dsp_new, insert_start, insert_len,
+ insert_row, first_len, second_len,
+ orig_segment, this_segment);
+ FillInPlusStrandInsertionSegmentC(dsp_orig, dsp_new, insert_start, insert_len,
+ insert_row, first_len, second_len,
+ orig_segment, this_segment);
+ }
+}
+
+/* Returns the index of the first segment whose non-gap interval on
+ * insert_row contains insert_start (start <= insert_start < start + len),
+ * or -1 if the arguments are invalid or no segment contains the position.
+ * The same search is performed whatever the value of insert_strand.
+ */
+static Int4
+FindSegmentForInsertPoint
+(DenseSegPtr dsp,
+ Int4        insert_start,
+ Int4        insert_row,
+ Uint1       insert_strand)
+{
+  Int4 seg, seg_start;
+
+  if (dsp == NULL || insert_start < 0
+      || insert_row < 0 || insert_row >= dsp->dim) {
+    return -1;
+  }
+
+  for (seg = 0; seg < dsp->numseg; seg++) {
+    seg_start = dsp->starts [seg * dsp->dim + insert_row];
+    if (seg_start != -1
+        && seg_start <= insert_start
+        && seg_start + dsp->lens [seg] > insert_start) {
+      return seg;
+    }
+  }
+
+  return -1;
+}
+
+/* Copies up to num_to_copy consecutive segments (starts, lens and, when
+ * both alignments have them, strands) from dsp_orig beginning at start_seg
+ * into dsp_new beginning at copy_seg.  Copying stops when either alignment
+ * runs out of segments.  Index pairs where either index is negative are
+ * stepped over without being copied or counted.
+ */
+static void
+CopyDensegSegments
+(DenseSegPtr dsp_orig,
+ DenseSegPtr dsp_new,
+ Int4        start_seg,
+ Int4        copy_seg,
+ Int4        num_to_copy)
+{
+  Int4 copied, row, src, dst;
+
+  if (dsp_orig == NULL || dsp_new == NULL) {
+    return;
+  }
+
+  for (copied = 0;
+       start_seg < dsp_orig->numseg && copy_seg < dsp_new->numseg && copied < num_to_copy;
+       start_seg++, copy_seg++) {
+    if (start_seg < 0 || copy_seg < 0) {
+      continue;
+    }
+    for (row = 0; row < dsp_orig->dim && row < dsp_new->dim; row++) {
+      src = start_seg * dsp_orig->dim + row;
+      dst = copy_seg * dsp_new->dim + row;
+      dsp_new->starts [dst] = dsp_orig->starts [src];
+      if (dsp_orig->strands != NULL && dsp_new->strands != NULL) {
+        dsp_new->strands [dst] = dsp_orig->strands [src];
+      }
+    }
+    dsp_new->lens [copy_seg] = dsp_orig->lens [start_seg];
+    copied++;
+  }
+}
+
+
+/**************************************************
+***
+***  SeqAlignInsertByLoc
+***    Adjusts a Dense-seg alignment after residues described by slp
+***    have been inserted into one of its sequences.
+***    If the insertion is at or before the aligned region of that row
+***    (start <= from), the row's starts are simply shifted by the
+***    insertion length.  Otherwise the segment containing the insertion
+***    point is split and a gap segment, in which only the inserted row
+***    is present, is added.
+***    Returns salp (modified in place); alignments that are not
+***    Dense-seg, or that do not contain the sequence, are left untouched.
+***
+***************************************************/
+NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignInsertByLoc (SeqLocPtr slp, SeqAlignPtr salp)
+{
+  SeqIdPtr     sip;
+  DenseSegPtr  dsp, dsp_new;
+  Int4         from, start;
+  Int4         j;             /* Int4 to match the type of dsp->numseg */
+  Int2         index;
+  Int4         insert_len;
+  Int4         extra_segs;
+  Int4         new_numseg;
+  Uint1        insert_strand;
+  Int4         insert_seg;
+  Int4         orig_segment;
+
+  if (slp == NULL || salp == NULL || salp->segtype != SAS_DENSEG)
+    return salp;
+  sip = SeqLocId(slp);
+  insert_len = SeqLocLen (slp);
+  dsp = (DenseSegPtr) salp->segs;
+  if (dsp == NULL) {
+    return salp;
+  }
+
+  index = SeqIdOrderInBioseqIdList (sip, dsp->ids);
+  if (index == 0) {
+    /* bioseq not in alignment */
+    return salp;
+  }
+  index -= 1;
+  insert_strand = SeqAlignStrand (salp, index);
+
+  if (insert_strand == Seq_strand_minus)
+  {
+    from = SeqAlignStop (salp, index);
+  }
+  else
+  {
+    from = SeqAlignStart(salp, index);
+  }
+  start = SeqLocStart (slp);
+  if (start <= from)
+  {
+    /* just adjust the starts */
+    for (j = 0; j < dsp->numseg; j++)
+    {
+      if (dsp->starts [dsp->dim * j + index] > -1)
+      {
+        dsp->starts [dsp->dim * j + index] += insert_len;
+      }
+    }
+    return salp;
+  }
+
+  /* need to insert gap of length insert_len at start */
+  /* first, find affected segment */
+  insert_seg = FindSegmentForInsertPoint (dsp, start, index, insert_strand);
+  if (insert_seg < 0 || insert_seg >= dsp->numseg)
+  {
+    return salp;
+  }
+
+  /* one extra segment if the insertion is on a segment boundary, else two */
+  if (dsp->starts[dsp->dim * insert_seg + index] == start)
+  {
+    extra_segs = 1;
+  }
+  else
+  {
+    extra_segs = 2;
+  }
+  new_numseg = dsp->numseg + extra_segs;
+
+  dsp_new = (DenseSegPtr) MemNew (sizeof (DenseSeg));
+  if (dsp_new == NULL)
+  {
+    return salp;
+  }
+  dsp_new->dim = dsp->dim;
+  dsp_new->numseg = new_numseg;
+  dsp_new->starts = (Int4Ptr) MemNew (dsp->dim * new_numseg * sizeof (Int4));
+  if (dsp->strands != NULL)
+  {
+    dsp_new->strands = (Uint1Ptr) MemNew (dsp->dim * new_numseg * sizeof (Uint1));
+  }
+  dsp_new->lens = (Int4Ptr) MemNew (new_numseg * sizeof (Int4));
+
+  /* on allocation failure leave the original alignment untouched */
+  if (dsp_new->starts == NULL || dsp_new->lens == NULL
+      || (dsp->strands != NULL && dsp_new->strands == NULL))
+  {
+    MemFree (dsp_new->starts);
+    MemFree (dsp_new->strands);
+    MemFree (dsp_new->lens);
+    MemFree (dsp_new);
+    return salp;
+  }
+
+  /* copy alignment up to point of insertion */
+  CopyDensegSegments (dsp, dsp_new, 0, 0, insert_seg);
+
+  /* adjust starts in insert_row before insert_seg if insert_row on minus strand */
+  if (insert_strand == Seq_strand_minus)
+  {
+    for (j = 0; j < insert_seg; j++)
+    {
+      if (dsp_new->starts[dsp_new->dim * j + index] != -1)
+      {
+        dsp_new->starts[dsp_new->dim * j + index] += insert_len;
+      }
+    }
+  }
+
+  /* create gap - insert_seg is advanced past the segments written */
+  orig_segment = insert_seg;
+  InsertInSegment (dsp, dsp_new, start, insert_len, index, orig_segment, &insert_seg);
+
+  /* Copy the segments that follow the split segment */
+  CopyDensegSegments (dsp, dsp_new, orig_segment + 1, insert_seg, dsp->numseg - orig_segment - 1);
+
+  /* Adjust starts in insert row after insert_seg unless insert_row is on the
+   * minus strand.  InsertInSegment takes the plus strand path for every
+   * strand value other than minus, so the same test is used here to keep
+   * the following segments contiguous with segment C.
+   */
+  if (insert_strand != Seq_strand_minus)
+  {
+    while (insert_seg < dsp_new->numseg)
+    {
+      if (dsp_new->starts[dsp_new->dim * insert_seg + index] != -1)
+      {
+        dsp_new->starts[dsp_new->dim * insert_seg + index] += insert_len;
+      }
+      insert_seg++;
+    }
+  }
+
+  /* move the new arrays into the old DenseSeg */
+  MemFree (dsp->starts);
+  dsp->starts = dsp_new->starts;
+  dsp_new->starts = NULL;
+  MemFree (dsp->strands);
+  dsp->strands = dsp_new->strands;
+  dsp_new->strands = NULL;
+  MemFree (dsp->lens);
+  dsp->lens = dsp_new->lens;
+  dsp_new->lens = NULL;
+  dsp->numseg = dsp_new->numseg;
+
+  /* the temporary DenseSeg shell is no longer needed */
+  MemFree (dsp_new);
+
+  return salp;
+}
+
+
/*******************************************
***
*** DeleteRegion
diff --git a/api/salsap.h b/api/salsap.h
index c48112f4..f06d8f0e 100644
--- a/api/salsap.h
+++ b/api/salsap.h
@@ -28,13 +28,18 @@
*
* Version Creation Date: 1/27/96
*
-* $Revision: 6.3 $
+* $Revision: 6.4 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: salsap.h,v $
+* Revision 6.4 2006/01/10 22:27:06 bollin
+* added function SeqAlignInsertByLoc, to be used when nucleotides are inserted
+* into a sequence that is in an alignment. An extra segment, where all sequences
+* except the one with the insertion are gapped, is created.
+*
* Revision 6.3 1999/11/24 21:24:28 vakatov
* Fixed for the C++ and/or MSVC DLL compilation
*
@@ -156,6 +161,7 @@ NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignEndExtend (SeqAlignPtr sap, Int4 start1,
Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2,
Uint1 strand1, Uint1 strand2);
+NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignInsertByLoc (SeqLocPtr slp, SeqAlignPtr salp);
/**Delete, Truncate**/
NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDeleteByLoc (SeqLocPtr slp, SeqAlignPtr salp);
diff --git a/api/seqmgr.c b/api/seqmgr.c
index fe80abac..ecefefa8 100644
--- a/api/seqmgr.c
+++ b/api/seqmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.252 $
+* $Revision: 6.259 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -39,6 +39,27 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqmgr.c,v $
+* Revision 6.259 2006/02/17 19:05:05 kans
+* special case coded_by only for CDS feature on isolated protein bioseq
+*
+* Revision 6.258 2006/02/17 18:46:20 kans
+* get gene overlapping coded_by CDS on isolated protein bioseq within flatfile generator, not feature indexer
+*
+* Revision 6.257 2006/02/17 17:24:24 kans
+* changes to index CDS feature (with ignore flag) on isolated protein bioseq, xref gene feature
+*
+* Revision 6.256 2006/02/16 22:00:55 kans
+* always pass FALSE for circular to CheckForTransSplice for more stringency
+*
+* Revision 6.255 2006/02/16 21:09:20 kans
+* SeqMgrGetBestOverlappingFeat takes new parameter from get best gene by overlap, uses LOCATION_SUBSET if gene candidate is bad_order or mixed_strand
+*
+* Revision 6.254 2006/02/16 20:24:32 kans
+* added bad_order and mixed_strand fields to feature index - to be used for get best gene overlap function in cases of trans-splicing
+*
+* Revision 6.253 2006/01/20 20:12:21 kans
+* in LockAllSegments, bail if BioseqLockById returns NULL
+*
* Revision 6.252 2005/09/21 19:20:45 kans
* SeqMgrGetNextAnnotDesc sets context->index properly
*
@@ -5373,6 +5394,8 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetDesiredFeature (Uint2 entityID, BioseqPtr
context->partialL = item->partialL;
context->partialR = item->partialR;
context->farloc = item->farloc;
+ context->bad_order = item->bad_order;
+ context->mixed_strand = item->mixed_strand;
context->strand = item->strand;
if (curr != NULL) {
context->seqfeattype = curr->data.choice;
@@ -5665,6 +5688,7 @@ NLM_EXTERN AnnotDescPtr LIBCALL SeqMgrGetDesiredAnnotDesc (
*****************************************************************************/
typedef struct extraindex {
+ SeqEntryPtr topsep;
BioseqPtr lastbsp;
SeqAnnotPtr lastsap;
BioseqSetPtr lastbssp;
@@ -5764,6 +5788,7 @@ NLM_EXTERN void LIBCALL SeqMgrIndexAlignments (Uint2 entityID)
/* count alignments */
+ exind.topsep = NULL;
exind.lastbsp = NULL;
exind.lastsap = NULL;
exind.lastbssp = NULL;
@@ -6004,7 +6029,8 @@ static void ProcessFeatureProducts (SeqFeatPtr sfp, Uint2 itemID, GatherObjectPt
static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
BioseqPtr bsp, ExtraIndexPtr exindx, SeqFeatPtr sfp,
Int4 left, Int4 right, Uint4 itemID, Uint2 subtype,
- Boolean farloc, Boolean ignore)
+ Boolean farloc, Boolean bad_order, Boolean mixed_strand,
+ Boolean ignore)
{
Char buf [129];
@@ -6084,6 +6110,8 @@ static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
item->dnaStop = -1;
CheckSeqLocForPartial (sfp->location, &(item->partialL), &(item->partialR));
item->farloc = farloc;
+ item->bad_order = bad_order;
+ item->mixed_strand = mixed_strand;
/*
item->strand = SeqLocStrand (sfp->location);
if (exindx->flip) {
@@ -6159,6 +6187,105 @@ static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
}
}
+
+/* Scans the SEQLOC_INT pieces of sfp->location in order and reports
+ * whether consecutive intervals on the same bioseq are out of order
+ * (*bad_orderP set to TRUE) or on different strands (*mixed_strandP set
+ * to TRUE).  The output flags are only ever set, never cleared, so the
+ * caller must initialize them.  Order checking is skipped when circular
+ * is TRUE.  Plus versus unknown strand is not treated as mixed.
+ */
+static void CheckForTransSplice (
+  SeqFeatPtr sfp,
+  BoolPtr bad_orderP,
+  BoolPtr mixed_strandP,
+  Boolean circular
+)
+
+{
+  Boolean    mixed_strand = FALSE, ordered = TRUE;
+  Boolean    plus_vs_unknown;
+  SeqIdPtr   id1 = NULL, id2;
+  SeqLocPtr  tmp = NULL;
+  SeqIntPtr  sip1, sip2, prevsip = NULL;
+  Uint1      strand1 = Seq_strand_other, strand2;
+
+  if (sfp == NULL || sfp->location == NULL) return;
+
+  while ((tmp = SeqLocFindNext (sfp->location, tmp)) != NULL) {
+
+    /* just check seqloc_interval */
+
+    if (tmp->choice != SEQLOC_INT) continue;
+
+    sip1 = prevsip;
+    sip2 = (SeqIntPtr) (tmp->data.ptrvalue);
+    strand2 = sip2->strand;
+    id2 = sip2->id;
+
+    /* compare interval ends against the previous interval on the same bioseq */
+
+    if ((sip1 != NULL) && (ordered) && (! circular)
+        && SeqIdForSameBioseq (sip1->id, sip2->id)) {
+      if (strand2 == Seq_strand_minus) {
+        if (sip1->to < sip2->to) {
+          ordered = FALSE;
+        }
+      } else if (sip1->to > sip2->to) {
+        ordered = FALSE;
+      }
+    }
+    prevsip = sip2;
+
+    /* compare strands against the previous interval on the same bioseq */
+
+    if ((strand1 != Seq_strand_other) && (strand2 != Seq_strand_other)
+        && SeqIdForSameBioseq (id1, id2)
+        && strand1 != strand2) {
+      plus_vs_unknown = (Boolean)
+        ((strand1 == Seq_strand_plus && strand2 == Seq_strand_unknown) ||
+         (strand1 == Seq_strand_unknown && strand2 == Seq_strand_plus));
+      if (! plus_vs_unknown) {
+        mixed_strand = TRUE;
+      }
+    }
+
+    strand1 = strand2;
+    id1 = id2;
+  }
+
+  /* ordering does not matter for publication intervals or heterogen bonds, */
+  /* misc_recomb intervals SHOULD be in reverse order, */
+  /* and primer_bind intervals are exempt from both checks */
+
+  if (sfp->idx.subtype == FEATDEF_PUB ||
+      sfp->data.choice == SEQFEAT_HET ||
+      sfp->idx.subtype == FEATDEF_misc_recomb ||
+      sfp->idx.subtype == FEATDEF_primer_bind) {
+    ordered = TRUE;
+  }
+
+  /* primer_bind intervals MAY be on opposite strands */
+
+  if (sfp->idx.subtype == FEATDEF_primer_bind) {
+    mixed_strand = FALSE;
+  }
+
+  if (! ordered) {
+    *bad_orderP = TRUE;
+  }
+  if (mixed_strand) {
+    *mixed_strandP = TRUE;
+  }
+}
+
+
typedef struct adpbspdata {
AnnotDescPtr adp;
BioseqPtr bsp;
@@ -6171,6 +6298,7 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
{
AdpBspPtr abp;
AnnotDescPtr adp = NULL;
+ Boolean bad_order;
BioseqPtr bsp = NULL;
BioseqExtraPtr bspextra;
BioseqSetPtr bssp = NULL;
@@ -6181,6 +6309,7 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
ExtraIndexPtr exindx;
Int4 left;
CharPtr loclbl;
+ Boolean mixed_strand;
ObjMgrDataPtr omdp;
ProtRefPtr prp;
Int4 right;
@@ -6189,6 +6318,7 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
SeqFeatPtr sfp = NULL;
SeqAlignPtr sal = NULL;
SeqLocPtr slp;
+ Boolean special_case = FALSE;
Int4 swap;
SeqFeatPtr tmp;
Boolean usingLocalBsp = FALSE;
@@ -6376,8 +6506,18 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
}
MemFree (ctmp);
- if (bsp == NULL) return TRUE;
- usingLocalBsp = TRUE;
+ if (bsp == NULL && sfp->product != NULL &&
+ sfp->data.choice == SEQFEAT_CDREGION &&
+ IS_Bioseq (exindx->topsep)) {
+ bsp = (BioseqPtr) exindx->topsep->data.ptrvalue;
+ if (bsp == NULL || (! ISA_aa (bsp->mol))) return TRUE;
+ special_case = TRUE;
+ bsp = FindAppropriateBioseq (sfp->product, exindx->lastbsp);
+ if (bsp == NULL) return TRUE;
+ } else {
+ if (bsp == NULL) return TRUE;
+ usingLocalBsp = TRUE;
+ }
}
/* assume subsequent features will be on this bioseq */
@@ -6403,7 +6543,11 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
/*
slp = SeqLocMergeEx (bsp, sfp->location, NULL, TRUE, TRUE, FALSE, FALSE);
*/
- slp = sfp->location;
+ if (special_case) {
+ slp = sfp->product;
+ } else {
+ slp = sfp->location;
+ }
left = GetOffsetInNearBioseq (slp, bsp, SEQLOC_LEFT_END);
right = GetOffsetInNearBioseq (slp, bsp, SEQLOC_RIGHT_END);
/*
@@ -6517,8 +6661,13 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
right = swap;
}
+ bad_order = FALSE;
+ mixed_strand = FALSE;
+ CheckForTransSplice (sfp, &bad_order, &mixed_strand, /* (Boolean) (bsp->topology == TOPOLOGY_CIRCULAR) */ FALSE);
+
RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left,
- right, gop->itemID, gop->subtype, usingLocalBsp, FALSE);
+ right, gop->itemID, gop->subtype, usingLocalBsp,
+ bad_order, mixed_strand, special_case);
/* record gene, publication, and biosource features twice if spanning the origin */
@@ -6529,7 +6678,8 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
sfp->idx.subtype == FEATDEF_operon) {
RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left + bsp->length,
- right + bsp->length, gop->itemID, gop->subtype, usingLocalBsp, TRUE);
+ right + bsp->length, gop->itemID, gop->subtype, usingLocalBsp,
+ bad_order, mixed_strand, TRUE);
}
}
@@ -8270,6 +8420,7 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx (
/* gather all segmented locations */
+ exind.topsep = sep;
exind.lastbsp = NULL;
exind.lastsap = NULL;
exind.lastbssp = NULL;
@@ -8302,6 +8453,7 @@ NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx (
/* now gather to get descriptor itemID counts on each bioseq or bioseq set,
and record features on the bioseq indicated by the feature location */
+ exind.topsep = sep;
exind.lastbsp = NULL;
exind.lastsap = NULL;
exind.lastbssp = NULL;
@@ -8514,6 +8666,8 @@ static void SetContextForFeature (SeqFeatPtr sfp, SeqMgrFeatContext PNTR context
context->partialL = best->partialL;
context->partialR = best->partialR;
context->farloc = best->farloc;
+ context->bad_order = best->bad_order;
+ context->mixed_strand = best->mixed_strand;
context->strand = best->strand;
if (bst != NULL) {
context->seqfeattype = bst->data.choice;
@@ -8826,6 +8980,8 @@ static void SeqMgrBestOverlapSetContext (
context->partialL = best->partialL;
context->partialR = best->partialR;
context->farloc = best->farloc;
+ context->bad_order = best->bad_order;
+ context->mixed_strand = best->mixed_strand;
context->strand = best->strand;
if (bst != NULL) {
context->seqfeattype = bst->data.choice;
@@ -8841,14 +8997,19 @@ static void SeqMgrBestOverlapSetContext (
}
}
-static SeqFeatPtr SeqMgrGetBestOverlappingFeat (SeqLocPtr slp, Uint2 subtype,
- SMFeatItemPtr PNTR array,
- Int4 num, Int4Ptr pos,
- Int2 overlapType,
- SeqMgrFeatContext PNTR context,
- Int2Ptr count,
- Pointer userdata,
- SeqMgrFeatExploreProc userfunc)
+static SeqFeatPtr SeqMgrGetBestOverlappingFeat (
+ SeqLocPtr slp,
+ Uint2 subtype,
+ SMFeatItemPtr PNTR array,
+ Int4 num,
+ Int4Ptr pos,
+ Int2 overlapType,
+ SeqMgrFeatContext PNTR context,
+ Int2Ptr count,
+ Pointer userdata,
+ SeqMgrFeatExploreProc userfunc,
+ Boolean special
+)
{
SMFeatItemPtr best = NULL;
@@ -9028,7 +9189,11 @@ static SeqFeatPtr SeqMgrGetBestOverlappingFeat (SeqLocPtr slp, Uint2 subtype,
/* requires feature to be contained within gene, etc. */
- diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
+ if (special && (feat->bad_order || feat->mixed_strand)) {
+ diff = TestForOverlap (feat, slp, left, right, LOCATION_SUBSET, numivals, ivals);
+ } else {
+ diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
+ }
if (diff >= 0) {
if (StrandsMatch (feat->strand, strand)) {
@@ -9063,7 +9228,11 @@ static SeqFeatPtr SeqMgrGetBestOverlappingFeat (SeqLocPtr slp, Uint2 subtype,
feat = array [hier];
if (feat != NULL && ((! feat->ignore) || userfunc == NULL)) {
- diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
+ if (special && (feat->bad_order || feat->mixed_strand)) {
+ diff = TestForOverlap (feat, slp, left, right, LOCATION_SUBSET, numivals, ivals);
+ } else {
+ diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
+ }
if (diff >= 0) {
if (StrandsMatch (feat->strand, strand)) {
@@ -9134,43 +9303,43 @@ NLM_EXTERN Int4 TestFeatOverlap (SeqFeatPtr sfpA, SeqFeatPtr sfpB, Int2 overlapT
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingGene (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
{
- return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_GENE, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_GENE, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, TRUE); /* final TRUE is the "special" flag: candidates indexed as bad_order or mixed_strand are tested with LOCATION_SUBSET instead of CONTAINED_WITHIN */
}
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingmRNA (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
{
- return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
}
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetLocationSupersetmRNA (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
{
- return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, LOCATION_SUBSET, context, NULL, NULL, NULL);
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, LOCATION_SUBSET, context, NULL, NULL, NULL, FALSE);
}
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingCDS (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
{
- return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_CDS, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_CDS, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
}
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingPub (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
{
- return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_PUB, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_PUB, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
}
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingSource (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
{
- return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_BIOSRC, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_BIOSRC, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
}
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingOperon (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
{
- return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_operon, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_operon, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
}
/*****************************************************************************
@@ -9275,6 +9444,8 @@ static SeqFeatPtr LIBCALL SeqMgrGetFeatureByLabelEx (BioseqPtr bsp, CharPtr labe
context->partialL = feat->partialL;
context->partialR = feat->partialR;
context->farloc = feat->farloc;
+ context->bad_order = feat->bad_order;
+ context->mixed_strand = feat->mixed_strand;
context->strand = feat->strand;
context->seqfeattype = seqfeattype;
context->featdeftype = feat->subtype;
@@ -9398,6 +9569,8 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureByFeatID (
context->partialL = feat->partialL;
context->partialR = feat->partialR;
context->farloc = feat->farloc;
+ context->bad_order = feat->bad_order;
+ context->mixed_strand = feat->mixed_strand;
context->strand = feat->strand;
context->seqfeattype = sfp->data.choice;;
context->featdeftype = feat->subtype;
@@ -9519,7 +9692,7 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature (SeqLocPtr slp, Uint2
{
return SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
- numfeats, position, overlapType, context, NULL, NULL, NULL);
+ numfeats, position, overlapType, context, NULL, NULL, NULL, FALSE);
}
NLM_EXTERN Int2 LIBCALL SeqMgrGetAllOverlappingFeatures (SeqLocPtr slp, Uint2 subtype,
@@ -9535,7 +9708,7 @@ NLM_EXTERN Int2 LIBCALL SeqMgrGetAllOverlappingFeatures (SeqLocPtr slp, Uint2 su
SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
numfeats, NULL, overlapType, &context, &count,
- userdata, userfunc);
+ userdata, userfunc, FALSE);
return count;
}
@@ -9579,6 +9752,8 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureInIndex (BioseqPtr bsp, VoidPtr fe
context->partialL = item->partialL;
context->partialR = item->partialR;
context->farloc = item->farloc;
+ context->bad_order = item->bad_order;
+ context->mixed_strand = item->mixed_strand;
context->strand = item->strand;
if (curr != NULL) {
context->seqfeattype = curr->data.choice;
@@ -9781,6 +9956,8 @@ static SeqFeatPtr LIBCALL SeqMgrGetNextFeatureEx (BioseqPtr bsp, SeqFeatPtr curr
context->partialL = item->partialL;
context->partialR = item->partialR;
context->farloc = item->farloc;
+ context->bad_order = item->bad_order;
+ context->mixed_strand = item->mixed_strand;
context->strand = item->strand;
context->seqfeattype = seqfeattype;
context->featdeftype = item->subtype;
@@ -10258,6 +10435,8 @@ static Int4 LIBCALL SeqMgrExploreFeaturesInt (BioseqPtr bsp, Pointer userdata,
context.partialL = item->partialL;
context.partialR = item->partialR;
context.farloc = item->farloc;
+ context.bad_order = item->bad_order;
+ context.mixed_strand = item->mixed_strand;
context.strand = item->strand;
context.seqfeattype = seqfeattype;
context.featdeftype = item->subtype;
@@ -10421,6 +10600,8 @@ NLM_EXTERN Int2 LIBCALL SeqMgrVisitFeatures (Uint2 entityID, Pointer userdata,
context.partialL = item->partialL;
context.partialR = item->partialR;
context.farloc = item->farloc;
+ context.bad_order = item->bad_order;
+ context.mixed_strand = item->mixed_strand;
context.strand = item->strand;
context.seqfeattype = seqfeattype;
context.featdeftype = item->subtype;
@@ -10812,6 +10993,7 @@ static void LockAllSegments (SeqLocPtr slp, ValNodePtr PNTR vnpp)
}
bsp = BioseqLockById (sip);
+ if (bsp == NULL) return;
ValNodeAddPointer (vnpp, 0, (Pointer) bsp);
/* now recurse if component is also far delta or seg */
diff --git a/api/seqmgr.h b/api/seqmgr.h
index aa1b06d8..c80ab55e 100644
--- a/api/seqmgr.h
+++ b/api/seqmgr.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.58 $
+* $Revision: 6.59 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -40,6 +40,9 @@
*
*
* $Log: seqmgr.h,v $
+* Revision 6.59 2006/02/16 20:24:32 kans
+* added bad_order and mixed_strand fields to feature index - to be used for get best gene overlap function in cases of trans-splicing
+*
* Revision 6.58 2005/08/18 21:02:34 kans
* defined SMFidItemPtr structure and added featsByFeatID and numfids fields, in preparation for indexing by feature ID
*
@@ -884,24 +887,26 @@ NLM_EXTERN void FreeSeqIdGiCache (void);
/* the following structures are not frequently used directly by applications */
typedef struct smfeatitem {
- SeqFeatPtr sfp; /* freed when TL_CACHED, later will implement reassignment when reloaded */
- SeqAnnotPtr sap; /* SeqAnnot containing SeqFeat, same reap/reload criteria as above */
- BioseqPtr bsp; /* Bioseq on which this feature is indexed */
- CharPtr label; /* featdef content label */
- Int4 left; /* extreme left on bioseq (first copy spanning origin is < 1) */
- Int4 right; /* extreme right on bioseq (second copy spanning origin is > length) */
- Int4Ptr ivals; /* array of start/stop pairs */
- Int2 numivals; /* number of start/stop pairs in ivals array */
- Int4 dnaStop; /* last stop on protein mapped to DNA coordinate for flatfile */
- Boolean partialL; /* left end is partial */
- Boolean partialR; /* right end is partial */
- Boolean farloc; /* location has an accession not packaged in entity */
- Uint1 strand; /* strand (mapped to segmented bioseq if segmented) */
- Uint1 subtype; /* featdef subtype */
- Uint4 itemID; /* storing itemID so no need to gather again */
- Boolean ignore; /* ignore this second copy of a feature spanning the origin */
- Uint4 index; /* position index needed for SeqMgrGetDesiredFeature */
- Int4 overlap; /* for xxxByPos, index of leftmost candidate that overlaps this */
+ SeqFeatPtr sfp; /* freed when TL_CACHED, later will implement reassignment when reloaded */
+ SeqAnnotPtr sap; /* SeqAnnot containing SeqFeat, same reap/reload criteria as above */
+ BioseqPtr bsp; /* Bioseq on which this feature is indexed */
+ CharPtr label; /* featdef content label */
+ Int4 left; /* extreme left on bioseq (first copy spanning origin is < 1) */
+ Int4 right; /* extreme right on bioseq (second copy spanning origin is > length) */
+ Int4Ptr ivals; /* array of start/stop pairs */
+ Int2 numivals; /* number of start/stop pairs in ivals array */
+ Int4 dnaStop; /* last stop on protein mapped to DNA coordinate for flatfile */
+ Boolean partialL; /* left end is partial */
+ Boolean partialR; /* right end is partial */
+ Boolean farloc; /* location has an accession not packaged in entity */
+ Boolean bad_order; /* location is out of order - possibly trans-spliced */
+ Boolean mixed_strand; /* location has mixed strands - possibly trans-spliced */
+ Uint1 strand; /* strand (mapped to segmented bioseq if segmented) */
+ Uint1 subtype; /* featdef subtype */
+ Uint4 itemID; /* storing itemID so no need to gather again */
+ Boolean ignore; /* ignore this second copy of a feature spanning the origin */
+ Uint4 index; /* position index needed for SeqMgrGetDesiredFeature */
+ Int4 overlap; /* for xxxByPos, index of leftmost candidate that overlaps this */
} SMFeatItem, PNTR SMFeatItemPtr;
typedef struct smfeatblock {
diff --git a/api/seqport.c b/api/seqport.c
index fcaf4bd6..89550a8a 100644
--- a/api/seqport.c
+++ b/api/seqport.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.144 $
+* $Revision: 6.147 $
*
* File Description: Ports onto Bioseqs
*
@@ -39,6 +39,16 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqport.c,v $
+* Revision 6.147 2006/01/23 13:01:41 bollin
+* when converting sequences from raw to delta, adjust any alignments that the
+* sequence may be part of.
+*
+* Revision 6.146 2005/12/16 20:19:56 bollin
+* only allow reverse for delta sequences when the delta sequence has no far locations
+*
+* Revision 6.145 2005/12/15 19:45:24 bollin
+* added functions to reverse and complement delta sequences
+*
* Revision 6.144 2005/08/24 15:14:31 kans
* modified MolWtForLoc to use StreamCache, added MolWtForBsp and MolWtForStr
*
@@ -597,6 +607,7 @@ static char *this_file = __FILE__;
#include <subutil.h>
#include <tofasta.h> /* for FastaSeqLineEx function */
#include <salutil.h>
+#include <alignmgr2.h> /* for correcting alignments when converting to delta */
NLM_EXTERN Boolean LIBCALL SeqPortAdjustLength (SeqPortPtr spp);
@@ -4989,45 +5000,19 @@ NLM_EXTERN Boolean LIBCALL BioseqRevComp (BioseqPtr bsp)
return retval;
}
-/*-------------- BioseqComplement () ---------------------------*/
-/***********************************************************************
-* BioseqComplement: Takes the nucleic acid sequence from Bioseq
-* Entry and gives the complement sequence in place
-* Does not change features.
-************************************************************************/
-NLM_EXTERN Boolean LIBCALL BioseqComplement (BioseqPtr bsp)
+static Boolean ComplementSeqData (Uint1 seqtype, Int4 seqlen, ByteStorePtr bysp)
{
SeqCodeTablePtr sctp;
- ByteStorePtr bysp;
- long readbyte, bslen;
- Int4 seqlen;
- Uint1 seqtype, byte = 0, byte_to, newbyte = 0, residue;
+ long readbyte, bslen;
+ Uint1 byte = 0, byte_to, newbyte = 0, residue;
Uint1 comp, bitctr, mask, lshift, rshift, bc;
- if (bsp == NULL)
- {
- ErrPostEx(SEV_ERROR,0,0, "Error: not a BioseqPtr\n");
- return FALSE;
- }
-
- if (bsp->repr != Seq_repr_raw)
- {
- ErrPostEx(SEV_ERROR,0,0, "Error: not a raw sequence\n");
- return FALSE;
- }
-
- if (bsp->seq_data == NULL)
+ if (bysp == NULL)
{
ErrPostEx(SEV_ERROR,0,0, "Error: no sequence data\n");
- return FALSE;
+ return FALSE;
}
- seqtype = bsp->seq_data_type;
- if ( ISA_aa(bsp->mol)) {
- ErrPostEx(SEV_ERROR,0,0, "Error: cannot complement aa\n");
- return FALSE;
- }
-
if ((sctp = SeqCodeTableFind (seqtype)) == NULL)
{
ErrPostEx(SEV_ERROR,0,0, "Can't open table\n");
@@ -5056,22 +5041,21 @@ NLM_EXTERN Boolean LIBCALL BioseqComplement (BioseqPtr bsp)
lshift = 0;
mask = 255;
break;
- case Seq_code_iupacaa:
- case Seq_code_ncbi8aa:
- case Seq_code_ncbieaa:
- case Seq_code_ncbipaa:
- case Seq_code_iupacaa3:
- case Seq_code_ncbistdaa: /* ignore amino acid */
- ErrPostEx(SEV_ERROR,0,0, "Error: cannot complement aa ; No ->mol flag on Bioseq\n");
- return FALSE;
- case Seq_code_ncbipna:
- ErrPostEx(SEV_WARNING,0,0, "Error: Don't yet know how to complement profile\n");
+ case Seq_code_iupacaa:
+ case Seq_code_ncbi8aa:
+ case Seq_code_ncbieaa:
+ case Seq_code_ncbipaa:
+ case Seq_code_iupacaa3:
+ case Seq_code_ncbistdaa: /* ignore amino acid */
+ ErrPostEx(SEV_ERROR,0,0, "Error: cannot complement aa ; No ->mol flag on Bioseq\n");
+ return FALSE;
+ case Seq_code_ncbipna:
+ ErrPostEx(SEV_WARNING,0,0, "Error: Don't yet know how to complement profile\n");
+ return FALSE;
default:
return FALSE;
}
- seqlen = bsp->length;
- bysp = bsp->seq_data;
bslen = BSLen(bysp);
bitctr = 0;
readbyte = 0;
@@ -5110,46 +5094,100 @@ together*/
}
}
return TRUE;
+
+}
-} /* BioseqComplement */
-
-/*-------------- BioseqReverse () ---------------------------*/
+static Boolean DeltaBioseqComplement (BioseqPtr bsp) /* complement, in place, the data of every literal in a delta Bioseq */
+{
+ DeltaSeqPtr dsp;
+ SeqLitPtr slip;
+ Boolean rval = FALSE;
+ /* a NULL or non-delta Bioseq returns FALSE without posting an error */
+ if (bsp == NULL || bsp->repr != Seq_repr_delta)
+ {
+ return rval;
+ }
+ /* pass 1: read-only scan, so a rejected sequence is returned unmodified */
+ dsp = (DeltaSeqPtr) bsp->seq_ext;
+ while (dsp != NULL)
+ {
+ if (dsp->choice != 2) /* only choice 2 entries are handled (cast to SeqLitPtr below); others are reported as far locs */
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: Can't complement delta sequences with far locs\n");
+ return FALSE;
+ }
+ dsp = dsp->next;
+ }
+ rval = TRUE; /* pass 2: stays TRUE only if every ComplementSeqData call below returns TRUE */
+ dsp = (DeltaSeqPtr) bsp->seq_ext;
+ while (dsp != NULL)
+ {
+ slip = (SeqLitPtr) dsp->data.ptrvalue; /* NOTE(review): slip itself is not NULL-checked before use */
+ /* complement data; literals with no seq_data (presumably gaps) are skipped */
+ if (slip->seq_data != NULL)
+ {
+ rval &= ComplementSeqData (slip->seq_data_type, slip->length, slip->seq_data);
+ }
+ dsp = dsp->next; /* a failed literal does not stop the loop; later literals are still complemented */
+ }
+ return rval;
+}
+
+
+/*-------------- BioseqComplement () ---------------------------*/
/***********************************************************************
-* BioseqReverse: Takes nucleic acid sequence from Bioseq Entry and
-* reverses the whole sequence in place
+* BioseqComplement: Takes the nucleic acid sequence from Bioseq
+* Entry and gives the complement sequence in place
* Does not change features.
************************************************************************/
-NLM_EXTERN Boolean LIBCALL BioseqReverse (BioseqPtr bsp)
+NLM_EXTERN Boolean LIBCALL BioseqComplement (BioseqPtr bsp)
+{
+ Boolean rval = FALSE;
+ /* dispatch on molecule type, then representation; rejections made here post an error and yield FALSE */
+ if (bsp == NULL)
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: not a BioseqPtr\n");
+ rval = FALSE;
+ }
+ else if (ISA_aa(bsp->mol)) /* tested before repr, so amino acid Bioseqs are rejected whether delta or raw */
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: cannot complement aa\n");
+ rval = FALSE;
+ }
+ else if (bsp->repr == Seq_repr_delta)
+ {
+ rval = DeltaBioseqComplement (bsp); /* complements each literal of the delta chain */
+ }
+ else if (bsp->repr == Seq_repr_raw)
+ {
+ rval = ComplementSeqData (bsp->seq_data_type, bsp->length, bsp->seq_data); /* raw: complement the Bioseq's own seq_data */
+ }
+ else /* any representation other than raw or delta is unsupported */
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: not a raw or delta sequence\n");
+ rval = FALSE;
+ }
+ return rval;
+
+} /* BioseqComplement */
+
+
+static Boolean LIBCALL ReverseSeqData (Uint1 seqtype, Int4 seqlen, ByteStorePtr bysp1)
{
- ByteStorePtr bysp1 = '\0';
ByteStorePtr bysp2 = '\0';
long readbyte, bslen = 0;
- Int4 seqlen, count = 0;
- Uint1 seqtype, byte = 0, byte2, byte_to = 0, byte_to2, newbyte = 0;
+ Int4 count = 0;
+ Uint1 byte = 0, byte2, byte_to = 0, byte_to2, newbyte = 0;
Uint1 newbyte2, finalbyte, residue, residue2, bitctr, bc2 = 0;
Uint1 bitctr2, mask, mask2, lshift, rshift, bc = 0, jagged;
- if (bsp == NULL)
- {
- ErrPostEx(SEV_ERROR,0,0, "Error: not a BioseqPtr\n");
- return FALSE;
- }
-
- if (bsp->repr != Seq_repr_raw)
- {
- ErrPostEx(SEV_ERROR,0,0, "Error: not a raw sequence\n");
- return FALSE;
- }
-
- if (bsp->seq_data == NULL)
- {
- ErrPostEx(SEV_ERROR,0,0, "Error: No sequence data\n");
- return FALSE;
- }
+ if (bysp1 == NULL)
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: No sequence data\n");
+ return FALSE;
+ }
- seqlen = bsp->length;
- seqtype = bsp->seq_data_type;
switch (seqtype){
case Seq_code_ncbi2na: /*bitshifts needed*/
mask = 192;
@@ -5219,7 +5257,6 @@ NLM_EXTERN Boolean LIBCALL BioseqReverse (BioseqPtr bsp)
default: /*ignores amino acid sequence*/
return FALSE;
}
- bysp1 = bsp->seq_data;
bysp2 = BSDup(bysp1);
bslen = BSLen (bysp1);
bitctr = bitctr2 = 0;
@@ -5314,6 +5351,84 @@ bytes*/
}
BSFree(bysp2);
return TRUE;
+} /* ReverseSeqData */
+
+
+static Boolean DeltaBioseqReverse (BioseqPtr bsp) /* reverse a delta Bioseq in place: each literal's data and the order of the chain */
+{
+ DeltaSeqPtr dsp, next_dsp, newchain = NULL; /* newchain: head of the reversed list built in pass 2 */
+ SeqLitPtr slip;
+ Boolean rval = FALSE;
+ Boolean split = FALSE; /* NOTE(review): not referenced again anywhere in this function */
+ /* a NULL or non-delta Bioseq returns FALSE without posting an error */
+ if (bsp == NULL || bsp->repr != Seq_repr_delta)
+ {
+ return rval;
+ }
+ /* pass 1: read-only scan, so a rejected sequence is returned unmodified */
+ dsp = (DeltaSeqPtr) bsp->seq_ext;
+ while (dsp != NULL)
+ {
+ if (dsp->choice != 2) /* only choice 2 entries are handled (cast to SeqLitPtr below); others are reported as far locs */
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: Can't reverse delta sequences with far locs\n");
+ return FALSE;
+ }
+ dsp = dsp->next;
+ }
+ /* pass 2: reverse each literal's data and relink the nodes in opposite order */
+ dsp = (DeltaSeqPtr) bsp->seq_ext;
+ rval = TRUE;
+ while (dsp != NULL)
+ {
+ slip = (SeqLitPtr) dsp->data.ptrvalue; /* NOTE(review): slip itself is not NULL-checked before use */
+ /* reverse data; literals with no seq_data (presumably gaps) are skipped */
+ if (slip->seq_data != NULL)
+ {
+ rval &= ReverseSeqData (slip->seq_data_type, slip->length, slip->seq_data);
+ }
+ 
+ /* reverse the chain: push the current node onto the front of newchain */
+ next_dsp = dsp->next;
+ dsp->next = newchain;
+ newchain = dsp;
+ 
+ dsp = next_dsp;
+ }
+ bsp->seq_ext = newchain; /* installed even when rval is FALSE: a failed literal does not undo or stop the reversal */
+ return rval;
+}
+
+/*-------------- BioseqReverse () ---------------------------*/
+/***********************************************************************
+* BioseqReverse: Takes nucleic acid sequence from Bioseq Entry and
+* reverses the whole sequence in place
+* Does not change features.
+************************************************************************/
+NLM_EXTERN Boolean LIBCALL BioseqReverse (BioseqPtr bsp)
+{
+ Boolean rval;
+ /* dispatch on representation; every branch below assigns rval before the return */
+ if (bsp == NULL)
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: not a BioseqPtr\n");
+ rval = FALSE;
+ }
+ else if (bsp->repr == Seq_repr_delta)
+ {
+ rval = DeltaBioseqReverse (bsp); /* reverses each literal and the order of the delta chain */
+ }
+ else if (bsp->repr == Seq_repr_raw)
+ {
+ rval = ReverseSeqData (bsp->seq_data_type, bsp->length, bsp->seq_data); /* raw: reverse the Bioseq's own seq_data */
+ }
+ else /* any representation other than raw or delta is unsupported */
+ {
+ ErrPostEx(SEV_ERROR,0,0, "Error: not a raw or delta sequence\n");
+ rval = FALSE;
+ }
+ /* NOTE(review): bsp->mol is not examined here; no amino-acid check is made in this function */
+ return rval;
} /* BioseqReverse */
#define SPC_BUFF_CHUNK 1024
@@ -7817,12 +7932,17 @@ NLM_EXTERN CharPtr GetDNAbyAccessionDotVersion (CharPtr accession)
}
-static void FixGapLength (SeqIdPtr sip, Uint2 moltype, Int4 offset, Int4 diff)
+static void FixGapLength (BioseqPtr bsp, Int4 offset, Int4 diff)
{
- CharPtr extra_ns;
- SeqLocPtr slp;
+ CharPtr extra_ns;
+ SeqLocPtr slp;
+ ValNodePtr align_annot_list, vnp;
+ SeqAnnotPtr sanp;
+
+ if (bsp == NULL || bsp->id == NULL || diff == 0) return;
- if (sip == NULL || diff == 0) return;
+ align_annot_list = FindAlignSeqAnnotsForBioseq (bsp);
+
if (diff > 0)
{
extra_ns = (CharPtr)MemNew ((diff + 1) * sizeof (Char));
@@ -7830,13 +7950,33 @@ static void FixGapLength (SeqIdPtr sip, Uint2 moltype, Int4 offset, Int4 diff)
{
MemSet (extra_ns, 'N', diff);
extra_ns [diff] = 0;
- insertchar (extra_ns, offset, sip, moltype, FALSE);
+ insertchar (extra_ns, offset, bsp->id, bsp->mol, FALSE);
}
+ slp = SeqLocIntNew (offset, offset + diff - 1, Seq_strand_plus, bsp->id);
+ for (vnp = align_annot_list; vnp != NULL; vnp = vnp->next)
+ {
+ sanp = vnp->data.ptrvalue;
+ if (sanp != NULL && sanp->type == 2)
+ {
+ sanp->data = SeqAlignInsertByLoc (slp, sanp->data);
+ }
+ }
+ SeqLocFree (slp);
}
else
{
- slp = SeqLocIntNew (offset, offset - diff - 1, Seq_strand_plus, sip);
+ slp = SeqLocIntNew (offset, offset - diff - 1, Seq_strand_plus, bsp->id);
SeqDeleteByLoc (slp, TRUE, FALSE);
+
+ for (vnp = align_annot_list; vnp != NULL; vnp = vnp->next)
+ {
+ sanp = vnp->data.ptrvalue;
+ if (sanp != NULL && sanp->type == 2)
+ {
+ sanp->data = SeqAlignDeleteByLoc (slp, sanp->data);
+ }
+ }
+
SeqLocFree (slp);
}
}
@@ -7998,7 +8138,7 @@ NLM_EXTERN void ConvertNsToGaps (
slp->fuzz = ifp;
if (slp->length != 100)
{
- FixGapLength (bsp->id, bsp->mol, len, 100 - slp->length);
+ FixGapLength (bsp, len, 100 - slp->length);
slp->length = 100;
}
}
diff --git a/api/sequtil.c b/api/sequtil.c
index 5d459f8c..c7d8d53e 100644
--- a/api/sequtil.c
+++ b/api/sequtil.c
@@ -29,13 +29,36 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.183 $
+* $Revision: 6.190 $
*
* File Description: Sequence Utilities for objseq and objsset
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: sequtil.c,v $
+* Revision 6.190 2006/02/16 17:19:14 kans
+* better handling of trans splicing in GetThePointForOffset, SeqLocStart (CB)
+*
+* Revision 6.189 2006/02/07 17:50:53 kans
+* support for pgp instead of pat for pre-grant publication in SeqIdWrite and SeqIdParse
+*
+* Revision 6.188 2006/02/01 21:53:44 kans
+* DZ and EA for ncbi patent in WHICH_db_accession
+*
+* Revision 6.187 2006/01/24 17:59:26 kans
+* use DY for NCBI EST
+*
+* Revision 6.186 2006/01/05 14:11:56 bollin
+* added SeqLocPrintUseBestID function, which prints out the sequence location
+* but uses the "best" sequence ID instead of the one actually stored in the
+* SeqLoc.
+*
+* Revision 6.185 2006/01/03 15:49:36 kans
+* added DX as ncbi gss to WHICH_db_accession
+*
+* Revision 6.184 2005/12/09 19:43:43 kans
+* added DW as NCBI EST
+*
* Revision 6.183 2005/09/20 21:11:34 kans
* added DV as NCBI EST
*
@@ -3321,7 +3344,7 @@ NLM_EXTERN SeqIdPtr SeqIdSelect (SeqIdPtr sip, Uint1Ptr order, Int2 num)
"emb", /* embl = emb|accession|locus */
"pir", /* pir = pir|accession|name */
"sp", /* swissprot = sp|accession|name */
- "pat", /* patent = pat|country|patent number (string)|seq number (integer) */
+ "pat", /* patent = pat|country|patent number (string)|seq number (integer) - use pgp for pre-grant pub */
"ref", /* other = ref|accession|name|release - changed from oth to ref */
"gnl", /* general = gnl|database(string)|id (string or number) */
"gi", /* gi = gi|integer */
@@ -3465,8 +3488,10 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
PDBSeqIdPtr psip;
ObjectIdPtr oip;
PatentSeqIdPtr patsip;
+ IdPatPtr ipp;
Boolean got_gi = FALSE;
Boolean got_tmsmart = FALSE;
+ Boolean is_us_pre_grant = FALSE;
DbtagPtr dbt;
Char chainbuf[3];
Char versionbuf[10];
@@ -3556,6 +3581,14 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
if (dbt != NULL && StringICmp (dbt->db, "TMSMART") == 0) {
got_tmsmart = TRUE;
}
+ } else if (sip->choice == SEQID_PATENT) {
+ patsip = (PatentSeqIdPtr) sip->data.ptrvalue;
+ if (patsip != NULL) {
+ ipp = patsip->cit;
+ if (ipp != NULL && StringDoesHaveText (ipp->app_number)) {
+ is_us_pre_grant = TRUE;
+ }
+ }
}
}
if (useGeneral) {
@@ -3576,8 +3609,18 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
}
format = PRINTID_FASTA_SHORT; /* put on second (or only) SeqId in this format */
}
- else
+ else {
sip = isip; /* only one id processed */
+ if (sip != NULL && sip->choice == SEQID_PATENT) {
+ patsip = (PatentSeqIdPtr) sip->data.ptrvalue;
+ if (patsip != NULL) {
+ ipp = patsip->cit;
+ if (ipp != NULL && StringDoesHaveText (ipp->app_number)) {
+ is_us_pre_grant = TRUE;
+ }
+ }
+ }
+ }
/* deal with LOCUS and ACCESSION */
if ((format == PRINTID_TEXTID_ACCESSION) || (format == PRINTID_TEXTID_LOCUS) ||
@@ -3626,7 +3669,11 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
if (format == PRINTID_FASTA_SHORT)
{
- Nlm_LabelCopyNext(&tmp, txtid[sip->choice], &buflen);
+ if (sip->choice == SEQID_PATENT && is_us_pre_grant) {
+ Nlm_LabelCopyNext(&tmp, "pgp", &buflen);
+ } else {
+ Nlm_LabelCopyNext(&tmp, txtid[sip->choice], &buflen);
+ }
Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
}
@@ -3694,7 +3741,11 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
Nlm_LabelCopyNext(&tmp, patsip->cit->country, &buflen);
if (format == PRINTID_FASTA_SHORT)
Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
- Nlm_LabelCopyNext(&tmp, patsip->cit->number, &buflen);
+ if (is_us_pre_grant) {
+ Nlm_LabelCopyNext(&tmp, patsip->cit->app_number, &buflen);
+ } else {
+ Nlm_LabelCopyNext(&tmp, patsip->cit->number, &buflen);
+ }
if (format == PRINTID_FASTA_SHORT)
Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
else
@@ -3884,7 +3935,7 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
IdPatPtr ipp;
PDBSeqIdPtr psip;
GiimPtr gim;
- Boolean done = FALSE;
+ Boolean done = FALSE, is_us_pre_grant = FALSE;
static Uint1 expect_tokens[NUM_SEQID] = { /* number of tokens to expect */
0, /* 0 = not set */
1, /* 1 = local Object-id */
@@ -3941,6 +3992,12 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
type = SEQID_OTHER;
}
+ /* pgp is for pre-grant patent publications */
+ if ((! type) && (! StringCmp(localbuf, "pgp"))) {
+ type = SEQID_PATENT;
+ is_us_pre_grant = TRUE;
+ }
+
if (! type) goto erret;
/* copy and tokenize - token\0token\0\n */
@@ -4077,7 +4134,11 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
ipp = IdPatNew();
patsip->cit = ipp;
ipp->country = StringSave(tokens[0]);
- ipp->number = StringSave(tokens[1]);
+ if (is_us_pre_grant) {
+ ipp->app_number = StringSave(tokens[1]);
+ } else {
+ ipp->number = StringSave(tokens[1]);
+ }
sscanf(tokens[2], "%ld", &num);
patsip->seqid = (Int2)num;
break;
@@ -4831,8 +4892,9 @@ NLM_EXTERN Int4 SeqLocStart (SeqLocPtr anp) /* seqloc */
{
Int4 pos = -1L, tpos, numpnt;
- SeqIdPtr sip;
- SeqLocPtr slp;
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+ SeqIntPtr sintp;
if (anp == NULL)
return pos;
@@ -4869,7 +4931,8 @@ NLM_EXTERN Int4 SeqLocStart (SeqLocPtr anp) /* seqloc */
}
break;
case SEQLOC_INT: /* int */
- pos = ((SeqIntPtr)anp->data.ptrvalue)->from;
+ sintp = (SeqIntPtr) anp->data.ptrvalue;
+ pos = sintp->from;
break;
case SEQLOC_PNT: /* pnt */
pos = ((SeqPntPtr)anp->data.ptrvalue)->point;
@@ -6558,6 +6621,61 @@ NLM_EXTERN Int4 CheckPointInBioseq (SeqPntPtr sp, BioseqPtr in)
return retval; /* all failed */
}
+
+static SeqIdPtr GetEarlierSeqIdPtr (SeqIdPtr sip1, SeqIdPtr sip2) /* pick which of two ids counts as "earlier"; returns one of the two argument pointers */
+{
+ BioseqPtr bsp1, bsp2;
+ BioseqSetPtr bssp;
+ SeqEntryPtr sep;
+ /* if exactly one id is NULL return the other; ids that compare equal return sip1 */
+ if (sip1 == NULL && sip2 != NULL)
+ {
+ return sip2;
+ }
+ else if (sip1 != NULL && sip2 == NULL)
+ {
+ return sip1;
+ }
+ else if (SeqIdComp(sip1, sip2) == SIC_YES)
+ {
+ return sip1;
+ }
+ /* different ids: look up both Bioseqs; if only one is found its id is returned, if neither is found sip1 is returned */
+ bsp1 = BioseqFind (sip1);
+ bsp2 = BioseqFind (sip2);
+ if (bsp1 == NULL && bsp2 == NULL)
+ {
+ return sip1;
+ }
+ else if (bsp1 == NULL)
+ {
+ return sip2;
+ }
+ else if (bsp2 == NULL)
+ {
+ return sip1;
+ }
+ /* both found: order is decided only when they share the same parent BioseqSet */
+ if (bsp1->idx.parenttype == OBJ_BIOSEQSET
+ && bsp2->idx.parenttype == OBJ_BIOSEQSET
+ && bsp1->idx.parentptr == bsp2->idx.parentptr)
+ {
+ bssp = (BioseqSetPtr) bsp1->idx.parentptr;
+ for (sep = bssp->seq_set; sep != NULL; sep = sep->next) /* walk the set's entries; whichever Bioseq is met first wins */
+ {
+ if (sep->data.ptrvalue == bsp1)
+ {
+ return sip1;
+ }
+ else if (sep->data.ptrvalue == bsp2)
+ {
+ return sip2;
+ }
+ }
+ }
+ return sip1; /* fallback: different parents, or neither Bioseq is a direct member of seq_set */
+}
+
/*****************************************************************************
*
* Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end)
@@ -6565,46 +6683,118 @@ NLM_EXTERN Int4 CheckPointInBioseq (SeqPntPtr sp, BioseqPtr in)
*****************************************************************************/
Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end)
{
- SeqLocPtr tmp, pnt, first=NULL, last=NULL;
- Uint1 ofstrand;
- Boolean getstart;
+ SeqLocPtr pnt, first=NULL, last=NULL;
+ Uint1 first_strand, last_strand;
+ Boolean all_minus = TRUE;
+ Int4 lowest = -1, highest = 0, tmp;
+ SeqIdPtr low_sip = NULL, high_sip = NULL, first_sip = NULL, last_sip = NULL;
+ Boolean id_same;
pnt = NULL; /* get first or last single span type in "of"*/
- tmp = NULL;
while ((pnt = SeqLocFindNext(of, pnt)) != NULL)
{
+ last_strand = SeqLocStrand (pnt);
+ last_sip = SeqLocId (pnt);
+ if (last_strand != Seq_strand_minus)
+ {
+ all_minus = FALSE;
+ }
last = pnt;
if (first == NULL)
+ {
first = pnt;
+ first_strand = last_strand;
+ first_sip = last_sip;
+ lowest = SeqLocStart(pnt);
+ highest = SeqLocStop (pnt);
+ low_sip = last_sip;
+ high_sip = last_sip;
+ }
+ else
+ {
+ tmp = SeqLocStart (pnt);
+ if (SeqIdComp (last_sip, low_sip))
+ {
+ id_same = TRUE;
+ }
+ else
+ {
+ id_same = FALSE;
+ }
+ if ((id_same && tmp < lowest)
+ || (!id_same && last_sip == GetEarlierSeqIdPtr (last_sip, low_sip)))
+ {
+ lowest = tmp;
+ low_sip = last_sip;
+ }
+ tmp = SeqLocStop (pnt);
+
+ if (SeqIdComp (last_sip, high_sip))
+ {
+ id_same = TRUE;
+ }
+ else
+ {
+ id_same = FALSE;
+ }
+ if ((id_same && tmp > highest)
+ || (!id_same && high_sip == GetEarlierSeqIdPtr (high_sip, last_sip)))
+ {
+ highest = tmp;
+ high_sip = last_sip;
+ }
+ }
} /* otherwise, get last */
if (first == NULL)
return FALSE;
- ofstrand = SeqLocStrand(first);
- getstart = TRUE; /* assume we are getting SeqLocStart() */
+
switch (which_end)
{
case SEQLOC_LEFT_END:
- if (ofstrand == Seq_strand_minus)
- tmp = last;
- else
- tmp = first;
+ target->point = lowest;
+ target->id = low_sip;
break;
case SEQLOC_RIGHT_END:
- if (ofstrand == Seq_strand_minus)
- tmp = first;
- else
- tmp = last;
- getstart = FALSE;
+ target->point = highest;
+ target->id = high_sip;
break;
case SEQLOC_START:
- tmp = first;
- if (ofstrand == Seq_strand_minus)
- getstart = FALSE;
+ if (all_minus)
+ {
+ target->point = SeqLocStop (last);
+ target->id = last_sip;
+ }
+ else
+ {
+ if (first_strand == Seq_strand_minus)
+ {
+ target->point = SeqLocStop (first);
+ }
+ else
+ {
+ target->point = SeqLocStart (first);
+ }
+ target->id = first_sip;
+ }
break;
case SEQLOC_STOP:
- tmp = last;
- if (ofstrand != Seq_strand_minus)
- getstart = FALSE;
+ if (all_minus)
+ {
+ target->point = SeqLocStart (first);
+ target->id = first_sip;
+ }
+ else
+ {
+ if (last_strand == Seq_strand_minus)
+ {
+ target->point = SeqLocStart (last);
+ }
+ else
+ {
+ target->point = SeqLocStop (last);
+ }
+ target->id = last_sip;
+ }
break;
default:
return FALSE; /* error */
@@ -6612,12 +6802,6 @@ Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end)
/* SeqLocStart returns 'from', and SeqLocStop returns 'to', regardless of strand! */
- if (getstart)
- target->point = SeqLocStart(tmp);
- else
- target->point = SeqLocStop(tmp);
- target->id = SeqLocId(tmp);
-
if ((target->point < 0) || (target->id == NULL))
return FALSE;
@@ -6792,15 +6976,11 @@ NLM_EXTERN Int2 SeqLocMol (SeqLocPtr seqloc)
return the_mol;
}
-static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first, SeqIdPtr lastid);
+static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first, SeqIdPtr lastid, Boolean use_best_id);
static void BSstring(ByteStorePtr bsp, CharPtr str);
-/*****************************************************************************
-*
-* SeqLocPrint(slp)
-*
-*****************************************************************************/
-NLM_EXTERN CharPtr SeqLocPrint(SeqLocPtr slp)
+
+static CharPtr SeqLocPrintEx (SeqLocPtr slp, Boolean use_best_id)
{
ByteStorePtr bsp;
CharPtr str;
@@ -6813,19 +6993,35 @@ NLM_EXTERN CharPtr SeqLocPrint(SeqLocPtr slp)
tmp = slp->next; /* save possible chain */
slp->next = NULL; /* take out of possible chain */
- SeqLocPrintProc(slp, bsp, TRUE, NULL);
+ SeqLocPrintProc(slp, bsp, TRUE, NULL, use_best_id);
slp->next = tmp; /* replace possible chain */
str = (CharPtr)BSMerge(bsp, NULL);
BSFree(bsp);
- return str;
+ return str;
+}
+
+/*****************************************************************************
+*
+* SeqLocPrint(slp)
+*
+*****************************************************************************/
+NLM_EXTERN CharPtr SeqLocPrint(SeqLocPtr slp)
+{
+ return SeqLocPrintEx (slp, FALSE);
+}
+
+NLM_EXTERN CharPtr SeqLocPrintUseBestID(SeqLocPtr slp)
+{
+ return SeqLocPrintEx (slp, TRUE);
}
NLM_EXTERN SeqIdPtr SeqPointWrite(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, Int2 buflen);
NLM_EXTERN SeqIdPtr SeqPointPrint(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid);
NLM_EXTERN void IntFuzzPrint(IntFuzzPtr ifp, Int4 pos, CharPtr buf, Boolean right);
static char strandsymbol[5] = { '\0', '\0', 'c', 'b', 'r' };
+static SeqIdPtr SeqPointWriteEx (SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, Int2 buflen, Boolean use_best_id);
/*****************************************************************************
@@ -6835,7 +7031,13 @@ static char strandsymbol[5] = { '\0', '\0', 'c', 'b', 'r' };
* goes down slp chain
*
*****************************************************************************/
-static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first, SeqIdPtr lastid)
+static SeqIdPtr
+SeqLocPrintProc
+(SeqLocPtr slp,
+ ByteStorePtr bsp,
+ Boolean first,
+ SeqIdPtr lastid,
+ Boolean use_best_id)
{
Char buf[41];
SeqBondPtr sbp;
@@ -6844,6 +7046,8 @@ static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first,
IntFuzzPtr ifp1, ifp2;
Int4 from, to;
Int2 delim, delim2;
+ BioseqPtr seq;
+ SeqIdPtr thisid;
while (slp != NULL)
{
@@ -6861,7 +7065,7 @@ static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first,
sbp = (SeqBondPtr)(slp->data.ptrvalue);
if (sbp->a != NULL)
{
- lastid = SeqPointWrite(sbp->a, buf, lastid, 40);
+ lastid = SeqPointWriteEx(sbp->a, buf, lastid, 40, use_best_id);
BSstring(bsp, buf);
}
else
@@ -6871,7 +7075,7 @@ static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first,
if (sbp->b != NULL)
{
- lastid = SeqPointWrite(sbp->b, buf, lastid, 40);
+ lastid = SeqPointWriteEx(sbp->b, buf, lastid, 40, use_best_id);
BSstring(bsp, buf);
}
else
@@ -6906,18 +7110,27 @@ static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first,
delim2 = ']';
}
BSPutByte(bsp, delim);
- lastid = SeqLocPrintProc((SeqLocPtr)(slp->data.ptrvalue), bsp, TRUE, lastid);
+ lastid = SeqLocPrintProc((SeqLocPtr)(slp->data.ptrvalue), bsp, TRUE, lastid, use_best_id);
BSPutByte(bsp, delim2);
break;
case SEQLOC_INT: /* int */
sip = (SeqIntPtr)(slp->data.ptrvalue);
+ thisid = sip->id;
+ if (use_best_id)
+ {
+ seq = BioseqFind (thisid);
+ if (seq != NULL)
+ {
+ thisid = SeqIdFindBest (seq->id, SEQID_GENBANK);
+ }
+ }
if (! SeqIdMatch(sip->id, lastid))
{
- SeqIdWrite(sip->id, buf, PRINTID_FASTA_SHORT, 40);
+ SeqIdWrite(thisid, buf, PRINTID_FASTA_SHORT, 40);
BSstring(bsp, buf);
BSPutByte(bsp, ':');
}
- lastid = sip->id;
+ lastid = thisid;
if (strandsymbol[sip->strand])
BSPutByte(bsp, (Int2)strandsymbol[sip->strand]);
if ((sip->strand == Seq_strand_minus) ||
@@ -6944,8 +7157,8 @@ static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first,
break;
case SEQLOC_PNT: /* pnt */
- lastid = SeqPointWrite((SeqPntPtr)(slp->data.ptrvalue),
- buf, lastid, 40);
+ lastid = SeqPointWriteEx((SeqPntPtr)(slp->data.ptrvalue),
+ buf, lastid, 40, use_best_id);
BSstring(bsp, buf);
break;
case SEQLOC_PACKED_PNT: /* packed pnt */
@@ -7004,22 +7217,37 @@ NLM_EXTERN SeqIdPtr SeqPointPrint(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid)
return spp->id;
}
-/*****************************************************************************
-*
-* SeqPointWrite(spp, buf, lastid, buflen)
-*
-*****************************************************************************/
-NLM_EXTERN SeqIdPtr SeqPointWrite(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, Int2 buflen)
+static SeqIdPtr
+SeqPointWriteEx
+(SeqPntPtr spp,
+ CharPtr buf,
+ SeqIdPtr lastid,
+ Int2 buflen,
+ Boolean use_best_id)
{
- CharPtr tmp;
+ CharPtr tmp;
+ SeqIdPtr best_id, tmp_next;
+ BioseqPtr bsp;
if ((spp == NULL) || (buf == NULL)) return NULL;
tmp = buf;
*tmp = '\0';
- if (! SeqIdMatch(spp->id, lastid))
+ best_id = spp->id;
+ if (use_best_id)
+ {
+ bsp = BioseqFind (spp->id);
+ if (bsp != NULL)
+ {
+ best_id = SeqIdFindBest (bsp->id, SEQID_GENBANK);
+ }
+ }
+ tmp_next = best_id->next;
+ best_id->next = NULL;
+
+ if (! SeqIdMatch(best_id, lastid))
{
- SeqIdWrite(spp->id, tmp, PRINTID_FASTA_SHORT, buflen);
+ SeqIdWrite(best_id, tmp, PRINTID_FASTA_SHORT, buflen);
while (*tmp != '\0') tmp++;
*tmp = ':';
tmp++; *tmp = '\0';
@@ -7031,7 +7259,19 @@ NLM_EXTERN SeqIdPtr SeqPointWrite(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, I
}
IntFuzzPrint(spp->fuzz, spp->point, tmp, TRUE);
- return spp->id;
+ best_id->next = tmp_next;
+
+ return best_id;
+}
+
+/*****************************************************************************
+*
+* SeqPointWrite(spp, buf, lastid, buflen)
+*
+*****************************************************************************/
+NLM_EXTERN SeqIdPtr SeqPointWrite(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, Int2 buflen)
+{
+ return SeqPointWriteEx (spp, buf, lastid, buflen, FALSE);
}
/*****************************************************************************
@@ -9083,7 +9323,9 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"DN") == 0) ||
(StringICmp(temp,"DR") == 0) ||
(StringICmp(temp,"DT") == 0) ||
- (StringICmp(temp,"DV") == 0) ) { /* NCBI EST */
+ (StringICmp(temp,"DV") == 0) ||
+ (StringICmp(temp,"DW") == 0) ||
+ (StringICmp(temp,"DY") == 0) ) { /* NCBI EST */
retcode = ACCN_NCBI_EST;
} else if ((StringICmp(temp,"BV") == 0)) { /* NCBI STS */
retcode = ACCN_NCBI_STS;
@@ -9118,9 +9360,12 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"CL") == 0) ||
(StringICmp(temp,"CW") == 0) ||
(StringICmp(temp,"CZ") == 0) ||
- (StringICmp(temp,"DU") == 0) ) { /* NCBI GSS */
+ (StringICmp(temp,"DU") == 0) ||
+ (StringICmp(temp,"DX") == 0) ) { /* NCBI GSS */
retcode = ACCN_NCBI_GSS;
- } else if ((StringICmp(temp,"AR") == 0)) { /* NCBI patent */
+ } else if ((StringICmp(temp,"AR") == 0) ||
+ (StringICmp(temp,"DZ") == 0) ||
+ (StringICmp(temp,"EA") == 0)) { /* NCBI patent */
retcode = ACCN_NCBI_PATENT;
} else if((StringICmp(temp,"BC")==0)) { /* NCBI long cDNA project : MGC */
retcode = ACCN_NCBI_cDNA;
diff --git a/api/sequtil.h b/api/sequtil.h
index c643448d..7e0213a5 100644
--- a/api/sequtil.h
+++ b/api/sequtil.h
@@ -29,13 +29,18 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.48 $
+* $Revision: 6.49 $
*
* File Description: Sequence Utilities for objseq and objsset
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: sequtil.h,v $
+* Revision 6.49 2006/01/05 14:11:56 bollin
+* added SeqLocPrintUseBestID function, which prints out the sequence location
+* but uses the "best" sequence ID instead of the one actually stored in the
+* SeqLoc.
+*
* Revision 6.48 2005/08/03 18:28:36 kans
* ValidateAccnDotVer returns -5 for missing version and -6 for bad version (not just digits)
*
@@ -844,7 +849,7 @@ NLM_EXTERN Int2 SeqLocOrder(SeqLocPtr a, SeqLocPtr b, BioseqPtr in);
NLM_EXTERN Int2 SeqLocMol(SeqLocPtr seqloc);
NLM_EXTERN CharPtr SeqLocPrint(SeqLocPtr slp);
-
+NLM_EXTERN CharPtr SeqLocPrintUseBestID(SeqLocPtr slp);
/*****************************************************************************
*
diff --git a/api/sqnutil1.c b/api/sqnutil1.c
index 9fc1b88e..e44797e5 100644
--- a/api/sqnutil1.c
+++ b/api/sqnutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.361 $
+* $Revision: 6.369 $
*
* File Description:
*
@@ -331,6 +331,98 @@ NLM_EXTERN Int2 SeqEntryToBioSource (SeqEntryPtr sep, BoolPtr mito, CharPtr taxn
return SeqEntryOrEntityIDToGeneticCode (sep, 0, mito, taxname, maxsize, biopp);
}
+NLM_EXTERN Boolean BioseqToGeneticCode (
+ BioseqPtr bsp,
+ Int2Ptr gencodep,
+ BoolPtr mitop,
+ BoolPtr plastidp,
+ CharPtr taxnamep,
+ size_t maxsize,
+ BioSourcePtr PNTR biopp
+)
+
+{
+ BioSourcePtr biop = NULL;
+ SeqMgrDescContext dcontext;
+ SeqMgrFeatContext fcontext;
+ Int2 gencode = 0;
+ Boolean mito = FALSE;
+ Int2 mitoCode = 0;
+ Int2 nuclCode = 0;
+ OrgNamePtr onp;
+ OrgRefPtr orp;
+ Boolean plastid = FALSE;
+ SeqDescrPtr sdp;
+ SeqFeatPtr sfp;
+ CharPtr taxname = NULL;
+
+ if (bsp == NULL) return FALSE;
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+ if (sdp != NULL) {
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ }
+
+ if (biop == NULL) {
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
+ if (sfp != NULL) {
+ biop = (BioSourcePtr) sfp->data.value.ptrvalue;
+ }
+ }
+
+ if (biop == NULL) return FALSE;
+ orp = biop->org;
+ if (orp == NULL) return FALSE;
+
+ taxname = orp->taxname;
+ if (StringHasNoText (taxname)) return FALSE;
+
+ onp = orp->orgname;
+ if (onp != NULL) {
+ nuclCode = onp->gcode;
+ mitoCode = onp->mgcode;
+ }
+
+ mito = (Boolean) (biop->genome == GENOME_kinetoplast ||
+ biop->genome == GENOME_mitochondrion ||
+ biop->genome == GENOME_hydrogenosome);
+
+ plastid = (Boolean) (biop->genome == GENOME_chloroplast ||
+ biop->genome == GENOME_chromoplast ||
+ biop->genome == GENOME_plastid ||
+ biop->genome == GENOME_cyanelle ||
+ biop->genome == GENOME_apicoplast ||
+ biop->genome == GENOME_leucoplast ||
+ biop->genome == GENOME_proplastid);
+
+ if (plastid) {
+ gencode = 11;
+ } else if (mito) {
+ gencode = mitoCode;
+ } else {
+ gencode = nuclCode;
+ }
+
+ if (gencodep != NULL) {
+ *gencodep = gencode;
+ }
+ if (mitop != NULL) {
+ *mitop = mito;
+ }
+ if (plastidp != NULL) {
+ *plastidp = plastid;
+ }
+ if (taxnamep != NULL && maxsize > 0) {
+ StringNCpy_0 (taxnamep, taxname, maxsize);
+ }
+ if (biopp != NULL) {
+ *biopp = biop;
+ }
+
+ return TRUE;
+}
+
+
static Boolean FindBspItem (GatherContextPtr gcp)
{
@@ -2711,7 +2803,14 @@ extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset)
if (StringHasNoText (val)) return FALSE;
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
- if (rrp == NULL || rrp->ext.choice != 2) return FALSE;
+ if (rrp == NULL) return FALSE;
+
+ if (rrp->ext.choice == 0 && rrp->ext.value.ptrvalue == NULL) {
+ rrp->ext.choice = 2;
+ trp = (tRNAPtr) MemNew (sizeof (tRNA));
+ rrp->ext.value.ptrvalue = (Pointer) trp;
+ }
+ if (rrp->ext.choice != 2) return FALSE;
trp = (tRNAPtr) rrp->ext.value.ptrvalue;
if (trp == NULL) return FALSE;
@@ -4002,6 +4101,7 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
if (StringICmp (gbq->qual, "partial") == 0) {
sfp->partial = TRUE;
} else if (StringICmp (gbq->qual, "evidence") == 0) {
+ /*
if (StringICmp (gbq->val, "experimental") == 0) {
if (sfp->exp_ev != 2) {
sfp->exp_ev = 1;
@@ -4009,6 +4109,7 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
} else if (StringICmp (gbq->val, "not_experimental") == 0) {
sfp->exp_ev = 2;
}
+ */
} else if (StringICmp (gbq->qual, "exception") == 0) {
sfp->excpt = TRUE;
if (! HasNoText (gbq->val)) {
@@ -4524,15 +4625,83 @@ static SubSourcePtr SortSubSourceList (SubSourcePtr list)
return list;
}
+static CharPtr TrimParenthesesAndCommasAroundString (CharPtr str)
+
+{
+ Uchar ch; /* to use 8bit characters in multibyte languages */
+ CharPtr dst;
+ CharPtr ptr;
+
+ if (str != NULL && str [0] != '\0') {
+ dst = str;
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0' && (ch < ' ' || ch == '(' || ch == ',')) {
+ ptr++;
+ ch = *ptr;
+ }
+ while (ch != '\0') {
+ *dst = ch;
+ dst++;
+ ptr++;
+ ch = *ptr;
+ }
+ *dst = '\0';
+ dst = NULL;
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (ch != ')' && ch != ',') {
+ dst = NULL;
+ } else if (dst == NULL) {
+ dst = ptr;
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ if (dst != NULL) {
+ *dst = '\0';
+ }
+ }
+ return str;
+}
+
+static CharPtr CombineSplitQual (CharPtr origval, CharPtr newval)
+
+{
+ size_t len;
+ CharPtr str = NULL;
+
+ if (StringStr (origval, newval) != NULL) return origval;
+ len = StringLen (origval) + StringLen (newval) + 5;
+ str = MemNew (sizeof (Char) * len);
+ if (str == NULL) return origval;
+ TrimParenthesesAndCommasAroundString (origval);
+ TrimParenthesesAndCommasAroundString (newval);
+ StringCpy (str, "(");
+ StringCat (str, origval);
+ StringCat (str, ",");
+ StringCat (str, newval);
+ StringCat (str, ")");
+ /* free original string, knowing return value will replace it */
+ MemFree (origval);
+ return str;
+}
+
static void CleanSubSourceList (SubSourcePtr PNTR sspp)
{
Char ch;
+ CharPtr dst;
+ SubSourcePtr fwd_seq = NULL, rev_seq = NULL;
SubSourcePtr last = NULL;
+ size_t len;
SubSourcePtr next;
SubSourcePtr PNTR prev;
CharPtr ptr;
+ CharPtr src;
SubSourcePtr ssp;
+ CharPtr str;
Boolean unlink;
if (sspp == NULL) return;
@@ -4550,6 +4719,7 @@ static void CleanSubSourceList (SubSourcePtr PNTR sspp)
if (ssp->subtype == SUBSRC_fwd_primer_seq ||
ssp->subtype == SUBSRC_rev_primer_seq) {
if (ssp->name != NULL) {
+ /* upper case sequence */
ptr = ssp->name;
ch = *ptr;
while (ch != '\0') {
@@ -4559,6 +4729,35 @@ static void CleanSubSourceList (SubSourcePtr PNTR sspp)
ptr++;
ch = *ptr;
}
+ /* remove any spaces in sequence */
+ src = ssp->name;
+ dst = ssp->name;
+ ch = *src;
+ while (ch != '\0') {
+ if (ch != ' ') {
+ *dst = ch;
+ dst++;
+ }
+ src++;
+ ch = *src;
+ }
+ *dst = '\0';
+ }
+ }
+ if (ssp->subtype == SUBSRC_fwd_primer_seq) {
+ if (fwd_seq == NULL) {
+ fwd_seq = ssp;
+ } else {
+ fwd_seq->name = CombineSplitQual (fwd_seq->name, ssp->name);
+ unlink = TRUE;
+ }
+ }
+ if (ssp->subtype == SUBSRC_rev_primer_seq) {
+ if (rev_seq == NULL) {
+ rev_seq = ssp;
+ } else {
+ rev_seq->name = CombineSplitQual (rev_seq->name, ssp->name);
+ unlink = TRUE;
}
}
CleanVisString (&(ssp->attrib));
@@ -4574,9 +4773,9 @@ static void CleanSubSourceList (SubSourcePtr PNTR sspp)
ssp->subtype == SUBSRC_rearranged ||
ssp->subtype == SUBSRC_transgenic ||
ssp->subtype == SUBSRC_environmental_sample ||
- StringICmp (last->name, ssp->name) == 0) ||
+ StringICmp (last->name, ssp->name) == 0 ||
(last->subtype == SUBSRC_other &&
- StringStr (last->name, ssp->name) != NULL)) {
+ StringStr (last->name, ssp->name) != NULL))) {
unlink = TRUE;
} else if (last->subtype == ssp->subtype &&
last->subtype == SUBSRC_other &&
@@ -4599,6 +4798,40 @@ static void CleanSubSourceList (SubSourcePtr PNTR sspp)
}
ssp = next;
}
+ if (fwd_seq != NULL) {
+ if (StringChr (fwd_seq->name, ',') != NULL) {
+ ptr = fwd_seq->name;
+ len = StringLen (ptr);
+ if (ptr [0] != '(' || ptr [len - 1] != ')') {
+ TrimParenthesesAndCommasAroundString (fwd_seq->name);
+ str = MemNew (sizeof (Char) * (len + 4));
+ if (str != NULL) {
+ StringCpy (str, "(");
+ StringCat (str, fwd_seq->name);
+ StringCat (str, ")");
+ fwd_seq->name = MemFree (fwd_seq->name);
+ fwd_seq->name = str;
+ }
+ }
+ }
+ }
+ if (rev_seq != NULL) {
+ if (StringChr (rev_seq->name, ',') != NULL) {
+ ptr = rev_seq->name;
+ len = StringLen (ptr);
+ if (ptr [0] != '(' || ptr [len - 1] != ')') {
+ TrimParenthesesAndCommasAroundString (rev_seq->name);
+ str = MemNew (sizeof (Char) * (len + 4));
+ if (str != NULL) {
+ StringCpy (str, "(");
+ StringCat (str, rev_seq->name);
+ StringCat (str, ")");
+ rev_seq->name = MemFree (rev_seq->name);
+ rev_seq->name = str;
+ }
+ }
+ }
+ }
}
/* if string starts with given prefix, return pointer to remaining text */
@@ -6277,7 +6510,8 @@ static void CleanUpExceptText (SeqFeatPtr sfp)
if (StringStr (sfp->except_text, "ribosome slippage") == NULL &&
StringStr (sfp->except_text, "trans splicing") == NULL &&
StringStr (sfp->except_text, "alternate processing") == NULL &&
- StringStr (sfp->except_text, "non-consensus splice site") == NULL) return;
+ StringStr (sfp->except_text, "non-consensus splice site") == NULL &&
+ StringStr (sfp->except_text, "adjusted for low quality genome") == NULL) return;
head = NULL;
str = sfp->except_text;
@@ -6307,6 +6541,9 @@ static void CleanUpExceptText (SeqFeatPtr sfp)
} else if (StringCmp (tmp, "non-consensus splice site") == 0) {
vnp->data.ptrvalue = MemFree (tmp);
vnp->data.ptrvalue = StringSave ("nonconsensus splice site");
+ } else if (StringCmp (tmp, "adjusted for low quality genome") == 0) {
+ vnp->data.ptrvalue = MemFree (tmp);
+ vnp->data.ptrvalue = StringSave ("adjusted for low-quality genome");
}
}
@@ -6634,11 +6871,16 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP
}
}
}
+/*
+ * This section has been commented out based on a request by DeAnne Cravaritis.
+ * If left in, this causes unexpected results when RNA comments are copied to
+ * the product name or vice versa.
if (rrp->ext.choice == 1 && rrp->ext.value.ptrvalue != NULL) {
if (StringICmp ((CharPtr) rrp->ext.value.ptrvalue, sfp->comment) == 0) {
sfp->comment = MemFree (sfp->comment);
}
}
+*/
if (rrp->type == 4) {
name = (CharPtr) rrp->ext.value.ptrvalue;
len = StringLen (name);
@@ -6823,13 +7065,13 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP
}
if (rrp->type == 255 && rrp->ext.choice == 1) {
name = (CharPtr) rrp->ext.value.ptrvalue;
- if (StringICmp (name, "its1") == 0) {
+ if (StringICmp (name, "its1") == 0 || StringICmp (name, "its 1") == 0) {
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 1");
- } else if (StringICmp (name, "its2") == 0) {
+ } else if (StringICmp (name, "its2") == 0 || StringICmp (name, "its 2") == 0) {
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 2");
- } else if (StringICmp (name, "its3") == 0) {
+ } else if (StringICmp (name, "its3") == 0 || StringICmp (name, "its 3") == 0) {
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 3");
}
@@ -6883,8 +7125,8 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP
CleanVisStringList (&(orp->mod));
OrpModToSubSource (&(orp->mod), &(biop->subtype));
}
- biop->subtype = SortSubSourceList (biop->subtype);
CleanSubSourceList (&(biop->subtype));
+ biop->subtype = SortSubSourceList (biop->subtype);
break;
default :
break;
@@ -7029,8 +7271,8 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo
CleanVisStringList (&(orp->mod));
OrpModToSubSource (&(orp->mod), &(biop->subtype));
}
- biop->subtype = SortSubSourceList (biop->subtype);
CleanSubSourceList (&(biop->subtype));
+ biop->subtype = SortSubSourceList (biop->subtype);
break;
case Seq_descr_molinfo :
break;
diff --git a/api/sqnutil2.c b/api/sqnutil2.c
index 784d5c39..f5d24d2b 100644
--- a/api/sqnutil2.c
+++ b/api/sqnutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.240 $
+* $Revision: 6.244 $
*
* File Description:
*
@@ -1852,6 +1852,10 @@ static CharPtr molinfo_tech_list [] = {
"fli cDNA", "htgs 0", "htc", "wgs", "barcode", "composite-wgs-htgs", NULL
};
+static CharPtr molinfo_completeness_list [] = {
+ "unknown", "complete", "partial", "no-left", "no-right", "no-ends", "has-left", "has-right", NULL
+};
+
NLM_EXTERN void ReadTechFromString (CharPtr str, MolInfoPtr mip)
{
Int4 i;
@@ -1868,6 +1872,22 @@ NLM_EXTERN void ReadTechFromString (CharPtr str, MolInfoPtr mip)
}
}
+NLM_EXTERN void ReadCompletenessFromString (CharPtr str, MolInfoPtr mip)
+{
+ Int4 i;
+
+ if (mip == NULL || str == NULL)
+ {
+ return;
+ }
+
+ for (i = 0; molinfo_completeness_list [i] != NULL; i++) {
+ if (StringsAreEquivalent (str, molinfo_completeness_list [i])) {
+ mip->completeness = (Uint1) i;
+ }
+ }
+}
+
NLM_EXTERN MolInfoPtr ParseTitleIntoMolInfo (
SqnTagPtr stp,
MolInfoPtr mip
@@ -1903,11 +1923,7 @@ NLM_EXTERN MolInfoPtr ParseTitleIntoMolInfo (
ReadTechFromString (str, mip);
str = SqnTagFind (stp, "completeness");
- if (str != NULL) {
- if (StringICmp (str, "complete") == 0) {
- mip->completeness = 1;
- }
- }
+ ReadCompletenessFromString (str, mip);
return mip;
}
@@ -4002,6 +4018,8 @@ static CharPtr aaList [] = {
"Z", "Glx", "Glu or Gln",
"U", "Sec", "Selenocysteine",
"*", "Ter", "Termination",
+ "O", "Pyl", "Pyrrolysine",
+ "J", "Xle", "Leu or Ile",
NULL, NULL, NULL
};
@@ -4966,6 +4984,9 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
isLocusTag = TRUE;
}
}
+ if (qnum == GBQUAL_evidence) {
+ qnum = -1; /* no longer legal */
+ }
if (qnum <= -1) {
bail = TRUE;
if (sfp->data.choice == SEQFEAT_IMP) {
@@ -5151,6 +5172,7 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
} else if (qnum == GBQUAL_replace && StringCmp (val, "-") == 0) {
val = "";
} else if (qnum == GBQUAL_evidence) {
+ /*
if (StringICmp (val, "experimental") == 0) {
sfp->exp_ev = 1;
} else if (StringICmp (val, "not_experimental") == 0 ||
@@ -5159,6 +5181,7 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
StringICmp (val, "non-experimental") == 0) {
sfp->exp_ev = 2;
}
+ */
return;
} else if (qnum == GBQUAL_exception) {
sfp->excpt = TRUE;
@@ -8519,6 +8542,165 @@ NLM_EXTERN void PrintQualityScoresToBuffer (BioseqPtr bsp, Boolean gapIsZero, Po
ValNodeFreeData (head);
}
+
+NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_left)
+{
+ FloatHiPtr new_flvalues = NULL, old_flvalues;
+ Int4Ptr new_intvalues = NULL, old_intvalues;
+ ByteStorePtr new_bytevalues = NULL, old_bytevalues;
+ Int4 new_len;
+ Int4 start_pos;
+ FloatHi fhmax = 0.0, fhmin = 0.0;
+ Int4 intmax = 0, intmin = 0;
+ Int2 bs_max = 0, bs_min = 0;
+ Int4 new_pos, old_pos;
+ Int2 val;
+
+ if (sgp == NULL || num_to_trim < 1)
+ {
+ return;
+ }
+
+ new_len = sgp->numval - num_to_trim;
+ if (from_left)
+ {
+ start_pos = num_to_trim;
+ }
+ else
+ {
+ start_pos = 0;
+ }
+
+ if (sgp->flags[2] == 1)
+ {
+ new_flvalues = (FloatHiPtr) MemNew (new_len * sizeof (FloatHi));
+ old_flvalues = (FloatHiPtr) sgp->values;
+ new_pos = 0;
+ old_pos = start_pos;
+ while (old_pos < sgp->numval)
+ {
+ new_flvalues [new_pos] = old_flvalues[start_pos];
+ if (old_pos == start_pos)
+ {
+ fhmax = new_flvalues[new_pos];
+ fhmin = new_flvalues[new_pos];
+ }
+ else
+ {
+ if (fhmax < new_flvalues[new_pos])
+ {
+ fhmax = new_flvalues[new_pos];
+ }
+
+ if (fhmin > new_flvalues[new_pos])
+ {
+ fhmin = new_flvalues[new_pos];
+ }
+ }
+ new_pos++;
+ old_pos++;
+ }
+ old_flvalues = MemFree (old_flvalues);
+ sgp->values = new_flvalues;
+ sgp->numval = new_len;
+ sgp->max.realvalue = fhmax;
+ sgp->min.realvalue = fhmin;
+ }
+ else if (sgp->flags[2] == 2)
+ {
+ new_intvalues = (Int4Ptr) MemNew (new_len * sizeof (FloatHi));
+ old_intvalues = (Int4Ptr) sgp->values;
+ new_pos = 0;
+ old_pos = start_pos;
+ while (old_pos < sgp->numval)
+ {
+ new_intvalues [new_pos] = old_intvalues[start_pos];
+ if (old_pos == start_pos)
+ {
+ intmax = new_intvalues[new_pos];
+ intmin = new_intvalues[new_pos];
+ }
+ else
+ {
+ if (intmax < new_intvalues[new_pos])
+ {
+ intmax = new_intvalues[new_pos];
+ }
+
+ if (intmin > new_intvalues[new_pos])
+ {
+ intmin = new_intvalues[new_pos];
+ }
+ }
+ new_pos++;
+ old_pos++;
+ }
+ old_intvalues = MemFree (old_intvalues);
+ sgp->values = new_intvalues;
+ sgp->numval = new_len;
+ sgp->max.intvalue = intmax;
+ sgp->min.intvalue = intmin;
+ }
+ else if (sgp->flags[2] == 3)
+ {
+ new_bytevalues = BSNew(new_len + 1);
+ old_bytevalues = (ByteStorePtr) sgp->values;
+ new_pos = 0;
+ old_pos = start_pos;
+ while (old_pos < sgp->numval)
+ {
+ BSSeek (old_bytevalues, old_pos, SEEK_SET);
+ BSSeek (new_bytevalues, new_pos, SEEK_SET);
+ val = (Int2) BSGetByte (old_bytevalues);
+ BSPutByte (new_bytevalues, val);
+
+ if (old_pos == start_pos)
+ {
+ bs_max = val;
+ bs_min = val;
+ }
+ else
+ {
+ if (bs_max < val)
+ {
+ bs_max = val;
+ }
+
+ if (bs_min > val)
+ {
+ bs_min = val;
+ }
+ }
+ new_pos++;
+ old_pos++;
+ }
+ BSPutByte (new_bytevalues, EOF);
+ old_bytevalues = BSFree (old_bytevalues);
+ sgp->values = new_bytevalues;
+ sgp->numval = new_len;
+ sgp->max.intvalue = bs_max;
+ sgp->min.intvalue = bs_min;
+ }
+}
+
+
+NLM_EXTERN void TrimQualityScores (BioseqPtr bsp, Int4 num_to_trim, Boolean from_left)
+{
+ ValNodePtr qual_scores, vnp;
+ GphItemPtr gip;
+
+ if (bsp == NULL) return;
+ qual_scores = GetSeqGraphsOnBioseq (bsp->idx.entityID, bsp);
+ for (vnp = qual_scores; vnp != NULL; vnp = vnp->next)
+ {
+ gip = (GphItemPtr) vnp->data.ptrvalue;
+ if (gip == NULL) continue;
+ TrimSeqGraph (gip->sgp, num_to_trim, from_left);
+ }
+
+}
+
+
NLM_EXTERN BytePtr GetScoresbySeqId (SeqIdPtr sip, Int4Ptr bsplength)
{
diff --git a/api/sqnutil3.c b/api/sqnutil3.c
index 2942e514..ef2a21bf 100644
--- a/api/sqnutil3.c
+++ b/api/sqnutil3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/7/00
*
-* $Revision: 6.63 $
+* $Revision: 6.69 $
*
* File Description:
*
@@ -4329,3 +4329,115 @@ extern Boolean RemoveSequenceFromAlignments (SeqEntryPtr sep, SeqIdPtr sip)
VisitAnnotsInSep (sep, (Pointer) sip, RemoveSequenceFromAlignmentsCallback);
return TRUE;
}
+
+
+static CharPtr inferencePrefix [] = {
+ "",
+ "similar to sequence",
+ "similar to AA sequence",
+ "similar to DNA sequence",
+ "similar to RNA sequence",
+ "similar to RNA sequence, mRNA",
+ "similar to RNA sequence, EST",
+ "similar to RNA sequence, other RNA",
+ "profile",
+ "nucleotide motif",
+ "protein motif",
+ "ab initio prediction",
+ NULL
+};
+
+NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn)
+
+{
+ Int2 accnv, best, j, rsult;
+ Char ch;
+ Boolean has_fetch_function, same_species;
+ size_t len;
+ ObjMgrProcPtr ompp = NULL;
+ CharPtr rest, str, tmp;
+ ErrSev sev;
+ SeqIdPtr sip;
+
+ if (StringHasNoText (val)) return EMPTY_INFERENCE_STRING;
+
+ rest = NULL;
+ best = -1;
+ for (j = 0; inferencePrefix [j] != NULL; j++) {
+ len = StringLen (inferencePrefix [j]);
+ if (StringNICmp (val, inferencePrefix [j], len) != 0) continue;
+ rest = val + len;
+ best = j;
+ }
+
+ if (best < 0 || inferencePrefix [best] == NULL) return BAD_INFERENCE_PREFIX;
+
+ if (rest == NULL) return BAD_INFERENCE_BODY;
+
+ same_species = FALSE;
+ ch = *rest;
+ while (IS_WHITESP (ch)) {
+ rest++;
+ ch = *rest;
+ }
+ if (StringNICmp (rest, "(same species)", 14) == 0) {
+ same_species = TRUE;
+ rest += 14;
+ }
+ ch = *rest;
+ while (IS_WHITESP (ch) || ch == ':') {
+ rest++;
+ ch = *rest;
+ }
+
+ if (StringHasNoText (rest)) return BAD_INFERENCE_BODY;
+
+ rsult = VALID_INFERENCE;
+ if (same_species && best > 7) {
+ rsult = SAME_SPECIES_MISUSED;
+ }
+
+ str = StringSave (rest);
+
+ tmp = StringChr (str, ':');
+ if (tmp != NULL) {
+ *tmp = '\0';
+ tmp++;
+ TrimSpacesAroundString (str);
+ TrimSpacesAroundString (tmp);
+ if (StringDoesHaveText (tmp)) {
+ if (StringICmp (str, "INSD") == 0 || StringICmp (str, "RefSeq") == 0) {
+ accnv = ValidateAccnDotVer (tmp);
+ if (accnv == -5 || accnv == -6) {
+ rsult = BAD_INFERENCE_ACC_VERSION;
+ } else if (accnv != 0) {
+ rsult = BAD_INFERENCE_ACCESSION;
+ } else if (fetchAccn) {
+ sip = SeqIdFromAccessionDotVersion (tmp);
+ has_fetch_function = FALSE;
+ while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_SEQID, ompp)) != NULL) {
+ if ((ompp->subinputtype == 0) && (ompp->suboutputtype == SEQID_GI)) {
+ has_fetch_function = TRUE;
+ }
+ }
+ sev = ErrGetMessageLevel ();
+ ErrSetMessageLevel (SEV_ERROR);
+ if (has_fetch_function && GetGIForSeqId (sip) == 0) {
+ rsult = ACC_VERSION_NOT_PUBLIC;
+ }
+ ErrSetMessageLevel (sev);
+ SeqIdFree (sip);
+ }
+ }
+ }
+ if (StringChr (str, ' ') != NULL) rsult = SPACES_IN_INFERENCE;
+ if (StringChr (tmp, ' ') != NULL) rsult = SPACES_IN_INFERENCE;
+ } else {
+ rsult = SINGLE_INFERENCE_FIELD;
+ }
+
+ MemFree (str);
+
+ return rsult;
+}
+
diff --git a/api/sqnutils.h b/api/sqnutils.h
index 8496dfb2..279fa9c1 100644
--- a/api/sqnutils.h
+++ b/api/sqnutils.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.128 $
+* $Revision: 6.134 $
*
* File Description:
*
@@ -94,6 +94,16 @@ NLM_EXTERN Int2 EntityIDToGeneticCode (Uint2 entityID, BoolPtr mito, CharPtr tax
NLM_EXTERN Int2 SeqEntryToGeneticCode (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize);
NLM_EXTERN Int2 SeqEntryToBioSource (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize, BioSourcePtr PNTR biopp);
+NLM_EXTERN Boolean BioseqToGeneticCode (
+ BioseqPtr bsp,
+ Int2Ptr gencodep,
+ BoolPtr mitop,
+ BoolPtr plastidp,
+ CharPtr taxnamep,
+ size_t maxsize,
+ BioSourcePtr PNTR biopp
+);
+
NLM_EXTERN SeqLocPtr CreateWholeInterval (SeqEntryPtr sep);
NLM_EXTERN SeqFeatPtr CreateNewFeature (SeqEntryPtr sep, SeqEntryPtr placeHere, Uint1 choice, SeqFeatPtr useThis);
NLM_EXTERN ValNodePtr CreateNewDescriptor (SeqEntryPtr sep, Uint1 choice);
@@ -252,6 +262,7 @@ NLM_EXTERN SqnTagPtr SqnTagFree (SqnTagPtr stp);
NLM_EXTERN CharPtr SqnTagFind (SqnTagPtr stp, CharPtr tag);
NLM_EXTERN void ReadTechFromString (CharPtr str, MolInfoPtr mip);
+NLM_EXTERN void ReadCompletenessFromString (CharPtr str, MolInfoPtr mip);
/* functions to extract BioSource, MolInfo, and Bioseq information from parsed titles */
@@ -356,6 +367,22 @@ NLM_EXTERN void KeyTagClear (KeyTag PNTR ktp);
NLM_EXTERN Int2 KeyFromTag (KeyTag PNTR ktp, CharPtr tag);
NLM_EXTERN CharPtr TagFromKey (KeyTag PNTR ktp, Int2 key);
+/* inference qualifier utility */
+
+/*
+ * Result codes returned by ValidateInferenceQualifier.  Checks run in
+ * sequence, so when several problems apply the code of the last failing
+ * check is the one returned.
+ */
+
+#define VALID_INFERENCE 0 /* no problem detected */
+#define EMPTY_INFERENCE_STRING 1 /* presumably returned for a NULL or blank val - check is outside the reviewed section, confirm */
+#define BAD_INFERENCE_PREFIX 2 /* val does not begin with a recognized inference prefix */
+#define BAD_INFERENCE_BODY 3 /* nothing but white space or colons follows the prefix */
+#define SINGLE_INFERENCE_FIELD 4 /* body has no ':' separating a database name from an identifier */
+#define SPACES_IN_INFERENCE 5 /* a space remains inside the database or identifier field after trimming */
+#define SAME_SPECIES_MISUSED 6 /* "(same species)" given with a prefix whose table index is above 7 */
+#define BAD_INFERENCE_ACCESSION 7 /* INSD or RefSeq identifier rejected by ValidateAccnDotVer (codes other than -5 and -6) */
+#define BAD_INFERENCE_ACC_VERSION 8 /* INSD or RefSeq identifier for which ValidateAccnDotVer returned -5 or -6 */
+#define ACC_VERSION_NOT_PUBLIC 9 /* fetchAccn was TRUE, a SeqId-to-gi fetch procedure is registered, and GetGIForSeqId returned 0 */
+
+/*
+ * val is the text of the inference qualifier.  fetchAccn enables the
+ * accession lookup that can produce ACC_VERSION_NOT_PUBLIC; it is only
+ * attempted for INSD or RefSeq identifiers that pass ValidateAccnDotVer.
+ */
+NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn);
+
+
/* from Colombe */
NLM_EXTERN SeqLocPtr StringSearchInBioseq (SeqIdPtr sip, CharPtr sub);
@@ -391,6 +418,9 @@ NLM_EXTERN SeqEntryPtr SetPhrapContigOrder (SeqEntryPtr head, CharPtr contigs);
NLM_EXTERN void PrintQualityScores (BioseqPtr bsp, FILE *fp);
+NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_left);
+NLM_EXTERN void TrimQualityScores (BioseqPtr bsp, Int4 num_to_trim, Boolean from_left);
+
typedef void (*QualityWriteFunc) (CharPtr buf, Uint4 buflen, Pointer userdata);
NLM_EXTERN void PrintQualityScoresToBuffer (BioseqPtr bsp, Boolean gapIsZero, Pointer userdata, QualityWriteFunc callback);
diff --git a/api/subutil.c b/api/subutil.c
index 801aafb2..07588e4d 100644
--- a/api/subutil.c
+++ b/api/subutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.57 $
+* $Revision: 6.62 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -40,6 +40,21 @@
*
*
* $Log: subutil.c,v $
+* Revision 6.62 2006/02/06 19:00:15 kans
+* added CreateFeatureFetchPolicyUserObject
+*
+* Revision 6.61 2006/01/23 20:57:04 kans
+* cosmetic change
+*
+* Revision 6.60 2006/01/23 16:39:57 kans
+* added CreateAnnotDescCommentPolicyUserObject
+*
+* Revision 6.59 2006/01/17 20:47:05 kans
+* fixed AddIDsToGenomeProjectsDBUserObject
+*
+* Revision 6.58 2006/01/17 18:25:06 kans
+* support for genomeprojectsdb user object
+*
* Revision 6.57 2005/10/26 21:30:46 kans
* bug fix in AddSecondaryAccnToEntry provided by Joe Carlson
*
@@ -4941,7 +4956,7 @@ NLM_EXTERN UserObjectPtr CreateModelEvidenceUserObject (
)
{
- UserFieldPtr curr;
+ UserFieldPtr curr;
ObjectIdPtr oip;
UserFieldPtr prev = NULL;
UserObjectPtr uop;
@@ -5261,3 +5276,126 @@ NLM_EXTERN void AddAccessionToTpaAssemblyUserObject (UserObjectPtr uop, CharPtr
prev->next = ufp;
}
+/*
+ * Allocates a new user object with no data fields whose type label is the
+ * string "GenomeProjectsDB".  The new object is returned to the caller.
+ */
+
+NLM_EXTERN UserObjectPtr CreateGenomeProjectsDBUserObject (
+  void
+)
+
+{
+  UserObjectPtr  result;
+  ObjectIdPtr    typeLabel;
+
+  result = UserObjectNew ();
+
+  /* the type label identifies the object to AddIDsToGenomeProjectsDBUserObject */
+  typeLabel = ObjectIdNew ();
+  typeLabel->str = StringSave ("GenomeProjectsDB");
+  result->type = typeLabel;
+
+  return result;
+}
+
+/*
+ * Appends two integer fields, "ProjectID" followed by "ParentID", to the end
+ * of the data chain of a "GenomeProjectsDB" user object.
+ *
+ * Returns uop after the fields have been added.  Returns NULL, leaving uop
+ * untouched, when uop is NULL or when its type label does not match
+ * "GenomeProjectsDB" (compared without regard to case).
+ */
+
+NLM_EXTERN UserObjectPtr AddIDsToGenomeProjectsDBUserObject (
+  UserObjectPtr uop,
+  Int4 projectID,
+  Int4 parentID
+)
+
+{
+  UserFieldPtr  curr;
+  UserFieldPtr  tail = NULL;
+  ObjectIdPtr   oip;
+  UserFieldPtr  ufp;
+
+  /* the function returns a pointer, so the failure exits must return a value */
+  if (uop == NULL) return NULL;
+  oip = uop->type;
+  if (oip == NULL || StringICmp (oip->str, "GenomeProjectsDB") != 0) return NULL;
+
+  /* locate the current last field, if any, so existing data is preserved */
+  for (curr = uop->data; curr != NULL; curr = curr->next) {
+    tail = curr;
+  }
+
+  ufp = UserFieldNew ();
+  oip = ObjectIdNew ();
+  oip->str = StringSave ("ProjectID");
+  ufp->label = oip;
+  ufp->choice = 2; /* integer */
+  ufp->data.intvalue = projectID;
+
+  if (tail != NULL) {
+    tail->next = ufp;
+  } else {
+    uop->data = ufp;
+  }
+  tail = ufp;
+
+  ufp = UserFieldNew ();
+  oip = ObjectIdNew ();
+  oip->str = StringSave ("ParentID");
+  ufp->label = oip;
+  ufp->choice = 2; /* integer */
+  ufp->data.intvalue = parentID;
+
+  /* ParentID always directly follows ProjectID */
+  tail->next = ufp;
+
+  return uop;
+}
+
+/* annot desc comment policy user object */
+
+/*
+ * Builds an "AnnotDescCommentPolicy" user object holding a single string
+ * field labelled "Policy".  The field value is "ShowInComment" when
+ * showInCommentBlock is TRUE and "ShowInNote" otherwise.
+ */
+
+NLM_EXTERN UserObjectPtr CreateAnnotDescCommentPolicyUserObject (
+  Boolean showInCommentBlock
+)
+
+{
+  ObjectIdPtr    label;
+  UserFieldPtr   policyField;
+  UserObjectPtr  result;
+
+  result = UserObjectNew ();
+  label = ObjectIdNew ();
+  label->str = StringSave ("AnnotDescCommentPolicy");
+  result->type = label;
+
+  policyField = UserFieldNew ();
+  label = ObjectIdNew ();
+  label->str = StringSave ("Policy");
+  policyField->label = label;
+  policyField->choice = 1; /* visible string */
+  policyField->data.ptrvalue = (Pointer) StringSave (showInCommentBlock ? "ShowInComment" : "ShowInNote");
+
+  result->data = policyField;
+  return result;
+}
+
+/* feature fetch policy user object */
+
+/*
+ * Builds a "FeatureFetchPolicy" user object holding a single string field
+ * labelled "Policy" whose value is a copy of the policy argument.
+ * Returns NULL without allocating anything when StringHasNoText reports
+ * that policy has no text.
+ */
+
+NLM_EXTERN UserObjectPtr CreateFeatureFetchPolicyUserObject (
+  CharPtr policy
+)
+
+{
+  ObjectIdPtr    label;
+  UserFieldPtr   policyField;
+  UserObjectPtr  result;
+
+  if (StringHasNoText (policy)) {
+    return NULL;
+  }
+
+  result = UserObjectNew ();
+  label = ObjectIdNew ();
+  label->str = StringSave ("FeatureFetchPolicy");
+  result->type = label;
+
+  policyField = UserFieldNew ();
+  label = ObjectIdNew ();
+  label->str = StringSave ("Policy");
+  policyField->label = label;
+  policyField->choice = 1; /* visible string */
+  policyField->data.ptrvalue = (Pointer) StringSave (policy);
+
+  result->data = policyField;
+  return result;
+}
+
diff --git a/api/subutil.h b/api/subutil.h
index 37cd14f3..eaba6f79 100644
--- a/api/subutil.h
+++ b/api/subutil.h
@@ -31,7 +31,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.50 $
+* $Revision: 6.53 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -42,6 +42,15 @@
*
*
* $Log: subutil.h,v $
+* Revision 6.53 2006/02/06 19:00:15 kans
+* added CreateFeatureFetchPolicyUserObject
+*
+* Revision 6.52 2006/01/23 16:39:57 kans
+* added CreateAnnotDescCommentPolicyUserObject
+*
+* Revision 6.51 2006/01/17 18:25:07 kans
+* support for genomeprojectsdb user object
+*
* Revision 6.50 2005/06/10 14:06:16 kans
* added GENOME_hydrogenosome define
*
@@ -1679,6 +1688,26 @@ NLM_EXTERN void AddAccessionToTpaAssemblyUserObject (
Int4 to
);
+/* genome projects db user object */
+
+/* creates an empty user object whose type label is "GenomeProjectsDB" */
+NLM_EXTERN UserObjectPtr CreateGenomeProjectsDBUserObject (
+ void
+);
+/* appends integer "ProjectID" and "ParentID" fields to a "GenomeProjectsDB" user object */
+NLM_EXTERN UserObjectPtr AddIDsToGenomeProjectsDBUserObject (
+ UserObjectPtr uop,
+ Int4 projectID,
+ Int4 parentID
+);
+
+/* annot desc comment policy user object */
+/* "Policy" field is "ShowInComment" if showInCommentBlock is TRUE, otherwise "ShowInNote" */
+NLM_EXTERN UserObjectPtr CreateAnnotDescCommentPolicyUserObject (
+ Boolean showInCommentBlock
+);
+
+/* feature fetch policy user object */
+/* "Policy" field is a copy of policy; returns NULL if policy has no text */
+
+NLM_EXTERN UserObjectPtr CreateFeatureFetchPolicyUserObject (
+ CharPtr policy
+);
+
+
#ifdef __cplusplus
}
@@ -1739,6 +1768,7 @@ NLM_EXTERN void AddAccessionToTpaAssemblyUserObject (
( symbol "R", name "Arginine"),
( symbol "S", name "Serine"),
( symbol "T", name "Threoine"),
+ ( symbol "U", name "Selenocysteine"),
( symbol "V", name "Valine"),
( symbol "W", name "Tryptophan" ),
( symbol "X", name "Undetermined or atypical"),
@@ -1753,38 +1783,56 @@ NLM_EXTERN void AddAccessionToTpaAssemblyUserObject (
* Genetic Code id's and names from /ncbi/data/gc.prt
* gc.prt lists the legal start codons and genetic codes fully
*
- name "Standard" ,
- id 1 ,
+ name "Standard" ,
+ id 1 ,
+
+ name "Vertebrate Mitochondrial" ,
+ id 2 ,
+
+ name "Yeast Mitochondrial" ,
+ id 3 ,
+
+ name "Mold Mitochondrial and Mycoplasma" ,
+ id 4 ,
+
+ name "Invertebrate Mitochondrial" ,
+ id 5 ,
+
+ name "Ciliate Macronuclear and Dasycladacean" ,
+ id 6 ,
+
+ name "Echinoderm Mitochondrial" ,
+ id 9 ,
- name "Vertebrate Mitochondrial" ,
- id 2 ,
+ name "Euplotid Macronuclear" ,
+ id 10 ,
- name "Yeast Mitochondrial" ,
- id 3 ,
+ name "Bacterial and Plant Plastid" ,
+ id 11 ,
- name "Mold Mitochondrial and Mycoplasma" ,
- id 4 ,
+ name "Alternative Yeast Nuclear" ,
+ id 12 ,
- name "Invertebrate Mitochondrial" ,
- id 5 ,
+ name "Ascidian Mitochondrial" ,
+ id 13 ,
- name "Ciliate Macronuclear and Daycladacean" ,
- id 6 ,
+ name "Alternative Flatworm Mitochondrial" ,
+ id 14 ,
- name "Protozoan Mitochondrial (and Kinetoplast)" ,
- id 7 ,
+ name "Blepharisma Macronuclear" ,
+ id 15 ,
- name "Plant Mitochondrial" ,
- id 8 ,
+ name "Chlorophycean Mitochondrial" ,
+ id 16 ,
- name "Echinoderm Mitochondrial" ,
- id 9 ,
+ name "Trematode Mitochondrial" ,
+ id 21 ,
- name "Euplotid Macronuclear" ,
- id 10 ,
+ name "Scenedesmus obliquus Mitochondrial" ,
+ id 22 ,
- name "Eubacterial" ,
- id 11 ,
+ name "Thraustochytrium Mitochondrial" ,
+ id 23 ,
*
*
diff --git a/api/tofasta.c b/api/tofasta.c
index 2246549f..dcbbb7d3 100644
--- a/api/tofasta.c
+++ b/api/tofasta.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.148 $
+* $Revision: 6.150 $
*
* File Description: various sequence objects to fasta output
*
@@ -39,6 +39,12 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: tofasta.c,v $
+* Revision 6.150 2006/01/10 22:19:29 kans
+* CreateDefLine calls DoTpaPrefix to handle TPA_exp and TPA_inf
+*
+* Revision 6.149 2005/12/07 19:49:46 kans
+* in BioseqFastaStreamInternal, bail if virtual Bioseq
+*
* Revision 6.148 2005/09/12 17:44:21 kans
* in complete chromosome title, use virus instead of virion
*
@@ -1584,6 +1590,8 @@ static Int4 BioseqFastaStreamInternal (
if (bsp == NULL && slp == NULL) return 0;
if (fp == NULL && bs == NULL) return 0;
+ if (bsp != NULL && bsp->repr == Seq_repr_virtual) return 0;
+
if (linelen > 128) {
linelen = 128;
}
@@ -4215,6 +4223,46 @@ static Boolean NotSpecialTaxName (CharPtr taxname)
return TRUE;
}
+/*
+ * Decides which TPA prefix, if any, must be written in front of a title.
+ *
+ * On return *ttl points at the title text to copy (title itself, or title
+ * past a leading "TPA: " when that is being replaced by "TPA_exp: " or
+ * "TPA_inf: ") and *pfx points at the prefix literal to emit, or is NULL.
+ * tpa_exp takes precedence over tpa_inf.
+ *
+ * Returns TRUE when *pfx has been set and should be emitted; FALSE when the
+ * title is NULL or empty, the record is not TPA, or the title already
+ * starts with the wanted prefix (compared without regard to case).
+ */
+
+static Boolean DoTpaPrefix (
+  CharPtr title,
+  CharPtr PNTR ttl,
+  CharPtr PNTR pfx,
+  Boolean is_tpa,
+  Boolean tpa_exp,
+  Boolean tpa_inf
+)
+
+{
+  CharPtr  wanted;
+
+  /* must be called with ttl and pfx pointing to stack variables */
+  *ttl = title;
+  *pfx = NULL;
+
+  if (title == NULL || *title == '\0') return FALSE;
+  if (! is_tpa) return FALSE;
+
+  if (tpa_exp) {
+    wanted = "TPA_exp: ";
+  } else if (tpa_inf) {
+    wanted = "TPA_inf: ";
+  } else {
+    /* plain TPA record, nothing is stripped from the title */
+    if (StringNICmp (title, "TPA: ", 5) == 0) return FALSE;
+    *pfx = "TPA: ";
+    return TRUE;
+  }
+
+  /* both specific prefixes are 9 characters long */
+  if (StringNICmp (title, wanted, 9) == 0) return FALSE;
+  *pfx = wanted;
+
+  /* a generic "TPA: " already in the title is replaced by the specific prefix */
+  if (StringNICmp (title, "TPA: ", 5) == 0) {
+    *ttl = title + 5;
+  }
+  return TRUE;
+}
+
/*****************************************************************************
*
* CreateDefLine(iip, bsp, buf, buflen, tech)
@@ -4230,7 +4278,7 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
CharPtr accession, CharPtr organism, Boolean ignoreTitle, Boolean extProtTitle)
{
ValNodePtr vnp = NULL;
- CharPtr tmp = NULL, title = NULL;
+ CharPtr tmp = NULL, title = NULL, ttl = NULL, pfx = NULL;
PdbBlockPtr pbp;
PatentSeqIdPtr psip;
PDBSeqIdPtr pdbip;
@@ -4245,7 +4293,8 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
"WORKING DRAFT SEQUENCE",
"*** SEQUENCING IN PROGRESS ***" };
Boolean htg_tech = FALSE, htgs_draft = FALSE, htgs_cancelled = FALSE,
- is_nc = FALSE, is_nm = FALSE, is_nr = FALSE, is_tpa = FALSE;
+ is_nc = FALSE, is_nm = FALSE, is_nr = FALSE, is_tpa = FALSE,
+ tpa_exp = FALSE, tpa_inf = FALSE;
MolInfoPtr mip;
GBBlockPtr gbp = NULL;
EMBLBlockPtr ebp = NULL;
@@ -4321,7 +4370,7 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
buf += diff;
}
diff = 0;
- if (htg_tech) {
+ if (htg_tech || is_tpa) {
vnp=GatherDescrOnBioseq(iip, bsp, Seq_descr_genbank,TRUE);
if (vnp != NULL) {
gbp = (GBBlockPtr) vnp->data.ptrvalue;
@@ -4337,6 +4386,20 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
}
}
}
+ if (keywords != NULL) {
+ for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
+ if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_DRAFT") == 0) {
+ htgs_draft = TRUE;
+ } else if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_CANCELLED") == 0) {
+ htgs_cancelled = TRUE;
+ } else if (StringICmp ((CharPtr) vnp->data.ptrvalue, "TPA:experimental") == 0) {
+ tpa_exp = TRUE;
+ } else if (StringICmp ((CharPtr) vnp->data.ptrvalue, "TPA:inferential") == 0) {
+ tpa_inf = TRUE;
+ }
+ }
+ }
+
if (! ignoreTitle)
{
vnp=GatherDescrOnBioseq(iip, bsp, Seq_descr_title,TRUE);
@@ -4409,12 +4472,14 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
}
/* some titles may have zero length */
if (title != NULL && *title != '\0') {
- if (is_tpa && StringNICmp (title, "TPA: ", 5) != 0) {
- diff = LabelCopy (buf, "TPA: ", buflen);
+ ttl = title;
+ pfx = NULL;
+ if (DoTpaPrefix (title, &ttl, &pfx, is_tpa, tpa_exp, tpa_inf)) {
+ diff = LabelCopy (buf, pfx, buflen);
buflen -= diff;
buf += diff;
}
- diff = LabelCopy(buf, title, buflen);
+ diff = LabelCopy (buf, ttl, buflen);
/* remove trailing blanks and periods */
tmp = buf + diff - 1; /* point at last character */
while (tmp >= buf && ((*tmp <= ' ') || (*tmp == '.'))) {
@@ -4495,12 +4560,14 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
diff = LabelCopy(buf, title, buflen);
}
*/
- if (is_tpa && StringNICmp (title, "TPA: ", 5) != 0) {
- diff = LabelCopy (buf, "TPA: ", buflen);
- buflen -= diff;
- buf += diff;
- }
- diff = LabelCopy(buf, title, buflen);
+ ttl = title;
+ pfx = NULL;
+ if (DoTpaPrefix (title, &ttl, &pfx, is_tpa, tpa_exp, tpa_inf)) {
+ diff = LabelCopy (buf, pfx, buflen);
+ buflen -= diff;
+ buf += diff;
+ }
+ diff = LabelCopy (buf, ttl, buflen);
if (organism == NULL && taxname != NULL) {
organism = taxname;
iip = NULL;
@@ -4512,15 +4579,17 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
if (title == NULL) {
title = UseOrgMods(bsp, NULL);
}
- if (is_tpa && StringNICmp (title, "TPA: ", 5) != 0) {
- diff = LabelCopy (buf, "TPA: ", buflen);
- buflen -= diff;
- buf += diff;
- }
- if (title != NULL) {
- diff = LabelCopy(buf, title, buflen);
+ ttl = title;
+ pfx = NULL;
+ if (DoTpaPrefix (title, &ttl, &pfx, is_tpa, tpa_exp, tpa_inf)) {
+ diff = LabelCopy (buf, pfx, buflen);
+ buflen -= diff;
+ buf += diff;
+ }
+ if (ttl != NULL) {
+ diff = LabelCopy (buf, ttl, buflen);
} else {
- diff = LabelCopy(buf, "No definition line found", buflen);
+ diff = LabelCopy (buf, "No definition line found", buflen);
}
}
}
@@ -4537,12 +4606,14 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
title = UseOrgMods(bsp, NULL);
organism = NULL;
if (title != NULL) {
- if (is_tpa && StringNICmp (title, "TPA: ", 5) != 0) {
- diff = LabelCopy (buf, "TPA: ", buflen);
- buflen -= diff;
- buf += diff;
- }
- diff = LabelCopy(buf, title, buflen);
+ ttl = title;
+ pfx = NULL;
+ if (DoTpaPrefix (title, &ttl, &pfx, is_tpa, tpa_exp, tpa_inf)) {
+ diff = LabelCopy (buf, pfx, buflen);
+ buflen -= diff;
+ buf += diff;
+ }
+ diff = LabelCopy (buf, ttl, buflen);
buflen -= diff;
buf += diff;
}
@@ -4564,15 +4635,6 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
i = 0;
}
} else {
- if (keywords != NULL) {
- for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
- if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_DRAFT") == 0) {
- htgs_draft = TRUE;
- } else if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_CANCELLED") == 0) {
- htgs_cancelled = TRUE;
- }
- }
- }
if (htgs_draft) {
if (StringStr(title, "WORKING DRAFT") == NULL) {
doit = TRUE;
@@ -4629,12 +4691,14 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
title = UseOrgMods(bsp, NULL);
organism = NULL;
if (title != NULL) {
- if (is_tpa && StringNICmp (title, "TPA: ", 5) != 0) {
- diff = LabelCopy (buf, "TPA: ", buflen);
- buflen -= diff;
- buf += diff;
- }
- diff = LabelCopy(buf, title, buflen);
+ ttl = title;
+ pfx = NULL;
+ if (DoTpaPrefix (title, &ttl, &pfx, is_tpa, tpa_exp, tpa_inf)) {
+ diff = LabelCopy (buf, pfx, buflen);
+ buflen -= diff;
+ buf += diff;
+ }
+ diff = LabelCopy (buf, ttl, buflen);
buflen -= diff;
buf += diff;
}
diff --git a/api/txalign.c b/api/txalign.c
index cf366ced..8af8d57e 100644
--- a/api/txalign.c
+++ b/api/txalign.c
@@ -1,4 +1,4 @@
-/* $Id: txalign.c,v 6.91 2005/05/16 17:39:20 papadopo Exp $
+/* $Id: txalign.c,v 6.92 2006/01/24 18:37:08 papadopo Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -27,13 +27,16 @@
*
* File Name: txalign.c
*
-* $Revision: 6.91 $
+* $Revision: 6.92 $
*
* File Description: Formating of text alignment for the BLAST output
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: txalign.c,v $
+* Revision 6.92 2006/01/24 18:37:08 papadopo
+* from Mike Gertz: Use enumerated values, rather than #define'd constants, to specify the composition adjustment method
+*
* Revision 6.91 2005/05/16 17:39:20 papadopo
* From Alejandro Schaffer: if matrix is adjusted due to composition in
* blastpgp, then print the method for adjustment in the output alignments.
@@ -547,6 +550,7 @@
#include <salpstat.h>
#include <fdlKludge.h>
#include <blastdef.h>
+#include <algo/blast/composition_adjustment/composition_constants.h>
#define BUFFER_LENGTH 2048
#define MIN_INS_SPACE 50
@@ -5662,7 +5666,7 @@ NLM_EXTERN int LIBCALLBACK FormatScoreFunc(AlignStatOptionPtr asop)
Char fastaLongIdBuf[BUFFER_LENGTH+1];
SeqIdPtr firstSip=NULL;
Int4 num_ident;
- Int2 comp_adjustment_method = NO_COMP_ADJUSTMENT;
+ Int2 comp_adjustment_method = eNoCompositionBasedStats;
sp = asop->sp;
@@ -5881,10 +5885,10 @@ NLM_EXTERN int LIBCALLBACK FormatScoreFunc(AlignStatOptionPtr asop)
else
sprintf(buffer, "Expect(%ld+) = %s", (long) number, eval_buff_ptr);
fprintf(asop->fp, "%s", buffer);
- if (NO_COMP_ADJUSTMENT != comp_adjustment_method) {
- if (COMP_BASED_STATISTICS == comp_adjustment_method)
+ if (eNoCompositionBasedStats != comp_adjustment_method) {
+ if (eCompositionBasedStats == comp_adjustment_method)
sprintf(buffer,", Method: Composition-based stats.");
- if (COMP_MATRIX_ADJUSTMENT == comp_adjustment_method)
+ if (eCompositionMatrixAdjust == comp_adjustment_method)
sprintf(buffer,", Method: Compositional matrix adjust.");
fprintf(asop->fp, "%s", buffer);
}
diff --git a/api/utilpars.c b/api/utilpars.c
index 69d7070e..4e82173a 100644
--- a/api/utilpars.c
+++ b/api/utilpars.c
@@ -3,6 +3,9 @@
* -- all common routines for main programs in this directory
*
* $Log: utilpars.c,v $
+* Revision 6.3 2006/01/31 22:31:49 kans
+* added O for pyrrolysine and J for leu or ile ambiguity
+*
* Revision 6.2 2001/12/06 17:00:41 kans
* TextSave takes size_t, not Int2, otherwise titin protein tries to allocate negative number
*
@@ -113,21 +116,23 @@ ParFlat_AA1_to_AA3
"Gly", 'G',
"His", 'H',
"Ile", 'I',
- "Lys", 'K', /* notice no 'J', breaks naive meaning of index -Karl */
+ "Xle", 'J', /* was - notice no 'J', breaks naive meaning of index -Karl */
+ "Lys", 'K',
"Leu", 'L',
"Met", 'M',
"Asn", 'N',
- "Pro", 'P', /* no 'O' */
+ "Pyl", 'O', /* was - no 'O' */
+ "Pro", 'P',
"Gln", 'Q',
"Arg", 'R',
"Ser", 'S',
"Thr", 'T',
"Val", 'V',
"Trp", 'W',
- "Xxx", 'X', /* no U */
+ "Sec", 'U', /* was - not in iupacaa */
+ "Xxx", 'X',
"Tyr", 'Y',
"Glx", 'Z',
- "Sec", 'U', /* not in iupacaa */
"TERM", '*', /* not in iupacaa */ /*changed by Tatiana 06.07.95?`*/
"OTHER", 'X'};
diff --git a/api/utilpars.h b/api/utilpars.h
index 9856b683..57551c01 100644
--- a/api/utilpars.h
+++ b/api/utilpars.h
@@ -2,6 +2,9 @@
* utilpars.h:
*
* $Log: utilpars.h,v $
+* Revision 6.2 2006/01/31 22:31:49 kans
+* added O for pyrrolysine and J for leu or ile ambiguity
+*
* Revision 6.1 2001/12/06 17:00:41 kans
* TextSave takes size_t, not Int2, otherwise titin protein tries to allocate negative number
*
@@ -37,7 +40,7 @@
#endif
-#define ParFlat_TOTAL_AA 26
+#define ParFlat_TOTAL_AA 28
/*************************************************************************/
diff --git a/api/valid.c b/api/valid.c
index 64f4f377..0a8ecae1 100644
--- a/api/valid.c
+++ b/api/valid.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.682 $
+* $Revision: 6.726 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,138 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: valid.c,v $
+* Revision 6.726 2006/02/27 17:49:34 kans
+* added adjusted for low-quality genome exception for RefSeq models
+*
+* Revision 6.725 2006/02/24 22:49:39 kans
+* call BioseqToGeneticCode instead of much less efficient functions
+*
+* Revision 6.724 2006/02/23 23:05:53 kans
+* added ERR_SEQ_FEAT_FeatureSeqIDCaseDifference
+*
+* Revision 6.723 2006/02/23 22:36:05 kans
+* added ERR_SEQ_INST_CaseDifferenceInSeqID
+*
+* Revision 6.722 2006/02/17 20:12:06 kans
+* fixed text of ITSdoesNotAbutRRNA for one overlap case
+*
+* Revision 6.721 2006/02/16 19:34:28 kans
+* use vsp->is_smupd_in_sep to suppress ERR_SEQ_FEAT_FeatureRefersToAccession
+*
+* Revision 6.720 2006/02/15 17:08:55 kans
+* made ITSdoesNotAbutRRNA more sophisticated, also handles tRNA inside small and large rRNA
+*
+* Revision 6.719 2006/02/10 18:26:50 kans
+* added ERR_SEQ_FEAT_ITSdoesNotAbutRRNA
+*
+* Revision 6.718 2006/02/08 17:49:25 kans
+* added ERR_SEQ_FEAT_SelfReferentialProduct
+*
+* Revision 6.717 2006/02/08 16:27:18 kans
+* report ERR_SEQ_FEAT_TranslExcept even if protein is okay
+*
+* Revision 6.716 2006/02/08 14:34:56 kans
+* [fwd/rev]-primer-[seq/name] changed to [fwd/rev]-pcr-primer-[seq/name]
+*
+* Revision 6.715 2006/02/07 20:36:37 kans
+* ERR_SEQ_INST_InternalNsAdjacentToGap shows first position
+*
+* Revision 6.714 2006/02/07 20:29:59 kans
+* added ERR_SEQ_INST_InternalNsAdjacentToGap
+*
+* Revision 6.713 2006/02/06 16:26:03 kans
+* check for both TPA:experimental and TPA:inferential keywords
+*
+* Revision 6.712 2006/02/03 19:37:12 kans
+* ERR_SEQ_INST_InternalNsInSeq[Lit/Raw] add one to zero-based position
+*
+* Revision 6.711 2006/02/02 22:24:38 kans
+* warn if product gbqual on trna
+*
+* Revision 6.710 2006/01/31 22:31:49 kans
+* added O for pyrrolysine and J for leu or ile ambiguity
+*
+* Revision 6.709 2006/01/26 19:54:26 kans
+* added ERR_SEQ_FEAT_FeatureRefersToAccession to look for inconsistent use of gi and accession (with or without version) for sfp->location or sfp->product references in a single blob
+*
+* Revision 6.708 2006/01/25 20:09:33 kans
+* BadDeltaSeq not done if MI_TECH_composite_wgs_htgs
+*
+* Revision 6.707 2006/01/24 20:17:12 kans
+* ERR_SEQ_FEAT_InternalStop goes to SEV_REJECT if has GI and GenBank/EMBL/DDBJ and not RefSeq
+*
+* Revision 6.706 2006/01/24 19:06:39 kans
+* added ERR_SEQ_DESCR_BadPCRPrimerSequence
+*
+* Revision 6.705 2006/01/24 15:46:08 kans
+* added ERR_SEQ_FEAT_HpotheticalProteinMismatch
+*
+* Revision 6.704 2006/01/18 20:55:08 kans
+* CheckTrnaCodons reports BadTrnaAA if aa is 0 or 255 - usually meaning it was not set
+*
+* Revision 6.703 2006/01/13 20:26:24 kans
+* lower severity of duplicate feature error to warning if partial viral genes
+*
+* Revision 6.702 2006/01/10 18:22:18 kans
+* find embedded html strings only if VALIDATE_ALL
+*
+* Revision 6.701 2006/01/05 20:23:00 kans
+* set isCuratedFlybase flag even if GenBank record for lowering duplicate feature severity, suppressing if dicistronic gene
+*
+* Revision 6.700 2006/01/04 21:29:22 kans
+* use FindStringsInEntity to find embedded script tags by finite state machine
+*
+* Revision 6.699 2006/01/03 19:48:39 kans
+* added javascript: to findrepstrs
+*
+* Revision 6.698 2006/01/03 16:52:54 kans
+* ValidateInferenceQualifier takes fetchAccn argument, added ACC_VERSION_NOT_PUBLIC reply type
+*
+* Revision 6.697 2006/01/03 14:31:39 kans
+* LookForMultipleUnpubPubs relies on SetPubScratchData and ClearPubScratchData to make unique strings only once per pub
+*
+* Revision 6.696 2005/12/30 16:24:37 kans
+* inference qualifier for INSD or RefSeq requires valid accession.version
+*
+* Revision 6.695 2005/12/29 22:24:02 kans
+* added <applet and <form to list of strings to check for script injection attack
+*
+* Revision 6.694 2005/12/29 21:45:57 kans
+* added ERR_GENERIC_EmbeddedScript, use FindReplaceInEntity with callback to find possible javascript injection attacks
+*
+* Revision 6.693 2005/12/29 19:20:28 kans
+* InternalNsInSeqRaw printed for each run of Ns, not just for maximum length
+*
+* Revision 6.692 2005/12/23 20:16:32 kans
+* added ERR_SEQ_FEAT_InvalidInferenceValue
+*
+* Revision 6.691 2005/12/23 18:34:18 kans
+* modified cds/mrna/gene conditions on reporting partials
+*
+* Revision 6.690 2005/12/16 18:42:59 kans
+* dicistronic gene exception turns off Duplicate Feature and SuspiciousGeneXref if curated Drosophila
+*
+* Revision 6.689 2005/12/15 14:22:01 kans
+* ERR_SEQ_INST_InternalNsInSeqRaw triggered if >= 100, not > 100
+*
+* Revision 6.688 2005/12/13 23:17:27 kans
+* In Splice acceptor consensus (AG) not found before exon message, print sip if no bsp
+*
+* Revision 6.687 2005/12/13 23:05:22 kans
+* added ERR_GENERIC_CollidingSerialNumbers
+*
+* Revision 6.686 2005/12/13 22:16:55 kans
+* always initialize tbuf in SpliceCheckEx
+*
+* Revision 6.685 2005/12/08 19:50:30 kans
+* FindSameCDS does not suppress if only one end is identical - also require dashes in collection_date
+*
+* Revision 6.684 2005/12/07 21:15:53 kans
+* ERR_SEQ_FEAT_UTRdoesNotAbutCDS always sets UTR feature context, clears once at end
+*
+* Revision 6.683 2005/12/06 22:20:12 kans
+* raised ERR_SEQ_DESCR_BadCountryCode to SEV_ERROR
+*
* Revision 6.682 2005/12/02 15:11:09 kans
* in ValidateSeqFeat, comment out exception for cdregion same as mrna in partial not at start/stop and not consensus splice site
*
@@ -2258,6 +2390,7 @@ static char *this_file = __FILE__;
#include <explore.h>
#include <subutil.h>
#include <tofasta.h>
+#include <findrepl.h>
/*****************************************************************************
*
@@ -2338,6 +2471,9 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
TextFsaPtr sourceQualTags;
Boolean is_htg_in_sep;
Boolean is_refseq_in_sep;
+ Boolean is_smupd_in_sep;
+ Boolean feat_loc_has_gi;
+ Boolean feat_prod_has_gi;
if (vsp == NULL)
return;
@@ -2369,6 +2505,9 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
sourceQualTags = vsp->sourceQualTags;
is_htg_in_sep = vsp->is_htg_in_sep;
is_refseq_in_sep = vsp->is_refseq_in_sep;
+ is_smupd_in_sep = vsp->is_smupd_in_sep;
+ feat_loc_has_gi = vsp->feat_loc_has_gi;
+ feat_prod_has_gi = vsp->feat_prod_has_gi;
MemSet ((VoidPtr) vsp, 0, sizeof (ValidStruct));
vsp->errbuf = errbuf;
vsp->cutoff = cutoff;
@@ -2397,6 +2536,9 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->sourceQualTags = sourceQualTags;
vsp->is_htg_in_sep = is_htg_in_sep;
vsp->is_refseq_in_sep = is_refseq_in_sep;
+ vsp->is_smupd_in_sep = is_smupd_in_sep;
+ vsp->feat_loc_has_gi = feat_loc_has_gi;
+ vsp->feat_prod_has_gi = feat_prod_has_gi;
return;
}
@@ -2666,7 +2808,9 @@ static CharPtr err1Label [] = {
"TerminalGap",
"OverlappingDeltaRange",
"LeadingX",
- "InternalNsInSeqRaw"
+ "InternalNsInSeqRaw",
+ "InternalNsAdjacentToGap",
+ "CaseDifferenceInSeqID"
};
static CharPtr err2Label [] = {
@@ -2705,7 +2849,8 @@ static CharPtr err2Label [] = {
"BioSourceInconsistency",
"FastaBracketTitle",
"MissingText",
- "BadCollectionDate"
+ "BadCollectionDate",
+ "BadPCRPrimerSequence"
};
static CharPtr err3Label [] = {
@@ -2718,7 +2863,9 @@ static CharPtr err3Label [] = {
"BadPageNumbering",
"MedlineEntryPub",
"BadDate",
- "StructuredCitGenCit"
+ "StructuredCitGenCit",
+ "CollidingSerialNumbers",
+ "EmbeddedScript"
};
static CharPtr err4Label [] = {
@@ -2852,7 +2999,13 @@ static CharPtr err5Label [] = {
"ExceptionProblem",
"PolyAsignalNotRange",
"OldLocusTagMismtach",
- "DuplicateGeneOntologyTerm"
+ "DuplicateGeneOntologyTerm",
+ "InvalidInferenceValue",
+ "HpotheticalProteinMismatch",
+ "FeatureRefersToAccession",
+ "SelfReferentialProduct",
+ "ITSdoesNotAbutRRNA",
+ "FeatureSeqIDCaseDifference"
};
static CharPtr err6Label [] = {
@@ -3441,6 +3594,10 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
ValNodePtr sdp;
BioSourcePtr biop;
PubdescPtr pdp;
+ BioseqPtr bsp;
+ SeqIdPtr sip;
+ Char buf [64];
+ Char tmp [64];
SeqMgrFeatContext context;
vsp = (ValidStructPtr) (gcp->userdata);
@@ -3514,6 +3671,22 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
if (vsp->useSeqMgrIndexes) {
if (SeqMgrGetDesiredFeature (gcp->entityID, NULL, 0, 0, sfp, &context) == NULL) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_UnindexedFeature, "Feature is not indexed");
+ } else {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp != NULL) {
+ sip = SeqLocId (sfp->location);
+ if (sip != NULL && sip->choice != SEQID_GI && sip->choice != SEQID_GIBBSQ && sip->choice != SEQID_GIBBMT) {
+ SeqIdWrite (sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GI || sip->choice == SEQID_GIBBSQ || sip->choice == SEQID_GIBBMT) continue;
+ SeqIdWrite (sip, tmp, PRINTID_FASTA_SHORT, sizeof (tmp) - 1);
+ if (StringICmp (buf, tmp) != 0) continue;
+ if (StringCmp (buf, tmp) == 0) continue;
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_FeatureSeqIDCaseDifference,
+ "Sequence identifier in feature location differs in capitalization with identifier on Bioseq");
+ }
+ }
+ }
}
}
}
@@ -3624,6 +3797,12 @@ typedef struct ftprob {
Uint4 num_tpa_with_hist;
Uint4 num_tpa_without_hist;
Boolean has_gi;
+ Boolean loc_has_gi;
+ Boolean loc_has_just_accn;
+ Boolean loc_has_accn_ver;
+ Boolean prod_has_gi;
+ Boolean prod_has_just_accn;
+ Boolean prod_has_accn_ver;
} FeatProb, PNTR FeatProbPtr;
static void CheckFeatPacking (BioseqPtr bsp, SeqFeatPtr sfp, Uint4Ptr num_misplaced_features)
@@ -3791,6 +3970,87 @@ static void CountGeneXrefs (SeqFeatPtr sfp, Pointer userdata)
(fpp->num_gene_xrefs)++;
}
+/*
+ * Shared classifier for the two Seq-id callbacks below.  Sets *has_gi for a
+ * gi identifier.  For GenBank, EMBL, DDBJ, TPG, TPE, TPD and "other"
+ * identifiers that carry a non-blank accession, sets *has_just_accn when
+ * the version is below 1 and *has_accn_ver otherwise.  Flags are only ever
+ * set, never cleared, so they accumulate across calls.
+ */
+
+static void RecordSeqIdRefType (
+  SeqIdPtr sip,
+  BoolPtr has_gi,
+  BoolPtr has_just_accn,
+  BoolPtr has_accn_ver
+)
+
+{
+  TextSeqIdPtr  tsip;
+
+  if (sip == NULL) return;
+
+  switch (sip->choice) {
+    case SEQID_GI :
+      *has_gi = TRUE;
+      break;
+    case SEQID_GENBANK :
+    case SEQID_EMBL :
+    case SEQID_DDBJ :
+    case SEQID_TPG :
+    case SEQID_TPE :
+    case SEQID_TPD :
+    case SEQID_OTHER :
+      tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+      if (tsip != NULL) {
+        if (StringDoesHaveText (tsip->accession)) {
+          if (tsip->version < 1) {
+            *has_just_accn = TRUE;
+          } else {
+            *has_accn_ver = TRUE;
+          }
+        }
+      }
+      break;
+    default :
+      break;
+  }
+}
+
+/*
+ * Seq-id callback for feature locations; userdata is a FeatProbPtr whose
+ * loc_has_gi, loc_has_just_accn and loc_has_accn_ver flags are updated.
+ */
+
+static void CountSfpLocIdTypes (SeqIdPtr sip, Pointer userdata)
+
+{
+  FeatProbPtr  fpp;
+
+  if (sip == NULL || userdata == NULL) return;
+  fpp = (FeatProbPtr) userdata;
+
+  RecordSeqIdRefType (sip, &(fpp->loc_has_gi), &(fpp->loc_has_just_accn), &(fpp->loc_has_accn_ver));
+}
+
+/*
+ * Seq-id callback for feature products; userdata is a FeatProbPtr whose
+ * prod_has_gi, prod_has_just_accn and prod_has_accn_ver flags are updated.
+ */
+
+static void CountSfpProdIdTypes (SeqIdPtr sip, Pointer userdata)
+
+{
+  FeatProbPtr  fpp;
+
+  if (sip == NULL || userdata == NULL) return;
+  fpp = (FeatProbPtr) userdata;
+
+  RecordSeqIdRefType (sip, &(fpp->prod_has_gi), &(fpp->prod_has_just_accn), &(fpp->prod_has_accn_ver));
+}
+
+/*
+ * Feature callback: runs CountSfpLocIdTypes over the Seq-ids of the
+ * feature's location and CountSfpProdIdTypes over those of its product,
+ * passing userdata through unchanged.  Does nothing if either argument
+ * is NULL.
+ */
+
+static void CountFeatLocIdTypes (SeqFeatPtr sfp, Pointer userdata)
+
+{
+  if (sfp != NULL && userdata != NULL) {
+    VisitSeqIdsInSeqLoc (sfp->location, userdata, CountSfpLocIdTypes);
+    VisitSeqIdsInSeqLoc (sfp->product, userdata, CountSfpProdIdTypes);
+  }
+}
+
static Boolean HasTpaUserObject (BioseqPtr bsp)
{
@@ -3864,6 +4124,7 @@ typedef struct vfcdata {
ValNodePtr uids;
ValNodePtr unpub;
ValNodePtr publshd;
+ ValNodePtr serial;
ValidStructPtr vsp;
} VfcData, PNTR VfcPtr;
@@ -3886,6 +4147,7 @@ static void MakePubTags (PubdescPtr pdp, Pointer userdata)
{
Char buf [1024];
+ CitGenPtr cgp;
Int4 muid = 0, pmid = 0;
VfcPtr vfp;
ValNodePtr vnp;
@@ -3898,6 +4160,16 @@ static void MakePubTags (PubdescPtr pdp, Pointer userdata)
muid = vnp->data.intvalue;
} else if (vnp->choice == PUB_PMid) {
pmid = vnp->data.intvalue;
+ } else if (vnp->choice == PUB_Gen) {
+ cgp = (CitGenPtr) vnp->data.ptrvalue;
+ if (cgp != NULL && cgp->serial_number > 0) {
+ vnp = ValNodeNew (NULL);
+ if (vnp != NULL) {
+ vnp->data.intvalue = (Int4) cgp->serial_number;
+ vnp->next = vfp->serial;
+ vfp->serial = vnp;
+ }
+ }
}
}
@@ -4018,6 +4290,43 @@ static void CheckFeatCits (SeqFeatPtr sfp, Pointer userdata)
}
}
+/*
+ * Warns once for every serial number that appears more than once in list.
+ *
+ *   vsp   validator state handed to ValidErr
+ *   gcp   gather context; itemID and thistype are zeroed while reporting
+ *         and put back before returning
+ *   list  ValNode chain with the serial numbers in data.intvalue
+ *
+ * Only neighbouring nodes are compared, so equal values have to be
+ * adjacent; ValidateFeatCits sorts the list before calling.
+ */
+static void CheckForCollidingSerials (
+  ValidStructPtr vsp,
+  GatherContextPtr gcp,
+  ValNodePtr list
+)
+
+{
+  Int4        curr, last;
+  Uint2       olditemtype = 0;
+  Uint2       olditemid = 0;
+  Boolean     reported;
+  ValNodePtr  vnp;
+
+  if (vsp == NULL || gcp == NULL || list == NULL) return;
+
+  olditemid = gcp->itemID;
+  olditemtype = gcp->thistype;
+  gcp->itemID = 0;
+  gcp->thistype = 0;
+
+  last = (Int4) list->data.intvalue;
+  /*
+   * reported is TRUE while the current run of equal values has already
+   * produced its warning.  A per-run flag (rather than comparing against
+   * the first list value) lets a duplicate of the smallest serial number
+   * be reported as well.
+   */
+  reported = FALSE;
+  for (vnp = list->next; vnp != NULL; vnp = vnp->next) {
+    curr = (Int4) vnp->data.intvalue;
+    if (curr != last) {
+      reported = FALSE;
+    } else if (! reported) {
+      ValidErr (vsp, SEV_WARNING, ERR_GENERIC_CollidingSerialNumbers,
+                "Multiple publications have serial number %ld", (long) curr);
+      reported = TRUE;
+    }
+    last = curr;
+  }
+
+  gcp->itemID = olditemid;
+  gcp->thistype = olditemtype;
+}
+
static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp)
{
@@ -4038,9 +4347,17 @@ static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp)
VisitFeaturesInSep (sep, (Pointer) &vfd, CheckFeatCits);
+ vsp->bssp = NULL;
+ vsp->bsp = NULL;
+ vsp->sfp = NULL;
+ vsp->descr = NULL;
+ vfd.serial = ValNodeSort (vfd.serial, SortByIntvalue);
+ CheckForCollidingSerials (vsp, vsp->gcp, vfd.serial);
+
ValNodeFree (vfd.uids);
ValNodeFreeData (vfd.unpub);
ValNodeFreeData (vfd.publshd);
+ ValNodeFree (vfd.serial);
}
static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp)
@@ -4097,6 +4414,111 @@ static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp)
}
}
+/*
+ * Working data for ValidateSeqIdCase.  CaptureTextSeqIDs appends one
+ * string node per collected Seq-id: headid is the first node of the
+ * chain and tailid the most recently appended one.
+ */
+typedef struct vsicdata {
+ ValidStructPtr vsp;
+ ValNodePtr headid;
+ ValNodePtr tailid;
+} VsicData, PNTR VsicDataPtr;
+
+/*
+ * Bioseq visitor: writes every Seq-id of bsp, except gi, gibbsq and
+ * gibbmt ids, in PRINTID_FASTA_SHORT form and appends a copy of the text
+ * to the chain kept in the VsicData passed as userdata.
+ */
+static void CaptureTextSeqIDs (BioseqPtr bsp, Pointer userdata)
+
+{
+  ValNodePtr   added;
+  VsicDataPtr  ids;
+  Char         idtext [64];
+  SeqIdPtr     sip;
+
+  if (bsp == NULL || userdata == NULL) return;
+  ids = (VsicDataPtr) userdata;
+
+  for (sip = bsp->id; sip != NULL; sip = sip->next) {
+    if (sip->choice != SEQID_GI && sip->choice != SEQID_GIBBSQ && sip->choice != SEQID_GIBBMT) {
+      SeqIdWrite (sip, idtext, PRINTID_FASTA_SHORT, sizeof (idtext) - 1);
+      /* append after the current tail rather than walking from the head */
+      added = ValNodeCopyStr (&(ids->tailid), 0, idtext);
+      if (ids->headid == NULL) {
+        ids->headid = added;
+      }
+      ids->tailid = added;
+    }
+  }
+}
+
+/*
+ * Removes and frees (node plus data) every node whose string equals,
+ * under StringCmp, the string of the nearest preceding node that was
+ * kept.  The first node is always kept; the list head is returned.
+ */
+static ValNodePtr UniqueValNodeCaseSensitive (ValNodePtr list)
+
+{
+  ValNodePtr  following;
+  ValNodePtr  kept;
+  ValNodePtr  vnp;
+
+  if (list == NULL) return NULL;
+
+  kept = list;
+  for (vnp = list->next; vnp != NULL; vnp = following) {
+    following = vnp->next;
+    if (StringCmp ((CharPtr) kept->data.ptrvalue, (CharPtr) vnp->data.ptrvalue) == 0) {
+      /* detach the duplicate first so that only this node is freed */
+      vnp->next = NULL;
+      kept->next = following;
+      ValNodeFreeData (vnp);
+    } else {
+      kept = vnp;
+    }
+  }
+
+  return list;
+}
+
+/*
+ * Reports, at SEV_REJECT, Seq-ids on the Bioseqs under sep whose text
+ * differs only in letter case.
+ *
+ * The ids are collected with CaptureTextSeqIDs, sorted, stripped of exact
+ * duplicates, and then each id is compared with its predecessor.
+ * NOTE(review): this relies on SortVnpByString placing ids that differ
+ * only by case next to each other - confirm against its definition.
+ *
+ * vsp->bssp, bsp, sfp and descr are cleared.  vsp->gcp points at a local
+ * gather context only for the duration of the call and is put back to its
+ * previous value before returning.
+ */
+static void ValidateSeqIdCase (SeqEntryPtr sep, ValidStructPtr vsp)
+
+{
+  CharPtr           curr;
+  GatherContext     gc;
+  GatherContextPtr  oldgcp;
+  CharPtr           prev;
+  VsicData          vd;
+  ValNodePtr        vnp;
+
+  if (vsp == NULL || sep == NULL) return;
+
+  MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
+  MemSet ((Pointer) &vd, 0, sizeof (VsicData));
+
+  oldgcp = vsp->gcp;
+  vsp->gcp = &gc;
+  vsp->bssp = NULL;
+  vsp->bsp = NULL;
+  vsp->sfp = NULL;
+  vsp->descr = NULL;
+  vd.vsp = vsp;
+
+  VisitBioseqsInSep (sep, (Pointer) &vd, CaptureTextSeqIDs);
+  vd.headid = ValNodeSort (vd.headid, SortVnpByString);
+  vd.headid = UniqueValNodeCaseSensitive (vd.headid);
+
+  curr = NULL;
+  prev = NULL;
+  /* prev = curr sits in the loop header so that it also runs on continue */
+  for (vnp = vd.headid; vnp != NULL; vnp = vnp->next, prev = curr) {
+    curr = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (curr)) continue;
+    if (StringHasNoText (prev)) continue;
+    if (StringICmp (curr, prev) != 0) continue;
+    if (StringCmp (curr, prev) == 0) continue;
+    ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_CaseDifferenceInSeqID,
+              "Sequence identifier differs only by case - %s and %s", curr, prev);
+  }
+
+  vsp->bssp = NULL;
+  vsp->bsp = NULL;
+  vsp->sfp = NULL;
+  vsp->descr = NULL;
+
+  ValNodeFreeData (vd.headid);
+
+  /* gc lives in this stack frame, so vsp must not keep pointing at it */
+  vsp->gcp = oldgcp;
+}
+
static void LookForNC (BioseqPtr bsp, Pointer userdata)
{
@@ -4143,6 +4565,120 @@ static void LookForHTG (SeqDescrPtr sdp, Pointer userdata)
}
}
+/*
+ * Descriptor visitor: sets the Boolean that userdata points to when a
+ * user-object descriptor has the class "SMART_V1.0" (compared without
+ * regard to case).  The flag is never cleared here.
+ */
+static void LookForSMUPD (SeqDescrPtr sdp, Pointer userdata)
+
+{
+  UserObjectPtr  uop;
+
+  if (sdp == NULL || userdata == NULL) return;
+  if (sdp->choice != Seq_descr_user) return;
+
+  uop = (UserObjectPtr) sdp->data.ptrvalue;
+  if (uop != NULL && StringICmp (uop->_class, "SMART_V1.0") == 0) {
+    *((BoolPtr) userdata) = TRUE;
+  }
+}
+
+/*
+ * Descriptor visitor: for an extended pub descriptor, builds the string
+ * "<unique pub label>[; <authors>][; <consortium>]" and stores it in
+ * idx.scratch.  Nothing is stored when no label can be made, when the
+ * pub has no author list, or when the allocation fails.  userdata is
+ * not used.  ClearPubScratchData frees the string again.
+ */
+static void SetPubScratchData (SeqDescrPtr sdp, Pointer userdata)
+
+{
+  AuthListPtr    alp;
+  CharPtr        authors;
+  CitGenPtr      cgp;
+  CharPtr        consortium;
+  Char           label [2048];
+  ObjValNodePtr  ovp;
+  PubdescPtr     pdp;
+  CharPtr        scratch;
+  ValNodePtr     vnp;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_pub || sdp->extended == 0) return;
+  ovp = (ObjValNodePtr) sdp;
+  pdp = (PubdescPtr) sdp->data.ptrvalue;
+  if (pdp == NULL) return;
+
+  vnp = pdp->pub;
+
+  /* skip a leading Cit-gen that carries nothing but a serial number */
+
+  if (vnp != NULL && vnp->choice == PUB_Gen && vnp->next != NULL) {
+    cgp = (CitGenPtr) vnp->data.ptrvalue;
+    if (cgp != NULL &&
+        StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0 &&
+        cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
+      vnp = vnp->next;
+    }
+  }
+
+  if (PubLabelUnique (vnp, label, sizeof (label) - 1, OM_LABEL_CONTENT, TRUE) <= 0) return;
+
+  alp = GetAuthListPtr (pdp, NULL);
+  if (alp == NULL) return;
+
+  consortium = NULL;
+  authors = GetAuthorsString (GENBANK_FMT, alp, &consortium, NULL, NULL);
+
+  /* the extra 10 bytes cover the two "; " separators and the terminator */
+  scratch = MemNew (StringLen (label) + StringLen (authors) + StringLen (consortium) + 10);
+  if (scratch != NULL) {
+    StringCpy (scratch, label);
+    if (StringDoesHaveText (authors)) {
+      StringCat (scratch, "; ");
+      StringCat (scratch, authors);
+    }
+    if (StringDoesHaveText (consortium)) {
+      StringCat (scratch, "; ");
+      StringCat (scratch, consortium);
+    }
+    ovp->idx.scratch = scratch;
+  }
+
+  MemFree (authors);
+  MemFree (consortium);
+}
+
+/*
+ * Descriptor visitor: frees idx.scratch on an extended pub descriptor and
+ * stores the value returned by MemFree back into the field.  userdata is
+ * not used.
+ */
+static void ClearPubScratchData (SeqDescrPtr sdp, Pointer userdata)
+
+{
+  ObjValNodePtr  ovp;
+
+  if (sdp != NULL && sdp->choice == Seq_descr_pub && sdp->extended != 0) {
+    ovp = (ObjValNodePtr) sdp;
+    ovp->idx.scratch = MemFree (ovp->idx.scratch);
+  }
+}
+
+/*
+ * userdata handed to FindRepValidate: the validator state to report
+ * through and the gather context that is pointed at each matching item.
+ * The string member is commented out and currently unused.
+ */
+typedef struct frd {
+ ValidStructPtr vsp;
+ GatherContextPtr gcp;
+ /*
+ CharPtr string;
+ */
+} FindRepData, PNTR FindRepPtr;
+
+/*
+ * Callback given to FindStringsInEntity.  For the item identified by
+ * entityID / itemID / itemtype it points the gather context held in the
+ * FindRepData (userdata) at that item and raises
+ * ERR_GENERIC_EmbeddedScript at SEV_ERROR.
+ *
+ * Does nothing when userdata, or the vsp or gcp inside it, is NULL.
+ */
+static void FindRepValidate (Uint2 entityID, Uint2 itemID, Uint2 itemtype, Pointer userdata)
+
+{
+  FindRepPtr        frp;
+  GatherContextPtr  gcp;
+  ValidStructPtr    vsp;
+
+  frp = (FindRepPtr) userdata;
+  if (frp == NULL) return;
+  vsp = frp->vsp;
+  gcp = frp->gcp;
+  if (vsp == NULL || gcp == NULL) return;
+
+  /* ValidErr is reached through vsp; gcp supplies the item being reported */
+  gcp->entityID = entityID;
+  gcp->itemID = itemID;
+  gcp->thistype = itemtype;
+
+  ValidErr (vsp, SEV_ERROR, ERR_GENERIC_EmbeddedScript, "Script tag found in item");
+}
+
+/*
+ * NULL-terminated list of substrings that ValidateSeqEntry passes to
+ * FindStringsInEntity; each hit is reported through FindRepValidate.
+ */
+static CharPtr findrepstrs [] = {
+ "<script", "<object", "<applet", "<embed", "<form", "javascript:", NULL
+};
+
NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
{
Uint2 entityID = 0;
@@ -4164,7 +4700,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
SeqEntryPtr oldsep;
ErrSev oldsev;
ObjMgrDataPtr omdp;
- SeqEntryPtr topsep;
+ SeqEntryPtr topsep = NULL;
SeqEntryPtr tmp;
ValNodePtr bsplist;
ErrSev sev;
@@ -4172,6 +4708,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
Boolean isGPS = FALSE;
Boolean isPatent = FALSE;
Boolean isPDB = FALSE;
+ FindRepData frd;
if (sep == NULL || vsp == NULL) return FALSE;
@@ -4195,6 +4732,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
topsep = GetTopSeqEntryForEntityID (entityID);
VisitGraphsInSep (topsep, (Pointer) &featprob, CheckGraphPacking);
VisitFeaturesInSep (topsep, (Pointer) &featprob, CountGeneXrefs);
+ VisitFeaturesInSep (topsep, (Pointer) &featprob, CountFeatLocIdTypes);
VisitBioseqsInSep (topsep, (Pointer) &featprob, CheckTpaHist);
} else {
@@ -4247,12 +4785,20 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
vsp->is_htg_in_sep = FALSE;
VisitDescriptorsInSep (sep, (Pointer) &(vsp->is_htg_in_sep), LookForHTG);
+ vsp->is_smupd_in_sep = FALSE;
+ VisitDescriptorsInSep (sep, (Pointer) &(vsp->is_smupd_in_sep), LookForSMUPD);
vsp->is_refseq_in_sep = FALSE;
VisitBioseqsInSep (sep, (Pointer) &(vsp->is_refseq_in_sep), LookForNC);
+ vsp->feat_loc_has_gi = featprob.loc_has_gi;
+ vsp->feat_prod_has_gi = featprob.prod_has_gi;
+
globalvsp = vsp; /* for spell checker */
while (sep != NULL) {
+ /* calculate strings for LookForMultipleUnpubPubs test only once for genome product set efficiency */
+ VisitDescriptorsInSep (sep, NULL, SetPubScratchData);
+
MemSet (&gs, 0, sizeof (GatherScope));
gs.scope = sep; /* default is to scope to this set */
@@ -4384,6 +4930,10 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
ValidateFeatIDs (gc.entityID, vsp);
vsp->gcp = NULL;
+ vsp->gcp = NULL;
+ ValidateSeqIdCase (sep, vsp);
+ vsp->gcp = NULL;
+
if (vsp->validateAlignments) {
vsp->gcp = NULL;
ValidateSeqAlignWithinValidator (vsp, sep, vsp->alignFindRemoteBsp, vsp->doSeqHistAssembly);
@@ -4392,6 +4942,8 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
SeqEntrySetScope (oldsep);
+ VisitDescriptorsInSep (sep, NULL, ClearPubScratchData);
+
if (vsp->useSeqMgrIndexes) {
/* unlock all pre-locked remote genome components */
@@ -4407,6 +4959,22 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
sep = NULL;
}
+ MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
+ gcp = &gc;
+ gc.entityID = ObjMgrGetEntityIDForChoice (sep);
+ vsp->gcp = gcp;
+ frd.vsp = vsp;
+ frd.gcp = gcp;
+
+ limit = vsp->validationLimit;
+ if (limit == VALIDATE_ALL) {
+ /*
+ frd.string = "?";
+ */
+ FindStringsInEntity (entityID, findrepstrs, FALSE, FALSE, FALSE, UPDATE_NEVER,
+ NULL, NULL, NULL, TRUE, FindRepValidate, (Pointer) &frd);
+ }
+
if (do_many) {
for (i = 0; i < 6; i++)
vsp->errors[i] = errors[i];
@@ -5284,16 +5852,23 @@ static void ValidateIDSetAgainstDb (GatherContextPtr gcp, ValidStructPtr vsp, Bi
}
typedef struct enrun {
- Int4 ncount;
- Int4 maxrun;
+ GatherContextPtr gcp;
+ ValidStructPtr vsp;
+ Int4 ncount;
+ Int4 maxrun;
+ Int4 seqpos;
+ Boolean showAll;
+ Boolean inNrun;
} RunOfNs, PNTR RunOfNsPtr;
static void LIBCALLBACK CountAdjacentProc (CharPtr sequence, Pointer userdata)
{
- Char ch;
- RunOfNsPtr ronp;
- CharPtr str;
+ Char ch;
+ GatherContextPtr gcp;
+ RunOfNsPtr ronp;
+ CharPtr str;
+ ValidStructPtr vsp;
ronp = (RunOfNsPtr) userdata;
if (sequence == NULL || ronp == NULL) return;
@@ -5301,20 +5876,29 @@ static void LIBCALLBACK CountAdjacentProc (CharPtr sequence, Pointer userdata)
str = sequence;
ch = *str;
while (ch != '\0') {
+ (ronp->seqpos)++;
if (ch == 'N') {
(ronp->ncount)++;
if (ronp->ncount > ronp->maxrun) {
ronp->maxrun = ronp->ncount;
}
+ ronp->inNrun = TRUE;
} else {
+ if (ronp->inNrun && ronp->showAll && ronp->ncount >= 100) {
+ vsp = ronp->vsp;
+ gcp = ronp->gcp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqRaw, "Run of %ld Ns in raw sequence starting at base %ld",
+ (long) ronp->ncount, (long) (ronp->seqpos - ronp->ncount + 1));
+ }
ronp->ncount = 0;
+ ronp->inNrun = FALSE;
}
str++;
ch = *str;
}
}
-static Int4 CountAdjacentNsInSeqLit (SeqLitPtr slitp, Boolean is_na)
+static Int4 CountAdjacentNsInSeqLit (GatherContextPtr gcp, SeqLitPtr slitp, Boolean is_na)
{
BioseqPtr bsp;
@@ -5340,8 +5924,13 @@ static Int4 CountAdjacentNsInSeqLit (SeqLitPtr slitp, Boolean is_na)
bsp->length = slitp->length;
bsp->id = SeqIdParse ("lcl|countseqlitns");
+ ron.gcp = gcp;
+ ron.vsp = (ValidStructPtr) (gcp->userdata);
ron.ncount = 0;
ron.maxrun = 0;
+ ron.seqpos = 0;
+ ron.showAll = FALSE;
+ ron.inNrun = FALSE;
SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) &ron, CountAdjacentProc);
@@ -5572,6 +6161,10 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Boolean hasGi = FALSE;
SeqHistPtr hist;
IntFuzzPtr ifp;
+ Int4 adjacent_N_gap_position;
+ Boolean adjacent_N_and_gap;
+ Boolean in_gap;
+ Boolean in_N;
Boolean isActiveFin = FALSE;
Boolean isGB = FALSE;
Boolean isPatent = FALSE;
@@ -6001,12 +6594,6 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
oldItemID = gcp->itemID;
oldItemtype = gcp->thistype;
- if (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext) != NULL) {
- gcp->entityID = dcontext.entityID;
- gcp->itemID = dcontext.itemID;
- gcp->thistype = OBJ_SEQDESC;
- }
-
if (ISA_aa (bsp->mol)) {
if (bsp->topology > 1) { /* not linear */
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_CircularProtein, "Non-linear topology set on protein");
@@ -6276,6 +6863,47 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
+ if (ISA_na (bsp->mol) && bsp->repr == Seq_repr_delta && DeltaLitOnly (bsp)) {
+ if (! StreamCacheSetup (bsp, NULL, EXPAND_GAPS_TO_DASHES, &sc)) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqPortFail, "Can't open StreamCache");
+ return;
+ }
+ in_gap = FALSE;
+ in_N = FALSE;
+ adjacent_N_and_gap = FALSE;
+ adjacent_N_gap_position = 0;
+ for (len = 0; len < bsp->length; len++) {
+ residue = StreamCacheGetResidue (&sc);
+ if (residue == '-') {
+ if (in_N) {
+ adjacent_N_and_gap = TRUE;
+ if (adjacent_N_gap_position == 0) {
+ adjacent_N_gap_position = len;
+ }
+ }
+ in_N = FALSE;
+ in_gap = TRUE;
+ } else if (residue == 'N') {
+ if (in_gap) {
+ adjacent_N_and_gap = TRUE;
+ if (adjacent_N_gap_position == 0) {
+ adjacent_N_gap_position = len;
+ }
+ }
+ in_gap = FALSE;
+ in_N = TRUE;
+ } else {
+ in_gap = FALSE;
+ in_N = FALSE;
+ }
+ }
+ if (adjacent_N_and_gap) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_InternalNsAdjacentToGap,
+ "Ambiguous residue N is adjacent to a gap around position %ld",
+ (long) adjacent_N_gap_position);
+ }
+ }
+
if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_ref)) { /* check segmented sequence */
head.choice = SEQLOC_MIX;
head.data.ptrvalue = bsp->seq_ext;
@@ -6499,19 +7127,19 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (mip != NULL) {
if (mip->tech == MI_TECH_htgs_1 || mip->tech == MI_TECH_htgs_2) {
- runsofn = CountAdjacentNsInSeqLit (slitp, (Boolean) ISA_na (bsp->mol));
+ runsofn = CountAdjacentNsInSeqLit (gcp, slitp, (Boolean) ISA_na (bsp->mol));
if (runsofn > 80) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta component %ld that starts at base %ld", (long) runsofn, (int) segnum, (long) len);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta component %ld that starts at base %ld", (long) runsofn, (int) segnum, (long) (len + 1));
}
} else if (mip->tech == MI_TECH_wgs || mip->tech == MI_TECH_composite_wgs_htgs) {
- runsofn = CountAdjacentNsInSeqLit (slitp, (Boolean) ISA_na (bsp->mol));
+ runsofn = CountAdjacentNsInSeqLit (gcp, slitp, (Boolean) ISA_na (bsp->mol));
if (runsofn > 80) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta component %ld that starts at base %ld", (long) runsofn, (int) segnum, (long) len);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta component %ld that starts at base %ld", (long) runsofn, (int) segnum, (long) (len + 1));
}
} else {
- runsofn = CountAdjacentNsInSeqLit (slitp, (Boolean) ISA_na (bsp->mol));
+ runsofn = CountAdjacentNsInSeqLit (gcp, slitp, (Boolean) ISA_na (bsp->mol));
if (runsofn > 100) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta component %ld that starts at base %ld", (long) runsofn, (int) segnum, (long) len);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta component %ld that starts at base %ld", (long) runsofn, (int) segnum, (long) (len + 1));
}
}
}
@@ -6547,19 +7175,31 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if ((!isNTorNC) && (! is_gps) && mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 &&
mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3 && mip->tech != MI_TECH_wgs &&
- mip->tech != MI_TECH_unknown && mip->tech != MI_TECH_standard) {
+ mip->tech != MI_TECH_composite_wgs_htgs && mip->tech != MI_TECH_unknown && mip->tech != MI_TECH_standard) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "Delta seq technique should not be [%d]", (int) (mip->tech));
}
}
} else if (bsp->repr == Seq_repr_raw) {
+ ron.gcp = gcp;
+ ron.vsp = vsp;
ron.ncount = 0;
ron.maxrun = 0;
+ ron.seqpos = 0;
+ ron.showAll = TRUE;
+ ron.inNrun = FALSE;
SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) &ron, CountAdjacentProc);
- if (ron.maxrun > 100) {
+ if (ron.inNrun && ron.showAll && ron.ncount >= 100) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqRaw, "Run of %ld Ns in raw sequence starting at base %ld",
+ (long) ron.ncount, (long) (ron.seqpos - ron.ncount + 1));
+ }
+
+ /*
+ if (ron.maxrun >= 100) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqRaw, "Run of %ld Ns in raw sequence", (long) ron.maxrun);
}
+ */
}
if (bsp->repr == Seq_repr_delta) {
@@ -7086,12 +7726,11 @@ static void LookForMultiplePubs (ValidStructPtr vsp, GatherContextPtr gcp, SeqDe
static void LookForMultipleUnpubPubs (ValidStructPtr vsp, GatherContextPtr gcp, BioseqPtr bsp)
{
- AuthListPtr alp;
Char buf [2048];
- CitGenPtr cgp;
- CharPtr consortium, last, str, tmp;
+ CharPtr last, str;
SeqMgrDescContext dcontext;
ValNodePtr list = NULL, next, vnp;
+ ObjValNodePtr ovp;
PubdescPtr pdp;
SeqDescrPtr sdp;
@@ -7099,42 +7738,9 @@ static void LookForMultipleUnpubPubs (ValidStructPtr vsp, GatherContextPtr gcp,
while (sdp) {
pdp = (PubdescPtr) sdp->data.ptrvalue;
if (pdp != NULL) {
- vnp = pdp->pub;
-
- /* skip over just serial number */
-
- if (vnp != NULL && vnp->choice == PUB_Gen && vnp->next != NULL) {
- cgp = (CitGenPtr) vnp->data.ptrvalue;
- if (cgp != NULL) {
- if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
- if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
- vnp = vnp->next;
- }
- }
- }
- }
-
- if (PubLabelUnique (vnp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE) > 0) {
- alp = GetAuthListPtr (pdp, NULL);
- if (alp != NULL) {
- consortium = NULL;
- str = GetAuthorsString (GENBANK_FMT, alp, &consortium, NULL, NULL);
- tmp = MemNew (StringLen (buf) + StringLen (str) + StringLen (consortium) + 10);
- if (tmp != NULL) {
- StringCpy (tmp, buf);
- if (StringDoesHaveText (str)) {
- StringCat (tmp, "; ");
- StringCat (tmp, str);
- }
- if (StringDoesHaveText (consortium)) {
- StringCat (tmp, "; ");
- StringCat (tmp, consortium);
- }
- ValNodeAddStr (&list, 0, tmp);
- }
- MemFree (str);
- MemFree (consortium);
- }
+ ovp = (ObjValNodePtr) sdp;
+ if (ovp->idx.scratch != NULL) {
+ ValNodeCopyStr (&list, 0, ovp->idx.scratch);
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext);
@@ -8231,7 +8837,7 @@ static Boolean CountryIsValid (CharPtr name)
return FALSE;
}
-static CharPtr GetDashOrSpace (CharPtr str)
+static CharPtr GetDash (CharPtr str)
{
Char ch;
@@ -8239,7 +8845,7 @@ static CharPtr GetDashOrSpace (CharPtr str)
if (str == NULL) return NULL;
ch = *str;
while (ch != '\0') {
- if (ch == ' ' || ch == '-') return str;
+ if (ch == '-') return str;
str++;
ch = *str;
}
@@ -8275,11 +8881,11 @@ static Boolean CollectionDateIsValid (CharPtr name)
if (StringHasNoText (name)) return FALSE;
StringNCpy_0 (str, name, sizeof (str));
- ptr1 = GetDashOrSpace (str);
+ ptr1 = GetDash (str);
if (ptr1 != NULL) {
*ptr1 = '\0';
ptr1++;
- ptr2 = GetDashOrSpace (ptr1);
+ ptr2 = GetDash (ptr1);
if (ptr2 != NULL) {
*ptr2 = '\0';
ptr2++;
@@ -8323,6 +8929,42 @@ static Boolean CollectionDateIsValid (CharPtr name)
return FALSE;
}
+/*
+ * Returns TRUE when name passes the PCR primer sequence format checks:
+ *   - it has text;
+ *   - if it contains a comma it starts with '(' and ends with ')',
+ *     otherwise it contains no parentheses at all;
+ *   - it contains no ';' and no space;
+ *   - every character other than '(', ')' and ',' is a letter whose
+ *     upper-case form is one of "ABCDGHKMNRSTVWY".
+ */
+static Boolean PrimerSeqIsValid (CharPtr name)
+
+{
+  Char     ch;
+  size_t   len;
+  CharPtr  ptr;
+
+  if (StringHasNoText (name)) return FALSE;
+  len = StringLen (name);
+  if (len < 1) return FALSE;
+
+  if (StringChr (name, ',') == NULL) {
+    if (StringChr (name, '(') != NULL || StringChr (name, ')') != NULL) return FALSE;
+  } else if (name [0] != '(' || name [len - 1] != ')') {
+    return FALSE;
+  }
+
+  if (StringChr (name, ';') != NULL || StringChr (name, ' ') != NULL) return FALSE;
+
+  for (ptr = name; *ptr != '\0'; ptr++) {
+    ch = *ptr;
+    if (ch == '(' || ch == ')' || ch == ',') continue;
+    if (! (IS_ALPHA (ch))) return FALSE;
+    ch = TO_UPPER (ch);
+    if (StringChr ("ABCDGHKMNRSTVWY", ch) == NULL) return FALSE;
+  }
+
+  return TRUE;
+}
+
static CharPtr source_qual_prefixes [] = {
"acronym:",
"anamorph:",
@@ -8349,6 +8991,10 @@ static CharPtr source_qual_prefixes [] = {
"forma:",
"forma_specialis:",
"frequency:",
+ "fwd_pcr_primer_name",
+ "fwd_pcr_primer_seq",
+ "fwd_primer_name",
+ "fwd_primer_seq",
"genotype:",
"germline:",
"group:",
@@ -8367,6 +9013,10 @@ static CharPtr source_qual_prefixes [] = {
"plastid_name:",
"pop_variant:",
"rearranged:",
+ "rev_pcr_primer_name",
+ "rev_pcr_primer_seq",
+ "rev_primer_name",
+ "rev_primer_seq",
"right_primer:",
"segment:",
"serogroup:",
@@ -8489,7 +9139,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (StringHasNoText (countryname)) {
countryname = "?";
}
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCountryCode, "Bad country name [%s]", countryname);
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BadCountryCode, "Bad country name [%s]", countryname);
}
} else if (ssp->subtype == SUBSRC_chromosome) {
chromcount++;
@@ -8523,6 +9173,10 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (! CollectionDateIsValid (ssp->name)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCollectionDate, "Collection_date format is not in DD-Mmm-YYYY format");
}
+ } else if (ssp->subtype == SUBSRC_fwd_primer_seq || ssp->subtype == SUBSRC_rev_primer_seq) {
+ if (! PrimerSeqIsValid (ssp->name)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerSequence, "PCR primer sequence format is incorrect");
+ }
}
ssp = ssp->next;
}
@@ -8731,6 +9385,9 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
OrgRefPtr this_org = NULL, that_org = NULL;
int tmpval;
Char buf1[20], buf2[20];
+ EMBLBlockPtr ebp;
+ GBBlockPtr gbp;
+ ValNodePtr keywords = NULL;
PubdescPtr pdp;
MolInfoPtr mip;
Uint2 olditemtype = 0;
@@ -8739,6 +9396,8 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
GatherContextPtr gcp = NULL;
CharPtr str;
SeqFeatPtr sfp;
+ Boolean tpa_exp;
+ Boolean tpa_inf;
BioseqPtr bsp;
DatePtr dp;
SeqMgrFeatContext fcontext;
@@ -8859,12 +9518,24 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_Inconsistent, "Multiple GenBank blocks");
else
bvsp->last_gb = vnp;
+ if (vnp != NULL) {
+ gbp = (GBBlockPtr) vnp->data.ptrvalue;
+ if (gbp != NULL) {
+ keywords = gbp->keywords;
+ }
+ }
break;
case Seq_descr_embl:
if (bvsp->last_embl != NULL)
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_Inconsistent, "Multiple EMBL blocks");
else
bvsp->last_embl = vnp;
+ if (vnp != NULL) {
+ ebp = (EMBLBlockPtr) vnp->data.ptrvalue;
+ if (ebp != NULL) {
+ keywords = ebp->keywords;
+ }
+ }
break;
case Seq_descr_pir:
if (bvsp->last_pir != NULL)
@@ -9109,6 +9780,20 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
break;
}
+ if (keywords != NULL) {
+ tpa_exp = FALSE;
+ tpa_inf = FALSE;
+ for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
+ if (StringICmp ((CharPtr) vnp->data.ptrvalue, "TPA:experimental") == 0) {
+ tpa_exp = TRUE;
+ } else if (StringICmp ((CharPtr) vnp->data.ptrvalue, "TPA:inferential") == 0) {
+ tpa_inf = TRUE;
+ }
+ }
+ if (tpa_exp && tpa_inf) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_Inconsistent, "TPA:experimental and TPA:inferential should not both be in the same set of keywords");
+ }
+ }
if (gcp != NULL) {
gcp->itemID = olditemid;
@@ -9223,6 +9908,20 @@ static Boolean GPSorNTorNC (SeqEntryPtr sep, SeqLocPtr location)
return FALSE;
}
+/*
+ * Returns TRUE when the Bioseq found for location carries at least one
+ * SEQID_GENBANK id, FALSE otherwise (including when no Bioseq is found).
+ * sep is not used.
+ */
+static Boolean IsGenBankAccn (SeqEntryPtr sep, SeqLocPtr location)
+{
+  BioseqPtr  bsp;
+  SeqIdPtr   sip;
+
+  bsp = BioseqFindFromSeqLoc (location);
+  if (bsp == NULL) return FALSE;
+
+  sip = bsp->id;
+  while (sip != NULL) {
+    if (sip->choice == SEQID_GENBANK) return TRUE;
+    sip = sip->next;
+  }
+  return FALSE;
+}
+
static Boolean NGorNT (SeqEntryPtr sep, SeqLocPtr location, BoolPtr is_nc)
{
BioseqPtr bsp;
@@ -10110,6 +10809,54 @@ static Boolean HaveUniqueFeatIDXrefs (SeqFeatXrefPtr xref1, SeqFeatXrefPtr xref2
return FALSE;
}
+/* RNA feature kinds returned by WhichRNA; 0 means none of these */
+#define SMALL_RIBOSOMAL_SUBUNIT 1
+#define INTERNAL_SPACER_1 2
+#define MIDDLE_RIBOSOMAL_SUBUNIT 3
+#define INTERNAL_SPACER_2 4
+#define LARGE_RIBOSOMAL_SUBUNIT 5
+#define INTERNAL_SPACER_X 6
+#define TRANSFER_RNA 7
+
+/*
+ * Classifies an RNA feature as one of the *_RIBOSOMAL_SUBUNIT,
+ * INTERNAL_SPACER_* or TRANSFER_RNA kinds, or returns 0 when sfp is not
+ * an RNA feature or matches none of them.
+ *
+ * RnaRef type 3 is TRANSFER_RNA whatever its name.  Other types are
+ * classified from the name string (ext.choice 1): type 4 by a
+ * case-insensitive name prefix, type 255 by a case-insensitive match of
+ * the whole name.
+ * NOTE(review): 4 and 255 are presumably the rRNA and "other" values of
+ * the RnaRef type enumeration - confirm.
+ */
+static Int2 WhichRNA (SeqFeatPtr sfp)
+
+{
+  CharPtr    name;
+  RnaRefPtr  rrp;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return 0;
+  rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+  if (rrp == NULL) return 0;
+
+  if (rrp->type == 3) return TRANSFER_RNA;
+
+  if (rrp->ext.choice != 1) return 0;
+  name = (CharPtr) rrp->ext.value.ptrvalue;
+  if (StringHasNoText (name)) return 0;
+
+  switch (rrp->type) {
+    case 4 :
+      if (StringNICmp (name, "small ", 6) == 0) return SMALL_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "18S ", 4) == 0) return SMALL_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "5.8S ", 5) == 0) return MIDDLE_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "large ", 6) == 0) return LARGE_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "26S ", 4) == 0) return LARGE_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "28S ", 4) == 0) return LARGE_RIBOSOMAL_SUBUNIT;
+      /* variant spellings */
+      if (StringNICmp (name, "18 ", 3) == 0) return SMALL_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "5.8 ", 4) == 0) return MIDDLE_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "26 ", 3) == 0) return LARGE_RIBOSOMAL_SUBUNIT;
+      if (StringNICmp (name, "28 ", 3) == 0) return LARGE_RIBOSOMAL_SUBUNIT;
+      break;
+    case 255 :
+      if (StringICmp (name, "internal transcribed spacer 1") == 0) return INTERNAL_SPACER_1;
+      if (StringICmp (name, "internal transcribed spacer 2") == 0) return INTERNAL_SPACER_2;
+      /* variant spellings */
+      if (StringICmp (name, "internal transcribed spacer1") == 0) return INTERNAL_SPACER_1;
+      if (StringICmp (name, "internal transcribed spacer2") == 0) return INTERNAL_SPACER_2;
+      if (StringICmp (name, "internal transcribed spacer") == 0) return INTERNAL_SPACER_X;
+      if (StringICmp (name, "ITS") == 0) return INTERNAL_SPACER_X;
+      break;
+    default :
+      break;
+  }
+
+  return 0;
+}
+
static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bvsp)
{
@@ -10142,6 +10889,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
CharPtr lastLabel;
CharPtr message;
Int2 i;
+ Boolean isCuratedFlybase = FALSE;
+ Boolean isDrosophila = FALSE;
+ Boolean isGenBankAccn = FALSE;
+ Boolean isGPSorNTorNC = FALSE;
+ Boolean isViral = FALSE;
Int2 j;
CdRegionPtr crp;
Uint1 frame;
@@ -10150,6 +10902,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
int overlapPepSev;
BioSourcePtr biop = NULL, lastbiop;
OrgRefPtr orp = NULL;
+ OrgNamePtr onp = NULL;
Int4 fiveUTRright;
Int4 cdsRight;
Int4 threeUTRright;
@@ -10162,12 +10915,13 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
Int2 numBadFullSource;
SubSourcePtr sbsp;
Int2 numgene, numcds, nummrna, numcdsproducts, nummrnaproducts,
- numcdspseudo, nummrnapseudo;
+ numcdspseudo, nummrnapseudo, lastrnatype, thisrnatype;
Boolean cds_products_unique = TRUE, mrna_products_unique = TRUE,
suppress_duplicate_messages = FALSE, pseudo;
SeqIdPtr sip;
Char buf [64];
SeqFeatXrefPtr xref = NULL;
+ CharPtr except_text = NULL;
ValNodePtr vnp, cds_prod_head = NULL, mrna_prod_head = NULL,
lastcdsprod = NULL, lastmrnaprod = NULL;
@@ -10350,10 +11104,31 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
ValNodeFreeData (cds_prod_head);
ValNodeFreeData (mrna_prod_head);
+ /*
+ SeqEntryToBioSource (vsp->sep, NULL, NULL, 0, &biop);
+ */
+ BioseqToGeneticCode (bsp, NULL, NULL, NULL, NULL, 0, &biop);
+ if (biop != NULL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ /* curated fly source still has duplicate features */
+ if (StringICmp (orp->taxname, "Drosophila melanogaster") == 0) {
+ isDrosophila = TRUE;
+ }
+ onp = orp->orgname;
+ if (onp != NULL) {
+ if (StringNICmp (onp->lineage, "Viruses; ", 9) == 0) {
+ isViral = TRUE;
+ }
+ }
+ }
+ }
+
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
while (sfp != NULL) {
leave = TRUE;
if (last != NULL) {
+ ivalssame = FALSE;
if (fcontext.left == left && fcontext.right == right && fcontext.featdeftype == featdeftype) {
if (fcontext.strand == strand || strand == Seq_strand_unknown || fcontext.strand == Seq_strand_unknown) {
ivalssame = TRUE;
@@ -10389,21 +11164,30 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
featdeftype == FEATDEF_REGION || featdeftype == FEATDEF_misc_feature || featdeftype == FEATDEF_STS || featdeftype == FEATDEF_variation) {
severity = SEV_WARNING;
} else {
- if (! GPSorNTorNC (vsp->sep, sfp->location)) {
- severity = SEV_WARNING;
- } else {
- if (orp == NULL) {
- SeqEntryToBioSource (vsp->sep, NULL, NULL, 0, &biop);
- if (biop != NULL) {
- orp = biop->org;
+ if (isGPSorNTorNC || GPSorNTorNC (vsp->sep, sfp->location)) {
+ isGPSorNTorNC = TRUE;
+ if (! isCuratedFlybase) {
+ if (isDrosophila) {
+ isCuratedFlybase = TRUE;
}
}
- if (orp != NULL) {
+ if (isCuratedFlybase) {
/* curated fly source still has duplicate features */
- if (StringICmp (orp->taxname, "Drosophila melanogaster") == 0) {
- severity = SEV_WARNING;
+ severity = SEV_WARNING;
+ }
+ } else if (isGenBankAccn || IsGenBankAccn (vsp->sep, sfp->location)) {
+ isGenBankAccn = TRUE;
+ if (! isCuratedFlybase) {
+ if (isDrosophila) {
+ isCuratedFlybase = TRUE;
}
}
+ if (isCuratedFlybase) {
+ /* curated fly source still has duplicate features */
+ severity = SEV_WARNING;
+ }
+ } else {
+ severity = SEV_WARNING;
}
}
/* if different CDS frames, lower to warning */
@@ -10447,7 +11231,15 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
} else {
if (suppress_duplicate_messages && (featdeftype == FEATDEF_CDS || featdeftype == FEATDEF_mRNA) && HaveUniqueFeatIDXrefs (xref, sfp->xref)) {
/* do not report CDS or mRNA if every one has a unique product and unique featID xrefs */
+ } else if (featdeftype == FEATDEF_GENE &&
+ StringStr (sfp->except_text, "dicistronic gene") != NULL &&
+ StringStr (except_text, "dicistronic gene") != NULL &&
+ isCuratedFlybase) {
+ /* do not report genes marked dicistronic */
} else {
+ if (featdeftype == FEATDEF_GENE && isViral && (sfp->partial || last->partial)) {
+ severity = SEV_WARNING;
+ }
ValidErr (vsp, severity, ERR_SEQ_FEAT_DuplicateFeat, "Features have identical intervals, but labels differ");
}
}
@@ -10505,6 +11297,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
ivals = fcontext.ivals;
sap = fcontext.sap;
xref = sfp->xref;
+ except_text = sfp->except_text;
frame = 0;
if (sfp->data.choice == SEQFEAT_CDREGION) {
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
@@ -10621,22 +11414,18 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
while (sfp != NULL) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
if (sfp->idx.subtype == FEATDEF_3UTR && utr3count < 2) {
if (fcontext.strand != Seq_strand_minus) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "3'UTR is not on minus strand");
} else if (threeUTRright > 0) {
if (threeUTRright + 1 != fcontext.left) {
- if (gcp != NULL) {
- gcp->itemID = fcontext.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "Previous 3'UTR does not abut next 3'UTR");
- if (gcp != NULL) {
- gcp->itemID = olditemid;
- gcp->thistype = olditemtype;
- }
}
}
threeUTRright = fcontext.right;
@@ -10644,18 +11433,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
cdsRight = fcontext.right;
if (threeUTRright > 0 && firstCDS) {
if (threeUTRright + 1 != fcontext.left) {
- if (gcp != NULL) {
- gcp->itemID = fcontext.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "CDS does not abut 3'UTR");
- vsp->sfp = NULL;
- if (gcp != NULL) {
- gcp->itemID = olditemid;
- gcp->thistype = olditemtype;
- }
}
}
firstCDS = FALSE;
@@ -10664,17 +11442,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "5'UTR is not on minus strand");
} else if (cdsRight > 0) {
if (cdsRight + 1 != fcontext.left) {
- if (gcp != NULL) {
- gcp->itemID = fcontext.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "5'UTR does not abut CDS");
- if (gcp != NULL) {
- gcp->itemID = olditemid;
- gcp->thistype = olditemtype;
- }
}
}
threeUTRright = fcontext.right;
@@ -10686,6 +11454,12 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
while (sfp != NULL) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
if (sfp->idx.subtype == FEATDEF_5UTR && utr5count < 2) {
if (fcontext.strand == Seq_strand_minus) {
if (genecount > 1 && cdsgene != NULL && utr5gene != NULL && cdsgene != utr5gene) {
@@ -10699,22 +11473,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
cdsRight = fcontext.right;
if (fiveUTRright > 0 && firstCDS) {
if (fiveUTRright + 1 != fcontext.left) {
- if (gcp != NULL) {
- gcp->itemID = fcontext.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
if (genecount > 1 && cdsgene != NULL && utr5gene != NULL && cdsgene != utr5gene) {
/* ignore */
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "5'UTR does not abut CDS");
}
- vsp->sfp = NULL;
- if (gcp != NULL) {
- gcp->itemID = olditemid;
- gcp->thistype = olditemtype;
- }
}
}
firstCDS = FALSE;
@@ -10723,31 +11486,11 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "3'UTR is not on plus strand");
} else if (threeUTRright > 0) {
if (threeUTRright + 1 != fcontext.left) {
- if (gcp != NULL) {
- gcp->itemID = fcontext.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "Previous 3'UTR does not abut next 3'UTR");
- if (gcp != NULL) {
- gcp->itemID = olditemid;
- gcp->thistype = olditemtype;
- }
}
} else if (cdsRight > 0) {
if (cdsRight + 1 != fcontext.left) {
- if (gcp != NULL) {
- gcp->itemID = fcontext.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "CDS does not abut 3'UTR");
- if (gcp != NULL) {
- gcp->itemID = olditemid;
- gcp->thistype = olditemtype;
- }
}
}
threeUTRright = fcontext.right;
@@ -10757,6 +11500,148 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
}
}
+ if (! bvsp->is_mrna) {
+ last = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, 0, &fcontext);
+ if (last != NULL) {
+ lastrnatype = WhichRNA (last);
+ left = fcontext.left;
+ right = fcontext.right;
+ strand = fcontext.strand;
+ sfp = SeqMgrGetNextFeature (bsp, last, SEQFEAT_RNA, 0, &fcontext);
+ while (sfp != NULL) {
+ thisrnatype = WhichRNA (sfp);
+ if (fcontext.strand == strand || (strand != Seq_strand_minus && fcontext.strand != Seq_strand_minus)) {
+ if (lastrnatype != 0 && thisrnatype != 0) {
+ if (right + 1 < fcontext.left) {
+ /* gap */
+ if (strand == Seq_strand_minus) {
+ if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ (lastrnatype == TRANSFER_RNA && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
+ /* okay in mitochondria */
+ } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "ITS does not abut adjacent rRNA component");
+ }
+ } else {
+ if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ (lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
+ /* okay in mitochondria */
+ } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
+ (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "ITS does not abut adjacent rRNA component");
+ }
+ }
+ } else if (right + 1 > fcontext.left) {
+ /* overlaps */
+ if (strand == Seq_strand_minus) {
+ if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ (lastrnatype == TRANSFER_RNA && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "tRNA overlaps adjacent rRNA component");
+ } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "ITS overlaps adjacent rRNA component");
+ }
+ } else {
+ if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ (lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
+ /* okay in mitochondria */
+ } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_1) ||
+ (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype == INTERNAL_SPACER_2) ||
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype == MIDDLE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "ITS overlaps adjacent rRNA component");
+ }
+ }
+ } else {
+ /* abuts */
+ if (strand == Seq_strand_minus) {
+ if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ (lastrnatype == TRANSFER_RNA && thisrnatype == SMALL_RIBOSOMAL_SUBUNIT)) {
+ /* okay in mitochondria */
+ } else if ((lastrnatype == LARGE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_2) ||
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype != MIDDLE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_1) ||
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype != SMALL_RIBOSOMAL_SUBUNIT)) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "Problem with order of abutting rRNA components");
+ }
+ } else {
+ if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype == TRANSFER_RNA) ||
+ (lastrnatype == TRANSFER_RNA && thisrnatype == LARGE_RIBOSOMAL_SUBUNIT)) {
+ /* okay in mitochondria */
+ } else if ((lastrnatype == SMALL_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_1) ||
+ (lastrnatype == MIDDLE_RIBOSOMAL_SUBUNIT && thisrnatype != INTERNAL_SPACER_2) ||
+ (lastrnatype == INTERNAL_SPACER_1 && thisrnatype != MIDDLE_RIBOSOMAL_SUBUNIT) ||
+ (lastrnatype == INTERNAL_SPACER_2 && thisrnatype != LARGE_RIBOSOMAL_SUBUNIT)) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ITSdoesNotAbutRRNA, "Problem with order of abutting rRNA components");
+ }
+ }
+ }
+ }
+ }
+ last = sfp;
+ left = fcontext.left;
+ right = fcontext.right;
+ strand = fcontext.strand;
+ lastrnatype = thisrnatype;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, 0, &fcontext);
+ }
+ }
+ }
+
+ vsp->sfp = NULL;
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
+
mrna = SeqMgrGetRNAgivenProduct (bsp, &fcontext);
if (mrna != NULL) {
genomicgrp = SeqMgrGetGeneXref (mrna);
@@ -12144,9 +13029,7 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
GeneticCodePtr gncp;
Uint2 idx;
Int2 j;
- SeqEntryPtr sep;
ErrSev sev = SEV_ERROR;
- Uint1 shift;
SeqMapTablePtr smtp;
Uint1 taa;
ValNodePtr vnp;
@@ -12188,8 +13071,11 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (trp->codon[j] < 64) {
if (codes == NULL) {
bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID);
+ /*
sep = GetBestTopParentForData (gcp->entityID, bsp);
code = SeqEntryToGeneticCode (sep, NULL, NULL, 0);
+ */
+ BioseqToGeneticCode (bsp, &code, NULL, NULL, NULL, 0, NULL);
gncp = GeneticCodeFind (code, NULL);
if (gncp == NULL) {
gncp = GeneticCodeFind (1, NULL);
@@ -12207,12 +13093,14 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
taa = codes[trp->codon[j]];
if (aa > 0 && aa != 255) {
if (taa != aa) {
- if (aa == 'U') {
+ if (aa == 'U' || aa == 'O') {
sev = SEV_WARNING;
}
if (aa == 'U' && taa == '*' && trp->codon [j] == 14) {
/* selenocysteine normally uses TGA (14), so ignore without requiring exception in record */
- /* TAG (11) is used for pyrrolysine in archaebacteria */
+ } else if (aa == 'O' && taa == '*' && trp->codon [j] == 11) {
+ /* pyrrolysine normally uses TAG (11) in archaebacteria, so ignore without requiring exception in record */
+
/* TAA (10) is not yet known to be used for an exceptional amino acid */
} else if (StringISearch (sfp->except_text, "modified codon recognition") == NULL) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_TrnaCodonWrong, "tRNA codon does not match genetic code");
@@ -12225,6 +13113,7 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
}
if (aa > 0 && aa != 255) {
+ /* - no gaps now that O and J are added
if (aa <= 74) {
shift = 0;
} else if (aa > 79) {
@@ -12232,16 +13121,19 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
} else {
shift = 1;
}
+ */
if (aa != '*') {
- idx = aa - (64 + shift);
+ idx = aa - (64 /* + shift */);
} else {
- idx = 25;
+ idx = 25; /* termination */
}
- if (idx > 0 && idx < 26) {
+ if (idx > 0 && idx < 28) {
/* valid trna amino acid */
} else {
ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid");
}
+ } else {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid");
}
}
@@ -12752,6 +13644,7 @@ static CharPtr legal_exception_strings [] = {
"nonconsensus splice site",
"modified codon recognition",
"alternative start codon",
+ "dicistronic gene",
NULL
};
@@ -12760,6 +13653,7 @@ static CharPtr refseq_exception_strings [] = {
"unclassified translation discrepancy",
"mismatches in transcription",
"mismatches in translation",
+ "adjusted for low-quality genome",
NULL
};
@@ -12847,7 +13741,9 @@ static void ValidateExceptText (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
typedef struct samecds { /* state shared between SameAsCDS and its FindSameCDS feature-explore callback */
Boolean found; /* cleared by SameAsCDS, set TRUE by FindSameCDS on a match; this is what SameAsCDS returns */
SeqMgrFeatContextPtr gcontext; /* context of the feature under test, filled by SeqMgrGetDesiredFeature in SameAsCDS */
+ Uint2 slpTag; /* partial flag being checked; FindSameCDS compares it against SLP_NOSTART and SLP_NOSTOP */
Uint1 subtype; /* sfp->idx.subtype of the feature under test */
+ Boolean bypassGeneTest; /* set by FindSameCDS when the tested feature extends past the CDS on the partial side; copied out via bypassGeneTestP */
} SameCds, PNTR SameCdsPtr;
static Boolean LIBCALLBACK FindSameCDS (SeqFeatPtr sfp, SeqMgrFeatContextPtr ccontext)
@@ -12893,16 +13789,57 @@ static Boolean LIBCALLBACK FindSameCDS (SeqFeatPtr sfp, SeqMgrFeatContextPtr cco
return FALSE;
}
}
- } else if (gcontext->left == ccontext->left || gcontext->right == ccontext->right) {
- /* if either end of CDS and mRNA is identical, okay to suppress partial warning */
- same->found = TRUE;
- return FALSE;
+ } else if (SeqLocAinB (sfp->location, gcontext->sfp->location) > 0) {
+
+ if (ccontext->strand == Seq_strand_minus || gcontext->strand == Seq_strand_minus) {
+ if (same->slpTag == SLP_NOSTART && gcontext->partialL) {
+ if (gcontext->right == ccontext->right) {
+ same->found = TRUE;
+ return FALSE;
+ }
+ if (gcontext->right > ccontext->right) {
+ same->bypassGeneTest = TRUE;
+ return FALSE;
+ }
+ } else if (same->slpTag == SLP_NOSTOP && gcontext->partialR) {
+ if (gcontext->left == ccontext->left) {
+ same->found = TRUE;
+ return FALSE;
+ }
+ if (gcontext->left < ccontext->left) {
+ same->bypassGeneTest = TRUE;
+ return FALSE;
+ }
+ }
+
+ } else {
+
+ if (same->slpTag == SLP_NOSTART && gcontext->partialL) {
+ if (gcontext->left == ccontext->left) {
+ same->found = TRUE;
+ return FALSE;
+ }
+ if (gcontext->left < ccontext->left) {
+ same->bypassGeneTest = TRUE;
+ return FALSE;
+ }
+ } else if (same->slpTag == SLP_NOSTOP && gcontext->partialR) {
+ if (gcontext->right == ccontext->right) {
+ same->found = TRUE;
+ return FALSE;
+ }
+ if (gcontext->right > ccontext->right) {
+ same->bypassGeneTest = TRUE;
+ return FALSE;
+ }
+ }
+ }
}
}
return TRUE;
}
-static Boolean SameAsCDS (SeqFeatPtr sfp)
+static Boolean SameAsCDS (SeqFeatPtr sfp, Uint2 slpTag, BoolPtr bypassGeneTestP)
{
BioseqPtr bsp;
@@ -12915,10 +13852,15 @@ static Boolean SameAsCDS (SeqFeatPtr sfp)
if (SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &gcontext) != sfp) return FALSE;
same.found = FALSE;
same.gcontext = &gcontext;
+ same.slpTag = slpTag;
same.subtype = sfp->idx.subtype;
+ same.bypassGeneTest = FALSE;
MemSet ((Pointer) &cdsFilt, 0, sizeof (cdsFilt));
cdsFilt [SEQFEAT_CDREGION] = TRUE;
SeqMgrExploreFeatures (bsp, (Pointer) &same, FindSameCDS, sfp->location, cdsFilt, NULL);
+ if (bypassGeneTestP != NULL) {
+ *bypassGeneTestP = same.bypassGeneTest;
+ }
return same.found;
}
@@ -13568,6 +14510,49 @@ static void ValidateGoTermsSfp (
}
}
+static void LookForAccnLocs (SeqIdPtr sip, Pointer userdata)
+ /* Seq-id callback handed to VisitSeqIdsInSeqLoc by ValidateSeqFeat: sets the Boolean that userdata points to when sip is a text-type Seq-id with a non-empty accession; it never clears the flag */
+{
+ BoolPtr bp;
+ TextSeqIdPtr tsip;
+
+ if (sip == NULL || userdata == NULL) return; /* nothing to inspect or nowhere to report */
+ bp = (BoolPtr) userdata; /* caller passes the address of its Boolean result flag */
+
+ switch (sip->choice) {
+ case SEQID_GENBANK : /* every choice listed here has its data.ptrvalue read as a TextSeqIdPtr below */
+ case SEQID_EMBL :
+ case SEQID_DDBJ :
+ case SEQID_TPG :
+ case SEQID_TPE :
+ case SEQID_TPD :
+ case SEQID_OTHER :
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL) {
+ if (StringDoesHaveText (tsip->accession)) {
+ *bp = TRUE; /* only ever raised, so the flag accumulates across all ids visited */
+ }
+ }
+ break;
+ default : /* any other Seq-id choice leaves the flag untouched */
+ break;
+ }
+}
+
+static CharPtr infMessage [] = { /* message text per ValidateInferenceQualifier result code; ValidateSeqFeat indexes it with inferenceCode after resetting out-of-range codes to VALID_INFERENCE */
+ "unknown error", /* NOTE(review): presumably the VALID_INFERENCE slot, which is what out-of-range codes end up printing - confirm against the enum */
+ "empty inference string",
+ "bad inference prefix",
+ "bad inference body",
+ "single inference field",
+ "spaces in inference",
+ "same species misused",
+ "bad inference accession",
+ "bad inference accession version",
+ "accession.version not public", /* NOTE(review): presumably matches ACC_VERSION_NOT_PUBLIC, the upper bound tested before indexing - confirm */
+ NULL /* trailing NULL entry */
+};
+
NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
{
Int2 type, i, j;
@@ -13590,7 +14575,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
tRNAPtr trp;
GBQualPtr gbq;
Boolean pseudo, excpt, conflict, codonqual,
- anticodonqual, protidqual, transidqual, ovgenepseudo;
+ anticodonqual, productqual, protidqual,
+ transidqual, ovgenepseudo;
ImpFeatPtr ifp;
GeneRefPtr grp;
ProtRefPtr prp;
@@ -13641,6 +14627,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
Boolean hasxref;
CharPtr sfp_old_locus_tag;
CharPtr gene_old_locus_tag;
+ Boolean bypassGeneTest;
+ Boolean dicistronic = FALSE;
+ Int2 inferenceCode;
+ Boolean accn_seqid;
vsp = (ValidStructPtr) (gcp->userdata);
@@ -13653,6 +14643,26 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidateSeqLoc (vsp, sfp->product, "Product");
+ if (vsp->feat_loc_has_gi) {
+ accn_seqid = FALSE;
+ VisitSeqIdsInSeqLoc (sfp->location, (Pointer) &accn_seqid, LookForAccnLocs);
+ if (accn_seqid) {
+ if (! vsp->is_smupd_in_sep) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureRefersToAccession, "Feature location refers to accession");
+ }
+ }
+ }
+
+ if (vsp->feat_prod_has_gi) {
+ accn_seqid = FALSE;
+ VisitSeqIdsInSeqLoc (sfp->product, (Pointer) &accn_seqid, LookForAccnLocs);
+ if (accn_seqid) {
+ if (! vsp->is_smupd_in_sep) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureRefersToAccession, "Feature product refers to accession");
+ }
+ }
+ }
+
partials[0] = SeqLocPartialCheck (sfp->product);
partials[1] = SeqLocPartialCheck (sfp->location);
if ((partials[0] != SLP_COMPLETE) || (partials[1] != SLP_COMPLETE) || (sfp->partial)) { /* partialness */
@@ -13729,21 +14739,21 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
for (i = 0; i < 2; i++) {
errtype = SLP_NOSTART;
for (j = 0; j < 4; j++) {
+ bypassGeneTest = FALSE;
if (partials[i] & errtype) {
if (i == 1 && j < 2 && IsCddFeat (sfp)) {
/* suppresses warning */
- } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_GENE && SameAsCDS (sfp)) {
+ } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_GENE && SameAsCDS (sfp, errtype, NULL)) {
/*
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
"%s: %s",
parterr[i], parterrs[j]);
*/
} else if (i == 1 && j < 2 && sfp->idx.subtype == SEQFEAT_GENE && SameAsMRNA (sfp)) {
- } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && SameAsCDS (sfp)) {
- } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && SameAsGene (sfp)) {
- /*
- } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_CDREGION && SameAsMRNA (sfp)) {
- */
+ } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && SameAsCDS (sfp, errtype, &bypassGeneTest)) {
+ } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && (! bypassGeneTest) && SameAsGene (sfp)) {
+ } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_CDREGION && SameAsMRNA (sfp) &&
+ PartialAtSpliceSiteOrGap (sfp->location, errtype, &isgap, &badseq)) {
} else if (i == 1 && j < 2 && PartialAtSpliceSiteOrGap (sfp->location, errtype, &isgap, &badseq)) {
if (! isgap) {
if (sfp->idx.subtype != FEATDEF_CDS || SplicingNotExpected (sfp)) {
@@ -14031,6 +15041,19 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ProteinNameEndsInBracket, "Protein name ends with bracket and may contain organism name");
}
}
+ if (StringNICmp (str, "hypothetical protein XP_", 24) == 0) {
+ bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID);
+ if (bsp != NULL) {
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice != SEQID_OTHER) continue;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip == NULL) continue;
+ if (StringICmp (tsip->accession, str + 21) != 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_HpotheticalProteinMismatch, "Hypothetical protein reference does not match accession");
+ }
+ }
+ }
+ }
}
if (str != NULL && sfp->comment != NULL) {
if (StringCmp (str, sfp->comment) == 0) {
@@ -14126,16 +15149,22 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
if (rrp->type == 3) { /* tRNA */
anticodonqual = FALSE;
+ productqual = FALSE;
gbq = sfp->qual;
while (gbq != NULL) {
if (StringICmp (gbq->qual, "anticodon") == 0) {
anticodonqual = TRUE;
+ } else if (StringICmp (gbq->qual, "product") == 0) {
+ productqual = TRUE;
}
gbq = gbq->next;
}
if (anticodonqual) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Unparsed anticodon qualifier in tRNA");
}
+ if (productqual) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Unparsed product qualifier in tRNA");
+ }
}
if (rrp->type == 3 && rrp->ext.choice == 1) { /* tRNA with string extension */
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Unparsed product qualifier in tRNA");
@@ -14314,6 +15343,15 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "Qualifier other than replace has just quotation marks");
}
}
+ if (StringICmp (gbq->qual, "inference") == 0) {
+ inferenceCode = ValidateInferenceQualifier (gbq->val, TRUE);
+ if (inferenceCode != VALID_INFERENCE) {
+ if (inferenceCode < VALID_INFERENCE || inferenceCode > ACC_VERSION_NOT_PUBLIC) {
+ inferenceCode = VALID_INFERENCE;
+ }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidInferenceValue, "Inference qualifier problem - %s", infMessage [(int) inferenceCode]);
+ }
+ }
}
if (sfp->product != NULL) {
@@ -14347,7 +15385,13 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
break;
}
}
+ bsp = BioseqFindFromSeqLoc (sfp->location);
protBsp = BioseqFindFromSeqLoc (sfp->product);
+ if (bsp != NULL && protBsp != NULL) {
+ if (bsp == protBsp) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_SelfReferentialProduct, "Self-referential feature product");
+ }
+ }
if (protBsp != NULL && protBsp->id != NULL) {
for (sip = protBsp->id; sip != NULL; sip = sip->next) {
switch (sip->choice) {
@@ -14450,6 +15494,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (sfpx == NULL) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_GeneXrefWithoutGene,
"Feature has gene locus cross-reference but no equivalent gene feature exists");
+ } else if (StringStr (sfpx->except_text, "dicistronic gene") != NULL) {
+ dicistronic = TRUE;
}
}
}
@@ -14460,6 +15506,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (sfpx == NULL) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_GeneXrefWithoutGene,
"Feature has gene locus_tag cross-reference but no equivalent gene feature exists");
+ } else if (StringStr (sfpx->except_text, "dicistronic gene") != NULL) {
+ dicistronic = TRUE;
}
}
}
@@ -14508,8 +15556,12 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryGeneXref, "Unnecessary gene cross-reference %s", label);
} else {
- if (GPSorNTorNC (vsp->sep, sfp->location)) {
+ if ((! dicistronic) && GPSorNTorNC (vsp->sep, sfp->location)) {
+ /*
SeqEntryToBioSource (vsp->sep, NULL, NULL, 0, &biop);
+ */
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ BioseqToGeneticCode (bsp, NULL, NULL, NULL, NULL, 0, &biop);
if (biop != NULL) {
orp = biop->org;
if (orp != NULL) {
@@ -14558,6 +15610,7 @@ static CharPtr bypass_mrna_trans_check [] = {
"artificial frameshift",
"unclassified transcription discrepancy",
"mismatches in transcription",
+ "adjusted for low-quality genome",
NULL
};
@@ -14873,6 +15926,7 @@ static CharPtr bypass_cds_trans_check [] = {
"rearrangement required for product",
"unclassified translation discrepancy",
"mismatches in translation",
+ "adjusted for low-quality genome",
NULL
};
@@ -14909,6 +15963,11 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
StreamCache sc;
Boolean isgap;
Boolean badseq;
+ BioseqPtr bsp;
+ SeqIdPtr sip;
+ Boolean is_ged = FALSE;
+ Boolean is_refseq = FALSE;
+ Boolean has_gi = FALSE;
if (sfp == NULL)
@@ -15155,6 +16214,32 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
sev = SEV_WARNING;
}
if (report_errors || unclassified_except) {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp != NULL) {
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ switch (sip->choice) {
+ case SEQID_GI :
+ has_gi = TRUE;
+ break;
+ case SEQID_GENBANK :
+ case SEQID_EMBL :
+ case SEQID_DDBJ :
+ case SEQID_TPG :
+ case SEQID_TPE :
+ case SEQID_TPD :
+ is_ged = TRUE;
+ break;
+ case SEQID_OTHER :
+ is_refseq = TRUE;
+ break;
+ default :
+ break;
+ }
+ }
+ if (has_gi && is_ged && (! is_refseq)) {
+ sev = SEV_REJECT;
+ }
+ }
ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops. Genetic code [%d]", (long) stop_count, gccode);
}
}
@@ -15400,6 +16485,13 @@ erret:
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept, "Unparsed transl_except qual. Skipped");
}
}
+ } else {
+ if (transl_except) {
+ has_errors = TRUE;
+ if (report_errors) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept, "Unparsed transl_except qual (but protein is okay). Skipped");
+ }
+ }
}
if (prot2seq != NULL)
@@ -15466,7 +16558,8 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
if (sfp->excpt) {
if (StringISearch (sfp->except_text, "ribosomal slippage") != NULL||
StringISearch (sfp->except_text, "artificial frameshift") != NULL ||
- StringISearch (sfp->except_text, "nonconsensus splice site") != NULL) {
+ StringISearch (sfp->except_text, "nonconsensus splice site") != NULL ||
+ StringISearch (sfp->except_text, "adjusted for low-quality genome") != NULL) {
report_errors = FALSE;
}
}
@@ -15605,6 +16698,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
}
if (((checkAll && (!lastPartial)) || ctr < total) && (stp < (len - 2))) { /* check donor on all but last exon and on sequence */
+ tbuf[0] = '\0';
StreamCacheSetPosition (&sc, stp + 1);
residue1 = StreamCacheGetResidue (&sc);
residue2 = StreamCacheGetResidue (&sc);
@@ -15694,6 +16788,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
tbuf[0] = '\0';
if (bsp == NULL) {
StringCpy (tbuf, "?");
+ SeqIdWrite (sip, tbuf, PRINTID_FASTA_SHORT, 39);
} else if (vsp->suppressContext || vsp->convertGiToAccn) {
WorstBioseqLabel (bsp, tbuf, 39, OM_LABEL_CONTENT);
} else {
diff --git a/api/valid.h b/api/valid.h
index 833594ee..33af92a7 100644
--- a/api/valid.h
+++ b/api/valid.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.20 $
+* $Revision: 6.22 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,12 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: valid.h,v $
+* Revision 6.22 2006/02/16 19:34:47 kans
+* use vsp->is_smupd_in_sep to suppress ERR_SEQ_FEAT_FeatureRefersToAccession
+*
+* Revision 6.21 2006/01/26 19:54:26 kans
+* added ERR_SEQ_FEAT_FeatureRefersToAccession to look for inconsistent use of gi and accession (with or without version) for sfp->location or sfp->product references in a single blob
+*
* Revision 6.20 2005/06/08 15:26:06 kans
* added is_htg_in_sep and is_refseq_in_sep flags to vsp to avoid repetitive checks
*
@@ -236,6 +242,9 @@ typedef struct validstruct {
TextFsaPtr sourceQualTags; /* for detecting structured qual tags in notes */
Boolean is_htg_in_sep; /* record has technique of htgs 0 through htgs 3 */
Boolean is_refseq_in_sep; /* record has seqid of type other (refseq) */
+ Boolean is_smupd_in_sep; /* record in INSD internal processing */
+ Boolean feat_loc_has_gi; /* at least one feature has a gi location reference */
+ Boolean feat_prod_has_gi; /* at least one feature has a gi product reference */
} ValidStruct, PNTR ValidStructPtr;
NLM_EXTERN Boolean ValidateSeqEntry PROTO((SeqEntryPtr sep, ValidStructPtr vsp));
diff --git a/api/valid.msg b/api/valid.msg
index 1e20cb9a..97bc4de9 100644
--- a/api/valid.msg
+++ b/api/valid.msg
@@ -194,7 +194,15 @@ $^ LeadingX, 54
The protein sequence starts with one or more X (unknown) amino acids.
$^ InternalNsInSeqRaw, 55
-There are runs of many Ns inside a raw sequence.
+There are runs of greater than 100 Ns within sequence. Please describe
+what these Ns represent with your sequence submission.
+
+$^ InternalNsAdjacentToGap, 56
+There are Ns directly adjacent to a SeqLit gap in a delta Bioseq.
+
+$^ CaseDifferenceInSeqID, 57
+Multiple Bioseqs have the same Seq-id except for capitalization. Sequence
+identifiers must be unique in a case-insensitive manner within a record.
$$ SEQ_DESCR, 2
@@ -336,6 +344,9 @@ the descriptor should be removed.
$^ BadCollectionDate, 35
The collection date is not in the required format.
+$^ BadPCRPrimerSequence, 36
+The PCR primer sequence has illegal characters or non-IUPAC nucleotides.
+
$$ GENERIC, 3
$^ NonAsciiAsn, 1
@@ -367,7 +378,15 @@ $^ BadDate, 8
There are bad values for month, day, or year in a date.
$^ StructuredCitGenCit, 9
-The publication has title or journal embedded in the unstructured citgen.cit field.
+The publication has title or journal embedded in the unstructured citgen.cit
+field.
+
+$^ CollidingSerialNumbers, 10
+Multiple publications have the same serial number explicitly recorded in the
+data.
+
+$^ EmbeddedScript, 11
+Script or other markup tags should not be used in sequence record fields.
$$ SEQ_PKG, 4
@@ -867,6 +886,31 @@ The old_locus_tag qualifier on a feature does not match that on the overlapping
$^ DuplicateGeneOntologyTerm, 111
A feature has multiple identical Gene Ontology (GO) term specifications.
+$^ InvalidInferenceValue, 112
+The value of the inference qualifier is constrained by agreement of the International
+Nucleotide Sequence Database Collaboration (INSDC). This value does not conform to those
+constraints. Please see the feature table documentation for more information.
+
+$^ HpotheticalProteinMismatch, 113
+There is a mismatch between the accession cited by the hypothetical protein claim
+and the actual accession of the record.
+
+$^ FeatureRefersToAccession, 114
+There is a mixture of features referring to sequence by gi numbers and by accession.
+This inconsistency is likely due to incomplete processing by software.
+
+$^ SelfReferentialProduct, 115
+A feature product points to the same sequence that the feature location does.
+The product must point to a different sequence that is the biological product
+of the first, due to transcription, translation, or peptide processing.
+
+$^ ITSdoesNotAbutRRNA, 116
+The internal transcribed spacer misc_RNA features should exactly abut the flanking rRNA features.
+
+$^ FeatureSeqIDCaseDifference, 117
+Feature location and referenced Bioseq have the same Seq-id except for capitalization.
+Sequence identifiers must be unique in a case-insensitive manner within a record.
+
$$ SEQ_ALIGN, 6
$^ SeqIdProblem, 1
diff --git a/api/validerr.h b/api/validerr.h
index c0f972b4..cfad0e33 100644
--- a/api/validerr.h
+++ b/api/validerr.h
@@ -57,6 +57,8 @@
#define ERR_SEQ_INST_OverlappingDeltaRange 1,53
#define ERR_SEQ_INST_LeadingX 1,54
#define ERR_SEQ_INST_InternalNsInSeqRaw 1,55
+#define ERR_SEQ_INST_InternalNsAdjacentToGap 1,56
+#define ERR_SEQ_INST_CaseDifferenceInSeqID 1,57
#define ERR_SEQ_DESCR 2,0
#define ERR_SEQ_DESCR_BioSourceMissing 2,1
#define ERR_SEQ_DESCR_InvalidForType 2,2
@@ -93,6 +95,7 @@
#define ERR_SEQ_DESCR_FastaBracketTitle 2,33
#define ERR_SEQ_DESCR_MissingText 2,34
#define ERR_SEQ_DESCR_BadCollectionDate 2,35
+#define ERR_SEQ_DESCR_BadPCRPrimerSequence 2,36
#define ERR_GENERIC 3,0
#define ERR_GENERIC_NonAsciiAsn 3,1
#define ERR_GENERIC_Spell 3,2
@@ -103,6 +106,8 @@
#define ERR_GENERIC_MedlineEntryPub 3,7
#define ERR_GENERIC_BadDate 3,8
#define ERR_GENERIC_StructuredCitGenCit 3,9
+#define ERR_GENERIC_CollidingSerialNumbers 3,10
+#define ERR_GENERIC_EmbeddedScript 3,11
#define ERR_SEQ_PKG 4,0
#define ERR_SEQ_PKG_NoCdRegionPtr 4,1
#define ERR_SEQ_PKG_NucProtProblem 4,2
@@ -231,6 +236,12 @@
#define ERR_SEQ_FEAT_PolyAsignalNotRange 5,109
#define ERR_SEQ_FEAT_OldLocusTagMismtach 5,110
#define ERR_SEQ_FEAT_DuplicateGeneOntologyTerm 5,111
+#define ERR_SEQ_FEAT_InvalidInferenceValue 5,112
+#define ERR_SEQ_FEAT_HpotheticalProteinMismatch 5,113
+#define ERR_SEQ_FEAT_FeatureRefersToAccession 5,114
+#define ERR_SEQ_FEAT_SelfReferentialProduct 5,115
+#define ERR_SEQ_FEAT_ITSdoesNotAbutRRNA 5,116
+#define ERR_SEQ_FEAT_FeatureSeqIDCaseDifference 5,117
#define ERR_SEQ_ALIGN 6,0
#define ERR_SEQ_ALIGN_SeqIdProblem 6,1
#define ERR_SEQ_ALIGN_StrandRev 6,2