summary | refs | log | tree | commit | diff
path: root/api
diff options
context:
space:
mode:
author Andreas Tille <tille@debian.org> 2016-12-01 16:38:47 +0100
committer Andreas Tille <tille@debian.org> 2016-12-01 16:38:47 +0100
commit 9c3449b3f79213138ad1d315580af98e89d8e1b6 (patch)
tree 157d2da8682da6cfb9ee28db762c9c94dcdaa98c /api
parent be323245ea77f0e457e3d42c83b58a82f67ca0f2 (diff)
New upstream version 6.1.20160908
Diffstat (limited to 'api')
-rw-r--r-- api/alignmgr2.c 29
-rw-r--r-- api/alignval.c 443
-rw-r--r-- api/alignval.h 11
-rw-r--r-- api/aliread.c 9
-rw-r--r-- api/asn2ff3.c 599
-rw-r--r-- api/asn2ffp.h 328
-rw-r--r-- api/asn2gnb1.c 445
-rw-r--r-- api/asn2gnb2.c 1162
-rw-r--r-- api/asn2gnb3.c 1955
-rw-r--r-- api/asn2gnb4.c 498
-rw-r--r-- api/asn2gnb5.c 412
-rw-r--r-- api/asn2gnb6.c 577
-rw-r--r-- api/asn2gnbi.h 67
-rw-r--r-- api/asn2gnbk.h 8
-rw-r--r-- api/asn2gnbp.h 4
-rw-r--r-- api/ecnum_ambiguous.inc 234
-rw-r--r-- api/ecnum_deleted.inc 15
-rw-r--r-- api/ecnum_replaced.inc 237
-rw-r--r-- api/ecnum_specific.inc 1569
-rw-r--r-- api/explore.h 3
-rw-r--r-- api/fdlKludge.h 54
-rw-r--r-- api/ffprint.h 105
-rw-r--r-- api/gather.c 46
-rw-r--r-- api/gather.h 6
-rw-r--r-- api/gbfeat.c 72
-rw-r--r-- api/gbftdef.h 10
-rw-r--r-- api/gbftglob.c 87
-rw-r--r-- api/gbparint.c 6
-rw-r--r-- api/lsqfetch.c 99
-rwxr-xr-x api/macroapi.c 3390
-rw-r--r-- api/macroapi.h 26
-rw-r--r-- api/objmgr.c 4
-rw-r--r-- api/pgppop.c 273
-rw-r--r-- api/pgppop.h 180
-rw-r--r-- api/product_rules.inc 12070
-rw-r--r-- api/samutil.c 246
-rw-r--r-- api/seqmgr.c 209
-rw-r--r-- api/seqmgr.h 25
-rw-r--r-- api/seqport.c 231
-rw-r--r-- api/seqport.h 29
-rw-r--r-- api/sequtil.c 270
-rw-r--r-- api/sequtil.h 32
-rw-r--r-- api/sqnutil1.c 7738
-rw-r--r-- api/sqnutil2.c 8532
-rw-r--r-- api/sqnutil3.c 7933
-rwxr-xr-x api/sqnutil4.c 3385
-rw-r--r-- api/sqnutils.h 350
-rw-r--r-- api/subutil.c 279
-rw-r--r-- api/subutil.h 25
-rw-r--r-- api/tofasta.c 1476
-rw-r--r-- api/tofasta.h 32
-rw-r--r-- api/tomedlin.c 13
-rw-r--r-- api/txalign.c 546
-rw-r--r-- api/utilpub.c 5
-rwxr-xr-x api/valapi.c 357
-rwxr-xr-x api/valapi.h 9
-rw-r--r-- api/valid.c 6126
-rw-r--r-- api/valid.h 33
-rw-r--r-- api/valid.msg 136
-rw-r--r-- api/validerr.h 40
-rw-r--r-- api/validrules.inc 87
-rw-r--r-- api/wprint.c 14
62 files changed, 46014 insertions, 17177 deletions
diff --git a/api/alignmgr2.c b/api/alignmgr2.c
index 5b43ef36..0e2fe2de 100644
--- a/api/alignmgr2.c
+++ b/api/alignmgr2.c
@@ -28,13 +28,23 @@
*
* Version Creation Date: 10/01
*
-* $Revision: 6.63 $
+* $Revision: 6.66 $
*
* File Description: SeqAlign indexing, access, and manipulation functions
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: alignmgr2.c,v $
+* Revision 6.66 2016/09/02 14:57:38 ucko
+* Formally clean up calls to printf-family functions that are at least
+* nominally unsafe, as already done in Debian/Ubuntu packages.
+*
+* Revision 6.65 2013/11/26 01:23:42 kans
+* JIRA:GP-6623 AlnMgr2ConvertAllToDenseSeg bails specifically for Spliced-seg
+*
+* Revision 6.64 2013/11/26 00:15:42 kans
+* JIRA:GP-5360 AlnMgr2ConvertAllToDenseSeg returns Boolean if not Dense-diag or Dense-seg to avoid crash on Spliced-seg
+*
* Revision 6.63 2008/12/01 19:35:39 bollin
* prevent crash when mapping positions and row of alignment is entirely in the gapl.
*
@@ -248,7 +258,7 @@ static void AMIntervalSetFree(AMIntervalSetPtr amint);
static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap);
static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap);
static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap);
-static void AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap);
+static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap);
static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap);
static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap);
static void AlnMgr2SortBySeqId(SeqAlignPtr sap);
@@ -1106,7 +1116,7 @@ static void AlnMgr2UnpackSeqAlignChain(SeqAlignPtr sap)
* non-allocated strands are allocated and all set to Seq_strand_plus.
*
***************************************************************************/
-static void AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
+static Boolean AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
{
DenseSegPtr dsp;
Int4 i;
@@ -1115,8 +1125,9 @@ static void AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
while (sap != NULL)
{
sap_next = sap->next;
- if (sap->segtype == SAS_DENDIAG)
+ if (sap->segtype == SAS_DENDIAG) {
AlnMgr2ConvertDendiagToDensegChain(sap);
+ }
else if (sap->segtype == SAS_DENSEG)
{
dsp = (DenseSegPtr)(sap->segs);
@@ -1129,8 +1140,13 @@ static void AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
}
}
}
+ else if (sap->segtype == SAS_SPLICED)
+ {
+ return FALSE;
+ }
sap = sap_next;
}
+ return TRUE;
}
/* SECTION 2c */
@@ -1152,7 +1168,8 @@ NLM_EXTERN Boolean AlnMgr2IndexLite(SeqAlignPtr sap)
return FALSE;
if (!AlnMgr2UnpackSeqAlign(sap))
return FALSE;
- AlnMgr2ConvertAllToDenseSeg((SeqAlignPtr)sap->segs);
+ if (!AlnMgr2ConvertAllToDenseSeg((SeqAlignPtr)sap->segs))
+ return FALSE;
amaip = AMAlignIndex2New();
amaip->alnstyle = AM2_LITE;
salp = (SeqAlignPtr)(sap->segs);
@@ -5616,7 +5633,7 @@ NLM_EXTERN void AlnMgr2PrintSeqAlign(SeqAlignPtr sap, Int4 linesize, Boolean isn
spp = SeqPortNew(bsp, amp->from_row, amp->to_row, amp->strand, seqcode);
ctr = SeqPortRead(spp, (Uint1Ptr)buf, amp->to_row-amp->from_row+1);
buf[ctr] = '\0';
- fprintf(ofp, buf);
+ fwrite(buf, 1, ctr, ofp);
SeqPortFree(spp);
}
}
diff --git a/api/alignval.c b/api/alignval.c
index e602baa7..feb7dd86 100644
--- a/api/alignval.c
+++ b/api/alignval.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/3/99
*
-* $Revision: 6.77 $
+* $Revision: 6.95 $
*
* File Description: To validate sequence alignment.
*
@@ -93,6 +93,7 @@ static ValNodePtr errorp = NULL;
static Uint2 AlignmentPercentIdentityEx (SeqAlignPtr salp, Boolean internal_gaps, Boolean internal_validation);
+//LCOV_EXCL_START
static ValNodePtr JYConstructErrorMessage (CharPtr function, CharPtr message, Uint1 level, ValNodePtr PNTR vnpp)
{
Char buffer[BUFFER_LENGTH];
@@ -144,6 +145,8 @@ static ValNodePtr JYErrorChainDestroy (ValNodePtr vnp)
return NULL;
}
+//LCOV_EXCL_STOP
+
/******************************************************************
Output error message according to code defined in alignval.h.
id refers to seqid of the sequence that causes the error
@@ -346,15 +349,13 @@ static Int4 valmsggetseqpos(SeqAlignPtr sap, Int4 segment, SeqIdPtr sip)
}
-static BioseqPtr BioseqForAlignment (SeqAlignPtr salp)
+static BioseqPtr BioseqForAlignmentWork (SeqAlignPtr salp)
{
Int4 row, num_rows;
BioseqPtr bsp = NULL;
SeqIdPtr sip;
- SeqEntryPtr oldscope;
DenseDiagPtr ddp;
- oldscope = SeqEntrySetScope (NULL);
/* NOTE - can't index DenseDiag chain during validation because we're examining the individual DenseDiags,
* and indexing converts it to DenseSegs.
*/
@@ -375,7 +376,23 @@ static BioseqPtr BioseqForAlignment (SeqAlignPtr salp)
bsp = BioseqFind(sip);
}
}
- SeqEntrySetScope (oldscope);
+ return bsp;
+}
+
+static BioseqPtr BioseqForAlignment (SeqAlignPtr salp)
+{
+ BioseqPtr bsp;
+ SeqEntryPtr oldscope;
+
+ /* first look locally to scope */
+ bsp = BioseqForAlignmentWork (salp);
+ if (bsp != NULL) return bsp;
+
+ /* otherwise temporarily clear scope */
+ oldscope = SeqEntrySetScope (NULL);
+ bsp = BioseqForAlignmentWork (salp);
+ SeqEntrySetScope (oldscope);
+
return bsp;
}
@@ -397,7 +414,7 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
{
case Err_SeqId:
sprintf(string1, "SeqId");
- sprintf(string2, "The sequence corresponding to SeqId %s could not be found", buf);
+ sprintf(string2, "The sequence corresponding to SeqId %s could not be found.", buf);
break;
case Err_Strand_Rev:
@@ -415,11 +432,14 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
break;
case Err_Start_Less_Than_Zero:
+ //LCOV_EXCL_START
+ //unreachable for ASN.1 valid for C++ Toolkit
pos = valmsggetseqpos(salp, Intvalue, id);
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
sprintf(string1, "Start");
sprintf(string2, "Start point is less than zero in segment %ld (near sequence position %ld) for sequence ID: %s in the context of %s", (long) Intvalue, (long) pos, buf, buf3);
break;
+ //LCOV_EXCL_STOP
case Err_Start_More_Than_Biolen:
pos = valmsggetseqpos(salp, Intvalue, id);
@@ -429,11 +449,14 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
break;
case Err_End_Less_Than_Zero:
+ //LCOV_EXCL_START
+ //unreachable with valid ASN.1
pos = valmsggetseqpos(salp, Intvalue, id);
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
sprintf(string1, "Length");
sprintf(string2, "End point is less than zero in segment %ld (near position %d) for sequence ID: %s in the context of %s. This could be a formatting error", (long) Intvalue, (int) pos,buf, buf3);
break;
+ //LCOV_EXCL_STOP
case Err_End_More_Than_Biolen:
pos = valmsggetseqpos(salp, Intvalue, id);
@@ -443,11 +466,14 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
break;
case Err_Len_Less_Than_Zero:
+ //LCOV_EXCL_START
+ //unreachable with valid ASN.1
pos = valmsggetseqpos(salp, Intvalue, id);
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
sprintf(string1, "Length");
sprintf(string2, "Segment length is less than zero in segment %ld (near sequence position %ld) for sequence ID: %s in the context of %s. Look for extra characters in this segment or flanking segments", (long) Intvalue, (long) pos, buf, buf3);
break;
+ //LCOV_EXCL_STOP
case Err_Len_More_Than_Biolen:
pos = valmsggetseqpos(salp, Intvalue, id);
@@ -482,9 +508,12 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
break;
case Err_Null_Segs:
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
sprintf(string1, "Segs");
sprintf(string2, "This alignment is missing all segments. This is a non-correctable error -- look for serious formatting problems.");
break;
+ //LCOV_EXCL_STOP
case Err_Segment_Gap:
pos = valmsggetseqpos(salp, Intvalue, id);
@@ -518,9 +547,12 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
break;
case Err_Short_Aln:
+ //LCOV_EXCL_START
+ //only call to only function to generate this error is commented out
sprintf(string1, "ShortAln");
sprintf(string2, "This alignment is shorter than at least one non-farpointer sequence.");
break;
+ //LCOV_EXCL_STOP
case Err_Unexpected_Alignment_Type:
sprintf(string1, "UnexpectedAlignmentType");
@@ -544,8 +576,10 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
}
return;
}
+//LCOV_EXCL_START
if (StringLen(string1) > 0)
errorp = JYConstructErrorMessage (string1, string2, errlevel, &errorp);
+//LCOV_EXCL_STOP
}
@@ -660,8 +694,11 @@ static SeqIdPtr SeqIdInAlignSegs(Pointer segs, Uint1 segtype, SeqAlignPtr salp)
if(!segs)
{
+ //LCOV_EXCL_START
+ //unreachable for valid ASN.1
ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
return NULL;
+ //LCOV_EXCL_STOP
}
if(segtype==1)
{ /* DenseDiag */
@@ -710,8 +747,12 @@ static void ValidateSeqIdInSeqAlign (SeqAlignPtr salp)
if(salp)
{
segptr=salp->segs;
- if(!segptr)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!segptr) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
@@ -797,6 +838,8 @@ static Uint1 SeqLocStrandForSipInStdSeg (SeqIdPtr sip, StdSegPtr ssp, SeqAlignPt
}
+//LCOV_EXCL_START
+//code for this error does not actually work as described in comments
/******************************************************************
check if the strand is consistent in Stdseg
******************************************************************/
@@ -810,8 +853,10 @@ static void ValidateStrandInStdSeg(StdSegPtr ssp, SeqAlignPtr salp)
Boolean CheckedStatus;
Int4 start_numseg=0, end_numseg=0;
- if(!ssp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!ssp) {
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ }
else
for(ssptemp=ssp; ssptemp!=NULL; ssptemp=ssptemp->next)
{
@@ -878,15 +923,15 @@ static void ValidateStrandInStdSeg(StdSegPtr ssp, SeqAlignPtr salp)
ValNodeFree(FinishedSip);
}
+//LCOV_EXCL_STOP
/******************************************************************
check if the strand is consistent in Denseseg
******************************************************************/
-static void ValidateStrandInPack_DenseSeg(Pointer segs, Uint1 segtype, SeqAlignPtr salp)
+static void ValidateStrandInDenseSeg(Pointer segs, Uint1 segtype, SeqAlignPtr salp)
{
DenseSegPtr dsp=NULL;
- PackSegPtr psp=NULL;
Int4 numseg, aligndim, dimnumseg, i, j, m;
SeqIdPtr sip=NULL, siptemp;
Uint1 strand1=0, strand2=0;
@@ -894,26 +939,18 @@ static void ValidateStrandInPack_DenseSeg(Pointer segs, Uint1 segtype, SeqAlignP
if(!segs)
{
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
}
- else if(segtype==2||segtype==4)
+ else if(segtype==2)
{
- if(segtype==2)
- {
- dsp=(DenseSegPtr)segs;
- strandptr=dsp->strands;
- sip=dsp->ids;
- numseg=dsp->numseg;
- aligndim=dsp->dim;
- }
- else if(segtype==4)
- {
- psp=(PackSegPtr)segs;
- strandptr=psp->strands;
- sip=psp->ids;
- numseg=psp->numseg;
- aligndim=psp->dim;
- }
+ dsp=(DenseSegPtr)segs;
+ strandptr=dsp->strands;
+ sip=dsp->ids;
+ numseg=dsp->numseg;
+ aligndim=dsp->dim;
dimnumseg=numseg*aligndim;
if(strandptr)
@@ -972,10 +1009,10 @@ static void ValidateStrandinSeqAlign(SeqAlignPtr salp)
/*Strands needs to be validated in case of global or partial alignment*/
- /*denseseg or packseg*/
- if(salp->segtype==2||salp->segtype==4)
+ /*denseseg*/
+ if(salp->segtype==2)
- ValidateStrandInPack_DenseSeg(salp->segs, salp->segtype, salp);
+ ValidateStrandInDenseSeg(salp->segs, salp->segtype, salp);
/*stdseg*/
else if(salp->segtype==3)
@@ -1003,8 +1040,12 @@ static void ValidateSeqlengthInDenseDiag (DenseDiagPtr ddp, SeqAlignPtr salp)
BioseqPtr bsp=NULL;
- if(!ddp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!ddp){
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
for(ddptemp=ddp, numseg=0; ddptemp!=NULL; ddptemp=ddptemp->next, numseg++)
@@ -1022,15 +1063,23 @@ static void ValidateSeqlengthInDenseDiag (DenseDiagPtr ddp, SeqAlignPtr salp)
bslen=bsp->length;
AlignValBioseqUnlock (bsp);
/*verify start*/
- if(stptr[i]<0)
- ValMessage (salp, Err_Start_Less_Than_Zero, SEV_ERROR, siptemp, sip , numseg);
+ if (stptr[i] < 0) {
+ //LCOV_EXCL_START
+ //unreachable with valid ASN.1
+ ValMessage(salp, Err_Start_Less_Than_Zero, SEV_ERROR, siptemp, sip, numseg);
+ //LCOV_EXCL_STOP
+ }
if(stptr[i]>=bslen)
ValMessage (salp, Err_Start_More_Than_Biolen, SEV_ERROR, siptemp, sip , numseg);
/*verify length*/
- if(ddptemp->len<0)
- ValMessage (salp, Err_Len_Less_Than_Zero, SEV_ERROR, siptemp, sip , numseg);
+ if (ddptemp->len<0) {
+ //LCOV_EXCL_START
+ //unreachable with valid ASN.1
+ ValMessage(salp, Err_Len_Less_Than_Zero, SEV_ERROR, siptemp, sip, numseg);
+ //LCOV_EXCL_STOP
+ }
if(ddptemp->len+stptr[i]>bslen)
ValMessage (salp, Err_Sum_Len_Start, SEV_ERROR, siptemp, sip , numseg);
@@ -1109,8 +1158,12 @@ static void ValidateSeqlengthInDenseSeg (DenseSegPtr dsp, SeqAlignPtr salp)
Int4 bslen = 0;
BioseqPtr bsp=NULL;
- if(!dsp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!dsp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
numseg=dsp->numseg;
@@ -1251,7 +1304,10 @@ static void ValidateSeqlengthInStdSeg (StdSegPtr ssp, SeqAlignPtr salp)
SeqLocPtr slp=NULL, slptemp;
if(!ssp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
} else {
for(ssptemp=ssp, numseg=0; ssptemp!=NULL; ssptemp=ssptemp->next, numseg++) {
/*get all seqid in current segment*/
@@ -1272,7 +1328,10 @@ static void ValidateSeqlengthInStdSeg (StdSegPtr ssp, SeqAlignPtr salp)
/*verify start*/
if(start<0) {
+ //LCOV_EXCL_START
+ //unreachable with valid ASN.1
ValMessage (salp, Err_Start_Less_Than_Zero, SEV_ERROR, siptemp, sip , numseg+1);
+ //LCOV_EXCL_STOP
}
if(start>bslen-1) {
@@ -1281,7 +1340,10 @@ static void ValidateSeqlengthInStdSeg (StdSegPtr ssp, SeqAlignPtr salp)
/*verify end*/
if(end<0) {
+ //LCOV_EXCL_START
+ //unreachable with valid ASN.1
ValMessage (salp, Err_End_Less_Than_Zero, SEV_ERROR, siptemp, sip , numseg+1);
+ //LCOV_EXCL_STOP
}
if(end>bslen-1) {
ValMessage (salp, Err_End_More_Than_Biolen, SEV_ERROR, siptemp, sip , numseg+1);
@@ -1289,7 +1351,10 @@ static void ValidateSeqlengthInStdSeg (StdSegPtr ssp, SeqAlignPtr salp)
/*verify length*/
if(length<0) {
+ //LCOV_EXCL_START
+ //unreachable with valid ASN.1
ValMessage (salp, Err_Len_Less_Than_Zero, SEV_ERROR, siptemp, sip , numseg+1);
+ //LCOV_EXCL_STOP
}
if(length>bslen) {
@@ -1316,8 +1381,12 @@ static void ValidateSeqlengthInPackSeg (PackSegPtr psp, SeqAlignPtr salp)
BioseqPtr bsp=NULL;
Int4 bslen, seg_start;
- if(!psp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!psp){
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
numseg=psp->numseg;
@@ -1350,8 +1419,11 @@ static void ValidateSeqlengthInPackSeg (PackSegPtr psp, SeqAlignPtr salp)
AlignValBioseqUnlock (bsp);
seg_start=stptr[j];
/*check start*/
- if(seg_start<0)
- ValMessage (salp, Err_Start_Less_Than_Zero, SEV_ERROR, siptemp, sip , 0);
+ if (seg_start < 0) {
+ //LCOV_EXCL_START
+ ValMessage(salp, Err_Start_Less_Than_Zero, SEV_ERROR, siptemp, sip, 0);
+ //LCOV_EXCL_STOP
+ }
if(seg_start>=bslen)
ValMessage (salp, Err_Start_More_Than_Biolen, SEV_ERROR, siptemp, sip , 0);
@@ -1413,8 +1485,12 @@ static void ValidateDimSeqIds (SeqAlignPtr salp)
{
ddp=(DenseDiagPtr)salp->segs;
- if(!ddp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!ddp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
for(ddptemp=ddp, numseg=0; ddptemp!=NULL; ddptemp=ddptemp->next, numseg++)
{
@@ -1431,8 +1507,12 @@ static void ValidateDimSeqIds (SeqAlignPtr salp)
else if(salp->segtype==2||salp->segtype==4)
{
dsp=(DenseSegPtr) (salp->segs);
- if(!dsp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!dsp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
sip=dsp->ids;
@@ -1449,8 +1529,12 @@ static void ValidateDimSeqIds (SeqAlignPtr salp)
{
ssp=(StdSegPtr)salp->segs;
- if(!ssp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!ssp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
for(ssptemp=ssp, numseg=0; ssptemp!=NULL; ssptemp=ssptemp->next, numseg++)
{
@@ -1594,7 +1678,10 @@ static Boolean Is_Fasta_Seqalign (SeqAlignPtr salp)
dsp = (DenseSegPtr) salp->segs;
if(!dsp)
{
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
}
else
{
@@ -1685,16 +1772,24 @@ static void Segment_Gap_In_SeqAlign(SeqAlignPtr salp)
if(salp->segtype==1)
{
ddp=(DenseDiagPtr)salp->segs;
- if(!ddp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!ddp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
for(ddptemp=ddp, numseg=0; ddptemp!=NULL; ddptemp=ddptemp->next, numseg++)
{
sip=ddptemp->id;
/*empty segment*/
- if(ddptemp->dim==0)
- ValMessage (salp, Err_Segment_Gap, SEV_ERROR, NULL, sip, numseg);
+ if (ddptemp->dim == 0) {
+ //LCOV_EXCL_START
+ //ASN.1 is unreadable if dim is 0
+ ValMessage(salp, Err_Segment_Gap, SEV_ERROR, NULL, sip, numseg);
+ //LCOV_EXCL_STOP
+ }
}
}
}
@@ -1704,8 +1799,12 @@ static void Segment_Gap_In_SeqAlign(SeqAlignPtr salp)
else if(salp->segtype==2)
{
dsp=(DenseSegPtr)salp->segs;
- if(!dsp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!dsp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
numseg=dsp->numseg;
@@ -1742,8 +1841,12 @@ static void Segment_Gap_In_SeqAlign(SeqAlignPtr salp)
else if(salp->segtype==3)
{
ssp=(StdSegPtr)salp->segs;
- if(!ssp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!ssp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
/*go through each segment*/
@@ -1776,8 +1879,12 @@ static void Segment_Gap_In_SeqAlign(SeqAlignPtr salp)
else if(salp->segtype==4)
{
psp=(PackSegPtr)salp->segs;
- if(!psp)
- ValMessage (salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ if (!psp) {
+ //LCOV_EXCL_START
+ // unreachable for valid ASN.1
+ ValMessage(salp, Err_Null_Segs, SEV_ERROR, NULL, NULL, 0);
+ //LCOV_EXCL_STOP
+ }
else
{
numseg=psp->numseg;
@@ -1876,9 +1983,11 @@ static Boolean IsAlignmentTPA (SeqAlignPtr salp)
for (sip=SeqIdInAlignSegs(salp->segs, salp->segtype, salp);
sip != NULL && !isTPA;
sip = sip->next) {
- bsp = BioseqLockById(sip);
+ /* NOTE - we do not want to fetch Bioseqs if they aren't local.
+ * we only care about TpaUserObjects on Bioseqs in THIS record.
+ */
+ bsp = BioseqFind(sip);
isTPA = HasTpaUserObject(bsp);
- BioseqUnlock(bsp);
}
break;
}
@@ -1888,6 +1997,8 @@ static Boolean IsAlignmentTPA (SeqAlignPtr salp)
}
+//LCOV_EXCL_START
+// only call to this function is commented out
static void CheckAlnSeqLens (SeqAlignPtr salp)
{
Int4 aln_len, start, stop;
@@ -1919,8 +2030,31 @@ static void CheckAlnSeqLens (SeqAlignPtr salp)
ValMessage (salp, Err_Short_Aln, SEV_INFO, NULL, NULL, 0);
}
}
+//LCOV_EXCL_STOP
+
+
+static Boolean AlignmentScorePercentIdOk (SeqAlignPtr salp)
+{
+ ScorePtr score;
+
+ if (salp == NULL) {
+ return FALSE;
+ }
+ for (score = salp->score; score != NULL; score = score->next) {
+ if (score->id != NULL
+ && score->id->str != NULL
+ && StringICmp (score->id->str, "pct_identity_ungap") == 0) {
+ if (score->value.realvalue > 50.0) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+ }
+ }
+ return FALSE;
+}
+
-
/******************************************************************
validate seqid, segment length, strand in Seqalignment for Denseseg,
Densediag and Stdseg. Also check if it's FASTA-like
@@ -1953,7 +2087,7 @@ static Boolean ValidateSeqAlignFunc (SeqAlignPtr salp, Boolean find_remote_bsp)
/*validate segment gap*/
Segment_Gap_In_SeqAlign (salp);
- if (!IsAlignmentTPA(salp)) {
+ if (!IsAlignmentTPA(salp) && !AlignmentScorePercentIdOk(salp)) {
if (salp->segtype == SAS_DENDIAG) {
/* duplicate alignment, to prevent indexing from changing the original type */
salp_test = SeqAlignDup (salp);
@@ -2029,6 +2163,8 @@ NLM_EXTERN Boolean ValidateSeqAlign (SeqAlignPtr salp, Uint2 entityID, Boolean m
}
if (errorp)
{
+ //LCOV_EXCL_START
+ //not used
if(svp->message)
{
for (vnp=errorp; vnp!=NULL; vnp=vnp->next)
@@ -2058,6 +2194,7 @@ NLM_EXTERN Boolean ValidateSeqAlign (SeqAlignPtr salp, Uint2 entityID, Boolean m
}
err_count++;
svp->retdel=FALSE;
+ //LCOV_EXCL_STOP
}
else {
salptmp = salptmp->next;
@@ -2077,6 +2214,7 @@ NLM_EXTERN Boolean ValidateSeqAlign (SeqAlignPtr salp, Uint2 entityID, Boolean m
}
+//LCOV_EXCL_START
/******************************************************************
call back function for REGISTER_ALIGNVALIDATION defined in sequin4.c.
Starting point for seqalign validation if user clicked on
@@ -2134,6 +2272,7 @@ NLM_EXTERN Int2 LIBCALLBACK ValidateSeqAlignFromData (Pointer data)
}
return OM_MSG_RET_DONE;
}
+//LCOV_EXCL_STOP
static void ValidateSeqAlignInAnnot (SeqAnnotPtr sap, SaValPtr svp)
@@ -2359,6 +2498,7 @@ static Char AmbiguousMatch (Char ch1, Char ch2)
}
+//LCOV_EXCL_START
extern double *
GetAlignmentColumnPercentIdentities
(SeqAlignPtr salp,
@@ -2522,6 +2662,7 @@ GetAlignmentColumnPercentIdentities
return pct_ids;
}
+//LCOV_EXCL_STOP
static Uint2 AlignmentPercentIdentityEx (SeqAlignPtr salp, Boolean internal_gaps, Boolean internal_validation)
@@ -2541,7 +2682,9 @@ static Uint2 AlignmentPercentIdentityEx (SeqAlignPtr salp, Boolean internal_gaps
Int4 sample_len = 50;
Int4Ptr starts, stops;
Int4 match_25 = 0;
-
+ ErrSev logsev;
+ ErrSev msgsev;
+
if (salp == NULL) return 0;
AlnMgr2IndexSingleChildSeqAlign(salp);
@@ -2561,10 +2704,18 @@ static Uint2 AlignmentPercentIdentityEx (SeqAlignPtr salp, Boolean internal_gaps
for (row = 1; row <= num_rows; row++) {
sip_list[row - 1] = AlnMgr2GetNthSeqIdPtr(salp, row);
strand_list[row - 1] = AlnMgr2GetNthStrand(salp, row);
+ msgsev = ErrSetMessageLevel (SEV_MAX);
+ logsev = ErrSetLogLevel (SEV_MAX);
bsp_list[row - 1] = BioseqLockById(sip_list[row - 1]);
+ ErrSetLogLevel (logsev);
+ ErrSetMessageLevel (msgsev);
if (bsp_list[row - 1] == NULL) {
oldscope = SeqEntrySetScope (NULL);
+ msgsev = ErrSetMessageLevel (SEV_MAX);
+ logsev = ErrSetLogLevel (SEV_MAX);
bsp_list[row - 1] = BioseqLockById(sip_list[row - 1]);
+ ErrSetLogLevel (logsev);
+ ErrSetMessageLevel (msgsev);
SeqEntrySetScope(oldscope);
if (bsp_list[row - 1] == NULL) {
break;
@@ -2670,167 +2821,9 @@ static Uint2 AlignmentPercentIdentityEx (SeqAlignPtr salp, Boolean internal_gaps
return pcnt;
}
+//LCOV_EXCL_START
extern Uint2 AlignmentPercentIdentity (SeqAlignPtr salp, Boolean internal_gaps)
{
return AlignmentPercentIdentityEx (salp, internal_gaps, FALSE);
}
-
-extern Uint2 WeightedAlignmentPercentIdentity (SeqAlignPtr salp, Boolean internal_gaps)
-{
- Int4 aln_len, num_rows, row, col_count = 0;
- Int4 num_match;
- Uint2 pcnt;
- Int4 aln_pos, seq_pos, k;
- Uint1 row_ch;
- SeqEntryPtr oldscope;
- SeqIdPtr PNTR sip_list;
- BioseqPtr PNTR bsp_list;
- Uint1Ptr strand_list;
- BoolPtr start_gap, end_gap;
- Int4Ptr start_list;
- Uint1Ptr seqbuf_list;
- Int4 sample_len = 50;
- Int4 chars_appearing[5]; /* 0 is A, 1 is T, 2 is G, 3 is C, 4 is internal gap */
- double col_pct, col_pct_total = 0;
- Int4 max_app, total_app, i;
-
- if (salp == NULL) return 0;
-
- AlnMgr2IndexSingleChildSeqAlign(salp);
- aln_len = AlnMgr2GetAlnLength(salp, FALSE);
- num_rows = AlnMgr2GetNumRows(salp);
- if (num_rows < 0) {
- Message (MSG_POSTERR, "AlnMgr2GetNumRows failed");
- return 0;
- }
- bsp_list = (BioseqPtr PNTR) MemNew (num_rows * sizeof (BioseqPtr));
- sip_list = (SeqIdPtr PNTR) MemNew (num_rows * sizeof(SeqIdPtr));
- strand_list = (Uint1Ptr) MemNew (num_rows * sizeof(Uint1));
- start_gap = (BoolPtr) MemNew (num_rows * sizeof(Boolean));
- end_gap = (BoolPtr) MemNew (num_rows * sizeof(Boolean));
- for (row = 1; row <= num_rows; row++) {
- sip_list[row - 1] = AlnMgr2GetNthSeqIdPtr(salp, row);
- strand_list[row - 1] = AlnMgr2GetNthStrand(salp, row);
- bsp_list[row - 1] = BioseqLockById(sip_list[row - 1]);
- if (bsp_list[row - 1] == NULL) {
- oldscope = SeqEntrySetScope (NULL);
- bsp_list[row - 1] = BioseqLockById(sip_list[row - 1]);
- SeqEntrySetScope(oldscope);
- if (bsp_list[row - 1] == NULL) {
- break;
- }
- }
- start_gap[row - 1] = TRUE;
- end_gap[row - 1] = FALSE;
- }
-
- if (row <= num_rows) {
- Message (MSG_POSTERR, "Unable to locate Bioseq in alignment");
- while (row >= 0) {
- sip_list[row] = SeqIdFree(sip_list[row]);
- BioseqUnlock(bsp_list[row]);
- row--;
- }
- sip_list = MemFree (sip_list);
- bsp_list = MemFree (bsp_list);
- start_gap = MemFree (start_gap);
- end_gap = MemFree (end_gap);
- return 0;
- }
-
- start_list = (Int4Ptr) MemNew (num_rows * sizeof(Int4));
- seqbuf_list = (Uint1Ptr) MemNew (num_rows * sample_len * sizeof(Uint1));
- for (row = 0; row < num_rows; row++) {
- start_list[row] = 0;
- PopulateSample (seqbuf_list, start_list,
- sample_len, bsp_list,
- row);
- }
-
- num_match = 0;
- for (aln_pos = 0; aln_pos < aln_len; aln_pos++) {
- /* init lists */
- MemSet (chars_appearing, 0, sizeof (chars_appearing));
- for (row = 1; row <= num_rows; row++) {
- if (end_gap[row - 1]) {
- continue;
- }
- seq_pos = AlnMgr2MapSeqAlignToBioseq(salp, aln_pos, row);
- if (seq_pos < 0) {
- if (start_gap[row - 1] || end_gap[row - 1]) {
- /* beginning/end gap - never counts against percent identity */
- } else {
- k = aln_pos + 1;
- while (k < aln_len && seq_pos < 0) {
- seq_pos = AlnMgr2MapSeqAlignToBioseq(salp, k, row);
- k++;
- }
- if (seq_pos < 0) {
- /* now in end_gap for this sequence */
- end_gap[row - 1] = TRUE;
- } else {
- /* internal gaps count against percent identity when specified */
- if (internal_gaps) {
- chars_appearing[4] ++;
- }
- }
- }
- } else {
- start_gap[row - 1] = FALSE;
-
- row_ch = ReadFromAlignmentSample(seqbuf_list, start_list,
- sample_len, bsp_list, strand_list,
- row - 1, seq_pos);
- switch (row_ch) {
- case 'A':
- chars_appearing[0]++;
- break;
- case 'T':
- chars_appearing[1]++;
- break;
- case 'G':
- chars_appearing[2]++;
- break;
- case 'C':
- chars_appearing[3]++;
- break;
- default:
- /* we don't count ambiguity characters */
- break;
- }
- }
- }
- max_app = 0;
- total_app = 0;
- for (i = 0; i < 4; i++) {
- if (chars_appearing[i] > max_app) {
- max_app = chars_appearing[i];
- }
- total_app += chars_appearing[i];
- }
- if (total_app > 0) {
- col_pct = (double) max_app / (double) total_app;
- col_pct_total += col_pct;
- }
- col_count++;
- }
-
- for (row = 0; row < num_rows; row++) {
- sip_list[row] = SeqIdFree(sip_list[row]);
- BioseqUnlock(bsp_list[row]);
- }
- sip_list = MemFree (sip_list);
- bsp_list = MemFree (bsp_list);
- start_gap = MemFree (start_gap);
- end_gap = MemFree (end_gap);
- start_list = MemFree (start_list);
- seqbuf_list = MemFree (seqbuf_list);
-
- if (col_count == 0) {
- pcnt = 0;
- } else {
- pcnt = (100 * col_pct_total) / col_count;
- }
- return pcnt;
-}
-
+//LCOV_EXCL_STOP
diff --git a/api/alignval.h b/api/alignval.h
index 4e1172f7..92465f78 100644
--- a/api/alignval.h
+++ b/api/alignval.h
@@ -29,13 +29,21 @@
*
* Version Creation Date: 6/3/99
*
-* $Revision: 6.19 $
+* $Revision: 6.20 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: alignval.h,v $
+* Revision 6.20 2013/01/25 20:14:55 bollin
+* JIRA:SQD-976 removed Special->Display->Weighted Alignment Percent Identity
+*
+*
+* Committed on the Free edition of March Hare Software CVSNT Client.
+* Upgrade to CVS Suite for more features and support:
+* http://march-hare.com/cvsnt/
+*
* Revision 6.19 2009/06/18 17:18:08 bollin
* Changed FASTA-like validator error to use minimum length of two sequences
* when comparing, allow N to match anything.
@@ -108,7 +116,6 @@ NLM_EXTERN Boolean ValidateSeqAlignInSeqEntry (SeqEntryPtr sep, Boolean message,
Boolean do_hist_assembly);
extern Uint2 AlignmentPercentIdentity (SeqAlignPtr salp, Boolean internal_gaps);
-extern Uint2 WeightedAlignmentPercentIdentity (SeqAlignPtr salp, Boolean internal_gaps);
extern double *
GetAlignmentColumnPercentIdentities
diff --git a/api/aliread.c b/api/aliread.c
index d08ad64c..af5a3bff 100644
--- a/api/aliread.c
+++ b/api/aliread.c
@@ -314,8 +314,7 @@ void Ali_ChangeRowToOther (ValNodePtr rowPtr)
seqLinePtr = (SeqLineInfoPtr) rowPtr->data.ptrvalue;
if (seqLinePtr->junk != NULL)
- sprintf(seqLinePtr->sequence,"%s%s",seqLinePtr->sequence,
- seqLinePtr->junk);
+ strcat(seqLinePtr->sequence, seqLinePtr->junk);
if ((seqLinePtr->sequence != NULL) && (seqLinePtr->id == NULL))
{
@@ -383,7 +382,7 @@ CharPtr ReadAlignFileLine (FILE PNTR alignFilePtr,
Int4 segmentLen = 0;
Int4 segmentCount = 1;
Boolean done = FALSE;
- Char ch = 0;
+ Int2 ch = 0;
/* Allocate memory for the line. More */
/* can be added later as necessary. */
@@ -402,12 +401,12 @@ CharPtr ReadAlignFileLine (FILE PNTR alignFilePtr,
/* Process the current character */
- ch = (Char) NLM_GETC (alignFilePtr);
+ ch = NLM_GETC (alignFilePtr);
if (ch == '\n')
{
done = TRUE;
- ch = (Char) NLM_GETC (alignFilePtr);
+ ch = NLM_GETC (alignFilePtr);
if (ch != '\r') {
ungetc (ch, alignFilePtr);
diff --git a/api/asn2ff3.c b/api/asn2ff3.c
index f8eca746..bb9ee7eb 100644
--- a/api/asn2ff3.c
+++ b/api/asn2ff3.c
@@ -34,595 +34,6 @@
*
* Modifications:
* --------------------------------------------------------------------------
-* $Log: asn2ff3.c,v $
-* Revision 6.120 2011/12/19 18:33:53 gouriano
-* Corrected printf formatting. NOJIRA
-*
-* Revision 6.119 2006/07/13 17:06:38 bollin
-* use Uint4 instead of Uint2 for itemID values
-* removed unused variables
-* resolved compiler warnings
-*
-* Revision 6.118 2003/07/22 16:18:27 kans
-* added ZFIN as legal db_xref
-*
-* Revision 6.117 2003/06/10 18:44:10 kans
-* added GeneDB to list of legal db_xrefs
-*
-* Revision 6.116 2003/05/29 20:25:19 kans
-* added Interpro to list of legal dbxrefs
-*
-* Revision 6.115 2002/11/30 20:18:27 kans
-* added GOA to list of legal db_xrefs
-*
-* Revision 6.114 2002/11/27 22:25:17 kans
-* added AceView/WormGenes, NextDB, and WorfDB to legal db_xrefs
-*
-* Revision 6.113 2002/07/12 17:34:35 kans
-* WormBase is now legal dbxref for all records, not just RefSeq
-*
-* Revision 6.112 2002/06/21 15:31:11 kans
-* added GABI db_xref
-*
-* Revision 6.111 2002/06/18 20:59:59 kans
-* added ISFinder as legal db_xref with hotlink
-*
-* Revision 6.110 2002/05/06 22:15:12 kans
-* added IFO and JCM db_xrefs
-*
-* Revision 6.109 2002/02/27 13:47:11 kans
-* fixed model evidence printing
-*
-* Revision 6.108 2002/02/20 21:59:04 tatiana
-* IMGT/LIGM dbxref added
-*
-* Revision 6.107 2002/01/31 22:31:31 tatiana
-* allow trascript_id in NC records
-*
-* Revision 6.106 2002/01/18 19:53:24 kans
-* if RefSeq, allow WormBase dbxref
-*
-* Revision 6.105 2001/12/28 21:37:10 kans
-* allow sfp->product to be SEQLOC_EQUIV
-*
-* Revision 6.104 2001/11/29 18:29:38 kans
-* added FANTOM_DB to list of legal db_xrefs, incremented DBNUM
-*
-* Revision 6.103 2001/11/12 19:32:38 kans
-* updated mRNAEvidenceComment
-*
-* Revision 6.102 2001/10/25 12:45:45 kans
-* Get3LetterSymbol was using table->num instead of table_3aa->num
-*
-* Revision 6.101 2001/10/15 17:08:44 kans
-* updated legal db_xref list to collaboration + RefSeq
-*
-* Revision 6.100 2001/10/15 13:57:22 kans
-* added BDGP_INS and SoyBase as legal db_xrefs
-*
-* Revision 6.99 2001/10/02 17:39:50 yaschenk
-* Removing memory leaks
-*
-* Revision 6.98 2001/09/06 20:31:24 yaschenk
-* removing memory leak - seqid returned by GetSeqIdForGI() needs to be freed
-*
-* Revision 6.97 2001/09/05 23:37:42 tatiana
-* ribosomal slippage added to /note
-*
-* Revision 6.96 2001/09/05 23:32:39 tatiana
-* supressed comparison of note to gene->synonym
-*
-* Revision 6.95 2001/08/22 22:35:07 kans
-* added ProductIsLocal for /translation
-*
-* Revision 6.94 2001/08/07 16:49:41 kans
-* use NUM_SEQID, added third party annotation SeqIDs to one more place
-*
-* Revision 6.93 2001/08/03 20:36:16 kans
-* implemented ASN2GNBK_PRINT_UNKNOWN_ORG test to suppress unwanted mode diffs for asn2gnbk QA
-*
-* Revision 6.92 2001/07/12 17:12:49 kans
-* biop->genome range checks in AddBioSourceToGBQual to prevent crashes
-*
-* Revision 6.91 2001/07/08 21:18:50 kans
-* if ssp->subtype is 0, use ? as tag in note
-*
-* Revision 6.90 2001/06/26 19:50:07 kans
-* call AddPID with is_NC as an option for showing /protein_id with the gi
-*
-* Revision 6.89 2001/06/25 22:22:17 kans
-* ProteinFromCdRegion and GetProductFromCDS only if sfp->product and ! ajp->genome_view, should eliminate unwanted fetches to get far delta components
-*
-* Revision 6.88 2001/05/31 17:42:18 kans
-* NC and NG RefSeq records allow remote fetching for /protein_id and /transcript_id, show gi if fetching not enabled
-*
-* Revision 6.87 2001/03/17 00:51:30 tatiana
-* GeneID added to dbxref array
-*
-* Revision 6.86 2001/02/13 23:31:58 kans
-* allow trans splicing exception, do not change sfp_in->excpt
-*
-* Revision 6.85 2001/01/30 16:25:54 kans
-* precursor_RNA now has /product as legal qualifier
-*
-* Revision 6.84 2001/01/26 19:26:36 kans
-* added niaEST, increased DBNUM
-*
-* Revision 6.83 2001/01/26 19:21:45 kans
-* extrachromosomal into source note, removed macronuclear, extrachrom, plasmid from organism line
-*
-* Revision 6.82 2001/01/18 23:57:01 kans
-* add GO (gene ontology) to list of legal dbxrefs
-*
-* Revision 6.81 2001/01/02 19:56:48 kans
-* Get3LetterSymbol protects against empty string
-*
-* Revision 6.80 2000/12/07 19:03:53 tatiana
-* transcript_id shows for NT only
-*
-* Revision 6.79 2000/12/06 22:00:46 tatiana
-* ifdef removed
-*
-* Revision 6.78 2000/12/06 20:56:24 tatiana
-* AceView link added
-*
-* Revision 6.76 2000/12/04 22:23:47 tatiana
-* contig comments added
-*
-* Revision 6.75 2000/11/22 16:48:18 tatiana
-* remove debugging error printing
-*
-* Revision 6.74 2000/11/10 00:37:13 tatiana
-* changes in AddPID
-*
-* Revision 6.73 2000/10/25 15:57:57 kans
-* sfp_in->excpt set to FALSE, not NULL, UNIX compiler does not know the difference, but Mac and PC compilers do
-*
-* Revision 6.72 2000/10/24 20:35:35 tatiana
-* CDS without protein sequence is accepted for not forgbrel mode
-*
-* Revision 6.70 2000/10/19 18:52:32 kans
-* added another NULL entry to organelleQual for endogenous virus to suppress as organelle qualifier
-*
-* Revision 6.69 2000/10/16 19:10:17 kans
-* added UniSTS and InterimID to legal dbxrefs
-*
-* Revision 6.68 2000/10/10 15:06:02 kans
-* added SUBSRC_endogenous_virus_name
-*
-* Revision 6.67 2000/08/28 22:17:18 kans
-* added CDD to list of legal dbxrefs
-*
-* Revision 6.66 2000/07/14 20:24:26 kans
-* added RGD as dbxref with web link
-*
-* Revision 6.65 2000/07/12 22:45:15 kans
-* added ORGMOD_old_lineage
-*
-* Revision 6.64 2000/06/20 17:31:34 kans
-* added authority through breed as orgmod.subtypes
-*
-* Revision 6.63 2000/06/15 16:45:40 kans
-* added segment to biosource note print
-*
-* Revision 6.62 2000/06/05 17:52:11 tatiana
-* increase size of feature arrays to Int4
-*
-* Revision 6.61 2000/05/15 15:52:50 bazhin
-* Removed memory leak in "PrintSourceFeat()".
-*
-* Revision 6.60 2000/03/30 20:37:29 kans
-* added tilde to newline code in PrintImpFeatEx (thanks to Sergei B)
-*
-* Revision 6.59 2000/03/01 19:09:53 tatiana
-* for SYN records with multiple source features there is no subtraction
-*
-* Revision 6.58 2000/02/17 21:59:18 kans
-* /organelle not under ajp->forgbrel for this release now
-*
-* Revision 6.57 2000/02/15 22:53:56 kans
-* added dbSNP and RATMAP as legal dbxrefs, put /organelle under ajp->forgrel control
-*
-* Revision 6.56 2000/02/09 01:12:51 tatiana
-* remove space in organelle qualifier
-*
-* Revision 6.55 2000/01/21 20:48:45 kans
-* changes to merge several source qualifiers under new organelle qualifier
-*
-* Revision 6.54 2000/01/11 22:49:37 tatiana
-* protein accession is not required in DUMP_MODE
-*
-* Revision 6.53 2000/01/03 23:16:17 kans
-* CDS note components from GetProtRefComment are separated by semicolons - to be consistent with upcoming asn2gnbk style
-*
-* Revision 6.52 1999/10/18 20:13:34 kans
-* removed erroneous cast in sprintf
-*
-* Revision 6.51 1999/10/06 22:18:29 kans
-* calls ComposeCodonsRecognizedString
-*
-* Revision 6.50 1999/10/06 20:23:48 bazhin
-* Removed memory leaks.
-*
-* Revision 6.49 1999/08/03 20:48:23 tatiana
-* UMR error fixed in PrintImpFeat
-*
-* Revision 6.47 1999/04/26 18:53:00 tatiana
-* added pseuod from sfp in ConvertToNAImpFeat()
-*
-* Revision 6.46 1999/04/06 22:37:45 tatiana
-* protein_id hot link added
-*
-* Revision 6.45 1999/04/06 15:00:16 tatiana
-* www_featkey is not called for slp view
-*
-* Revision 6.44 1999/03/30 22:23:33 kans
-* pseudo can be on grp or sfp
-*
-* Revision 6.43 1999/03/30 19:18:19 tatiana
-* changes for SEQID_OTHER
-*
-* Revision 6.42 1999/03/22 23:09:26 tatiana
-* AddPID() changes
-*
-* Revision 6.41 1998/10/19 15:57:35 tatiana
-* UniGene added to dbtag array
-*
-* Revision 6.40 1998/09/24 17:45:57 kans
-* fixed GetDBXrefFromGene problem (TT)
-*
-* Revision 6.39 1998/09/01 19:25:21 kans
-* context parameter in get best protein, get cds/rna given product
-*
-* Revision 6.38 1998/08/19 18:40:38 tatiana
-* RiceGenes added to dbtag array
-*
-* Revision 6.37 1998/07/21 15:14:50 kans
-* GetProtRefComments modified for indexes because continue statement avoided get next feature, got stuck
-*
-* Revision 6.36 1998/07/15 22:07:19 kans
-* implemented sequence manager indexes for non-segmented nucleotides
-*
-* Revision 6.35 1998/07/13 14:52:24 tatiana
-* subtypes added to source feature /note
-*
-* Revision 6.34 1998/06/15 14:57:22 tatiana
-* UNIX compiler warnings and extra HTML characters in notes fixed
-*
-* Revision 6.33 1998/05/20 20:05:40 tatiana
-* SEQFEAT_REGION added to get_prot_feats()
-*
-* Revision 6.32 1998/05/18 14:41:53 tatiana
-* GI added to dbtag array
-*
-* Revision 6.31 1998/05/08 21:56:56 tatiana
-* added new PARTIAL_MODE
-*
-* Revision 6.30 1998/04/30 21:42:36 tatiana
-* *** empty log message ***
-*
-* Revision 6.29 1998/04/27 18:31:51 tatiana
-* added /focus in PrintSourceFeat()
-*
-* Revision 6.28 1998/04/24 15:10:08 tatiana
-* GetProtRefComment() fixed: only main Prot-Ref feature adds comment to CDS
-*
-* Revision 6.27 1998/04/15 21:38:32 kans
-* rearrange dbtag array so PID set is at start, allow unknown database on all but release_mode (Tatiana)
-*
-* Revision 6.24 1998/04/06 14:59:08 tatiana
-* PutTranslationLast has been moved
-*
-* Revision 6.23 1998/04/03 22:38:36 tatiana
-* selenocysteine added tp /note in ComposeCodeBreakQuals()
-*
-* Revision 6.22 1998/04/02 21:42:53 tatiana
-* ignore old_name in OrgMod
-*
-* Revision 6.21 1998/04/02 17:21:23 tatiana
-* a bug fixed in AddBioSourceToGBQual()
-*
-* Revision 6.20 1998/03/30 20:38:56 tatiana
-* nat_host changed to specific_host
-*
-* Revision 6.19 1998/03/27 23:01:54 tatiana
-* AddBioSourceToGBQual: added all OrgMod.subtypes as /notes on the source feature
-*
-* Revision 6.18 1998/03/24 19:47:45 tatiana
-* added check for sfp->except_text
-*
-* Revision 6.17 1998/03/04 18:38:48 tatiana
-* illegal feature will be dropped in ConvertToAAImpFeat
-*
-* Revision 6.16 1998/02/19 21:28:52 tatiana
-* dbtags array updated
-*
-* Revision 6.15 1998/01/26 21:16:16 tatiana
-* biovar and country added to source feature /note
-*
-* Revision 6.14 1998/01/20 22:45:11 tatiana
-* show both product and descr in Genpept
-*
-* Revision 6.13 1998/01/13 16:27:38 tatiana
-* fixed a bug in dbtag check in PrintSourceFeat
-*
-* Revision 6.12 1997/12/23 21:57:16 tatiana
-* focus and specimen_voucher
-*
-* Revision 6.11 1997/12/15 15:48:33 tatiana
-* features processing has been changed
-*
-* Revision 6.10 1997/12/02 18:15:02 tatiana
-* fix use of printf
-*
-* Revision 6.9 1997/10/23 16:57:42 tatiana
-* *** empty log message ***
-*
-* Revision 6.6 1997/09/16 15:48:07 kans
-* removed automatically generated diff lines
-*
-* Revision 6.5 1997/09/16 15:42:52 kans
-* show non-gbff source qualifiers in note with labels (TT)
-*
-* Revision 6.4 1997/09/12 20:20:18 tatiana
-* fixed typo
-*
-* Revision 6.3 1997/09/12 20:03:53 tatiana
-* added source feature in genome_view
-*
-* Revision 6.2 1997/09/04 01:16:48 kans
-* fixed typo
-*
-* Revision 6.1 1997/09/03 21:49:37 tatiana
-* GatherItemWithLock() added for ProtRef features
-*
-* Revision 6.0 1997/08/25 18:04:51 madden
-* Revision changed to 6.0
-*
-* Revision 5.59 1997/08/21 19:03:17 tatiana
-* map, syn, description eliminated from features other than gene
-*
-* Revision 5.58 1997/08/05 20:09:08 kans
-* added check for po->sfp null in PrintSourceFeat
-*
-* Revision 5.57 1997/07/29 14:55:51 kans
-* make sure features on protein are SEQFEAT_PROT
-*
-* Revision 5.56 1997/07/16 21:08:28 tatiana
-* Use gene synonym for /gene qualifier
-*
-* Revision 5.55 1997/06/19 18:37:02 vakatov
-* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
-*
-* Revision 5.54 1997/06/12 16:56:37 kans
-* fixed typo that resulted in lost note (TT)
-*
-* Revision 5.53 1997/06/10 15:27:12 tatiana
-* fix a typo in COnvertToNa... that leaded to the lost /note
-*
- * Revision 5.47 1997/03/14 21:21:33 tatiana
- * exp_evidence fix
- *
- * Revision 5.46 1997/03/05 22:12:33 tatiana
- * print 'pseudo' in /note for orphan genes
- *
- * Revision 5.45 1997/03/04 23:45:14 tatiana
- * check for 'pseudo' gene added in ConvertToNAImpFeat()
- *
- * Revision 5.44 1997/02/25 23:47:21 tatiana
- * new error message added for dropped feature
- *
- * Revision 5.42 1997/01/29 15:49:11 tatiana
- * fix the entityID in GatherProductGeneInfo()
- *
- * Revision 5.40 1997/01/15 17:23:38 tatiana
- * a bug fixed (purify reported) in PrintNAFeatByNumber()
- *
- * Revision 5.39 1997/01/07 23:27:13 tatiana
- * check for NULLs added in CompareTranslation
- *
- * Revision 5.38 1997/01/07 22:32:41 tatiana
- * added SEQFEAT_SITE to get_prot_feats callback
- *
- * Revision 5.37 1997/01/02 22:49:55 tatiana
- * gather SEQFEAT_BOND
- *
- * Revision 5.36 1996/12/10 17:45:41 tatiana
- * a bug fixed in ComposeNoteFromNoteStruct()
- *
- * Revision 5.35 1996/12/09 19:12:33 tatiana
- * SPTREMBL added to legal db_xref database names
- *
- * Revision 5.34 1996/12/04 16:52:16 tatiana
- * a typo fixed in Add_dbxref
- *
- * Revision 5.33 1996/12/03 15:49:57 tatiana
- * 'CK' added to array of legal databases in db_xref
- *
- * Revision 5.32 1996/10/30 16:52:36 tatiana
- * SeqIdFindBest added in PrintSourceFeat
- *
- * Revision 5.31 1996/10/25 22:11:19 tatiana
- * NoteCmp changed
- *
- * Revision 5.30 1996/10/24 20:40:12 tatiana
- * a bug fixed in AddDBXref()
- *
- * Revision 5.29 1996/10/18 21:37:22 tatiana
- * a bug fixed in NoteCmp
- *
- * Revision 5.28 1996/10/09 15:15:00 tatiana
- * Take the main protein ONLY (not sig_peptide mat_peptide)
- * to make CDS comments
- *
- * Revision 5.27 1996/09/25 18:05:45 tatiana
- * SEQFEAT_COMMENT becomes misc_feature
- *
- * Revision 5.26 1996/09/18 20:41:26 kans
- * changed uninitialized variable names to correct names, removed unused
- * variable
- *
- * Revision 5.25 1996/09/18 20:21:27 tatiana
- * NoteCmp added to ComposeNoteFromNoteStruct to check for identical notes
- *
- * Revision 5.24 1996/09/17 14:59:04 tatiana
- * virion and transl_except added
- *
- * Revision 5.23 1996/09/12 17:52:28 tatiana
- * a bug fixed in PrintSourceFeat
- *
- * Revision 5.22 1996/09/06 14:58:00 tatiana
- * clean sfp_out at the end of PrintSourceFeat and PrintNAFeatByNumber
- *
- * Revision 5.21 1996/09/04 13:40:17 tatiana
- * a bug fixed in GetDotTRNA
- *
- * Revision 5.19 1996/09/03 19:51:30 tatiana
- * extra_loc added
- *
- * Revision 5.18 1996/08/16 20:32:23 tatiana
- * for ifp->key StringSave is used not StringCpy
- *
- * Revision 5.17 1996/08/12 16:36:40 tatiana
- * ErrPostEx changed to ErrPostStr
- *
- * Revision 5.16 1996/08/06 20:30:46 kans
- * SeqIdFindBest called to handle local IDs and genbank IDs coexisting
- *
- * Revision 5.15 1996/08/02 21:41:23 tatiana
- * turned off metho conceptual transl by author
- *
- * Revision 5.14 1996/07/30 17:28:07 kans
- * ParFlat_... arrays now external in header file
- *
- * Revision 5.13 1996/07/30 16:34:09 tatiana
- * minor change in PrintSourcefeat
- *
- * Revision 5.12 1996/07/29 19:46:14 tatiana
- * GBQual_names changed to use a structureGBQual_names changed to use a structure
- *
- * Revision 5.11 1996/07/23 22:33:40 tatiana
- * prot feats in genpept (piptides)
- *
- * Revision 5.10 1996/07/22 22:07:21 tatiana
- * a bug fixed in DoTRNAQual
- *
- * Revision 5.9 1996/07/15 18:07:10 tatiana
- * minor changes in PrintSourceFeat to show 'unknown' in debug mode
- *
- * Revision 5.8 1996/07/12 20:38:22 tatiana
- * concept_transl_a supressed
- *
- * Revision 5.7 1996/07/12 20:11:49 tatiana
- * DotRNAQuals() changed
- *
- * Revision 5.6 1996/07/11 14:58:27 tatiana
- * product in tRNA
- *
- * Revision 5.5 1996/07/09 16:31:34 tatiana
- * a bug fixed in PrintNAFeatByNumber
- *
- * Revision 5.4 1996/07/02 18:09:17 tatiana
- * don't print duplicated features (PrintNAFeatByNumber)
- *
- * Revision 5.3 1996/06/14 18:03:56 tatiana
- * GetNAFeatKey change
- *
- * Revision 5.2 1996/06/11 15:35:04 tatiana
- * make GetGeneticCode static and get_prot_feats non-static
- *
- * Revision 5.1 1996/05/31 18:01:24 tatiana
- * check for /pseudo in CdRegion added
- *
- * Revision 4.35 1996/05/21 21:02:03 tatiana
- * a bug fixed in location[] size in PrintSourceFeat()
- *
- * Revision 4.34 1996/05/16 20:58:09 tatiana
- * GetCdregionGeneXrefInfo changed to Boolean
- *
- * Revision 4.33 1996/04/25 14:55:33 kans
- * protect against biosource subsource subtype of 255 (other) or bad values
- *
- * Revision 4.32 1996/04/15 14:36:49 tatiana
- * memory leaks cleaning
- *
- * Revision 4.31 1996/04/08 21:53:56 tatiana
- * change in www_featloc
- *
- * Revision 4.30 1996/04/05 17:43:36 ostell
- * added quickie patch for overrun of buf[30] when called by
- * www_featloc()
- *
- * Revision 4.29 1996/03/25 15:20:19 tatiana
- * add html symbols
- *
- * Revision 4.28 1996/03/19 23:58:27 tatiana
- * print activity in CDS
- *
- * Revision 4.27 1996/03/12 21:36:32 tatiana
- * 'serotype' added to orgmod_subtype array
- *
- * Revision 4.26 1996/02/28 04:53:06 ostell
- * changes to support segmented master seeuquences
- *
- * Revision 4.25 1996/02/26 00:46:18 ostell
- * removed unused local variables and integer size mismatch fusses
- *
- * Revision 4.24 1996/02/18 21:16:48 tatiana
- * memory leaks cleaned up
- *
- * Revision 4.23 1996/02/16 16:22:32 tatiana
- * a bug fixed in ConvertToNAImpFeat
- *
- * Revision 4.22 1996/02/15 15:52:18 tatiana
- * Gather for temp loaded items and sortin features within entity addded
- *
- * Revision 4.21 1996/01/29 22:34:42 tatiana
- * mainly PID changes
- *
- * Revision 4.20 1995/12/20 22:38:02 tatiana
- * gene xrefs to db_xref
- *
- * Revision 4.19 1995/12/15 02:47:01 ostell
- * added protection so that GatherProductGeneInfo does not gather if protein
- * bioseq not already in memory
- *
- * Revision 4.18 1995/12/13 16:31:36 tatiana
- * anticodon added to new tRNA slot
- *
- * Revision 4.17 1995/12/04 23:01:16 tatiana
- * take starin from OrgRef.mod in PrintSourceFeat()
- *
- * Revision 4.16 1995/11/28 15:19:46 tatiana
- * GetPID fixed
- *
- * Revision 4.15 1995/11/22 18:59:42 tatiana
- * a bug fixed in orphan genes printing
- *
- * Revision 4.14 1995/11/17 21:49:19 tatiana
- * hot link to genetic code added
- *
- * Revision 4.13 1995/11/17 21:28:35 kans
- * asn2ff now uses gather (Tatiana)
- *
- * Revision 4.4 1995/08/18 22:18:31 tatiana
- * a bug fix
- *
- * Revision 4.1 1995/08/01 14:51:29 tatiana
- * change SeqIdPrint to SeqIdWrite
- *
- * Revision 1.65 1995/07/17 19:33:20 kans
- * parameters combined into Asn2ffJobPtr structure
- *
- * Revision 1.61 1995/06/19 21:40:02 kans
- * Tatiana's first major reorganization, moving printing, adding HTML
- *
- * Revision 1.60 1995/05/19 21:25:06 kans
- * no longer fetches CDS protein product causing Entrez disc swap
- *
- * Revision 1.59 1995/05/15 21:46:05 ostell
- * added Log line
- *
*
**************************************/
#include <asn2ffp.h>
@@ -3709,7 +3120,7 @@ NLM_EXTERN void Add_trid (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out)
{
ImpFeatPtr ifp;
- Int4 gi = -1;
+ BIG_ID gi = -1;
SeqIdPtr sip, newid=NULL;
ValNodePtr product;
Char buf[MAX_ACCESSION_LEN+5];
@@ -3728,7 +3139,7 @@ NLM_EXTERN void Add_trid (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out)
if ((newid = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
SeqIdWrite(newid, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
} else {
- sprintf(buf, "%d", sip->data.intvalue);
+ sprintf(buf, "%ld", sip->data.intvalue);
}
} else {
SeqIdWrite(sip, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
@@ -3748,7 +3159,7 @@ NLM_EXTERN void AddPID (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG)
{
ImpFeatPtr ifp;
- Int4 gi = -1;
+ BIG_ID gi = -1;
SeqIdPtr sip, new_id=NULL;
ValNodePtr product, vnp;
BioseqPtr p_bsp = NULL;
@@ -3771,7 +3182,7 @@ NLM_EXTERN void AddPID (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG)
SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
SeqIdFree(new_id); /*** need to free it !!! (EY) ***/
} else {
- sprintf(buf, "%d", sip->data.intvalue);
+ sprintf(buf, "%ld", sip->data.intvalue);
}
sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
} else if ((p_bsp = BioseqFind(sip)) != NULL) {
@@ -4630,7 +4041,7 @@ NLM_EXTERN GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp,
* This code prints out an ImpFeat in GenBank and HTML format.
*
****************************************************************************/
-NLM_EXTERN Int2 PrintImpFeatEx (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, Int4 gi, Int2 entityID, Uint4 itemID)
+NLM_EXTERN Int2 PrintImpFeatEx (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, BIG_ID gi, Int2 entityID, Uint4 itemID)
{
CharPtr flatloc_ptr, key, loc;
GBQualPtr gbqp;
diff --git a/api/asn2ffp.h b/api/asn2ffp.h
index a50083f4..2dcbcacb 100644
--- a/api/asn2ffp.h
+++ b/api/asn2ffp.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/15/95
*
-* $Revision: 6.34 $
+* $Revision: 6.37 $
*
* File Description:
*
@@ -42,330 +42,6 @@
* ==========================================================================
*/
-/*************************************
-*
- * $Log: asn2ffp.h,v $
- * Revision 6.34 2006/07/13 17:06:38 bollin
- * use Uint4 instead of Uint2 for itemID values
- * removed unused variables
- * resolved compiler warnings
- *
- * Revision 6.33 2003/07/22 16:18:27 kans
- * added ZFIN as legal db_xref
- *
- * Revision 6.32 2003/06/10 18:44:10 kans
- * added GeneDB to list of legal db_xrefs
- *
- * Revision 6.31 2003/05/29 20:25:19 kans
- * added Interpro to list of legal dbxrefs
- *
- * Revision 6.30 2002/11/30 20:18:27 kans
- * added GOA to list of legal db_xrefs
- *
- * Revision 6.29 2002/11/27 22:25:17 kans
- * added AceView/WormGenes, NextDB, and WorfDB to legal db_xrefs
- *
- * Revision 6.28 2002/07/12 17:34:35 kans
- * WormBase is now legal dbxref for all records, not just RefSeq
- *
- * Revision 6.27 2002/06/21 15:31:11 kans
- * added GABI db_xref
- *
- * Revision 6.26 2002/06/18 20:59:59 kans
- * added ISFinder as legal db_xref with hotlink
- *
- * Revision 6.25 2002/05/06 22:15:12 kans
- * added IFO and JCM db_xrefs
- *
- * Revision 6.24 2002/02/20 21:59:33 tatiana
- * DBNUM increased for IMGT/LIGM
- *
- * Revision 6.23 2001/11/29 18:29:38 kans
- * added FANTOM_DB to list of legal db_xrefs, incremented DBNUM
- *
- * Revision 6.22 2001/10/15 17:08:44 kans
- * updated legal db_xref list to collaboration + RefSeq
- *
- * Revision 6.21 2001/10/15 13:57:22 kans
- * added BDGP_INS and SoyBase as legal db_xrefs
- *
- * Revision 6.20 2001/09/06 18:55:52 tatiana
- * *** empty log message ***
- *
- * Revision 6.19 2001/03/17 00:51:14 tatiana
- * GeneID added to dbxref array, DBNUM increased
- *
- * Revision 6.18 2001/01/26 19:26:37 kans
- * added niaEST, increased DBNUM
- *
- * Revision 6.17 2001/01/18 23:57:02 kans
- * add GO (gene ontology) to list of legal dbxrefs
- *
- * Revision 6.16 2000/12/05 01:21:23 tatiana
- * DBNUM increased for COG
- *
- * Revision 6.15 2000/10/16 19:10:18 kans
- * added UniSTS and InterimID to legal dbxrefs
- *
- * Revision 6.14 2000/08/28 22:17:19 kans
- * added CDD to list of legal dbxrefs
- *
- * Revision 6.13 2000/07/14 20:24:27 kans
- * added RGD as dbxref with web link
- *
- * Revision 6.12 2000/02/15 22:53:58 kans
- * added dbSNP and RATMAP as legal dbxrefs, put /organelle under ajp->forgrel control
- *
- * Revision 6.11 1999/04/02 20:35:51 kans
- * added prototype for GR_PrintPubs
- *
- * Revision 6.10 1999/03/22 23:19:19 tatiana
- * PrintImpFeatEx() added
- *
- * Revision 6.9 1998/10/19 15:57:04 tatiana
- * DBNUM increased for UniGene tag
- *
- * Revision 6.8 1998/08/19 18:40:03 tatiana
- * DBNUM increased to add RiceGenes dbrag
- *
- * Revision 6.7 1998/05/18 14:41:19 tatiana
- * DBNUM increased to add GI to dbtag array
- *
- * Revision 6.6 1998/04/15 20:50:38 tatiana
- * bugs fixing
- *
- * Revision 6.5 1998/04/15 18:44:45 tatiana
- * dbtag array moved from asn2ff3.c
- *
- * Revision 6.4 1997/12/15 15:45:59 tatiana
- * features processing has been changed
- *
- * Revision 6.3 1997/12/02 16:44:17 tatiana
- * missing semicolon in SortOrganizeFeat
- *
- * Revision 6.1 1997/09/16 15:41:01 kans
- * added AddSiteNoteQual (TT)
- *
- * Revision 5.7 1997/06/19 18:37:24 vakatov
- * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
- *
- * Revision 5.6 1996/11/20 15:26:58 tatiana
- * prototype for GetDefinitionLine
- *
- * Revision 5.5 1996/07/30 16:31:44 tatiana
- * add Boolean arg in CheckNAFeat()
- *
- * Revision 5.4 1996/07/24 12:40:24 tatiana
- * GetCdregionGeneXrefInfo changed number of arguments
- *
- * Revision 5.3 1996/07/12 16:48:49 tatiana
- * *** empty log message ***
- *
- * Revision 5.2 1996/06/11 15:25:32 tatiana
- * add PrintNID PROTO
- *
- * Revision 5.1 1996/06/06 14:50:31 tatiana
- * *** empty log message ***
- *
- * Revision 4.10 1996/05/16 20:59:07 tatiana
- * GetCdregionGeneXrefInfo changed to Boolean
- *
- * Revision 4.9 1996/04/29 18:54:51 tatiana
- * *** empty log message ***
- *
- * Revision 4.9 1996/04/29 18:54:51 tatiana
- * *** empty log message ***
- *
- * Revision 4.7 1996/04/09 14:05:06 tatiana
- * *** empty log message ***
- *
- * Revision 4.6 1996/02/21 20:12:17 tatiana
- * *** empty log message ***
- *
- * Revision 4.5 1996/02/15 17:20:23 tatiana
- * GatherItemWithLock added
- *
- * Revision 4.4 1995/12/20 22:45:30 tatiana
- * new function added
- *
- * Revision 4.1 1995/08/22 15:39:26 tatiana
- * GetVersion name changed to GetBiotableVersion
- *
- * Revision 1.43 1995/07/17 19:33:20 kans
- * parameters combined into Asn2ffJobPtr structure
- *
-*
-**************************************/
-
-/*****************************************************************************
-*
-* Header file for asn2gb files.
-*
-****************************************************************************/
-
-/*************************************
-*
-* $Log: asn2ffp.h,v $
-* Revision 6.34 2006/07/13 17:06:38 bollin
-* use Uint4 instead of Uint2 for itemID values
-* removed unused variables
-* resolved compiler warnings
-*
-* Revision 6.33 2003/07/22 16:18:27 kans
-* added ZFIN as legal db_xref
-*
-* Revision 6.32 2003/06/10 18:44:10 kans
-* added GeneDB to list of legal db_xrefs
-*
-* Revision 6.31 2003/05/29 20:25:19 kans
-* added Interpro to list of legal dbxrefs
-*
-* Revision 6.30 2002/11/30 20:18:27 kans
-* added GOA to list of legal db_xrefs
-*
-* Revision 6.29 2002/11/27 22:25:17 kans
-* added AceView/WormGenes, NextDB, and WorfDB to legal db_xrefs
-*
-* Revision 6.28 2002/07/12 17:34:35 kans
-* WormBase is now legal dbxref for all records, not just RefSeq
-*
-* Revision 6.27 2002/06/21 15:31:11 kans
-* added GABI db_xref
-*
-* Revision 6.26 2002/06/18 20:59:59 kans
-* added ISFinder as legal db_xref with hotlink
-*
-* Revision 6.25 2002/05/06 22:15:12 kans
-* added IFO and JCM db_xrefs
-*
-* Revision 6.24 2002/02/20 21:59:33 tatiana
-* DBNUM increased for IMGT/LIGM
-*
-* Revision 6.23 2001/11/29 18:29:38 kans
-* added FANTOM_DB to list of legal db_xrefs, incremented DBNUM
-*
-* Revision 6.22 2001/10/15 17:08:44 kans
-* updated legal db_xref list to collaboration + RefSeq
-*
-* Revision 6.21 2001/10/15 13:57:22 kans
-* added BDGP_INS and SoyBase as legal db_xrefs
-*
-* Revision 6.20 2001/09/06 18:55:52 tatiana
-* *** empty log message ***
-*
-* Revision 6.19 2001/03/17 00:51:14 tatiana
-* GeneID added to dbxref array, DBNUM increased
-*
-* Revision 6.18 2001/01/26 19:26:37 kans
-* added niaEST, increased DBNUM
-*
-* Revision 6.17 2001/01/18 23:57:02 kans
-* add GO (gene ontology) to list of legal dbxrefs
-*
-* Revision 6.16 2000/12/05 01:21:23 tatiana
-* DBNUM increased for COG
-*
-* Revision 6.15 2000/10/16 19:10:18 kans
-* added UniSTS and InterimID to legal dbxrefs
-*
-* Revision 6.14 2000/08/28 22:17:19 kans
-* added CDD to list of legal dbxrefs
-*
-* Revision 6.13 2000/07/14 20:24:27 kans
-* added RGD as dbxref with web link
-*
-* Revision 6.12 2000/02/15 22:53:58 kans
-* added dbSNP and RATMAP as legal dbxrefs, put /organelle under ajp->forgrel control
-*
-* Revision 6.11 1999/04/02 20:35:51 kans
-* added prototype for GR_PrintPubs
-*
-* Revision 6.10 1999/03/22 23:19:19 tatiana
-* PrintImpFeatEx() added
-*
-* Revision 6.9 1998/10/19 15:57:04 tatiana
-* DBNUM increased for UniGene tag
-*
-* Revision 6.8 1998/08/19 18:40:03 tatiana
-* DBNUM increased to add RiceGenes dbrag
-*
-* Revision 6.7 1998/05/18 14:41:19 tatiana
-* DBNUM increased to add GI to dbtag array
-*
-* Revision 6.6 1998/04/15 20:50:38 tatiana
-* bugs fixing
-*
-* Revision 6.5 1998/04/15 18:44:45 tatiana
-* dbtag array moved from asn2ff3.c
-*
-* Revision 6.4 1997/12/15 15:45:59 tatiana
-* features processing has been changed
-*
-* Revision 6.3 1997/12/02 16:44:17 tatiana
-* missing semicolon in SortOrganizeFeat
-*
-* Revision 6.1 1997/09/16 15:41:01 kans
-* added AddSiteNoteQual (TT)
-*
-* Revision 5.7 1997/06/19 18:37:24 vakatov
-* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
-*
-* Revision 5.6 1996/11/20 15:26:58 tatiana
-* prototype for GetDefinitionLine
-*
- * Revision 5.5 1996/07/30 16:31:44 tatiana
- * add Boolean arg in CheckNAFeat()
- *
- * Revision 5.4 1996/07/24 12:40:24 tatiana
- * GetCdregionGeneXrefInfo changed number of arguments
- *
- * Revision 5.3 1996/07/12 16:48:49 tatiana
- * *** empty log message ***
- *
- * Revision 5.2 1996/06/11 15:25:32 tatiana
- * add PrintNID PROTO
- *
- * Revision 5.1 1996/06/06 14:50:31 tatiana
- * *** empty log message ***
- *
- * Revision 4.10 1996/05/16 20:59:07 tatiana
- * GetCdregionGeneXrefInfo changed to Boolean
- *
- * Revision 4.9 1996/04/29 18:54:51 tatiana
- * *** empty log message ***
- *
- * Revision 4.9 1996/04/29 18:54:51 tatiana
- * *** empty log message ***
- *
- * Revision 4.7 1996/04/09 14:05:06 tatiana
- * *** empty log message ***
- *
- * Revision 4.6 1996/02/21 20:12:17 tatiana
- * *** empty log message ***
- *
- * Revision 4.5 1996/02/15 17:20:23 tatiana
- * GatherItemWithLock added
- *
- * Revision 4.4 1995/12/20 22:45:30 tatiana
- * new function added
- *
- * Revision 4.1 1995/08/22 15:39:26 tatiana
- * GetVersion name changed to GetBiotableVersion
- *
- * Revision 1.43 1995/07/17 19:33:20 kans
- * parameters combined into Asn2ffJobPtr structure
- *
- * Revision 1.41 1995/06/19 21:40:02 kans
- * Tatiana's first major reorganization, moving printing, adding HTML
- *
- * Revision 1.40 1995/05/22 16:14:20 tatiana
- * add ASN2FF_SHOW_ALL_PUBS to asn2ff_flags
- *
- * Revision 1.39 1995/05/15 21:46:05 ostell
- * added Log line
- *
-*
-**************************************/
#ifndef _ASN2FFP_
#define _ASN2FFP_
@@ -440,7 +116,7 @@ NLM_EXTERN CharPtr FlatJournal PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, ValNode
NLM_EXTERN ValNodePtr GetKeywordLine PROTO((Asn2ffJobPtr ajp, GBEntryPtr gbp));
NLM_EXTERN void PrintSourceFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
NLM_EXTERN Int2 PrintImpFeat PROTO ((Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp));
-NLM_EXTERN Int2 PrintImpFeatEx PROTO ((Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, Int4 gi, Int2 entityID, Uint4 itemID));
+NLM_EXTERN Int2 PrintImpFeatEx PROTO ((Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, BIG_ID gi, Int2 entityID, Uint4 itemID));
NLM_EXTERN void PrintNAFeatAwp PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
NLM_EXTERN void PrintNAFeatByNumber PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
NLM_EXTERN void PrintAAFeatByNumber PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp));
diff --git a/api/asn2gnb1.c b/api/asn2gnb1.c
index 01dd49e2..3094eddd 100644
--- a/api/asn2gnb1.c
+++ b/api/asn2gnb1.c
@@ -31,7 +31,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.239 $
+* $Revision: 1.280 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -216,6 +216,47 @@ NLM_EXTERN void FFAddString_NoRedund (
}
}
+NLM_EXTERN void FFAddString_NoRedundEx (
+ StringItemPtr unique,
+ CharPtr prefix,
+ CharPtr string,
+ CharPtr suffix,
+ Boolean convertQuotes
+);
+
+NLM_EXTERN void FFAddString_NoRedundEx (
+ StringItemPtr unique,
+ CharPtr prefix,
+ CharPtr string,
+ CharPtr suffix,
+ Boolean convertQuotes
+)
+{
+ CharPtr str = string;
+ Int4 foundPos = 0;
+ Boolean wholeWord = FALSE;
+
+ if ( StringHasNoText(prefix) &&
+ StringHasNoText(string) &&
+ StringHasNoText(suffix) ) return;
+
+ if (StringNICmp (string, "tRNA-", 5) == 0) {
+ str = string+5;
+ }
+
+ while ( foundPos >= 0 && !wholeWord ) {
+ foundPos = FFStringSearch(unique, str, foundPos);
+ if ( foundPos >= 0 ) {
+ wholeWord = IsWholeWordSubstr(unique, foundPos, str);
+ foundPos += StringLen(str);
+ }
+ }
+
+ if ( foundPos < 0 || !wholeWord ) {
+ FFAddTextToString(unique, prefix, string, suffix, FALSE, convertQuotes, TILDE_EXPAND);
+ }
+}
+
/* s_AddPeriodToEnd () -- Adds a '.' to the end of a given string if */
@@ -2263,7 +2304,7 @@ static Boolean s_LocusGetBaseName (BioseqPtr parent, BioseqPtr segment, CharPtr
10, /* 7 = pir */
10, /* 8 = swissprot */
15, /* 9 = patent */
- 20, /* 10 = other TextSeqId */
+ 10, /* 10 = other = refseq */
20, /* 11 = general Dbtag */
255, /* 12 = gi */
10, /* 13 = ddbj */
@@ -2272,8 +2313,8 @@ static Boolean s_LocusGetBaseName (BioseqPtr parent, BioseqPtr segment, CharPtr
10, /* 16 = tpg */
10, /* 17 = tpe */
10, /* 18 = tpd */
- 10, /* 19 = gpp */
- 10 /* 20 = nat */
+ 15, /* 19 = gpp */
+ 15 /* 20 = nat */
};
/* DoOneSection builds a single report for one bioseq or segment */
@@ -2302,20 +2343,6 @@ static Asn2gbSectPtr Asn2gbAddSection (
return asp;
}
-NLM_EXTERN Boolean DeltaLitOnly (
- BioseqPtr bsp
-)
-
-{
- ValNodePtr vnp;
-
- if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
- for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
- if (vnp->choice == 1) return FALSE;
- }
- return TRUE;
-}
-
NLM_EXTERN Boolean SegHasParts (
BioseqPtr bsp
)
@@ -2334,6 +2361,67 @@ NLM_EXTERN Boolean SegHasParts (
return FALSE;
}
+static Boolean LocInBioseq ( /* reports whether the SeqId obtained from slp is found by SeqIdIn in bsp->id */
+ SeqLocPtr slp, /* location to test; NULL yields FALSE */
+ BioseqPtr bsp /* Bioseq whose id chain is checked; NULL yields FALSE */
+)
+
+{
+ SeqIdPtr sip; /* id returned by SeqLocId for slp */
+
+ if (slp == NULL || bsp == NULL) return FALSE; /* guard before bsp->id is dereferenced */
+ sip = SeqLocId (slp);
+ if (sip == NULL) return FALSE; /* SeqLocId produced no id to compare */
+ return SeqIdIn (sip, bsp->id); /* result of SeqIdIn is returned unchanged */
+}
+
+static void AddRemainingGaps ( /* adds a FEATURE_BLOCK for each gap feature from awp->currfargap onward whose location is on the section's Bioseq */
+ Asn2gbWorkPtr awp /* work structure; supplies asp, afp, firstfeat and the currfargap cursor */
+)
+
+{
+ Asn2gbSectPtr asp;
+ BioseqPtr bsp;
+ FeatBlockPtr fbp;
+ SeqFeatPtr gap; /* local copy of the awp->currfargap cursor */
+ IntFeatBlockPtr ifp; /* same block as fbp, viewed through its internal type */
+
+ if (awp == NULL) return;
+ asp = awp->asp;
+ if (asp == NULL) return;
+ bsp = asp->bsp;
+ if (bsp == NULL) return;
+ gap = awp->currfargap;
+ if (gap != NULL && awp->afp != NULL) { /* nothing is done unless a gap is pending and afp is set */
+ while (gap != NULL && LocInBioseq (gap->location, bsp)) { /* stops at the first gap whose location is not on bsp */
+
+ fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock)); /* sized as IntFeatBlock so the cast to ifp below can address the extra fields */
+ if (fbp != NULL) {
+ fbp->entityID = 0; /* entityID and itemID are both set to 0 for these blocks */
+ fbp->itemID = 0;
+ fbp->itemtype = OBJ_SEQFEAT;
+ fbp->featdeftype = FEATDEF_gap;
+ ifp = (IntFeatBlockPtr) fbp; /* presumably IntFeatBlock begins with a FeatBlock - confirm in asn2gnbi.h */
+ ifp->mapToNuc = FALSE;
+ ifp->mapToProt = FALSE;
+ ifp->mapToGen = FALSE;
+ ifp->mapToMrna = FALSE;
+ ifp->mapToPep = FALSE;
+ ifp->left = 0;
+ ifp->right = 0;
+ ifp->firstfeat = awp->firstfeat; /* block records the current firstfeat flag, which is then cleared */
+ awp->firstfeat = FALSE;
+ if (awp->afp != NULL) { /* repeats the afp test made before the loop */
+ DoImmediateRemoteFeatureFormat (awp->afp, (BaseBlockPtr) fbp, gap);
+ }
+ }
+
+ awp->currfargap = gap->next; /* cursor on awp advances even when no block could be added */
+ gap = awp->currfargap;
+ }
+ }
+}
+
NLM_EXTERN void DoOneSection (
BioseqPtr target,
BioseqPtr parent,
@@ -2358,6 +2446,8 @@ NLM_EXTERN void DoOneSection (
BaseBlockPtr PNTR blockArray;
Boolean cagemaster = FALSE;
SeqMgrDescContext dcontext;
+ BioseqPtr gbsp;
+ SeqAnnotPtr gsap;
Boolean hasRefs;
Int4 i;
IntAsn2gbSectPtr iasp;
@@ -2369,6 +2459,7 @@ NLM_EXTERN void DoOneSection (
Int4 numsegs = 0;
SeqDescrPtr sdp;
SeqIdPtr sip;
+ Boolean tlsmaster = FALSE;
Boolean tsamaster = FALSE;
TextSeqIdPtr tsip;
ValNodePtr vnp;
@@ -2378,6 +2469,7 @@ NLM_EXTERN void DoOneSection (
Boolean willshowcontig = FALSE;
Boolean willshowgenome = FALSE;
Boolean willshowsequence = FALSE;
+ Boolean willshowtls = FALSE;
Boolean willshowtsa = FALSE;
Boolean willshowwgs = FALSE;
@@ -2389,6 +2481,8 @@ NLM_EXTERN void DoOneSection (
if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
if (bsp->repr == Seq_repr_seg) {
} else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
+ } else if (bsp->repr == Seq_repr_ref) {
+ } else if (bsp->repr == Seq_repr_map) {
} else return;
}
@@ -2438,6 +2532,16 @@ NLM_EXTERN void DoOneSection (
awp->featjustseen = FALSE;
awp->wgsaccnlist = NULL;
+ if (ajp->manygaps != NULL) {
+ gbsp = (BioseqPtr) ajp->manygaps->data.ptrvalue;
+ if (gbsp != NULL) {
+ gsap = gbsp->annot;
+ if (gsap != NULL && gsap->type == 1) {
+ awp->currfargap = (SeqFeatPtr) gsap->data;
+ }
+ }
+ }
+
/* initialize empty blockList for this section */
awp->blockList = NULL;
@@ -2482,6 +2586,10 @@ NLM_EXTERN void DoOneSection (
if (StringCmp (tsip->accession + 6, "0000000") == 0) {
wgsmaster = TRUE;
}
+ } else if (acclen == 14) {
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ wgsmaster = TRUE;
+ }
}
}
} else if (sip->choice == SEQID_OTHER) {
@@ -2514,6 +2622,8 @@ NLM_EXTERN void DoOneSection (
wgstech = TRUE;
} else if (mip->tech == MI_TECH_tsa && bsp->repr == Seq_repr_virtual) {
tsamaster = TRUE;
+ } else if (mip->tech == MI_TECH_targeted && bsp->repr == Seq_repr_virtual) {
+ tlsmaster = TRUE;
} else if (mip->tech == MI_TECH_other && StringCmp (mip->techexp, "cage") == 0) {
cagemaster = TRUE;
}
@@ -2525,7 +2635,7 @@ NLM_EXTERN void DoOneSection (
if (sip->choice == SEQID_OTHER) {
isRefSeq = TRUE;
} else if (sip->choice == SEQID_GI) {
- awp->currGi = (Int4) sip->data.intvalue;
+ awp->currGi = (BIG_ID) sip->data.intvalue;
} else if (sip->choice == SEQID_GPIPE) {
isGpipe = TRUE;
}
@@ -2543,6 +2653,7 @@ NLM_EXTERN void DoOneSection (
}
if (! awp->hideFeatures) {
AddFeatureBlock (awp);
+ AddRemainingGaps (awp);
}
} else {
@@ -2551,6 +2662,8 @@ NLM_EXTERN void DoOneSection (
willshowwgs = TRUE;
} else if (tsamaster) {
willshowtsa = TRUE;
+ } else if (tlsmaster) {
+ willshowtls = TRUE;
} else if (cagemaster) {
willshowcage = TRUE;
} else if (nsgenome) {
@@ -2568,6 +2681,8 @@ NLM_EXTERN void DoOneSection (
willshowcontig = TRUE;
} else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
willshowcontig = TRUE;
+ } else if (bsp->repr == Seq_repr_ref) {
+ willshowcontig = TRUE;
}
}
if (! awp->hideSequence) {
@@ -2575,7 +2690,7 @@ NLM_EXTERN void DoOneSection (
}
}
- AddLocusBlock (awp, willshowwgs, willshowtsa, willshowcage, willshowgenome, willshowcontig, willshowsequence);
+ AddLocusBlock (awp, willshowwgs, willshowtsa, willshowtls, willshowcage, willshowgenome, willshowcontig, willshowsequence);
if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
@@ -2622,8 +2737,7 @@ NLM_EXTERN void DoOneSection (
AddSegmentBlock (awp, onePartOfSeg, (Boolean) ISA_na (bsp->mol));
}
- AddSourceBlock (awp);
- AddOrganismBlock (awp);
+ AddSourceOrganismBlock (awp);
/*
if (awp->showRefStats) {
@@ -2668,6 +2782,10 @@ NLM_EXTERN void DoOneSection (
AddTSABlock (awp);
+ } else if (tlsmaster) {
+
+ AddTLSBlock (awp);
+
} else if (cagemaster) {
AddCAGEBlock (awp);
@@ -2681,24 +2799,30 @@ NLM_EXTERN void DoOneSection (
if (awp->showconfeats) {
if (! awp->hideFeatures) {
AddFeatureBlock (awp);
+ AddRemainingGaps (awp);
}
} else if (awp->smartconfeats && bsp->length <= 1000000) {
if (! awp->hideFeatures) {
AddFeatureBlock (awp);
+ AddRemainingGaps (awp);
}
}
AddContigBlock (awp);
if (awp->showContigAndSeq) {
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
- if (awp->showBaseCount) {
+ if (awp->showBaseCount && bsp->repr != Seq_repr_map) {
AddBasecountBlock (awp);
}
}
- AddOriginBlock (awp);
+ if (bsp->repr != Seq_repr_map) {
+ AddOriginBlock (awp);
+ }
if (! awp->hideSequence) {
- AddSequenceBlock (awp);
+ if (bsp->repr != Seq_repr_map) {
+ AddSequenceBlock (awp);
+ }
}
}
@@ -2706,6 +2830,7 @@ NLM_EXTERN void DoOneSection (
if (! awp->hideFeatures) {
AddFeatureBlock (awp);
+ AddRemainingGaps (awp);
}
if (awp->showContigAndSeq) {
@@ -2713,18 +2838,24 @@ NLM_EXTERN void DoOneSection (
AddContigBlock (awp);
} else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
AddContigBlock (awp);
+ } else if (bsp->repr == Seq_repr_ref) {
+ AddContigBlock (awp);
}
}
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
- if (awp->showBaseCount) {
+ if (awp->showBaseCount && bsp->repr != Seq_repr_map) {
AddBasecountBlock (awp );
}
}
- AddOriginBlock (awp);
+ if (bsp->repr != Seq_repr_map) {
+ AddOriginBlock (awp);
+ }
if (! awp->hideSequence) {
- AddSequenceBlock (awp);
+ if (bsp->repr != Seq_repr_map) {
+ AddSequenceBlock (awp);
+ }
}
}
@@ -2990,6 +3121,9 @@ NLM_EXTERN void DoOneBioseq (
if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
if (! DeltaLitOnly (bsp)) {
contig = TRUE;
+ if (awp->isRefSeq) {
+ ajp->masterStyle = TRUE;
+ }
}
}
@@ -3018,6 +3152,8 @@ NLM_EXTERN void DoOneBioseq (
} else if (bsp->repr == Seq_repr_raw ||
bsp->repr == Seq_repr_const ||
bsp->repr == Seq_repr_delta ||
+ bsp->repr == Seq_repr_ref ||
+ bsp->repr == Seq_repr_map ||
bsp->repr == Seq_repr_virtual) {
parent = SeqMgrGetParentOfPart (bsp, &context);
@@ -3153,7 +3289,7 @@ static void RecordOneSection (
if (sip == NULL) return;
if (sip->choice == SEQID_GI) {
- vnp = ValNodeAddInt (&(ajp->gitail), 0, (Int4) sip->data.intvalue);
+ vnp = ValNodeAddBigInt (&(ajp->gitail), 0, (BIG_ID) sip->data.intvalue);
if (ajp->gihead == NULL) {
ajp->gihead = vnp;
}
@@ -3188,6 +3324,8 @@ static void CountOneSection (
if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
if (bsp->repr == Seq_repr_seg) {
} else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
+ } else if (bsp->repr == Seq_repr_ref) {
+ } else if (bsp->repr == Seq_repr_map) {
} else return;
}
@@ -3329,6 +3467,8 @@ static void CountOneBioseq (
} else if (bsp->repr == Seq_repr_raw ||
bsp->repr == Seq_repr_const ||
bsp->repr == Seq_repr_delta ||
+ bsp->repr == Seq_repr_ref ||
+ bsp->repr == Seq_repr_map ||
bsp->repr == Seq_repr_virtual) {
parent = SeqMgrGetParentOfPart (bsp, &context);
@@ -3675,16 +3815,19 @@ static void CheckVersionWithGi (BioseqPtr bsp, Pointer userdata)
typedef struct lookforids {
Boolean isG;
Boolean isGED;
+ Boolean isED;
Boolean isNTorNWorNG;
Boolean isNC;
Boolean isNZ;
Boolean isRefSeq;
Boolean isGeneral;
+ Boolean isNCBIGenomes;
Boolean isTPA;
Boolean isTPG;
Boolean isSP;
Boolean isNuc;
Boolean isProt;
+ Boolean isFarProt;
Boolean isLocal;
Boolean isNonLocal;
Boolean sourcePubFuse;
@@ -3704,15 +3847,25 @@ static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
}
if (ISA_aa (bsp->mol)) {
lfip->isProt = TRUE;
+ if (bsp->repr == Seq_repr_delta) {
+ if (! DeltaLitOnly (bsp)) {
+ lfip->isFarProt = TRUE;
+ }
+ } else if (bsp->repr == Seq_repr_ref) {
+ lfip->isFarProt = TRUE;
+ }
}
for (sip = bsp->id; sip != NULL; sip = sip->next) {
switch (sip->choice) {
case SEQID_GENBANK :
lfip->isG = TRUE;
- /* and fall through to EMBL and DDBJ */
+ lfip->isGED = TRUE;
+ lfip->isNonLocal = TRUE;
+ break;
case SEQID_EMBL :
case SEQID_DDBJ :
+ lfip->isED = TRUE;
lfip->isGED = TRUE;
lfip->isNonLocal = TRUE;
break;
@@ -3750,6 +3903,9 @@ static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
if (dbt != NULL && !IsSkippableDbtag(dbt)) {
lfip->isGeneral = TRUE;
lfip->isNonLocal = TRUE;
+ if (StringCmp (dbt->db, "NCBI_GENOMES") == 0) {
+ lfip->isNCBIGenomes = TRUE;
+ }
}
break;
case SEQID_LOCAL :
@@ -3801,16 +3957,19 @@ static void LookForGEDetc (
SeqEntryPtr topsep,
BoolPtr isG,
BoolPtr isGED,
+ BoolPtr isED,
BoolPtr isNTorNWorNG,
BoolPtr isNC,
BoolPtr isNZ,
BoolPtr isRefSeq,
BoolPtr isGeneral,
+ BoolPtr isNCBIGenomes,
BoolPtr isTPA,
BoolPtr isTPG,
BoolPtr isSP,
BoolPtr isNuc,
BoolPtr isProt,
+ BoolPtr isFarProt,
BoolPtr isOnlyLocal,
BoolPtr sourcePubFuse
)
@@ -3822,16 +3981,19 @@ static void LookForGEDetc (
VisitBioseqsInSep (topsep, (Pointer) &lfi, LookForSeqIDs);
*isG = lfi.isG;
*isGED = lfi.isGED;
+ *isED = lfi.isED;
*isNTorNWorNG = lfi.isNTorNWorNG;
*isNC = lfi.isNC;
*isNZ = lfi.isNZ;
*isRefSeq = lfi.isRefSeq;
*isGeneral = lfi.isGeneral;
+ *isNCBIGenomes = lfi.isNCBIGenomes;
*isTPA = lfi.isTPA;
*isTPG = lfi.isTPG;
*isSP = lfi.isSP;
*isNuc = lfi.isNuc;
*isProt = lfi.isProt;
+ *isFarProt = lfi.isFarProt;
if (lfi.isLocal && (! lfi.isNonLocal)) {
*isOnlyLocal = TRUE;
} else {
@@ -3854,6 +4016,7 @@ static void MakeGapFeatsBase (
IntFuzzPtr fuzz;
ValNodePtr PNTR gapvnp;
ImpFeatPtr ifp;
+ SeqFeatPtr last = NULL;
SeqLitPtr litp;
SeqAnnotPtr sap = NULL;
SeqFeatPtr sfp;
@@ -3903,8 +4066,13 @@ static void MakeGapFeatsBase (
if (sfp == NULL) continue;
sfp->data.choice = SEQFEAT_IMP;
sfp->data.value.ptrvalue = (Pointer) ifp;
- sfp->next = (SeqFeatPtr) sap->data;
- sap->data = (Pointer) sfp;
+ sfp->idx.subtype = FEATDEF_gap;
+ if (last != NULL) {
+ last->next = sfp;
+ } else {
+ sap->data = (Pointer) sfp;
+ }
+ last = sfp;
fuzz = litp->fuzz;
if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
AddQualifierToFeature (sfp, "estimated_length", "unknown");
@@ -3933,9 +4101,16 @@ static void MakeGapFeatsBase (
/* I can't seem to find pound-defines for
some of these magic numbers below */
gap_is_linked = ( seq_gap->linkage == 1 ); /* linked */
+
+ if (seq_gap->linkage_evidence != NULL) {
+ gap_is_linked = TRUE; /* do not rely solely on seq_gap->linkage, which is not always set correctly */
+ }
+
switch( seq_gap->type ) {
case 0: /* unknown */
- /* no /gap_type label */
+ /* formerly no /gap_type label - policy changed at SQD-1801, now labeled "unknown" */
+ AddQualifierToFeature(sfp, "gap_type", "unknown" );
+ needs_evidence = gap_is_linked;
break;
case 1: /* fragment */
AddQualifierToFeature(sfp, "gap_type", "within scaffold" );
@@ -4024,6 +4199,10 @@ static void MakeGapFeatsBase (
AddQualifierToFeature(sfp, "linkage_evidence",
"unspecified" );
break;
+ case 9: /* pcr */
+ AddQualifierToFeature(sfp, "linkage_evidence",
+ "pcr" );
+ break;
case 255: /* other */
AddQualifierToFeature(sfp, "linkage_evidence",
"other" );
@@ -4078,74 +4257,6 @@ static void MakeGapFeats (
MakeGapFeatsBase (bsp, userdata, FALSE, FALSE);
}
-static CharPtr gapstr1 = " gap ";
-static CharPtr gapstr2 = " /estimated_length=";
-static CharPtr gapstr3 = "unknown";
-
-static void MakeFarGapFeats (
- BioseqPtr bsp,
- Pointer userdata
-)
-
-{
- Char buf [256];
- Int4 currpos = 0;
- ValNodePtr PNTR fargaps;
- IntFuzzPtr fuzz;
- SeqLitPtr litp;
- Boolean notFar = FALSE;
- SeqIdPtr sip;
- SeqLocPtr slp;
- ValNodePtr vnp;
-
- if (bsp == NULL || bsp->repr != Seq_repr_delta) return;
- fargaps = (ValNodePtr PNTR) userdata;
- if (fargaps == NULL) return;
- sip = SeqIdFindBest (bsp->id, 0);
- if (sip == NULL) return;
- /* no longer suppress on far delta contigs */
- /* if (! DeltaLitOnly (bsp)) return; */
-
- /* empty string at beginning for GetFeatsOnSeg to skip over */
- ValNodeAddPointer (fargaps, 0, NULL);
-
- for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
- if (vnp->choice == 1) {
- slp = (SeqLocPtr) vnp->data.ptrvalue;
- if (slp == NULL) continue;
- currpos += SeqLocLen (slp);
- }
- if (vnp->choice == 2) {
- litp = (SeqLitPtr) vnp->data.ptrvalue;
- if (litp == NULL) continue;
- if (litp->seq_data == NULL || litp->seq_data_type == Seq_code_gap) {
- if (litp->length > 0) {
- fuzz = litp->fuzz;
- if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
- sprintf (buf, "%s%ld..%ld\n%s%s\n",
- gapstr1, (long) currpos + 1, (long) currpos + litp->length,
- gapstr2, gapstr3);
- } else {
- sprintf (buf, "%s%ld..%ld\n%s%ld\n",
- gapstr1, (long) currpos + 1, (long) currpos + litp->length,
- gapstr2, (long) litp->length);
- }
- ValNodeCopyStr (fargaps, 0, (Pointer) buf);
- }
- } else {
- notFar = TRUE;
- }
- currpos += litp->length;
- }
- }
-
- if (notFar) {
- for (vnp = *fargaps; vnp != NULL; vnp = vnp->next) {
- vnp->choice = 1;
- }
- }
-}
-
typedef struct featpolicy {
Boolean forceOnlyNearFeats;
Boolean forceAllowFarFeats;
@@ -4296,18 +4407,23 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
Uint2 entityID = 0;
Uint2 item_type = 0;
Uint4 item_id = 0;
- ValNodePtr fargaps = NULL;
CharPtr ffhead = NULL;
CharPtr fftail = NULL;
Asn2gbWriteFunc ffwrite = NULL;
FeatPolicy featpolicy;
ValNodePtr gapvnp = NULL;
GBSeqPtr gbseq = NULL;
+ BioseqPtr gbsp;
+ SeqAnnotPtr gsap;
+ SeqFeatPtr gsfp;
Int4 i;
IndxPtr index = NULL;
+ Boolean isFarProt;
Boolean isG;
Boolean isGED;
+ Boolean isED;
Boolean isGeneral;
+ Boolean isNCBIGenomes;
Boolean isNTorNWorNG;
Boolean isNC;
Boolean isNuc;
@@ -4330,12 +4446,14 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
Boolean lookupFarLocs;
Boolean lookupFarOthers;
Boolean lookupFarProd;
+ ValNodePtr manygaps = NULL;
Boolean missingVersion;
Boolean multiIntervalGenes = FALSE;
- Int4 nextGi = 0;
+ BIG_ID nextGi = 0;
Boolean noLeft;
Boolean noRight;
Int4 numBlocks;
+ Int4 numGaps;
Int4 numSections;
SeqEntryPtr oldscope;
ObjMgrDataPtr omdp;
@@ -4343,13 +4461,15 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
BaseBlockPtr PNTR paragraphArray;
BaseBlockPtr PNTR paragraphByIDs;
BioseqPtr parent = NULL;
- Int4 prevGi = 0;
+ BIG_ID prevGi = 0;
Int2 q;
Boolean reindex = TRUE;
Pointer remotedata = NULL;
Asn2gbFreeFunc remotefree = NULL;
Asn2gbLockFunc remotelock = NULL;
ValNodePtr remotevnp = NULL;
+ Int2 sat = 0;
+ Int4 sat_key = 0;
SubmitBlockPtr sbp;
Asn2gbSectPtr PNTR sectionArray;
Boolean segmentedBioseqs = FALSE;
@@ -4394,6 +4514,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
bkmask = extra->bkmask;
reindex = extra->reindex;
seqspans = extra->seqspans;
+ sat = extra->sat;
+ sat_key = extra->sat_key;
}
if ((custom & FORCE_SEQ_SPANS) != 0) {
@@ -4489,9 +4611,9 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
sep = GetTopSeqEntryForEntityID (entityID);
- LookForGEDetc (sep, &isG, &isGED, &isNTorNWorNG, &isNC, &isNZ, &isRefSeq,
- &isGeneral, &isTPA, &isTPG, &isSP, &isNuc, &isProt,
- &isOnlyLocal, &sourcePubFuse);
+ LookForGEDetc (sep, &isG, &isGED, &isED, &isNTorNWorNG, &isNC, &isNZ, &isRefSeq,
+ &isGeneral, &isNCBIGenomes, &isTPA, &isTPG, &isSP, &isNuc,
+ &isProt, &isFarProt, &isOnlyLocal, &sourcePubFuse);
if (mode == RELEASE_MODE) {
missingVersion = FALSE;
@@ -4506,21 +4628,11 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
featpolicy.forceAllowFarFeats = FALSE;
VisitDescriptorsInSep (sep, (Pointer) &featpolicy, LookFarFeatFetchPolicy);
- fargaps = NULL;
- if (format != FTABLE_FMT && (! was_slp)) {
- if (isRefSeq && isNC && VisitFeaturesInSep (sep, NULL, NULL) == 0) {
- if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
- VisitBioseqsInSep (sep, (Pointer) &fargaps, MakeFarGapFeats);
- }
- if (fargaps != NULL && fargaps->choice == 1) {
- fargaps = ValNodeFreeData (fargaps);
- }
- }
- }
- ajp->fargaps = fargaps;
-
gapvnp = NULL;
- if (fargaps == NULL && format != FTABLE_FMT && (! was_slp)) {
+ manygaps = NULL;
+ remotevnp = NULL;
+
+ if (format != FTABLE_FMT && (! was_slp)) {
if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || isSP || (isGeneral && (! isGED))) {
if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
if (isSP) {
@@ -4533,9 +4645,34 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
}
}
}
+
+ numGaps = 0;
+ if (gapvnp != NULL) {
+ gbsp = (BioseqPtr) gapvnp->data.ptrvalue;
+ if (gbsp != NULL) {
+ gsap = gbsp->annot;
+ if (gsap != NULL && gsap->type == 1) {
+ for (gsfp = (SeqFeatPtr) gsap->data; gsfp != NULL; gsfp = gsfp->next) {
+ numGaps++;
+ }
+ }
+ }
+ }
+ if (isED) {
+ if (numGaps > 0) {
+ manygaps = gapvnp;
+ gapvnp = NULL;
+ }
+ } else {
+ if (numGaps > 1000) {
+ manygaps = gapvnp;
+ gapvnp = NULL;
+ }
+ }
+
ajp->gapvnp = gapvnp;
+ ajp->manygaps = manygaps;
- remotevnp = NULL;
ajp->remotelock = remotelock;
ajp->remotefree = remotefree;
ajp->remotedata = remotedata;
@@ -4545,6 +4682,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
remotevnp = remotelock (sip, remotedata);
}
}
+
ajp->remotevnp = remotevnp;
if (gapvnp != NULL || remotevnp != NULL) {
@@ -4621,6 +4759,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
ajp->bkmask = bkmask;
ajp->reindex = reindex;
ajp->seqspans = seqspans;
+ ajp->sat = sat;
+ ajp->sat_key = sat_key;
ajp->aip = aip;
ajp->atp = atp;
@@ -4683,6 +4823,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
ajp->seqGapCurrLen = 0;
ajp->relaxedMapping = (Boolean) ((flags & RELAXED_MAPPING) != 0);
+ ajp->gpipdDeflines = (Boolean) ((flags & GPIPE_DEFLINES) != 0);
+ ajp->hideProteinID = (Boolean) ((flags & HIDE_PROTEIN_ID) != 0);
ajp->produceInsdSeq = (Boolean) (((flags & PRODUCE_OLD_GBSEQ) == 0) && ((custom & OLD_GBSEQ_XML) == 0));
ajp->oldXmlPolicy = (Boolean) ((custom & NEW_XML_POLICY) == 0);
@@ -4774,6 +4916,10 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
/* do not set other flags */
+ } else if (featpolicy.forceOnlyNearFeats) {
+
+ aw.onlyNearFeats = TRUE;
+
} else if (isNC) {
if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
@@ -4792,6 +4938,10 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
aw.onlyNearFeats = TRUE;
+ } else if (format == GENPEPT_FMT && isFarProt ) {
+
+ aw.onlyNearFeats = TRUE;
+
} else if (isGED) {
if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
@@ -4801,12 +4951,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
} else {
aw.nearFeatsSuppress = TRUE;
}
- ajp->showFarTransl = TRUE;
-
- } else if (featpolicy.forceOnlyNearFeats) {
- aw.onlyNearFeats = TRUE;
-
} else {
aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
@@ -4814,6 +4959,10 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
}
+ if (isGED || isRefSeq) {
+ ajp->showFarTransl = TRUE;
+ }
+
/* continue setting flags */
aw.showFeatStats = (Boolean) ((custom & SHOW_FEATURE_STATS) != 0);
@@ -4850,6 +4999,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
aw.copyGpsGeneDown = (Boolean) ((flags & COPY_GPS_GENE_DOWN) != 0);
}
}
+ aw.isNCBIGenomes = isNCBIGenomes;
aw.isRefSeq = isRefSeq;
aw.showContigAndSeq = (Boolean) ((flags & SHOW_CONTIG_AND_SEQ) != 0);
@@ -4895,6 +5045,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
}
}
+ ajp->hideGI = (Boolean) ((flags & HIDE_GI_NUMBERS) != 0);
ajp->bad_html_fsa = TextFsaNew ();
for (q = 0; bad_html_strings [q] != NULL; q++) {
@@ -4921,7 +5072,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
ffwrite (ffhead, userdata, HEAD_BLOCK, entityID, item_type, item_id, 0, 0);
}
if (is_html) {
- DoQuickLinkFormat (aw.afp, "<div class=\"sequence\">");
+ DoQuickLinkFormat (aw.afp, "<div class=\"sequence\">\n");
}
}
@@ -5489,7 +5640,7 @@ NLM_EXTERN void PrintFTCodeBreakEx (
)
{
- Char buf [80];
+ Char buf [128];
Choice cbaa;
IntAsn2gbJob iaj;
SeqLocPtr newloc;
@@ -5791,6 +5942,9 @@ static void PrintBioSourceFtableEntry (
case ORGMOD_metagenome_source :
sprintf (str, "\t\t\tmetagenome_source\t");
break;
+ case ORGMOD_type_material :
+ sprintf (str, "\t\t\ttype_material\t");
+ break;
case ORGMOD_old_lineage :
sprintf (str, "\t\t\told_lineage\t");
break;
@@ -5935,6 +6089,12 @@ static void PrintBioSourceFtableEntry (
case SUBSRC_haplogroup :
sprintf (str, "\t\t\thaplogroup\t");
break;
+ case SUBSRC_phenotype :
+ sprintf (str, "\t\t\tphenotype\t");
+ break;
+ case SUBSRC_altitude :
+ sprintf (str, "\t\t\taltitude\t");
+ break;
case SUBSRC_other :
sprintf (str, "\t\t\tnote\t");
break;
@@ -6111,6 +6271,10 @@ NLM_EXTERN void PrintFtableLocAndQuals (
sprintf (tmp, "\t\t\tgene\t%s\n", str);
ValNodeCopyStr (head, 0, tmp);
}
+ if (! StringHasNoText (grp->allele)) {
+ sprintf (tmp, "\t\t\tallele\t%s\n", grp->allele);
+ ValNodeCopyStr (head, 0, tmp);
+ }
for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
if (! StringHasNoText (str)) {
@@ -6199,7 +6363,7 @@ NLM_EXTERN void PrintFtableLocAndQuals (
ValNodeCopyStr (head, 0, tmp);
}
}
- if (prod != NULL) {
+ if (prod != NULL && ! ajp->hideProteinID) {
if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
ValNodeCopyStr (head, 0, tmp);
@@ -6216,7 +6380,7 @@ NLM_EXTERN void PrintFtableLocAndQuals (
}
}
}
- } else if (sfp->product != NULL) {
+ } else if (sfp->product != NULL && ! ajp->hideProteinID) {
sip = SeqLocId (sfp->product);
if (sip != NULL) {
if (sip->choice == SEQID_GI) {
@@ -6281,16 +6445,13 @@ NLM_EXTERN void PrintFtableLocAndQuals (
}
if (slp != NULL && StringDoesHaveText (aa)) {
anticodon [0] = '\0';
- if (ajp->refseqConventions && SeqLocLen (slp) == 3) {
+ if (SeqLocLen (slp) == 3) {
tmpy = GetSequenceByLocation (slp);
if (tmpy != NULL) {
ptr = tmpy;
ch = *ptr;
while (ch != '\0') {
- ch = TO_UPPER (ch);
- if (ch == 'T') {
- ch = 'U';
- }
+ ch = TO_LOWER(ch);
*ptr = ch;
ptr++;
ch = *ptr;
@@ -6341,7 +6502,7 @@ NLM_EXTERN void PrintFtableLocAndQuals (
break;
}
}
- if (prod != NULL) {
+ if (prod != NULL && ! ajp->hideProteinID) {
if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
ValNodeCopyStr (head, 0, tmp);
@@ -6358,7 +6519,7 @@ NLM_EXTERN void PrintFtableLocAndQuals (
}
}
}
- } else if (sfp->product != NULL) {
+ } else if (sfp->product != NULL && ! ajp->hideProteinID) {
sip = SeqLocId (sfp->product);
if (sip != NULL) {
if (sip->choice == SEQID_GI) {
@@ -6417,12 +6578,12 @@ NLM_EXTERN void PrintFtableLocAndQuals (
ValNodeCopyStr (head, 0, tmp);
}
*/
- if (prod != NULL) {
+ if (prod != NULL && ! ajp->hideProteinID) {
if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
ValNodeCopyStr (head, 0, tmp);
}
- } else if (sfp->product != NULL) {
+ } else if (sfp->product != NULL && ! ajp->hideProteinID) {
sip = SeqLocId (sfp->product);
if (sip != NULL) {
if (sip->choice == SEQID_GI) {
@@ -6967,8 +7128,8 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
UnlockFarComponents (iajp->lockedBspList);
}
- if (iajp->fargaps != NULL) {
- ValNodeFreeData (iajp->fargaps);
+ if (iajp->manygaps != NULL) {
+ ValNodeFreeData (iajp->manygaps);
}
if (iajp->gapvnp != NULL || iajp->remotevnp != NULL) {
diff --git a/api/asn2gnb2.c b/api/asn2gnb2.c
index bec80e74..095d94a8 100644
--- a/api/asn2gnb2.c
+++ b/api/asn2gnb2.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.176 $
+* $Revision: 1.235 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -54,7 +54,10 @@
#include <gbfeat.h>
#include <gbftdef.h>
#include <edutil.h>
-#include <alignmgr2.h>
+#include <validerr.h>
+#include <objvalid.h>
+#include <valapi.h>
+#include <asn2gnbi.h>
#include <asn2gnbi.h>
#ifdef WIN_MAC
@@ -63,32 +66,39 @@
#endif
#endif
-static CharPtr link_projid = "http://www.ncbi.nlm.nih.gov/bioproject/";
+static CharPtr link_projid = "https://www.ncbi.nlm.nih.gov/bioproject/";
+
+static CharPtr link_bioproj = "https://www.ncbi.nlm.nih.gov/bioproject/";
-static CharPtr link_bioproj = "http://www.ncbi.nlm.nih.gov/bioproject?term=";
+static CharPtr link_biosamp = "https://www.ncbi.nlm.nih.gov/biosample/";
-static CharPtr link_biosamp = "http://www.ncbi.nlm.nih.gov/biosample?term=";
+static CharPtr link_assembl = "https://www.ncbi.nlm.nih.gov/assembly/";
-static CharPtr link_srr = "http://www.ncbi.nlm.nih.gov/sites/entrez?db=sra&term=";
+static CharPtr link_srr = "https://www.ncbi.nlm.nih.gov/sra/";
+static CharPtr link_srz = "https://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?analysis=";
-static CharPtr link_accn = "http://www.ncbi.nlm.nih.gov/sites/entrez?";
+static CharPtr link_accn = "https://www.ncbi.nlm.nih.gov/sites/entrez?";
-static CharPtr link_wgs = "http://www.ncbi.nlm.nih.gov/Traces/wgs?";
-static CharPtr link_wgsscaf = "http://www.ncbi.nlm.nih.gov/nuccore?";
+static CharPtr link_wgs = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";
+static CharPtr link_wgsscaf = "https://www.ncbi.nlm.nih.gov/nuccore?";
-static CharPtr link_tsa = "http://www.ncbi.nlm.nih.gov/Traces/wgs?";
+static CharPtr link_tls = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";
-static CharPtr link_cage = "http://www.ncbi.nlm.nih.gov/sites/entrez?";
+static CharPtr link_tsa = "https://www.ncbi.nlm.nih.gov/Traces/wgs?";
+
+static CharPtr link_cage = "https://www.ncbi.nlm.nih.gov/sites/entrez?";
static CharPtr link_sp = "http://www.uniprot.org/uniprot/";
+static CharPtr link_mmdb = "https://www.ncbi.nlm.nih.gov/Structure/mmdb/mmdbsrv.cgi?uid=";
+
/*
-static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
-static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
+static CharPtr link_featn = "https://www.ncbi.nlm.nih.gov/nuccore/";
+static CharPtr link_featp = "https://www.ncbi.nlm.nih.gov/protein/";
*/
-static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
-static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
+static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
+static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
/*
static CharPtr link_omim = "http://www.ncbi.nlm.nih.gov/omim/";
@@ -255,7 +265,7 @@ static Boolean s_LocusAdjustLength(CharPtr locus, Int2 maxLength)
buflen = StringLen (locus);
if (buflen <= maxLength) return FALSE;
- buftmp = MemNew(maxLength + 1);
+ buftmp = (CharPtr) MemNew(maxLength + 1);
/* If the sequence id is an NCBI locus of the */
/* form HSU00001, then make sure that if */
@@ -408,7 +418,7 @@ static Boolean LocusHasBadChars (
}
static void LookupAccnForNavLink (
- Int4 gi,
+ BIG_ID gi,
CharPtr seqid,
size_t len,
CharPtr dfault
@@ -438,6 +448,7 @@ NLM_EXTERN void AddLocusBlock (
Asn2gbWorkPtr awp,
Boolean willshowwgs,
Boolean willshowtsa,
+ Boolean willshowtls,
Boolean willshowcage,
Boolean willshowgenome,
Boolean willshowcontig,
@@ -457,7 +468,7 @@ NLM_EXTERN void AddLocusBlock (
Boolean cagemaster = FALSE;
SeqFeatPtr cds;
Char ch1, ch2, ch3;
- Int4 currGi;
+ BIG_ID currGi;
Char dataclass [10];
Char date [40];
SeqMgrDescContext dcontext;
@@ -493,15 +504,16 @@ NLM_EXTERN void AddLocusBlock (
Char locus [41];
MolInfoPtr mip;
Char mol [64];
- Int4 nextGi;
+ BIG_ID nextGi;
BioseqPtr nm = NULL;
BioseqPtr nuc;
ObjectIdPtr oip;
OrgNamePtr onp;
Uint1 origin;
+ CharPtr original_id = NULL;
OrgRefPtr orp;
BioseqPtr parent;
- Int4 prevGi;
+ BIG_ID prevGi;
CharPtr ptr;
SeqDescrPtr sdp;
Char sect [128];
@@ -514,6 +526,7 @@ NLM_EXTERN void AddLocusBlock (
CharPtr str;
CharPtr suffix = NULL;
Uint1 tech;
+ Boolean tlsmaster = FALSE;
Uint1 topology;
Boolean tsamaster = FALSE;
TextSeqIdPtr tsip;
@@ -524,8 +537,8 @@ NLM_EXTERN void AddLocusBlock (
Boolean wgsmaster = FALSE;
Int2 moltype, strandedness, topol;
/*
- Int4 gi = 0;
- Char gi_buf [16];
+ BIG_ID gi = 0;
+ Char gi_buf [32];
Boolean is_aa;
CharPtr prefix = NULL;
*/
@@ -551,6 +564,7 @@ NLM_EXTERN void AddLocusBlock (
dataclass [0] = '\0';
date [0] = '\0';
gene [0] = '\0';
+ locus [0] = '\0';
genome_view = FALSE;
if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
@@ -563,6 +577,10 @@ NLM_EXTERN void AddLocusBlock (
/* locus id */
+ if (ShouldUseOriginalID (bsp)) {
+ original_id = FastaGetOriginalId (bsp);
+ }
+
sip = NULL;
version = 0;
for (sip = bsp->id; sip != NULL; sip = sip->next) {
@@ -620,7 +638,9 @@ NLM_EXTERN void AddLocusBlock (
}
sprintf (ver, "%d", (int) version);
- if (genome_view) {
+ if (original_id != NULL) {
+ StringNCpy_0 (locus, original_id, sizeof (locus));
+ } else if (genome_view) {
SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
} else {
SeqIdWrite (sip, locus, PRINTID_TEXTID_LOCUS, sizeof (locus) - 1);
@@ -738,6 +758,10 @@ NLM_EXTERN void AddLocusBlock (
if (StringCmp (tsip->accession + 6, "0000000") == 0) {
wgsmaster = TRUE;
}
+ } else if (acclen == 14) {
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ wgsmaster = TRUE;
+ }
}
}
break;
@@ -777,6 +801,10 @@ NLM_EXTERN void AddLocusBlock (
if (StringCmp (tsip->accession + 6, "0000000") == 0) {
tsamaster = TRUE;
}
+ } else if (acclen == 14) {
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ tsamaster = TRUE;
+ }
}
}
break;
@@ -810,6 +838,10 @@ NLM_EXTERN void AddLocusBlock (
}
}
}
+
+ if (tech == MI_TECH_targeted && bsp->repr == Seq_repr_virtual) {
+ tlsmaster = TRUE;
+ }
}
}
@@ -826,7 +858,9 @@ NLM_EXTERN void AddLocusBlock (
imol = 1;
}
} else if (imol == MOLECULE_TYPE_OTHER_GENETIC_MATERIAL) {
- if (bmol == Seq_mol_rna) {
+ if (bmol == Seq_mol_aa) {
+ imol = MOLECULE_TYPE_PEPTIDE;
+ } else if (bmol == Seq_mol_rna) {
imol = 2;
}
}
@@ -863,6 +897,8 @@ NLM_EXTERN void AddLocusBlock (
sprintf (len, "%ld rc", (long) length);
} else if (cagemaster) {
sprintf (len, "%ld rc", (long) length);
+ } else if (tlsmaster) {
+ sprintf (len, "%ld rc", (long) length);
} else {
sprintf (len, "%ld bp", (long) length);
}
@@ -1002,6 +1038,7 @@ NLM_EXTERN void AddLocusBlock (
if (tech == MI_TECH_unknown ||
tech == MI_TECH_standard ||
tech == MI_TECH_other ||
+ tech == MI_TECH_wgs ||
tech == MI_TECH_htgs_3) {
StringCpy (div, "ENV");
StringCpy (embldiv, "ENV");
@@ -1386,7 +1423,7 @@ NLM_EXTERN void AddLocusBlock (
gi = 0;
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
if (gi > 0) {
@@ -1507,14 +1544,14 @@ NLM_EXTERN void AddLocusBlock (
do {
vnp = gilistpos;
if (vnp != NULL) {
- prevGi = vnp->data.intvalue;
+ prevGi = (BIG_ID) vnp->data.intvalue;
vnp = vnp->next;
gilistpos = vnp;
if (vnp != NULL) {
- currGi = vnp->data.intvalue;
+ currGi = (BIG_ID) vnp->data.intvalue;
vnp = vnp->next;
if (vnp != NULL) {
- nextGi = vnp->data.intvalue;
+ nextGi = (BIG_ID) vnp->data.intvalue;
}
}
}
@@ -1586,14 +1623,14 @@ NLM_EXTERN void AddDeflineBlock (
)
{
- IntAsn2gbJobPtr ajp;
- Asn2gbSectPtr asp;
- BaseBlockPtr bbp;
- BioseqPtr bsp;
- Char buf [4096];
- GBSeqPtr gbseq;
- ItemInfo ii;
- StringItemPtr ffstring;
+ IntAsn2gbJobPtr ajp;
+ Asn2gbSectPtr asp;
+ BaseBlockPtr bbp;
+ BioseqPtr bsp;
+ GBSeqPtr gbseq;
+ ItemInfo ii;
+ StringItemPtr ffstring;
+ CharPtr title;
if (awp == NULL) return;
ajp = awp->ajp;
@@ -1610,26 +1647,25 @@ NLM_EXTERN void AddDeflineBlock (
if ( ffstring == NULL ) return;
MemSet ((Pointer) (&ii), 0, sizeof (ItemInfo));
- MemSet ((Pointer) buf, 0, sizeof (buf));
/* create default defline */
- if (NewCreateDefLineBuf (&ii, bsp, buf, sizeof (buf), FALSE, FALSE)) {
+ title = NewCreateDefLineEx (&ii, bsp, ajp->gpipdDeflines, FALSE, ajp->gpipdDeflines);
+
+ FFStartPrint (ffstring, awp->format, 0, 12, "DEFINITION", 12, 5, 5, "DE", TRUE);
+
+ if (StringDoesHaveText (title)) {
bbp->entityID = ii.entityID;
bbp->itemID = ii.itemID;
bbp->itemtype = ii.itemtype;
- FFStartPrint (ffstring, awp->format, 0, 12, "DEFINITION", 12, 5, 5, "DE", TRUE);
-
- if (StringHasNoText (buf)) {
- FFAddOneChar (ffstring, '.', FALSE);
- } else {
- FFAddOneString (ffstring, buf, TRUE, TRUE, TILDE_IGNORE);
- }
-
- bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "DE");
+ FFAddOneString (ffstring, title, TRUE, TRUE, TILDE_IGNORE);
+ } else {
+ FFAddOneChar (ffstring, '.', FALSE);
}
+ bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "DE");
+
/* optionally populate gbseq for XML-ized GenBank format */
if (ajp->gbseq) {
@@ -1639,9 +1675,11 @@ NLM_EXTERN void AddDeflineBlock (
}
if (gbseq != NULL) {
- gbseq->definition = StringSave (buf);
+ gbseq->definition = StringSave (title);
}
+ MemFree (title);
+
FFRecycleString(ajp, ffstring);
/*
@@ -1885,6 +1923,12 @@ NLM_EXTERN void AddAccessionBlock (
} else if (acclen == 13) {
wgsaccn = tsip->accession;
len = 13;
+ } else if (acclen == 14) {
+ wgsaccn = tsip->accession;
+ len = 14;
+ } else if (acclen == 15) {
+ wgsaccn = tsip->accession;
+ len = 15;
}
}
break;
@@ -2040,6 +2084,8 @@ NLM_EXTERN void AddAccessionBlock (
StringCpy (buf + len - 6, "000000");
} else if (acclen == 13 && StringCmp (buf + len - 7, "0000000") != 0) {
StringCpy (buf + len - 7, "0000000");
+ } else if (acclen == 14 && StringCmp (buf + len - 8, "00000000") != 0) {
+ StringCpy (buf + len - 8, "00000000");
} else if (acclen == 15 && StringCmp (buf + len - 8, "00000000") != 0) {
StringCpy (buf + len - 8, "00000000");
} else {
@@ -2145,7 +2191,7 @@ NLM_EXTERN void AddVersionBlock (
Char ch1, ch2, ch3;
Uint1 format = PRINTID_TEXTID_ACC_VER;
GBSeqPtr gbseq;
- Int4 gi = -1;
+ BIG_ID gi = -1;
SeqIdPtr gpp = NULL;
IntAsn2gbSectPtr iasp;
IndxPtr index;
@@ -2171,7 +2217,7 @@ NLM_EXTERN void AddVersionBlock (
for (sip = bsp->id; sip != NULL; sip = sip->next) {
switch (sip->choice) {
case SEQID_GI :
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
break;
case SEQID_GENBANK :
case SEQID_EMBL :
@@ -2208,6 +2254,12 @@ NLM_EXTERN void AddVersionBlock (
/* if (gi < 1 && accn == NULL) return; */
+ /* display of GI in VERSION line is now under control of HIDE_GI_NUMBERS bit in flags argument */
+
+ if (ajp->hideGI) {
+ gi = 0;
+ }
+
if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
if ( accn == NULL ) return;
if (awp->newLocusLine) return;
@@ -2295,7 +2347,7 @@ NLM_EXTERN void AddVersionBlock (
}
if (gi > 0) {
- sprintf (version, "%s GI:%ld", buf, (long) gi);
+ sprintf (version, "%s GI:%lld", buf, (long long) gi);
} else {
sprintf (version, "%s", buf);
}
@@ -2318,7 +2370,7 @@ NLM_EXTERN void AddVersionBlock (
index->version = StringSave (ptr);
}
if (gi > 0) {
- sprintf (tmp, "%ld", (long) gi);
+ sprintf (tmp, "%lld", (long long) gi);
index->gi = StringSave (tmp);
}
}
@@ -2335,6 +2387,8 @@ NLM_EXTERN void AddVersionBlock (
ptr = StringChr (buf, '.');
if (ptr != NULL) {
gbseq->accession_version = StringSave (buf);
+ } else if (StringDoesHaveText (gbseq->primary_accession)) {
+ gbseq->accession_version = StringSave (gbseq->primary_accession);
}
}
@@ -2342,7 +2396,7 @@ NLM_EXTERN void AddVersionBlock (
FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 12, 5, 5, "SV", TRUE);
- sprintf (version, " GI:%ld", (long) gi);
+ sprintf (version, " GI:%lld", (long long) gi);
FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
@@ -2397,7 +2451,11 @@ static void FF_asn2gb_www_SRR (
if (ffstring == NULL) return;
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_Add_NCBI_Base_URL (ffstring, link_srr);
+ if (StringNCmp (str, "SRZ", 3) == 0 || StringNCmp (str, "DRZ", 3) == 0 || StringNCmp (str, "ERZ", 3) == 0) {
+ FF_Add_NCBI_Base_URL (ffstring, link_srz);
+ } else {
+ FF_Add_NCBI_Base_URL (ffstring, link_srr);
+ }
FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
@@ -2508,18 +2566,117 @@ static void FF_asn2gb_www_BS (
}
}
+static void FF_asn2gb_www_AS (
+ IntAsn2gbJobPtr ajp, /* job context: source of the scratch string item and of ajp->format */
+ CharPtr buf, /* destination string; the finished link is appended to it with StringCat */
+ CharPtr str /* identifier used both as the href suffix and as the visible link text */
+)
+ /* Appends to buf an HTML anchor element whose href ends in str and whose text is str. */
+{
+ Char ch;
+ StringItemPtr ffstring;
+ CharPtr ptr;
+ CharPtr tmp;
+ /* silently do nothing when the job or destination is missing, or str has no text */
+ if (ajp == NULL || buf == NULL || StringHasNoText (str)) return;
+
+ ffstring = FFGetString (ajp);
+ if (ffstring == NULL) return;
+ /* pieces added in order: opening tag, URL prefix, str, tag close, str again, closing tag */
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_Add_NCBI_Base_URL (ffstring, link_assembl); /* NOTE(review): presumably emits the site base URL plus the link_assembl path - confirm */
+ FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+
+ tmp = FFEndPrint (ajp, ffstring, ajp->format, 21, 21, 21, 21, NULL);
+ FFRecycleString (ajp, ffstring);
+ /* flatten the result: every newline, carriage return and tab becomes a single space */
+ if (tmp != NULL) {
+ ptr = tmp;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (ch == '\n' || ch == '\r' || ch == '\t') {
+ *ptr = ' ';
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ TrimSpacesAroundString (tmp);
+ StringCat (buf, tmp); /* no length is passed here, so buf capacity is up to the caller */
+ MemFree (tmp); /* tmp is released here once its text has been appended to buf */
+ }
+}
+
+static void AddGbseqXref (
+ GBXrefPtr PNTR headP, /* in/out: first node of the list; set when the list was empty */
+ GBXrefPtr PNTR tailP, /* in/out: insertion hint; updated to the node created here */
+ CharPtr db, /* database name, copied into the new node; blank means no-op */
+ CharPtr str, /* textual id; takes precedence over the numeric id when it has text */
+ Int4 id /* numeric id, used (printed in decimal) only when str has no text */
+)
+ /* Creates one GBXref node (dbname + id) and appends it to the list tracked by headP and tailP. */
+{
+ GBXrefPtr gxp, lst;
+ Char tmp [32];
+ /* nothing is added without list pointers, a database name, and at least one form of id */
+ if (headP == NULL || tailP == NULL) return;
+ if (StringHasNoText (db)) return;
+ if (StringHasNoText (str) && id == 0) return;
+
+ gxp = GBXrefNew ();
+ if (gxp == NULL) return;
+ /* both fields of the node are filled from StringSave results, not from the arguments themselves */
+ gxp->dbname = StringSave (db);
+ if (StringDoesHaveText (str)) {
+ gxp->id = StringSave (str);
+ } else {
+ sprintf (tmp, "%ld", (long) id);
+ gxp->id = StringSave (tmp);
+ }
+ /* link the node in: first node becomes the head, otherwise it goes after the last node */
+ if (*headP == NULL) {
+ *headP = gxp;
+ }
+ if (*tailP != NULL) {
+ lst = *tailP;
+ while (lst->next != NULL) { /* walk forward in case *tailP is not actually the last node */
+ lst = lst->next;
+ }
+ lst->next = gxp;
+ }
+ *tailP = gxp;
+}
+
+typedef enum { /* selects which extra output argument, if any, a string-valued DBLink entry also fills */
+ eDbLinkStrOutputDest_Nothing, /* no extra output; GetDBLinkString does nothing for this value */
+ eDbLinkStrOutputDest_bioProjectIDP /* GetDBLinkString stores the value through its bioProjectIDP argument */
+} EDbLinkStrOutputDest;
+
+typedef void (*TDbLinkWWWFormatter)(IntAsn2gbJobPtr, CharPtr, CharPtr); /* link formatter callback; called as (ajp, output buffer, identifier), e.g. FF_asn2gb_www_AS */
+
+typedef struct dblinkinfo { /* one table row describing how a string-valued DBLink field is rendered */
+ EDbLinkStrOutputDest output_dest; /* extra output argument to fill, if any */
+ CharPtr pchName; /* e.g. "Assembly" */
+ TDbLinkWWWFormatter pWWWFormatFunc; /* e.g. & FF_asn2gb_www_BP; NULL means the value is copied without a link */
+ Uint4 uBufIdx; /* index into bufs array in GetDBLinkString */
+} DbLinkInfoForStr;
+
static CharPtr GetDBLinkString (
IntAsn2gbJobPtr ajp,
UserObjectPtr uop,
- CharPtr PNTR bioProjectIDP
+ CharPtr PNTR bioProjectIDP,
+ GBXrefPtr PNTR dblinkP
)
{
Char frm [256], tmp [256];
- CharPtr buf1, buf2, buf3, buf4, buf5;
+ CharPtr bufs[6];
CharPtr PNTR cpp;
+ GBXrefPtr dbhead = NULL, dbtail = NULL;
ValNodePtr head, tail;
- Int4 i;
+ Int4 i, j;
Int4Ptr ip;
size_t len;
ObjectIdPtr oip;
@@ -2528,205 +2685,176 @@ static CharPtr GetDBLinkString (
UserFieldPtr ufp;
Int4 val;
+ const Uint4 num_bufs = sizeof(bufs)/sizeof(bufs[0]);
+
+ const static DbLinkInfoForStr str_dblink_infos[] = {
+ /* Yes, 4 is missing for uBufIdx because that's
+ handled by "Trace Assembly Archive" which is an
+ int. */
+ { eDbLinkStrOutputDest_Nothing, "Assembly", & FF_asn2gb_www_AS, 5 },
+ { eDbLinkStrOutputDest_bioProjectIDP, "BioProject", & FF_asn2gb_www_BP, 0 },
+ { eDbLinkStrOutputDest_Nothing, "BioSample", & FF_asn2gb_www_BS, 1 },
+ { eDbLinkStrOutputDest_Nothing, "ProbeDB", NULL, 2 },
+ { eDbLinkStrOutputDest_Nothing, "Sequence Read Archive", & FF_asn2gb_www_SRR, 3 }
+ };
+
+ const Uint4 num_link_infos_for_str =
+ sizeof(str_dblink_infos) /
+ sizeof(str_dblink_infos[0]);
+
if (bioProjectIDP != NULL) {
*bioProjectIDP = NULL;
}
+ if (dblinkP != NULL) {
+ *dblinkP = NULL;
+ }
if (uop == NULL) return NULL;
- buf1 = NULL;
- buf2 = NULL;
- buf3 = NULL;
- buf4 = NULL;
- buf5 = NULL;
+ for( i = 0; i < num_bufs; ++i ) {
+ bufs[i] = NULL;
+ }
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
if (oip == NULL || oip->str == NULL) continue;
- if (StringICmp (oip->str, "BioProject") == 0 && ufp->choice == 7) {
- head = NULL;
- tail = NULL;
+
+ cpp = NULL;
+ str = NULL;
+ head = NULL;
+ tail = NULL;
+ if (ufp->choice == 1) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ } else if (ufp->choice == 7) {
cpp = (CharPtr PNTR) ufp->data.ptrvalue;
- if (ufp->num > 0 && cpp != NULL) {
+ if (cpp != NULL && ufp->num > 0) {
str = cpp [0];
- if (StringDoesHaveText (str)) {
- if (ufp->num == 1 && bioProjectIDP != NULL) {
- *bioProjectIDP = str;
- }
- frm [0] = '\0';
- if (ajp != NULL && GetWWW (ajp)) {
- FF_asn2gb_www_BP (ajp, frm, str);
- } else {
- StringCpy (frm, str);
- }
- sprintf (tmp, "BioProject: %s", frm);
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
- for (i = 1; i < ufp->num; i++) {
- str = cpp [i];
- if (StringDoesHaveText (str)) {
- tmp [0] = '\0';
- if (ajp != NULL && GetWWW (ajp)) {
- FF_asn2gb_www_BP (ajp, tmp, str);
- } else {
- StringCpy (tmp, str);
- }
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
- }
- }
- buf1 = ValNodeMergeStrsEx (head, ", ");
- ValNodeFreeData (head);
- }
}
}
- if (StringICmp (oip->str, "BioSample") == 0 && ufp->choice == 7) {
- head = NULL;
- tail = NULL;
- cpp = (CharPtr PNTR) ufp->data.ptrvalue;
- if (ufp->num > 0 && cpp != NULL) {
- str = cpp [0];
- if (StringDoesHaveText (str)) {
- frm [0] = '\0';
- if (ajp != NULL && GetWWW (ajp)) {
- FF_asn2gb_www_BS (ajp, frm, str);
- } else {
- StringCpy (frm, str);
- }
- sprintf (tmp, "BioSample: %s", frm);
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
- for (i = 1; i < ufp->num; i++) {
- str = cpp [i];
- if (StringDoesHaveText (str)) {
- tmp [0] = '\0';
- if (ajp != NULL && GetWWW (ajp)) {
- FF_asn2gb_www_BS (ajp, tmp, str);
- } else {
- StringCpy (tmp, str);
+
+ if (StringICmp (oip->str, "Trace Assembly Archive") == 0) {
+ if (ufp->choice == 2) {
+ val = (Int4) ufp->data.intvalue;
+ if (val > 0) {
+ sprintf (tmp, "Trace Assembly Archive: %ld", (long) val);
+ bufs[4] = StringSave (tmp);
+ AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
+ }
+ } else if (ufp->choice == 8) {
+ ip = (Int4Ptr) ufp->data.ptrvalue;
+ if (ufp->num > 0 && ip != NULL) {
+ val = ip [0];
+ if (val > 0) {
+ sprintf (tmp, "Trace Assembly Archive: %ld", (long) val);
+ ValNodeCopyStrEx (&head, &tail, 0, tmp);
+ AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
+ for (i = 1; i < ufp->num; i++) {
+ val = ip [i];
+ if (val > 0) {
+ sprintf (tmp, "%ld", (long) val);
+ ValNodeCopyStrEx (&head, &tail, 0, tmp);
+ AddGbseqXref (&dbhead, &dbtail, "Trace Assembly Archive", NULL, val);
}
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
}
+ bufs[4] = ValNodeMergeStrsEx (head, ", ");
+ ValNodeFreeData (head);
}
- buf2 = ValNodeMergeStrsEx (head, ", ");
- ValNodeFreeData (head);
}
}
- }
- if (StringICmp (oip->str, "ProbeDB") == 0 && ufp->choice == 7) {
- head = NULL;
- tail = NULL;
- cpp = (CharPtr PNTR) ufp->data.ptrvalue;
- if (ufp->num > 0 && cpp != NULL) {
- str = cpp [0];
- if (StringDoesHaveText (str)) {
- sprintf (tmp, "ProbeDB: %s", str);
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
- for (i = 1; i < ufp->num; i++) {
- str = cpp [i];
- if (StringDoesHaveText (str)) {
- ValNodeCopyStrEx (&head, &tail, 0, str);
+ } else if ( StringDoesHaveText(str) || cpp != NULL ) {
+
+ /* this handles DBLink entries where User-field.data is "str" or "strs" */
+
+ /* check which DBLink this is (for ones that are strings) */
+ for( i = 0; i < num_link_infos_for_str ; ++i ) {
+ /* check if this is the DBLink name that matches */
+ if( 0 != StringICmp(oip->str, str_dblink_infos[i].pchName) ) {
+ continue;
+ }
+
+ /* str, possibly with HTML */
+ if( StringDoesHaveText(str) ) {
+ frm [0] = '\0';
+ if (ajp != NULL && GetWWW (ajp) &&
+ str_dblink_infos[i].pWWWFormatFunc )
+ {
+ (*str_dblink_infos[i].pWWWFormatFunc) (ajp, frm, str);
+ } else {
+ StringCpy (frm, str);
}
- }
- buf3 = ValNodeMergeStrsEx (head, ", ");
- ValNodeFreeData (head);
+ sprintf (tmp, "%s", frm);
+ ValNodeCopyStrEx (&head, &tail, 0, tmp);
+ AddGbseqXref (&dbhead, &dbtail, str_dblink_infos[i].pchName, str, 0);
}
- }
- }
- if (StringICmp (oip->str, "Sequence Read Archive") == 0 && ufp->choice == 7) {
- head = NULL;
- tail = NULL;
- cpp = (CharPtr PNTR) ufp->data.ptrvalue;
- if (ufp->num > 0 && cpp != NULL) {
- str = cpp [0];
- if (StringDoesHaveText (str)) {
- frm [0] = '\0';
- if (ajp != NULL && GetWWW (ajp)) {
- FF_asn2gb_www_SRR (ajp, frm, str);
- } else {
- StringCpy (frm, str);
- }
- sprintf (tmp, "Sequence Read Archive: %s", frm);
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
- for (i = 1; i < ufp->num; i++) {
- str = cpp [i];
+ if (cpp != NULL && ufp->num > 1) {
+ for (j = 1; j < ufp->num; j++) {
+ str = cpp [j];
if (StringDoesHaveText (str)) {
tmp [0] = '\0';
- if (ajp != NULL && GetWWW (ajp)) {
- FF_asn2gb_www_SRR (ajp, tmp, str);
+ if (ajp != NULL && GetWWW (ajp) &&
+ str_dblink_infos[i].pWWWFormatFunc )
+ {
+ (*str_dblink_infos[i].pWWWFormatFunc) (
+ ajp, tmp, str);
} else {
StringCpy (tmp, str);
}
ValNodeCopyStrEx (&head, &tail, 0, tmp);
+ AddGbseqXref (&dbhead, &dbtail, str_dblink_infos[i].pchName, str, 0);
}
}
- buf4 = ValNodeMergeStrsEx (head, ", ");
- ValNodeFreeData (head);
}
- }
- }
- if (StringICmp (oip->str, "Trace Assembly Archive") == 0 && ufp->choice == 8) {
- head = NULL;
- tail = NULL;
- ip = (Int4Ptr) ufp->data.ptrvalue;
- if (ufp->num > 0 && ip != NULL) {
- val = ip [0];
- if (val > 0) {
- sprintf (tmp, "Trace Assembly Archive: %ld", (long) val);
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
- for (i = 1; i < ufp->num; i++) {
- val = ip [i];
- if (val > 0) {
- sprintf (tmp, "%ld", (long) val);
- ValNodeCopyStrEx (&head, &tail, 0, tmp);
+
+ /* load output variable for some types */
+ switch( str_dblink_infos[i].output_dest ) {
+ case eDbLinkStrOutputDest_Nothing:
+ /* nothing to do */
+ break;
+ case eDbLinkStrOutputDest_bioProjectIDP:
+ if (cpp == NULL || ufp->num == 1) {
+ if (bioProjectIDP != NULL) {
+ *bioProjectIDP = str;
}
- }
- buf5 = ValNodeMergeStrsEx (head, ", ");
+ }
+ break;
+ }
+
+ /* write output buf */
+ if( head != NULL ) {
+ ASSERT( str_dblink_infos[i].uBufIdx < num_bufs );
+ sprintf (tmp, "%s: ", str_dblink_infos[i].pchName);
+ bufs[str_dblink_infos[i].uBufIdx] = ValNodeMergeStrsExEx (head, ", ", tmp, NULL);
ValNodeFreeData (head);
}
}
}
}
- if (StringHasNoText (buf1) && StringHasNoText (buf2) && StringHasNoText (buf3) &&
- StringHasNoText (buf4) && StringHasNoText (buf5)) return NULL;
-
- len = StringLen (buf1) + StringLen (buf2) + StringLen (buf3) + StringLen (buf4) + StringLen (buf5);
- str = (CharPtr) MemNew (sizeof (Char) * (len + 20));
- if (str == NULL) return NULL;
-
- prefix = "";
-
- if (StringDoesHaveText (buf1)) {
- StringCat (str, buf1);
- prefix = "\n";
+ if (dblinkP != NULL) {
+ *dblinkP = dbhead;
}
- if (StringDoesHaveText (buf2)) {
- StringCat (str, prefix);
- StringCat (str, buf2);
- prefix = "\n";
+ len = 0;
+ for( i = 0; i < num_bufs ; ++i ) {
+ len += StringLen(bufs[i]);
}
-
- if (StringDoesHaveText (buf3)) {
- StringCat (str, prefix);
- StringCat (str, buf3);
- prefix = "\n";
+ if( 0 == len ) {
+ /* all bufs are empty */
+ return NULL;
}
- if (StringDoesHaveText (buf4)) {
- StringCat (str, prefix);
- StringCat (str, buf4);
- prefix = "\n";
- }
+ str = (CharPtr) MemNew (sizeof (Char) * (len + (num_bufs * 4))); /* not sure why exactly a "4" is used */
+ if (str == NULL) return NULL;
- if (StringDoesHaveText (buf5)) {
- StringCat (str, prefix);
- StringCat (str, buf5);
- prefix = "\n";
- }
+ prefix = "";
- MemFree (buf1);
- MemFree (buf2);
- MemFree (buf3);
- MemFree (buf4);
- MemFree (buf5);
+ for( i = 0; i < num_bufs ; ++i ) {
+ if( StringDoesHaveText(bufs[i]) ) {
+ StringCat (str, prefix);
+ StringCat (str, bufs[i]);
+ bufs[i] = MemFree(bufs[i]);
+ prefix = "\n";
+ }
+ }
return str;
}
@@ -2744,6 +2872,7 @@ NLM_EXTERN void AddDblinkBlock (
Char buf [32];
UserFieldPtr curr;
Uint4 dbitemID = 0;
+ GBXrefPtr dblinkHead = NULL;
UserObjectPtr dbuop = NULL;
SeqMgrDescContext dcontext;
Boolean first = TRUE;
@@ -2890,7 +3019,7 @@ NLM_EXTERN void AddDblinkBlock (
}
if (dbuop != NULL) {
- str = GetDBLinkString (ajp, dbuop, &bioProjectID);
+ str = GetDBLinkString (ajp, dbuop, &bioProjectID, &dblinkHead);
if (StringDoesHaveText (str)) {
bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
if (bbp == NULL) return;
@@ -2929,6 +3058,14 @@ NLM_EXTERN void AddDblinkBlock (
ValNodeFreeData (head);
}
}
+
+ if (dblinkHead != NULL) {
+ if (gbseq != NULL) {
+ gbseq->xrefs = dblinkHead;
+ } else {
+ AsnGenericUserSeqOfFree (dblinkHead, (AsnOptFreeFunc) GBXrefFree);
+ }
+ }
}
/* only displaying PID in GenPept format */
@@ -2940,7 +3077,7 @@ static void AddPidBlock (Asn2gbWorkPtr awp)
IntAsn2gbJobPtr ajp;
BaseBlockPtr bbp;
BioseqPtr bsp;
- Int4 gi = -1;
+ BIG_ID gi = -1;
SeqIdPtr sip;
Char version [64];
StringItemPtr ffstring;
@@ -2954,7 +3091,7 @@ static void AddPidBlock (Asn2gbWorkPtr awp)
for (sip = bsp->id; sip != NULL; sip = sip->next) {
switch (sip->choice) {
case SEQID_GI :
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
break;
default :
break;
@@ -2990,7 +3127,7 @@ static Uint1 dbsource_fasta_order [NUM_SEQID] = {
10, /* 7 = pir */
10, /* 8 = swissprot */
15, /* 9 = patent */
- 18, /* 10 = other TextSeqId */
+ 10, /* 10 = other = refseq */
20, /* 11 = general Dbtag */
31, /* 12 = gi */
10, /* 13 = ddbj */
@@ -2999,8 +3136,8 @@ static Uint1 dbsource_fasta_order [NUM_SEQID] = {
10, /* 16 = tpg */
10, /* 17 = tpe */
10, /* 18 = tpd */
- 10, /* 19 = gpp */
- 10 /* 20 = nat */
+ 15, /* 19 = gpp */
+ 15 /* 20 = nat */
};
static void AddToUniqueSipList (
@@ -3028,7 +3165,7 @@ static Boolean WriteDbsourceID (
Boolean check_na = FALSE;
DbtagPtr db;
CharPtr dt;
- Int4 gi;
+ BIG_ID gi;
ObjectIdPtr oip;
CharPtr pfx;
PDBSeqIdPtr psip = NULL;
@@ -3058,9 +3195,9 @@ static Boolean WriteDbsourceID (
}
return FALSE;
case SEQID_GI :
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (gi == 0) return FALSE;
- sprintf (tmp, "gi: %ld", (long) gi);
+ sprintf (tmp, "gi: %lld", (long long) gi);
StringCat (str, tmp);
return TRUE;
case SEQID_GENERAL :
@@ -3209,7 +3346,7 @@ static void AddSPBlock (
DbtagPtr db;
SeqMgrDescContext dcontext;
Boolean first;
- Int4 gi;
+ BIG_ID gi;
Boolean has_link;
Char id [42];
ObjectIdPtr oip;
@@ -3297,7 +3434,7 @@ static void AddSPBlock (
sif = NULL;
id [0] = '\0';
if (sip->choice == SEQID_GI) {
- gi = sid->data.intvalue;
+ gi = (BIG_ID) sid->data.intvalue;
if (! GetAccnVerFromServer (gi, id)) {
sif = GetSeqIdForGI (gi);
if (sif != NULL) {
@@ -3365,6 +3502,8 @@ static void AddSPBlock (
str += 4;
} else if (StringCmp (db->db, "HGNC") == 0 && StringNCmp (str, "HGNC:", 5) == 0) {
str += 5;
+ } else if (StringCmp (db->db, "VGNC") == 0 && StringNCmp (str, "VGNC:", 5) == 0) {
+ str += 5;
} else if (StringCmp (db->db, "DIP") == 0 && StringNCmp (str, "DIP:", 4) == 0) {
str += 4;
}
@@ -3394,6 +3533,8 @@ static void AddSPBlock (
str += 4;
} else if (StringNCmp (str, "HGNC:", 5) == 0) {
str += 5;
+ } else if (StringNCmp (str, "VGNC:", 5) == 0) {
+ str += 5;
}
} else if ( oip->id > 0 ) {
sprintf(numbuf, "%d", oip->id);
@@ -3615,11 +3756,13 @@ static void AddPDBBlock (
)
{
+ Char ch;
SeqMgrDescContext dcontext;
CharPtr dt;
CharPtr prefix = NULL;
SeqDescrPtr sdp;
PdbBlockPtr pdb;
+ CharPtr ptr;
PdbRepPtr replace;
CharPtr str;
ValNodePtr vnp;
@@ -3650,7 +3793,32 @@ static void AddPDBBlock (
for (vnp = pdb->source; vnp != NULL; vnp = vnp->next) {
str = (CharPtr) vnp->data.ptrvalue;
if (StringHasNoText (str)) continue;
- FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
+ if (GetWWW (ajp)) {
+ if (StringNICmp (str, "Mmdb_id:", 8) == 0) {
+ ptr = str + 8;
+ ch = *ptr;
+ while (ch == ' ') {
+ ptr++;
+ ch = *ptr;
+ }
+ if (StringIsAllDigits (ptr)) {
+ FFAddTextToString (ffstring, prefix, "Mmdb_id:", NULL, FALSE, TRUE, TILDE_IGNORE);
+ FFAddTextToString (ffstring, NULL, " ", NULL, FALSE, TRUE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, link_mmdb, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, ptr, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, ptr, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
+ }
+ } else {
+ FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
+ }
+ } else {
+ FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
+ }
prefix = ", ";
}
prefix = ";";
@@ -3699,7 +3867,7 @@ static CharPtr TxtSave (CharPtr text, size_t len)
if ((text == NULL) || (len == 0))
return str;
- str = MemNew((size_t)(len + 1));
+ str = (CharPtr) MemNew((size_t)(len + 1));
MemCopy(str, text, (size_t)len);
return (str);
@@ -3718,7 +3886,7 @@ static Boolean FF_www_dbsource (
CharPtr temp, end, text, loc, link = NULL;
Uint1 choice;
Int2 j;
- Int4 gi = 0;
+ BIG_ID gi = 0;
Char gibuf [32];
if (sip == NULL) return FALSE;
@@ -3831,6 +3999,7 @@ NLM_EXTERN void AddDbsourceBlock (
SeqIdPtr sip;
SeqLocPtr slp;
CharPtr str;
+ TextSeqIdPtr tsip;
Boolean unknown = TRUE;
ValNodePtr vnp;
StringItemPtr ffstring;
@@ -3843,6 +4012,13 @@ NLM_EXTERN void AddDbsourceBlock (
bsp = awp->bsp;
if (bsp == NULL) return;
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice != SEQID_OTHER) continue;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip == NULL) continue;
+ if (StringNCmp (tsip->accession, "WP_", 3) == 0) return;
+ }
+
bbp = Asn2gbAddBlock (awp, DBSOURCE_BLOCK, sizeof (BaseBlock));
if (bbp == NULL) return;
@@ -3869,7 +4045,7 @@ NLM_EXTERN void AddDbsourceBlock (
}
break;
case SEQID_GENERAL :
- db = sip->data.ptrvalue;
+ db = (DbtagPtr) sip->data.ptrvalue;
if (db == NULL) {
break;
}
@@ -4080,8 +4256,8 @@ NLM_EXTERN void AddDateBlock (
#define TOTAL_ESTKW 11
-#define TOTAL_STSKW 5
#define TOTAL_GSSKW 2
+#define TOTAL_STSKW 5
static CharPtr EST_kw_array[ TOTAL_ESTKW] = {
"EST", "EST PROTO((expressed sequence tag)", "expressed sequence tag",
@@ -4093,6 +4269,7 @@ static CharPtr EST_kw_array[ TOTAL_ESTKW] = {
static CharPtr GSS_kw_array [TOTAL_GSSKW] = {
"GSS", "trapped exon"
};
+
static CharPtr STS_kw_array[TOTAL_STSKW] = {
"STS", "STS(sequence tagged site)", "STS (sequence tagged site)",
"STS sequence", "sequence tagged site"
@@ -4229,6 +4406,7 @@ NLM_EXTERN void AddKeywordsBlock (
GBSeqPtr gbseq;
ValNodePtr head = NULL;
IndxPtr index;
+ Boolean is_cross_kingdom = FALSE;
Boolean is_est = FALSE;
Boolean is_gss = FALSE;
Boolean is_sts = FALSE;
@@ -4236,19 +4414,33 @@ NLM_EXTERN void AddKeywordsBlock (
Boolean is_genome_assembly = FALSE;
Boolean is_tsa = FALSE;
Boolean is_unverified = FALSE;
+ Boolean is_unv_organism = FALSE;
+ Boolean is_unv_misassembled = FALSE;
+ Boolean is_wp = FALSE;
+ Boolean this_is_gen_asm;
ValNodePtr keywords;
CharPtr kwd;
+ ValNodePtr ky_head;
MolInfoPtr mip;
BioseqPtr nbsp;
+ Int2 num_super_kingdom = 0;
ObjectIdPtr oip;
+ OrgNamePtr onp;
+ OrgRefPtr orp;
PirBlockPtr pir;
PrfBlockPtr prf;
+ CharPtr sc_keyword;
SeqDescrPtr sdp;
SeqEntryPtr sep;
SeqIdPtr sip;
SPBlockPtr sp;
SubSourcePtr ssp;
CharPtr str;
+ Boolean super_kingdoms_different = FALSE;
+ CharPtr super_kingdom_name = NULL;
+ TaxElementPtr tep;
+ TextSeqIdPtr tsip;
+ UserFieldPtr ufp;
UserObjectPtr uop;
ValNodePtr vnp;
StringItemPtr ffstring;
@@ -4267,17 +4459,55 @@ NLM_EXTERN void AddKeywordsBlock (
ffstring = FFGetString(ajp);
if ( ffstring == NULL ) return;
- biop = NULL;
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_OTHER) {
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ is_wp = TRUE;
+ }
+ }
+ }
+
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- if (sdp != NULL) {
+ while (sdp != NULL) {
biop = (BioSourcePtr) sdp->data.ptrvalue;
- }
- if (biop != NULL) {
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_environmental_sample) {
- is_env_sample = TRUE;
+ if (biop != NULL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ onp = orp->orgname;
+ if (onp != NULL) {
+ if (onp->choice == 5) {
+ for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
+ if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
+ num_super_kingdom++;
+ if (super_kingdom_name == NULL) {
+ super_kingdom_name = tep->name;
+ } else if (StringICmp (super_kingdom_name, tep->name) != 0) {
+ super_kingdoms_different = TRUE;
+ }
+ }
+ }
+ }
+ }
+ }
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+ if (ssp->subtype == SUBSRC_environmental_sample) {
+ is_env_sample = TRUE;
+ }
}
}
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
+ }
+
+ if (num_super_kingdom > 1 && super_kingdoms_different) {
+ is_cross_kingdom = TRUE;
+ }
+
+ if (bsp->repr == Seq_repr_map) {
+ if (head != NULL) {
+ ValNodeCopyStr (&head, 0, "; ");
+ }
+ ValNodeCopyStr (&head, 0, "Whole_Genome_Map");
}
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
@@ -4387,6 +4617,14 @@ NLM_EXTERN void AddKeywordsBlock (
ValNodeCopyStr (&head, 0, "Transcriptome Shotgun Assembly");
is_tsa = TRUE;
break;
+ case MI_TECH_targeted :
+ if (head != NULL) {
+ ValNodeCopyStr (&head, 0, "; ");
+ }
+ ValNodeCopyStr (&head, 0, "TLS");
+ ValNodeCopyStr (&head, 0, "; ");
+ ValNodeCopyStr (&head, 0, "Targeted Locus Study");
+ break;
case MI_TECH_unknown :
case MI_TECH_standard :
case MI_TECH_other :
@@ -4439,6 +4677,7 @@ NLM_EXTERN void AddKeywordsBlock (
if (oip != NULL && StringICmp (oip->str, "ENCODE") == 0) {
add_encode = TRUE;
} else if (oip != NULL && StringICmp (oip->str, "StructuredComment") == 0) {
+ this_is_gen_asm = FALSE;
for (curr = uop->data; curr != NULL; curr = curr->next) {
if (curr->choice != 1) continue;
oip = curr->label;
@@ -4448,14 +4687,53 @@ NLM_EXTERN void AddKeywordsBlock (
if (StringCmp (field, "StructuredCommentPrefix") == 0) {
if (StringCmp ((CharPtr) curr->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) {
is_genome_assembly = TRUE;
+ this_is_gen_asm = TRUE;
}
}
- if (StringCmp (field, "Current Finishing Status") == 0) {
- finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue);
+ }
+ if (this_is_gen_asm) {
+ for (curr = uop->data; curr != NULL; curr = curr->next) {
+ if (curr->choice != 1) continue;
+ oip = curr->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "Current Finishing Status") == 0) {
+ finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue);
+ }
}
}
+ sc_keyword = KeywordForStructuredCommentName (uop);
+ if (sc_keyword != NULL) {
+ if (IsStructuredCommentValid (uop, NULL, NULL) == eFieldValid_Valid) {
+ ky_head = SplitStringAtSemicolon (sc_keyword);
+ if (ky_head != NULL) {
+ for (vnp = ky_head; vnp != NULL; vnp = vnp->next) {
+ kwd = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (kwd)) continue;
+ if (head != NULL) {
+ ValNodeCopyStr (&head, 0, "; ");
+ }
+ ValNodeCopyStr (&head, 0, kwd);
+ }
+ ValNodeFreeData (ky_head);
+ }
+ }
+ MemFree (sc_keyword);
+ }
} else if (oip != NULL && StringICmp (oip->str, "Unverified") == 0) {
is_unverified = TRUE;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip != NULL && StringCmp (oip->str, "Type") == 0 && ufp->choice == 1) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (StringICmp (str, "Organism") == 0) {
+ is_unv_organism = TRUE;
+ } else if (StringICmp (str, "Misassembled") == 0) {
+ is_unv_misassembled = TRUE;
+ }
+ }
+ }
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
@@ -4464,7 +4742,13 @@ NLM_EXTERN void AddKeywordsBlock (
if (head != NULL) {
ValNodeCopyStr (&head, 0, "; ");
}
- ValNodeCopyStr (&head, 0, "UNVERIFIED");
+ if (is_unv_organism) {
+ ValNodeCopyStr (&head, 0, "UNVERIFIED_ORGANISM");
+ } else if (is_unv_misassembled) {
+ ValNodeCopyStr (&head, 0, "UNVERIFIED_MISASSEMBLY");
+ } else {
+ ValNodeCopyStr (&head, 0, "UNVERIFIED");
+ }
}
if (add_encode) {
if (head != NULL) {
@@ -4484,12 +4768,24 @@ NLM_EXTERN void AddKeywordsBlock (
if (head != NULL) {
ValNodeCopyStr (&head, 0, "; ");
}
- ValNodeCopyStr (&head, 0, "Third Party Annotation");
+ ValNodeCopyStr (&head, 0, "Third Party Data");
ValNodeCopyStr (&head, 0, "; ");
ValNodeCopyStr (&head, 0, "TPA");
+ } else if (sip->choice == SEQID_OTHER) {
+ if (head != NULL) {
+ ValNodeCopyStr (&head, 0, "; ");
+ }
+ ValNodeCopyStr (&head, 0, "RefSeq");
}
}
+ if (is_cross_kingdom && is_wp) {
+ if (head != NULL) {
+ ValNodeCopyStr (&head, 0, "; ");
+ }
+ ValNodeCopyStr (&head, 0, "CROSS_KINGDOM");
+ }
+
sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
while (sdp != NULL) {
@@ -4680,21 +4976,84 @@ NLM_EXTERN void AddSegmentBlock (
}
}
-NLM_EXTERN void AddSourceBlock (
+static void AddSrcBlk (
+ Asn2gbWorkPtr awp, /* work context; NULL makes the call a no-op */
+ Uint2 entityID, /* stored unchanged in the new block */
+ Uint4 itemID, /* stored unchanged in the new block */
+ Uint2 itemtype /* stored unchanged in the new block */
+)
+ /* Adds one SOURCE_BLOCK to awp and records which item it refers to. */
+{
+ BaseBlockPtr bbp;
+ /* give up quietly when there is no work context or the block could not be added */
+ if (awp == NULL) return;
+ bbp = Asn2gbAddBlock (awp, SOURCE_BLOCK, sizeof (BaseBlock));
+ if (bbp == NULL) return;
+ /* the three identifiers are copied as given by the caller */
+ bbp->entityID = entityID;
+ bbp->itemID = itemID;
+ bbp->itemtype = itemtype;
+ /* DoImmediateFormat is invoked only when awp->afp is set */
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
+}
+
+static void AddOrgBlk (
+ Asn2gbWorkPtr awp, /* work context; NULL makes the call a no-op */
+ Uint2 entityID, /* stored unchanged in the new block */
+ Uint4 itemID, /* stored unchanged in the new block */
+ Uint2 itemtype /* stored unchanged in the new block */
+)
+ /* Adds one ORGANISM_BLOCK to awp and records which item it refers to. */
+{
+ BaseBlockPtr bbp;
+ /* give up quietly when there is no work context or the block could not be added */
+ if (awp == NULL) return;
+ bbp = Asn2gbAddBlock (awp, ORGANISM_BLOCK, sizeof (BaseBlock));
+ if (bbp == NULL) return;
+ /* the three identifiers are copied as given by the caller */
+ bbp->entityID = entityID;
+ bbp->itemID = itemID;
+ bbp->itemtype = itemtype;
+ /* DoImmediateFormat is invoked only when awp->afp is set */
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
+}
+
+NLM_EXTERN void AddSourceOrganismBlock (
Asn2gbWorkPtr awp
)
{
IntAsn2gbJobPtr ajp;
- BaseBlockPtr bbp;
+ BioSourcePtr biop;
BioseqPtr bsp;
SeqFeatPtr cds;
+ CharPtr common;
SeqMgrDescContext dcontext;
BioseqPtr dna;
SeqMgrFeatContext fcontext;
- GBBlockPtr gbp;
+ GBBlockPtr gbp = NULL;
+ SeqDescrPtr gbsdp = NULL;
+ Boolean is_wp = FALSE;
+ Boolean loop = FALSE;
+ Int2 num_super_kingdom = 0;
+ Boolean okay = FALSE;
+ OrgNamePtr onp;
+ OrgRefPtr orp;
+ ObjValNodePtr ovp;
SeqDescrPtr sdp;
+ ValNodePtr sdplist = NULL;
SeqFeatPtr sfp;
+ SeqIdPtr sip;
+ Boolean super_kingdoms_different = FALSE;
+ CharPtr super_kingdom_name = NULL;
+ CharPtr taxname;
+ TaxElementPtr tep;
+ TextSeqIdPtr tsip;
+ ValNodePtr vnp;
if (awp == NULL) return;
ajp = awp->ajp;
@@ -4702,99 +5061,107 @@ NLM_EXTERN void AddSourceBlock (
bsp = awp->bsp;
if (bsp == NULL) return;
- bbp = Asn2gbAddBlock (awp, SOURCE_BLOCK, sizeof (BaseBlock));
- if (bbp == NULL) return;
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_SWISSPROT) {
+ loop = TRUE;
+ } else if (sip->choice == SEQID_OTHER) {
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ is_wp = TRUE;
+ }
+ }
+ }
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
if (sdp != NULL && (! ajp->newSourceOrg)) {
gbp = (GBBlockPtr) sdp->data.ptrvalue;
- if (gbp != NULL && (! StringHasNoText (gbp->source))) {
- bbp->entityID = dcontext.entityID;
- bbp->itemID = dcontext.itemID;
- bbp->itemtype = OBJ_SEQDESC;
-
- if (awp->afp != NULL) {
- DoImmediateFormat (awp->afp, bbp);
- }
-
- return;
+ if (gbp != NULL && StringDoesHaveText (gbp->source)) {
+ gbsdp = sdp;
}
}
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- if (sdp != NULL) {
- bbp->entityID = dcontext.entityID;
- bbp->itemID = dcontext.itemID;
- bbp->itemtype = OBJ_SEQDESC;
- } else {
- sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
- if (sfp != NULL) {
- bbp->entityID = fcontext.entityID;
- bbp->itemID = fcontext.itemID;
- bbp->itemtype = OBJ_SEQFEAT;
- } else if (ISA_aa (bsp->mol)) {
-
- /* if protein with no sources, get sources applicable to DNA location of CDS */
-
- cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
- if (cds != NULL) {
- sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
- if (sfp != NULL) {
- bbp->entityID = fcontext.entityID;
- bbp->itemID = fcontext.itemID;
- bbp->itemtype = OBJ_SEQFEAT;
- } else {
- dna = BioseqFindFromSeqLoc (cds->location);
- if (dna != NULL) {
- sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
- if (sdp != NULL) {
- bbp->entityID = dcontext.entityID;
- bbp->itemID = dcontext.itemID;
- bbp->itemtype = OBJ_SEQDESC;
+ while (sdp != NULL) {
+ ValNodeAddPointer (&sdplist, 0, (Pointer) sdp);
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop != NULL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ taxname = orp->taxname;
+ common = orp->common;
+ onp = orp->orgname;
+ if (onp != NULL) {
+ if (onp->choice == 5) {
+ for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
+ if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
+ num_super_kingdom++;
+ if (super_kingdom_name == NULL) {
+ super_kingdom_name = tep->name;
+ } else if (StringICmp (super_kingdom_name, tep->name) != 0) {
+ super_kingdoms_different = TRUE;
+ }
+ }
}
}
}
}
}
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
}
- if (awp->afp != NULL) {
- DoImmediateFormat (awp->afp, bbp);
- }
-}
+ if (sdplist != NULL && ((num_super_kingdom > 1 && super_kingdoms_different && is_wp) || loop)) {
-NLM_EXTERN void AddOrganismBlock (
- Asn2gbWorkPtr awp
-)
+ for (vnp = sdplist; vnp != NULL; vnp = vnp->next) {
+ sdp = (SeqDescrPtr) vnp->data.ptrvalue;
-{
- BaseBlockPtr bbp;
- BioseqPtr bsp;
- SeqFeatPtr cds;
- SeqMgrDescContext dcontext;
- BioseqPtr dna;
- SeqMgrFeatContext fcontext;
- SeqDescrPtr sdp;
- SeqFeatPtr sfp;
+ if (gbsdp != NULL) {
+ if (gbsdp->extended != 0) {
+ ovp = (ObjValNodePtr) gbsdp;
+ AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ okay = TRUE;
+ }
+ } else if (sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+ AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ okay = TRUE;
+ }
- if (awp == NULL) return;
- bsp = awp->bsp;
- if (bsp == NULL) return;
+ if (sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+ AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ okay = TRUE;
+ }
+ }
- bbp = Asn2gbAddBlock (awp, ORGANISM_BLOCK, sizeof (BaseBlock));
- if (bbp == NULL) return;
+ } else if (sdplist != NULL) {
+
+ sdp = (SeqDescrPtr) sdplist->data.ptrvalue;
+
+ if (gbsdp != NULL) {
+ if (gbsdp->extended != 0) {
+ ovp = (ObjValNodePtr) gbsdp;
+ AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ okay = TRUE;
+ }
+ } else if (sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+ AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ okay = TRUE;
+ }
+
+ if (sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+ AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ }
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- if (sdp != NULL) {
- bbp->entityID = dcontext.entityID;
- bbp->itemID = dcontext.itemID;
- bbp->itemtype = OBJ_SEQDESC;
} else {
+
sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
if (sfp != NULL) {
- bbp->entityID = fcontext.entityID;
- bbp->itemID = fcontext.itemID;
- bbp->itemtype = OBJ_SEQFEAT;
+ AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
+ AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
+ okay = TRUE;
+
} else if (ISA_aa (bsp->mol)) {
/* if protein with no sources, get sources applicable to DNA location of CDS */
@@ -4803,17 +5170,20 @@ NLM_EXTERN void AddOrganismBlock (
if (cds != NULL) {
sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
if (sfp != NULL) {
- bbp->entityID = fcontext.entityID;
- bbp->itemID = fcontext.itemID;
- bbp->itemtype = OBJ_SEQFEAT;
+ AddSrcBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
+ AddOrgBlk (awp, sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT);
+ okay = TRUE;
} else {
dna = BioseqFindFromSeqLoc (cds->location);
if (dna != NULL) {
sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
if (sdp != NULL) {
- bbp->entityID = dcontext.entityID;
- bbp->itemID = dcontext.itemID;
- bbp->itemtype = OBJ_SEQDESC;
+ if (sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+ AddSrcBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ AddOrgBlk (awp, ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC);
+ okay = TRUE;
+ }
}
}
}
@@ -4821,9 +5191,12 @@ NLM_EXTERN void AddOrganismBlock (
}
}
- if (awp->afp != NULL) {
- DoImmediateFormat (awp->afp, bbp);
+ if (! okay) {
+ AddSrcBlk (awp, 0, 0, 0);
+ AddOrgBlk (awp, 0, 0, 0);
}
+
+ ValNodeFree (sdplist);
}
static RefBlockPtr AddPub (
@@ -5324,7 +5697,7 @@ static CharPtr GetAuthorsPlusConsortium (
str = GetAuthorsString (format, alp, &consortium, NULL, NULL);
if (str == NULL) return consortium;
if (consortium == NULL) return str;
- tmp = MemNew (StringLen (str) + StringLen (consortium) + 5);
+ tmp = (CharPtr) MemNew (StringLen (str) + StringLen (consortium) + 5);
if (tmp == NULL) return NULL;
StringCpy (tmp, str);
StringCat (tmp, "; ");
@@ -5799,9 +6172,9 @@ NLM_EXTERN Boolean AddReferenceBlock (
/* sort by pub/unpub/sites/sub, then date, finally existing serial */
if (isRefSeq) {
- head = SortValNode (head, SortReferencesAR);
+ head = ValNodeSort (head, SortReferencesAR);
} else {
- head = SortValNode (head, SortReferencesA);
+ head = ValNodeSort (head, SortReferencesA);
}
if (awp->ssp != NULL && (! awp->onlyGeneRIFs) && (! awp->onlyReviewPubs)) {
@@ -5983,9 +6356,9 @@ NLM_EXTERN Boolean AddReferenceBlock (
/* resort by existing serial, then pub/unpub/sites/sub, then date */
if (isRefSeq) {
- head = SortValNode (head, SortReferencesBR);
+ head = ValNodeSort (head, SortReferencesBR);
} else {
- head = SortValNode (head, SortReferencesB);
+ head = ValNodeSort (head, SortReferencesB);
}
if (head == NULL) return FALSE;
@@ -6148,7 +6521,7 @@ NLM_EXTERN void AddWGSBlock (
Asn2gbSectPtr asp;
BaseBlockPtr bbp;
BioseqPtr bsp;
- Char buf [80];
+ Char buf [128];
SeqMgrDescContext dcontext;
CharPtr first;
GBAltSeqItemPtr gbaip;
@@ -6265,11 +6638,32 @@ NLM_EXTERN void AddWGSBlock (
} else {
buf [6] = '\0';
}
- FFAddTextToString(ffstring, "val=", buf, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddTextToString(ffstring, "val=", buf, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
sprintf (buf, "%s-%s", first, last);
FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
- } else {
+ } else if (wgstype == 2) {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ if (StringLen (first) > 7 && first [6] == 'S') {
+ FF_Add_NCBI_Base_URL (ffstring, link_wgs);
+ StringCpy (buf, first);
+ if (buf [2] == '_') {
+ buf [9] = '\0';
+ } else {
+ buf [6] = '\0';
+ }
+ FFAddTextToString(ffstring, "val=", buf, "#scaffolds\">", FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%s-%s", first, last);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
+ } else {
+ FF_Add_NCBI_Base_URL (ffstring, link_wgsscaf);
+ sprintf (buf, "%s:%s", first, last);
+ FFAddTextToString(ffstring, "term=", buf, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%s-%s", first, last);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
+ }
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
+ } else if (wgstype == 3) {
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
FF_Add_NCBI_Base_URL (ffstring, link_wgsscaf);
sprintf (buf, "%s:%s", first, last);
@@ -6319,6 +6713,108 @@ NLM_EXTERN void AddWGSBlock (
}
}
+NLM_EXTERN void AddTLSBlock (
+ Asn2gbWorkPtr awp
+)
+
+{
+ IntAsn2gbJobPtr ajp;
+ Asn2gbSectPtr asp;
+ BaseBlockPtr bbp;
+ BioseqPtr bsp;
+ Char buf [128];
+ SeqMgrDescContext dcontext;
+ CharPtr first;
+ CharPtr last;
+ ObjectIdPtr oip;
+ SeqDescrPtr sdp;
+ Char tls [32];
+ UserFieldPtr ufp;
+ UserObjectPtr uop;
+ StringItemPtr ffstring;
+
+ if (awp == NULL) return;
+ ajp = awp->ajp;
+ if ( ajp == NULL ) return;
+ bsp = awp->bsp;
+ if (bsp == NULL) return;
+ asp = awp->asp;
+ if (asp == NULL) return;
+
+ if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ while (sdp != NULL) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ first = NULL;
+ last = NULL;
+ if (oip != NULL) {
+ if (StringICmp (oip->str, "TLSProjects") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
+ if (StringICmp (oip->str, "TLS_accession_first") == 0) {
+ first = (CharPtr) ufp->data.ptrvalue;
+ } else if (StringICmp (oip->str, "TLS_accession_last") == 0) {
+ last = (CharPtr) ufp->data.ptrvalue;
+ }
+ }
+ if (first != NULL && last != NULL) {
+ bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
+ if (bbp != NULL) {
+ ffstring = FFGetString (ajp);
+ if (ffstring != NULL) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "TLS", 12, 0, 0, NULL, FALSE);
+
+ if ( GetWWW(ajp) ) {
+ StringNCpy_0 (tls, first, sizeof (tls));
+ tls [6] = '\0';
+ if (StringCmp (first, last) != 0) {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_Add_NCBI_Base_URL (ffstring, link_tls);
+ FFAddTextToString(ffstring, "val=", tls, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%s-%s", first, last);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
+ } else {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_Add_NCBI_Base_URL (ffstring, link_tls);
+ FFAddTextToString(ffstring, "val=", tls, "#contigs\">", FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%s", first);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
+ }
+ } else {
+ if (StringCmp (first, last) != 0) {
+ sprintf (buf, "%s-%s", first, last);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
+ } else {
+ sprintf (buf, "%s", first);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
+ }
+ }
+
+ bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
+ FFRecycleString(ajp, ffstring);
+ }
+
+ bbp->entityID = dcontext.entityID;
+ bbp->itemtype = OBJ_SEQDESC;
+ bbp->itemID = dcontext.itemID;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, bbp);
+ }
+ }
+ }
+ }
+ }
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
+ }
+}
+
NLM_EXTERN void AddTSABlock (
Asn2gbWorkPtr awp
)
@@ -6328,7 +6824,7 @@ NLM_EXTERN void AddTSABlock (
Asn2gbSectPtr asp;
BaseBlockPtr bbp;
BioseqPtr bsp;
- Char buf [80];
+ Char buf [128];
SeqMgrDescContext dcontext;
CharPtr first;
GBAltSeqItemPtr gbaip;
@@ -6373,7 +6869,7 @@ NLM_EXTERN void AddTSABlock (
last = NULL;
tsatype = 0;
if (oip != NULL) {
- if (StringICmp (oip->str, "TSA-mRNA-List") == 0) {
+ if (StringICmp (oip->str, "TSA-mRNA-List") == 0 || StringICmp (oip->str, "TSA-RNA-List") == 0) {
tsatype = 1;
}
if (tsatype != 0) {
@@ -6490,7 +6986,7 @@ NLM_EXTERN void AddCAGEBlock (
Asn2gbSectPtr asp;
BaseBlockPtr bbp;
BioseqPtr bsp;
- Char buf [80];
+ Char buf [128];
Int2 cagetype;
SeqMgrDescContext dcontext;
CharPtr first;
@@ -6987,7 +7483,7 @@ NLM_EXTERN void AddSlashBlock (
sprintf (buf, "//</pre>\n");
str = StringSave (buf);
} else {
- str = MemNew(sizeof(Char) * 4);
+ str = (CharPtr) MemNew(sizeof(Char) * 4);
StringNCpy(str, "//\n", 4);
}
diff --git a/api/asn2gnb3.c b/api/asn2gnb3.c
index 59671ddf..2db0651b 100644
--- a/api/asn2gnb3.c
+++ b/api/asn2gnb3.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.155 $
+* $Revision: 1.232 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -63,16 +63,16 @@
#endif
#endif
-static CharPtr ref_link = "http://www.ncbi.nlm.nih.gov/RefSeq/";
+static CharPtr ref_link = "https://www.ncbi.nlm.nih.gov/RefSeq/";
-static CharPtr doc_link = "http://www.ncbi.nlm.nih.gov/genome/guide/build.shtml";
+static CharPtr doc_link = "http://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/";
static CharPtr ev_link = "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?";
static CharPtr link_encode = "http://www.nhgri.nih.gov/10005107";
-static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
-static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
+static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
+static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
/* ********************************************************************** */
@@ -92,7 +92,7 @@ static void AddHistCommentString (
Int2 count = 0;
Char buf [256], id [42];
Boolean first, skip;
- Int4 gi = 0;
+ BIG_ID gi = 0;
SeqIdPtr sip, sip2;
CharPtr strd;
@@ -105,7 +105,7 @@ static void AddHistCommentString (
for (sip = ids; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- gi = (long) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
count++;
}
}
@@ -127,7 +127,7 @@ static void AddHistCommentString (
first = TRUE;
for (sip = ids; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- gi = (long) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (! first) {
FFAddOneString (ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
}
@@ -183,6 +183,63 @@ static void AddHistCommentString (
FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_EXPAND);
}
+static void AddUnorderedCommentString (
+ StringItemPtr ffstring,
+ BioseqPtr bsp
+)
+
+{
+ Char buffer [256];
+ DeltaSeqPtr dsp;
+ ValNodePtr head = NULL;
+ Int4 num_gaps = 0;
+ SeqLitPtr slitp;
+ SeqLocPtr slocp;
+ CharPtr str;
+
+ if (bsp == NULL) return;
+
+ if (bsp->repr == Seq_repr_delta) {
+ for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp = dsp->next) {
+ switch (dsp->choice) {
+ case 1:
+ slocp = (SeqLocPtr)(dsp->data.ptrvalue);
+ if (slocp == NULL) break;
+ if (slocp->choice == SEQLOC_NULL) {
+ num_gaps++;
+ }
+ break;
+ case 2:
+ slitp = (SeqLitPtr)(dsp->data.ptrvalue);
+ if (slitp == NULL) break;
+ if (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap) {
+ num_gaps++;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ ValNodeCopyStr (&head, 0, "* NOTE: This is a partial genome representation.");
+ if (num_gaps > 0) {
+ sprintf (buffer, " It currently~* consists of %ld contigs. The true order of the pieces~", (long) (num_gaps + 1));
+ ValNodeCopyStr (&head, 0, buffer);
+ ValNodeCopyStr (&head, 0, "* is not known and their order in this sequence record is~");
+ ValNodeCopyStr (&head, 0, "* arbitrary. Gaps between the contigs are represented as~");
+ ValNodeCopyStr (&head, 0, "* runs of N, but the exact sizes of the gaps are unknown.");
+ }
+ ValNodeCopyStr (&head, 0, "~");
+
+ str = MergeFFValNodeStrs (head);
+
+ FFAddOneString (ffstring, str, TRUE, TRUE, TILDE_EXPAND);
+
+ MemFree (str);
+ ValNodeFreeData (head);
+}
+
static void AddHTGSCommentString (
StringItemPtr ffstring,
BioseqPtr bsp,
@@ -344,9 +401,15 @@ static void AddWGSMasterCommentString (
} else if (acclen == 13) {
StringCpy (ver, wgsname + 4);
ver [2] = '\0';
+ } else if (acclen == 14) {
+ StringCpy (ver, wgsname + 4);
+ ver [2] = '\0';
} else if (acclen == 15) {
StringCpy (ver, wgsname + 7);
ver [2] = '\0';
+ } else if (acclen == 16) {
+ StringCpy (ver, wgsname + 7);
+ ver [2] = '\0';
}
sprintf (buf, "The %s whole genome shotgun (WGS) project has the project accession %s.", taxname, wgsaccn);
@@ -417,14 +480,16 @@ static void AddTSAMasterCommentString (
uop = (UserObjectPtr) sdp->data.ptrvalue;
if (uop != NULL) {
oip = uop->type;
- if (oip != NULL && StringICmp (oip->str, "TSA-mRNA-List") == 0) {
- for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
- oip = ufp->label;
- if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
- if (StringICmp (oip->str, "TSA_accession_first") == 0) {
- first = (CharPtr) ufp->data.ptrvalue;
- } else if (StringICmp (oip->str, "TSA_accession_last") == 0) {
- last = (CharPtr) ufp->data.ptrvalue;
+ if (oip != NULL) {
+ if (StringICmp (oip->str, "TSA-mRNA-List") == 0 || StringICmp (oip->str, "TSA-RNA-List") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
+ if (StringICmp (oip->str, "TSA_accession_first") == 0) {
+ first = (CharPtr) ufp->data.ptrvalue;
+ } else if (StringICmp (oip->str, "TSA_accession_last") == 0) {
+ last = (CharPtr) ufp->data.ptrvalue;
+ }
}
}
}
@@ -443,6 +508,9 @@ static void AddTSAMasterCommentString (
} else if (acclen == 13) {
StringCpy (ver, tsaname + 4);
ver [2] = '\0';
+ } else if (acclen == 14) {
+ StringCpy (ver, tsaname + 4);
+ ver [2] = '\0';
} else if (acclen == 15) {
StringCpy (ver, tsaname + 7);
ver [2] = '\0';
@@ -478,6 +546,111 @@ static void AddTSAMasterCommentString (
}
}
+static void AddTLSMasterCommentString (
+ StringItemPtr ffstring,
+ BioseqPtr bsp,
+ CharPtr tlsaccn,
+ CharPtr tlsname
+)
+
+{
+ size_t acclen;
+ BioSourcePtr biop;
+ Char buf [256];
+ SeqMgrDescContext dcontext;
+ CharPtr first = NULL;
+ CharPtr last = NULL;
+ ObjectIdPtr oip;
+ OrgRefPtr orp;
+ SeqDescrPtr sdp;
+ CharPtr taxname = NULL;
+ UserFieldPtr ufp;
+ UserObjectPtr uop;
+ Char ver [16];
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+ if (sdp != NULL) {
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop != NULL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ taxname = orp->taxname;
+ }
+ }
+ }
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ while (sdp != NULL) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL && StringICmp (oip->str, "TLSProjects") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
+ if (StringICmp (oip->str, "TLS_accession_first") == 0) {
+ first = (CharPtr) ufp->data.ptrvalue;
+ } else if (StringICmp (oip->str, "TLS_accession_last") == 0) {
+ last = (CharPtr) ufp->data.ptrvalue;
+ }
+ }
+ }
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
+ }
+
+ if (StringHasNoText (taxname)) {
+ taxname = "?";
+ }
+ ver [0] = '\0';
+ acclen = StringLen (tlsname);
+ if (acclen == 12) {
+ StringCpy (ver, tlsname + 4);
+ ver [2] = '\0';
+ } else if (acclen == 13) {
+ StringCpy (ver, tlsname + 4);
+ ver [2] = '\0';
+ } else if (acclen == 14) {
+ StringCpy (ver, tlsname + 4);
+ ver [2] = '\0';
+ } else if (acclen == 15) {
+ StringCpy (ver, tlsname + 7);
+ ver [2] = '\0';
+ } else if (acclen == 16) {
+ StringCpy (ver, tlsname + 7);
+ ver [2] = '\0';
+ }
+
+ sprintf (buf, "The %s targeted locus study (TLS) project has the project accession %s.", taxname, tlsaccn);
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+
+ sprintf (buf, " This version of the project (%s) has the accession number %s", ver, tlsname);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
+
+ if (first == NULL && last == NULL) {
+ sprintf (buf, ".");
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ } else {
+ if (first != NULL && last == NULL) {
+ last = first;
+ } else if (first == NULL && last != NULL) {
+ first = last;
+ }
+ if (StringDoesHaveText (first) && StringDoesHaveText (last)) {
+ if (StringCmp (first, last) != 0) {
+ sprintf (buf, ", and consists of sequences %s-%s.", first, last);
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ } else {
+ sprintf (buf, ", and consists of sequence %s.", first);
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ }
+ } else {
+ sprintf (buf, ".");
+ FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ }
+ }
+}
+
static CharPtr GetMolInfoCommentString (
BioseqPtr bsp,
MolInfoPtr mip
@@ -538,11 +711,12 @@ static CharPtr GetMolInfoCommentString (
static CharPtr GetStrForBankit (
UserObjectPtr uop,
- Boolean dumpMode
+ Boolean dumpMode,
+ Boolean showedLocalId
)
{
- CharPtr bic = NULL, uvc = NULL, ptr;
+ CharPtr bic = NULL, smc = NULL, uvc = NULL, pfx = NULL, ptr;
ObjectIdPtr oip;
UserFieldPtr ufp;
@@ -557,19 +731,39 @@ static CharPtr GetStrForBankit (
} else if (StringCmp(oip->str, "AdditionalComment") == 0) {
bic = ufp->data.ptrvalue;
} else if (StringCmp(oip->str, "SmartComment") == 0 && dumpMode) {
- bic = ufp->data.ptrvalue;
+ smc = ufp->data.ptrvalue;
+ }
+ }
+
+ if (showedLocalId) {
+ if (StringNICmp (bic, "LocalID:", 8) == 0) {
+ bic = NULL;
+ }
+ if (StringNICmp (smc, "LocalID:", 8) == 0) {
+ smc = NULL;
}
}
- if (uvc == NULL && bic == NULL) return NULL;
+ if (uvc == NULL && bic == NULL && smc == NULL) return NULL;
- ptr = (CharPtr) MemNew (StringLen (uvc) + StringLen (bic) + 45);
- if (uvc != NULL && bic != NULL) {
- sprintf (ptr, "Vector Explanation: %s~Bankit Comment: %s", uvc, bic);
- } else if (uvc != NULL) {
- sprintf (ptr, "Vector Explanation: %s", uvc);
- } else if (bic != NULL) {
- sprintf (ptr, "Bankit Comment: %s", bic);
+ ptr = (CharPtr) MemNew (StringLen (uvc) + StringLen (bic) + StringLen (smc) + 45);
+ if (uvc != NULL) {
+ StringCat (ptr, pfx);
+ StringCat (ptr, "Vector Explanation: ");
+ StringCat (ptr, uvc);
+ pfx = "~";
+ }
+ if (bic != NULL) {
+ StringCat (ptr, pfx);
+ StringCat (ptr, "Bankit Comment: ");
+ StringCat (ptr, bic);
+ pfx = "~";
+ }
+ if (smc != NULL) {
+ StringCat (ptr, pfx);
+ StringCat (ptr, "Bankit Comment: ");
+ StringCat (ptr, smc);
+ pfx = "~";
}
return ptr;
@@ -590,6 +784,7 @@ static CharPtr reftxt21 = " NCBI contigs are derived from assembled genomic sequ
static CharPtr reftxt22 = " Features on this sequence have been produced for build ";
static CharPtr reftxt23 = " of the NCBI's genome annotation";
static CharPtr reftxt41 = " This record is based on preliminary annotation provided by ";
+static CharPtr reftxt51 = " This record represents a single, non-redundant, protein sequence which may be annotated on many different RefSeq genomes from the same, or different, species";
static CharPtr GetStatusForRefTrack (
UserObjectPtr uop
@@ -668,10 +863,10 @@ static Boolean URLHasSuspiciousHtml (
return FALSE;
}
-static Boolean GetGiFromAccnDotVer (CharPtr source, Int4Ptr gip)
+static Boolean GetGiFromAccnDotVer (CharPtr source, BIG_ID_PNTR gip)
{
- Int4 gi = 0;
+ BIG_ID gi = 0;
SeqIdPtr sip;
if (StringHasNoText (source) || gip == NULL) return FALSE;
@@ -701,7 +896,8 @@ static void AddStrForRefTrack (
Char buf [64];
ObjectIdPtr oip;
UserFieldPtr ufp, tmp, u, urf = NULL;
- Int4 from, to, gi;
+ Int4 from, to;
+ BIG_ID gi;
Int2 i = 0;
Int2 review = 0;
Boolean generated = FALSE, identical = FALSE;
@@ -758,8 +954,10 @@ static void AddStrForRefTrack (
}
if (urf != NULL && urf->choice == 11) {
for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) {
+ if (tmp->choice != 11) continue;
for (u = tmp->data.ptrvalue; u != NULL; u = u->next) {
oip = u->label;
+ if (oip == NULL) continue;
if (StringCmp (oip->str, "accession") == 0 ||
StringCmp (oip->str, "name") == 0) {
i++;
@@ -900,7 +1098,7 @@ static void AddStrForRefTrack (
} else if (StringICmp (oip->str, "name") == 0 && u->choice == 1) {
name = (CharPtr) u->data.ptrvalue;
} else if (StringICmp (oip->str, "gi") == 0 && u->choice == 2) {
- gi = u->data.intvalue;
+ gi = (BIG_ID) u->data.intvalue;
}
}
}
@@ -953,6 +1151,108 @@ static void AddStrForRefTrack (
}
}
+static void AddStrForRefSeqGenome (
+ IntAsn2gbJobPtr ajp,
+ StringItemPtr ffstring,
+ UserObjectPtr uop
+)
+
+{
+ CharPtr category = NULL, calc = NULL, cca = NULL, cli = NULL, com = NULL,
+ fgs = NULL, mod = NULL, phy = NULL, prt = NULL, qfo = NULL,
+ tys = NULL, upr = NULL;
+ ObjectIdPtr oip;
+ UserFieldPtr ufp, tmp, urf = NULL;
+
+ if ( uop == NULL || ffstring == NULL ) return;
+ if ((oip = uop->type) == NULL) return;
+ if (StringCmp (oip->str, "RefSeqGenome") != 0) return;
+
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "RefSeq Category") == 0) {
+ category = (CharPtr) ufp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "Details") == 0) {
+ urf = ufp;
+ }
+ }
+ if (urf != NULL && urf->choice == 11) {
+ for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) {
+ oip = tmp->label;
+ if (StringCmp (oip->str, "CALC") == 0) {
+ calc = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "CCA") == 0) {
+ cca = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "CLI") == 0) {
+ cli = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "COM") == 0) {
+ com = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "FGS") == 0) {
+ fgs = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "MOD") == 0) {
+ mod = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "PHY") == 0) {
+ phy = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "PRT") == 0) {
+ prt = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "QfO") == 0) {
+ qfo = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "TYS") == 0) {
+ tys = (CharPtr) tmp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "UPR") == 0) {
+ upr = (CharPtr) tmp->data.ptrvalue;
+ }
+ }
+ }
+ FFAddOneString (ffstring, "RefSeq Category: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, category, FALSE, FALSE, TILDE_IGNORE);
+ if (calc != NULL) {
+ FFAddOneString (ffstring, "\n CALC: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, calc, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (cca != NULL) {
+ FFAddOneString (ffstring, "\n CCA: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, cca, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (cli != NULL) {
+ FFAddOneString (ffstring, "\n CLI: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, cli, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (com != NULL) {
+ FFAddOneString (ffstring, "\n COM: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, com, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (fgs != NULL) {
+ FFAddOneString (ffstring, "\n FGS: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, fgs, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (mod != NULL) {
+ FFAddOneString (ffstring, "\n MOD: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, mod, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (phy != NULL) {
+ FFAddOneString (ffstring, "\n PHY: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, phy, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (prt != NULL) {
+ FFAddOneString (ffstring, "\n PRT: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, prt, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (qfo != NULL) {
+ FFAddOneString (ffstring, "\n QfO: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, qfo, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (tys != NULL) {
+ FFAddOneString (ffstring, "\n TYS: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, tys, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (upr != NULL) {
+ FFAddOneString (ffstring, "\n UPR: ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, upr, FALSE, FALSE, TILDE_IGNORE);
+ }
+}
+
static CharPtr GetGenomeBuildNumber (
UserObjectPtr uop
)
@@ -1015,6 +1315,7 @@ static CharPtr GetGenomeVersionNumber (
static CharPtr reftxt11 = "This record is predicted by automated computational analysis. This record is derived from a genomic sequence";
static CharPtr reftxt12 = "annotated using gene prediction method:";
+static CharPtr reftxt13 = "and transcript sequence";
static void FindModelEvidenceUop (
UserObjectPtr uop,
@@ -1037,7 +1338,8 @@ static void FindModelEvidenceUop (
static Boolean DoGetAnnotationComment (
BioseqPtr bsp,
CharPtr PNTR namep,
- Int4Ptr gip,
+ UserFieldPtr PNTR assmp,
+ BIG_ID_PNTR gip,
Int4Ptr leftp,
Int4Ptr rightp,
CharPtr PNTR methodp,
@@ -1046,15 +1348,19 @@ static Boolean DoGetAnnotationComment (
)
{
+ UserFieldPtr assm = NULL;
Int2 ce = 0, cm = 0;
SeqMgrDescContext dcontext;
- Int4 gi = 0, left = 0, right = 0;
+ BIG_ID gi = 0;
+ Int4 left = 0, right = 0;
Int4Ptr ints;
CharPtr method = NULL;
UserObjectPtr moduop;
CharPtr name = NULL;
ObjectIdPtr oip;
SeqDescrPtr sdp;
+ SeqIdPtr sip;
+ TextSeqIdPtr tsip;
UserFieldPtr u;
UserFieldPtr ufp;
UserObjectPtr uop;
@@ -1068,13 +1374,15 @@ static Boolean DoGetAnnotationComment (
if (moduop != NULL) {
oip = moduop->type;
if (oip != NULL && StringCmp(oip->str, "ModelEvidence") == 0) {
- for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ for (ufp = moduop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
if (oip == NULL) continue;
if (StringCmp (oip->str, "Contig Name") == 0) {
name = (CharPtr) ufp->data.ptrvalue;
+ } else if (StringCmp (oip->str, "Assembly") == 0) {
+ assm = ufp;
} else if (StringCmp (oip->str, "Contig Gi") == 0) {
- gi = ufp->data.intvalue;
+ gi = (BIG_ID) ufp->data.intvalue;
} else if (StringCmp (oip->str, "Contig Span") == 0 && ufp->choice == 8 && ufp->num >= 2) {
ints = (Int4Ptr) ufp->data.ptrvalue;
if (ints != NULL) {
@@ -1107,13 +1415,25 @@ static Boolean DoGetAnnotationComment (
}
}
}
+ if (StringHasNoText (name) && bsp != NULL) {
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_OTHER) {
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL) {
+ name = tsip->accession;
+ }
+ }
+ }
+ }
}
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
}
+
if (StringHasNoText (name)) return FALSE;
*namep = name;
+ *assmp = assm;
*gip = gi;
*leftp = left;
*rightp = right;
@@ -1126,7 +1446,8 @@ static Boolean DoGetAnnotationComment (
static Boolean GetAnnotationComment (
BioseqPtr bsp,
CharPtr PNTR namep,
- Int4Ptr gip,
+ UserFieldPtr PNTR assmp,
+ BIG_ID_PNTR gip,
Int4Ptr leftp,
Int4Ptr rightp,
CharPtr PNTR methodp,
@@ -1137,13 +1458,13 @@ static Boolean GetAnnotationComment (
{
SeqFeatPtr cds;
- if (DoGetAnnotationComment (bsp, namep, gip, leftp, rightp, methodp, mrnaEv, estEv)) return TRUE;
+ if (DoGetAnnotationComment (bsp, namep, assmp, gip, leftp, rightp, methodp, mrnaEv, estEv)) return TRUE;
if (ISA_aa (bsp->mol)) {
cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
if (cds != NULL) {
bsp = BioseqFindFromSeqLoc (cds->location);
if (bsp != NULL) {
- return DoGetAnnotationComment (bsp, namep, gip, leftp, rightp, methodp, mrnaEv, estEv);
+ return DoGetAnnotationComment (bsp, namep, assmp, gip, leftp, rightp, methodp, mrnaEv, estEv);
}
}
}
@@ -1354,11 +1675,11 @@ static CharPtr GetPrimaryStrForDelta (
{
Boolean accn;
- Char buf [64], tmp [80];
+ Char buf [128], tmp [128];
Int4 curr_start = 0, len, start0, start1;
DbtagPtr dbt;
DeltaSeqPtr deltasp;
- Int4 gi;
+ BIG_ID gi;
ValNodePtr head = NULL;
SeqIdPtr id, sip;
SeqIntPtr intp;
@@ -1384,7 +1705,7 @@ static CharPtr GetPrimaryStrForDelta (
id = NULL;
accn = FALSE;
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (GetAccnVerFromServer (gi, buf)) {
accn = TRUE;
} else {
@@ -1456,7 +1777,7 @@ static CharPtr GetStrForTpaOrRefSeqHist (
Char bfr [100];
Char buf [100];
DbtagPtr dbt;
- Int4 gi;
+ BIG_ID gi;
ValNodePtr head = NULL;
SeqHistPtr hist;
SeqIdPtr id;
@@ -1491,7 +1812,7 @@ static CharPtr GetStrForTpaOrRefSeqHist (
accn = FALSE;
buf [0] = '\0';
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (GetAccnVerFromServer (gi, buf)) {
accn = TRUE;
} else {
@@ -1796,7 +2117,7 @@ static CharPtr GeStrForTSA (
{
Int4 asf, ast, prf, prt;
- Char buf [80], tmp [80];
+ Char buf [128], tmp [128];
UserFieldPtr curr;
Boolean has_asf, has_ast, has_prf, has_prt;
ValNodePtr head = NULL;
@@ -2122,7 +2443,8 @@ typedef struct unverifiedtypeinfodata {
static UnverifiedTypeInfoData s_UnverifiedTypeInfo[] = {
{ "Organism", "source organism" },
- { "Features", "sequence and/or annotation" }
+ { "Features", "sequence and/or annotation" },
+ { "Misassembled", "sequence assembly" }
};
@@ -2228,7 +2550,46 @@ static CharPtr CommentTextFromUnverifiedFlags(BoolPtr unverified_flags)
StringCat (comment, comment_end);
return comment;
}
-
+
+/*
+ GetFileTrackPoint returns one position from whichever point location
+ is supplied.  If spp is non-NULL its point field is returned and index
+ is not used; otherwise, if psp is non-NULL, the result of
+ PackSeqPntGet (psp, index) is returned.  When both pointers are NULL
+ the function returns 0.
+*/
+static Int4 GetFileTrackPoint (SeqPntPtr spp, PackSeqPntPtr psp, Int4 index)
+
+{
+ if (spp != NULL) {
+ return spp->point;
+ } else if (psp != NULL) {
+ return PackSeqPntGet (psp, index);
+ }
+ /* neither kind of location was supplied */
+ return 0;
+}
+
+/*
+ CommentsAreDifferent compares str against last_name and returns FALSE
+ when the two are to be treated as the same text: both NULL, equal
+ under StringCmp, or identical except that the longer one has a single
+ extra trailing '.'.  Every other combination returns TRUE.
+*/
+static Boolean CommentsAreDifferent (CharPtr str, CharPtr last_name)
+
+{
+ size_t lens, lenl;
+
+ if (str == NULL && last_name == NULL) return FALSE;
+
+ /*
+ NOTE(review): from here on exactly one argument may still be NULL;
+ the StringCmp, StringLen and StringNCmp calls below presumably
+ tolerate NULL - confirm against the toolkit string library
+ */
+ if (StringCmp (str, last_name) == 0) return FALSE;
+
+ lens = StringLen (str);
+ lenl = StringLen (last_name);
+
+ if (lens == lenl + 1) {
+ /* str is one character longer: same text if it is last_name plus a final period */
+ if (StringNCmp (str, last_name, lenl) == 0) {
+ if (str [lens - 1] == '.') {
+ return FALSE;
+ }
+ }
+ } else if (lenl == lens + 1) {
+ /* mirror case: last_name is str plus a final period */
+ if (StringNCmp (str, last_name, lens) == 0) {
+ if (last_name [lenl - 1] == '.') {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
NLM_EXTERN void AddCommentBlock (
Asn2gbWorkPtr awp
@@ -2236,14 +2597,21 @@ NLM_EXTERN void AddCommentBlock (
{
size_t acclen;
+ CharPtr accn;
SeqMgrAndContext acontext;
AnnotDescPtr adp;
- Boolean annotDescCommentToComment;
+ Boolean annotDescCommentToComment = FALSE;
IntAsn2gbJobPtr ajp;
+ UserFieldPtr assm = NULL;
+ CharPtr authaccessvalue = NULL;
+ Int4 authaccess_itemID = 0;
BioseqPtr bsp;
Char buf [2048];
CommentBlockPtr cbp;
Char ch;
+ Int2 chunk;
+ Int2 count;
+ CharPtr PNTR cpp;
Boolean didGenome = FALSE;
Boolean didRefTrack = FALSE;
Boolean didTPA = FALSE;
@@ -2252,19 +2620,35 @@ NLM_EXTERN void AddCommentBlock (
DeltaSeqPtr dsp;
UserObjectPtr encodeUop = NULL;
Boolean estEv = FALSE;
+ BioseqPtr farbsp;
+ Uint2 fareid;
/*
SeqMgrFeatContext fcontext;
*/
+ CharPtr field;
+ PackSeqPntPtr filetrackpsp = NULL;
+ SeqPntPtr filetrackspp = NULL;
+ CharPtr filetrackURL = NULL;
+ Int4 basemodNum = 0;
+ CharPtr PNTR basemodURLhead = NULL;
+ CharPtr basemodURL = NULL;
+ Int4 filetrack_itemID = 0;
Boolean first = TRUE;
+ UserObjectPtr firstGenAnnotSCAD = NULL;
+ CharPtr firstGenAnnotSCStr = NULL;
+ Int4 frags;
GBBlockPtr gbp;
CharPtr geneName = NULL;
CharPtr genomeBuildNumber = NULL;
CharPtr genomeVersionNumber = NULL;
- Int4 gi = 0;
+ BIG_ID gi = 0;
Int4 gsdbid = 0;
+ /*
Boolean has_gaps = FALSE;
+ */
Boolean hasRefTrackStatus = FALSE;
SeqHistPtr hist;
+ Int4 idx;
Boolean is_collab = FALSE;
Boolean is_encode = FALSE;
Boolean is_other = FALSE;
@@ -2272,8 +2656,15 @@ NLM_EXTERN void AddCommentBlock (
Boolean is_wgs = FALSE;
Boolean isRefSeqStandard = FALSE;
Boolean is_unverified = FALSE;
+ Int4 j;
+ Int4 last;
+ Boolean last_had_tilde = FALSE;
+ CharPtr last_name;
Int4 left;
+ size_t len;
+ /*
SeqLitPtr litp;
+ */
ObjectIdPtr localID = NULL;
Char locusID [32];
CharPtr method = NULL;
@@ -2282,27 +2673,41 @@ NLM_EXTERN void AddCommentBlock (
SeqIdPtr msip;
CharPtr name = NULL;
ObjectIdPtr ncbifileID = NULL;
+ CharPtr nm;
+ Int4 num;
ObjectIdPtr oip;
Boolean okay;
+ CharPtr origLocalID = NULL;
+ /*
BioseqPtr parent;
+ */
+ CharPtr pfx;
+ CharPtr plural;
+ Int4 pos;
Int4 right;
SeqDescrPtr sdp;
- /*
SeqFeatPtr sfp;
- */
+ CharPtr sfx;
Boolean showedLocalID = FALSE;
Boolean showGBBSource = FALSE;
SeqIdPtr sip;
+ SeqLocPtr slp;
CharPtr str;
Char taxID [64];
+ CharPtr tlsaccn = NULL;
+ CharPtr tlsname = NULL;
Char tmp [128];
CharPtr tsaaccn = NULL;
CharPtr tsaname = NULL;
TextSeqIdPtr tsip;
+ TextSeqIdPtr tlstsip = NULL;
+ UserFieldPtr tufp;
UserFieldPtr ufp;
+ Boolean unordered = FALSE;
Int4 unverified_itemID = 0;
UserObjectPtr uop;
Int4 version;
+ ValNodePtr vnp;
CharPtr wgsaccn = NULL;
CharPtr wgsname = NULL;
StringItemPtr ffstring = NULL;
@@ -2350,6 +2755,44 @@ NLM_EXTERN void AddCommentBlock (
is_encode = TRUE;
encodeUop = uop;
}
+ if (StringICmp (oip->str, "FileTrack") == 0) {
+ filetrack_itemID = dcontext.itemID;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "FileTrackURL") == 0 || StringCmp (oip->str, "Map-FileTrackURL") == 0) {
+ if (ufp->choice == 1 && ufp->data.ptrvalue != NULL) {
+ filetrackURL = (CharPtr) ufp->data.ptrvalue;
+ } else if (ufp->choice == 7 && ufp->data.ptrvalue != NULL && ufp->num > 0) {
+ cpp = (CharPtr PNTR) ufp->data.ptrvalue;
+ if (cpp != NULL) {
+ filetrackURL = cpp [0];
+ }
+ }
+ } else if (StringCmp (oip->str, "BaseModification-FileTrackURL") == 0) {
+ if (ufp->choice == 1 && ufp->data.ptrvalue != NULL) {
+ basemodURL = (CharPtr) ufp->data.ptrvalue;
+ basemodNum = 1;
+ } else if (ufp->choice == 7 && ufp->data.ptrvalue != NULL && ufp->num > 0) {
+ cpp = (CharPtr PNTR) ufp->data.ptrvalue;
+ if (cpp != NULL) {
+ basemodURLhead = cpp;
+ basemodNum = ufp->num;
+ }
+ }
+ }
+ }
+ }
+ if (StringICmp (oip->str, "AuthorizedAccess") == 0) {
+ authaccess_itemID = dcontext.itemID;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "Study") != 0) continue;
+ if (ufp->choice != 1 || ufp->data.ptrvalue == NULL) continue;
+ authaccessvalue = (CharPtr) ufp->data.ptrvalue;
+ }
+ }
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
@@ -2363,6 +2806,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = unverified_itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -2383,14 +2827,232 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
+ }
+
+ if (bsp->repr == Seq_repr_map && bsp->seq_ext_type == 3) {
+ for (sfp = (SeqFeatPtr) bsp->seq_ext; sfp != NULL; sfp = sfp->next) {
+ if (sfp->data.choice != SEQFEAT_RSITE) continue;
+ slp = sfp->location;
+ if (slp == NULL) continue;
+ if (slp->choice == SEQLOC_PNT) {
+ filetrackspp = (SeqPntPtr) slp->data.ptrvalue;
+ } else if (slp->choice == SEQLOC_PACKED_PNT) {
+ filetrackpsp = (PackSeqPntPtr) slp->data.ptrvalue;
+ }
+ }
+ }
+
+ if (authaccessvalue != NULL) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = awp->entityID;
+ cbp->itemID = authaccess_itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ FFAddOneString (ffstring, "These data are available through the dbGaP authorized access system. ", FALSE, FALSE, TILDE_IGNORE);
+ if (GetWWW (ajp)) {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?adddataset=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "&page=login", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "Request access", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, " to Study ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "http://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString (ffstring, "Request access to Study ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, authaccessvalue, FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
}
}
+ /*
+ look for Seq-annot.desc.comment on annots packaged on current bioseq,
+ Genome-Annotation structured comment will suppress GenomeBuild user object
+ */
+
+ adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_user, &acontext);
+ while (adp != NULL) {
+ uop = (UserObjectPtr) adp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL) {
+ if (StringCmp (oip->str, "AnnotDescCommentPolicy") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
+ if (StringCmp (oip->str, "Policy") == 0) {
+ if (StringICmp ((CharPtr) ufp->data.ptrvalue, "ShowInComment") == 0) {
+ annotDescCommentToComment = TRUE;
+ }
+ }
+ }
+ } else if (StringICmp (oip->str, "StructuredComment") == 0) {
+ if (firstGenAnnotSCAD == NULL) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice != 1) continue;
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "StructuredCommentPrefix") == 0) {
+ if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
+ firstGenAnnotSCAD = uop;
+ genomeBuildNumber = NULL;
+ genomeVersionNumber = NULL;
+ firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_user, &acontext);
+ }
+
+ /*
+ also look on first far sequence component of NCBI_GENOMES records
+ */
+
+ if (awp->isNCBIGenomes && firstGenAnnotSCAD == NULL && bsp->repr == Seq_repr_delta && bsp->seq_ext_type == 4) {
+ for (dsp = (DeltaSeqPtr) (bsp->seq_ext); dsp != NULL; dsp = dsp->next) {
+ if (dsp->choice != 1) continue;
+ slp = (SeqLocPtr) dsp->data.ptrvalue;
+ if (slp == NULL) continue;
+ sip = SeqLocId (slp);
+ if (sip == NULL) continue;
+ farbsp = BioseqLockById (sip);
+ if (farbsp == NULL) break;
+ fareid = ObjMgrGetEntityIDForPointer (farbsp);
+ SeqMgrIndexFeatures (fareid, NULL);
+ adp = SeqMgrGetNextAnnotDesc (farbsp, NULL, Annot_descr_user, &acontext);
+ while (adp != NULL && firstGenAnnotSCAD == NULL) {
+ uop = (UserObjectPtr) adp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL) {
+ if (StringICmp (oip->str, "StructuredComment") == 0) {
+ if (firstGenAnnotSCAD == NULL) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice != 1) continue;
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "StructuredCommentPrefix") == 0) {
+ if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
+ firstGenAnnotSCAD = uop;
+ genomeBuildNumber = NULL;
+ genomeVersionNumber = NULL;
+ firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ adp = SeqMgrGetNextAnnotDesc (farbsp, adp, Annot_descr_user, &acontext);
+ }
+ if (firstGenAnnotSCAD == NULL) {
+ sdp = SeqMgrGetNextDescriptor (farbsp, NULL, Seq_descr_user, &dcontext);
+ while (sdp != NULL) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice != 1) continue;
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "StructuredCommentPrefix") == 0) {
+ if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
+ firstGenAnnotSCAD = uop;
+ genomeBuildNumber = NULL;
+ genomeVersionNumber = NULL;
+ firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
+ }
+ }
+ }
+ }
+ }
+ sdp = SeqMgrGetNextDescriptor (farbsp, sdp, Seq_descr_user, &dcontext);
+ }
+ }
+ BioseqUnlock (farbsp);
+ break;
+ }
+ }
+
+ /*
+ also look for Genome-Annotation structured comment descriptor to suppress GenomeBuild user object
+ */
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ while (sdp != NULL) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice != 1) continue;
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "StructuredCommentPrefix") == 0) {
+ if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
+ genomeBuildNumber = NULL;
+ genomeVersionNumber = NULL;
+ if (firstGenAnnotSCAD == NULL) {
+ firstGenAnnotSCAD = uop;
+ firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
+ }
+ }
+ }
+ }
+ }
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
+ }
+
gi = 0;
for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ tsip = NULL;
if (sip->choice == SEQID_OTHER) {
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
@@ -2405,6 +3067,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -2453,6 +3116,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -2467,6 +3131,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -2501,6 +3166,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -2513,6 +3179,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -2580,6 +3247,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = TRUE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -2599,13 +3267,14 @@ NLM_EXTERN void AddCommentBlock (
method = NULL;
mrnaEv = FALSE;
estEv = FALSE;
- if (GetAnnotationComment (bsp, &name, &gi, &left, &right, &method, &mrnaEv, &estEv)) {
+ if (GetAnnotationComment (bsp, &name, &assm, &gi, &left, &right, &method, &mrnaEv, &estEv)) {
cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
if (cbp != NULL) {
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -2631,10 +3300,10 @@ NLM_EXTERN void AddCommentBlock (
if ( GetWWW(ajp) ) {
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
- if (IS_ntdb_accession (name)) {
- FF_Add_NCBI_Base_URL (ffstring, link_seqn);
- } else {
+ if (IS_protdb_accession (name)) {
FF_Add_NCBI_Base_URL (ffstring, link_seqp);
+ } else {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqn);
}
if (gi > 0) {
sprintf (tmp, "%ld", (long) gi);
@@ -2721,6 +3390,84 @@ NLM_EXTERN void AddCommentBlock (
}
FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
+
+ if (assm != NULL) {
+
+ plural = " (";
+ count = 0;
+ for (tufp = assm->data.ptrvalue; tufp != NULL; tufp = tufp->next) {
+ ufp = tufp->data.ptrvalue;
+ if (ufp != NULL) {
+ oip = ufp->label;
+ if (oip != NULL && oip->str != NULL && StringICmp (oip->str, "accession") == 0 && ufp->choice == 1) {
+ accn = (CharPtr) ufp->data.ptrvalue;
+ if (StringDoesHaveText (accn)) {
+ count++;
+ }
+ }
+ }
+ }
+ if (count > 1) {
+ plural = "s (";
+ }
+
+ if (count > 0) {
+ FFAddTextToString (ffstring, " ", reftxt13, plural, FALSE, FALSE, TILDE_IGNORE);
+
+ for (tufp = assm->data.ptrvalue; tufp != NULL; tufp = tufp->next) {
+ accn = NULL;
+ ufp = tufp->data.ptrvalue;
+ if (ufp != NULL) {
+ oip = ufp->label;
+ if (oip != NULL && oip->str != NULL && StringICmp (oip->str, "accession") == 0 && ufp->choice == 1) {
+ accn = (CharPtr) ufp->data.ptrvalue;
+ }
+ }
+ if (StringDoesHaveText (accn)) {
+ if (GetWWW (ajp) && ValidateAccnDotVer (accn) == 0 && GetGiFromAccnDotVer (accn, &gi)) {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ if (IS_protdb_accession (nm)) {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqp);
+ } else {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqn);
+ }
+ if (gi > 0) {
+ sprintf (buf, "%ld", (long) gi);
+ FFAddTextToString(ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddTextToString(ffstring, /* "val=" */ NULL, accn, "\">", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else if (GetWWW (ajp) && ValidateAccn (accn) == 0) {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ if (IS_protdb_accession (nm)) {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqp);
+ } else {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqn);
+ }
+ FFAddTextToString(ffstring, /* "val=" */ NULL, accn, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
+ }
+ } else if (StringDoesHaveText (nm)) {
+ FFAddOneString (ffstring, nm, FALSE, FALSE, TILDE_IGNORE);
+ } else continue;
+ if (tufp->next != NULL) {
+ ufp = tufp->next;
+ if (ufp->next != NULL) {
+ FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString (ffstring, " and ", FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
+ }
+
+ FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
if (method != NULL) {
FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE);
@@ -2778,12 +3525,57 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = TRUE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
}
}
- } else {
+ } else if (StringNCmp(tsip->accession, "WP_", 3) == 0) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = awp->entityID;
+ cbp->itemID = unverified_itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ FFAddOneString (ffstring, "REFSEQ:", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, reftxt51, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
+ } else if (StringNCmp(tsip->accession, "NZ_", 3) == 0) {
+ if (StringLen (tsip->accession) == 15) {
+ is_wgs = TRUE;
+ if (StringCmp (tsip->accession + 9, "000000") == 0) {
+ wgsaccn = tsip->accession;
+ wgsname = tsip->name;
+ }
+ } else if (StringLen (tsip->accession) == 16) {
+ is_wgs = TRUE;
+ if (StringCmp (tsip->accession + 10, "000000") == 0) {
+ wgsaccn = tsip->accession;
+ wgsname = tsip->name;
+ }
+ }
+ } else {
if (StringLen (tsip->accession) == 15) {
is_wgs = TRUE;
if (StringCmp (tsip->accession + 9, "000000") == 0) {
@@ -2815,6 +3607,12 @@ NLM_EXTERN void AddCommentBlock (
wgsaccn = tsip->accession;
wgsname = tsip->name; /* master accession has 9 zeroes, name has project version plus 7 zeroes */
}
+ } else if (acclen == 14) {
+ is_wgs = TRUE;
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ wgsaccn = tsip->accession;
+ wgsname = tsip->name; /* master accession has 10 zeroes, name has project version plus 8 zeroes */
+ }
} else if (ajp->newSourceOrg && StringLen (tsip->accession) == 6) {
ch = tsip->accession [0];
if (ch == 'J' || ch == 'K' || ch == 'L' || ch == 'M') {
@@ -2844,6 +3642,12 @@ NLM_EXTERN void AddCommentBlock (
wgsaccn = tsip->accession;
wgsname = tsip->name; /* master accession has 9 zeroes, name has project version plus 7 zeroes */
}
+ } else if (acclen == 14) {
+ is_wgs = TRUE;
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ wgsaccn = tsip->accession;
+ wgsname = tsip->name; /* master accession has 10 zeroes, name has project version plus 8 zeroes */
+ }
} else if (ajp->newSourceOrg && StringLen (tsip->accession) == 6) {
ch = tsip->accession [0];
if (ch == 'J' || ch == 'K' || ch == 'L' || ch == 'M') {
@@ -2863,6 +3667,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
/* string will be created after we know if there are additional comments */
@@ -2884,6 +3689,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -2893,18 +3699,31 @@ NLM_EXTERN void AddCommentBlock (
}
} else if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
} else if (sip->choice == SEQID_LOCAL) {
localID = (ObjectIdPtr) sip->data.ptrvalue;
}
+
+ if (tsip != NULL) {
+ tlstsip = tsip;
+ }
}
+ origLocalID = FastaGetOriginalId (bsp);
+
if (localID != NULL) {
if (is_tpa || is_collab) {
if (awp->mode == SEQUIN_MODE || awp->mode == DUMP_MODE) {
buf [0] = '\0';
- if (! StringHasNoText (localID->str)) {
+ if (StringDoesHaveText (origLocalID)) {
+ if (StringLen (origLocalID) < 1000) {
+ sprintf (buf, "LocalID: %s", origLocalID);
+ showedLocalID = TRUE;
+ } else {
+ sprintf (buf, "LocalID string too large");
+ }
+ } else if (! StringHasNoText (localID->str)) {
if (StringLen (localID->str) < 1000) {
sprintf (buf, "LocalID: %s", localID->str);
showedLocalID = TRUE;
@@ -2921,6 +3740,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -2935,6 +3755,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -2962,6 +3783,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -2976,6 +3798,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3003,6 +3826,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3017,6 +3841,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3027,12 +3852,8 @@ NLM_EXTERN void AddCommentBlock (
}
if (! ajp->flags.hideBankItComment) {
- str = GetStrForBankit (uop, (Boolean) (awp->mode == DUMP_MODE));
- if (str != NULL && showedLocalID && awp->mode == SEQUIN_MODE) {
- if (StringNICmp (str, "Bankit Comment: LocalID:", 24) == 0) {
- str = NULL;
- }
- }
+ str = GetStrForBankit (uop, (Boolean) (awp->mode == DUMP_MODE),
+ (Boolean) (showedLocalID && awp->mode == SEQUIN_MODE));
if (str != NULL) {
cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
@@ -3042,6 +3863,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3056,6 +3878,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3075,6 +3898,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3093,6 +3917,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3113,6 +3938,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3127,6 +3953,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3164,6 +3991,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3176,7 +4004,7 @@ NLM_EXTERN void AddCommentBlock (
FALSE, FALSE, TILDE_IGNORE);
if ( GetWWW(ajp) ) {
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_Add_NCBI_Base_URL (ffstring, "http://www.ncbi.nlm.nih.gov/RefSeq/RSG");
+ FF_Add_NCBI_Base_URL (ffstring, "http://www.ncbi.nlm.nih.gov/refseq/rsg/");
FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, "RefSeqGene", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
@@ -3189,12 +4017,14 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
}
}
+ /*
if (bsp->repr == Seq_repr_delta && bsp->seq_ext_type == 4 && is_wgs) {
has_gaps = FALSE;
for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp; dsp=dsp->next) {
@@ -3214,6 +4044,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3232,12 +4063,14 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
}
}
}
+ */
/* Seq-hist results in allocated comment string */
@@ -3249,7 +4082,7 @@ NLM_EXTERN void AddCommentBlock (
okay = TRUE;
for (sip = hist->replaced_by_ids; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- if (gi == (Int4) sip->data.intvalue) {
+ if (gi == (BIG_ID) sip->data.intvalue) {
okay = FALSE;
}
}
@@ -3261,6 +4094,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3281,9 +4115,10 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
- if (awp->afp != NULL) {
- DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
- }
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -3293,7 +4128,7 @@ NLM_EXTERN void AddCommentBlock (
okay = TRUE;
for (sip = hist->replace_ids; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- if (gi == (Int4) sip->data.intvalue) {
+ if (gi == (BIG_ID) sip->data.intvalue) {
okay = FALSE;
}
}
@@ -3305,6 +4140,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3320,6 +4156,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3329,6 +4166,48 @@ NLM_EXTERN void AddCommentBlock (
}
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ while (sdp != NULL) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL) {
+ if (StringCmp (oip->str, "RefSeqGenome") == 0) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = dcontext.entityID;
+ cbp->itemID = dcontext.itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ AddStrForRefSeqGenome (ajp, ffstring, uop);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
+ }
+ }
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
+ }
+
+
+
/* just save IDs for comment, maploc, and region descriptors */
/*
@@ -3341,8 +4220,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3373,6 +4254,7 @@ NLM_EXTERN void AddCommentBlock (
*/
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3389,6 +4271,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3410,6 +4293,7 @@ NLM_EXTERN void AddCommentBlock (
*/
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3426,6 +4310,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3433,6 +4318,93 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemtype = 0;
}
}
+ } else if (mip->tech == MI_TECH_targeted) {
+
+ if (tlstsip != NULL) {
+ tlsaccn = tlstsip->accession;
+ tlsname = tlstsip->name;
+
+ if (tlsname != NULL && bsp->repr == Seq_repr_virtual) {
+
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ /*
+ cbp->entityID = dcontext.entityID;
+ cbp->itemID = dcontext.itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ */
+ cbp->entityID = awp->entityID;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ AddTLSMasterCommentString (ffstring, bsp, tlsaccn, tlsname);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ cbp->itemID = dcontext.itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ cbp->itemID = 0;
+ cbp->itemtype = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
+ if (sdp != NULL) {
+ gbp = (GBBlockPtr) sdp->data.ptrvalue;
+ if (gbp != NULL) {
+ unordered = FALSE;
+ for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringCmp (str, "UNORDERED") == 0) {
+ unordered = TRUE;
+ }
+ }
+ if (unordered) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = dcontext.entityID;
+ cbp->itemID = dcontext.itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ AddUnorderedCommentString (ffstring, bsp);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
}
}
}
@@ -3449,6 +4421,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3464,6 +4437,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3472,18 +4446,28 @@ NLM_EXTERN void AddCommentBlock (
}
}
+ last_name = NULL;
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext);
while (sdp != NULL) {
- if (StringDoesHaveText ((CharPtr)sdp->data.ptrvalue)) {
+ str = (CharPtr) sdp->data.ptrvalue;
+ if (StringDoesHaveText (str) && (last_name == NULL || CommentsAreDifferent (str, last_name) || awp->mode == DUMP_MODE)) {
cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
if (cbp != NULL) {
+ last_name = (CharPtr) str;
+
cbp->entityID = dcontext.entityID;
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
+ last_had_tilde = FALSE;
+ len = StringLen (str);
+ if (len > 4 && str [len - 1] == '~' && str [len - 2] == '~') {
+ last_had_tilde = TRUE;
+ }
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3501,8 +4485,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3510,18 +4496,26 @@ NLM_EXTERN void AddCommentBlock (
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_maploc, &dcontext);
}
+ last_name = NULL;
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_region, &dcontext);
while (sdp != NULL) {
- if (sdp->data.ptrvalue != NULL) {
+ str = (CharPtr) sdp->data.ptrvalue;
+ if (StringDoesHaveText (str) &&
+ ((last_name == NULL || StringCmp (str, last_name) != 0) || awp->mode == DUMP_MODE) &&
+ (StringCmp (str, ".") != 0 || awp->mode == DUMP_MODE)) {
cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
if (cbp != NULL) {
+ last_name = (CharPtr) str;
+
cbp->entityID = dcontext.entityID;
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3530,18 +4524,26 @@ NLM_EXTERN void AddCommentBlock (
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_region, &dcontext);
}
+ last_name = NULL;
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_name, &dcontext);
while (sdp != NULL) {
- if (sdp->data.ptrvalue != NULL) {
+ str = (CharPtr) sdp->data.ptrvalue;
+ if (StringDoesHaveText (str) &&
+ ((last_name == NULL || StringCmp (str, last_name) != 0) || awp->mode == DUMP_MODE) &&
+ (StringCmp (str, ".") != 0 || awp->mode == DUMP_MODE)) {
cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
if (cbp != NULL) {
+ last_name = (CharPtr) str;
+
cbp->entityID = dcontext.entityID;
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3550,6 +4552,95 @@ NLM_EXTERN void AddCommentBlock (
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_name, &dcontext);
}
+ if (basemodNum > 0 && (basemodURLhead != NULL || basemodURL != NULL)) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = awp->entityID;
+ cbp->itemID = filetrack_itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ if (! last_had_tilde && ! cbp->first) {
+ FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
+ }
+
+ if (basemodNum == 1) {
+ FFAddOneString (ffstring, "This genome has a ", FALSE, FALSE, TILDE_IGNORE);
+ if (GetWWW (ajp)) {
+ str = NULL;
+ if (basemodURL != NULL) {
+ str = basemodURL;
+ } else if (basemodURLhead != NULL) {
+ str = basemodURLhead [0];
+ }
+ if (StringDoesHaveText (str)) {
+ FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "base modification file", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ }
+ } else {
+ FFAddOneString (ffstring, "base modification file", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, " available.", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString (ffstring, "There are ", FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%ld", (long) basemodNum);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, " base modification files", FALSE, FALSE, TILDE_IGNORE);
+ if (GetWWW (ajp)) {
+ pfx = " (";
+ sfx = "";
+ for (j = 0; j < basemodNum; j++) {
+ str = NULL;
+ if (basemodURL != NULL) {
+ str = basemodURL;
+ } else if (basemodURLhead != NULL) {
+ str = basemodURLhead [j];
+ }
+ if (StringHasNoText (str)) continue;
+ FFAddOneString (ffstring, pfx, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%ld", (long) (j + 1));
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ if (basemodNum == 2) {
+ pfx = " and ";
+ } else if (j == basemodNum - 2) {
+ pfx = ", and ";
+ } else {
+ pfx = ", ";
+ }
+ sfx = ")";
+ }
+ FFAddOneString (ffstring, sfx, FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, " available for this genome.", FALSE, FALSE, TILDE_IGNORE);
+ }
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
+ }
+
/* StructuredComment user object */
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
@@ -3558,17 +4649,42 @@ NLM_EXTERN void AddCommentBlock (
if (uop != NULL) {
oip = uop->type;
if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
- cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
- if (cbp != NULL) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice != 1) continue;
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "StructuredCommentPrefix") == 0) {
+ if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
+ if (firstGenAnnotSCAD == NULL) {
+ firstGenAnnotSCAD = uop;
+ genomeBuildNumber = NULL;
+ genomeVersionNumber = NULL;
+ firstGenAnnotSCStr = GetStrForStructuredComment (ajp, firstGenAnnotSCAD);
+ uop = NULL;
+ } else {
+ firstGenAnnotSCAD = NULL;
+ }
+ break;
+ }
+ }
+ }
+ if (uop != NULL) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
- cbp->entityID = dcontext.entityID;
- cbp->itemID = dcontext.itemID;
- cbp->itemtype = OBJ_SEQDESC;
- cbp->first = first;
- first = FALSE;
+ cbp->entityID = dcontext.entityID;
+ cbp->itemID = dcontext.itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
- if (awp->afp != NULL) {
- DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
}
}
}
@@ -3595,6 +4711,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = dcontext.itemID;
cbp->itemtype = OBJ_SEQDESC;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3609,6 +4726,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3630,6 +4748,7 @@ NLM_EXTERN void AddCommentBlock (
*/
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3644,6 +4763,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3663,6 +4783,7 @@ NLM_EXTERN void AddCommentBlock (
*/
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3677,6 +4798,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString(ajp, ffstring);
ffstring = FFGetString(ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3686,12 +4808,12 @@ NLM_EXTERN void AddCommentBlock (
}
}
- parent = awp->parent;
- if (parent == NULL) return;
-
/* no longer adding comment features that are full length on appropriate segment */
/*
+ parent = awp->parent;
+ if (parent == NULL) return;
+
sfp = SeqMgrGetNextFeature (parent, NULL, SEQFEAT_COMMENT, 0, &fcontext);
while (sfp != NULL) {
if (fcontext.left == awp->from && fcontext.right == awp->to) {
@@ -3702,8 +4824,10 @@ NLM_EXTERN void AddCommentBlock (
cbp->itemID = fcontext.itemID;
cbp->itemtype = OBJ_SEQFEAT;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3713,8 +4837,12 @@ NLM_EXTERN void AddCommentBlock (
}
*/
- /* look for Seq-annot.desc.comment on annots packaged on current bioseq */
+ /*
+ search for Seq-annot.desc.comment on annots packaged on current bioseq
+ is now done earlier in order to suppress GenomeBuild user object comment
+ */
+ /*
annotDescCommentToComment = FALSE;
adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_user, &acontext);
while (adp != NULL) {
@@ -3732,11 +4860,27 @@ NLM_EXTERN void AddCommentBlock (
}
}
}
+ } else if (StringICmp (oip->str, "StructuredComment") == 0) {
+ if (firstGenAnnotSCAD == NULL) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice != 1) continue;
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "StructuredCommentPrefix") == 0) {
+ if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
+ firstGenAnnotSCAD = uop;
+ }
+ }
+ }
+ }
}
}
}
adp = SeqMgrGetNextAnnotDesc (bsp, adp, Annot_descr_user, &acontext);
}
+ */
if (annotDescCommentToComment) {
adp = SeqMgrGetNextAnnotDesc (bsp, NULL, Annot_descr_comment, &acontext);
@@ -3748,6 +4892,7 @@ NLM_EXTERN void AddCommentBlock (
cbp->entityID = awp->entityID;
cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
first = FALSE;
if (cbp->first) {
@@ -3762,6 +4907,7 @@ NLM_EXTERN void AddCommentBlock (
FFRecycleString (ajp, ffstring);
ffstring = FFGetString (ajp);
+ last_had_tilde = FALSE;
if (awp->afp != NULL) {
DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
}
@@ -3771,6 +4917,171 @@ NLM_EXTERN void AddCommentBlock (
}
}
+ if (firstGenAnnotSCAD != NULL) {
+ if (StringDoesHaveText (firstGenAnnotSCStr)) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = awp->entityID;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ if (! last_had_tilde) {
+ FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
+ }
+ }
+
+ first = FALSE;
+
+ FFAddOneString (ffstring, firstGenAnnotSCStr, FALSE, FALSE, TILDE_EXPAND);
+
+ cbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString (ajp, ffstring);
+ ffstring = FFGetString (ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
+ }
+ }
+ if (firstGenAnnotSCStr != NULL) {
+ MemFree (firstGenAnnotSCStr);
+ }
+
+ num = 0;
+ if (filetrackspp != NULL) {
+ num = 1;
+ } else if (filetrackpsp != NULL) {
+ num = PackSeqPntNum (filetrackpsp);
+ }
+ if (num > 0) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = awp->entityID;
+ cbp->itemID = filetrack_itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ cbp->no_blank_before = last_had_tilde;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ FFAddOneString (ffstring, "This ", FALSE, FALSE, TILDE_IGNORE);
+ if (GetWWW (ajp) && filetrackURL != NULL) {
+ FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, filetrackURL, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "map", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString (ffstring, "map", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, " has ", FALSE, FALSE, TILDE_IGNORE);
+ frags = num;
+
+ if (bsp->topology != TOPOLOGY_CIRCULAR) {
+ if (num > 1 && GetFileTrackPoint (filetrackspp, filetrackpsp, num - 1) < bsp->length - 1 ) {
+ frags = num + 1;
+ }
+ }
+
+ sprintf (tmp, "%ld", (long) frags);
+ FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
+ if (frags > 1) {
+ FFAddOneString (ffstring, " pieces:", FALSE, FALSE, TILDE_IGNORE);
+ } else if (frags == 1) {
+ FFAddOneString (ffstring, " piece:", FALSE, FALSE, TILDE_IGNORE);
+ }
+
+ last = 1;
+ pos = GetFileTrackPoint (filetrackspp, filetrackpsp, 0) + 1;
+ if (bsp->topology != TOPOLOGY_CIRCULAR) {
+
+ FFAddNewLine (ffstring);
+ sprintf (tmp, "* %7ld %7ld: fragment of %ld bp in length",
+ (long) last, (long) pos, (long) (pos - last + 1));
+ FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
+
+ }
+ last = pos + 1;
+
+ chunk = 0;
+ for (idx = 1; idx < num; idx++) {
+
+ chunk++;
+ if (chunk >= 100) {
+ chunk = 0;
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+ cbp->entityID = awp->entityID;
+ cbp->itemID = filetrack_itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = FALSE;
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+ } else {
+ FFAddNewLine (ffstring);
+ }
+
+ pos = GetFileTrackPoint (filetrackspp, filetrackpsp, idx) + 1;
+
+ sprintf (tmp, "* %7ld %7ld: fragment of %ld bp in length",
+ (long) last, (long) pos, (long) (pos - last + 1));
+ FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
+
+ last = pos + 1;
+ }
+
+ if (bsp->topology != TOPOLOGY_CIRCULAR) {
+ pos = bsp->length;
+
+ if (last < pos) {
+ FFAddNewLine (ffstring);
+ sprintf (tmp, "* %7ld %7ld: fragment of %ld bp in length",
+ (long) last, (long) pos, (long) (pos - last + 1));
+ FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
+ }
+
+ } else {
+ pos = GetFileTrackPoint (filetrackspp, filetrackpsp, 0) + 1;
+
+ FFAddNewLine (ffstring);
+ sprintf (tmp, "* %7ld %7ld: fragment of %ld bp in length",
+ (long) last, (long) pos, (long) (bsp->length + pos - last + 1));
+ FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
+ }
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ last_had_tilde = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
+ }
+
FFRecycleString(ajp, ffstring);
}
@@ -3910,13 +5221,13 @@ static BaseBlockPtr AddSource (
for (omp = onp->mod; omp != NULL; omp = omp->next) {
subtype = omp->subtype;
if (subtype == 253) {
- subtype = 35;
+ subtype = 39;
} else if (subtype == 254) {
- subtype = 36;
+ subtype = 40;
} else if (subtype == 255) {
- subtype = 37;
+ subtype = 41;
}
- if (subtype < 38) {
+ if (subtype < 42) {
idx = orgModToSourceIdx [subtype];
if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
str = asn2gnbk_source_quals [idx].name;
@@ -3936,9 +5247,9 @@ static BaseBlockPtr AddSource (
for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
subtype = ssp->subtype;
if (subtype == 255) {
- subtype = 38;
+ subtype = 44;
}
- if (subtype < 39) {
+ if (subtype < 45) {
idx = subSourceToSourceIdx [subtype];
if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
str = asn2gnbk_source_quals [idx].name;
@@ -4120,7 +5431,8 @@ static void GetSourcesOnBioseq (
BioseqPtr target,
BioseqPtr bsp,
Int4 from,
- Int4 to
+ Int4 to,
+ SeqFeatPtr cds
)
{
@@ -4134,14 +5446,20 @@ static void GetSourcesOnBioseq (
Boolean loop = FALSE;
Int2 idx;
IntSrcBlockPtr isp;
+ Boolean is_wp = FALSE;
Int4Ptr ivals;
SeqLocPtr newloc;
Boolean noLeft;
Boolean noRight;
Int2 numivals;
+ Int2 num_super_kingdom = 0;
Boolean okay;
+ OrgNamePtr onp;
+ OrgRefPtr orp;
+ ObjValNodePtr ovp;
Int4 right;
SeqDescrPtr sdp;
+ ValNodePtr sdplist = NULL;
SeqFeatPtr sfp;
SeqInt sint;
SeqIdPtr sip;
@@ -4149,13 +5467,43 @@ static void GetSourcesOnBioseq (
Int4 start;
Int4 stop;
Uint1 strand;
+ Boolean super_kingdoms_different = FALSE;
+ CharPtr super_kingdom_name = NULL;
+ TaxElementPtr tep;
+ TextSeqIdPtr tsip;
ValNode vn;
ValNodePtr vnp;
+ ValNodePtr vnp2;
if (awp == NULL || target == NULL || bsp == NULL) return;
ajp = awp->ajp;
if (ajp == NULL) return;
+ if (cds != NULL) {
+ sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
+ if (sfp != NULL) {
+ biop = (BioSourcePtr) sfp->data.value.ptrvalue;
+ bbp = AddSource (awp, &(awp->srchead), biop, sfp->comment);
+ if (bbp != NULL) {
+
+ bbp->entityID = sfp->idx.entityID;
+ bbp->itemID = sfp->idx.itemID;
+ bbp->itemtype = OBJ_SEQFEAT;
+
+ isp = (IntSrcBlockPtr) bbp;
+ CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
+ hasNulls = LocationHasNullsBetween (sfp->location);
+ isp->loc = SeqLocMerge (target, sfp->location, NULL, FALSE, TRUE, hasNulls);
+ SetSeqLocPartial (isp->loc, noLeft, noRight);
+ isp->left = fcontext.left;
+ isp->right = fcontext.right;
+ isp->comment = sfp->comment;
+ }
+ }
+
+ return;
+ }
+
if (awp->format != FTABLE_FMT || awp->mode == DUMP_MODE) {
/* full length loc for descriptors */
@@ -4181,56 +5529,93 @@ static void GetSourcesOnBioseq (
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_SWISSPROT) {
loop = TRUE;
+ } else if (sip->choice == SEQID_OTHER) {
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL && StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ is_wp = TRUE;
+ }
}
}
}
-
+
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
while (sdp != NULL) {
-
- /* check if descriptor on part already added on segmented bioseq */
-
- okay = TRUE;
- for (vnp = awp->srchead; vnp != NULL && okay; vnp = vnp->next) {
- bbp = (BaseBlockPtr) vnp->data.ptrvalue;
- if (bbp != NULL) {
- if (bbp->entityID == dcontext.entityID &&
- bbp->itemID == dcontext.itemID &&
- bbp->itemtype == OBJ_SEQDESC) {
- okay = FALSE;
+ ValNodeAddPointer (&sdplist, 0, (Pointer) sdp);
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop != NULL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ onp = orp->orgname;
+ if (onp != NULL) {
+ if (onp->choice == 5) {
+ for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
+ if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
+ num_super_kingdom++;
+ if (super_kingdom_name == NULL) {
+ super_kingdom_name = tep->name;
+ } else if (StringICmp (super_kingdom_name, tep->name) != 0) {
+ super_kingdoms_different = TRUE;
+ }
+ }
+ }
+ }
}
}
}
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
+ }
+
+ vnp = sdplist;
+ while (vnp != NULL) {
+ sdp = (SeqDescrPtr) vnp->data.ptrvalue;
+
+ if (sdp != NULL && sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+
+ /* check if descriptor on part already added on segmented bioseq */
+
+ okay = TRUE;
+ for (vnp2 = awp->srchead; vnp2 != NULL && okay; vnp2 = vnp2->next) {
+ bbp = (BaseBlockPtr) vnp2->data.ptrvalue;
+ if (bbp != NULL) {
+ if (bbp->entityID == ovp->idx.entityID &&
+ bbp->itemID == ovp->idx.itemID &&
+ bbp->itemtype == OBJ_SEQDESC) {
+ okay = FALSE;
+ }
+ }
+ }
- if (okay) {
- biop = (BioSourcePtr) sdp->data.ptrvalue;
- bbp = AddSource (awp, &(awp->srchead), biop, NULL);
- if (bbp != NULL) {
-
- bbp->entityID = dcontext.entityID;
- bbp->itemID = dcontext.itemID;
- bbp->itemtype = OBJ_SEQDESC;
+ if (okay) {
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ bbp = AddSource (awp, &(awp->srchead), biop, NULL);
+ if (bbp != NULL) {
- isp = (IntSrcBlockPtr) bbp;
- isp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
- isp->left = 0;
- isp->right = bsp->length - 1;
- isp->is_descriptor = TRUE;
+ bbp->entityID = ovp->idx.entityID;
+ bbp->itemID = ovp->idx.itemID;
+ bbp->itemtype = OBJ_SEQDESC;
+
+ isp = (IntSrcBlockPtr) bbp;
+ isp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
+ isp->left = 0;
+ isp->right = bsp->length - 1;
+ isp->is_descriptor = TRUE;
+ }
}
}
-
- /* if SWISS-PROT, loop through multiple source descriptors */
-
- if (loop) {
- sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext);
+
+ if ((num_super_kingdom > 1 && super_kingdoms_different && is_wp) || loop) {
+ vnp = vnp->next;
} else {
- sdp = NULL;
+ vnp = NULL;
}
}
-
+
SeqIdFree (sint.id);
}
+ ValNodeFree (sdplist);
+
if ((! awp->contig) || awp->showconsource) {
/* features are indexed on parent if segmented */
@@ -4344,7 +5729,7 @@ static Boolean LIBCALLBACK GetSourcesOnSeg (
SeqEntrySetScope (oldscope);
if (bsp != NULL) {
- GetSourcesOnBioseq (awp, awp->target, bsp, from, to);
+ GetSourcesOnBioseq (awp, awp->target, bsp, from, to, NULL);
return TRUE;
}
@@ -4375,7 +5760,7 @@ static Boolean LIBCALLBACK GetSourcesOnSeg (
to = bsp->length - 1;
}
- GetSourcesOnBioseq (awp, awp->target, bsp, from, to);
+ GetSourcesOnBioseq (awp, awp->target, bsp, from, to, NULL);
BioseqUnlock (bsp);
#endif
@@ -4539,6 +5924,21 @@ static void CleanupPackedSeqInt (SeqLocPtr location)
MemFree (slp);
}
+static Boolean x_NotSpecialTaxName (
+ CharPtr taxname
+)
+/* Returns FALSE when taxname matches one of the names tested below; returns TRUE otherwise, including when StringHasNoText (taxname) is true. */
+{
+ if (StringHasNoText (taxname)) return TRUE;
+ /* whole-name comparisons via StringICmp (presumably case-insensitive - confirm against its definition) */
+ if (StringICmp (taxname, "synthetic construct") == 0) return FALSE;
+ if (StringICmp (taxname, "artificial sequence") == 0) return FALSE;
+ if (StringStr (taxname, "vector") != NULL) return FALSE; /* presumably a substring search; only these two capitalizations are tested */
+ if (StringStr (taxname, "Vector") != NULL) return FALSE;
+ /* none of the tests above matched */
+ return TRUE;
+}
+
NLM_EXTERN void AddSourceFeatBlock (
Asn2gbWorkPtr awp
)
@@ -4547,10 +5947,11 @@ NLM_EXTERN void AddSourceFeatBlock (
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
BaseBlockPtr bbp;
+ BioSourcePtr biop;
BioseqPtr bsp;
SeqFeatPtr cds;
SeqMgrFeatContext context;
- Int4 currGi = 0;
+ BIG_ID currGi = 0;
BioseqPtr dna;
SeqLocPtr duploc;
Boolean excise;
@@ -4561,14 +5962,17 @@ NLM_EXTERN void AddSourceFeatBlock (
IntSrcBlockPtr lastisp;
IntSrcBlockPtr descrIsp;
ValNodePtr next;
+ OrgRefPtr orp;
Char pfx [128], sfx [128];
ValNodePtr PNTR prev;
+ SeqDescrPtr sdp;
SeqInt sint;
SeqIdPtr sip;
SeqLocPtr slp;
Int4 source_count = 0;
CharPtr str;
- BioseqPtr target;
+ BioseqPtr target = NULL;
+ CharPtr taxname;
ValNode vn;
ValNodePtr vnp;
Boolean descHasFocus = FALSE;
@@ -4591,29 +5995,44 @@ NLM_EXTERN void AddSourceFeatBlock (
/* collect biosources on bioseq */
awp->srchead = NULL;
- GetSourcesOnBioseq (awp, bsp, bsp, awp->from, awp->to);
- target = bsp;
- if (bsp->repr == Seq_repr_seg) {
+ if (ISA_aa (bsp->mol)) {
- /* collect biosource descriptors on local parts */
+ /* if protein, get sources applicable to DNA location of CDS */
+
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
+ if (sdp != NULL && sdp->choice == Seq_descr_source) {
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop != NULL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ taxname = orp->taxname;
+ if (StringHasNoText (taxname) || x_NotSpecialTaxName (taxname)) {
+ cds = SeqMgrGetCDSgivenProduct (bsp, &context);
+ if (cds != NULL) {
+ dna = BioseqFindFromSeqLoc (cds->location);
+ if (dna != NULL) {
+ GetSourcesOnBioseq (awp, dna, dna, context.left, context.right, cds);
+ target = dna;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
- SeqMgrExploreSegments (bsp, (Pointer) awp, GetSourcesOnSeg);
- target = awp->target;
+ if (awp->srchead == NULL) {
+ GetSourcesOnBioseq (awp, bsp, bsp, awp->from, awp->to, NULL);
+ target = bsp;
}
- if (awp->srchead == NULL && ISA_aa (bsp->mol)) {
+ if (bsp->repr == Seq_repr_seg) {
- /* if protein with no sources, get sources applicable to DNA location of CDS */
+ /* collect biosource descriptors on local parts */
- cds = SeqMgrGetCDSgivenProduct (bsp, &context);
- if (cds != NULL) {
- dna = BioseqFindFromSeqLoc (cds->location);
- if (dna != NULL) {
- GetSourcesOnBioseq (awp, dna, dna, context.left, context.right);
- target = dna;
- }
- }
+ SeqMgrExploreSegments (bsp, (Pointer) awp, GetSourcesOnSeg);
+ target = awp->target;
}
head = awp->srchead;
@@ -4642,7 +6061,7 @@ NLM_EXTERN void AddSourceFeatBlock (
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- currGi = (Int4) sip->data.intvalue;
+ currGi = (BIG_ID) sip->data.intvalue;
}
}
@@ -4749,7 +6168,7 @@ NLM_EXTERN void AddSourceFeatBlock (
/* sort by hash values */
- head = SortValNode (head, SortSourcesByHash);
+ head = ValNodeSort (head, SortSourcesByHash);
/* unique sources, excise duplicates from list */
@@ -4817,7 +6236,7 @@ NLM_EXTERN void AddSourceFeatBlock (
/* Sort again, by location this time */
- head = SortValNode (head, SortSourcesByPos);
+ head = ValNodeSort (head, SortSourcesByPos);
/* If the descriptor has a focus, then subtract */
/* out all the other source locations. */
@@ -4907,7 +6326,7 @@ static Boolean IsCDD (
return FALSE;
}
-static void SetIfpFeatCount (
+NLM_EXTERN void SetIfpFeatCount (
IntFeatBlockPtr ifp,
IntAsn2gbJobPtr ajp,
Asn2gbWorkPtr awp,
@@ -5045,7 +6464,8 @@ static void GetFeatsOnCdsProduct (
pcontext.featdeftype == FEATDEF_mat_peptide_aa ||
pcontext.featdeftype == FEATDEF_sig_peptide_aa ||
pcontext.featdeftype == FEATDEF_transit_peptide_aa ||
- (pcontext.featdeftype == FEATDEF_preprotein /* && isRefSeq */)) {
+ pcontext.featdeftype == FEATDEF_preprotein ||
+ (pcontext.featdeftype == FEATDEF_propeptide /* && isRefSeq */)) {
if (awp->hideSitesBondsRegions && (pcontext.featdeftype == FEATDEF_REGION ||
pcontext.featdeftype == FEATDEF_SITE ||
@@ -5227,7 +6647,8 @@ static void GetRemoteFeatsOnCdsProduct (
prt->idx.subtype == FEATDEF_mat_peptide_aa ||
prt->idx.subtype == FEATDEF_sig_peptide_aa ||
prt->idx.subtype == FEATDEF_transit_peptide_aa ||
- (prt->idx.subtype == FEATDEF_preprotein /* && isRefSeq */)) {
+ prt->idx.subtype == FEATDEF_preprotein ||
+ (prt->idx.subtype == FEATDEF_propeptide /* && isRefSeq */)) {
if (awp->hideSitesBondsRegions && (prt->idx.subtype == FEATDEF_REGION ||
prt->idx.subtype == FEATDEF_SITE ||
@@ -5326,6 +6747,78 @@ static Boolean NotEMBLorDDBJ (
return TRUE;
}
+/*
+static Boolean EquivProtFeats (
+ SeqFeatPtr prot1,
+ SeqFeatPtr prot2
+)
+
+{
+ ProtRefPtr prp1, prp2;
+
+ if (prot1 == NULL || prot2 == NULL) return FALSE;
+ prp1 = (ProtRefPtr) prot1->data.value.ptrvalue;
+ prp2 = (ProtRefPtr) prot2->data.value.ptrvalue;
+ if (prp1 == NULL || prp2 == NULL) return FALSE;
+
+ if (! AsnIoMemComp (prp1, prp2, (AsnWriteFunc) ProtRefAsnWrite)) return FALSE;
+
+ if (StringDoesHaveText (prot1->comment) && StringDoesHaveText (prot2->comment)) {
+ if (StringCmp (prot1->comment, prot2->comment) != 0) return FALSE;
+ }
+
+ return TRUE;
+}
+*/
+
+/*
+static Boolean EquivProtFeats (
+ SeqFeatPtr prot1,
+ SeqFeatPtr prot2
+)
+
+{
+ SeqFeatPtr cpy1, cpy2;
+ Boolean rsult = FALSE;
+ SeqLocPtr tmp;
+
+ if (prot1 == NULL || prot2 == NULL) return FALSE;
+
+ cpy1 = AsnIoMemCopy ((Pointer) prot1,
+ (AsnReadFunc) SeqFeatAsnRead,
+ (AsnWriteFunc) SeqFeatAsnWrite);
+ cpy2 = AsnIoMemCopy ((Pointer) prot2,
+ (AsnReadFunc) SeqFeatAsnRead,
+ (AsnWriteFunc) SeqFeatAsnWrite);
+ if (cpy1 == NULL || cpy2 == NULL) return FALSE;
+
+ tmp = cpy1->location;
+ cpy1->location = cpy2->location;
+
+ rsult = AsnIoMemComp (cpy1, cpy2, (AsnWriteFunc) SeqFeatAsnWrite);
+
+ cpy1->location = tmp;
+ SeqFeatFree (cpy1);
+ SeqFeatFree (cpy2);
+
+ return rsult;
+}
+*/
+
+static Boolean LocInBioseq (
+ SeqLocPtr slp,
+ BioseqPtr bsp
+)
+/* Returns the result of SeqIdIn for the id that SeqLocId yields for slp, checked against bsp->id; FALSE if slp, bsp, or that id is NULL. */
+{
+ SeqIdPtr sip;
+ /* guard both arguments before bsp->id is read below */
+ if (slp == NULL || bsp == NULL) return FALSE;
+ sip = SeqLocId (slp);
+ if (sip == NULL) return FALSE; /* SeqLocId produced no id for this location */
+ return SeqIdIn (sip, bsp->id); /* presumably TRUE when sip matches an entry of the Bioseq's id list - confirm against SeqIdIn */
+}
+
static Boolean LIBCALLBACK GetFeatsOnBioseq (
SeqFeatPtr sfp,
SeqMgrFeatContextPtr fcontext
@@ -5341,11 +6834,15 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
SeqMgrFeatContext cdscontext;
FeatBlockPtr fbp;
SeqLocPtr firstslp;
+ SeqFeatPtr gap;
GBQualPtr gbq;
+ /*
SeqFeatPtr gene;
- Int4 gi;
+ */
+ BIG_ID gi;
GeneRefPtr grp;
- Boolean juststop = FALSE;
+ Boolean has_est_len;
+ Boolean has_gap_type;
IntCdsBlockPtr icp;
Int2 idx;
IntFeatBlockPtr ifp;
@@ -5353,6 +6850,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
Boolean is_whole;
Int4Ptr ivals;
Int2 j;
+ Boolean juststop = FALSE;
SeqAnnotPtr lastsap;
SeqFeatPtr lastsfp;
SeqLocPtr lastslp;
@@ -5366,6 +6864,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
ValNodePtr ppr;
BioseqPtr prod;
ProtRefPtr prp;
+ Boolean psdo;
Boolean pseudo = FALSE;
RNAGenPtr rgp;
RnaRefPtr rrp;
@@ -5375,6 +6874,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
SeqLocPtr slp;
Int4 start;
Int4 stop;
+ Boolean supr;
TextSeqIdPtr tsip;
ValNodePtr vnp;
/*
@@ -5432,6 +6932,16 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
fcontext->left == awp->from && fcontext->right == awp->to) return TRUE;
*/
+ /*
+ if (ISA_aa (bsp->mol) && awp->format == GENPEPT_FMT && fcontext->seqfeattype == SEQFEAT_PROT) {
+ if (fcontext->left == awp->from && fcontext->right == awp->to) {
+ if (awp->bestprot != sfp) {
+ if (EquivProtFeats (awp->bestprot, sfp)) return TRUE;
+ }
+ }
+ }
+ */
+
ivals = fcontext->ivals;
numivals = fcontext->numivals;
@@ -5541,7 +7051,14 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
if (sfp->pseudo) {
pseudo = TRUE;
}
+ /*
grp = SeqMgrGetGeneXref (sfp);
+ */
+ grp = GetGeneByFeat (sfp, &psdo, &supr);
+ if (psdo) {
+ pseudo = TRUE;
+ }
+ /*
if (grp == NULL) {
sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
oldscope = SeqEntrySetScope (sep);
@@ -5554,6 +7071,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
}
}
}
+ */
if (grp != NULL && grp->pseudo) {
pseudo = TRUE;
}
@@ -5723,7 +7241,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
break;
case FEATDEF_gap:
- /* modified_base requires FTQUAL_estimated_length */
+ /* gap requires FTQUAL_estimated_length */
gbq = sfp->qual;
while (gbq != NULL) {
if (StringICmp (gbq->qual, "estimated_length") == 0 && (StringDoesHaveText (gbq->val))) {
@@ -5780,6 +7298,38 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
}
break;
+ case FEATDEF_assembly_gap:
+ /* assembly_gap requires FTQUAL_estimated_length and FTQUAL_gap_type */
+ has_est_len = FALSE;
+ has_gap_type = FALSE;
+ gbq = sfp->qual;
+ while (gbq != NULL) {
+ if (StringDoesHaveText (gbq->val)) {
+ if (StringICmp (gbq->qual, "estimated_length") == 0) {
+ has_est_len = TRUE;
+ } else if (StringICmp (gbq->qual, "gap_type") == 0) {
+ has_gap_type = TRUE;
+ }
+ }
+ gbq = gbq->next;
+ }
+ if (has_est_len && has_gap_type) {
+ okay = TRUE;
+ }
+ break;
+
+ case FEATDEF_regulatory:
+ /* regulatory requires FTQUAL_regulatory_class */
+ gbq = sfp->qual;
+ while (gbq != NULL) {
+ if (StringICmp (gbq->qual, "regulatory_class") == 0 && (StringDoesHaveText (gbq->val))) {
+ okay = TRUE;
+ break;
+ }
+ gbq = gbq->next;
+ }
+ break;
+
default:
if (fcontext->featdeftype >= FEATDEF_GENE && fcontext->featdeftype < FEATDEF_MAX) {
okay = TRUE;
@@ -5806,6 +7356,48 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
}
}
+ gap = awp->currfargap;
+ if (gap != NULL && awp->afp != NULL) {
+ while (gap != NULL && LocInBioseq (gap->location, asp->bsp) && GetOffsetInBioseq (gap->location, asp->bsp, SEQLOC_LEFT_END) < fcontext->left) {
+
+ fbp = (FeatBlockPtr) Asn2gbAddBlock (awp, FEATURE_BLOCK, sizeof (IntFeatBlock));
+ if (fbp != NULL) {
+ fbp->entityID = 0;
+ fbp->itemID = 0;
+ fbp->itemtype = OBJ_SEQFEAT;
+ fbp->featdeftype = FEATDEF_gap;
+ ifp = (IntFeatBlockPtr) fbp;
+ ifp->mapToNuc = FALSE;
+ ifp->mapToProt = FALSE;
+ ifp->mapToGen = FALSE;
+ ifp->mapToMrna = FALSE;
+ ifp->mapToPep = FALSE;
+ ifp->left = 0;
+ ifp->right = 0;
+ if (bsp != NULL) {
+ SetIfpFeatCount (ifp, ajp, awp, ISA_aa (bsp->mol));
+ }
+ ifp->firstfeat = awp->firstfeat;
+ awp->firstfeat = FALSE;
+ if (awp->afp != NULL) {
+ DoImmediateRemoteFeatureFormat (awp->afp, (BaseBlockPtr) fbp, gap);
+ }
+ }
+
+ awp->currfargap = gap->next;
+ gap = awp->currfargap;
+ }
+ }
+
+ /* check for Imp-feat gap that is same as next Seq-lit gap - but need to check against scaffold coordinate */
+ if (! NotEMBLorDDBJ (awp->bsp)) {
+ if (gap != NULL && LocInBioseq (gap->location, asp->bsp) && fcontext->featdeftype == FEATDEF_gap &&
+ GetOffsetInBioseq (gap->location, asp->bsp, SEQLOC_LEFT_END) == fcontext->left &&
+ GetOffsetInBioseq (gap->location, asp->bsp, SEQLOC_RIGHT_END) == fcontext->right) {
+ awp->currfargap = gap->next;
+ }
+ }
+
awp->lastsfp = sfp;
awp->lastsap = fcontext->sap;
awp->lastleft = fcontext->left;
@@ -5838,11 +7430,18 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
- /* this allows remote SNP, CDD, MGC, etc., not to be treated as local annotation */
- if (awp->entityID != fbp->entityID || fbp->itemID <= awp->localFeatCount) {
- awp->featseen = TRUE;
+ /* local centromere, telomere, rep_origin, and region features (e.g., on eukaryotic NC record) do not contribute to test for far fetch suppression */
+ if (sfp->idx.subtype != FEATDEF_centromere &&
+ sfp->idx.subtype != FEATDEF_telomere &&
+ sfp->idx.subtype != FEATDEF_rep_origin &&
+ sfp->idx.subtype != FEATDEF_REGION) {
+
+ /* this allows remote SNP, CDD, MGC, etc., not to be treated as local annotation */
+ if (awp->entityID != fbp->entityID || fbp->itemID <= awp->localFeatCount) {
+ awp->featseen = TRUE;
+ }
+ awp->featjustseen = TRUE;
}
- awp->featjustseen = TRUE;
if (fcontext->seqfeattype == SEQFEAT_PROT) {
@@ -5974,7 +7573,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
}
/*
-static Boolean TestGetAccnVerFromServer (Int4 gi, CharPtr buf)
+static Boolean TestGetAccnVerFromServer (BIG_ID gi, CharPtr buf)
{
Char accn [64];
@@ -6026,19 +7625,16 @@ static Boolean LIBCALLBACK GetFeatsOnSeg (
Uint4 accntype;
IntAsn2gbJobPtr ajp;
Asn2gbWorkPtr awp;
- BaseBlockPtr bbp;
BioseqPtr bsp;
time_t currTime;
Uint2 entityID;
Int4 from;
- ValNodePtr gap;
- Int4 gi;
+ BIG_ID gi;
Int4 left;
SeqLocPtr loc;
CharPtr ptr;
Int4 right;
SeqIdPtr sip;
- CharPtr str;
Int4 to;
WgsAccnPtr wap = NULL;
@@ -6048,26 +7644,6 @@ static Boolean LIBCALLBACK GetFeatsOnSeg (
ajp = awp->ajp;
if (ajp == NULL) return FALSE;
- gap = awp->currfargap;
- if (gap != NULL) {
- awp->currfargap = gap->next;
-
- str = (CharPtr) gap->data.ptrvalue;
- if (StringDoesHaveText (str)) {
-
- bbp = Asn2gbAddBlock (awp, FEATHEADER_BLOCK, sizeof (BaseBlock));
- if (bbp != NULL) {
-
- bbp->entityID = awp->entityID;
- bbp->string = StringSave (str);
-
- if (awp->afp != NULL) {
- DoImmediateFormat (awp->afp, bbp);
- }
- }
- }
- }
-
/* do not fetch outside of desired component */
if (ajp->ajp.slp != NULL) {
@@ -6097,7 +7673,7 @@ static Boolean LIBCALLBACK GetFeatsOnSeg (
if (awp->farFeatTimeLimit) {
if (sip->choice == SEQID_GI) {
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (GetAccnVerFromServer (gi, accn)) {
ptr = StringChr (accn, '.');
if (ptr != NULL) {
@@ -6187,6 +7763,7 @@ NLM_EXTERN void AddFeatureBlock (
Boolean is_other;
MolInfoPtr mip;
SeqFeatPtr mrna;
+ SeqMgrFeatContext pcontext;
SeqFeatPtr prot;
SeqDescrPtr sdp;
SeqIdPtr sip;
@@ -6210,6 +7787,8 @@ NLM_EXTERN void AddFeatureBlock (
awp->sig_pept_trim_len = 0;
if (awp->format == GENPEPT_FMT && ISA_aa (bsp->mol)) {
+ awp->bestprot = SeqMgrGetBestProteinFeature (bsp, &pcontext);
+
prot = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
while (prot != NULL) {
if (fcontext.featdeftype == FEATDEF_sig_peptide_aa ||
@@ -6271,7 +7850,7 @@ NLM_EXTERN void AddFeatureBlock (
}
awp->farFeatTimeLimit = FALSE;
- if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta) {
+ if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_ref) {
if (awp->mode == ENTREZ_MODE) {
awp->farFeatTimeLimit = TRUE;
}
@@ -6291,7 +7870,7 @@ NLM_EXTERN void AddFeatureBlock (
if (! awp->onlyNearFeats) {
if (awp->farFeatsSuppress) {
- if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta) {
+ if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_ref) {
/* get start time for 25 second timeout in Web Entrez far WGS records */
@@ -6299,8 +7878,6 @@ NLM_EXTERN void AddFeatureBlock (
awp->farFeatStartTime = GetSecs ();
}
- awp->currfargap = ajp->fargaps;
-
/* if farFeatsSuppress first collect features on remote segments in MASTER_STYLE */
SeqMgrExploreSegments (bsp, (Pointer) awp, GetFeatsOnSeg);
@@ -6429,7 +8006,7 @@ NLM_EXTERN void AddFeatureBlock (
if (! awp->farFeatsSuppress) {
- if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta) {
+ if (bsp->repr == Seq_repr_seg || bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_ref) {
/* get start time for 25 second timeout in Web Entrez far WGS records */
@@ -6437,8 +8014,6 @@ NLM_EXTERN void AddFeatureBlock (
awp->farFeatStartTime = GetSecs ();
}
- awp->currfargap = ajp->fargaps;
-
/* if not farFeatsSuppress now collect features on remote segments in MASTER_STYLE */
SeqMgrExploreSegments (bsp, (Pointer) awp, GetFeatsOnSeg);
diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c
index 966aa1a9..10943129 100644
--- a/api/asn2gnb4.c
+++ b/api/asn2gnb4.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.276 $
+* $Revision: 1.328 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -64,19 +64,19 @@
#endif
#endif
-static CharPtr link_muid = "http://www.ncbi.nlm.nih.gov/pubmed/";
+static CharPtr link_muid = "https://www.ncbi.nlm.nih.gov/pubmed/";
-static CharPtr link_go = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&depth=1&query=GO:";
+static CharPtr link_go = "http://amigo.geneontology.org/amigo/term/GO:";
static CharPtr link_go_ref = "http://www.geneontology.org/cgi-bin/references.cgi#GO_REF:";
-static CharPtr link_code = "http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?";
+static CharPtr link_code = "https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?";
-static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
-static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
+static CharPtr link_featn = "https://www.ncbi.nlm.nih.gov/nuccore/";
+static CharPtr link_featp = "https://www.ncbi.nlm.nih.gov/protein/";
-static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
-static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
+static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
+static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
/*
static CharPtr ec_link = "http://www.expasy.org/cgi-bin/nicezyme.pl?";
@@ -89,6 +89,12 @@ static CharPtr ec_link = "http://www.expasy.org/enzyme/";
/* ordering arrays for qualifiers and note components */
static FtQualType feat_qual_order [] = {
+ FTQUAL_ncRNA_class,
+ FTQUAL_ncRNA_other,
+
+ FTQUAL_regulatory_class,
+ FTQUAL_regulatory_other,
+
FTQUAL_partial,
FTQUAL_gene,
@@ -102,9 +108,6 @@ static FtQualType feat_qual_order [] = {
FTQUAL_operon,
- FTQUAL_ncRNA_class,
- FTQUAL_ncRNA_other,
-
FTQUAL_product,
FTQUAL_prot_EC_number,
@@ -230,6 +233,7 @@ static FtQualType feat_note_order [] = {
FTQUAL_prot_comment,
FTQUAL_prot_method,
FTQUAL_ncRNA_note,
+ FTQUAL_regulatory_note,
FTQUAL_figure,
FTQUAL_maploc,
FTQUAL_prot_conflict,
@@ -364,6 +368,9 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "pyrrolysine", Qual_class_string },
{ "region", Qual_class_region },
{ "region_name", Qual_class_string },
+ { "regulatory_class", Qual_class_quote },
+ { "regulatory_note", Qual_class_string },
+ { "regulatory_class", Qual_class_string },
{ "replace", Qual_class_replace },
{ "ribosomal_slippage", Qual_class_boolean },
{ "rpt_family", Qual_class_quote },
@@ -407,7 +414,7 @@ typedef struct qualfeatur {
FtQualType featurclass;
} QualFeatur, PNTR QualFeaturPtr;
-#define NUM_GB_QUALS 45
+#define NUM_GB_QUALS 46
static QualFeatur qualToFeature [NUM_GB_QUALS] = {
{ "allele", FTQUAL_allele },
@@ -443,6 +450,7 @@ static QualFeatur qualToFeature [NUM_GB_QUALS] = {
{ "product", FTQUAL_product_quals },
{ "pseudogene", FTQUAL_pseudogene },
{ "rad_map", FTQUAL_gene_rad_map },
+ { "regulatory_class", FTQUAL_regulatory_class },
{ "replace", FTQUAL_replace },
{ "rpt_family", FTQUAL_rpt_family },
{ "rpt_type", FTQUAL_rpt_type },
@@ -1529,7 +1537,31 @@ static ValQual legalGbqualList [] = {
{ FEATDEF_telomere , FTQUAL_rpt_type },
{ FEATDEF_telomere , FTQUAL_rpt_unit_range },
{ FEATDEF_telomere , FTQUAL_rpt_unit_seq },
- { FEATDEF_telomere , FTQUAL_standard_name }
+ { FEATDEF_telomere , FTQUAL_standard_name },
+
+ { FEATDEF_assembly_gap , FTQUAL_estimated_length },
+ { FEATDEF_assembly_gap , FTQUAL_gap_type },
+ { FEATDEF_assembly_gap , FTQUAL_linkage_evidence },
+
+ { FEATDEF_regulatory , FTQUAL_allele },
+ { FEATDEF_regulatory , FTQUAL_bound_moiety },
+ { FEATDEF_regulatory , FTQUAL_function },
+ { FEATDEF_regulatory , FTQUAL_map },
+ { FEATDEF_regulatory , FTQUAL_old_locus_tag },
+ { FEATDEF_regulatory , FTQUAL_operon },
+ { FEATDEF_regulatory , FTQUAL_phenotype },
+ { FEATDEF_regulatory , FTQUAL_regulatory_class },
+ { FEATDEF_regulatory , FTQUAL_regulatory_other },
+ { FEATDEF_regulatory , FTQUAL_standard_name },
+
+ { FEATDEF_propeptide , FTQUAL_allele },
+ { FEATDEF_propeptide , FTQUAL_EC_number },
+ { FEATDEF_propeptide , FTQUAL_function },
+ { FEATDEF_propeptide , FTQUAL_label },
+ { FEATDEF_propeptide , FTQUAL_map },
+ { FEATDEF_propeptide , FTQUAL_old_locus_tag },
+ { FEATDEF_propeptide , FTQUAL_product },
+ { FEATDEF_propeptide , FTQUAL_standard_name }
};
/* comparison of ValQual's -- first compare featdef then ftqual */
@@ -1575,7 +1607,22 @@ static Boolean AllowedValQual (Uint2 featureKey, FtQualType qualKey, Boolean for
static CharPtr validRptString [] = {
- "tandem", "inverted", "flanking", "terminal", "direct", "dispersed", "other", NULL
+ "tandem",
+ "inverted",
+ "flanking",
+ "nested",
+ "terminal",
+ "direct",
+ "dispersed",
+ "long_terminal_repeat",
+ "non_LTR_retrotransposon_polymeric_tract",
+ "X_element_combinatorial_repeat",
+ "Y_prime_element",
+ "telomeric_repeat",
+ "centromeric_repeat",
+ "engineered_foreign_repetitive_element",
+ "other",
+ NULL
};
static CharPtr validLRBString [] = {
@@ -1717,6 +1764,7 @@ static Int2 ValidateAccnInternal (
if (numAlpha == 4 && numDigits == 9) return 0;
if (numAlpha == 5 && numDigits == 7) return 0;
} else if (numUndersc == 1) {
+ if (numAlpha == 3 && numDigits == 6 && StringNCmp (accession, "MAP_", 4) == 0) return 0;
if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return -2;
if (accession [0] == 'N' || accession [0] == 'X' || accession [0] == 'Z') {
if (accession [1] == 'M' ||
@@ -1731,7 +1779,7 @@ static Int2 ValidateAccnInternal (
return 0;
}
}
- if (accession [0] == 'A' || accession [0] == 'Y') {
+ if (accession [0] == 'A' || accession [0] == 'W' || accession [0] == 'Y') {
if (accession [1] == 'P') return 0;
}
}
@@ -1807,14 +1855,15 @@ static void GetStrFormRNAEvidence (
)
{
- Int2 ce = 0, cm = 0, cp = 0, ne = 0, nm = 0, np = 0;
+ Int2 ce = 0, cm = 0, cp = 0, cr = 0;
+ Int2 ne = 0, nm = 0, np = 0, nr = 0, cov = 0, ful = 0;
Boolean has_counts = FALSE;
size_t len;
CharPtr method = NULL, prefix = NULL;
ObjectIdPtr oip;
CharPtr str = NULL;
CharPtr PNTR strp;
- Char tmp [20];
+ Char tmp [150];
UserFieldPtr u, ufp, uu;
if (uop == NULL) return;
@@ -1825,7 +1874,14 @@ static void GetStrFormRNAEvidence (
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
- if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "rnaseq_base_coverage") == 0) {
+ cov = (Int2) ufp->data.intvalue;
+ } else if (StringCmp (oip->str, "rnaseq_biosamples_introns_full") == 0) {
+ ful = (Int2) ufp->data.intvalue;
+ } else if (ufp->data.ptrvalue == NULL) {
+ continue;
+ }
if (StringCmp (oip->str, "Method") == 0) {
method = StringSaveNoNull ((CharPtr) ufp->data.ptrvalue);
} else if (StringCmp (oip->str, "mRNA") == 0) {
@@ -1850,6 +1906,17 @@ static void GetStrFormRNAEvidence (
}
}
}
+ } else if (StringCmp (oip->str, "long SRA read") == 0) {
+ for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
+ if (u->data.ptrvalue == NULL) continue;
+ for (uu = (UserFieldPtr) u->data.ptrvalue; uu != NULL; uu = uu->next) {
+ oip = uu->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "accession") == 0) {
+ nr++;
+ }
+ }
+ }
} else if (StringCmp (oip->str, "Protein") == 0) {
for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
if (u->data.ptrvalue == NULL) continue;
@@ -1872,6 +1939,8 @@ static void GetStrFormRNAEvidence (
cm = (Int2) u->data.intvalue;
} else if (StringCmp (oip->str, "EST") == 0) {
ce = (Int2) u->data.intvalue;
+ } else if (StringCmp (oip->str, "long SRA read") == 0) {
+ cr = (Int2) u->data.intvalue;
} else if (StringCmp (oip->str, "Protein") == 0) {
cp = (Int2) u->data.intvalue;
}
@@ -1883,9 +1952,10 @@ static void GetStrFormRNAEvidence (
nm = cm;
ne = ce;
np = cp;
+ nr = cr;
}
- len = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 80;
+ len = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 300;
str = (CharPtr) MemNew (len);
if (str == NULL) return;
@@ -1894,7 +1964,7 @@ static void GetStrFormRNAEvidence (
} else {
sprintf (str, "%s.", mrnaevtext1);
}
- if (nm > 0 || ne > 0 || np > 0) {
+ if (nm > 0 || ne > 0 || np > 0 || nr > 0 || cov > 0) {
StringCat (str, " ");
StringCat (str, mrnaevtext3);
}
@@ -1919,6 +1989,16 @@ static void GetStrFormRNAEvidence (
StringCat (str, tmp);
prefix = ", ";
}
+ if (nr > 0) {
+ StringCat (str, prefix);
+ if (nr > 1) {
+ sprintf (tmp, "%d long SRA reads", (int) nr);
+ } else {
+ sprintf (tmp, "%d long SRA read", (int) nr);
+ }
+ StringCat (str, tmp);
+ prefix = ", ";
+ }
if (np > 0) {
StringCat (str, prefix);
if (np > 1) {
@@ -1929,6 +2009,24 @@ static void GetStrFormRNAEvidence (
StringCat (str, tmp);
prefix = ", ";
}
+ if (has_counts) {
+ prefix = ", and ";
+ }
+ if (cov > 0) {
+ StringCat (str, prefix);
+ sprintf (tmp, "%d%s coverage of the annotated genomic feature by RNAseq alignments", (int) cov, "%");
+ StringCat (str, tmp);
+ prefix = ", ";
+ if (ful > 0) {
+ StringCat (str, prefix);
+ if (ful > 1) {
+ sprintf (tmp, "including %d samples with support for all annotated introns", (int) ful);
+ } else {
+ sprintf (tmp, "including %d sample with support for all annotated introns", (int) ful);
+ }
+ StringCat (str, tmp);
+ }
+ }
*strp = str;
}
@@ -2022,6 +2120,7 @@ static CharPtr GetCombinedGOtext (
)
{
+ Boolean add_dash, is_www;
UserFieldPtr entry, topufp, ufp;
CharPtr evidence, goid, goref, last = NULL,
str, textstr, prefix;
@@ -2029,7 +2128,6 @@ static CharPtr GetCombinedGOtext (
Char gid [32], tmp [32];
GoStrucPtr gsp;
ValNodePtr head = NULL, vnp;
- Boolean is_www;
Int2 j;
ObjectIdPtr oip;
Int4 pmid;
@@ -2117,6 +2215,7 @@ static CharPtr GetCombinedGOtext (
if (prefix != NULL) {
FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
}
+ add_dash = FALSE;
if (StringDoesHaveText (gsp->goid)) {
FFAddOneString (ffstring, "GO:", FALSE, TRUE, TILDE_IGNORE);
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
@@ -2125,9 +2224,12 @@ static CharPtr GetCombinedGOtext (
FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, gsp->goid, FALSE, TRUE, TILDE_IGNORE);
FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ add_dash = TRUE;
}
if (StringDoesHaveText (gsp->term)) {
- FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
+ if (add_dash) {
+ FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
+ }
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
FF_Add_NCBI_Base_URL (ffstring, link_go);
FFAddOneString (ffstring, gsp->goid, FALSE, FALSE, TILDE_IGNORE);
@@ -2195,12 +2297,16 @@ static CharPtr GetCombinedGOtext (
if (prefix != NULL) {
FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
}
+ add_dash = FALSE;
if (StringDoesHaveText (gsp->goid)) {
FFAddOneString (ffstring, "GO:", FALSE, TRUE, TILDE_IGNORE);
FFAddOneString (ffstring, gsp->goid, FALSE, TRUE, TILDE_IGNORE);
+ add_dash = TRUE;
}
if (StringDoesHaveText (gsp->term)) {
- FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
+ if (add_dash) {
+ FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
+ }
FFAddOneString (ffstring, gsp->term, FALSE, TRUE, TILDE_IGNORE);
}
}
@@ -2249,6 +2355,7 @@ static CharPtr GetGOtext (
)
{
+ Boolean add_dash;
CharPtr evidence = NULL;
StringItemPtr ffstring;
Char gid [32];
@@ -2311,6 +2418,7 @@ static CharPtr GetGOtext (
if (is_www) {
ffstring = FFGetString (ajp);
if (ffstring != NULL) {
+ add_dash = FALSE;
if (StringDoesHaveText (goid)) {
FFAddOneString (ffstring, "GO:", FALSE, TRUE, TILDE_IGNORE);
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
@@ -2319,10 +2427,13 @@ static CharPtr GetGOtext (
FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, goid, FALSE, TRUE, TILDE_IGNORE);
FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ add_dash = TRUE;
}
if (StringDoesHaveText (textstr)) {
- FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
+ if (add_dash) {
+ FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
+ }
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
FF_Add_NCBI_Base_URL (ffstring, link_go);
FFAddOneString (ffstring, goid, FALSE, FALSE, TILDE_IGNORE);
@@ -2373,13 +2484,17 @@ static CharPtr GetGOtext (
StringLen (evidence) + StringLen (goref) + 100);
if (str == NULL) return NULL;
+ add_dash = FALSE;
if (StringDoesHaveText (goid)) {
StringCat (str, "GO:");
StringCat (str, goid);
+ add_dash = TRUE;
}
if (StringDoesHaveText (textstr)) {
- StringCat (str, " - ");
+ if (add_dash) {
+ StringCat (str, " - ");
+ }
StringCat (str, textstr);
}
@@ -2578,7 +2693,7 @@ static void FF_www_nuc_or_prot_id (
IntAsn2gbJobPtr ajp,
StringItemPtr ffstring,
CharPtr seqid,
- Int4 gi,
+ BIG_ID gi,
Boolean is_na
)
{
@@ -2909,13 +3024,13 @@ static void FormatFeatureBlockQuals (
Char anticodon [8];
Boolean at_end = FALSE;
ByteStorePtr bs;
- Char buf [80];
+ Char buf [512];
Choice cbaa;
CodeBreakPtr cbp;
Char ch;
Uint1 choice;
ValNodePtr citlist;
- Int4 gi;
+ BIG_ID gi;
Boolean hadProtDesc = FALSE;
DbtagPtr dbt;
DeltaItemPtr dip;
@@ -3884,17 +3999,18 @@ static void FormatFeatureBlockQuals (
aa = qvp [FTQUAL_trna_aa].str;
if (slp != NULL && StringDoesHaveText (aa)) {
+ if (StringICmp (aa, "fMet") == 0 || StringICmp (aa, "iMet") == 0) {
+ aa++;
+ }
+
anticodon [0] = '\0';
- if (ajp->refseqConventions && SeqLocLen (slp) == 3) {
+ if (SeqLocLen (slp) == 3) {
str = GetSequenceByLocation (slp);
if (str != NULL) {
ptr = str;
ch = *ptr;
while (ch != '\0') {
- ch = TO_UPPER (ch);
- if (ch == 'T') {
- ch = 'U';
- }
+ ch = TO_LOWER(ch);
*ptr = ch;
ptr++;
ch = *ptr;
@@ -4101,7 +4217,7 @@ static void FormatFeatureBlockQuals (
if (okay) {
if (! StringHasNoText (oip->str)) {
- if (StringLen (oip->str) < 80) {
+ if (StringLen (oip->str) < 400) {
sprintf (buf, "%s", oip->str);
}
} else {
@@ -4240,7 +4356,7 @@ static void FormatFeatureBlockQuals (
gi = 0;
for (sip = prod->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
choice = 0;
@@ -4268,10 +4384,12 @@ static void FormatFeatureBlockQuals (
FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
}
- sprintf (seqid, "%ld", (long) sip->data.intvalue);
- FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
- FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ if (! ajp->hideGI) {
+ sprintf (seqid, "%ld", (long) sip->data.intvalue);
+ FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
+ FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ }
} else if (sip->choice == SEQID_GENERAL) {
dbt = (DbtagPtr) sip->data.ptrvalue;
if (dbt != NULL && StringCmp (dbt->db, "PID") == 0) {
@@ -4304,7 +4422,7 @@ static void FormatFeatureBlockQuals (
}
} else {
if (sip->choice == SEQID_GI) {
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (GetAccnVerFromServer (gi, seqid)) {
#ifdef OS_UNIX
if (getenv ("ASN2GB_PSF_DEBUG") != NULL) {
@@ -4346,10 +4464,12 @@ static void FormatFeatureBlockQuals (
}
}
- sprintf (seqid, "%ld", (long) gi);
- FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
- FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ if (! ajp->hideGI) {
+ sprintf (seqid, "%ld", (long) gi);
+ FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
+ FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ }
} else if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
gi = GetGIForSeqId (sip);
if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
@@ -4362,10 +4482,12 @@ static void FormatFeatureBlockQuals (
}
if (gi > 0) {
- sprintf (seqid, "%ld", (long) gi);
- FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
- FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
- FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ if (! ajp->hideGI) {
+ sprintf (seqid, "%ld", (long) gi);
+ FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
+ FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ }
}
}
}
@@ -4513,7 +4635,7 @@ static void FormatFeatureBlockQuals (
case Qual_class_peptide :
if (qvp [idx].ble) {
- if (ajp->showPeptide) {
+ if (ajp->showPeptide || (ajp->gbseq != NULL && ajp->format == GENPEPT_FMT && ISA_aa (bsp->mol))) {
str = GetSequenceByFeature (sfp);
if (str != NULL) {
ptr = str;
@@ -5037,8 +5159,8 @@ static void FormatFeatureBlockQuals (
break;
case Qual_class_model_ev :
- uop = qvp [jdx].uop;
- if (uop != NULL) {
+ uop = qvp [jdx].uop;
+ if (uop != NULL && qvp [FTQUAL_experiment].gbq == NULL) {
str = NULL;
VisitUserObjectsInUop (sfp->ext, (Pointer) &str, GetStrFormRNAEvidence);
if (! StringHasNoText (str)) {
@@ -5086,7 +5208,7 @@ static void FormatFeatureBlockQuals (
}
} else {
if (sip->choice == SEQID_GI) {
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (GetAccnVerFromServer (gi, seqid)) {
if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
@@ -5156,7 +5278,7 @@ static void FormatFeatureBlockQuals (
}
-NLM_EXTERN void FF_asn2gb_www_featkey (
+NLM_EXTERN void FF_asn2gb_www_featkey_Ex (
StringItemPtr ffstring,
CharPtr key,
SeqFeatPtr sfp,
@@ -5164,17 +5286,19 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
Int4 from,
Int4 to,
Uint1 strand,
- Uint4 itemID
+ Uint4 itemID,
+ Int2 sat,
+ Int4 sat_key
)
{
BioseqPtr bsp;
- Char buf [16];
+ Char buf [32];
Int4 featID = 0;
Int4 ffrom = 0;
Int4 fto = 0;
- Int4 gi = 0;
- Char gi_buf[16];
+ BIG_ID gi = 0;
+ Char gi_buf[32];
Boolean is_aa = FALSE;
ObjectIdPtr oip;
CharPtr prefix = "?";
@@ -5187,7 +5311,7 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
is_aa = ISA_aa (bsp->mol);
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
} else {
@@ -5199,7 +5323,7 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
}
sip = SeqLocId (slp);
if (sip != NULL && sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
if (slp->choice == SEQLOC_INT) {
@@ -5209,7 +5333,7 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
fto = sintp->to + 1;
sip = sintp->id;
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
} else if (slp->choice == SEQLOC_PNT) {
@@ -5219,7 +5343,7 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
fto = spp->point + 1;
sip = spp->id;
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
}
@@ -5239,6 +5363,14 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
sprintf (buf, "%ld", (long) featID);
FFAddOneString(ffstring, "?featID=", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ if (sat > 0 && sat_key > 0) {
+ sprintf (buf, "%d", (int) sat);
+ FFAddOneString(ffstring, "&sat=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%ld", (long) sat_key);
+ FFAddOneString(ffstring, "&sat_key=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ }
prefix = "&";
} else if (ffrom > 0 && fto > 0) {
sprintf (buf, "%ld", (long) ffrom);
@@ -5247,11 +5379,27 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
sprintf (buf, "%ld", (long) fto);
FFAddOneString(ffstring, "&to=", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ if (sat > 0 && sat_key > 0) {
+ sprintf (buf, "%d", (int) sat);
+ FFAddOneString(ffstring, "&sat=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%ld", (long) sat_key);
+ FFAddOneString(ffstring, "&sat_key=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ }
prefix = "&";
} else if (itemID > 0) {
sprintf (buf, "%ld", (long) itemID);
FFAddOneString(ffstring, "?itemid=", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ if (sat > 0 && sat_key > 0) {
+ sprintf (buf, "%d", (int) sat);
+ FFAddOneString(ffstring, "&sat=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ sprintf (buf, "%ld", (long) sat_key);
+ FFAddOneString(ffstring, "&sat_key=", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
+ }
prefix = "&";
} else if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL) {
sip = SeqLocId (sfp->product);
@@ -5281,6 +5429,22 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
}
+NLM_EXTERN void FF_asn2gb_www_featkey (
+ StringItemPtr ffstring,
+ CharPtr key,
+ SeqFeatPtr sfp,
+ SeqLocPtr slp,
+ Int4 from,
+ Int4 to,
+ Uint1 strand,
+ Uint4 itemID
+)
+
+{
+ FF_asn2gb_www_featkey_Ex (ffstring, key, sfp, slp, from, to, strand, itemID, 0, 0);
+}
+
+
NLM_EXTERN SeqIdPtr SeqLocIdForProduct (
SeqLocPtr product
)
@@ -5381,7 +5545,7 @@ NLM_EXTERN void AddIntervalsToGbfeat (
Int4 from;
IntFuzzPtr fuzz;
GBIntervalPtr gbint;
- Int4 gi;
+ BIG_ID gi;
Boolean interbp;
Boolean iscomp;
GBIntervalPtr last = NULL;
@@ -5466,7 +5630,7 @@ NLM_EXTERN void AddIntervalsToGbfeat (
if (sip != NULL) {
accn [0] = '\0';
if (sip->choice == SEQID_GI) {
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
if (! GetAccnVerFromServer (gi, accn)) {
accn [0] = '\0';
}
@@ -6197,7 +6361,7 @@ NLM_EXTERN GeneRefPtr GetGeneByFeat (
*/
gene = GetGeneByXref (bspx, grp);
}
- if (gene != NULL) {
+ if (gene != NULL && gene->data.choice == SEQFEAT_GENE) {
grp = (GeneRefPtr) gene->data.value.ptrvalue;
if (gene->pseudo) {
pseudo = TRUE;
@@ -6234,6 +6398,27 @@ NLM_EXTERN GeneRefPtr GetGeneByFeat (
return grp;
}
+typedef struct reg_feat {
+ CharPtr feat_key;
+ CharPtr reg_class;
+} RegFeatData, PNTR RegFeatPtr;
+
+static RegFeatData reg_feat_keys [] = {
+ { "enhancer", "enhancer" },
+ { "promoter", "promoter" },
+ { "CAAT_signal", "CAAT_signal" },
+ { "TATA_signal", "TATA_box" },
+ { "-35_signal", "minus_35_signal" },
+ { "-10_signal", "minus_10_signal" },
+ { "GC_signal", "GC_signal" },
+ { "RBS", "ribosome_binding_site" },
+ { "polyA_signal", "polyA_signal_sequence" },
+ { "attenuator", "attenuator" },
+ { "terminator", "terminator" },
+ { "misc_signal", "other" },
+ { NULL, NULL }
+};
+
static CharPtr FormatFeatureBlockEx (
Asn2gbFormatPtr afp,
IntAsn2gbJobPtr ajp,
@@ -6265,7 +6450,7 @@ static CharPtr FormatFeatureBlockEx (
Char ch;
Uint1 code = Seq_code_ncbieaa;
CdRegionPtr crp;
- Int4 currGi = 0;
+ BIG_ID currGi = 0;
SeqMgrDescContext dcontext;
Boolean encode_prefix = FALSE;
CharPtr exception_note = NULL;
@@ -6285,6 +6470,7 @@ static CharPtr FormatFeatureBlockEx (
ValNodePtr good_inference = NULL;
GeneRefPtr grp = NULL;
Boolean hetPop = FALSE;
+ int i;
IntAsn2gbSectPtr iasp;
IntCdsBlockPtr icp;
Uint2 idx;
@@ -6313,7 +6499,7 @@ static CharPtr FormatFeatureBlockEx (
SeqMgrFeatContext mcontext;
MolInfoPtr mip;
SeqFeatPtr mrna;
- SeqLocPtr newloc;
+ SeqLocPtr newloc = NULL;
Boolean noLeft;
Boolean noRight;
SeqLocPtr nslp = NULL;
@@ -6322,6 +6508,7 @@ static CharPtr FormatFeatureBlockEx (
SeqEntryPtr oldscope;
SeqFeatPtr operon = NULL;
Uint2 partial;
+ BioseqPtr pbsp;
SeqMgrFeatContext pcontext;
Char pfx [128], sfx [128];
BioseqPtr prd;
@@ -6336,6 +6523,7 @@ static CharPtr FormatFeatureBlockEx (
CharPtr ptr;
Uint2 pEID;
Int2 qualclass;
+ CharPtr regulatory_class = NULL;
Uint1 residue;
RNAGenPtr rgp;
Boolean riboSlippage = FALSE;
@@ -6366,6 +6554,7 @@ static CharPtr FormatFeatureBlockEx (
VariationRefPtr vrp;
VarRefDataSetPtr vsp;
ValNodePtr vnp;
+ SeqFeatXrefPtr xref;
SeqLocPtr xslp = NULL;
StringItemPtr ffstring;
/*
@@ -6554,7 +6743,7 @@ static CharPtr FormatFeatureBlockEx (
key = "Site";
}
if (ifp->mapToPep) {
- if (featdeftype >= FEATDEF_preprotein && featdeftype <= FEATDEF_transit_peptide_aa) {
+ if (featdeftype >= FEATDEF_preprotein && featdeftype <= FEATDEF_propeptide) {
key = "Precursor";
itemID = 0;
}
@@ -6580,6 +6769,13 @@ static CharPtr FormatFeatureBlockEx (
}
}
*/
+ if (featdeftype == FEATDEF_propeptide) {
+ if (format == GENPEPT_FMT && isProt) {
+ } else if (is_other) {
+ } else if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ key = "misc_feature";
+ }
+ }
/* deal with unmappable impfeats */
@@ -6595,9 +6791,17 @@ static CharPtr FormatFeatureBlockEx (
key = "repeat_region";
}
+ for (i = 0; reg_feat_keys [i].feat_key != NULL; i++) {
+ if (StringICmp (key, reg_feat_keys [i].feat_key) == 0) {
+ key = "regulatory";
+ regulatory_class = reg_feat_keys [i].reg_class;
+ break;
+ }
+ }
+
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- currGi = (Int4) sip->data.intvalue;
+ currGi = (BIG_ID) sip->data.intvalue;
}
}
@@ -6634,8 +6838,17 @@ static CharPtr FormatFeatureBlockEx (
StringICmp (key, "gap") != 0 &&
StringICmp (key, "assembly_gap") != 0 &&
bsp != NULL /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) {
- FF_asn2gb_www_featkey (ffstring, key, sfp, sfp->location, fcontext->left + 1, fcontext->right + 1,
- fcontext->strand, itemID);
+ if (ifp->mapToNuc) {
+ pbsp = BioseqFindFromSeqLoc (location);
+ left = GetOffsetInBioseq (location, pbsp, SEQLOC_LEFT_END);
+ right = GetOffsetInBioseq (location, pbsp, SEQLOC_RIGHT_END);
+ strand = SeqLocStrand (location);
+ FF_asn2gb_www_featkey_Ex (ffstring, key, sfp, location, left + 1, right + 1,
+ strand, 0, ajp->sat, ajp->sat_key);
+ } else {
+ FF_asn2gb_www_featkey_Ex (ffstring, key, sfp, sfp->location, fcontext->left + 1, fcontext->right + 1,
+ fcontext->strand, itemID, ajp->sat, ajp->sat_key);
+ }
} else {
FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
}
@@ -6675,7 +6888,6 @@ static CharPtr FormatFeatureBlockEx (
if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans && featdeftype < FEATDEF_MAX) {
js = AddJsInterval (iasp, pfx, target, featdeftype, newloc);
}
- SeqLocFree (newloc);
/*
thirdloc = SeqLoc2Str (ajp->ajp.slp);
if (StringCmp (str, "?") != 0) {
@@ -6719,10 +6931,18 @@ static CharPtr FormatFeatureBlockEx (
}
gbfeat->partial5 = fcontext->partialL;
gbfeat->partial3 = fcontext->partialR;
- if (ajp->masterStyle) {
- AddIntervalsToGbfeat (gbfeat, location, target);
+ if (newloc != NULL) {
+ if (ajp->masterStyle) {
+ AddIntervalsToGbfeat (gbfeat, newloc, target);
+ } else {
+ AddIntervalsToGbfeat (gbfeat, newloc, NULL);
+ }
} else {
- AddIntervalsToGbfeat (gbfeat, location, NULL);
+ if (ajp->masterStyle) {
+ AddIntervalsToGbfeat (gbfeat, location, target);
+ } else {
+ AddIntervalsToGbfeat (gbfeat, location, NULL);
+ }
}
}
}
@@ -6735,6 +6955,10 @@ static CharPtr FormatFeatureBlockEx (
locforgene = sfp->location;
}
+ if (newloc != NULL) {
+ SeqLocFree (newloc);
+ }
+
if (location != NULL) {
ifp->left = GetOffsetInBioseq (location, bsp, SEQLOC_LEFT_END);
ifp->right = GetOffsetInBioseq (location, bsp, SEQLOC_RIGHT_END);
@@ -6747,6 +6971,10 @@ static CharPtr FormatFeatureBlockEx (
sfp->partial = FlatAnnotPartial(sfp, use_product);
*/
+ if (regulatory_class != NULL) {
+ qvp [FTQUAL_regulatory_other].str = regulatory_class;
+ }
+
if (sfp->partial) {
partial = SeqLocPartialCheck (location);
if (partial == SLP_COMPLETE /* || partial > SLP_OTHER */ ) {
@@ -6871,7 +7099,7 @@ static CharPtr FormatFeatureBlockEx (
*/
gene = GetGeneByXref (bspx, grp);
}
- if (gene != NULL) {
+ if (gene != NULL && gene->data.choice == SEQFEAT_GENE) {
grp = (GeneRefPtr) gene->data.value.ptrvalue;
if (gene->pseudo) {
pseudo = TRUE;
@@ -6884,6 +7112,44 @@ static CharPtr FormatFeatureBlockEx (
}
}
}
+
+ /* if feature ID xref, find referenced gene, treat as if overlapped */
+
+ } else if (featid != NULL) {
+ bspx = BioseqFindFromSeqLoc (sfp->location);
+ if (bspx != NULL) {
+ gene = NULL;
+ for (xref = sfp->xref; xref != NULL; xref = xref->next) {
+ if (xref->id.choice != 3) continue;
+ oip = (ObjectIdPtr) xref->id.value.ptrvalue;
+ if (oip == NULL) continue;
+ featid = NULL;
+ if (StringDoesHaveText (oip->str)) {
+ featid = oip->str;
+ } else {
+ sprintf (fbuf, "%ld", (long) oip->id);
+ featid = fbuf;
+ }
+ if (featid == NULL) continue;
+ gene = SeqMgrGetFeatureByFeatID (0, bspx, featid, NULL, &gcontext);
+ if (gene == NULL) continue;
+ /* if found, break out of loop */
+ if (gene->data.choice == SEQFEAT_GENE) break;
+ /* otherwise xref is to other kind of feature, null out gene and keep looking */
+ gene = NULL;
+ }
+ if (gene != NULL && gene->data.choice == SEQFEAT_GENE) {
+ grp = (GeneRefPtr) gene->data.value.ptrvalue;
+ if (gene->pseudo) {
+ pseudo = TRUE;
+ }
+ if (grp != NULL && grp->db != NULL) {
+ qvp [FTQUAL_gene_xref].vnp = grp->db;
+ } else {
+ qvp [FTQUAL_gene_xref].vnp = gene->dbxref;
+ }
+ }
+ }
}
if (! suppressed) {
@@ -6967,8 +7233,7 @@ static CharPtr FormatFeatureBlockEx (
}
if (grp != NULL &&
- ((featdeftype != FEATDEF_repeat_region &&
- featdeftype != FEATDEF_mobile_element &&
+ ((featdeftype != FEATDEF_mobile_element &&
featdeftype != FEATDEF_centromere &&
featdeftype != FEATDEF_telomere) ||
is_ed || gene == NULL)) {
@@ -6999,16 +7264,14 @@ static CharPtr FormatFeatureBlockEx (
}
if (grp != NULL &&
featdeftype != FEATDEF_variation &&
- ((featdeftype != FEATDEF_repeat_region &&
- featdeftype != FEATDEF_mobile_element &&
+ ((featdeftype != FEATDEF_mobile_element &&
featdeftype != FEATDEF_centromere &&
featdeftype != FEATDEF_telomere) || is_ed)) {
qvp [FTQUAL_gene_allele].str = grp->allele; /* now propagating /allele */
}
if (gene != NULL &&
- ((featdeftype != FEATDEF_repeat_region &&
- featdeftype != FEATDEF_mobile_element &&
+ ((featdeftype != FEATDEF_mobile_element &&
featdeftype != FEATDEF_centromere &&
featdeftype != FEATDEF_telomere) || is_ed)) {
/* now propagate old_locus_tag to almost any underlying feature */
@@ -7353,17 +7616,17 @@ static CharPtr FormatFeatureBlockEx (
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
if (prp != NULL) {
if (! pseudo) {
- if (ajp->showPeptide) {
- if (prp->processed == 2 || prp->processed == 3 || prp->processed == 4) {
+ if (ajp->showPeptide || (ajp->gbseq != NULL && format == GENPEPT_FMT && isProt)) {
+ if (prp->processed == 2 || prp->processed == 3 || prp->processed == 4 || prp->processed == 5) {
qvp [FTQUAL_peptide].ble = TRUE;
}
}
- if (format == GENPEPT_FMT && isProt && is_other) {
- /* enable calculated_mol_wt qualifier for RefSeq proteins */
+ if (format == GENPEPT_FMT && isProt && (is_other || ajp->gbseq != NULL)) {
+ /* enable calculated_mol_wt qualifier for RefSeq proteins, or for GBSeq */
qvp [FTQUAL_mol_wt].ble = TRUE;
}
}
- if (prp->processed == 3 || prp->processed == 4) {
+ if (prp->processed == 3 || prp->processed == 4 || prp->processed == 5) {
if (! is_other) {
/* Only RefSeq allows product on signal or transit peptide */
qvp [FTQUAL_product].str = NULL;
@@ -7408,7 +7671,7 @@ static CharPtr FormatFeatureBlockEx (
}
}
}
- if (rrp->type == 2) {
+ if (rrp->type == RNA_TYPE_mRNA || rrp->type == RNA_TYPE_tRNA) {
if (! pseudo) {
if (ajp->showTranscript) {
qvp [FTQUAL_transcription].ble = TRUE;
@@ -7512,10 +7775,18 @@ static CharPtr FormatFeatureBlockEx (
if (aa != '*') {
idx = aa - (64 /* + shift */);
} else {
- idx = 25;
+ idx = 27;
}
if (idx > 0 && idx < 28) {
str = trnaList [idx];
+ if (idx == 13) {
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringCmp (gbq->qual, "product") != 0) continue;
+ if (StringICmp (gbq->val, "tRNA-fMet") == 0 || StringICmp (gbq->val, "tRNA-iMet") == 0) {
+ str = gbq->val;
+ }
+ }
+ }
qvp [FTQUAL_product].str = str;
if (StringNICmp (str, "tRNA-", 5) == 0) {
qvp [FTQUAL_trna_aa].str = str + 5;
@@ -7712,11 +7983,12 @@ static CharPtr FormatFeatureBlockEx (
qvp [FTQUAL_go_function].ufp = NULL;
}
- if (featdeftype == FEATDEF_repeat_region ||
- featdeftype == FEATDEF_mobile_element ||
+ if (featdeftype == FEATDEF_mobile_element ||
featdeftype == FEATDEF_centromere ||
featdeftype == FEATDEF_telomere) {
pseudo = FALSE;
+ qvp [FTQUAL_pseudo].ble = FALSE;
+ qvp [FTQUAL_pseudogene].gbq = NULL;
}
qvp [FTQUAL_pseudo].ble = pseudo;
@@ -7729,7 +8001,12 @@ static CharPtr FormatFeatureBlockEx (
for (adp = sap->desc; adp != NULL; adp = adp->next) {
if (adp->choice == Annot_descr_comment) {
if (StringDoesHaveText ((CharPtr) adp->data.ptrvalue)) {
- qvp [FTQUAL_seqannot_note].str = (CharPtr) adp->data.ptrvalue;
+ str = (CharPtr) adp->data.ptrvalue;
+ /* compensate for gpipe annotation of tRNAscan */
+ if (StringCmp (str, "tRNA features were annotated by tRNAscan-SE.") == 0 && featdeftype != FEATDEF_tRNA) {
+ } else {
+ qvp [FTQUAL_seqannot_note].str = str;
+ }
}
} else if (adp->choice == Annot_descr_user) {
uop = (UserObjectPtr) adp->data.ptrvalue;
@@ -7761,48 +8038,37 @@ static CharPtr FormatFeatureBlockEx (
switch (featdeftype) {
case FEATDEF_allele:
- case FEATDEF_attenuator:
- case FEATDEF_CAAT_signal:
case FEATDEF_centromere:
case FEATDEF_conflict:
case FEATDEF_D_loop:
- case FEATDEF_enhancer:
- case FEATDEF_GC_signal:
case FEATDEF_iDNA:
case FEATDEF_LTR:
case FEATDEF_misc_binding:
case FEATDEF_misc_difference:
case FEATDEF_misc_recomb:
- case FEATDEF_misc_signal:
case FEATDEF_misc_structure:
case FEATDEF_modified_base:
case FEATDEF_mobile_element:
case FEATDEF_mutation:
case FEATDEF_old_sequence:
- case FEATDEF_polyA_signal:
case FEATDEF_polyA_site:
case FEATDEF_precursor_RNA:
case FEATDEF_prim_transcript:
case FEATDEF_primer_bind:
case FEATDEF_protein_bind:
- case FEATDEF_RBS:
case FEATDEF_repeat_region:
case FEATDEF_repeat_unit:
case FEATDEF_rep_origin:
case FEATDEF_satellite:
case FEATDEF_stem_loop:
case FEATDEF_STS:
- case FEATDEF_TATA_signal:
case FEATDEF_telomere:
- case FEATDEF_terminator:
case FEATDEF_unsure:
case FEATDEF_variation:
case FEATDEF_3clip:
case FEATDEF_3UTR:
case FEATDEF_5clip:
case FEATDEF_5UTR:
- case FEATDEF_10_signal:
- case FEATDEF_35_signal:
qvp [FTQUAL_pseudo].ble = FALSE;
qvp [FTQUAL_pseudogene].gbq = NULL;
break;
@@ -8012,6 +8278,26 @@ static CharPtr FormatFeatureBlockEx (
}
}
+ if (qvp [FTQUAL_regulatory_class].gbq != NULL) {
+ gbq = qvp [FTQUAL_regulatory_class].gbq;
+ if (StringDoesHaveText (gbq->val)) {
+ if (! IsStringInRegulatoryClassList (gbq->val)) {
+ qvp [FTQUAL_regulatory_other].str = "other";
+ tmp = gbq->val;
+ if (tmp != NULL) {
+ if (StringNICmp (tmp, "other:", 6) == 0) {
+ tmp += 6;
+ }
+ while (*tmp == ' ') {
+ tmp++;
+ }
+ }
+ qvp [FTQUAL_regulatory_note].str = tmp;
+ qvp [FTQUAL_regulatory_class].gbq = NULL;
+ }
+ }
+ }
+
if (ajp->mode != DUMP_MODE) {
ParseInference (qvp [FTQUAL_inference].gbq, &good_inference, &bad_inference);
qvp [FTQUAL_inference_good].vnp = good_inference;
@@ -8312,6 +8598,7 @@ NLM_EXTERN CharPtr FormatFeatHeaderBlock (
Boolean has_space;
Char id [128];
ObjectIdPtr oip;
+ CharPtr original_id = NULL;
CharPtr ptr;
SeqIdPtr sip;
SeqIdPtr sip2;
@@ -8333,7 +8620,11 @@ NLM_EXTERN CharPtr FormatFeatHeaderBlock (
if (ajp->format == FTABLE_FMT) {
sip = SeqIdFindBest (target->id, 0);
if (sip == NULL) return NULL;
+
id [0] = '\0';
+ if (ShouldUseOriginalID (bsp)) {
+ original_id = FastaGetOriginalId (bsp);
+ }
if (sip->choice == SEQID_GI) {
sip2 = GetSeqIdForGI (sip->data.intvalue);
@@ -8363,6 +8654,9 @@ NLM_EXTERN CharPtr FormatFeatHeaderBlock (
if (id [0] == '\0') {
SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
}
+ if (original_id != NULL && StringLen (original_id) + 5 < sizeof (id)) {
+ sprintf (id, "lcl|%s", original_id);
+ }
if (! StringHasNoText (id)) {
tmp = (CharPtr) MemNew ((StringLen(feature_table_header_format) + StringLen(id)) * sizeof(Char));
sprintf (tmp, ">Feature %s\n", id);
diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c
index 7af30771..ab8f3c1a 100644
--- a/api/asn2gnb5.c
+++ b/api/asn2gnb5.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.196 $
+* $Revision: 1.282 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -66,7 +66,7 @@
/* URLs */
-static CharPtr link_muid = "http://www.ncbi.nlm.nih.gov/pubmed/";
+static CharPtr link_muid = "https://www.ncbi.nlm.nih.gov/pubmed/";
static CharPtr link_uspto = "http://patft.uspto.gov/netacgi/nph-Parser?patentnumber=";
@@ -116,6 +116,90 @@ NLM_EXTERN void FF_www_featloc(StringItemPtr ffstring, CharPtr loc)
/* ************** */
+/*
+  LooksLikeAccession
+
+  Purely syntactic test of whether a string has the shape of an
+  accession.  The string must be non-empty, shorter than 16 characters
+  and start with an upper case letter A-Z.  A leading "NZ_" is skipped,
+  then the text is read as a run of letters, a run of underscores and a
+  run of digits; whatever follows must be end of string, a space or a
+  period.
+
+  alphaP, digitP and unscrP may each be NULL.  When non-NULL they receive
+  the letter, digit and underscore counts, but only after the checks
+  above (and the "at most one underscore" check) have passed; on those
+  early rejections they are left untouched.  They are filled in even if
+  the final letter/digit pattern test then returns FALSE.
+
+  Accepted patterns:
+    no underscore  : letters/digits of 1/5, 2/6, 3/5, 4/8, 4/9 or 5/7
+    one underscore : 2 letters with 6, 8 or 9 digits, where the first two
+                     characters of the original string are N, X or Z
+                     followed by one of M C T P G R S W Z, or A or Y
+                     followed by P
+*/
+static Boolean LooksLikeAccession (
+  CharPtr accession,
+  Int2Ptr alphaP,
+  Int2Ptr digitP,
+  Int2Ptr unscrP
+)
+
+{
+  CharPtr  cursor;
+  Int2     digits = 0;
+  Char     first;
+  Int2     letters = 0;
+  Char     second;
+  Int2     underscores = 0;
+
+  if (accession == NULL) return FALSE;
+  first = accession [0];
+  if (first == '\0') return FALSE;
+  if (StringLen (accession) >= 16) return FALSE;
+  if (first < 'A' || first > 'Z') return FALSE;
+
+  /* safe to read: the string holds at least one character before its terminator */
+  second = accession [1];
+
+  /* counting starts after an optional "NZ_" prefix */
+  cursor = accession;
+  if (StringNCmp (cursor, "NZ_", 3) == 0) {
+    cursor += 3;
+  }
+
+  /* letters, then underscores, then digits - strictly in that order */
+  for (; IS_ALPHA (*cursor); cursor++) {
+    letters++;
+  }
+  for (; *cursor == '_'; cursor++) {
+    underscores++;
+  }
+  for (; IS_DIGIT (*cursor); cursor++) {
+    digits++;
+  }
+
+  /* only end of string, a space or a period may follow the digits */
+  switch (*cursor) {
+    case '\0' :
+    case ' ' :
+    case '.' :
+      break;
+    default :
+      return FALSE;
+  }
+
+  if (underscores > 1) return FALSE;
+
+  /* counts are reported from here on, whether or not the pattern is accepted */
+  if (alphaP != NULL) {
+    *alphaP = letters;
+  }
+  if (digitP != NULL) {
+    *digitP = digits;
+  }
+  if (unscrP != NULL) {
+    *unscrP = underscores;
+  }
+
+  if (underscores == 0) {
+    switch (letters) {
+      case 1 :
+        return (Boolean) (digits == 5);
+      case 2 :
+        return (Boolean) (digits == 6);
+      case 3 :
+        return (Boolean) (digits == 5);
+      case 4 :
+        return (Boolean) (digits == 8 || digits == 9);
+      case 5 :
+        return (Boolean) (digits == 7);
+      default :
+        return FALSE;
+    }
+  }
+
+  /* exactly one underscore from here on */
+  if (letters != 2) return FALSE;
+  if (digits != 6 && digits != 8 && digits != 9) return FALSE;
+
+  /* the prefix test looks at the first two characters of the original string */
+  switch (first) {
+    case 'N' :
+    case 'X' :
+    case 'Z' :
+      switch (second) {
+        case 'M' :
+        case 'C' :
+        case 'T' :
+        case 'P' :
+        case 'G' :
+        case 'R' :
+        case 'S' :
+        case 'W' :
+        case 'Z' :
+          return TRUE;
+        default :
+          break;
+      }
+      break;
+    case 'A' :
+    case 'Y' :
+      if (second == 'P') return TRUE;
+      break;
+    default :
+      break;
+  }
+
+  return FALSE;
+}
+
typedef struct dbxrefurldata {
CharPtr tag;
CharPtr url;
@@ -123,108 +207,133 @@ typedef struct dbxrefurldata {
static UrlData Nlm_url_base [] = {
{"AceView/WormGenes", "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?db=worm&c=gene&q="},
- {"AFTOL", "http://aftol1.biology.duke.edu/pub/displayTaxonInfo?aftol_id="},
+ {"AFTOL", "http://wasabi.lutzonilab.net/pub/displayTaxonInfo?aftol_id="},
{"AntWeb", "http://www.antweb.org/specimen.do?name="},
- {"APHIDBASE", "http://webapps1.genouest.org/grs-1.0/grs?reportID=chado_genome_report&objectID="},
+ {"APHIDBASE", "http://bipaa.genouest.org/apps/grs-2.3/grs?reportID=aphidbase_transcript_report&objectID="},
{"ApiDB", "http://www.apidb.org/apidb/showRecord.do?name=GeneRecordClasses.ApiDBGeneRecordClass&primary_key="},
- {"ApiDB_CryptoDB", "http://cryptodb.org/cryptodb/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=&primary_key="},
- {"ApiDB_PlasmoDB", "http://www.plasmodb.org/plasmo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=&primary_key="},
- {"ApiDB_ToxoDB", "http://www.toxodb.org/toxo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=&primary_key="},
- {"ASAP", "https://asap.ahabs.wisc.edu/annotation/php/feature_info.php?FeatureID="},
- {"ATCC", "http://www.atcc.org/SearchCatalogs/linkin?id="},
+ {"ApiDB_CryptoDB", "http://cryptodb.org/cryptodb/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=CryptoDB&source_id="},
+ {"ApiDB_PlasmoDB", "http://plasmodb.org/plasmo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=PlasmoDB&source_id="},
+ {"ApiDB_ToxoDB", "http://toxodb.org/toxo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=ToxoDB&source_id="},
+ {"Araport", "https://www.araport.org/locus/"},
+ {"ASAP", "https://asap.genetics.wisc.edu/asap/feature_info.php?FeatureID="},
+ {"ATCC", "http://www.atcc.org/Products/All/"},
{"Axeldb", "http://www.dkfz-heidelberg.de/tbi/services/axeldb/clone/xenopus?name="},
- {"BEEBASE", "http://genomes.arc.georgetown.edu/cgi-bin/gbrowse/bee_genome4/?name="},
+ {"BEEBASE", "http://hymenopteragenome.org/cgi-bin/gb2/gbrowse/bee_genome45/?name="},
{"BEETLEBASE", "http://www.beetlebase.org/cgi-bin/report.cgi?name="},
- {"BGD", "http://genomes.arc.georgetown.edu/bovine/genepages/genes/"},
+ {"BEI", "https://www.beiresources.org/Catalog/animalViruses/"},
+ {"BGD", "http://bovinegenome.org/genepages/btau40/genes/"},
+ {"BioProject", "http://www.ncbi.nlm.nih.gov/bioproject/"},
+ {"BioSample", "http://www.ncbi.nlm.nih.gov/biosample/"},
{"BOLD", "http://www.boldsystems.org/connectivity/specimenlookup.php?processid="},
{"CCDS", "http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA="},
{"CDD", "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid="},
- {"CGNC", "http://www.agnc.msstate.edu/GeneReport.aspx?a="},
+ {"CGD", "http://www.candidagenome.org/cgi-bin/locus.pl?locus="},
+ {"CGNC", "http://birdgenenames.org/cgnc/GeneReport?id="},
{"CK", "http://flybane.berkeley.edu/cgi-bin/cDNA/CK_clone.pl?db=CK&dbid="},
{"COG", "http://www.ncbi.nlm.nih.gov/COG/new/release/cow.cgi?cog="},
+ {"CollecTF", "http://collectf.umbc.edu/"},
{"dbClone", "http://www.ncbi.nlm.nih.gov/sites/entrez?db=clone&cmd=Retrieve&list_uids="},
{"dbCloneLib", "http://www.ncbi.nlm.nih.gov/sites/entrez?db=clonelib&cmd=Retrieve&list_uids="},
{"dbEST", "http://www.ncbi.nlm.nih.gov/nucest/"},
{"dbProbe", "http://www.ncbi.nlm.nih.gov/sites/entrez?db=probe&cmd=Retrieve&list_uids="},
{"dbSNP", "http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs="},
- {"dbSTS", "http://www.ncbi.nlm.nih.gov/nuccore/"},
+ {"dbSTS", "https://www.ncbi.nlm.nih.gov/nuccore/"},
{"dictyBase", "http://dictybase.org/db/cgi-bin/gene_page.pl?dictybaseid="},
{"ECOCYC", "http://biocyc.org/ECOLI/new-image?type=GENE&object="},
- {"EcoGene", "http://ecogene.org/geneInfo.php?eg_id="},
+ {"EcoGene", "http://www.ecogene.org/gene/"},
{"ENSEMBL", "http://www.ensembl.org/id/"},
- {"ERIC", "http://www.ericbrc.org/genbank/dbxref/"},
+ {"EnsemblGenomes", "http://ensemblgenomes.org/id/"},
+ {"EnsemblGenomes-Gn", "http://ensemblgenomes.org/id/"},
+ {"EnsemblGenomes-Tr", "http://ensemblgenomes.org/id/"},
{"FANTOM_DB", "http://fantom.gsc.riken.jp/db/annotate/main.cgi?masterid="},
{"FBOL", "http://www.fungalbarcoding.org/BioloMICS.aspx?Table=Fungal%20barcodes&Fields=All&Rec="},
{"FLYBASE", "http://flybase.bio.indiana.edu/.bin/fbidq.html?"},
+ {"Fungorum", "http://www.indexfungorum.org/Names/NamesRecord.asp?RecordID="},
{"GABI", "http://www.gabipd.org/database/cgi-bin/GreenCards.pl.cgi?Mode=ShowSequence&App=ncbi&SequenceId="},
+ {"GenBank", "https://www.ncbi.nlm.nih.gov/nuccore/"},
{"GeneDB", "http://old.genedb.org/genedb/Search?organism=All%3A*&name="},
{"GeneID", "http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=Retrieve&dopt=full_report&list_uids="},
- {"GO", "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&depth=1&query=GO:"},
+ {"GO", "http://amigo.geneontology.org/amigo/term/GO:"},
{"GOA", "http://www.ebi.ac.uk/ego/GProtein?ac="},
{"Greengenes", "http://greengenes.lbl.gov/cgi-bin/show_one_record_v2.pl?prokMSA_id="},
{"GRIN", "http://www.ars-grin.gov/cgi-bin/npgs/acc/display.pl?"},
{"H-InvDB", "http://www.h-invitational.jp"},
- {"HGNC", "http://www.genenames.org/data/hgnc_data.php?hgnc_id="},
- {"HMP", "http://www.hmpdacc-resources.org/cgi-bin/hmp_catalog/main.cgi?section=HmpSummary&page=displayHmpProject&hmp_id="},
+ {"HGNC", "http://www.genenames.org/cgi-bin/gene_symbol_report?hgnc_id=HGNC:"},
+ {"HMP", "http://www.hmpdacc.org/catalog/grid.php?dataset=genomic&hmp_id="},
{"HOMD", "http://www.homd.org/"},
+ {"HPM", "http://www.humanproteomemap.org/protein.php?hpm_id="},
{"HPRD", "http://www.hprd.org/protein/"},
- {"HSSP", "http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-newId+-e+hssp-ID:"},
+ {"HSSP", "http://mrs.cmbi.ru.nl/m6/search?db=all&q="},
+ {"I5KNAL", "https://i5k.nal.usda.gov/"},
{"IKMC", "http://www.knockoutmouse.org/martsearch/project/"},
{"IMGT/GENE-DB", "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Homo+sapiens&query=2+"},
+ {"IMGT/HLA", "http://www.ebi.ac.uk/cgi-bin/ipd/imgt/hla/get_allele.cgi?"},
{"IMGT/LIGM", "http://www.imgt.org/cgi-bin/IMGTlect.jv?query=201+"},
{"InterimID", "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
{"InterPro", "http://www.ebi.ac.uk/interpro/ISearch?mode=ipr&query="},
+ {"IntrepidBio", "http://server1.intrepidbio.com/FeatureBrowser/gene/browse/"},
{"IRD", "http://www.fludb.org/brc/fluSegmentDetails.do?irdSubmissionId="},
{"ISD", "http://www.flu.lanl.gov/search/view_record.html?accession="},
{"ISFinder", "http://www-is.biotoul.fr/scripts/is/is_spec.idc?name="},
+ {"ISHAM-ITS", "http://its.mycologylab.org/BioloMICS.aspx?Table=Sequences&ExactMatch=T&Name=MITS"},
{"JCM", "http://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM="},
{"JGIDB", "http://genome.jgi-psf.org/cgi-bin/jgrs?id="},
{"LocusID", "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
{"MaizeGDB", "http://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?"},
+ {"MedGen", "http://www.ncbi.nlm.nih.gov/medgen/"},
{"MGI", "http://www.informatics.jax.org/marker/MGI:"},
{"MIM", "http://www.ncbi.nlm.nih.gov/omim/"},
{"miRBase", "http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc="},
{"MycoBank", "http://www.mycobank.org/MycoTaxo.aspx?Link=T&Rec="},
- {"NASONIABASE", "http://genomes.arc.georgetown.edu/cgi-bin/gbrowse/nasonia10_scaffold/?name="},
+ {"NASONIABASE", "http://hymenopteragenome.org/cgi-bin/gbrowse/nasonia10_scaffold/?name="},
{"NBRC", "http://www.nbrc.nite.go.jp/NBRC2/NBRCCatalogueDetailServlet?ID=NBRC&CAT="},
{"NextDB", "http://nematode.lab.nig.ac.jp/cgi-bin/db/ShowGeneInfo.sh?celk="},
{"niaEST", "http://lgsun.grc.nia.nih.gov/cgi-bin/pro3?sname1="},
{"NMPDR", "http://www.nmpdr.org/linkin.cgi?id="},
{"NRESTdb", "http://genome.ukm.my/nrestdb/db/single_view_est.php?id="},
+ {"OrthoMCL", "http://orthomcl.org/orthomcl/showRecord.do?name=GroupRecordClasses.GroupRecordClass&group_name="},
{"Osa1", "http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name="},
- {"Pathema", "http://pathema.jcvi.org/cgi-bin/Burkholderia/shared/GenePage.cgi?all=1&locus="},
{"PBmice", "http://www.idmshanghai.cn/PBmice/DetailedSearch.do?type=insert&id="},
{"PBR", "http://www.poxvirus.org/query.asp?web_id="},
{"PDB", "http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId="},
{"PFAM", "http://pfam.sanger.ac.uk/family?acc="},
{"PGN", "http://pgn.cornell.edu/cgi-bin/search/seq_search_result.pl?identifier="},
+ {"Phytozome", "http://www.phytozome.net/genePage.php?db=Phytozome&crown&method=0&search=1&detail=1&searchText=locusname:"},
{"PomBase", "http://www.pombase.org/spombe/result/"},
{"PseudoCap", "http://www.pseudomonas.com/getAnnotation.do?locusID="},
{"RAP-DB", "http://rapdb.dna.affrc.go.jp/cgi-bin/gbrowse_details/latest?name="},
{"RATMAP", "http://ratmap.gen.gu.se/ShowSingleLocus.htm?accno="},
+ {"RBGE_garden", "http://data.rbge.org.uk/living/"},
+ {"RBGE_herbarium", "http://data.rbge.org.uk/herb/"},
{"REBASE", "http://rebase.neb.com/rebase/enz/"},
- {"RFAM", "http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?"},
- {"RGD", "http://rgd.mcw.edu/generalSearch/RgdSearch.jsp?quickSearch=1&searchKeyword="},
+ {"RefSeq", "https://www.ncbi.nlm.nih.gov/nuccore/"},
+ {"RFAM", "http://rfam.xfam.org/family/"},
+ {"RGD", "http://rgd.mcw.edu/rgdweb/search/search.html?term="},
{"RiceGenes", "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object="},
{"SEED", "http://www.theseed.org/linkin.cgi?id="},
- {"SGD", "http://db.yeastgenome.org/cgi-bin/SGD/locus.pl?locus="},
+ {"SGD", "http://www.yeastgenome.org/cgi-bin/locus.fpl?sgdid="},
{"SGN", "http://www.sgn.cornell.edu/search/est.pl?request_type=7&request_id="},
{"SK-FST", "http://aafc-aac.usask.ca/fst/"},
+ {"SRPDB", "http://rnp.uthscsa.edu/rnp/SRPDB/rna/sequences/fasta/"},
{"SubtiList", "http://genolist.pasteur.fr/SubtiList/genome.cgi?external_query+"},
{"TAIR", "http://www.arabidopsis.org/servlets/TairObject?type=locus&name="},
- {"taxon", "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?"},
- {"TIGRFAM", "http://cmr.tigr.org/tigr-scripts/CMR/HmmReport.cgi?hmm_acc="},
+ {"taxon", "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?"},
+ {"TIGRFAM", "http://www.jcvi.org/cgi-bin/tigrfams/HmmReportPage.cgi?acc="},
+ {"TubercuList", "http://tuberculist.epfl.ch/quicksearch.php?gene+name="},
{"UniGene", "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=unigene&Cmd=Search&Term="},
{"UniProtKB/Swiss-Prot", "http://www.uniprot.org/uniprot/"},
{"UniProtKB/TrEMBL", "http://www.uniprot.org/uniprot/"},
- {"UniSTS", "http://www.ncbi.nlm.nih.gov/genome/sts/sts.cgi?uid="},
+ {"UniSTS", "http://www.ncbi.nlm.nih.gov/probe?term="},
{"UNITE", "http://unite.ut.ee/bl_forw.php?nimi="},
{"VBASE2", "http://www.dnaplot.de/vbase2/vgene.php?id="},
{"VBRC", "http://vbrc.org/query.asp?web_view=curation&web_id="},
{"VectorBase", "http://www.vectorbase.org/Genome/BRCGene/?feature="},
+ {"Vega", "http://vega.sanger.ac.uk/id/"},
+ {"VGNC", "http://vertebrate.genenames.org/data/gene-symbol-report/#!/vgnc_id/VGNC:"},
{"ViPR", "http://www.viprbrc.org/brc/viprStrainDetails.do?viprSubmissionId="},
+ {"VISTA", "http://enhancer.lbl.gov/cgi-bin/dbxref.pl?id="},
{"WorfDB", "http://worfdb.dfci.harvard.edu/search.pl?form=1&search="},
- {"WormBase", "http://www.wormbase.org/db/gene/gene?class=CDS;name="},
+ {"WormBase", "http://www.wormbase.org/search/gene/"},
{"Xenbase", "http://www.xenbase.org/gene/showgene.do?method=display&geneId="},
{"ZFIN", "http://zfin.org/cgi-bin/webdriver?MIval=aa-markerview.apg&OID="},
};
@@ -267,10 +376,11 @@ static void FF_www_get_url (
)
{
- CharPtr base = NULL, prefix = NULL, profix = NULL, ident = NULL, suffix = NULL, url = NULL, ptr, str;
+ CharPtr base = NULL, prefix = NULL, profix = NULL, ident = NULL,
+ suffix = NULL, url = NULL, redundant = NULL, ptr, str;
Char ch, buf [128], id [20], taxname [128];
Boolean is_numeric;
- Int2 R;
+ Int2 num_alpha, num_digit, num_unscr, R;
if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return;
@@ -296,6 +406,13 @@ static void FF_www_get_url (
url += 28;
}
}
+ } else if (StringNICmp (url, "https://www.ncbi.nlm.nih.gov/", 29) == 0) {
+ if (GetAppParam ("NCBI", "WWWENTREZ", "NCBI_URL_BASE", NULL, buf, sizeof (buf))) {
+ if (StringDoesHaveText (buf)) {
+ base = buf;
+ url += 29;
+ }
+ }
}
/* special cases */
@@ -305,10 +422,22 @@ static void FF_www_get_url (
suffix = "&decorator=influenza";
+ } else if (StringCmp (db, "ATCC") == 0) {
+
+ suffix = ".aspx";
+
+ } else if (StringCmp (db, "BEI") == 0) {
+
+ suffix = ".aspx";
+
} else if (StringCmp (db, "ViPR") == 0) {
suffix = "&decorator=vipr";
+ } else if (StringCmp (db, "SRPDB") == 0) {
+
+ suffix = ".fasta";
+
} else if (StringCmp (db, "dbSTS") == 0) {
/*
@@ -339,6 +468,16 @@ static void FF_www_get_url (
url = "http://www.fruitfly.org/cgi-bin/annot/fban?";
}
+ } else if (StringCmp (db, "Fungorum") == 0) {
+
+ str = identifier;
+ ch = *str;
+ while (ch != '\0' && ! IS_DIGIT (ch)) {
+ str++;
+ ch = *str;
+ }
+ ident = str;
+
} else if (StringCmp (db, "dictyBase") == 0) {
if (StringChr (identifier, '_') != NULL) {
@@ -399,6 +538,12 @@ static void FF_www_get_url (
}
}
+ } else if (StringCmp (db, "IMGT/HLA") == 0) {
+
+ if (StringNICmp (identifier, "HLA", 3) != NULL ) {
+ url = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=imgthla;id=";
+ }
+
} else if (StringCmp (db, "MaizeGDB") == 0) {
is_numeric = TRUE;
@@ -424,6 +569,19 @@ static void FF_www_get_url (
url = "http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=";
}
+ } else if (StringCmp (db, "RefSeq") == 0) {
+
+ ch = identifier [0];
+ if (IS_ALPHA (ch) && identifier [1] == 'P' && identifier [2] == '_') {
+ url = "https://www.ncbi.nlm.nih.gov/protein/";
+ }
+
+ } else if (StringCmp (db, "WormBase") == 0) {
+
+ if (LooksLikeAccession (identifier, &num_alpha, &num_digit, &num_unscr) && num_alpha == 3 && num_digit == 5) {
+ url = "http://www.wormbase.org/search/protein/";
+ }
+
} else if (StringCmp (db, "niaEST") == 0) {
suffix = "&val=1";
@@ -449,6 +607,43 @@ static void FF_www_get_url (
prefix = "name=";
}
+ } else if (StringCmp (db, "UniSTS") == 0) {
+
+ suffix = "%20%5BUniSTS%20ID%5D";
+
+ } else if (StringCmp (db, "HGNC") == 0) {
+
+ if (StringNCmp (identifier, "HGNC:", 5) == 0 ) {
+ ident += 5;
+ }
+ redundant = "HGNC:";
+
+ } else if (StringCmp (db, "VGNC") == 0) {
+
+ if (StringNCmp (identifier, "VGNC:", 5) == 0 ) {
+ ident += 5;
+ }
+ redundant = "VGNC:";
+
+ } else if (StringCmp (db, "MGI") == 0) {
+
+ if (StringNCmp (identifier, "MGI:", 4) == 0 ) {
+ ident += 4;
+ }
+ redundant = "MGI:";
+
+ } else if (StringCmp (db, "RGD") == 0) {
+
+ if (StringNCmp (identifier, "RGD:", 4) == 0 ) {
+ ident += 4;
+ }
+
+ } else if (StringCmp (db, "ISHAM-ITS") == 0) {
+
+ if (StringNCmp (identifier, "MITS", 4) == 0 ) {
+ ident += 4;
+ }
+
}
/* now generate URL */
@@ -470,7 +665,12 @@ static void FF_www_get_url (
if (StringDoesHaveText (suffix)) {
FFAddOneString (ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
}
- FFAddTextToString (ffstring, "\">", identifier, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ if (StringDoesHaveText (redundant)) {
+ FFAddOneString (ffstring, redundant, FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
NLM_EXTERN void FF_www_db_xref (
@@ -481,13 +681,30 @@ NLM_EXTERN void FF_www_db_xref (
BioseqPtr bsp
)
{
+ CharPtr colon = ":";
+
if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return;
+ if (StringCmp (db, "HGNC") == 0 && StringNCmp (identifier, "HGNC:", 5) == 0) {
+ identifier += 5;
+ } else if (StringCmp (db, "VGNC") == 0 && StringNCmp (identifier, "VGNC:", 5) == 0) {
+ identifier += 5;
+ } else if (StringCmp (db, "MGI") == 0 && StringNCmp (identifier, "MGI:", 4) == 0) {
+ identifier += 4;
+ }
+
if (GetWWW (ajp)) {
- FFAddTextToString (ffstring, db, ":", NULL, FALSE, FALSE, TILDE_IGNORE);
+ FFAddTextToString (ffstring, db, colon, NULL, FALSE, FALSE, TILDE_IGNORE);
FF_www_get_url (ffstring, db, identifier, bsp);
} else {
- FFAddTextToString (ffstring, db, ":", identifier, FALSE, FALSE, TILDE_IGNORE);
+ if (StringCmp (db, "HGNC") == 0) {
+ colon = ":HGNC:";
+ } else if (StringCmp (db, "VGNC") == 0) {
+ colon = ":VGNC:";
+ } else if (StringCmp (db, "MGI") == 0) {
+ colon = ":MGI:";
+ }
+ FFAddTextToString (ffstring, db, colon, identifier, FALSE, FALSE, TILDE_IGNORE);
}
}
@@ -511,6 +728,13 @@ NLM_EXTERN void FF_Add_NCBI_Base_URL (
url += 28;
}
}
+ } else if (StringNICmp (url, "https://www.ncbi.nlm.nih.gov/", 29) == 0) {
+ if (GetAppParam ("NCBI", "WWWENTREZ", "NCBI_URL_BASE", NULL, buf, sizeof (buf))) {
+ if (StringDoesHaveText (buf)) {
+ base = buf;
+ url += 29;
+ }
+ }
}
if (StringDoesHaveText (base)) {
@@ -533,7 +757,7 @@ NLM_EXTERN CharPtr asn2gnbk_dbxref (
{
IntAsn2gbJobPtr ajp;
- Char buf [80];
+ Char buf [128];
StringItemPtr ffstring;
ObjectIdPtr oip;
CharPtr ptr;
@@ -1147,7 +1371,7 @@ static Int2 FixPages (
Char firstend[MAX_PAGE_DIGITS];
Char secondend[MAX_PAGE_DIGITS];
Char temp[MAX_PAGE_DIGITS];
- CharPtr alphabegin, numbegin, alphaend, numend, ptr, in=in_pages;
+ CharPtr alphabegin, numbegin = NULL, alphaend, numend = NULL, ptr, in=in_pages;
Int2 diff, index, retval=0;
Int2 length_nb, length_ab, length_ne, length_ae;
Int4 num1=0, num2=0;
@@ -1480,7 +1704,7 @@ static void DoSup (
CharPtr str;
CharPtr temp;
- len = StringLen (issue) + StringLen (part_sup) + StringLen (part_supi) + 25;
+ len = StringLen (issue) + StringLen (part_sup) + StringLen (part_supi) + 30;
str = MemNew (sizeof (Char) * len);
if (str == NULL) return;
temp = str;
@@ -1488,7 +1712,11 @@ static void DoSup (
if (! StringHasNoText (part_sup)) {
*temp = ' ';
temp++;
+ *temp = '(';
+ temp++;
temp = StringMove (temp, part_sup);
+ *temp = ')';
+ temp++;
}
if (StringHasNoText (issue) && StringHasNoText (part_supi)) {
ValNodeCopyStr (head, 0, str);
@@ -2645,6 +2873,7 @@ static CharPtr FormatCitSub (
afp = alp->affil;
if (afp != NULL) {
affil = GetAffil (afp);
+ Asn2gnbkCompressSpaces (affil);
if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
if (StringNCmp(affil, " to the EMBL/GenBank/DDBJ databases.", 36) != 0) {
ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n");
@@ -2911,19 +3140,111 @@ NLM_EXTERN CharPtr CleanQualValue (
return str;
}
+#define twocommas ((',') << 8 | (','))
+#define twospaces ((' ') << 8 | (' '))
+#define twosemicolons ((';') << 8 | (';'))
+#define space_comma ((' ') << 8 | (','))
+#define space_bracket ((' ') << 8 | (')'))
+#define bracket_space (('(') << 8 | (' '))
+#define space_semicolon ((' ') << 8 | (';'))
+#define comma_space ((',') << 8 | (' '))
+#define semicolon_space ((';') << 8 | (' '))
+
NLM_EXTERN CharPtr Asn2gnbkCompressSpaces (CharPtr str)
{
Char ch;
- CharPtr dst;
Char last;
+ CharPtr dst;
CharPtr ptr;
+ Char curr;
+ Char next;
+ CharPtr in;
+ CharPtr out;
+ Uint2 two_chars;
+
+ if (str == NULL || str [0] == '\0') return str;
+
+ in = str;
+ out = str;
+
+ curr = *in;
+ in++;
+
+ next = 0;
+ two_chars = curr;
+
+ while (curr != '\0') {
+ next = *in;
+ in++;
+
+ two_chars = (two_chars << 8) | next;
+
+ if (two_chars == twocommas) {
+ *out++ = curr;
+ next = ' ';
+ } else if (two_chars == twospaces) {
+ } else if (two_chars == twosemicolons) {
+ } else if (two_chars == bracket_space) {
+ next = curr;
+ two_chars = curr;
+ } else if (two_chars == space_bracket) {
+ } else if (two_chars == space_comma) {
+ *out++ = next;
+ next = curr;
+ *out++ = ' ';
+ while (next == ' ' || next == ',') {
+ next = *in;
+ in++;
+ }
+ two_chars = next;
+ } else if (two_chars == space_semicolon) {
+ *out++ = next;
+ next = curr;
+ *out++ = ' ';
+ while (next == ' ' || next == ';') {
+ next = *in;
+ in++;
+ }
+ two_chars = next;
+ } else if (two_chars == comma_space) {
+ *out++ = curr;
+ *out++ = ' ';
+ while (next == ' ' || next == ',') {
+ next = *in;
+ in++;
+ }
+ two_chars = next;
+ } else if (two_chars == semicolon_space) {
+ *out++ = curr;
+ *out++ = ' ';
+ while (next == ' ' || next == ';') {
+ next = *in;
+ in++;
+ }
+ two_chars = next;
+ } else {
+ *out++ = curr;
+ }
+
+ curr = next;
+ }
+
+ if (curr > 0 && curr != ' ') {
+ *out = curr;
+ out++;
+ }
+ *out = '\0';
+
+ /* TrimSpacesAroundString but allow leading/trailing tabs/newlines */
+
if (str != NULL && str [0] != '\0') {
+ last = '\0';
dst = str;
ptr = str;
ch = *ptr;
- while (ch != '\0' && ch <= ' ') {
+ while (ch != '\0' && ch == ' ') {
ptr++;
ch = *ptr;
}
@@ -2933,11 +3254,7 @@ NLM_EXTERN CharPtr Asn2gnbkCompressSpaces (CharPtr str)
ptr++;
last = ch;
ch = *ptr;
- if (ch != '\0' && ch < ' ') {
- *ptr = ' ';
- ch = *ptr;
- }
- while (ch != '\0' && last <= ' ' && ch <= ' ') {
+ while (ch != '\0' && last == ' ' && ch == ' ') {
ptr++;
ch = *ptr;
}
@@ -2959,6 +3276,7 @@ NLM_EXTERN CharPtr Asn2gnbkCompressSpaces (CharPtr str)
*dst = '\0';
}
}
+
return str;
}
@@ -3212,6 +3530,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
CharPtr remprefix = NULL;
SubmitBlockPtr sbp;
SeqDescrPtr sdp;
+ ErrSev sev;
SeqFeatPtr sfp = NULL;
SeqIdPtr sip;
SeqLocPtr slp;
@@ -3327,11 +3646,13 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
irp = (IntRefBlockPtr) rbp;
if (irp->justuids) {
+ sev = ErrSetMessageLevel (SEV_MAX);
if (rbp->pmid != 0) {
pep = GetPubMedForUid (rbp->pmid);
} else if (rbp->muid != 0) {
pep = GetPubMedForUid (rbp->muid);
}
+ ErrSetMessageLevel (sev);
if (pep != NULL) {
mep = (MedlineEntryPtr) pep->medent;
if (mep != NULL && mep->cit != NULL) {
@@ -3467,6 +3788,10 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (alp != NULL) {
str = GetAuthorsString (afp->format, alp, &consortium, index, gbref);
TrimSpacesAroundString (str);
+ Asn2gnbkCompressSpaces (str);
+ if (StringCmp (str, "?") == 0) {
+ str = MemFree (str);
+ }
}
if (str != NULL || StringHasNoText (consortium)) {
@@ -3605,6 +3930,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
}
StrStripSpaces (str);
TrimSpacesAroundString (str);
+ Asn2gnbkCompressSpaces (str);
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
needsPeriod = FALSE;
diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c
index 8dda213f..d0eac172 100644
--- a/api/asn2gnb6.c
+++ b/api/asn2gnb6.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.281 $
+* $Revision: 1.371 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -57,6 +57,7 @@
#include <alignmgr2.h>
#include <asn2gnbi.h>
#include <findrepl.h>
+#include <valid.h>
#ifdef WIN_MAC
#if __profile__
@@ -64,18 +65,21 @@
#endif
#endif
-static CharPtr link_tax = "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
+static CharPtr link_tax = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
-static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
-static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
+static CharPtr link_featn = "https://www.ncbi.nlm.nih.gov/nuccore/";
+static CharPtr link_featp = "https://www.ncbi.nlm.nih.gov/protein/";
-static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
-static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
+static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
+static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
static CharPtr link_lat_lon = "http://www.ncbi.nlm.nih.gov/projects/Sequin/latlonview.html?";
static CharPtr link_gold_stamp_id = "http://genomesonline.org/cgi-bin/GOLD/bin/GOLDCards.cgi?goldstamp=";
+static CharPtr link_annot_soft_ver = "http://www.ncbi.nlm.nih.gov/genome/annotation_euk/release_notes/#version";
+
+static CharPtr link_annot_ver = "http://www.ncbi.nlm.nih.gov/genome/annotation_euk/";
/* ordering arrays for qualifiers and note components */
@@ -101,6 +105,8 @@ static SourceType source_qual_order [] = {
SCQUAL_culture_collection,
SCQUAL_bio_material,
+ SCQUAL_type_material,
+
SCQUAL_db_xref,
SCQUAL_org_xref,
@@ -121,7 +127,6 @@ static SourceType source_qual_order [] = {
SCQUAL_clone_lib,
SCQUAL_dev_stage,
SCQUAL_ecotype,
- SCQUAL_frequency,
SCQUAL_germline,
SCQUAL_rearranged,
@@ -141,6 +146,7 @@ static SourceType source_qual_order [] = {
SCQUAL_focus,
SCQUAL_lat_lon,
+ SCQUAL_altitude,
SCQUAL_collection_date,
SCQUAL_collected_by,
SCQUAL_identified_by,
@@ -191,6 +197,7 @@ static SourceType source_desc_note_order [] = {
SCQUAL_anamorph,
SCQUAL_teleomorph,
SCQUAL_breed,
+ SCQUAL_frequency,
SCQUAL_metagenome_source,
SCQUAL_metagenome_note,
@@ -243,7 +250,8 @@ static SourceType source_feat_note_order [] = {
SCQUAL_anamorph,
SCQUAL_teleomorph,
SCQUAL_breed,
-
+ SCQUAL_frequency,
+
SCQUAL_metagenome_source,
SCQUAL_metagenome_note,
@@ -274,6 +282,7 @@ static SourceType source_feat_note_order [] = {
NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE] = {
{ "", Qual_class_ignore },
{ "acronym", Qual_class_orgmod },
+ { "altitude", Qual_class_subsource },
{ "anamorph", Qual_class_orgmod },
{ "authority", Qual_class_orgmod },
{ "biotype", Qual_class_orgmod },
@@ -341,6 +350,7 @@ NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE] = {
{ "PCR_primers", Qual_class_pcr },
{ "PCR_primers", Qual_class_pcr },
{ "PCR_primers", Qual_class_pcr_react },
+ { "phenotype", Qual_class_subsource },
{ "plasmid", Qual_class_subsource },
{ "plastid", Qual_class_subsource },
{ "pop_variant", Qual_class_subsource },
@@ -370,15 +380,17 @@ NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE] = {
{ "transgenic", Qual_class_subsource },
{ "transposon", Qual_class_subsource },
{ "type", Qual_class_orgmod },
+ { "type_material", Qual_class_orgmod },
{ "unstructured", Qual_class_valnode },
{ "usedin", Qual_class_quote },
{ "variety", Qual_class_orgmod },
+ { "whole_replicon", Qual_class_subsource },
{ "?", Qual_class_orgmod },
{ "?", Qual_class_orgmod },
{ "?", Qual_class_subsource }
};
-NLM_EXTERN SourceType subSourceToSourceIdx [42] = {
+NLM_EXTERN SourceType subSourceToSourceIdx [45] = {
SCQUAL_zero_subsrc,
SCQUAL_chromosome,
SCQUAL_map,
@@ -420,6 +432,9 @@ NLM_EXTERN SourceType subSourceToSourceIdx [42] = {
SCQUAL_mating_type,
SCQUAL_linkage_group,
SCQUAL_haplogroup,
+ SCQUAL_whole_replicon,
+ SCQUAL_phenotype,
+ SCQUAL_altitude,
SCQUAL_subsource_note
};
@@ -440,6 +455,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"ApiDB_CryptoDB",
"ApiDB_PlasmoDB",
"ApiDB_ToxoDB",
+ "Araport",
"ASAP",
"ATCC",
"ATCC(in host)",
@@ -447,10 +463,13 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"Axeldb",
"BDGP_EST",
"BDGP_INS",
+ "BEEBASE",
"BEETLEBASE",
+ "BEI",
"BGD",
"BOLD",
"CDD",
+ "CGD",
"CK",
"COG",
"dbClone",
@@ -462,6 +481,9 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"dictyBase",
"EcoGene",
"ENSEMBL",
+ "EnsemblGenomes",
+ "EnsemblGenomes-Gn",
+ "EnsemblGenomes-Tr",
"ERIC",
"ESTLIB",
"FANTOM_DB",
@@ -480,19 +502,23 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"HMP",
"HOMD",
"HSSP",
+ "I5KNAL",
"IKMC",
"IMGT/GENE-DB",
"IMGT/HLA",
"IMGT/LIGM",
"InterimID",
"InterPro",
+ "IntrepidBio",
"IRD",
"ISD",
"ISFinder",
+ "ISHAM-ITS",
"JCM",
"JGIDB",
"LocusID",
"MaizeGDB",
+ "MedGen",
"MGI",
"MIM",
"miRBase",
@@ -502,12 +528,14 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"niaEST",
"NMPDR",
"NRESTdb",
+ "OrthoMCL",
"Osa1",
"Pathema",
"PBmice",
"PDB",
"PFAM",
"PGN",
+ "Phytozome",
"PIR",
"PomBase",
"PSEUDO",
@@ -522,10 +550,12 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"SGD",
"SGN",
"SoyBase",
+ "SRPDB",
"SubtiList",
"TAIR",
"taxon",
"TIGRFAM",
+ "TubercuList",
"UniGene",
"UNILIB",
"UniProtKB/Swiss-Prot",
@@ -534,7 +564,10 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"UNITE",
"VBASE2",
"VectorBase",
+ "Vega",
+ "VGNC",
"ViPR",
+ "VISTA",
"WorfDB",
"WormBase",
"Xenbase",
@@ -548,10 +581,12 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
"ATCC",
"ATCC(dna)",
"ATCC(in host)",
+ "BEI",
"BOLD",
"FANTOM_DB",
"FBOL",
"FLYBASE",
+ "Fungorum",
"Greengenes",
"GRIN",
"HMP",
@@ -559,10 +594,13 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
"IKMC",
"IMGT/HLA",
"IMGT/LIGM",
+ "ISHAM-ITS",
"JCM",
"MGI",
"MycoBank",
"NBRC",
+ "RBGE_garden",
+ "RBGE_herbarium",
"RZPD",
"taxon",
"UNILIB",
@@ -571,17 +609,21 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
};
NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
- "BEEBASE",
"BioProject",
+ "BioSample",
"CCDS",
"CGNC",
"CloneID",
+ "CollecTF",
"ECOCYC",
+ "GenBank",
+ "HPM",
"HPRD",
"LRG",
"NASONIABASE",
"PBR",
"REBASE",
+ "RefSeq",
"SK-FST",
"VBRC",
NULL
@@ -1019,7 +1061,7 @@ NLM_EXTERN CharPtr FormatSourceBlock (
}
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
-
+
temp = FFGetString(ajp);
if (ajp->newSourceOrg) {
@@ -1061,14 +1103,14 @@ NLM_EXTERN CharPtr FormatSourceBlock (
gbseq->source = StringSave (str);
}
-
+
FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
if (str != NULL) {
FFAddTextToString(ffstring, NULL, str, NULL, addPeriod, FALSE, TILDE_TO_SPACES);
} else {
FFAddOneChar(ffstring, '.', FALSE);
}
-
+
MemFree (str);
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
@@ -1079,7 +1121,7 @@ NLM_EXTERN CharPtr FormatSourceBlock (
FFAddTextToString(ffstring, " (", common, ")", FALSE, FALSE, TILDE_TO_SPACES);
}
}
-
+
str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 0, 5, "OS");
FFRecycleString(ajp, ffstring);
return str;
@@ -1195,7 +1237,7 @@ NLM_EXTERN CharPtr FormatOrganismBlock (
if ( ffstring == NULL || temp == NULL ) return NULL;
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
-
+
FFStartPrint(temp, afp->format, 2, 12, "ORGANISM", 12, 5, 5, "OC", FALSE);
if (! ajp->newSourceOrg) {
FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
@@ -1279,7 +1321,7 @@ NLM_EXTERN CharPtr FormatOrganismBlock (
}
}
}
-
+
str = FFToCharPtr(ffstring);
FFRecycleString(ajp, ffstring);
return str;
@@ -1296,7 +1338,7 @@ static Boolean IsTildeEOL(CharPtr str) {
++str;
-
+
for ( ptr = str;
IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.';
++ptr) continue;
@@ -1382,6 +1424,12 @@ NLM_EXTERN void AddCommentWithURLlinks (
return;
}
+ /*
+ if (GetWWW (ajp)) {
+ str = EncodeXmlEx (str);
+ }
+ */
+
while (! StringHasNoText (str)) {
ptr = StringStr (str, "http://");
if (ptr == NULL) {
@@ -1453,34 +1501,39 @@ static size_t ThresholdForStructuredCommentColumnarDisplay (
// We are trying to make those structured comments look pretty. However, if the first column gets
// too big, the printout starts to look ugly. This function attempts to define the first column
// extent at which pretty turns into ugly.
-
+
const size_t MAX_COLUMN_WIDTH = 45;
switch ( format ) {
-
+
case GENBANK_FMT:
case GENPEPT_FMT:
return MIN( MAX_COLUMN_WIDTH, ASN2FF_GB_MAX - 12 );
-
+
default:
return MIN( MAX_COLUMN_WIDTH, ASN2FF_EMBL_MAX - 5 );
}
}
-static CharPtr GetStrForStructuredComment (
+NLM_EXTERN CharPtr GetStrForStructuredComment (
IntAsn2gbJobPtr ajp,
UserObjectPtr uop
)
{
- Char buf [120];
+ Char buf [132];
+ Char ch;
UserFieldPtr curr;
StringItemPtr ffstring;
CharPtr field;
ValNodePtr head = NULL;
size_t len;
+ CharPtr link_annot_tmp;
size_t max = 0;
ObjectIdPtr oip;
CharPtr prefix = NULL;
+ CharPtr provider = NULL;
+ CharPtr ptr;
+ CharPtr status = NULL;
CharPtr str;
CharPtr suffix = NULL;
CharPtr tmp;
@@ -1512,6 +1565,17 @@ static CharPtr GetStrForStructuredComment (
}
continue;
}
+ if (StringCmp (field, "Annotation Provider") == 0) {
+ str = (CharPtr) curr->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ provider = str;
+ }
+ } else if (StringCmp (field, "Annotation Status") == 0) {
+ str = (CharPtr) curr->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ status = str;
+ }
+ }
len = StringLen (field);
if (len > max) {
max = len;
@@ -1567,11 +1631,12 @@ static CharPtr GetStrForStructuredComment (
FFStartPrint (ffstring, GENBANK_FMT, 0, max + 1, field, max + 1, 0, max + 1, field, TRUE);
*/
StringNCpy_0 (buf, field, sizeof (buf) - 40);
- StringCat (buf, " ");
+ StringCat (buf, " ");
buf [max + 1] = ':';
buf [max + 2] = ':';
buf [max + 3] = '\0';
FFStartPrint (ffstring, GENBANK_FMT, 0, max + 4, buf, max + 4, 0, max + 4, buf, TRUE);
+
if (GetWWW (ajp) && StringCmp (field, "GOLD Stamp ID") == 0 && StringNCmp (str, "Gi", 2) == 0) {
FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
FF_Add_NCBI_Base_URL (ffstring, link_gold_stamp_id);
@@ -1580,6 +1645,49 @@ static CharPtr GetStrForStructuredComment (
FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+
+ } else if (GetWWW (ajp) &&
+ StringCmp (prefix, "##Genome-Annotation-Data-START##") == 0 &&
+ StringCmp (field, "Annotation Software Version") == 0) {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_Add_NCBI_Base_URL (ffstring, link_annot_soft_ver);
+ FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+
+ } else if (GetWWW (ajp) &&
+ StringCmp (prefix, "##Genome-Annotation-Data-START##") == 0 &&
+ StringCmp (field, "Annotation Version") == 0 &&
+ StringCmp (provider, "NCBI") == 0 &&
+ StringCmp (status, "Full annotation") == 0) {
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_Add_NCBI_Base_URL (ffstring, link_annot_ver);
+ link_annot_tmp = StringSave (str);
+ if (link_annot_tmp != NULL) {
+ ptr = StringStr (link_annot_tmp, " Annotation Release ");
+ if (ptr != NULL) {
+ *ptr = '\0';
+ StringCat (link_annot_tmp, "/");
+ ptr += 20;
+ StringCat (link_annot_tmp, ptr);
+ ptr = link_annot_tmp;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (ch == ' ') {
+ *ptr = '_';
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ }
+ FFAddOneString (ffstring, link_annot_tmp, FALSE, FALSE, TILDE_EXPAND);
+ MemFree (link_annot_tmp);
+ }
+ FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+
} else if (GetWWW (ajp) && StringCmp (field, "url") == 0) {
AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
} else if (GetWWW (ajp) && StringNICmp (str, "http://", 7) == 0) {
@@ -1858,6 +1966,30 @@ static void CatenateCommentInGbseq (
}
}
+static void CommentTildes (
+ CharPtr PNTR str
+)
+
+{
+#ifndef OS_MSWIN
+ FindReplaceString (str, "nnotated by GenomeRefine~~", "nnotated by GenomeRefine", FALSE, FALSE);
+ FindReplaceString (str, "based on SOLiD3 (Applied Biosystems)~~", "based on SOLiD3 (Applied Biosystems)", FALSE, FALSE);
+ FindReplaceString (str, "Biological resourse center, NITE (NRBC)~~", "Biological resourse center, NITE (NRBC)", FALSE, FALSE);
+ FindReplaceString (str, "developmental01.html~~", "developmental01.html", FALSE, FALSE);
+ FindReplaceString (str, "http://bionano.toyo.ac.jp/~~", "http://bionano.toyo.ac.jp/", FALSE, FALSE);
+ FindReplaceString (str, "http://dictycdb1.biol.tsukuba.ac.jp/acytodb/~~", "http://dictycdb1.biol.tsukuba.ac.jp/acytodb/", FALSE, FALSE);
+ FindReplaceString (str, "http://egg.umh.es~~", "http://egg.umh.es", FALSE, FALSE);
+ FindReplaceString (str, "http://www.aist.go.jp/~~", "http://www.aist.go.jp/", FALSE, FALSE);
+ FindReplaceString (str, "http://www.bio.nite.go.jp/~~DOGAN ; Database", "http://www.bio.nite.go.jp/\n \nDOGAN ; Database", FALSE, FALSE);
+ FindReplaceString (str, "http://www.bio.nite.go.jp/ngac/e/~~", "http://www.bio.nite.go.jp/ngac/e/", FALSE, FALSE);
+ FindReplaceString (str, "http://www.brs.kyushu-u.ac.jp/~fcmic/~~", "http://www.brs.kyushu-u.ac.jp/~fcmic/", FALSE, FALSE);
+ FindReplaceString (str, "http://www.miyazaki-u.ac.jp/ir/english/index.html~~", "http://www.miyazaki-u.ac.jp/ir/english/index.html", FALSE, FALSE);
+ FindReplaceString (str, "URL:http://www.bio.nite.go.jp/~~", "URL:http://www.bio.nite.go.jp/", FALSE, FALSE);
+ FindReplaceString (str, "RAST version 2.0 (http://rast.nmpdr.org/)~~", "RAST version 2.0 (http://rast.nmpdr.org/)", FALSE, FALSE);
+ FindReplaceString (str, "URL:http://www.tmd.ac.jp/grad/bac/database.html~~", "URL:http://www.tmd.ac.jp/grad/bac/database.html", FALSE, FALSE);
+#endif
+}
+
NLM_EXTERN CharPtr FormatCommentBlock (
Asn2gbFormatPtr afp,
BaseBlockPtr bbp
@@ -1870,6 +2002,7 @@ NLM_EXTERN CharPtr FormatCommentBlock (
Boolean as_string = FALSE;
Boolean blank_before = FALSE;
CommentBlockPtr cbp;
+ Char ch;
SeqMgrDescContext dcontext;
CharPtr db;
DbtagPtr dbt;
@@ -1883,6 +2016,7 @@ NLM_EXTERN CharPtr FormatCommentBlock (
SeqFeatPtr sfp;
Char sfx [32];
CharPtr str;
+ CharPtr struc_comm_title = NULL;
CharPtr suffix;
CharPtr title;
UserObjectPtr uop = NULL;
@@ -1968,6 +2102,7 @@ NLM_EXTERN CharPtr FormatCommentBlock (
if (uop != NULL) {
title = GetStrForStructuredComment (ajp, uop);
if (title != NULL) {
+ struc_comm_title = title;
str = GetStructuredCommentTable (ajp, uop);
CatenateCommentInGbseq (ajp, gbseq, str, TRUE, FALSE);
MemFree (str);
@@ -2001,21 +2136,39 @@ NLM_EXTERN CharPtr FormatCommentBlock (
} else {
FFStartPrint (ffstring, afp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
if (blank_before) {
- FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
+ if (! cbp->no_blank_before) {
+ FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
+ }
}
}
str = StringSave (title);
+
+ if (StringDoesHaveText (str)) {
+ CommentTildes (&str);
+ }
+
TrimSpacesAndJunkFromEnds (str, TRUE);
+ /* remove trailing double tilde */
+ /*
+ len = StringLen (str);
+ if (len > 5 && str [len-1] == '~' && str [len-2] == '~') {
+ str [len-2] = '\0';
+ }
+ */
+
if (as_string) {
FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
} else {
if (! IsEllipsis (str)) {
s_RemovePeriodFromEnd (str);
len = StringLen (str);
- if (len > 0 && str [len - 1] != '.') {
- add_period = TRUE;
+ if (len > 0) {
+ ch = str [len - 1];
+ if (ch != '.' && ch != '/' && ch != '~') {
+ add_period = TRUE;
+ }
}
}
AddCommentWithURLlinks(ajp, ffstring, prefix, str, suffix);
@@ -2033,6 +2186,9 @@ NLM_EXTERN CharPtr FormatCommentBlock (
}
FFRecycleString(ajp, ffstring);
+
+ MemFree (struc_comm_title);
+
return str;
}
@@ -2101,7 +2257,7 @@ static Boolean order_initialized = FALSE;
static CharPtr lim_str [5] = { "", ">","<", ">", "<" };
-NLM_EXTERN Boolean GetAccnVerFromServer (Int4 gi, CharPtr buf)
+NLM_EXTERN Boolean GetAccnVerFromServer (BIG_ID gi, CharPtr buf)
{
AccnVerLookupFunc func;
@@ -2648,7 +2804,34 @@ static void FF_DoFlatLoc (
}
}
+static void FF_DoFlatLocEx (
+ IntAsn2gbJobPtr ajp,
+ StringItemPtr ffstring,
+ BioseqPtr bsp,
+ SeqLocPtr location,
+ Boolean ok_to_complement,
+ Boolean isGap,
+ Boolean swapPartials
+)
+
+{
+ Boolean partial5, partial3;
+ SeqLocPtr slp;
+ if (location == NULL) return;
+
+ if (! swapPartials) {
+ FF_DoFlatLoc (ajp, ffstring, bsp, location, ok_to_complement, isGap);
+ return;
+ }
+
+ slp = (SeqLocPtr) AsnIoMemCopy ((Pointer) location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
+ if (slp == NULL) return;
+ CheckSeqLocForPartial (slp, &partial5, &partial3);
+ SetSeqLocPartial (slp, partial3, partial5);
+ FF_DoFlatLoc (ajp, ffstring, bsp, slp, ok_to_complement, isGap);
+ SeqLocFree (slp);
+}
NLM_EXTERN CharPtr FFFlatLoc (
@@ -2663,6 +2846,7 @@ NLM_EXTERN CharPtr FFFlatLoc (
Boolean hasNulls;
IntFuzzPtr fuzz = NULL;
SeqLocPtr loc;
+ Boolean minus_strand = FALSE;
Boolean noLeft;
Boolean noRight;
Uint1 num = 1;
@@ -2699,8 +2883,12 @@ NLM_EXTERN CharPtr FFFlatLoc (
order_initialized = TRUE;
}
+ if (ajp->ajp.slp != NULL) {
+ minus_strand = (Boolean) (SeqLocStrand (ajp->ajp.slp) == Seq_strand_minus);
+ }
+
if (ajp->smallGenomeSet) {
- FF_DoFlatLoc (ajp, ffstring, bsp, location, TRUE, isGap);
+ FF_DoFlatLocEx (ajp, ffstring, bsp, location, TRUE, isGap, minus_strand);
} else if (masterStyle) {
/* map location from parts to segmented bioseq */
@@ -2743,12 +2931,12 @@ NLM_EXTERN CharPtr FFFlatLoc (
}
}
- FF_DoFlatLoc (ajp, ffstring, bsp, loc, TRUE, isGap);
+ FF_DoFlatLocEx (ajp, ffstring, bsp, loc, TRUE, isGap, minus_strand);
SeqLocFree (loc);
} else {
- FF_DoFlatLoc (ajp, ffstring, bsp, location, TRUE, isGap);
+ FF_DoFlatLocEx (ajp, ffstring, bsp, location, TRUE, isGap, minus_strand);
}
str = FFToCharPtr(ffstring);
@@ -2892,9 +3080,9 @@ static void SubSourceToQualArray (
while (ssp != NULL) {
subtype = ssp->subtype;
if (subtype == 255) {
- subtype = 41;
+ subtype = 44;
}
- if (subtype < 42) {
+ if (subtype < 45) {
idx = subSourceToSourceIdx [subtype];
if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
if (qvp [idx].ssp == NULL) {
@@ -2906,7 +3094,7 @@ static void SubSourceToQualArray (
}
}
-NLM_EXTERN SourceType orgModToSourceIdx [41] = {
+NLM_EXTERN SourceType orgModToSourceIdx [42] = {
SCQUAL_zero_orgmod,
SCQUAL_one_orgmod,
SCQUAL_strain,
@@ -2945,6 +3133,7 @@ NLM_EXTERN SourceType orgModToSourceIdx [41] = {
SCQUAL_culture_collection,
SCQUAL_bio_material,
SCQUAL_metagenome_source,
+ SCQUAL_type_material,
SCQUAL_old_lineage,
SCQUAL_old_name,
SCQUAL_orgmod_note
@@ -2964,13 +3153,13 @@ static void OrgModToQualArray (
while (omp != NULL) {
subtype = omp->subtype;
if (subtype == 253) {
- subtype = 38;
- } else if (subtype == 254) {
subtype = 39;
- } else if (subtype == 255) {
+ } else if (subtype == 254) {
subtype = 40;
+ } else if (subtype == 255) {
+ subtype = 41;
}
- if (subtype < 41) {
+ if (subtype < 42) {
idx = orgModToSourceIdx [subtype];
if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
if (qvp [idx].omp == NULL) {
@@ -3132,12 +3321,18 @@ NLM_EXTERN void AddFeatureToGbseq (
if (sfp->data.choice == SEQFEAT_CDREGION &&
StringICmp (qual, "translation") == 0) {
RemoveAllSpaces (gbqual->value);
+ } else if (sfp->data.choice == SEQFEAT_CDREGION &&
+ StringICmp (qual, "coded_by") == 0) {
+ RemoveAllSpaces (gbqual->value);
} else if (sfp->data.choice == SEQFEAT_RNA &&
StringICmp (qual, "transcription") == 0) {
RemoveAllSpaces (gbqual->value);
} else if (sfp->data.choice == SEQFEAT_PROT &&
StringICmp (qual, "peptide") == 0) {
RemoveAllSpaces (gbqual->value);
+ } else if (sfp->data.choice == SEQFEAT_PROT &&
+ StringICmp (qual, "derived_from") == 0) {
+ RemoveAllSpaces (gbqual->value);
}
}
}
@@ -3627,7 +3822,7 @@ static void SetupInstCodeNameTable (void)
ErrSetMessageLevel (sev);
if (fp != NULL) {
FileCacheSetup (&fc, fp);
-
+
str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
while (str != NULL) {
if (StringDoesHaveText (str)) {
@@ -3710,116 +3905,172 @@ static CharPtr FullNameFromInstCode (CharPtr code)
/* specimen_voucher, culture_collection, bio_material hyperlinks */
-#define s_atcc_base "http://www.atcc.org/SearchCatalogs/linkin?id="
-#define s_bcrc_base "http://strain.bcrc.firdi.org.tw/BSAS/controller?event=SEARCH&bcrc_no="
+#define s_acbr_base "http://www.acbr-database.at/BioloMICS.aspx?Link=T&DB=0&Table=0&Descr="
+#define s_atcc_base "http://www.atcc.org/Products/All/"
+#define s_bcrc_base "https://catalog.bcrc.firdi.org.tw/BSAS_cart/controller?event=SEARCH&bcrc_no="
+#define s_cas_base "http://collections.calacademy.org/herp/specimen/"
#define s_cbs_base "http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+"
#define s_ccap_base "http://www.ccap.ac.uk/strain_info.php?Strain_No="
#define s_ccmp_base "https://ccmp.bigelow.org/node/1/strain/CCMP"
#define s_ccug_base "http://www.ccug.se/default.cfm?page=search_record.cfm&db=mc&s_tests=1&ccugno="
-#define s_cori_base "http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref="
+#define s_cfmr_base "http://www.fpl.fs.fed.us/search/mycologysearch_action.php?sorting_rule=1u&phrasesAndKeywords02="
+#define s_cori_base "http://ccr.coriell.org/Sections/Search/Search.aspx?q="
#define s_dsmz_base "http://www.dsmz.de/catalogues/details/culture/DSM-"
+#define s_frr_base "http://www.foodscience.csiro.au/cgi-bin/rilax/search.pl?stpos=0&stype=AND&query="
#define s_fsu_base "http://www.prz.uni-jena.de/data.php?fsu="
+#define s_jcm_base "http://www.jcm.riken.jp/cgi-bin/jcm/jcm_number?JCM="
#define s_kctc_base "http://www.brc.re.kr/English/_SearchView.aspx?sn="
-#define s_ku_base "http://collections.nhm.ku.edu/"
+#define s_ku_base "https://ichthyology.specify.ku.edu/specify/bycatalog/"
#define s_lcr_base "http://scd.landcareresearch.co.nz/Specimen/"
+#define s_maff_base "http://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff="
+#define s_mcz_base "http://mczbase.mcz.harvard.edu/guid/"
+#define s_mtcc_base "http://mtcc.imtech.res.in/catalogue_hyper.php?a="
+#define s_mucl_base "http://bccm.belspo.be/db/mucl_search_results.php?FIRSTITEM=1&LIST1=STRAIN_NUMBER&TEXT1="
+#define s_nbrc_base "http://www.nbrc.nite.go.jp/NBRC2/NBRCCatalogueDetailServlet?ID=NBRC&CAT="
+#define s_ncimb_base "http://www.ncimb.com/BioloMICS.aspx?Table=NCIMBstrains&ExactMatch=T&Fields=All&Name=NCIMB%20"
+#define s_nctc_base "https://www.phe-culturecollections.org.uk/products/bacteria/detail.jsp?collection=nctc&refId=NCTC+"
+#define s_nrrl_base "http://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/report.html?nrrlcodes="
+#define s_nrrl_mold "http://nrrl.ncaur.usda.gov/cgi-bin/usda/mold/report.html?nrrlcodes="
+#define s_nrrl_prok "http://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/report.html?nrrlcodes="
+#define s_nrrl_yest "http://nrrl.ncaur.usda.gov/cgi-bin/usda/yeast/report.html?nrrlcodes="
#define s_pcc_base "http://www.crbip.pasteur.fr/fiches/fichecata.jsp?crbip=PCC+"
#define s_pcmb_base "http://www2.bishopmuseum.org/HBS/PCMB/results3.asp?searchterm3="
+#define s_pycc_base "http://pycc.bio-aware.com/BioloMICS.aspx?Table=PYCC%20strains&Name=PYCC%20"
#define s_sag_base "http://sagdb.uni-goettingen.de/detailedList.php?str_number="
#define s_tgrc_base "http://tgrc.ucdavis.edu/Data/Acc/AccDetail.aspx?AccessionNum="
#define s_uam_base "http://arctos.database.museum/guid/"
-#define s_ypm_base "http://peabody.research.yale.edu/cgi-bin/Query.Ledger?"
+#define s_uamh_base "https://secure.devonian.ualberta.ca/uamh/details.php?id="
+#define s_usnm_base "http://collections.mnh.si.edu/services/resolver/resolver.php?"
+#define s_ypm_base "http://collections.peabody.yale.edu/search/Record/"
#define s_colon_pfx ":"
#define s_uscr_pfx "_"
-#define s_kui_pfx "KU_Fish/detail.jsp?record="
-#define s_kuit_pfx "KU_Tissue/detail.jsp?record="
+#define s_kui_pfx "KUI/"
+#define s_kuit_pfx "KUIT/"
#define s_psu_pfx "PSU:Mamm:"
-
-#define s_ypment_pfx "LE=ent&ID="
-#define s_ypmher_pfx "LE=her&ID="
-#define s_ypmich_pfx "LE=ich&ID="
-#define s_ypmiz_pfx "LE=iz&ID="
-#define s_ypmmam_pfx "LE=mam&ID="
-#define s_ypmorn_pfx "LE=orn&ID="
-
-#define s_bcrc_sfx "&type_id=6&keyword=;;"
+#define s_usnm_pfx "voucher=Birds:"
+
+#define s_ypment_pfx "YPM-ENT-"
+#define s_ypmher_pfx "YPM-HER-"
+#define s_ypmich_pfx "YPM-ICH-"
+#define s_ypmiz_pfx "YPM-IZ-"
+#define s_ypmmam_pfx "YPM-MAM-"
+#define s_ypmorn_pfx "YPM-ORN-"
+
+#define s_acbr_sfx "&Fields=All&ExactMatch=T"
+#define s_atcc_sfx ".aspx"
+#define s_bcrc_sfx "&type_id=9&keyword="
+#define s_ku_sfx "/"
+#define s_mucl_sfx "&LIST2=ALL+FIELDS&CONJ=OR&RANGE=20&B3=Run+Query"
+#define s_pycc_sfx "&Fields=All&ExactMatch=T"
typedef struct vouch {
CharPtr sites;
CharPtr links;
Boolean prepend_institute;
+ Int2 pad_to;
+ CharPtr pad_with;
CharPtr prefix;
CharPtr suffix;
} VouchData, PNTR VouchDataPtr;
static VouchData Nlm_spec_vouchers [] = {
- { "ATCC", s_atcc_base, FALSE, NULL, NULL },
- { "BCRC", s_bcrc_base, FALSE, NULL, s_bcrc_sfx },
- { "CBS", s_cbs_base, FALSE, NULL, NULL },
- { "CCAP", s_ccap_base, FALSE, NULL, NULL },
- { "CCMP", s_ccmp_base, FALSE, NULL, NULL },
- { "CCUG", s_ccug_base, FALSE, NULL, NULL },
- { "CHR", s_lcr_base, TRUE, s_uscr_pfx, NULL },
- { "Coriell", s_cori_base, FALSE, NULL, NULL },
- { "CRCM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DGR:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DGR:Ento", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DGR:Fish", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DGR:Herp", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DGR:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DMNS:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DMNS:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "DSM", s_dsmz_base, FALSE, NULL, NULL },
- { "FSU<DEU>", s_fsu_base, FALSE, NULL, NULL },
- { "ICMP", s_lcr_base, TRUE, s_uscr_pfx, NULL },
- { "KCTC", s_kctc_base, FALSE, NULL, NULL },
- { "KNWR:Ento", s_uam_base , TRUE, s_colon_pfx, NULL },
- { "KU:I", s_ku_base, FALSE, s_kui_pfx, NULL },
- { "KU:IT", s_ku_base, FALSE, s_kuit_pfx, NULL },
- { "KWP:Ento", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MLZ:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MLZ:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MSB:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MSB:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MSB:Para", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZ:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZ:Egg", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZ:Herp", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZ:Hild", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZ:Img", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZ:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZ:Page", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "MVZObs:Herp", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "NBSB:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "NZAC", s_lcr_base, TRUE, s_uscr_pfx, NULL },
- { "PCC", s_pcc_base, FALSE, NULL, NULL },
- { "PCMB", s_pcmb_base, FALSE, NULL, NULL },
- { "PDD", s_lcr_base, TRUE , s_uscr_pfx, NULL },
- { "PSU<USA-OR>:Mamm", s_uam_base, FALSE, s_psu_pfx, NULL },
- { "SAG", s_sag_base, FALSE, NULL, NULL },
- { "TGRC", s_tgrc_base, FALSE, NULL, NULL },
- { "UAM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Bryo", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Crus", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Ento", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Fish", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Herb", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Herp", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Moll", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAM:Paleo", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "UAMObs:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "WNMU:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "WNMU:Fish", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "WNMU:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "YPM:ENT", s_ypm_base, FALSE, s_ypment_pfx, NULL },
- { "YPM:HER", s_ypm_base, FALSE, s_ypmher_pfx, NULL },
- { "YPM:ICH", s_ypm_base, FALSE, s_ypmich_pfx, NULL },
- { "YPM:IZ", s_ypm_base, FALSE, s_ypmiz_pfx, NULL },
- { "YPM:MAM", s_ypm_base, FALSE, s_ypmmam_pfx, NULL },
- { "YPM:ORN", s_ypm_base, FALSE, s_ypmorn_pfx, NULL },
- { NULL, NULL, FALSE, NULL, NULL }
+ { "ACBR", s_acbr_base, FALSE, 0, "", NULL, s_acbr_sfx },
+ { "ATCC", s_atcc_base, FALSE, 0, "", NULL, s_atcc_sfx },
+ { "BCRC", s_bcrc_base, FALSE, 0, "", NULL, s_bcrc_sfx },
+ { "CAS:HERP", s_cas_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "CBS", s_cbs_base, FALSE, 0, "", NULL, NULL },
+ { "CCAP", s_ccap_base, FALSE, 0, "", NULL, NULL },
+ { "CCMP", s_ccmp_base, FALSE, 0, "", NULL, NULL },
+ { "CCUG", s_ccug_base, FALSE, 0, "", NULL, NULL },
+ { "CFMR", s_cfmr_base, FALSE, 0, "", NULL, NULL },
+ { "CHR", s_lcr_base, TRUE, 0, "", s_uscr_pfx, NULL },
+ { "Coriell", s_cori_base, FALSE, 0, "", NULL, NULL },
+ { "CRCM:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DGR:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DGR:Ento", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DGR:Fish", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DGR:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DGR:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DMNS:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DMNS:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "DSM", s_dsmz_base, FALSE, 0, "", NULL, NULL },
+ { "FRR", s_frr_base, FALSE, 0, "", NULL, NULL },
+ { "FSU<DEU>", s_fsu_base, FALSE, 0, "", NULL, NULL },
+ { "ICMP", s_lcr_base, TRUE, 0, "", s_uscr_pfx, NULL },
+ { "JCM", s_jcm_base, FALSE, 0, "", NULL, NULL },
+ { "KCTC", s_kctc_base, FALSE, 0, "", NULL, NULL },
+ { "KNWR:Ento", s_uam_base , TRUE, 0, "", s_colon_pfx, NULL },
+ { "KU:I", s_ku_base, FALSE, 0, "", s_kui_pfx, s_ku_sfx },
+ { "KU:IT", s_ku_base, FALSE, 0, "", s_kuit_pfx, s_ku_sfx },
+ { "KWP:Ento", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MAFF", s_maff_base, FALSE, 0, "", NULL, NULL },
+ { "MCZ:Bird", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Cryo", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Ent", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Fish", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Herp", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Ich", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:IP", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:IZ", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Mala", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Mamm", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MCZ:Orn", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MLZ:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MLZ:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MSB:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MSB:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MSB:Para", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MTCC", s_mtcc_base, FALSE, 0, "", NULL, NULL },
+ { "MUCL", s_mucl_base, FALSE, 0, "", NULL, s_mucl_sfx },
+ { "MVZ:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MVZ:Egg", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MVZ:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MVZ:Hild", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MVZ:Img", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MVZ:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MVZ:Page", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "MVZObs:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "NBRC", s_nbrc_base, FALSE, 0, "", NULL, NULL },
+ { "NBSB:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "NCIMB", s_ncimb_base, FALSE, 0, "", NULL, NULL },
+ { "NCTC", s_nctc_base, FALSE, 0, "", NULL, NULL },
+ { "NRRL", s_nrrl_base, FALSE, 0, "", NULL, NULL },
+ { "NRRL:MOLD", s_nrrl_mold, FALSE, 0, "", NULL, NULL },
+ { "NRRL:PROK", s_nrrl_prok, FALSE, 0, "", NULL, NULL },
+ { "NRRL:YEAST", s_nrrl_yest, FALSE, 0, "", NULL, NULL },
+ { "NZAC", s_lcr_base, TRUE, 0, "", s_uscr_pfx, NULL },
+ { "PCC", s_pcc_base, FALSE, 0, "", NULL, NULL },
+ { "PCMB", s_pcmb_base, FALSE, 0, "", NULL, NULL },
+ { "PDD", s_lcr_base, TRUE , 0, "", s_uscr_pfx, NULL },
+ { "PSU<USA-OR>:Mamm", s_uam_base, FALSE, 0, "", s_psu_pfx, NULL },
+ { "PYCC", s_pycc_base, FALSE, 0, "", NULL, s_pycc_sfx },
+ { "SAG", s_sag_base, FALSE, 0, "", NULL, NULL },
+ { "TGRC", s_tgrc_base, FALSE, 0, "", NULL, NULL },
+ { "UAM:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Bryo", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Crus", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Ento", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Fish", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Herb", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Moll", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAM:Paleo", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "UAMH", s_uamh_base, FALSE, 0, "", NULL, NULL },
+ { "UAMObs:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "USNM:Birds", s_usnm_base, FALSE, 0, "", s_usnm_pfx, NULL },
+ { "WNMU:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "WNMU:Fish", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "WNMU:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
+ { "YPM:ENT", s_ypm_base, FALSE, 6, "0", s_ypment_pfx, NULL },
+ { "YPM:HER", s_ypm_base, FALSE, 6, "0", s_ypmher_pfx, NULL },
+ { "YPM:ICH", s_ypm_base, FALSE, 6, "0", s_ypmich_pfx, NULL },
+ { "YPM:IZ", s_ypm_base, FALSE, 6, "0", s_ypmiz_pfx, NULL },
+ { "YPM:MAM", s_ypm_base, FALSE, 6, "0", s_ypmmam_pfx, NULL },
+ { "YPM:ORN", s_ypm_base, FALSE, 6, "0", s_ypmorn_pfx, NULL },
+ { NULL, NULL, FALSE, 0, "", NULL, NULL }
};
static Int2 VoucherNameIsValid (
@@ -3850,9 +4101,9 @@ static Int2 VoucherNameIsValid (
}
}
- /* case sensitive comparison at end enforces strictness */
+ /* comparison is now case insensitive; switch back to case sensitive StringCmp to restore case strictness */
- if (StringCmp (Nlm_spec_vouchers [R].sites, str) == 0) {
+ if (StringICmp (Nlm_spec_vouchers [R].sites, str) == 0) {
return R;
}
@@ -3906,6 +4157,7 @@ static void Do_www_specimen_voucher (
)
{
+ size_t len_id, len_pad;
CharPtr mouseover = NULL;
if ( ffstring == NULL || inst == NULL || id == NULL || vdp == NULL || vdp->links == NULL ) return;
@@ -3929,6 +4181,14 @@ static void Do_www_specimen_voucher (
if (vdp->prefix != NULL) {
FFAddOneString (ffstring, vdp->prefix, FALSE, FALSE, TILDE_IGNORE);
}
+ if (vdp->pad_to > 0) {
+ len_id = StringLen (id);
+ len_pad = StringLen (vdp->pad_with);
+ while (len_id < vdp->pad_to) {
+ FFAddOneString (ffstring, vdp->pad_with, FALSE, FALSE, TILDE_IGNORE);
+ len_id += len_pad;
+ }
+ }
FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
if (vdp->suffix != NULL) {
FFAddOneString (ffstring, vdp->suffix, FALSE, FALSE, TILDE_IGNORE);
@@ -3946,7 +4206,7 @@ NLM_EXTERN void FF_www_specimen_voucher (
{
Char buf [512];
- CharPtr inst = NULL, id = NULL, mouseover = NULL;
+ CharPtr inst = NULL, id = NULL, mouseover = NULL, encoded;
Int2 R;
VouchDataPtr vdp;
@@ -3967,7 +4227,9 @@ NLM_EXTERN void FF_www_specimen_voucher (
FFAddOneString (ffstring, "<acronym title=\"", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, mouseover, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, "\" class=\"voucher\">", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
+ encoded = EncodeXml (inst);
+ FFAddOneString (ffstring, encoded, FALSE, FALSE, TILDE_IGNORE);
+ MemFree (encoded);
FFAddOneString (ffstring, "</acronym>", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
@@ -3981,7 +4243,9 @@ NLM_EXTERN void FF_www_specimen_voucher (
FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
return;
}
- Do_www_specimen_voucher (ffstring, inst, id, vdp);
+ encoded = EncodeXml (inst);
+ Do_www_specimen_voucher (ffstring, encoded, id, vdp);
+ MemFree (encoded);
}
static void Do_www_lat_lon (
@@ -4096,7 +4360,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
BioseqSetPtr bssp;
Char buf [128], pfx [512], sfx [128];
CharPtr common = NULL;
- Int4 currGi = 0;
+ BIG_ID currGi = 0;
DbtagPtr dbt;
SeqMgrDescContext dcontext;
SeqMgrFeatContext fcontext;
@@ -4206,7 +4470,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- currGi = (Int4) sip->data.intvalue;
+ currGi = (BIG_ID) sip->data.intvalue;
}
}
@@ -4313,7 +4577,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
}
}
}
-
+
if (ajp->refseqConventions) {
is_other = TRUE;
}
@@ -4417,6 +4681,21 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
qvp [SCQUAL_metagenome_source].omp = NULL;
}
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ if (qvp [SCQUAL_altitude].ssp != NULL) {
+ ssp = qvp [SCQUAL_altitude].ssp;
+ if (! AltitudeIsValid (ssp->name)) {
+ qvp [SCQUAL_altitude].ssp = NULL;
+ }
+ }
+ if (qvp [SCQUAL_type_material].omp != NULL) {
+ ssp = qvp [SCQUAL_type_material].ssp;
+ if (! TypeMaterialIsValid (ssp->name)) {
+ qvp [SCQUAL_type_material].ssp = NULL;
+ }
+ }
+ }
+
#if 0
if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
/* leave mating_type as a separate qualifier */
@@ -4811,7 +5090,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
if (add_period) {
prefix = ".\n";
} else {
- prefix = "\n";
+ prefix = ";\n";
}
} else {
prefix = "; ";
@@ -4855,7 +5134,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
if (add_period) {
prefix = ".\n";
} else {
- prefix = "\n";
+ prefix = ";\n";
}
} else {
prefix = "; ";
@@ -4916,7 +5195,7 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
}
if ( !FFEmpty(unique) ) {
notestr = FFToCharPtr(unique);
-
+
if (add_period) {
s_AddPeriodToEnd (notestr);
}
@@ -5094,7 +5373,7 @@ static void PrintSeqLine (
StringItemPtr ffstring,
FmtType format,
CharPtr buf,
- Int4 gi,
+ BIG_ID gi,
Int4 startwithoutgap,
Int4 start,
Int4 stop
@@ -5173,7 +5452,7 @@ static CharPtr CompressNonBases (CharPtr str)
10, /* 7 = pir */
10, /* 8 = swissprot */
15, /* 9 = patent */
- 20, /* 10 = other TextSeqId */
+ 10, /* 10 = other = refseq */
20, /* 11 = general Dbtag */
255, /* 12 = gi */
10, /* 13 = ddbj */
@@ -5182,8 +5461,8 @@ static CharPtr CompressNonBases (CharPtr str)
10, /* 16 = tpg */
10, /* 17 = tpe */
10, /* 18 = tpd */
- 10, /* 19 = gpp */
- 10 /* 20 = nat */
+ 15, /* 19 = gpp */
+ 15 /* 20 = nat */
};
static void PrintGenome (
@@ -5195,11 +5474,12 @@ static void PrintGenome (
Boolean is_na
)
{
- Char buf[40], gibuf [32], vbuf [80];
+ Char buf[128], gibuf [32], vbuf [128];
Boolean first = TRUE;
SeqIdPtr freeid = NULL, sid = NULL, newid = NULL;
SeqLocPtr slp = NULL;
- Int4 start = 0, stop = 0, gi = 0;
+ Int4 start = 0, stop = 0;
+ BIG_ID gi = 0;
BioseqPtr bsp = NULL;
Int2 p1 = 0, p2 = 0;
@@ -5226,7 +5506,7 @@ static void PrintGenome (
buf [0] = '\0';
gi = 0;
if (sid->choice == SEQID_GI) {
- gi = sid->data.intvalue;
+ gi = (BIG_ID) sid->data.intvalue;
if (GetAccnVerFromServer (gi, buf)) {
/* no need to call GetSeqIdForGI */
} else {
@@ -5449,7 +5729,7 @@ NLM_EXTERN CharPtr FormatContigBlock (
} else {
label = "CONTIG";
}
-
+
FFAddOneString(ffstring, label, FALSE, FALSE, TILDE_IGNORE);
FFAddNChar(ffstring, ' ', 12 - StringLen(label), FALSE);
*/
@@ -5514,6 +5794,12 @@ NLM_EXTERN CharPtr FormatContigBlock (
prefix = ",";
}
+
+ } else if (bsp->seq_ext_type == 2) {
+
+ slp = (SeqLocPtr) bsp->seq_ext;
+ PrintGenome (ajp, ffstring, slp, prefix, segWithParts, is_na);
+
}
FFAddOneChar (ffstring, ')', FALSE);
@@ -5605,7 +5891,7 @@ static Int2 GapAtStart (
for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
if (ch != '-') return j;
}
- return 0;
+ return j;
}
static void FixGapAtStart (
@@ -5724,8 +6010,8 @@ static Int2 ProcessGapSpecialFormat (
{
Char fmt_buf [64];
Char gapbuf [80];
- Int4 gi;
- Char gi_buf [16];
+ BIG_ID gi;
+ Char gi_buf [32];
Boolean is_na;
Char pad;
Char rgn_buf [64];
@@ -5761,7 +6047,7 @@ static Int2 ProcessGapSpecialFormat (
gi = 0;
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
if (gi > 0) {
@@ -5851,7 +6137,7 @@ NLM_EXTERN CharPtr FormatSequenceBlock (
Int4 extend;
StreamFlgType flags = STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL;
GBSeqPtr gbseq;
- Int4 gi = 0;
+ BIG_ID gi = 0;
IntAsn2gbSectPtr iasp;
Int2 lin;
SeqLocPtr loc;
@@ -5988,7 +6274,7 @@ NLM_EXTERN CharPtr FormatSequenceBlock (
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice != SEQID_GI) continue;
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
/* format subsequence cached with SeqPortStream */
@@ -6207,6 +6493,7 @@ NLM_EXTERN CharPtr FormatSlashBlock (
is.sequence = gbseq->sequence;
is.contig = gbseq->contig;
is.alt_seq = (INSDAltSeqDataPtr) gbseq->alt_seq;
+ is.xrefs = (INSDXrefPtr) gbseq->xrefs;
INSDSeqAsnWrite (&is, afp->aip, afp->atp);
} else {
GBSeqAsnWrite (gbseq, afp->aip, afp->atp);
diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h
index 652b0693..ef91eaf7 100644
--- a/api/asn2gnbi.h
+++ b/api/asn2gnbi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/30/03
*
-* $Revision: 1.147 $
+* $Revision: 1.166 $
*
* File Description: New GenBank flatfile generator, internal header
*
@@ -146,7 +146,7 @@ typedef struct int_asn2gb_job {
Boolean oldXmlPolicy;
Boolean refseqConventions;
ValNodePtr lockedBspList;
- ValNodePtr fargaps;
+ ValNodePtr manygaps;
ValNodePtr gapvnp;
ValNodePtr remotevnp;
Asn2gbLockFunc remotelock;
@@ -164,16 +164,21 @@ typedef struct int_asn2gb_job {
Boolean www;
Boolean specialGapFormat;
Boolean relaxedMapping;
+ Boolean gpipdDeflines;
+ Boolean hideProteinID;
Boolean hideGoTerms;
Boolean multiIntervalGenes;
Boolean segmentedBioseqs;
Boolean smallGenomeSet;
Boolean reindex;
Int4 seqGapCurrLen;
+ Boolean hideGI;
ValNodePtr gihead;
ValNodePtr gitail;
TextFsaPtr bad_html_fsa;
Boolean seqspans;
+ Int2 sat;
+ Int4 sat_key;
} IntAsn2gbJob, PNTR IntAsn2gbJobPtr;
/* array for assigning biosource and feature data fields to qualifiers */
@@ -267,9 +272,9 @@ typedef struct asn2gbwork {
Int2 sectionCount;
Int2 sectionMax;
- Int4 currGi;
- Int4 prevGi;
- Int4 nextGi;
+ BIG_ID currGi;
+ BIG_ID prevGi;
+ BIG_ID nextGi;
ValNodePtr gilistpos;
Boolean showAllFeats;
@@ -315,19 +320,22 @@ typedef struct asn2gbwork {
Boolean copyGpsCdsUp;
Boolean copyGpsGeneDown;
+ Boolean isNCBIGenomes;
Boolean isRefSeq;
Boolean showContigAndSeq;
Char basename [SEQID_MAX_LEN];
- ValNodePtr currfargap;
+ SeqFeatPtr currfargap;
SeqFeatPtr lastsfp;
SeqAnnotPtr lastsap;
Int4 lastleft;
Int4 lastright;
+ SeqFeatPtr bestprot;
+
Boolean firstfeat;
Boolean featseen;
Boolean featjustseen;
@@ -357,6 +365,7 @@ typedef struct asn2gbwork {
typedef struct comment_block {
ASN2GB_BASE_BLOCK
Boolean first;
+ Boolean no_blank_before;
} CommentBlock, PNTR CommentBlockPtr;
/* internal reference block has fields on top of RefBlock fields */
@@ -515,6 +524,7 @@ typedef enum {
typedef enum {
SCQUAL_acronym = 1,
+ SCQUAL_altitude,
SCQUAL_anamorph,
SCQUAL_authority,
SCQUAL_biotype,
@@ -582,6 +592,7 @@ typedef enum {
SCQUAL_PCR_primers,
SCQUAL_PCR_primer_note,
SCQUAL_PCR_reaction,
+ SCQUAL_phenotype,
SCQUAL_plasmid_name,
SCQUAL_plastid_name,
SCQUAL_pop_variant,
@@ -611,16 +622,18 @@ typedef enum {
SCQUAL_transgenic,
SCQUAL_transposon_name,
SCQUAL_type,
+ SCQUAL_type_material,
SCQUAL_unstructured,
SCQUAL_usedin,
SCQUAL_variety,
+ SCQUAL_whole_replicon,
SCQUAL_zero_orgmod,
SCQUAL_one_orgmod,
SCQUAL_zero_subsrc,
ASN2GNBK_TOTAL_SOURCE
} SourceType;
-NLM_EXTERN SourceType orgModToSourceIdx [41];
+NLM_EXTERN SourceType orgModToSourceIdx [42];
typedef enum {
FTQUAL_allele = 1,
@@ -720,6 +733,9 @@ typedef enum {
FTQUAL_pyrrolysine_note,
FTQUAL_region,
FTQUAL_region_name,
+ FTQUAL_regulatory_class,
+ FTQUAL_regulatory_note,
+ FTQUAL_regulatory_other,
FTQUAL_replace,
FTQUAL_ribosomal_slippage,
FTQUAL_rpt_family,
@@ -789,7 +805,7 @@ typedef struct sourcequal {
NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE];
-NLM_EXTERN SourceType subSourceToSourceIdx [42];
+NLM_EXTERN SourceType subSourceToSourceIdx [45];
NLM_EXTERN void DoOneSection (
BioseqPtr target,
@@ -848,6 +864,10 @@ NLM_EXTERN void AddCommentWithURLlinks (
CharPtr str,
CharPtr suffix
);
+NLM_EXTERN CharPtr GetStrForStructuredComment (
+ IntAsn2gbJobPtr ajp,
+ UserObjectPtr uop
+);
NLM_EXTERN void AddStringWithTildes (StringItemPtr ffstring, CharPtr string);
NLM_EXTERN void FFProcessTildes (StringItemPtr sip, CharPtr PNTR cpp, Int2 tildeAction);
NLM_EXTERN void FFAddPeriod (StringItemPtr sip);
@@ -1097,6 +1117,19 @@ NLM_EXTERN void FF_asn2gb_www_featkey (
Uint4 itemID
);
+NLM_EXTERN void FF_asn2gb_www_featkey_Ex (
+ StringItemPtr ffstring,
+ CharPtr key,
+ SeqFeatPtr sfp,
+ SeqLocPtr slp,
+ Int4 from,
+ Int4 to,
+ Uint1 strand,
+ Uint4 itemID,
+ Int2 sat,
+ Int4 sat_key
+);
+
NLM_EXTERN CharPtr AddJsInterval (
IntAsn2gbSectPtr iasp,
CharPtr pfx,
@@ -1181,7 +1214,7 @@ NLM_EXTERN CharPtr StripAllSpaces (
CharPtr str
);
-NLM_EXTERN Boolean GetAccnVerFromServer (Int4 gi, CharPtr buf);
+NLM_EXTERN Boolean GetAccnVerFromServer (BIG_ID gi, CharPtr buf);
NLM_EXTERN CharPtr bondList [];
NLM_EXTERN CharPtr siteList [];
@@ -1226,6 +1259,7 @@ NLM_EXTERN void AddLocusBlock (
Asn2gbWorkPtr awp,
Boolean willshowwgs,
Boolean willshowtsa,
+ Boolean willshowtls,
Boolean willshowcage,
Boolean willshowgenome,
Boolean willshowcontig,
@@ -1257,10 +1291,7 @@ NLM_EXTERN void AddSegmentBlock (
Boolean onePartOfSeg,
Boolean is_na
);
-NLM_EXTERN void AddSourceBlock (
- Asn2gbWorkPtr awp
-);
-NLM_EXTERN void AddOrganismBlock (
+NLM_EXTERN void AddSourceOrganismBlock (
Asn2gbWorkPtr awp
);
NLM_EXTERN void AddCommentBlock (
@@ -1281,6 +1312,9 @@ NLM_EXTERN void AddWGSBlock (
NLM_EXTERN void AddTSABlock (
Asn2gbWorkPtr awp
);
+NLM_EXTERN void AddTLSBlock (
+ Asn2gbWorkPtr awp
+);
NLM_EXTERN void AddCAGEBlock (
Asn2gbWorkPtr awp
);
@@ -1395,6 +1429,13 @@ NLM_EXTERN Boolean SegHasParts (
BioseqPtr bsp
);
+NLM_EXTERN void SetIfpFeatCount (
+ IntFeatBlockPtr ifp,
+ IntAsn2gbJobPtr ajp,
+ Asn2gbWorkPtr awp,
+ Boolean isProt
+);
+
#ifdef __cplusplus
}
diff --git a/api/asn2gnbk.h b/api/asn2gnbk.h
index bafc90f1..7a217f79 100644
--- a/api/asn2gnbk.h
+++ b/api/asn2gnbk.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.89 $
+* $Revision: 6.92 $
*
* File Description: New GenBank flatfile generator
*
@@ -123,6 +123,12 @@ typedef unsigned long FlgType;
#define RELAXED_MAPPING 524288
+#define GPIPE_DEFLINES 1048576
+
+#define HIDE_PROTEIN_ID 2097152
+
+#define HIDE_GI_NUMBERS 4194304
+
/* locking behavior for system performance */
typedef unsigned long LckType;
diff --git a/api/asn2gnbp.h b/api/asn2gnbp.h
index fab325e9..3a6aae60 100644
--- a/api/asn2gnbp.h
+++ b/api/asn2gnbp.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.46 $
+* $Revision: 6.47 $
*
* File Description: New GenBank flatfile generator, private header
*
@@ -289,6 +289,8 @@ typedef struct XtraData {
BlockMask bkmask;
Boolean reindex;
Boolean seqspans;
+ Int2 sat;
+ Int4 sat_key;
} XtraBlock;
diff --git a/api/ecnum_ambiguous.inc b/api/ecnum_ambiguous.inc
index 6e9c9722..b3737bbf 100644
--- a/api/ecnum_ambiguous.inc
+++ b/api/ecnum_ambiguous.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_ambiguous.inc,v 1.6 2012/01/25 17:29:33 kazimird Exp $
+/* $Id: ecnum_ambiguous.inc,v 1.12 2015/11/04 22:54:26 fukanchi Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -77,6 +77,8 @@ static const char* const kECNum_ambiguous[] = {
"1.3.2.n With a cytochrome as acceptor",
"1.3.3.- With oxygen as acceptor",
"1.3.3.n With oxygen as acceptor",
+ "1.3.4.- With a disulfide as acceptor",
+ "1.3.4.n With a disulfide as acceptor",
"1.3.5.- With a quinone or related compound as acceptor",
"1.3.5.n With a quinone or related compound as acceptor",
"1.3.7.- With an iron-sulfur protein as acceptor",
@@ -131,12 +133,18 @@ static const char* const kECNum_ambiguous[] = {
"1.6.1.n With NAD(+) or NADP(+) as acceptor",
"1.6.2.- With a heme protein as acceptor",
"1.6.2.n With a heme protein as acceptor",
- "1.6.3.- With a oxygen as acceptor",
- "1.6.3.n With a oxygen as acceptor",
+ "1.6.3.- With oxygen as acceptor",
+ "1.6.3.n With oxygen as acceptor",
+ "1.6.4.- With a disulfide as acceptor",
+ "1.6.4.n With a disulfide as acceptor",
"1.6.5.- With a quinone or similar compound as acceptor",
"1.6.5.n With a quinone or similar compound as acceptor",
"1.6.6.- With a nitrogenous group as acceptor",
"1.6.6.n With a nitrogenous group as acceptor",
+ "1.6.7.- With a iron-sulfur protein as acceptor",
+ "1.6.7.n With a iron-sulfur protein as acceptor",
+ "1.6.8.- With a flavin as acceptor",
+ "1.6.8.n With a flavin as acceptor",
"1.6.99.- With other acceptors",
"1.6.99.n With other acceptors",
"1.7.-.- Acting on other nitrogenous compounds as donors",
@@ -169,6 +177,8 @@ static const char* const kECNum_ambiguous[] = {
"1.8.4.n With a disulfide as acceptor",
"1.8.5.- With a quinone or similar compound as acceptor",
"1.8.5.n With a quinone or similar compound as acceptor",
+ "1.8.6.- With an nitrogenous group as acceptor",
+ "1.8.6.n With an nitrogenous group as acceptor",
"1.8.7.- With an iron-sulfur protein as acceptor",
"1.8.7.n With an iron-sulfur protein as acceptor",
"1.8.98.- With other, known, acceptors",
@@ -193,6 +203,8 @@ static const char* const kECNum_ambiguous[] = {
"1.10.2.n With a cytochrome as acceptor",
"1.10.3.- With oxygen as acceptor",
"1.10.3.n With oxygen as acceptor",
+ "1.10.5.- With a quinone or related compound as acceptor",
+ "1.10.5.n With a quinone or related compound as acceptor",
"1.10.9.- With a copper protein as acceptor",
"1.10.9.n With a copper protein as acceptor",
"1.10.98.- With other, known, acceptors",
@@ -205,8 +217,8 @@ static const char* const kECNum_ambiguous[] = {
"1.11.1.n Peroxidases",
"1.11.2.- With H(2)O(2) as acceptor, one oxygen atom of which is incorporated into the product",
"1.11.2.n With H(2)O(2) as acceptor, one oxygen atom of which is incorporated into the product",
- "1.12.-.- Acting on hydrogen as donor",
- "1.12.n.n Acting on hydrogen as donor",
+ "1.12.-.- Acting on hydrogen as donors",
+ "1.12.n.n Acting on hydrogen as donors",
"1.12.1.- With NAD(+) or NADP(+) as acceptor",
"1.12.1.n With NAD(+) or NADP(+) as acceptor",
"1.12.2.- With a cytochrome as acceptor",
@@ -215,20 +227,28 @@ static const char* const kECNum_ambiguous[] = {
"1.12.5.n With a quinone or similar compound as acceptor",
"1.12.7.- With an iron-sulfur protein as acceptor",
"1.12.7.n With an iron-sulfur protein as acceptor",
- "1.12.98.- With other known acceptors",
- "1.12.98.n With other known acceptors",
+ "1.12.98.- With other, known, acceptors",
+ "1.12.98.n With other, known, acceptors",
"1.12.99.- With other acceptors",
"1.12.99.n With other acceptors",
- "1.13.-.- Acting on single donors with incorporation of molecular oxygen",
- "1.13.n.n Acting on single donors with incorporation of molecular oxygen",
+ "1.13.-.- Acting on single donors with incorporation of molecular oxygen (oxygenases). The oxygen incorporated need not be derived from O(2)",
+ "1.13.n.n Acting on single donors with incorporation of molecular oxygen (oxygenases). The oxygen incorporated need not be derived from O(2)",
+ "1.13.1.- With NADH or NADPH as one donor",
+ "1.13.1.n With NADH or NADPH as one donor",
"1.13.11.- With incorporation of two atoms of oxygen",
"1.13.11.n With incorporation of two atoms of oxygen",
- "1.13.12.- With incorporation of one atom of oxygen",
- "1.13.12.n With incorporation of one atom of oxygen",
- "1.13.99.- Miscellaneous (requires further characterization)",
- "1.13.99.n Miscellaneous (requires further characterization)",
- "1.14.-.- Acting on paired donors, with incorporation or reduction of molecular oxygen",
- "1.14.n.n Acting on paired donors, with incorporation or reduction of molecular oxygen",
+ "1.13.12.- With incorporation of one atom of oxygen (internal monooxygenases or internal mixed function oxidases)",
+ "1.13.12.n With incorporation of one atom of oxygen (internal monooxygenases or internal mixed function oxidases)",
+ "1.13.99.- Miscellaneous",
+ "1.13.99.n Miscellaneous",
+ "1.14.-.- Acting on paired donors, with incorporation or reduction of molecular oxygen. The oxygen incorporated need not be derived from O(2)",
+ "1.14.n.n Acting on paired donors, with incorporation or reduction of molecular oxygen. The oxygen incorporated need not be derived from O(2)",
+ "1.14.1.- With NADH or NADPH as one donor",
+ "1.14.1.n With NADH or NADPH as one donor",
+ "1.14.2.- With ascorbate as one donor",
+ "1.14.2.n With ascorbate as one donor",
+ "1.14.3.- With reduced pteridine as one donor",
+ "1.14.3.n With reduced pteridine as one donor",
"1.14.11.- With 2-oxoglutarate as one donor, and incorporation of one atom each of oxygen into both donors",
"1.14.11.n With 2-oxoglutarate as one donor, and incorporation of one atom each of oxygen into both donors",
"1.14.12.- With NADH or NADPH as one donor, and incorporation of two atoms of oxygen into one donor",
@@ -237,8 +257,8 @@ static const char* const kECNum_ambiguous[] = {
"1.14.13.n With NADH or NADPH as one donor, and incorporation of one atom of oxygen",
"1.14.14.- With reduced flavin or flavoprotein as one donor, and incorporation of one atom of oxygen",
"1.14.14.n With reduced flavin or flavoprotein as one donor, and incorporation of one atom of oxygen",
- "1.14.15.- With a reduced iron-sulfur protein as one donor, and incorporation of one atom of oxygen",
- "1.14.15.n With a reduced iron-sulfur protein as one donor, and incorporation of one atom of oxygen",
+ "1.14.15.- With reduced iron-sulfur protein as one donor, and incorporation of one atom of oxygen",
+ "1.14.15.n With reduced iron-sulfur protein as one donor, and incorporation of one atom of oxygen",
"1.14.16.- With reduced pteridine as one donor, and incorporation of one atom of oxygen",
"1.14.16.n With reduced pteridine as one donor, and incorporation of one atom of oxygen",
"1.14.17.- With reduced ascorbate as one donor, and incorporation of one atom of oxygen",
@@ -251,8 +271,8 @@ static const char* const kECNum_ambiguous[] = {
"1.14.20.n With 2-oxoglutarate as one donor, and the other dehydrogenated",
"1.14.21.- With NADH or NADPH as one donor, and the other dehydrogenated",
"1.14.21.n With NADH or NADPH as one donor, and the other dehydrogenated",
- "1.14.99.- Miscellaneous (requires further characterization)",
- "1.14.99.n Miscellaneous (requires further characterization)",
+ "1.14.99.- Miscellaneous",
+ "1.14.99.n Miscellaneous",
"1.15.-.- Acting on superoxide as acceptor",
"1.15.n.n Acting on superoxide as acceptor",
"1.15.1.- Acting on superoxide as acceptor",
@@ -265,8 +285,8 @@ static const char* const kECNum_ambiguous[] = {
"1.16.3.n With oxygen as acceptor",
"1.16.5.- With a quinone or similar compound as acceptor",
"1.16.5.n With a quinone or similar compound as acceptor",
- "1.16.8.- With flavin as acceptor",
- "1.16.8.n With flavin as acceptor",
+ "1.16.8.- With a flavin as acceptor",
+ "1.16.8.n With a flavin as acceptor",
"1.16.9.- With a copper protein as acceptor",
"1.16.9.n With a copper protein as acceptor",
"1.16.98.- With other known acceptors",
@@ -285,22 +305,32 @@ static const char* const kECNum_ambiguous[] = {
"1.17.5.n With a quinone or similar compound as acceptor",
"1.17.7.- With an iron-sulfur protein as acceptor",
"1.17.7.n With an iron-sulfur protein as acceptor",
+ "1.17.98.- With other, known, acceptors",
+ "1.17.98.n With other, known, acceptors",
"1.17.99.- With other acceptors",
"1.17.99.n With other acceptors",
"1.18.-.- Acting on iron-sulfur proteins as donors",
"1.18.n.n Acting on iron-sulfur proteins as donors",
"1.18.1.- With NAD(+) or NADP(+) as acceptor",
"1.18.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.18.2.- With dinitrogen as acceptor",
+ "1.18.2.n With dinitrogen as acceptor",
+ "1.18.3.- With H(+) as acceptor",
+ "1.18.3.n With H(+) as acceptor",
"1.18.6.- With dinitrogen as acceptor",
"1.18.6.n With dinitrogen as acceptor",
+ "1.18.96.- With other, known, acceptors",
+ "1.18.96.n With other, known, acceptors",
+ "1.18.99.- With H(+) as acceptor",
+ "1.18.99.n With H(+) as acceptor",
"1.19.-.- Acting on reduced flavodoxin as donor",
"1.19.n.n Acting on reduced flavodoxin as donor",
"1.19.6.- With dinitrogen as acceptor",
"1.19.6.n With dinitrogen as acceptor",
"1.20.-.- Acting on phosphorus or arsenic in donors",
"1.20.n.n Acting on phosphorus or arsenic in donors",
- "1.20.1.- With NAD(P)(+) as acceptor",
- "1.20.1.n With NAD(P)(+) as acceptor",
+ "1.20.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.20.1.n With NAD(+) or NADP(+) as acceptor",
"1.20.2.- With a cytochrome as acceptor",
"1.20.2.n With a cytochrome as acceptor",
"1.20.4.- With disulfide as acceptor",
@@ -311,22 +341,42 @@ static const char* const kECNum_ambiguous[] = {
"1.20.98.n With other, known acceptors",
"1.20.99.- With other acceptors",
"1.20.99.n With other acceptors",
- "1.21.-.- Acting on x-H and y-H to form an x-y bond",
- "1.21.n.n Acting on x-H and y-H to form an x-y bond",
+ "1.21.-.- Catalyzing the reaction X-H + Y-H = 'X-Y'",
+ "1.21.n.n Catalyzing the reaction X-H + Y-H = 'X-Y'",
+ "1.21.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.21.1.n With NAD(+) or NADP(+) as acceptor",
"1.21.3.- With oxygen as acceptor",
"1.21.3.n With oxygen as acceptor",
"1.21.4.- With a disulfide as acceptor",
"1.21.4.n With a disulfide as acceptor",
+ "1.21.98.- With other, known acceptors",
+ "1.21.98.n With other, known acceptors",
"1.21.99.- With other acceptors",
"1.21.99.n With other acceptors",
"1.22.-.- Acting on halogen in donors",
"1.22.n.n Acting on halogen in donors",
"1.22.1.- With NAD(+) or NADP(+) as acceptor",
"1.22.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.23.-.- Reducing C-O-C group as acceptor",
+ "1.23.n.n Reducing C-O-C group as acceptor",
+ "1.23.1.- With NADH or NADPH as donor",
+ "1.23.1.n With NADH or NADPH as donor",
+ "1.23.5.- With a quinone or similar compound as acceptor",
+ "1.23.5.n With a quinone or similar compound as acceptor",
"1.97.-.- Other oxidoreductases",
"1.97.n.n Other oxidoreductases",
- "1.97.1.- Sole sub-subclass for oxidoreductases that do not belong in the other subclasses",
- "1.97.1.n Sole sub-subclass for oxidoreductases that do not belong in the other subclasses",
+ "1.97.1.- Other oxidoreductases",
+ "1.97.1.n Other oxidoreductases",
+ "1.98.-.- Enzymes using H(2) as reductant",
+ "1.98.n.n Enzymes using H(2) as reductant",
+ "1.98.1.- Other oxidoreductases",
+ "1.98.1.n Other oxidoreductases",
+ "1.99.-.- Other enzymes using O(2) as oxidant",
+ "1.99.n.n Other enzymes using O(2) as oxidant",
+ "1.99.1.- Hydroxylases",
+ "1.99.1.n Hydroxylases",
+ "1.99.2.- Oxygenases",
+ "1.99.2.n Oxygenases",
"2.-.-.- Transferases",
"2.n.n.n Transferases",
"2.1.-.- Transferring one-carbon groups",
@@ -335,12 +385,12 @@ static const char* const kECNum_ambiguous[] = {
"2.1.1.n Methyltransferases",
"2.1.2.- Hydroxymethyl-, formyl- and related transferases",
"2.1.2.n Hydroxymethyl-, formyl- and related transferases",
- "2.1.3.- Carboxyl- and carbamoyltransferases",
- "2.1.3.n Carboxyl- and carbamoyltransferases",
+ "2.1.3.- Carboxy- and carbamoyltransferases",
+ "2.1.3.n Carboxy- and carbamoyltransferases",
"2.1.4.- Amidinotransferases",
"2.1.4.n Amidinotransferases",
- "2.2.-.- Transferring aldehyde or ketone residues",
- "2.2.n.n Transferring aldehyde or ketone residues",
+ "2.2.-.- Transferring aldehyde or ketonic groups",
+ "2.2.n.n Transferring aldehyde or ketonic groups",
"2.2.1.- Transketolases and transaldolases",
"2.2.1.n Transketolases and transaldolases",
"2.3.-.- Acyltransferases",
@@ -349,8 +399,8 @@ static const char* const kECNum_ambiguous[] = {
"2.3.1.n Transferring groups other than amino-acyl groups",
"2.3.2.- Aminoacyltransferases",
"2.3.2.n Aminoacyltransferases",
- "2.3.3.- Acyl groups converted into alkyl on transfer",
- "2.3.3.n Acyl groups converted into alkyl on transfer",
+ "2.3.3.- Acyl groups converted into alkyl groups on transfer",
+ "2.3.3.n Acyl groups converted into alkyl groups on transfer",
"2.4.-.- Glycosyltransferases",
"2.4.n.n Glycosyltransferases",
"2.4.1.- Hexosyltransferases",
@@ -365,22 +415,26 @@ static const char* const kECNum_ambiguous[] = {
"2.5.1.n Transferring alkyl or aryl groups, other than methyl groups",
"2.6.-.- Transferring nitrogenous groups",
"2.6.n.n Transferring nitrogenous groups",
- "2.6.1.- Transaminases (aminotransferases)",
- "2.6.1.n Transaminases (aminotransferases)",
+ "2.6.1.- Transaminases",
+ "2.6.1.n Transaminases",
+ "2.6.2.- Amidinotransferases",
+ "2.6.2.n Amidinotransferases",
"2.6.3.- Oximinotransferases",
"2.6.3.n Oximinotransferases",
"2.6.99.- Transferring other nitrogenous groups",
"2.6.99.n Transferring other nitrogenous groups",
- "2.7.-.- Transferring phosphorous-containing groups",
- "2.7.n.n Transferring phosphorous-containing groups",
+ "2.7.-.- Transferring phosphorus-containing groups",
+ "2.7.n.n Transferring phosphorus-containing groups",
"2.7.1.- Phosphotransferases with an alcohol group as acceptor",
"2.7.1.n Phosphotransferases with an alcohol group as acceptor",
- "2.7.2.- Phosphotransferases with a carboxyl group as acceptor",
- "2.7.2.n Phosphotransferases with a carboxyl group as acceptor",
+ "2.7.2.- Phosphotransferases with a carboxy group as acceptor",
+ "2.7.2.n Phosphotransferases with a carboxy group as acceptor",
"2.7.3.- Phosphotransferases with a nitrogenous group as acceptor",
"2.7.3.n Phosphotransferases with a nitrogenous group as acceptor",
"2.7.4.- Phosphotransferases with a phosphate group as acceptor",
"2.7.4.n Phosphotransferases with a phosphate group as acceptor",
+ "2.7.5.- Phosphotransferases with regeneration of donors, apparently catalyzing intramolecular transfers",
+ "2.7.5.n Phosphotransferases with regeneration of donors, apparently catalyzing intramolecular transfers",
"2.7.6.- Diphosphotransferases",
"2.7.6.n Diphosphotransferases",
"2.7.7.- Nucleotidyltransferases",
@@ -397,6 +451,8 @@ static const char* const kECNum_ambiguous[] = {
"2.7.12.n Dual-specificity kinases (those acting on Ser/Thr and Tyr residues)",
"2.7.13.- Protein-histidine kinases",
"2.7.13.n Protein-histidine kinases",
+ "2.7.14.- Protein-arginine kinases",
+ "2.7.14.n Protein-arginine kinases",
"2.7.99.- Other protein kinases",
"2.7.99.n Other protein kinases",
"2.8.-.- Transferring sulfur-containing groups",
@@ -439,44 +495,62 @@ static const char* const kECNum_ambiguous[] = {
"3.1.8.n Phosphoric triester hydrolases",
"3.1.11.- Exodeoxyribonucleases producing 5'-phosphomonoesters",
"3.1.11.n Exodeoxyribonucleases producing 5'-phosphomonoesters",
+ "3.1.12.- Exodeoxyribonucleases producing 3'-phosphomonoesters",
+ "3.1.12.n Exodeoxyribonucleases producing 3'-phosphomonoesters",
"3.1.13.- Exoribonucleases producing 5'-phosphomonoesters",
"3.1.13.n Exoribonucleases producing 5'-phosphomonoesters",
"3.1.14.- Exoribonucleases producing 3'-phosphomonoesters",
"3.1.14.n Exoribonucleases producing 3'-phosphomonoesters",
- "3.1.15.- Exonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
- "3.1.15.n Exonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
- "3.1.16.- Exonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
- "3.1.16.n Exonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
+ "3.1.15.- Exonucleases active with either ribo- or deoxyribonucleic acids and producing 5'-phosphomonoesters",
+ "3.1.15.n Exonucleases active with either ribo- or deoxyribonucleic acids and producing 5'-phosphomonoesters",
+ "3.1.16.- Exonucleases active with either ribo- or deoxyribonucleic acids and producing 3'-phosphomonoesters",
+ "3.1.16.n Exonucleases active with either ribo- or deoxyribonucleic acids and producing 3'-phosphomonoesters",
"3.1.21.- Endodeoxyribonucleases producing 5'-phosphomonoesters",
"3.1.21.n Endodeoxyribonucleases producing 5'-phosphomonoesters",
"3.1.22.- Endodeoxyribonucleases producing other than 5'-phosphomonoesters",
"3.1.22.n Endodeoxyribonucleases producing other than 5'-phosphomonoesters",
+ "3.1.23.- Site specific endodeoxyribonucleases: cleavage is sequence specific",
+ "3.1.23.n Site specific endodeoxyribonucleases: cleavage is sequence specific",
+ "3.1.24.- Site specific endodeoxyribonucleases: cleavage is not sequence specific",
+ "3.1.24.n Site specific endodeoxyribonucleases: cleavage is not sequence specific",
"3.1.25.- Site-specific endodeoxyribonucleases specific for altered bases",
"3.1.25.n Site-specific endodeoxyribonucleases specific for altered bases",
"3.1.26.- Endoribonucleases producing 5'-phosphomonoesters",
"3.1.26.n Endoribonucleases producing 5'-phosphomonoesters",
"3.1.27.- Endoribonucleases producing other than 5'-phosphomonoesters",
"3.1.27.n Endoribonucleases producing other than 5'-phosphomonoesters",
- "3.1.30.- Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
- "3.1.30.n Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
- "3.1.31.- Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
- "3.1.31.n Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
+ "3.1.30.- Endoribonucleases active with either ribo- or deoxyribonucleic acids and producing 5'-phosphomonoesters",
+ "3.1.30.n Endoribonucleases active with either ribo- or deoxyribonucleic acids and producing 5'-phosphomonoesters",
+ "3.1.31.- Endoribonucleases active with either ribo- or deoxyribonucleic acids and producing 3'-phosphomonoesters",
+ "3.1.31.n Endoribonucleases active with either ribo- or deoxyribonucleic acids and producing 3'-phosphomonoesters",
"3.2.-.- Glycosylases",
"3.2.n.n Glycosylases",
"3.2.1.- Glycosidases, i.e. enzymes hydrolyzing O- and S-glycosyl compounds",
"3.2.1.n Glycosidases, i.e. enzymes hydrolyzing O- and S-glycosyl compounds",
"3.2.2.- Hydrolyzing N-glycosyl compounds",
"3.2.2.n Hydrolyzing N-glycosyl compounds",
+ "3.2.3.- Hydrolyzing S-glycosyl compounds",
+ "3.2.3.n Hydrolyzing S-glycosyl compounds",
"3.3.-.- Acting on ether bonds",
"3.3.n.n Acting on ether bonds",
"3.3.1.- Thioether and trialkylsulfonium hydrolases",
"3.3.1.n Thioether and trialkylsulfonium hydrolases",
"3.3.2.- Ether hydrolases",
"3.3.2.n Ether hydrolases",
- "3.4.-.- Acting on peptide bonds (peptide hydrolases)",
- "3.4.n.n Acting on peptide bonds (peptide hydrolases)",
+ "3.4.-.- Acting on peptide bonds (peptidases)",
+ "3.4.n.n Acting on peptide bonds (peptidases)",
+ "3.4.1.- alpha-Amino-acyl-peptide hydrolases",
+ "3.4.1.n alpha-Amino-acyl-peptide hydrolases",
+ "3.4.2.- Peptidyl-amino-acid hydrolases",
+ "3.4.2.n Peptidyl-amino-acid hydrolases",
+ "3.4.3.- Dipeptide hydrolases",
+ "3.4.3.n Dipeptide hydrolases",
+ "3.4.4.- Peptidyl peptide hydrolases",
+ "3.4.4.n Peptidyl peptide hydrolases",
"3.4.11.- Aminopeptidases",
"3.4.11.n Aminopeptidases",
+ "3.4.12.- Peptidylamino-acid hydrolases or acylamino-acid hydrolases",
+ "3.4.12.n Peptidylamino-acid hydrolases or acylamino-acid hydrolases",
"3.4.13.- Dipeptidases",
"3.4.13.n Dipeptidases",
"3.4.14.- Dipeptidyl-peptidases and tripeptidyl-peptidases",
@@ -519,14 +593,14 @@ static const char* const kECNum_ambiguous[] = {
"3.5.99.n In other compounds",
"3.6.-.- Acting on acid anhydrides",
"3.6.n.n Acting on acid anhydrides",
- "3.6.1.- In phosphorous-containing anhydrides",
- "3.6.1.n In phosphorous-containing anhydrides",
+ "3.6.1.- In phosphorus-containing anhydrides",
+ "3.6.1.n In phosphorus-containing anhydrides",
"3.6.2.- In sulfonyl-containing anhydrides",
"3.6.2.n In sulfonyl-containing anhydrides",
"3.6.3.- Acting on acid anhydrides; catalyzing transmembrane movement of substances",
"3.6.3.n Acting on acid anhydrides; catalyzing transmembrane movement of substances",
- "3.6.4.- Acting on acid anhydrides; involved in cellular and subcellular movement",
- "3.6.4.n Acting on acid anhydrides; involved in cellular and subcellular movement",
+ "3.6.4.- Acting on ATP; involved in cellular and subcellular movement",
+ "3.6.4.n Acting on ATP; involved in cellular and subcellular movement",
"3.6.5.- Acting on GTP; involved in cellular and subcellular movement",
"3.6.5.n Acting on GTP; involved in cellular and subcellular movement",
"3.7.-.- Acting on carbon-carbon bonds",
@@ -537,6 +611,8 @@ static const char* const kECNum_ambiguous[] = {
"3.8.n.n Acting on halide bonds",
"3.8.1.- In C-halide compounds",
"3.8.1.n In C-halide compounds",
+ "3.8.2.- In P-halide compounds",
+ "3.8.2.n In P-halide compounds",
"3.9.-.- Acting on phosphorus-nitrogen bonds",
"3.9.n.n Acting on phosphorus-nitrogen bonds",
"3.9.1.- Acting on phosphorus-nitrogen bonds",
@@ -587,8 +663,8 @@ static const char* const kECNum_ambiguous[] = {
"4.3.2.n Lyases acting on amides, amidines, etc",
"4.3.3.- Amine-lyases",
"4.3.3.n Amine-lyases",
- "4.3.99.- Other carbon-oxygen lyases",
- "4.3.99.n Other carbon-oxygen lyases",
+ "4.3.99.- Other carbon-nitrogen lyases",
+ "4.3.99.n Other carbon-nitrogen lyases",
"4.4.-.- Carbon-sulfur lyases",
"4.4.n.n Carbon-sulfur lyases",
"4.4.1.- Carbon-sulfur lyases",
@@ -601,10 +677,14 @@ static const char* const kECNum_ambiguous[] = {
"4.6.n.n Phosphorus-oxygen lyases",
"4.6.1.- Phosphorus-oxygen lyases",
"4.6.1.n Phosphorus-oxygen lyases",
+ "4.7.-.- Carbon-phosphorus lyases",
+ "4.7.n.n Carbon-phosphorus lyases",
+ "4.7.1.- Carbon-phosphorus lyases",
+ "4.7.1.n Carbon-phosphorus lyases",
"4.99.-.- Other lyases",
"4.99.n.n Other lyases",
- "4.99.1.- Sole sub-subclass for lyases that do not belong in the other subclasses",
- "4.99.1.n Sole sub-subclass for lyases that do not belong in the other subclasses",
+ "4.99.1.- Other lyases",
+ "4.99.1.n Other lyases",
"5.-.-.- Isomerases",
"5.n.n.n Isomerases",
"5.1.-.- Racemases and epimerases",
@@ -619,22 +699,22 @@ static const char* const kECNum_ambiguous[] = {
"5.1.99.n Acting on other compounds",
"5.2.-.- Cis-trans-isomerases",
"5.2.n.n Cis-trans-isomerases",
- "5.2.1.- Cis-trans Isomerases",
- "5.2.1.n Cis-trans Isomerases",
+ "5.2.1.- Cis-trans isomerases",
+ "5.2.1.n Cis-trans isomerases",
"5.3.-.- Intramolecular oxidoreductases",
"5.3.n.n Intramolecular oxidoreductases",
- "5.3.1.- Interconverting aldoses and ketoses, and related compounds",
- "5.3.1.n Interconverting aldoses and ketoses, and related compounds",
- "5.3.2.- Interconverting keto- and enol- groups",
- "5.3.2.n Interconverting keto- and enol- groups",
- "5.3.3.- Transposing C==C bonds",
- "5.3.3.n Transposing C==C bonds",
+ "5.3.1.- Interconverting aldoses and ketoses",
+ "5.3.1.n Interconverting aldoses and ketoses",
+ "5.3.2.- Interconverting keto- and enol-groups",
+ "5.3.2.n Interconverting keto- and enol-groups",
+ "5.3.3.- Transposing C=C bonds",
+ "5.3.3.n Transposing C=C bonds",
"5.3.4.- Transposing S-S bonds",
"5.3.4.n Transposing S-S bonds",
"5.3.99.- Other intramolecular oxidoreductases",
"5.3.99.n Other intramolecular oxidoreductases",
- "5.4.-.- Intramolecular transferases (mutases)",
- "5.4.n.n Intramolecular transferases (mutases)",
+ "5.4.-.- Intramolecular transferases",
+ "5.4.n.n Intramolecular transferases",
"5.4.1.- Transferring acyl groups",
"5.4.1.n Transferring acyl groups",
"5.4.2.- Phosphotransferases (phosphomutases)",
@@ -651,8 +731,8 @@ static const char* const kECNum_ambiguous[] = {
"5.5.1.n Intramolecular lyases",
"5.99.-.- Other isomerases",
"5.99.n.n Other isomerases",
- "5.99.1.- Sole sub-subclass for isomerases that do not belong in the other subclasses",
- "5.99.1.n Sole sub-subclass for isomerases that do not belong in the other subclasses",
+ "5.99.1.- Other isomerases",
+ "5.99.1.n Other isomerases",
"6.-.-.- Ligases",
"6.n.n.n Ligases",
"6.1.-.- Forming carbon-oxygen bonds",
@@ -667,10 +747,10 @@ static const char* const kECNum_ambiguous[] = {
"6.2.1.n Acid--thiol ligases",
"6.3.-.- Forming carbon-nitrogen bonds",
"6.3.n.n Forming carbon-nitrogen bonds",
- "6.3.1.- Acid--ammonia (or amide) ligases (amide synthases)",
- "6.3.1.n Acid--ammonia (or amide) ligases (amide synthases)",
- "6.3.2.- Acid--D-amino-acid ligases (peptide synthases)",
- "6.3.2.n Acid--D-amino-acid ligases (peptide synthases)",
+ "6.3.1.- Acid--ammonia (or amine) ligases (amide synthases)",
+ "6.3.1.n Acid--ammonia (or amine) ligases (amide synthases)",
+ "6.3.2.- Acid--amino-acid ligases (peptide synthases)",
+ "6.3.2.n Acid--amino-acid ligases (peptide synthases)",
"6.3.3.- Cyclo-ligases",
"6.3.3.n Cyclo-ligases",
"6.3.4.- Other carbon--nitrogen ligases",
@@ -679,12 +759,12 @@ static const char* const kECNum_ambiguous[] = {
"6.3.5.n Carbon--nitrogen ligases with glutamine as amido-N-donor",
"6.4.-.- Forming carbon-carbon bonds",
"6.4.n.n Forming carbon-carbon bonds",
- "6.4.1.- Ligases that form carbon-carbon bonds",
- "6.4.1.n Ligases that form carbon-carbon bonds",
+ "6.4.1.- Forming carbon-carbon bonds",
+ "6.4.1.n Forming carbon-carbon bonds",
"6.5.-.- Forming phosphoric ester bonds",
"6.5.n.n Forming phosphoric ester bonds",
- "6.5.1.- Ligases that form phosphoric-ester bonds",
- "6.5.1.n Ligases that form phosphoric-ester bonds",
+ "6.5.1.- Forming phosphoric ester bonds",
+ "6.5.1.n Forming phosphoric ester bonds",
"6.6.-.- Forming nitrogen-metal bonds",
"6.6.n.n Forming nitrogen-metal bonds",
"6.6.1.- Forming coordination complexes",
diff --git a/api/ecnum_deleted.inc b/api/ecnum_deleted.inc
index f271b375..4497ea5f 100644
--- a/api/ecnum_deleted.inc
+++ b/api/ecnum_deleted.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_deleted.inc,v 1.7 2012/06/04 17:49:35 kazimird Exp $
+/* $Id: ecnum_deleted.inc,v 1.13 2016/02/18 22:34:14 fukanchi Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -33,16 +33,21 @@
static const char* const kECNum_deleted[] = {
"1.1.1.74",
"1.1.1.128",
+ "1.1.1.161",
"1.1.1.293",
+ "1.1.3.3",
"1.1.3.31",
"1.1.5.1",
"1.2.1.6",
+ "1.2.1.40",
"1.2.2.2",
"1.2.3.10",
+ "1.2.7.2",
"1.3.1.23",
"1.3.1.55",
"1.3.1.59",
"1.3.1.61",
+ "1.3.99.1",
"1.4.3.18",
"1.5.3.3",
"1.6.2.3",
@@ -50,6 +55,7 @@ static const char* const kECNum_deleted[] = {
"1.7.1.8",
"1.7.99.2",
"1.8.1.1",
+ "1.8.99.1",
"1.12.99.2",
"1.13.1.7",
"1.13.11.7",
@@ -85,9 +91,11 @@ static const char* const kECNum_deleted[] = {
"2.7.1.98",
"2.7.7.20",
"2.8.3.4",
+ "2.8.3.7",
"3.1.1.9",
"3.1.1.21",
"3.1.2.9",
+ "3.1.2.15",
"3.1.3.61",
"3.1.4.24",
"3.1.4.26",
@@ -155,6 +163,7 @@ static const char* const kECNum_deleted[] = {
"3.4.99.39",
"3.4.99.40",
"3.4.99.42",
+ "3.5.1.27",
"3.13.1.2",
"4.1.1.13",
"4.1.2.3",
@@ -171,5 +180,7 @@ static const char* const kECNum_deleted[] = {
"5.3.1.18",
"5.3.99.1",
"5.4.3.1",
- "6.1.1.8"
+ "6.1.1.8",
+ "6.1.1.25",
+ "6.1.1.28"
};
diff --git a/api/ecnum_replaced.inc b/api/ecnum_replaced.inc
index 9f26b118..a0bc401a 100644
--- a/api/ecnum_replaced.inc
+++ b/api/ecnum_replaced.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_replaced.inc,v 1.10 2012/06/04 17:49:35 kazimird Exp $
+/* $Id: ecnum_replaced.inc,v 1.20 2016/05/25 20:39:14 fukanchi Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,22 +32,32 @@
static const char* const kECNum_replaced[] = {
"1.1.1.5 1.1.1.303 1.1.1.304",
+ "1.1.1.63 1.1.1.239",
"1.1.1.68 1.5.1.20",
"1.1.1.70 1.2.1.3",
"1.1.1.89 1.1.1.86",
"1.1.1.109 1.3.1.28",
"1.1.1.139 1.1.1.21",
"1.1.1.155 1.1.1.87",
+ "1.1.1.158 1.3.1.98",
"1.1.1.171 1.5.1.20",
"1.1.1.180 1.1.1.131",
"1.1.1.182 1.1.1.198 1.1.1.227 1.1.1.228",
"1.1.1.204 1.17.1.4",
"1.1.1.242 1.3.1.69",
+ "1.1.1.246 1.1.1.348 4.2.1.139",
"1.1.1.249 2.5.1.46",
"1.1.1.253 1.5.1.33",
"1.1.1.n1 1.1.1.305",
"1.1.1.n2 1.1.1.300",
+ "1.1.1.n3 1.1.1.336",
+ "1.1.1.n6 1.1.1.369",
+ "1.1.1.n7 1.1.1.320",
+ "1.1.1.n8 1.1.1.366",
+ "1.1.1.n9 1.1.1.365",
"1.1.1.n10 1.1.1.301",
+ "1.1.1.n13 1.1.1.330",
+ "1.1.1.n14 1.1.1.347",
"1.1.2.1 1.1.5.3",
"1.1.3.1 1.1.3.15",
"1.1.3.2 1.13.12.4",
@@ -60,9 +70,12 @@ static const char* const kECNum_replaced[] = {
"1.1.3.34 1.14.21.3",
"1.1.3.35 1.14.21.4",
"1.1.3.36 1.14.21.5",
+ "1.1.4.1 1.17.4.4",
+ "1.1.4.2 1.17.4.5",
"1.1.98.1 1.1.9.1",
"1.1.99.5 1.1.5.3",
"1.1.99.8 1.1.2.7 1.1.2.8",
+ "1.1.99.10 1.1.5.9",
"1.1.99.15 1.5.1.20",
"1.1.99.16 1.1.5.4",
"1.1.99.17 1.1.5.2",
@@ -80,32 +93,54 @@ static const char* const kECNum_replaced[] = {
"1.2.1.56 1.1.1.280",
"1.2.1.66 1.1.1.306",
"1.2.1.n1 1.2.1.77",
+ "1.2.1.n3 1.2.1.86",
"1.2.3.2 1.17.3.2",
+ "1.2.3.11 1.2.3.1",
"1.2.3.12 1.14.13.82",
"1.2.4.3 1.2.4.4",
"1.2.7.9 1.2.7.3",
"1.2.99.1 1.17.99.4",
+ "1.2.99.3 1.2.5.2",
+ "1.2.99.4 1.2.98.1",
+ "1.3.1.4 1.3.1.22",
+ "1.3.1.26 1.17.1.8",
+ "1.3.1.30 1.3.1.22",
+ "1.3.1.35 1.14.19.22",
"1.3.1.50 1.1.1.252",
+ "1.3.1.52 1.3.8.5",
+ "1.3.1.63 1.21.1.2",
+ "1.3.1.80 1.3.7.12",
"1.3.1.n1 1.3.1.87",
"1.3.2.1 1.3.8.1",
- "1.3.2.2 1.3.99.3",
+ "1.3.2.2 1.3.8.7",
"1.3.3.1 1.3.98.1",
- "1.3.3.2 1.14.21.6",
+ "1.3.3.2 1.14.19.20",
+ "1.3.7.10 1.14.19.8",
+ "1.3.98.2 1.3.4.1",
"1.3.99.2 1.3.8.1",
+ "1.3.99.3 1.3.8.7 1.3.8.8 1.3.8.9",
+ "1.3.99.7 1.3.8.6",
"1.3.99.9 1.21.99.1",
+ "1.3.99.10 1.3.8.4",
"1.3.99.11 1.3.5.2",
+ "1.3.99.13 1.3.8.8",
"1.3.99.15 1.3.7.8",
"1.3.99.20 1.3.7.9",
+ "1.3.99.21 1.3.8.3",
+ "1.3.99.34 1.3.7.11",
"1.3.99.n2 4.1.99.19",
+ "1.3.99.n3 1.3.99.36",
"1.4.1.6 1.21.4.1",
"1.4.3.6 1.4.3.21 1.4.3.22",
"1.4.3.9 1.4.3.4",
"1.4.3.17 1.3.3.10",
"1.4.4.1 1.21.4.1",
"1.4.98.1 1.4.9.1",
+ "1.4.99.1 1.4.99.6",
"1.4.99.3 1.4.9.1",
"1.4.99.4 1.4.9.2",
"1.5.1.4 1.5.1.3",
+ "1.5.1.12 1.2.1.88",
"1.5.1.13 1.17.1.5",
"1.5.1.14 1.5.1.21",
"1.5.1.29 1.5.1.38 1.5.1.39 1.5.1.41",
@@ -126,7 +161,10 @@ static const char* const kECNum_replaced[] = {
"1.5.99.1 1.5.8.3",
"1.5.99.2 1.5.8.4",
"1.5.99.7 1.5.8.2",
+ "1.5.99.8 1.5.5.2",
+ "1.5.99.9 1.5.98.1",
"1.5.99.10 1.5.8.1",
+ "1.5.99.11 1.5.98.2",
"1.6.2.1 1.6.99.3",
"1.6.4.1 1.8.1.6",
"1.6.4.2 1.8.1.7",
@@ -156,6 +194,7 @@ static const char* const kECNum_replaced[] = {
"1.6.8.2 1.5.1.30",
"1.6.99.2 1.6.5.2",
"1.6.99.4 1.18.1.2",
+ "1.6.99.5 1.6.5.11",
"1.6.99.6 1.6.5.10",
"1.6.99.7 1.5.1.34",
"1.6.99.8 1.16.1.3",
@@ -164,6 +203,7 @@ static const char* const kECNum_replaced[] = {
"1.6.99.11 1.16.1.5",
"1.6.99.12 1.16.1.6",
"1.6.99.13 1.16.1.7",
+ "1.7.3.4 1.7.2.6 1.7.3.6",
"1.7.99.3 1.7.2.1",
"1.7.99.5 1.5.1.20",
"1.7.99.6 1.7.2.4",
@@ -173,9 +213,12 @@ static const char* const kECNum_replaced[] = {
"1.8.6.1 2.5.1.18",
"1.8.99.4 1.8.4.8",
"1.9.3.2 1.7.2.1",
+ "1.9.99.1 1.9.98.1",
"1.10.3.7 1.21.3.4",
"1.10.3.8 1.21.3.5",
"1.10.99.1 1.10.9.1",
+ "1.10.99.2 1.10.5.1",
+ "1.10.99.3 1.23.5.1",
"1.11.1.4 1.13.11.11",
"1.12.1.1 1.12.7.2",
"1.12.7.1 1.12.7.2",
@@ -195,73 +238,118 @@ static const char* const kECNum_replaced[] = {
"1.13.1.11 1.13.99.1",
"1.13.1.12 1.13.11.11",
"1.13.1.13 1.13.11.12",
- "1.13.11.21 1.14.99.36",
+ "1.13.11.21 1.13.11.63",
"1.13.11.32 1.13.12.16",
"1.13.11.44 1.13.11.60 5.4.4.6",
+ "1.13.11.n1 1.13.11.74",
+ "1.13.11.n2 1.13.11.63",
"1.13.12.11 1.14.13.8",
+ "1.13.12.12 1.13.11.67",
"1.13.12.14 1.14.13.122",
"1.13.99.2 1.14.12.10",
"1.13.99.4 1.14.12.9",
"1.13.99.5 1.13.11.47",
"1.14.1.1 1.14.14.1",
"1.14.1.2 1.14.13.9",
- "1.14.1.3 1.14.13.132 5.4.99.7",
+ "1.14.1.3 1.14.14.17 5.4.99.7",
"1.14.1.4 1.14.99.2",
"1.14.1.5 1.14.13.5",
"1.14.1.6 1.14.15.4",
- "1.14.1.7 1.14.99.9",
- "1.14.1.8 1.14.99.10",
+ "1.14.1.7 1.14.14.19",
+ "1.14.1.8 1.14.14.16",
"1.14.1.10 1.14.99.11",
"1.14.2.1 1.14.17.1",
"1.14.2.2 1.13.11.27",
"1.14.3.1 1.14.16.1",
"1.14.11.5 1.14.11.6",
+ "1.14.11.n1 1.14.11.39",
"1.14.12.2 1.14.13.35",
"1.14.12.6 1.14.13.66",
- "1.14.12.n1 1.14.12.21",
+ "1.14.12.20 1.14.15.17",
+ "1.14.12.21 1.14.13.208",
+ "1.14.12.n1 1.14.13.208",
"1.14.13.3 1.14.14.9",
+ "1.14.13.15 1.14.15.15",
+ "1.14.13.17 1.14.14.23",
+ "1.14.13.26 1.14.18.4",
"1.14.13.45 1.14.18.2",
+ "1.14.13.60 1.14.13.100",
+ "1.14.13.86 1.14.13.136",
+ "1.14.13.95 1.14.18.8",
+ "1.14.13.98 1.14.14.25",
+ "1.14.13.99 1.14.14.26",
+ "1.14.13.126 1.14.15.16",
+ "1.14.13.132 1.14.14.17",
+ "1.14.13.159 1.14.14.24",
+ "1.14.13.164 1.13.11.65",
+ "1.14.13.169 1.14.18.5",
"1.14.13.n1 1.14.13.124",
"1.14.13.n2 1.14.13.125",
"1.14.13.n3 1.14.13.127",
- "1.14.13.n4 1.14.13.126",
+ "1.14.13.n4 1.14.15.16",
+ "1.14.13.n9 1.14.13.149",
"1.14.14.2 1.14.14.1",
"1.14.14.4 1.14.15.7",
"1.14.14.6 1.14.13.111",
+ "1.14.14.7 1.14.19.9",
+ "1.14.14.n1 1.14.99.46",
+ "1.14.15.2 1.14.13.162",
"1.14.17.2 1.14.18.1",
+ "1.14.19.7 1.11.1.23",
"1.14.19.n1 1.14.19.4",
"1.14.19.n2 1.14.19.5",
"1.14.19.n3 1.14.19.6",
+ "1.14.21.6 1.14.19.20",
+ "1.14.99.3 1.14.14.18",
"1.14.99.5 1.14.19.1",
"1.14.99.6 1.14.19.2",
- "1.14.99.7 1.14.13.132",
+ "1.14.99.7 1.14.14.17",
"1.14.99.8 1.14.14.1",
+ "1.14.99.9 1.14.14.19",
+ "1.14.99.10 1.14.14.16",
"1.14.99.13 1.14.13.23",
"1.14.99.16 1.14.13.72",
"1.14.99.17 1.14.16.5",
"1.14.99.25 1.14.19.3",
+ "1.14.99.28 1.14.13.151",
"1.14.99.30 1.3.5.6",
- "1.14.99.n1 1.14.99.41",
+ "1.14.99.31 1.14.19.24",
+ "1.14.99.32 1.14.19.5",
+ "1.14.99.33 1.14.19.39",
+ "1.14.99.40 1.13.11.79",
+ "1.14.99.41 1.13.11.75",
+ "1.14.99.n1 1.13.11.75",
+ "1.14.99.n2 1.13.11.71",
"1.14.99.n3 1.14.99.42",
+ "1.14.99.n5 1.13.11.70",
"1.16.98.1 1.16.9.1",
- "1.17.1.6 1.17.99.5",
+ "1.17.1.2 1.17.7.4",
+ "1.17.1.6 1.17.98.1",
+ "1.17.1.7 1.2.1.91",
"1.17.4.3 1.17.7.1",
+ "1.17.99.5 1.17.98.1",
"1.18.2.1 1.18.6.1",
"1.18.3.1 1.12.7.2",
"1.18.96.1 1.15.1.2",
"1.18.99.1 1.12.7.2",
"1.20.98.1 1.20.9.1",
+ "1.21.3.9 1.21.98.2",
+ "1.21.99.2 1.21.98.1",
+ "1.22.1.1 1.21.1.1",
+ "1.97.1.3 1.12.98.4",
"1.97.1.5 1.20.4.1",
"1.97.1.6 1.20.99.1",
"1.97.1.7 1.20.4.2",
+ "1.97.1.10 1.21.99.4",
+ "1.97.1.11 1.21.99.3",
"1.98.1.1 1.12.7.2",
"1.99.1.1 1.14.14.1",
"1.99.1.2 1.14.16.1",
"1.99.1.5 1.14.13.9",
"1.99.1.7 1.14.15.4",
- "1.99.1.9 1.14.99.9",
- "1.99.1.11 1.14.99.10",
- "1.99.1.13 1.14.13.132 5.4.99.7",
+ "1.99.1.9 1.14.14.19",
+ "1.99.1.11 1.14.14.16",
+ "1.99.1.13 1.14.14.17 5.4.99.7",
"1.99.1.14 1.13.11.27",
"1.99.2.1 1.13.11.12",
"1.99.2.2 1.13.11.1",
@@ -269,7 +357,7 @@ static const char* const kECNum_replaced[] = {
"1.99.2.4 1.13.11.4",
"1.99.2.5 1.13.11.5",
"1.99.2.6 1.13.99.1",
- "2.1.1.23 2.1.1.124 2.1.1.125 2.1.1.126",
+ "2.1.1.23 2.1.1.319 2.1.1.320 2.1.1.321 2.1.1.322",
"2.1.1.24 2.1.1.77 2.1.1.80 2.1.1.100",
"2.1.1.29 2.1.1.202 2.1.1.203 2.1.1.204",
"2.1.1.31 2.1.1.221 2.1.1.228",
@@ -279,18 +367,40 @@ static const char* const kECNum_replaced[] = {
"2.1.1.51 2.1.1.187 2.1.1.188",
"2.1.1.52 2.1.1.171 2.1.1.172 2.1.1.173 2.1.1.174",
"2.1.1.58 2.1.1.57",
+ "2.1.1.66 2.1.1.230",
"2.1.1.73 2.1.1.37",
"2.1.1.81 2.1.1.49",
"2.1.1.92 2.1.1.69",
"2.1.1.93 2.1.1.70",
+ "2.1.1.124 2.1.1.319 2.1.1.320 2.1.1.321 2.1.1.322",
+ "2.1.1.125 2.1.1.319 2.1.1.320 2.1.1.321 2.1.1.322",
+ "2.1.1.126 2.1.1.319 2.1.1.320 2.1.1.321 2.1.1.322",
"2.1.1.134 2.1.1.129",
"2.1.1.135 1.16.1.8",
+ "2.1.1.149 2.1.1.267",
"2.1.1.n2 2.1.1.211",
+ "2.1.1.n3 2.1.1.280",
+ "2.1.1.n5 2.1.1.244",
+ "2.1.1.n6 2.1.1.255",
+ "2.1.1.n9 2.1.1.274",
+ "2.1.1.n10 2.1.1.278",
+ "2.1.1.n13 2.1.1.301",
+ "2.1.1.n14 2.1.1.300",
+ "2.1.1.n15 2.1.1.298",
+ "2.1.1.n16 2.1.1.297",
"2.1.2.6 2.1.2.5",
"2.1.2.12 2.1.1.74",
"2.1.2.n1 2.1.2.13",
+ "2.1.3.13 6.1.2.2",
+ "2.1.3.14 6.1.2.2",
"2.3.1.55 2.3.1.82",
+ "2.3.1.104 2.3.1.25",
+ "2.3.1.119 1.1.1.330 1.3.1.93 2.3.1.199 4.2.1.134",
"2.3.1.n1 2.3.1.191",
+ "2.3.1.n8 2.3.1.199",
+ "2.3.1.n9 2.3.1.211",
+ "2.3.1.n10 2.3.1.222",
+ "2.3.1.n11 2.3.1.223",
"2.4.1.3 2.4.1.25",
"2.4.1.42 2.4.1.17",
"2.4.1.51 2.4.1.101 2.4.1.143 2.4.1.144 2.4.1.145",
@@ -307,6 +417,7 @@ static const char* const kECNum_replaced[] = {
"2.4.1.107 2.4.1.17",
"2.4.1.108 2.4.1.17",
"2.4.1.112 2.4.1.186",
+ "2.4.1.119 2.4.99.18",
"2.4.1.124 2.4.1.87",
"2.4.1.130 2.4.1.258 2.4.1.259 2.4.1.260 2.4.1.261",
"2.4.1.151 2.4.1.87",
@@ -317,7 +428,10 @@ static const char* const kECNum_replaced[] = {
"2.4.1.n1 2.4.1.245",
"2.4.1.n3 2.4.1.250",
"2.4.1.n4 2.4.1.131",
+ "2.4.1.n5 2.4.99.16",
+ "2.4.2.11 6.3.4.21",
"2.4.2.13 2.5.1.6",
+ "2.4.2.23 2.4.2.2 2.4.2.3 2.4.2.4",
"2.4.2.n1 2.4.2.43",
"2.5.1.8 2.5.1.75",
"2.5.1.11 2.5.1.84 2.5.1.85",
@@ -331,14 +445,23 @@ static const char* const kECNum_replaced[] = {
"2.5.1.n1 2.2.1.9",
"2.5.1.n2 2.5.1.81",
"2.5.1.n3 2.5.1.73",
+ "2.5.1.n4 2.5.1.112",
+ "2.5.1.n5 2.5.1.113",
+ "2.5.1.n6 2.5.1.105",
+ "2.5.1.n7 2.5.1.108",
+ "2.5.1.n8 2.5.1.115",
"2.6.1.10 2.6.1.21",
"2.6.1.25 2.6.1.24",
"2.6.1.53 1.4.1.13",
"2.6.1.61 2.6.1.40",
+ "2.6.1.91 2.6.1.34",
"2.6.1.n1 2.6.1.87",
+ "2.6.1.n2 2.6.1.105",
"2.6.2.1 2.1.4.1",
+ "2.6.99.4 2.3.1.234",
"2.7.1.37 2.7.11.1 2.7.11.8 2.7.11.9 2.7.11.10 2.7.11.11 2.7.11.12 2.7.11.13 2.7.11.21 2.7.11.22 2.7.11.24 2.7.11.25 2.7.11.30 2.7.12.1",
"2.7.1.38 2.7.11.19",
+ "2.7.1.69 2.7.1.191",
"2.7.1.70 2.7.11.1",
"2.7.1.75 2.7.1.21",
"2.7.1.96 2.7.1.86",
@@ -370,15 +493,21 @@ static const char* const kECNum_replaced[] = {
"2.7.1.n1 2.7.1.170",
"2.7.1.n2 2.7.1.161",
"2.7.1.n3 2.7.1.164",
+ "2.7.1.n4 2.7.1.173",
+ "2.7.1.n5 2.7.1.174",
"2.7.1.n6 2.7.1.163",
+ "2.7.1.n7 2.7.1.176",
+ "2.7.1.n8 2.7.1.175",
"2.7.2.5 6.3.4.16",
"2.7.2.9 6.3.5.5",
"2.7.3.11 2.7.13.1",
"2.7.3.12 2.7.13.2",
"2.7.4.5 2.7.4.14",
+ "2.7.4.n1 2.7.4.27",
+ "2.7.4.n2 2.7.4.28",
"2.7.5.1 5.4.2.2",
"2.7.5.2 5.4.2.3",
- "2.7.5.3 5.4.2.1",
+ "2.7.5.3 5.4.2.11 5.4.2.12",
"2.7.5.4 5.4.2.4",
"2.7.5.5 5.4.2.5",
"2.7.5.6 5.4.2.7",
@@ -389,15 +518,23 @@ static const char* const kECNum_replaced[] = {
"2.7.7.25 2.7.7.72",
"2.7.7.26 3.1.27.3",
"2.7.7.29 2.7.7.28",
+ "2.7.7.54 6.3.2.40",
+ "2.7.7.55 6.3.2.40",
+ "2.7.7.63 6.3.1.20",
"2.7.7.n2 2.7.7.67",
"2.7.7.n3 2.7.7.73",
"2.7.7.n4 2.7.7.80",
"2.7.7.n5 2.7.7.75",
"2.7.8.16 2.7.8.2",
- "2.7.8.n1 2.7.8.30",
+ "2.7.8.25 2.4.2.52",
+ "2.7.8.30 2.4.2.53",
+ "2.7.8.n1 2.4.2.53",
"2.7.8.n2 2.7.8.33",
+ "2.7.11.n1 2.7.11.32",
+ "2.7.11.n2 2.7.11.33",
"2.8.1.n1 2.8.1.11",
"2.8.2.12 2.8.2.8",
+ "2.8.2.n1 2.8.2.20",
"2.9.1.n1 2.9.1.2",
"3.1.1.12 3.1.1.1",
"3.1.1.16 3.1.1.24 5.3.3.4",
@@ -408,17 +545,22 @@ static const char* const kECNum_replaced[] = {
"3.1.2.8 3.1.2.6",
"3.1.2.24 3.13.1.3",
"3.1.2.n1 3.1.2.28",
+ "3.1.2.n2 3.1.2.30",
"3.1.3.30 3.1.3.31",
"3.1.3.65 3.1.3.64",
"3.1.3.n1 3.1.3.86",
"3.1.3.n2 3.1.3.84",
"3.1.3.n3 3.1.3.78",
+ "3.1.3.n4 3.1.3.87",
+ "3.1.3.n5 3.1.3.93",
+ "3.1.3.n6 3.1.3.96",
"3.1.4.5 3.1.21.1",
"3.1.4.6 3.1.22.1",
"3.1.4.7 3.1.31.1",
"3.1.4.8 3.1.27.3",
"3.1.4.9 3.1.30.2",
"3.1.4.10 4.6.1.13",
+ "3.1.4.15 2.7.7.89",
"3.1.4.18 3.1.16.1",
"3.1.4.19 3.1.13.3",
"3.1.4.20 3.1.13.1",
@@ -433,6 +575,7 @@ static const char* const kECNum_replaced[] = {
"3.1.4.36 3.1.4.43",
"3.1.4.47 4.6.1.14",
"3.1.4.n1 3.1.4.53",
+ "3.1.7.4 4.2.1.133 4.2.3.141",
"3.1.22.3 3.1.21.7",
"3.1.23.1 3.1.21.4",
"3.1.23.2 3.1.21.4",
@@ -499,6 +642,7 @@ static const char* const kECNum_replaced[] = {
"3.1.24.4 3.1.21.5",
"3.1.25.2 4.2.99.18",
"3.1.26.n1 3.1.26.12",
+ "3.1.27.9 4.6.1.16",
"3.2.1.12 3.2.1.54",
"3.2.1.13 3.2.1.54",
"3.2.1.29 3.2.1.52",
@@ -645,8 +789,12 @@ static const char* const kECNum_replaced[] = {
"3.5.1.80 3.5.1.25",
"3.5.1.n1 3.5.1.108",
"3.5.1.n2 3.5.1.99",
+ "3.5.1.n4 3.5.1.110",
"3.5.2.8 3.5.2.6",
+ "3.5.3.19 3.5.1.116",
+ "3.5.4.14 3.5.4.5",
"3.5.4.n1 3.5.4.31",
+ "3.5.4.n2 3.5.4.39",
"3.5.5.3 4.2.1.104",
"3.6.1.4 3.6.1.3",
"3.6.1.30 3.6.1.59 3.6.1.62",
@@ -663,15 +811,22 @@ static const char* const kECNum_replaced[] = {
"3.6.1.49 3.6.5.4",
"3.6.1.50 3.6.5.5",
"3.6.1.51 3.6.5.6",
+ "3.6.1.n4 3.6.1.67",
"3.6.1.n5 3.6.1.54",
"3.6.3.13 3.6.3.1",
"3.6.3.45 3.6.3.44",
+ "3.6.3.n1 3.6.3.54",
+ "3.7.1.15 4.2.1.138",
+ "3.7.1.16 3.3.2.12",
"3.7.1.n1 3.7.1.14",
- "3.8.1.4 1.97.1.10",
+ "3.7.1.n2 3.7.1.22",
+ "3.8.1.4 1.21.99.4",
"3.8.2.1 3.1.8.2",
"4.1.1.10 4.1.1.12",
"4.1.1.26 4.1.1.28",
"4.1.1.27 4.1.1.28",
+ "4.1.1.n1 4.1.1.97",
+ "4.1.1.n2 4.1.1.94",
"4.1.2.1 4.1.3.16",
"4.1.2.7 4.1.2.13",
"4.1.2.15 2.5.1.54",
@@ -680,6 +835,10 @@ static const char* const kECNum_replaced[] = {
"4.1.2.37 4.1.2.46 4.1.2.47",
"4.1.2.39 4.1.2.46 4.1.2.47",
"4.1.2.n1 4.1.2.44",
+ "4.1.2.n3 4.1.2.53",
+ "4.1.2.n4 4.1.2.52",
+ "4.1.2.n5 2.2.1.10",
+ "4.1.2.n6 4.1.2.56",
"4.1.3.2 2.3.3.9",
"4.1.3.5 2.3.3.10",
"4.1.3.7 2.3.3.1",
@@ -699,12 +858,15 @@ static const char* const kECNum_replaced[] = {
"4.1.3.31 2.3.3.5",
"4.1.3.33 2.3.3.6",
"4.1.3.37 2.2.1.7",
+ "4.1.3.n1 4.1.3.43",
"4.1.99.4 3.5.99.7",
"4.1.99.6 4.2.3.6",
"4.1.99.7 4.2.3.9",
- "4.1.99.8 4.2.3.14",
+ "4.1.99.8 4.2.3.119 4.2.3.120",
"4.1.99.9 4.2.3.15",
"4.1.99.10 4.2.3.16",
+ "4.1.99.21 4.2.3.153",
+ "4.2.1.4 4.2.1.3",
"4.2.1.13 4.3.1.17",
"4.2.1.14 4.3.1.18",
"4.2.1.15 4.4.1.1",
@@ -714,22 +876,33 @@ static const char* const kECNum_replaced[] = {
"4.2.1.29 4.99.1.6",
"4.2.1.37 3.3.2.4",
"4.2.1.38 4.3.1.20",
+ "4.2.1.52 4.3.3.7",
+ "4.2.1.58 4.2.1.59",
+ "4.2.1.60 4.2.1.59",
+ "4.2.1.61 4.2.1.59",
"4.2.1.63 3.3.2.9 3.3.2.10",
"4.2.1.64 3.3.2.9 3.3.2.10",
"4.2.1.71 4.2.1.27",
"4.2.1.72 4.1.1.78",
"4.2.1.86 4.2.1.98",
+ "4.2.1.89 2.8.3.21 4.2.1.149",
"4.2.1.102 4.2.1.100",
"4.2.1.n1 4.2.1.126",
+ "4.2.1.n2 4.2.1.134",
"4.2.2.4 4.2.2.20 4.2.2.21",
+ "4.2.3.14 4.2.3.119 4.2.3.120",
"4.2.3.n1 4.2.3.38",
"4.2.3.n3 4.2.3.56",
+ "4.2.3.n4 4.2.3.117",
"4.2.3.n5 4.2.3.52",
+ "4.2.3.n6 4.2.3.113",
+ "4.2.3.n7 4.2.3.119",
"4.2.3.n8 4.2.3.103",
"4.2.3.n9 4.2.3.44",
"4.2.3.n10 4.2.3.62",
"4.2.3.n12 4.2.3.65",
"4.2.3.n13 4.2.3.75",
+ "4.2.3.n14 4.2.3.118",
"4.2.99.1 4.2.2.1",
"4.2.99.2 4.2.3.1",
"4.2.99.3 4.2.2.2",
@@ -750,6 +923,8 @@ static const char* const kECNum_replaced[] = {
"4.3.1.5 4.3.1.23 4.3.1.24 4.3.1.25",
"4.3.1.8 2.5.1.61",
"4.3.1.21 4.3.1.9",
+ "4.3.1.26 1.21.98.2",
+ "4.3.3.n1 4.1.99.20",
"4.3.99.1 4.2.1.104",
"4.4.1.7 2.5.1.18",
"4.4.1.18 1.8.3.5",
@@ -761,14 +936,27 @@ static const char* const kECNum_replaced[] = {
"4.6.1.9 4.2.3.11",
"4.6.1.10 4.2.3.12",
"4.6.1.11 4.2.3.13",
+ "5.1.1.n1 5.1.1.20",
"5.1.3.n1 5.1.3.24",
+ "5.1.3.n2 5.1.3.29",
+ "5.1.3.n3 5.1.3.32",
"5.2.1.7 3.1.1.64",
"5.3.1.10 3.5.99.6",
"5.3.1.19 2.6.1.16",
+ "5.3.1.n1 5.3.1.30",
+ "5.3.1.n2 5.3.1.29",
+ "5.3.2.n1 5.3.2.5",
+ "5.3.3.15 5.3.2.7",
+ "5.3.3.16 5.3.2.8",
+ "5.3.99.n1 5.3.99.11",
+ "5.4.1.2 5.4.99.61",
+ "5.4.2.1 5.4.2.11 5.4.2.12",
"5.4.99.6 5.4.4.2",
"5.4.99.10 5.4.99.11",
"5.4.99.n1 5.4.99.39",
"5.4.99.n2 5.4.99.41",
+ "5.5.1.21 4.2.1.133",
+ "5.5.1.n1 5.4.99.62",
"5.5.1.n2 5.5.1.19",
"6.1.1.n1 6.3.1.13",
"6.1.1.n2 6.1.1.27",
@@ -776,8 +964,17 @@ static const char* const kECNum_replaced[] = {
"6.2.1.29 6.2.1.7",
"6.2.1.n1 6.2.1.37",
"6.3.1.3 6.3.4.13",
+ "6.3.1.16 6.3.3.6",
"6.3.2.15 6.3.2.10",
+ "6.3.2.19 2.3.2.23 2.3.2.27 6.2.1.45",
"6.3.2.22 6.3.1.14",
+ "6.3.2.27 6.3.2.38 6.3.2.39",
+ "6.3.2.28 6.3.2.49",
"6.3.2.n1 6.3.2.37",
+ "6.3.2.n2 6.3.1.19",
+ "6.3.2.n4 6.3.2.43",
+ "6.3.2.n5 6.3.2.44",
+ "6.3.2.n6 6.3.2.41",
+ "6.3.4.1 6.3.5.2",
"6.3.5.8 2.6.1.85"
};
diff --git a/api/ecnum_specific.inc b/api/ecnum_specific.inc
index 62cd0be1..e7eb661b 100644
--- a/api/ecnum_specific.inc
+++ b/api/ecnum_specific.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_specific.inc,v 1.10 2012/06/04 17:49:35 kazimird Exp $
+/* $Id: ecnum_specific.inc,v 1.22 2016/05/25 20:39:14 fukanchi Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -73,16 +73,16 @@ static const char* const kECNum_specific[] = {
"1.1.1.41 Isocitrate dehydrogenase (NAD(+))",
"1.1.1.42 Isocitrate dehydrogenase (NADP(+))",
"1.1.1.43 Phosphogluconate 2-dehydrogenase",
- "1.1.1.44 Phosphogluconate dehydrogenase (decarboxylating)",
+ "1.1.1.44 Phosphogluconate dehydrogenase (NADP(+)-dependent, decarboxylating)",
"1.1.1.45 L-gulonate 3-dehydrogenase",
"1.1.1.46 L-arabinose 1-dehydrogenase",
- "1.1.1.47 Glucose 1-dehydrogenase",
+ "1.1.1.47 Glucose 1-dehydrogenase (NAD(P)(+))",
"1.1.1.48 D-galactose 1-dehydrogenase",
- "1.1.1.49 Glucose-6-phosphate dehydrogenase",
- "1.1.1.50 3-alpha-hydroxysteroid dehydrogenase (B-specific)",
+ "1.1.1.49 Glucose-6-phosphate dehydrogenase (NADP(+))",
+ "1.1.1.50 3-alpha-hydroxysteroid 3-dehydrogenase (Si-specific)",
"1.1.1.51 3(or 17)-beta-hydroxysteroid dehydrogenase",
- "1.1.1.52 3-alpha-hydroxycholanate dehydrogenase",
- "1.1.1.53 3-alpha-(or 20-beta)-hydroxysteroid dehydrogenase",
+ "1.1.1.52 3-alpha-hydroxycholanate dehydrogenase (NAD(+))",
+ "1.1.1.53 3-alpha(or 20-beta)-hydroxysteroid dehydrogenase",
"1.1.1.54 Allyl-alcohol dehydrogenase",
"1.1.1.55 Lactaldehyde reductase (NADPH)",
"1.1.1.56 Ribitol 2-dehydrogenase",
@@ -91,8 +91,7 @@ static const char* const kECNum_specific[] = {
"1.1.1.59 3-hydroxypropionate dehydrogenase",
"1.1.1.60 2-hydroxy-3-oxopropionate reductase",
"1.1.1.61 4-hydroxybutyrate dehydrogenase",
- "1.1.1.62 Estradiol 17-beta-dehydrogenase",
- "1.1.1.63 Testosterone 17-beta-dehydrogenase",
+ "1.1.1.62 17-beta-estradiol 17-dehydrogenase",
"1.1.1.64 Testosterone 17-beta-dehydrogenase (NADP(+))",
"1.1.1.65 Pyridoxine 4-dehydrogenase",
"1.1.1.66 Omega-hydroxydecanoate dehydrogenase",
@@ -104,7 +103,7 @@ static const char* const kECNum_specific[] = {
"1.1.1.75 (R)-aminopropanol dehydrogenase",
"1.1.1.76 (S,S)-butanediol dehydrogenase",
"1.1.1.77 Lactaldehyde reductase",
- "1.1.1.78 Methylglyoxal reductase (NADH-dependent)",
+ "1.1.1.78 Methylglyoxal reductase (NADH)",
"1.1.1.79 Glyoxylate reductase (NADP(+))",
"1.1.1.80 Isopropanol dehydrogenase (NADP(+))",
"1.1.1.81 Hydroxypyruvate reductase",
@@ -112,7 +111,7 @@ static const char* const kECNum_specific[] = {
"1.1.1.83 D-malate dehydrogenase (decarboxylating)",
"1.1.1.84 Dimethylmalate dehydrogenase",
"1.1.1.85 3-isopropylmalate dehydrogenase",
- "1.1.1.86 Ketol-acid reductoisomerase",
+ "1.1.1.86 Ketol-acid reductoisomerase (NADP(+))",
"1.1.1.87 Homoisocitrate dehydrogenase",
"1.1.1.88 Hydroxymethylglutaryl-CoA reductase",
"1.1.1.90 Aryl-alcohol dehydrogenase",
@@ -140,12 +139,12 @@ static const char* const kECNum_specific[] = {
"1.1.1.113 L-xylose 1-dehydrogenase",
"1.1.1.114 Apiose 1-reductase",
"1.1.1.115 Ribose 1-dehydrogenase (NADP(+))",
- "1.1.1.116 D-arabinose 1-dehydrogenase",
+ "1.1.1.116 D-arabinose 1-dehydrogenase (NAD(+))",
"1.1.1.117 D-arabinose 1-dehydrogenase (NAD(P)(+))",
"1.1.1.118 Glucose 1-dehydrogenase (NAD(+))",
"1.1.1.119 Glucose 1-dehydrogenase (NADP(+))",
"1.1.1.120 Galactose 1-dehydrogenase (NADP(+))",
- "1.1.1.121 Aldose 1-dehydrogenase",
+ "1.1.1.121 Aldose 1-dehydrogenase (NAD(+))",
"1.1.1.122 D-threo-aldose 1-dehydrogenase",
"1.1.1.123 Sorbose 5-dehydrogenase (NADP(+))",
"1.1.1.124 Fructose 5-dehydrogenase (NADP(+))",
@@ -175,23 +174,21 @@ static const char* const kECNum_specific[] = {
"1.1.1.150 21-hydroxysteroid dehydrogenase (NAD(+))",
"1.1.1.151 21-hydroxysteroid dehydrogenase (NADP(+))",
"1.1.1.152 3-alpha-hydroxy-5-beta-androstane-17-one 3-alpha-dehydrogenase",
- "1.1.1.153 Sepiapterin reductase",
+ "1.1.1.153 Sepiapterin reductase (L-erythro-7,8-dihydrobiopterin forming)",
"1.1.1.154 Ureidoglycolate dehydrogenase",
"1.1.1.156 Glycerol 2-dehydrogenase (NADP(+))",
"1.1.1.157 3-hydroxybutyryl-CoA dehydrogenase",
- "1.1.1.158 UDP-N-acetylmuramate dehydrogenase",
"1.1.1.159 7-alpha-hydroxysteroid dehydrogenase",
"1.1.1.160 Dihydrobunolol dehydrogenase",
- "1.1.1.161 Cholestanetetraol 26-dehydrogenase",
"1.1.1.162 Erythrulose reductase",
"1.1.1.163 Cyclopentanol dehydrogenase",
"1.1.1.164 Hexadecanol dehydrogenase",
"1.1.1.165 2-alkyn-1-ol dehydrogenase",
"1.1.1.166 Hydroxycyclohexanecarboxylate dehydrogenase",
"1.1.1.167 Hydroxymalonate dehydrogenase",
- "1.1.1.168 2-dehydropantolactone reductase (A-specific)",
+ "1.1.1.168 2-dehydropantolactone reductase (Re-specific)",
"1.1.1.169 2-dehydropantoate 2-reductase",
- "1.1.1.170 Sterol-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)",
+ "1.1.1.170 3-beta-hydroxysteroid-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)",
"1.1.1.172 2-oxoadipate reductase",
"1.1.1.173 L-rhamnose 1-dehydrogenase",
"1.1.1.174 Cyclohexane-1,2-diol dehydrogenase",
@@ -201,7 +198,7 @@ static const char* const kECNum_specific[] = {
"1.1.1.178 3-hydroxy-2-methylbutyryl-CoA dehydrogenase",
"1.1.1.179 D-xylose 1-dehydrogenase (NADP(+))",
"1.1.1.181 Cholest-5-ene-3-beta,7-alpha-diol 3-beta-dehydrogenase",
- "1.1.1.183 Geraniol dehydrogenase",
+ "1.1.1.183 Geraniol dehydrogenase (NADP(+))",
"1.1.1.184 Carbonyl reductase (NADPH)",
"1.1.1.185 L-glycol dehydrogenase",
"1.1.1.186 dTDP-galactose 6-dehydrogenase",
@@ -230,8 +227,8 @@ static const char* const kECNum_specific[] = {
"1.1.1.210 3-beta-(or 20-alpha)-hydroxysteroid dehydrogenase",
"1.1.1.211 Long-chain-3-hydroxyacyl-CoA dehydrogenase",
"1.1.1.212 3-oxoacyl-[acyl-carrier-protein] reductase (NADH)",
- "1.1.1.213 3-alpha-hydroxysteroid dehydrogenase (A-specific)",
- "1.1.1.214 2-dehydropantolactone reductase (B-specific)",
+ "1.1.1.213 3-alpha-hydroxysteroid dehydrogenase (Re-specific)",
+ "1.1.1.214 2-dehydropantolactone reductase (Si-specific)",
"1.1.1.215 Gluconate 2-dehydrogenase",
"1.1.1.216 Farnesol dehydrogenase",
"1.1.1.217 Benzyl-2-methyl-hydroxybutyrate dehydrogenase",
@@ -262,7 +259,6 @@ static const char* const kECNum_specific[] = {
"1.1.1.243 Carveol dehydrogenase",
"1.1.1.244 Methanol dehydrogenase",
"1.1.1.245 Cyclohexanol dehydrogenase",
- "1.1.1.246 Pterocarpin synthase",
"1.1.1.247 Codeinone reductase (NADPH)",
"1.1.1.248 Salutaridine reductase (NADPH)",
"1.1.1.250 D-arabinitol 2-dehydrogenase",
@@ -278,26 +274,26 @@ static const char* const kECNum_specific[] = {
"1.1.1.261 sn-glycerol-1-phosphate dehydrogenase",
"1.1.1.262 4-hydroxythreonine-4-phosphate dehydrogenase",
"1.1.1.263 1,5-anhydro-D-fructose reductase",
- "1.1.1.264 L-idonate 5-dehydrogenase",
+ "1.1.1.264 L-idonate 5-dehydrogenase (NAD(P)(+))",
"1.1.1.265 3-methylbutanal reductase",
"1.1.1.266 dTDP-4-dehydro-6-deoxyglucose reductase",
"1.1.1.267 1-deoxy-D-xylulose-5-phosphate reductoisomerase",
"1.1.1.268 2-(R)-hydroxypropyl-CoM dehydrogenase",
"1.1.1.269 2-(S)-hydroxypropyl-CoM dehydrogenase",
- "1.1.1.270 3-keto-steroid reductase",
+ "1.1.1.270 3-beta-hydroxysteroid 3-dehydrogenase",
"1.1.1.271 GDP-L-fucose synthase",
- "1.1.1.272 (R)-2-hydroxyacid dehydrogenase",
+ "1.1.1.272 D-2-hydroxyacid dehydrogenase (NADP(+))",
"1.1.1.273 Vellosimine dehydrogenase",
- "1.1.1.274 2,5-didehydrogluconate reductase",
+ "1.1.1.274 2,5-didehydrogluconate reductase (2-dehydro-D-gluconate-forming)",
"1.1.1.275 (+)-trans-carveol dehydrogenase",
- "1.1.1.276 Serine 3-dehydrogenase",
+ "1.1.1.276 Serine 3-dehydrogenase (NADP(+))",
"1.1.1.277 3-beta-hydroxy-5-beta-steroid dehydrogenase",
"1.1.1.278 3-beta-hydroxy-5-alpha-steroid dehydrogenase",
"1.1.1.279 (R)-3-hydroxyacid-ester dehydrogenase",
"1.1.1.280 (S)-3-hydroxyacid-ester dehydrogenase",
"1.1.1.281 GDP-4-dehydro-6-deoxy-D-mannose reductase",
"1.1.1.282 Quinate/shikimate dehydrogenase",
- "1.1.1.283 Methylglyoxal reductase (NADPH-dependent)",
+ "1.1.1.283 Methylglyoxal reductase (NADPH)",
"1.1.1.284 S-(hydroxymethyl)glutathione dehydrogenase",
"1.1.1.285 3''-deamino-3''-oxonicotianamine reductase",
"1.1.1.286 Isocitrate--homoisocitrate dehydrogenase",
@@ -331,14 +327,87 @@ static const char* const kECNum_specific[] = {
"1.1.1.315 11-cis-retinol dehydrogenase",
"1.1.1.316 L-galactose 1-dehydrogenase",
"1.1.1.317 Perakine reductase",
- "1.1.1.n3 UDP-N-acetyl-D-mannosamine dehydrogenase",
+ "1.1.1.318 Eugenol synthase",
+ "1.1.1.319 Isoeugenol synthase",
+ "1.1.1.320 Benzil reductase ((S)-benzoin forming)",
+ "1.1.1.321 Benzil reductase ((R)-benzoin forming)",
+ "1.1.1.322 (-)-endo-fenchol dehydrogenase",
+ "1.1.1.323 (+)-thujan-3-ol dehydrogenase",
+ "1.1.1.324 8-hydroxygeraniol dehydrogenase",
+ "1.1.1.325 Sepiapterin reductase (L-threo-7,8-dihydrobiopterin forming)",
+ "1.1.1.326 Zerumbone synthase",
+ "1.1.1.327 5-exo-hydroxycamphor dehydrogenase",
+ "1.1.1.328 Nicotine blue oxidoreductase",
+ "1.1.1.329 2-deoxy-scyllo-inosamine dehydrogenase",
+ "1.1.1.330 Very-long-chain 3-oxoacyl-CoA reductase",
+ "1.1.1.331 Secoisolariciresinol dehydrogenase",
+ "1.1.1.332 Chanoclavine-I dehydrogenase",
+ "1.1.1.333 Decaprenylphospho-beta-D-erythro-pentofuranosid-2-ulose 2-reductase",
+ "1.1.1.334 Methylecgonone reductase",
+ "1.1.1.335 UDP-N-acetyl-2-amino-2-deoxyglucuronate dehydrogenase",
+ "1.1.1.336 UDP-N-acetyl-D-mannosamine dehydrogenase",
+ "1.1.1.337 L-2-hydroxycarboxylate dehydrogenase (NAD(+))",
+ "1.1.1.338 (2R)-3-sulfolactate dehydrogenase (NADP(+))",
+ "1.1.1.339 dTDP-6-deoxy-L-talose 4-dehydrogenase (NAD(+))",
+ "1.1.1.340 1-deoxy-11-beta-hydroxypentalenate dehydrogenase",
+ "1.1.1.341 CDP-abequose synthase",
+ "1.1.1.342 CDP-paratose synthase",
+ "1.1.1.343 Phosphogluconate dehydrogenase (NAD(+)-dependent, decarboxylating)",
+ "1.1.1.344 dTDP-6-deoxy-L-talose 4-dehydrogenase (NAD(P)(+))",
+ "1.1.1.345 D-2-hydroxyacid dehydrogenase (NAD(+))",
+ "1.1.1.346 2,5-didehydrogluconate reductase (2-dehydro-L-gulonate-forming)",
+ "1.1.1.347 Geraniol dehydrogenase (NAD(+))",
+ "1.1.1.348 Vestitone reductase",
+ "1.1.1.349 Norsolorinic acid ketoreductase",
+ "1.1.1.350 Ureidoglycolate dehydrogenase (NAD(+))",
+ "1.1.1.351 Phosphogluconate dehydrogenase (NAD(P)(+)-dependent, decarboxylating)",
+ "1.1.1.352 5'-hydroxyaverantin dehydrogenase",
+ "1.1.1.353 Versiconal hemiacetal acetate reductase",
+ "1.1.1.354 Farnesol dehydrogenase (NAD(+))",
+ "1.1.1.355 2'-dehydrokanamycin reductase",
+ "1.1.1.356 GDP-L-colitose synthase",
+ "1.1.1.357 3-alpha-hydroxysteroid 3-dehydrogenase",
+ "1.1.1.358 2-dehydropantolactone reductase",
+ "1.1.1.359 Aldose 1-dehydrogenase (NAD(P)(+))",
+ "1.1.1.360 Glucose/galactose 1-dehydrogenase",
+ "1.1.1.361 Glucose-6-phosphate 3-dehydrogenase",
+ "1.1.1.362 Aklaviketone reductase",
+ "1.1.1.363 Glucose-6-phosphate dehydrogenase (NAD(P)(+))",
+ "1.1.1.364 dTDP-4-dehydro-6-deoxy-alpha-D-gulose 4-ketoreductase",
+ "1.1.1.365 D-galacturonate reductase",
+ "1.1.1.366 L-idonate 5-dehydrogenase (NAD(+))",
+ "1.1.1.367 UDP-2-acetamido-2,6-beta-L-arabino-hexul-4-ose reductase",
+ "1.1.1.368 6-hydroxycyclohex-1-ene-1-carbonyl-CoA dehydrogenase",
+ "1.1.1.369 D-chiro-inositol 1-dehydrogenase",
+ "1.1.1.370 Scyllo-inositol 2-dehydrogenase (NAD(+))",
+ "1.1.1.371 Scyllo-inositol 2-dehydrogenase (NADP(+))",
+ "1.1.1.372 D/L-glyceraldehyde reductase",
+ "1.1.1.373 Sulfolactaldehyde 3-reductase",
+ "1.1.1.374 UDP-N-acetylglucosamine 3-dehydrogenase",
+ "1.1.1.375 L-2-hydroxycarboxylate dehydrogenase (NAD(P)(+))",
+ "1.1.1.376 L-arabinose 1-dehydrogenase (NAD(P)(+))",
+ "1.1.1.377 L-rhamnose 1-dehydrogenase (NADP(+))",
+ "1.1.1.378 L-rhamnose 1-dehydrogenase (NAD(P)(+))",
+ "1.1.1.379 (R)-mandelate dehydrogenase",
+ "1.1.1.380 L-gulonate 5-dehydrogenase",
+ "1.1.1.381 3-hydroxy acid dehydrogenase",
+ "1.1.1.382 Ketol-acid reductoisomerase (NAD(+))",
+ "1.1.1.383 Ketol-acid reductoisomerase (NAD(P)(+))",
+ "1.1.1.384 dTDP-3,4-didehydro-2,6-dideoxy-alpha-D-glucose 3-reductase",
+ "1.1.1.385 Dihydroanticapsin 7-dehydrogenase",
+ "1.1.1.386 Ipsdienol dehydrogenase",
+ "1.1.1.387 L-serine 3-dehydrogenase (NAD(+))",
+ "1.1.1.388 Glucose-6-phosphate dehydrogenase (NAD(+))",
+ "1.1.1.389 2-dehydro-3-deoxy-L-galactonate 5-dehydrogenase",
+ "1.1.1.390 Sulfoquinovose 1-dehydrogenase",
+ "1.1.1.391 3-beta-hydroxycholanate 3-dehydrogenase (NAD(+))",
+ "1.1.1.392 3-alpha-hydroxycholanate dehydrogenase (NADP(+))",
+ "1.1.1.393 3-beta-hydroxycholanate 3-dehydrogenase (NADP(+))",
+ "1.1.1.394 Aurachin B dehydrogenase",
"1.1.1.n4 (-)-trans-carveol dehydrogenase",
"1.1.1.n5 3-methylmalate dehydrogenase",
- "1.1.1.n6 D-chiro-inositol 3-dehydrogenase",
- "1.1.1.n7 Benzil reductase ((S)-benzoin forming)",
- "1.1.1.n8 L-idonate dehydrogenase",
- "1.1.1.n9 D-galacturonate reductase",
"1.1.1.n11 Succinic semialdehyde reductase",
+ "1.1.1.n12 (3R)-hydroxyacyl-CoA dehydrogenase",
"1.1.2.2 Mannitol dehydrogenase (cytochrome)",
"1.1.2.3 L-lactate dehydrogenase (cytochrome)",
"1.1.2.4 D-lactate dehydrogenase (cytochrome)",
@@ -346,7 +415,6 @@ static const char* const kECNum_specific[] = {
"1.1.2.6 Polyvinyl alcohol dehydrogenase (cytochrome)",
"1.1.2.7 Methanol dehydrogenase (cytochrome c)",
"1.1.2.8 Alcohol dehydrogenase (cytochrome c)",
- "1.1.3.3 Malate oxidase",
"1.1.3.4 Glucose oxidase",
"1.1.3.5 Hexose oxidase",
"1.1.3.6 Cholesterol oxidase",
@@ -362,7 +430,7 @@ static const char* const kECNum_specific[] = {
"1.1.3.16 Ecdysone oxidase",
"1.1.3.17 Choline oxidase",
"1.1.3.18 Secondary-alcohol oxidase",
- "1.1.3.19 4-hydroxymandelate oxidase",
+ "1.1.3.19 4-hydroxymandelate oxidase (decarboxylating)",
"1.1.3.20 Long-chain-alcohol oxidase",
"1.1.3.21 Glycerol-3-phosphate oxidase",
"1.1.3.23 Thiamine oxidase",
@@ -376,26 +444,34 @@ static const char* const kECNum_specific[] = {
"1.1.3.40 D-mannitol oxidase",
"1.1.3.41 Alditol oxidase",
"1.1.3.42 Prosolanapyrone-II oxidase",
- "1.1.4.1 Vitamin-K-epoxide reductase (warfarin-sensitive)",
- "1.1.4.2 Vitamin-K-epoxide reductase (warfarin-insensitive)",
- "1.1.5.2 Quinoprotein glucose dehydrogenase",
+ "1.1.3.43 Paromamine 6'-oxidase",
+ "1.1.3.44 6'''-hydroxyneomycin C oxidase",
+ "1.1.3.45 Aclacinomycin-N oxidase",
+ "1.1.3.46 4-hydroxymandelate oxidase",
+ "1.1.3.47 5-(hydroxymethyl)furfural oxidase",
+ "1.1.3.48 3-deoxy-alpha-D-manno-octulosonate 8-oxidase",
+ "1.1.5.2 Quinoprotein glucose dehydrogenase (PQQ, quinone)",
"1.1.5.3 Glycerol-3-phosphate dehydrogenase",
"1.1.5.4 Malate dehydrogenase (quinone)",
"1.1.5.5 Alcohol dehydrogenase (quinone)",
"1.1.5.6 Formate dehydrogenase-N",
"1.1.5.7 Cyclic alcohol dehydrogenase (quinone)",
"1.1.5.8 Quinate dehydrogenase (quinone)",
+ "1.1.5.9 Glucose 1-dehydrogenase (FAD, quinone)",
+ "1.1.5.10 D-2-hydroxyacid dehydrogenase (quinone)",
"1.1.5.n1 Quinoprotein inositol dehydrogenase",
"1.1.9.1 Alcohol dehydrogenase (azurin)",
"1.1.98.2 Glucose-6-phosphate dehydrogenase (coenzyme-F420)",
+ "1.1.98.3 Decaprenylphospho-beta-D-ribofuranose 2-dehydrogenase",
+ "1.1.98.4 F420H(2):quinone oxidoreductase",
+ "1.1.98.5 Secondary-alcohol dehydrogenase (coenzyme-F420)",
"1.1.99.1 Choline dehydrogenase",
- "1.1.99.2 2-hydroxyglutarate dehydrogenase",
+ "1.1.99.2 L-2-hydroxyglutarate dehydrogenase",
"1.1.99.3 Gluconate 2-dehydrogenase (acceptor)",
"1.1.99.4 Dehydrogluconate dehydrogenase",
- "1.1.99.6 D-2-hydroxy-acid dehydrogenase",
+ "1.1.99.6 D-lactate dehydrogenase (acceptor)",
"1.1.99.7 Lactate--malate transhydrogenase",
"1.1.99.9 Pyridoxine 5-dehydrogenase",
- "1.1.99.10 Glucose dehydrogenase (acceptor)",
"1.1.99.11 Fructose 5-dehydrogenase",
"1.1.99.12 Sorbose dehydrogenase",
"1.1.99.13 Glucoside 3-dehydrogenase",
@@ -414,8 +490,10 @@ static const char* const kECNum_specific[] = {
"1.1.99.32 L-sorbose 1-dehydrogenase",
"1.1.99.33 Formate dehydrogenase (acceptor)",
"1.1.99.35 Soluble quinoprotein glucose dehydrogenase",
- "1.1.99.36 NDMA-dependent alcohol dehydrogenase",
- "1.1.99.37 NDMA-dependent methanol dehydrogenase",
+ "1.1.99.36 Alcohol dehydrogenase (nicotinoprotein)",
+ "1.1.99.37 Methanol dehydrogenase (nicotinoprotein)",
+ "1.1.99.38 2-deoxy-scyllo-inosamine dehydrogenase (AdoMet-dependent)",
+ "1.1.99.39 D-2-hydroxyglutarate dehydrogenase",
"1.2.1.2 Formate dehydrogenase",
"1.2.1.3 Aldehyde dehydrogenase (NAD(+))",
"1.2.1.4 Aldehyde dehydrogenase (NADP(+))",
@@ -439,7 +517,7 @@ static const char* const kECNum_specific[] = {
"1.2.1.24 Succinate-semialdehyde dehydrogenase (NAD(+))",
"1.2.1.25 2-oxoisovalerate dehydrogenase (acylating)",
"1.2.1.26 2,5-dioxovalerate dehydrogenase",
- "1.2.1.27 Methylmalonate-semialdehyde dehydrogenase (acylating)",
+ "1.2.1.27 Methylmalonate-semialdehyde dehydrogenase (CoA acylating)",
"1.2.1.28 Benzaldehyde dehydrogenase (NAD(+))",
"1.2.1.29 Aryl-aldehyde dehydrogenase",
"1.2.1.30 Aryl-aldehyde dehydrogenase (NADP(+))",
@@ -449,7 +527,6 @@ static const char* const kECNum_specific[] = {
"1.2.1.36 Retinal dehydrogenase",
"1.2.1.38 N-acetyl-gamma-glutamyl-phosphate reductase",
"1.2.1.39 Phenylacetaldehyde dehydrogenase",
- "1.2.1.40 3-alpha,7-alpha,12-alpha-trihydroxycholestan-26-al 26-oxidoreductase",
"1.2.1.41 Glutamate-5-semialdehyde dehydrogenase",
"1.2.1.42 Hexadecanal dehydrogenase (acylating)",
"1.2.1.43 Formate dehydrogenase (NADP(+))",
@@ -470,7 +547,7 @@ static const char* const kECNum_specific[] = {
"1.2.1.61 4-hydroxymuconic-semialdehyde dehydrogenase",
"1.2.1.62 4-formylbenzenesulfonate dehydrogenase",
"1.2.1.63 6-oxohexanoate dehydrogenase",
- "1.2.1.64 4-hydroxybenzaldehyde dehydrogenase",
+ "1.2.1.64 4-hydroxybenzaldehyde dehydrogenase (NAD(+))",
"1.2.1.65 Salicylaldehyde dehydrogenase",
"1.2.1.67 Vanillin dehydrogenase",
"1.2.1.68 Coniferyl-aldehyde dehydrogenase",
@@ -488,6 +565,21 @@ static const char* const kECNum_specific[] = {
"1.2.1.80 Long-chain acyl-[acyl-carrier-protein] reductase",
"1.2.1.81 Sulfoacetaldehyde dehydrogenase (acylating)",
"1.2.1.82 Beta-apo-4'-carotenal oxygenase",
+ "1.2.1.83 3-succinoylsemialdehyde-pyridine dehydrogenase",
+ "1.2.1.84 Alcohol-forming fatty acyl-CoA reductase",
+ "1.2.1.85 2-hydroxymuconate-6-semialdehyde dehydrogenase",
+ "1.2.1.86 Geranial dehydrogenase",
+ "1.2.1.87 Propanal dehydrogenase (CoA-propanoylating)",
+ "1.2.1.88 L-glutamate gamma-semialdehyde dehydrogenase",
+ "1.2.1.89 D-glyceraldehyde dehydrogenase (NADP(+))",
+ "1.2.1.90 Glyceraldehyde-3-phosphate dehydrogenase (NAD(P)(+))",
+ "1.2.1.91 3-oxo-5,6-dehydrosuberyl-CoA semialdehyde dehydrogenase",
+ "1.2.1.92 3,6-anhydro-alpha-L-galactose dehydrogenase",
+ "1.2.1.93 Formate dehydrogenase (NAD(+), ferredoxin)",
+ "1.2.1.94 Farnesal dehydrogenase",
+ "1.2.1.95 L-2-aminoadipate reductase",
+ "1.2.1.96 4-hydroxybenzaldehyde dehydrogenase (NADP(+))",
+ "1.2.1.97 3-sulfolactaldehyde dehydrogenase",
"1.2.1.n2 Fatty acyl-CoA reductase",
"1.2.2.1 Formate dehydrogenase (cytochrome)",
"1.2.2.3 Formate dehydrogenase (cytochrome c-553)",
@@ -500,15 +592,15 @@ static const char* const kECNum_specific[] = {
"1.2.3.7 Indole-3-acetaldehyde oxidase",
"1.2.3.8 Pyridoxal oxidase",
"1.2.3.9 Aryl-aldehyde oxidase",
- "1.2.3.11 Retinal oxidase",
"1.2.3.13 4-hydroxyphenylpyruvate oxidase",
"1.2.3.14 Abscisic-aldehyde oxidase",
"1.2.4.1 Pyruvate dehydrogenase (acetyl-transferring)",
"1.2.4.2 Oxoglutarate dehydrogenase (succinyl-transferring)",
"1.2.4.4 3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring)",
"1.2.5.1 Pyruvate dehydrogenase (quinone)",
+ "1.2.5.2 Aldehyde dehydrogenase (quinone)",
+ "1.2.5.3 Aerobic carbon monoxide dehydrogenase",
"1.2.7.1 Pyruvate synthase",
- "1.2.7.2 2-oxobutyrate synthase",
"1.2.7.3 2-oxoglutarate synthase",
"1.2.7.4 Carbon-monoxide dehydrogenase (ferredoxin)",
"1.2.7.5 Aldehyde ferredoxin oxidoreductase",
@@ -516,22 +608,23 @@ static const char* const kECNum_specific[] = {
"1.2.7.7 3-methyl-2-oxobutanoate dehydrogenase (ferredoxin)",
"1.2.7.8 Indolepyruvate ferredoxin oxidoreductase",
"1.2.7.10 Oxalate oxidoreductase",
+ "1.2.7.11 2-oxoacid oxidoreductase (ferredoxin)",
+ "1.2.98.1 Formaldehyde dismutase",
"1.2.99.2 Carbon-monoxide dehydrogenase (acceptor)",
- "1.2.99.3 Aldehyde dehydrogenase (pyrroloquinoline-quinone)",
- "1.2.99.4 Formaldehyde dismutase",
"1.2.99.5 Formylmethanofuran dehydrogenase",
"1.2.99.6 Carboxylate reductase",
"1.2.99.7 Aldehyde dehydrogenase (FAD-independent)",
+ "1.2.99.8 Glyceraldehyde dehydrogenase (FAD-containing)",
+ "1.2.99.9 Formate dehydrogenase (coenzyme F420)",
"1.3.1.1 Dihydrouracil dehydrogenase (NAD(+))",
"1.3.1.2 Dihydropyrimidine dehydrogenase (NADP(+))",
"1.3.1.3 Delta(4)-3-oxosteroid 5-beta-reductase",
- "1.3.1.4 Cortisone alpha-reductase",
"1.3.1.5 Cucurbitacin Delta(23)-reductase",
"1.3.1.6 Fumarate reductase (NADH)",
"1.3.1.7 Meso-tartrate dehydrogenase",
"1.3.1.8 Acyl-CoA dehydrogenase (NADP(+))",
"1.3.1.9 Enoyl-[acyl-carrier-protein] reductase (NADH)",
- "1.3.1.10 Enoyl-[acyl-carrier-protein] reductase (NADPH, B-specific)",
+ "1.3.1.10 Enoyl-[acyl-carrier-protein] reductase (NADPH, Si-specific)",
"1.3.1.11 2-coumarate reductase",
"1.3.1.12 Prephenate dehydrogenase",
"1.3.1.13 Prephenate dehydrogenase (NADP(+))",
@@ -543,23 +636,20 @@ static const char* const kECNum_specific[] = {
"1.3.1.19 Cis-1,2-dihydrobenzene-1,2-diol dehydrogenase",
"1.3.1.20 Trans-1,2-dihydrobenzene-1,2-diol dehydrogenase",
"1.3.1.21 7-dehydrocholesterol reductase",
- "1.3.1.22 Cholestenone 5-alpha-reductase",
+ "1.3.1.22 3-oxo-5-alpha-steroid 4-dehydrogenase (NADP(+))",
"1.3.1.24 Biliverdin reductase",
"1.3.1.25 1,6-dihydroxycyclohexa-2,4-diene-1-carboxylate dehydrogenase",
- "1.3.1.26 Dihydrodipicolinate reductase",
"1.3.1.27 2-hexadecenal reductase",
"1.3.1.28 2,3-dihydro-2,3-dihydroxybenzoate dehydrogenase",
"1.3.1.29 Cis-1,2-dihydro-1,2-dihydroxynaphthalene dehydrogenase",
- "1.3.1.30 Progesterone 5-alpha-reductase",
"1.3.1.31 2-enoate reductase",
"1.3.1.32 Maleylacetate reductase",
"1.3.1.33 Protochlorophyllide reductase",
"1.3.1.34 2,4-dienoyl-CoA reductase (NADPH)",
- "1.3.1.35 Phosphatidylcholine desaturase",
"1.3.1.36 Geissoschizine dehydrogenase",
"1.3.1.37 Cis-2-enoyl-CoA reductase (NADPH)",
"1.3.1.38 Trans-2-enoyl-CoA reductase (NADPH)",
- "1.3.1.39 Enoyl-[acyl-carrier-protein] reductase (NADPH, A-specific)",
+ "1.3.1.39 Enoyl-[acyl-carrier-protein] reductase (NADPH, Re-specific)",
"1.3.1.40 2-hydroxy-6-oxo-6-phenylhexa-2,4-dienoate reductase",
"1.3.1.41 Xanthommatin reductase",
"1.3.1.42 12-oxophytodienoate reductase",
@@ -568,10 +658,9 @@ static const char* const kECNum_specific[] = {
"1.3.1.45 2'-hydroxyisoflavone reductase",
"1.3.1.46 Biochanin-A reductase",
"1.3.1.47 Alpha-santonin 1,2-reductase",
- "1.3.1.48 15-oxoprostaglandin 13-oxidase",
+ "1.3.1.48 15-oxoprostaglandin 13-reductase",
"1.3.1.49 Cis-3,4-dihydrophenanthrene-3,4-diol dehydrogenase",
"1.3.1.51 2'-hydroxydaidzein reductase",
- "1.3.1.52 2-methyl-branched-chain-enoyl-CoA reductase",
"1.3.1.53 (3S,4R)-3,4-dihydroxycyclohexa-1,5-diene-1,4-dicarboxylate dehydrogenase",
"1.3.1.54 Precorrin-6A reductase",
"1.3.1.56 Cis-2,3-dihydrobiphenyl-2,3-diol dehydrogenase",
@@ -579,7 +668,6 @@ static const char* const kECNum_specific[] = {
"1.3.1.58 2,3-dihydroxy-2,3-dihydro-p-cumate dehydrogenase",
"1.3.1.60 Dibenzothiophene dihydrodiol dehydrogenase",
"1.3.1.62 Pimeloyl-CoA dehydrogenase",
- "1.3.1.63 2,4-dichlorobenzoyl-CoA reductase",
"1.3.1.64 Phthalate 4,5-cis-dihydrodiol dehydrogenase",
"1.3.1.65 5,6-dihydroxy-3-methyl-2-oxo-1,2,5,6-tetrahydroquinoline dehydrogenase",
"1.3.1.66 Cis-dihydroethylcatechol dehydrogenase",
@@ -590,13 +678,12 @@ static const char* const kECNum_specific[] = {
"1.3.1.71 Delta(24(24(1)))-sterol reductase",
"1.3.1.72 Delta(24)-sterol reductase",
"1.3.1.73 1,2-dihydrovomilenine reductase",
- "1.3.1.74 2-alkenal reductase",
+ "1.3.1.74 2-alkenal reductase (NAD(P)(+))",
"1.3.1.75 Divinyl chlorophyllide a 8-vinyl-reductase",
"1.3.1.76 Precorrin-2 dehydrogenase",
"1.3.1.77 Anthocyanidin reductase",
"1.3.1.78 Arogenate dehydrogenase (NADP(+))",
"1.3.1.79 Arogenate dehydrogenase (NAD(P)(+))",
- "1.3.1.80 Red chlorophyll catabolite reductase",
"1.3.1.81 (+)-pulegone reductase",
"1.3.1.82 (-)-isopiperitenone reductase",
"1.3.1.83 Geranylgeranyl diphosphate reductase",
@@ -608,6 +695,27 @@ static const char* const kECNum_specific[] = {
"1.3.1.89 tRNA-dihydrouridine(47) synthase (NAD(P)(+))",
"1.3.1.90 tRNA-dihydrouridine(20a/20b) synthase (NAD(P)(+))",
"1.3.1.91 tRNA-dihydrouridine(20) synthase (NAD(P)(+))",
+ "1.3.1.92 Artemisinic aldehyde Delta(11(13)) reductase",
+ "1.3.1.93 Very-long-chain enoyl-CoA reductase",
+ "1.3.1.94 Polyprenol reductase",
+ "1.3.1.95 Acryloyl-CoA reductase (NADH)",
+ "1.3.1.96 Botryococcus squalene synthase",
+ "1.3.1.97 Botryococcene synthase",
+ "1.3.1.98 UDP-N-acetylmuramate dehydrogenase",
+ "1.3.1.99 Iridoid synthase",
+ "1.3.1.100 Chanoclavine-I aldehyde reductase",
+ "1.3.1.101 2,3-bis-O-geranylgeranyl-sn-glycerol 1-phosphate reductase (NAD(P)H)",
+ "1.3.1.102 2-alkenal reductase (NADP(+))",
+ "1.3.1.103 2-haloacrylate reductase",
+ "1.3.1.104 Enoyl-[acyl-carrier-protein] reductase",
+ "1.3.1.105 2-methylene-furan-3-one reductase",
+ "1.3.1.106 Cobalt-precorrin-6A reductase",
+ "1.3.1.107 Sanguinarine reductase",
+ "1.3.1.108 Caffeoyl-CoA reductase",
+ "1.3.1.109 Butanoyl-CoA dehydrogenase (NAD(+),ferredoxin)",
+ "1.3.1.110 Lactate dehydrogenase (NAD(+),ferredoxin)",
+ "1.3.1.n2 Camalexin synthase",
+ "1.3.1.n3 Curcumin reductase",
"1.3.2.3 L-galactonolactone dehydrogenase",
"1.3.3.3 Coproporphyrinogen oxidase",
"1.3.3.4 Protoporphyrinogen oxidase",
@@ -619,10 +727,13 @@ static const char* const kECNum_specific[] = {
"1.3.3.10 Tryptophan alpha,beta-oxidase",
"1.3.3.11 Pyrroloquinoline-quinone synthase",
"1.3.3.12 L-galactonolactone oxidase",
- "1.3.5.1 Succinate dehydrogenase (ubiquinone)",
+ "1.3.3.13 Albonoursin synthase",
+ "1.3.3.14 Aclacinomycin-A oxidase",
+ "1.3.4.1 Fumarate reductase (CoM/CoB)",
+ "1.3.5.1 Succinate dehydrogenase (quinone)",
"1.3.5.2 Dihydroorotate dehydrogenase (quinone)",
"1.3.5.3 Protoporphyrinogen IX dehydrogenase (menaquinone)",
- "1.3.5.4 Fumarate reductase (menaquinone)",
+ "1.3.5.4 Fumarate reductase (quinol)",
"1.3.5.5 15-cis-phytoene desaturase",
"1.3.5.6 9,9'-di-cis-zeta-carotene desaturase",
"1.3.7.1 6-hydroxynicotinate reductase",
@@ -634,25 +745,31 @@ static const char* const kECNum_specific[] = {
"1.3.7.7 Ferredoxin:protochlorophyllide reductase (ATP-dependent)",
"1.3.7.8 Benzoyl-CoA reductase",
"1.3.7.9 4-hydroxybenzoyl-CoA reductase",
- "1.3.8.1 Butyryl-CoA dehydrogenase",
- "1.3.8.2 4,4'-diapophytoene desaturase",
+ "1.3.7.11 2,3-bis-O-geranylgeranyl-sn-glycero-phospholipid reductase",
+ "1.3.7.12 Red chlorophyll catabolite reductase",
+ "1.3.8.1 Short-chain acyl-CoA dehydrogenase",
+ "1.3.8.2 4,4'-diapophytoene desaturase (4,4'-diapolycopene-forming)",
+ "1.3.8.3 (R)-benzylsuccinyl-CoA dehydrogenase",
+ "1.3.8.4 Isovaleryl-CoA dehydrogenase",
+ "1.3.8.5 2-methyl-branched-chain-enoyl-CoA reductase",
+ "1.3.8.6 Glutaryl-CoA dehydrogenase (ETF)",
+ "1.3.8.7 Medium-chain acyl-CoA dehydrogenase",
+ "1.3.8.8 Long-chain-acyl-CoA dehydrogenase",
+ "1.3.8.9 Very-long-chain acyl-CoA dehydrogenase",
+ "1.3.8.10 Cyclohex-1-ene-1-carbonyl-CoA dehydrogenase",
+ "1.3.8.11 Cyclohexane-1-carbonyl-CoA dehydrogenase",
+ "1.3.8.12 (2S)-methylsuccinyl-CoA dehydrogenase",
"1.3.98.1 Dihydroorotate oxidase (fumarate)",
- "1.3.99.1 Succinate dehydrogenase",
- "1.3.99.3 Acyl-CoA dehydrogenase",
"1.3.99.4 3-oxosteroid 1-dehydrogenase",
- "1.3.99.5 3-oxo-5-alpha-steroid 4-dehydrogenase",
+ "1.3.99.5 3-oxo-5-alpha-steroid 4-dehydrogenase (acceptor)",
"1.3.99.6 3-oxo-5-beta-steroid 4-dehydrogenase",
- "1.3.99.7 Glutaryl-CoA dehydrogenase",
"1.3.99.8 2-furoyl-CoA dehydrogenase",
- "1.3.99.10 Isovaleryl-CoA dehydrogenase",
"1.3.99.12 2-methylacyl-CoA dehydrogenase",
- "1.3.99.13 Long-chain-acyl-CoA dehydrogenase",
"1.3.99.14 Cyclohexanone dehydrogenase",
"1.3.99.16 Isoquinoline 1-oxidoreductase",
"1.3.99.17 Quinoline 2-oxidoreductase",
"1.3.99.18 Quinaldate 4-oxidoreductase",
"1.3.99.19 Quinoline-4-carboxylate 2-oxidoreductase",
- "1.3.99.21 (R)-benzylsuccinyl-CoA dehydrogenase",
"1.3.99.22 Coproporphyrinogen dehydrogenase",
"1.3.99.23 All-trans-retinol 13,14-reductase",
"1.3.99.24 2-amino-4-deoxychorismate dehydrogenase",
@@ -663,6 +780,11 @@ static const char* const kECNum_specific[] = {
"1.3.99.29 Phytoene desaturase (zeta-carotene-forming)",
"1.3.99.30 Phytoene desaturase (3,4-didehydrolycopene-forming)",
"1.3.99.31 Phytoene desaturase (lycopene-forming)",
+ "1.3.99.32 Glutaryl-CoA dehydrogenase (acceptor)",
+ "1.3.99.33 Urocanate reductase",
+ "1.3.99.35 Chlorophyllide a reductase",
+ "1.3.99.36 Cypemycin cysteine dehydrogenase (decarboxylating)",
+ "1.3.99.37 1-hydroxy-2-isopentenylcarotenoid 3,4-desaturase",
"1.3.99.n1 3-hydroxybenzoyl-CoA reductase",
"1.4.1.1 Alanine dehydrogenase",
"1.4.1.2 Glutamate dehydrogenase",
@@ -684,6 +806,8 @@ static const char* const kECNum_specific[] = {
"1.4.1.19 Tryptophan dehydrogenase",
"1.4.1.20 Phenylalanine dehydrogenase",
"1.4.1.21 Aspartate dehydrogenase",
+ "1.4.1.23 Valine dehydrogenase (NAD(+))",
+ "1.4.1.24 3-dehydroquinate synthase II",
"1.4.2.1 Glycine dehydrogenase (cytochrome)",
"1.4.3.1 D-aspartate oxidase",
"1.4.3.2 L-amino-acid oxidase",
@@ -704,15 +828,16 @@ static const char* const kECNum_specific[] = {
"1.4.3.21 Primary-amine oxidase",
"1.4.3.22 Diamine oxidase",
"1.4.3.23 7-chloro-L-tryptophan oxidase",
- "1.4.4.2 Glycine dehydrogenase (decarboxylating)",
+ "1.4.3.24 Pseudooxynicotine oxidase",
+ "1.4.4.2 Glycine dehydrogenase (aminomethyl-transferring)",
"1.4.5.1 D-amino acid dehydrogenase (quinone)",
"1.4.7.1 Glutamate synthase (ferredoxin)",
"1.4.9.1 Methylamine dehydrogenase (amicyanin)",
"1.4.9.2 Aralkylamine dehydrogenase (azurin)",
- "1.4.99.1 D-amino-acid dehydrogenase",
"1.4.99.2 Taurine dehydrogenase",
"1.4.99.5 Glycine dehydrogenase (cyanide-forming)",
- "1.5.1.1 Pyrroline-2-carboxylate reductase",
+ "1.4.99.6 D-arginine dehydrogenase",
+ "1.5.1.1 1-piperideine-2-carboxylate/1-pyrroline-2-carboxylate reductase (NAD(P)H)",
"1.5.1.2 Pyrroline-5-carboxylate reductase",
"1.5.1.3 Dihydrofolate reductase",
"1.5.1.5 Methylenetetrahydrofolate dehydrogenase (NADP(+))",
@@ -722,14 +847,13 @@ static const char* const kECNum_specific[] = {
"1.5.1.9 Saccharopine dehydrogenase (NAD(+), L-glutamate-forming)",
"1.5.1.10 Saccharopine dehydrogenase (NADP(+), L-glutamate-forming)",
"1.5.1.11 D-octopine dehydrogenase",
- "1.5.1.12 1-pyrroline-5-carboxylate dehydrogenase",
"1.5.1.15 Methylenetetrahydrofolate dehydrogenase (NAD(+))",
"1.5.1.16 D-lysopine dehydrogenase",
"1.5.1.17 Alanopine dehydrogenase",
"1.5.1.18 Ephedrine dehydrogenase",
"1.5.1.19 D-nopaline dehydrogenase",
"1.5.1.20 Methylenetetrahydrofolate reductase (NAD(P)H)",
- "1.5.1.21 Delta(1)-piperideine-2-carboxylate reductase",
+ "1.5.1.21 1-piperideine-2-carboxylate/1-pyrroline-2-carboxylate reductase (NADPH)",
"1.5.1.22 Strombine dehydrogenase",
"1.5.1.23 Tauropine dehydrogenase",
"1.5.1.24 N(5)-(carboxyethyl)ornithine synthase",
@@ -749,6 +873,14 @@ static const char* const kECNum_specific[] = {
"1.5.1.40 8-hydroxy-5-deazaflavin:NADPH oxidoreductase",
"1.5.1.41 Riboflavin reductase (NAD(P)H)",
"1.5.1.42 FMN reductase (NADH)",
+ "1.5.1.43 Carboxynorspermidine synthase",
+ "1.5.1.44 Festuclavine dehydrogenase",
+ "1.5.1.45 FAD reductase (NAD(P)H)",
+ "1.5.1.46 Agroclavine dehydrogenase",
+ "1.5.1.47 Dihydromethanopterin reductase (NAD(P)(+))",
+ "1.5.1.48 2-methyl-1-pyrroline reductase",
+ "1.5.1.49 1-pyrroline-2-carboxylate reductase (NAD(P)H)",
+ "1.5.1.50 Dihydromonapterin reductase",
"1.5.3.1 Sarcosine oxidase",
"1.5.3.2 N-methyl-L-amino-acid oxidase",
"1.5.3.4 N(6)-methyl-lysine oxidase",
@@ -763,29 +895,43 @@ static const char* const kECNum_specific[] = {
"1.5.3.16 Spermine oxidase",
"1.5.3.17 Non-specific polyamine oxidase",
"1.5.3.18 L-saccharopine oxidase",
+ "1.5.3.19 4-methylaminobutanoate oxidase (formaldehyde-forming)",
+ "1.5.3.20 N-alkylglycine oxidase",
+ "1.5.3.21 4-methylaminobutanoate oxidase (methylamine-forming)",
+ "1.5.3.22 Coenzyme F420H(2) oxidase",
"1.5.4.1 Pyrimidodiazepine synthase",
"1.5.5.1 Electron-transferring-flavoprotein dehydrogenase",
+ "1.5.5.2 Proline dehydrogenase",
"1.5.7.1 Methylenetetrahydrofolate reductase (ferredoxin)",
+ "1.5.7.2 Coenzyme F420 oxidoreductase (ferredoxin)",
"1.5.8.1 Dimethylamine dehydrogenase",
"1.5.8.2 Trimethylamine dehydrogenase",
"1.5.8.3 Sarcosine dehydrogenase",
"1.5.8.4 Dimethylglycine dehydrogenase",
+ "1.5.98.1 Methylenetetrahydromethanopterin dehydrogenase",
+ "1.5.98.2 5,10-methylenetetrahydromethanopterin reductase",
"1.5.99.3 L-pipecolate dehydrogenase",
"1.5.99.4 Nicotine dehydrogenase",
"1.5.99.5 Methylglutamate dehydrogenase",
"1.5.99.6 Spermidine dehydrogenase",
- "1.5.99.8 Proline dehydrogenase",
- "1.5.99.9 Methylenetetrahydromethanopterin dehydrogenase",
- "1.5.99.11 5,10-methylenetetrahydromethanopterin reductase",
"1.5.99.12 Cytokinin dehydrogenase",
"1.5.99.13 D-proline dehydrogenase",
- "1.6.1.1 NAD(P)(+) transhydrogenase (B-specific)",
- "1.6.1.2 NAD(P)(+) transhydrogenase (AB-specific)",
+ "1.5.99.14 6-hydroxypseudooxynicotine dehydrogenase",
+ "1.5.99.15 Dihydromethanopterin reductase (acceptor)",
+ "1.6.1.1 NAD(P)(+) transhydrogenase (Si-specific)",
+ "1.6.1.2 NAD(P)(+) transhydrogenase (Re/Si-specific)",
+ "1.6.1.3 NAD(P)(+) transhydrogenase",
+ "1.6.1.4 NAD(P)(+) transhydrogenase (ferredoxin)",
+ "1.6.1.5 Proton-translocating NAD(P)(+) transhydrogenase",
"1.6.2.2 Cytochrome-b5 reductase",
"1.6.2.4 NADPH--hemoprotein reductase",
"1.6.2.5 NADPH--cytochrome-c2 reductase",
"1.6.2.6 Leghemoglobin reductase",
- "1.6.3.1 NAD(P)H oxidase",
+ "1.6.3.1 NAD(P)H oxidase (H(2)O(2)-forming)",
+ "1.6.3.2 NAD(P)H oxidase (H(2)O-forming)",
+ "1.6.3.3 NADH oxidase (H(2)O(2)-forming)",
+ "1.6.3.4 NADH oxidase (H(2)O-forming)",
+ "1.6.3.5 Renalase",
"1.6.5.2 NAD(P)H dehydrogenase (quinone)",
"1.6.5.3 NADH:ubiquinone reductase (H(+)-translocating)",
"1.6.5.4 Monodehydroascorbate reductase (NADH)",
@@ -795,10 +941,12 @@ static const char* const kECNum_specific[] = {
"1.6.5.8 NADH:ubiquinone reductase (Na(+)-transporting)",
"1.6.5.9 NADH:ubiquinone reductase (non-electrogenic)",
"1.6.5.10 NADPH dehydrogenase (quinone)",
+ "1.6.5.11 NADH dehydrogenase (quinone)",
+ "1.6.5.12 Demethylphylloquinone reductase",
"1.6.6.9 Trimethylamine-N-oxide reductase",
"1.6.99.1 NADPH dehydrogenase",
"1.6.99.3 NADH dehydrogenase",
- "1.6.99.5 NADH dehydrogenase (quinone)",
+ "1.6.99.n1 NADPH dehydrogenase (coenzyme F420 dependent)",
"1.7.1.1 Nitrate reductase (NADH)",
"1.7.1.2 Nitrate reductase (NAD(P)H)",
"1.7.1.3 Nitrate reductase (NADPH)",
@@ -811,17 +959,19 @@ static const char* const kECNum_specific[] = {
"1.7.1.11 4-(dimethylamino)phenylazoxybenzene reductase",
"1.7.1.12 N-hydroxy-2-acetamidofluorene reductase",
"1.7.1.13 PreQ(1) synthase",
- "1.7.1.14 Nitric oxide reductase (NAD(P), nitrous oxide-forming)",
+ "1.7.1.14 Nitric oxide reductase (NAD(P)(+), nitrous oxide-forming)",
+ "1.7.1.15 Nitrite reductase (NADH)",
"1.7.2.1 Nitrite reductase (NO-forming)",
"1.7.2.2 Nitrite reductase (cytochrome; ammonia-forming)",
"1.7.2.3 Trimethylamine-N-oxide reductase (cytochrome c)",
"1.7.2.4 Nitrous-oxide reductase",
"1.7.2.5 Nitric-oxide reductase (cytochrome c)",
+ "1.7.2.6 Hydroxylamine dehydrogenase",
"1.7.3.1 Nitroalkane oxidase",
"1.7.3.2 Acetylindoxyl oxidase",
"1.7.3.3 Factor independent urate hydroxylase",
- "1.7.3.4 Hydroxylamine oxidase",
"1.7.3.5 3-aci-nitropropanoate oxidase",
+ "1.7.3.6 Hydroxylamine oxidase (cytochrome)",
"1.7.5.1 Nitrate reductase (quinone)",
"1.7.5.2 Nitric oxide reductase (menaquinol)",
"1.7.6.1 Nitrite dismutase",
@@ -830,7 +980,7 @@ static const char* const kECNum_specific[] = {
"1.7.99.1 Hydroxylamine reductase",
"1.7.99.4 Nitrate reductase",
"1.7.99.8 Hydrazine oxidoreductase",
- "1.8.1.2 Sulfite reductase (NADPH)",
+ "1.8.1.2 Assimilatory sulfite reductase (NADPH)",
"1.8.1.3 Hypotaurine dehydrogenase",
"1.8.1.4 Dihydrolipoyl dehydrogenase",
"1.8.1.5 2-oxopropyl-CoM reductase (carboxylating)",
@@ -846,6 +996,8 @@ static const char* const kECNum_specific[] = {
"1.8.1.15 Mycothione reductase",
"1.8.1.16 Glutathione amide reductase",
"1.8.1.17 Dimethylsulfone reductase",
+ "1.8.1.18 NAD(P)H sulfur oxidoreductase (CoA-dependent)",
+ "1.8.1.19 Sulfide dehydrogenase",
"1.8.2.1 Sulfite dehydrogenase",
"1.8.2.2 Thiosulfate dehydrogenase",
"1.8.2.3 Sulfide-cytochrome-c reductase (flavocytochrome c)",
@@ -872,19 +1024,20 @@ static const char* const kECNum_specific[] = {
"1.8.5.2 Thiosulfate dehydrogenase (quinone)",
"1.8.5.3 Dimethylsulfoxide reductase",
"1.8.5.4 Sulfide:quinone reductase",
- "1.8.7.1 Sulfite reductase (ferredoxin)",
+ "1.8.7.1 Assimilatory sulfite reductase (ferredoxin)",
"1.8.7.2 Ferredoxin:thioredoxin reductase",
"1.8.98.1 CoB--CoM heterodisulfide reductase",
"1.8.98.2 Sulfiredoxin",
- "1.8.99.1 Sulfite reductase",
+ "1.8.98.3 Sulfite reductase (coenzyme F420)",
"1.8.99.2 Adenylyl-sulfate reductase",
"1.8.99.3 Hydrogensulfite reductase",
+ "1.8.99.5 Dissimilatory sulfite reductase",
"1.9.3.1 Cytochrome-c oxidase",
"1.9.6.1 Nitrate reductase (cytochrome)",
- "1.9.99.1 Iron--cytochrome-c reductase",
+ "1.9.98.1 Iron--cytochrome-c reductase",
"1.10.1.1 Trans-acenaphthene-1,2-diol dehydrogenase",
"1.10.2.1 L-ascorbate--cytochrome-b5 reductase",
- "1.10.2.2 Ubiquinol--cytochrome-c reductase",
+ "1.10.2.2 Quinol--cytochrome-c reductase",
"1.10.3.1 Catechol oxidase",
"1.10.3.2 Laccase",
"1.10.3.3 L-ascorbate oxidase",
@@ -893,11 +1046,13 @@ static const char* const kECNum_specific[] = {
"1.10.3.6 Rifamycin-B oxidase",
"1.10.3.9 Photosystem II",
"1.10.3.10 Ubiquinol oxidase (H(+)-transporting)",
- "1.10.3.11 Ubiquinol oxidase",
+ "1.10.3.11 Ubiquinol oxidase (non-electrogenic)",
"1.10.3.12 Menaquinol oxidase (H(+)-transporting)",
+ "1.10.3.13 Caldariellaquinol oxidase (H(+)-transporting)",
+ "1.10.3.14 Ubiquinol oxidase (electrogenic, non H(+)-transporting)",
+ "1.10.3.15 Grixazone synthase",
+ "1.10.5.1 Ribosyldihydronicotinamide dehydrogenase (quinone)",
"1.10.9.1 Plastoquinol--plastocyanin reductase",
- "1.10.99.2 Ribosyldihydronicotinamide dehydrogenase (quinone)",
- "1.10.99.3 Violaxanthin de-epoxidase",
"1.11.1.1 NADH peroxidase",
"1.11.1.2 NADPH peroxidase",
"1.11.1.3 Fatty-acid peroxidase",
@@ -918,19 +1073,24 @@ static const char* const kECNum_specific[] = {
"1.11.1.19 Dye decolorizing peroxidase",
"1.11.1.20 Prostamide/prostaglandin F(2-alpha) synthase",
"1.11.1.21 Catalase peroxidase",
+ "1.11.1.22 Hydroperoxy fatty acid reductase",
+ "1.11.1.23 (S)-2-hydroxypropylphosphonic acid epoxidase",
"1.11.2.1 Unspecific peroxygenase",
"1.11.2.2 Myeloperoxidase",
"1.11.2.3 Plant seed peroxygenase",
"1.11.2.4 Fatty-acid peroxygenase",
+ "1.11.2.5 3-methyl-L-tyrosine peroxygenase",
"1.12.1.2 Hydrogen dehydrogenase",
"1.12.1.3 Hydrogen dehydrogenase (NADP(+))",
"1.12.1.4 Hydrogenase (NAD(+), ferredoxin)",
+ "1.12.1.5 Hydrogen dehydrogenase (NAD(P)(+))",
"1.12.2.1 Cytochrome-c3 hydrogenase",
"1.12.5.1 Hydrogen:quinone oxidoreductase",
"1.12.7.2 Ferredoxin hydrogenase",
"1.12.98.1 Coenzyme F420 hydrogenase",
"1.12.98.2 5,10-methenyltetrahydromethanopterin hydrogenase",
"1.12.98.3 Methanosarcina-phenazine hydrogenase",
+ "1.12.98.4 Sulfhydrogenase",
"1.12.99.6 Hydrogenase (acceptor)",
"1.13.11.1 Catechol 1,2-dioxygenase",
"1.13.11.2 Catechol 2,3-dioxygenase",
@@ -947,7 +1107,7 @@ static const char* const kECNum_specific[] = {
"1.13.11.15 3,4-dihydroxyphenylacetate 2,3-dioxygenase",
"1.13.11.16 3-carboxyethylcatechol 2,3-dioxygenase",
"1.13.11.17 Indole 2,3-dioxygenase",
- "1.13.11.18 Sulfur dioxygenase",
+ "1.13.11.18 Persulfide dioxygenase",
"1.13.11.19 Cysteamine dioxygenase",
"1.13.11.20 Cysteine dioxygenase",
"1.13.11.22 Caffeate 3,4-dioxygenase",
@@ -988,7 +1148,26 @@ static const char* const kECNum_specific[] = {
"1.13.11.60 Linoleate 8R-lipoxygenase",
"1.13.11.61 Linolenate 9R-lipoxygenase",
"1.13.11.62 Linoleate 10R-lipoxygenase",
- "1.13.11.n1 2-aminophenol 1,6-dioxygenase",
+ "1.13.11.63 Beta-carotene 15,15'-dioxygenase",
+ "1.13.11.64 5-nitrosalicylate dioxygenase",
+ "1.13.11.65 Carotenoid isomerooxygenase",
+ "1.13.11.66 Hydroquinone 1,2-dioxygenase",
+ "1.13.11.67 8'-apo-beta-carotenoid 14',13'-cleaving dioxygenase",
+ "1.13.11.68 9-cis-beta-carotene 9',10'-cleaving dioxygenase",
+ "1.13.11.69 Carlactone synthase",
+ "1.13.11.70 All-trans-10'-apo-beta-carotenal 13,14-cleaving dioxygenase",
+ "1.13.11.71 Carotenoid-9',10'-cleaving dioxygenase",
+ "1.13.11.72 2-hydroxyethylphosphonate dioxygenase",
+ "1.13.11.73 Methylphosphonate synthase",
+ "1.13.11.74 2-aminophenol 1,6-dioxygenase",
+ "1.13.11.75 All-trans-8'-apo-beta-carotenal 15,15'-oxygenase",
+ "1.13.11.76 2-amino-5-chlorophenol 1,6-dioxygenase",
+ "1.13.11.77 Oleate 10S-lipoxygenase",
+ "1.13.11.78 2-amino-1-hydroxyethylphosphonate dioxygenase (glycine-forming)",
+ "1.13.11.79 5,6-dimethylbenzimidazole synthase",
+ "1.13.11.80 (3,5-dihydroxyphenyl)acetyl-CoA 1,2-dioxygenase",
+ "1.13.11.81 7,8-dihydroneopterin oxygenase",
+ "1.13.11.82 8'-apo-carotenoid 13,14-cleaving dioxygenase",
"1.13.12.1 Arginine 2-monooxygenase",
"1.13.12.2 Lysine 2-monooxygenase",
"1.13.12.3 Tryptophan 2-monooxygenase",
@@ -998,13 +1177,15 @@ static const char* const kECNum_specific[] = {
"1.13.12.7 Photinus-luciferin 4-monooxygenase (ATP-hydrolyzing)",
"1.13.12.8 Watasenia-luciferin 2-monooxygenase",
"1.13.12.9 Phenylalanine 2-monooxygenase",
- "1.13.12.12 Apo-beta-carotenoid-14',13'-dioxygenase",
"1.13.12.13 Oplophorus-luciferin 2-monooxygenase",
"1.13.12.15 3,4-dihydroxyphenylalanine oxidative deaminase",
"1.13.12.16 Nitronate monooxygenase",
"1.13.12.17 Dichloroarcyriaflavin A synthase",
"1.13.12.18 Dinoflagellate luciferase",
"1.13.12.19 2-oxuglutarate dioxygenase (ethylene-forming)",
+ "1.13.12.20 Noranthrone monooxygenase",
+ "1.13.12.21 Tetracenomycin-F1 monooxygenase",
+ "1.13.12.22 Deoxynogalonate monooxygenase",
"1.13.99.1 Inositol oxygenase",
"1.13.99.3 Tryptophan 2'-dioxygenase",
"1.14.11.1 Gamma-butyrobetaine dioxygenase",
@@ -1040,7 +1221,21 @@ static const char* const kECNum_specific[] = {
"1.14.11.32 Codeine 3-O-demethylase",
"1.14.11.33 DNA oxidative demethylase",
"1.14.11.34 2-oxoglutarate/L-arginine monooxygenase/decarboxylase (succinate-forming)",
- "1.14.11.n1 L-asparagine oxygenase",
+ "1.14.11.35 1-deoxypentalenic acid 11-beta-hydroxylase",
+ "1.14.11.36 Pentalenolactone F synthase",
+ "1.14.11.37 Kanamycin B dioxygenase",
+ "1.14.11.38 Verruculogen synthase",
+ "1.14.11.39 L-asparagine oxygenase",
+ "1.14.11.40 Enduracididine beta-hydroxylase",
+ "1.14.11.41 L-arginine hydroxylase",
+ "1.14.11.42 tRNA(Phe) (7-(3-amino-3-carboxypropyl)wyosine(37)-C(2))-hydroxylase",
+ "1.14.11.43 (S)-dichlorprop dioxygenase (2-oxoglutarate)",
+ "1.14.11.44 (R)-dichlorprop dioxygenase (2-oxoglutarate)",
+ "1.14.11.45 L-isoleucine 4-hydroxylase",
+ "1.14.11.46 2-aminoethylphosphonate dioxygenase",
+ "1.14.11.47 50S ribosomal protein L16 3-hydroxylase",
+ "1.14.11.48 Xanthine dioxygenase",
+ "1.14.11.49 Uridine-5'-phosphate dioxygenase",
"1.14.11.n2 Methylcytosine dioxygenase",
"1.14.11.n3 L-proline cis-4-hydroxylase",
"1.14.11.n4 Ankyrin-repeat-histidine dioxagenase",
@@ -1054,22 +1249,22 @@ static const char* const kECNum_specific[] = {
"1.14.12.10 Benzoate 1,2-dioxygenase",
"1.14.12.11 Toluene dioxygenase",
"1.14.12.12 Naphthalene 1,2-dioxygenase",
- "1.14.12.13 2-chlorobenzoate 1,2-dioxygenase",
+ "1.14.12.13 2-halobenzoate 1,2-dioxygenase",
"1.14.12.14 2-aminobenzenesulfonate 2,3-dioxygenase",
"1.14.12.15 Terephthalate 1,2-dioxygenase",
"1.14.12.16 2-hydroxyquinoline 5,6-dioxygenase",
"1.14.12.17 Nitric oxide dioxygenase",
"1.14.12.18 Biphenyl 2,3-dioxygenase",
"1.14.12.19 3-phenylpropanoate dioxygenase",
- "1.14.12.20 Pheophorbide a oxygenase",
- "1.14.12.21 Benzoyl-CoA 2,3-dioxygenase",
"1.14.12.22 Carbazole 1,9a-dioxygenase",
+ "1.14.12.23 Nitroarene dioxygenase",
+ "1.14.12.24 2,4-dinitrotoluene dioxygenase",
"1.14.13.1 Salicylate 1-monooxygenase",
"1.14.13.2 4-hydroxybenzoate 3-monooxygenase",
"1.14.13.4 Melilotate 3-monooxygenase",
"1.14.13.5 Imidazoleacetate 4-monooxygenase",
"1.14.13.6 Orcinol 2-monooxygenase",
- "1.14.13.7 Phenol 2-monooxygenase",
+ "1.14.13.7 Phenol 2-monooxygenase (NADPH)",
"1.14.13.8 Flavin-containing monooxygenase",
"1.14.13.9 Kynurenine 3-monooxygenase",
"1.14.13.10 2,6-dihydroxypyridine 3-monooxygenase",
@@ -1077,9 +1272,7 @@ static const char* const kECNum_specific[] = {
"1.14.13.12 Benzoate 4-monooxygenase",
"1.14.13.13 Calcidiol 1-monooxygenase",
"1.14.13.14 Trans-cinnamate 2-monooxygenase",
- "1.14.13.15 Cholestanetriol 26-monooxygenase",
"1.14.13.16 Cyclopentanone monooxygenase",
- "1.14.13.17 Cholesterol 7-alpha-monooxygenase",
"1.14.13.18 4-hydroxyphenylacetate 1-monooxygenase",
"1.14.13.19 Taxifolin 8-monooxygenase",
"1.14.13.20 2,4-dichlorophenol 6-monooxygenase",
@@ -1088,7 +1281,6 @@ static const char* const kECNum_specific[] = {
"1.14.13.23 3-hydroxybenzoate 4-monooxygenase",
"1.14.13.24 3-hydroxybenzoate 6-monooxygenase",
"1.14.13.25 Methane monooxygenase (soluble)",
- "1.14.13.26 Phosphatidylcholine 12-monooxygenase",
"1.14.13.27 4-aminobenzoate 1-monooxygenase",
"1.14.13.28 3,9-dihydroxypterocarpan 6A-monooxygenase",
"1.14.13.29 4-nitrophenol 2-monooxygenase",
@@ -1101,7 +1293,7 @@ static const char* const kECNum_specific[] = {
"1.14.13.36 5-O-(4-coumaroyl)-D-quinate 3'-monooxygenase",
"1.14.13.37 Methyltetrahydroprotoberberine 14-monooxygenase",
"1.14.13.38 Anhydrotetracycline monooxygenase",
- "1.14.13.39 Nitric-oxide synthase",
+ "1.14.13.39 Nitric-oxide synthase (NADPH)",
"1.14.13.40 Anthraniloyl-CoA monooxygenase",
"1.14.13.41 Tyrosine N-monooxygenase",
"1.14.13.43 Questin monooxygenase",
@@ -1119,8 +1311,7 @@ static const char* const kECNum_specific[] = {
"1.14.13.56 Dihydrosanguinarine 10-monooxygenase",
"1.14.13.57 Dihydrochelirubine 12-monooxygenase",
"1.14.13.58 Benzoyl-CoA 3-monooxygenase",
- "1.14.13.59 L-lysine 6-monooxygenase (NADPH)",
- "1.14.13.60 27-hydroxycholesterol 7-alpha-monooxygenase",
+ "1.14.13.59 L-lysine N(6)-monooxygenase (NADPH)",
"1.14.13.61 2-hydroxyquinoline 8-monooxygenase",
"1.14.13.62 4-hydroxyquinoline 3-monooxygenase",
"1.14.13.63 3-hydroxyphenylacetate 6-hydroxylase",
@@ -1129,7 +1320,7 @@ static const char* const kECNum_specific[] = {
"1.14.13.67 Quinine 3-monooxygenase",
"1.14.13.68 4-hydroxyphenylacetaldehyde oxime monooxygenase",
"1.14.13.69 Alkene monooxygenase",
- "1.14.13.70 Sterol 14-demethylase",
+ "1.14.13.70 Sterol 14-alpha-demethylase",
"1.14.13.71 N-methylcoclaurine 3'-monooxygenase",
"1.14.13.72 Methylsterol monooxygenase",
"1.14.13.73 Tabersonine 16-hydroxylase",
@@ -1145,7 +1336,6 @@ static const char* const kECNum_specific[] = {
"1.14.13.83 Precorrin-3B synthase",
"1.14.13.84 4-hydroxyacetophenone monooxygenase",
"1.14.13.85 Glyceollin synthase",
- "1.14.13.86 2-hydroxyisoflavanone synthase",
"1.14.13.87 Licodione synthase",
"1.14.13.88 Flavonoid 3',5'-hydroxylase",
"1.14.13.89 Isoflavone 2'-hydroxylase",
@@ -1154,12 +1344,9 @@ static const char* const kECNum_specific[] = {
"1.14.13.92 Phenylacetone monooxygenase",
"1.14.13.93 (+)-abscisic acid 8'-hydroxylase",
"1.14.13.94 Lithocholate 6-beta-hydroxylase",
- "1.14.13.95 7-alpha-hydroxycholest-4-en-3-one 12-alpha-hydroxylase",
"1.14.13.96 5-beta-cholestane-3-alpha,7-alpha-diol 12-alpha-hydroxylase",
"1.14.13.97 Taurochenodeoxycholate 6-alpha-hydroxylase",
- "1.14.13.98 Cholesterol 24-hydroxylase",
- "1.14.13.99 24-hydroxycholesterol 7-alpha-hydroxylase",
- "1.14.13.100 25-hydroxycholesterol 7-alpha-hydroxylase",
+ "1.14.13.100 25/26-hydroxycholesterol 7-alpha-hydroxylase",
"1.14.13.101 Senecionine N-oxygenase",
"1.14.13.102 Psoralen synthase",
"1.14.13.103 8-dimethylallylnaringenin 2'-hydroxylase",
@@ -1185,17 +1372,15 @@ static const char* const kECNum_specific[] = {
"1.14.13.123 Germacrene A hydroxylase",
"1.14.13.124 Phenylalanine N-monooxygenase",
"1.14.13.125 Tryptophan N-monooxygenase",
- "1.14.13.126 Vitamin D(3) 24-hydroxylase",
"1.14.13.127 3-(3-hydroxy-phenyl)propanoic acid hydroxylase",
"1.14.13.128 7-methylxanthine demethylase",
"1.14.13.129 Beta-carotene 3-hydroxylase",
"1.14.13.130 Pyrrole-2-carboxylate monooxygenase",
"1.14.13.131 Dimethyl-sulfide monooxygenase",
- "1.14.13.132 Squalene monooxygenase",
"1.14.13.133 Pentalenene oxygenase",
"1.14.13.134 Beta-amyrin 11-oxidase",
"1.14.13.135 1-hydroxy-2-naphthoate hydroxylase",
- "1.14.13.136 Isoflavonoid synthase",
+ "1.14.13.136 2-hydroxyisoflavanone synthase",
"1.14.13.137 Indole-2-monooxygenase",
"1.14.13.138 Indolin-2-one monooxygenase",
"1.14.13.139 3-hydroxyindolin-2-one monooxygenase",
@@ -1208,64 +1393,194 @@ static const char* const kECNum_specific[] = {
"1.14.13.146 Taxoid 14-beta-hydroxylase",
"1.14.13.147 Taxoid 7-beta-hydroxylase",
"1.14.13.148 Trimethylamine monooxygenase",
+ "1.14.13.149 Phenylacetyl-CoA 1,2-epoxidase",
+ "1.14.13.150 Alpha-humulene 10-hydroxylase",
+ "1.14.13.151 Linalool 8-monooxygenase",
+ "1.14.13.152 Geraniol 8-hydroxylase",
+ "1.14.13.153 (+)-sabinene 3-hydroxylase",
+ "1.14.13.154 Erythromycin 12 hydroxylase",
+ "1.14.13.155 Alpha-pinene monooxygenase",
+ "1.14.13.156 1,8-cineole 2-endo-monooxygenase",
+ "1.14.13.157 1,8-cineole 2-exo-monooxygenase",
+ "1.14.13.158 Amorpha-4,11-diene 12 monooxygenase",
+ "1.14.13.160 (2,2,3-trimethyl-5-oxocyclopent-3-enyl)acetyl-CoA 1,5-monooxygenase",
+ "1.14.13.161 (+)-camphor 6-exo-hydroxylase",
+ "1.14.13.162 2,5-diketocamphane 1,2-monooxygenase",
+ "1.14.13.163 6-hydroxy-3-succinoylpyridine 3-monooxygenase",
+ "1.14.13.165 Nitric-oxide synthase (NAD(P)H)",
+ "1.14.13.166 4-nitrocatechol 4-monooxygenase",
+ "1.14.13.167 4-nitrophenol 4-monooxygenase",
+ "1.14.13.168 Indole-3-pyruvate monooxygenase",
+ "1.14.13.170 Pentalenolactone D synthase",
+ "1.14.13.171 Neopentalenolactone D synthase",
+ "1.14.13.172 Salicylate 5-hydroxylase",
+ "1.14.13.173 11-oxo-beta-amyrin 30-oxidase",
+ "1.14.13.174 Averantin hydroxylase",
+ "1.14.13.175 Aflatoxin B synthase",
+ "1.14.13.176 Tryprostatin B 6-hydroxylase",
+ "1.14.13.177 Fumitremorgin C monooxygenase",
+ "1.14.13.178 Methylxanthine N(1)-demethylase",
+ "1.14.13.179 Methylxanthine N(3)-demethylase",
+ "1.14.13.180 Aklavinone 12-hydroxylase",
+ "1.14.13.181 13-deoxydaunorubicin hydroxylase",
+ "1.14.13.182 2-heptyl-3-hydroxy-4(1H)-quinolone synthase",
+ "1.14.13.183 Dammarenediol 12-hydroxylase",
+ "1.14.13.184 Protopanaxadiol 6-hydroxylase",
+ "1.14.13.185 Pikromycin synthase",
+ "1.14.13.186 20-oxo-5-O-mycaminosyltylactone 23-monooxygenase",
+ "1.14.13.187 L-evernosamine nitrososynthase",
+ "1.14.13.188 6-deoxyerythronolide B hydroxylase",
+ "1.14.13.189 5-methyl-1-naphthoate 3-hydroxylase",
+ "1.14.13.190 Ferruginol synthase",
+ "1.14.13.191 Ent-sandaracopimaradiene 3-hydroxylase",
+ "1.14.13.192 Oryzalexin E synthase",
+ "1.14.13.193 Oryzalexin D synthase",
+ "1.14.13.194 Phylloquinone omega-hydroxylase",
+ "1.14.13.195 L-ornithine N(5)-monooxygenase (NADPH)",
+ "1.14.13.196 L-ornithine N(5)-monooxygenase (NAD(P)H)",
+ "1.14.13.197 Dihydromonacolin L hydroxylase",
+ "1.14.13.198 Monacolin L hydroxylase",
+ "1.14.13.199 Docosahexaenoic acid omega-hydroxylase",
+ "1.14.13.200 Tetracenomycin A2 monooxygenase-diooxygenase",
+ "1.14.13.201 Beta-amyrin 28-monooxygenase",
+ "1.14.13.202 Methyl farnesoate epoxidase",
+ "1.14.13.203 Farnesoate epoxidase",
+ "1.14.13.204 Long-chain acyl-CoA omega-monooxygenase",
+ "1.14.13.205 Long-chain fatty acid omega-monooxygenase",
+ "1.14.13.206 Laurate 7-monooxygenase",
+ "1.14.13.207 Ipsdienol synthase",
+ "1.14.13.208 Benzoyl-CoA 2,3-epoxidase",
+ "1.14.13.209 Salicyloyl-CoA 5-hydroxylase",
+ "1.14.13.210 4-methyl-5-nitrocatechol 5-monooxygenase",
+ "1.14.13.211 Rifampicin monooxygenase",
+ "1.14.13.212 1,3,7-trimethyluric acid 5-monooxygenase",
+ "1.14.13.213 Bursehernin 5'-monooxygenase",
+ "1.14.13.214 (-)-4'-demethyl-deoxypodophyllotoxin 4-hydroxylase",
+ "1.14.13.215 Protoasukamycin 4-monooxygenase",
+ "1.14.13.216 Asperlicin C monooxygenase",
"1.14.13.n5 Dihomomethionine N-hydroxylase",
"1.14.13.n6 Hexahomomethionine N-hydroxylase",
"1.14.13.n7 4-nitrophenol 2-hydroxylase",
"1.14.13.n8 2-methylbutanal oxime monooxygenase",
- "1.14.13.n9 Phenylacetyl-CoA 1,2-epoxidase",
"1.14.14.1 Unspecific monooxygenase",
"1.14.14.3 Alkanal monooxygenase (FMN-linked)",
"1.14.14.5 Alkanesulfonate monooxygenase",
- "1.14.14.7 Tryptophan 7-halogenase",
"1.14.14.8 Anthranilate 3-monooxygenase (FAD)",
"1.14.14.9 4-hydroxyphenylacetate 3-monooxygenase",
"1.14.14.10 Nitrilotriacetate monooxygenase",
"1.14.14.11 Styrene monooxygenase",
"1.14.14.12 3-hydroxy-9,10-secoandrosta-1,3,5(10)-triene-9,17-dione monooxygenase",
- "1.14.14.n1 Pyrimidine monooxygenase",
+ "1.14.14.13 4-(gamma-L-glutamylamino)butanoyl-[BtrI acyl-carrier protein] monooxygenase",
+ "1.14.14.14 Aromatase",
+ "1.14.14.15 (3S)-3-amino-3-(3-chloro-4-hydroxyphenyl)propanoyl-[peptidyl-carrier protein SgcC2] monooxygenase",
+ "1.14.14.16 Steroid 21-monooxygenase",
+ "1.14.14.17 Squalene monooxygenase",
+ "1.14.14.18 Heme oxygenase (biliverdin-producing)",
+ "1.14.14.19 Steroid 17-alpha-monooxygenase",
+ "1.14.14.20 Phenol 2-monooxygenase (FADH(2))",
+ "1.14.14.21 Dibenzothiophene monooxygenase",
+ "1.14.14.22 Dibenzothiophene sulfone monooxygenase",
+ "1.14.14.23 Cholesterol 7-alpha-monooxygenase",
+ "1.14.14.24 Vitamin D 25-hydroxylase",
+ "1.14.14.25 Cholesterol 24-hydroxylase",
+ "1.14.14.26 24-hydroxycholesterol 7-alpha-hydroxylase",
"1.14.15.1 Camphor 5-monooxygenase",
- "1.14.15.2 Camphor 1,2-monooxygenase",
"1.14.15.3 Alkane 1-monooxygenase",
"1.14.15.4 Steroid 11-beta-monooxygenase",
"1.14.15.5 Corticosterone 18-monooxygenase",
"1.14.15.6 Cholesterol monooxygenase (side-chain-cleaving)",
"1.14.15.7 Choline monooxygenase",
"1.14.15.8 Steroid 15-beta-monooxygenase",
+ "1.14.15.9 Spheroidene monooxygenase",
+ "1.14.15.10 (+)-camphor 6-endo-hydroxylase",
+ "1.14.15.11 Pentalenic acid synthase",
+ "1.14.15.12 Pimeloyl-[acyl-carrier protein] synthase",
+ "1.14.15.13 Pulcherriminic acid synthase",
+ "1.14.15.14 Methyl-branched lipid omega-hydroxylase",
+ "1.14.15.15 Cholestanetriol 26-monooxygenase",
+ "1.14.15.16 Vitamin D(3) 24-hydroxylase",
+ "1.14.15.17 Pheophorbide a oxygenase",
"1.14.16.1 Phenylalanine 4-monooxygenase",
"1.14.16.2 Tyrosine 3-monooxygenase",
"1.14.16.3 Anthranilate 3-monooxygenase",
"1.14.16.4 Tryptophan 5-monooxygenase",
"1.14.16.5 Alkylglycerol monooxygenase",
"1.14.16.6 Mandelate 4-monooxygenase",
+ "1.14.16.7 Phenylalanine 3-monooxygenase",
"1.14.17.1 Dopamine beta-monooxygenase",
"1.14.17.3 Peptidylglycine monooxygenase",
"1.14.17.4 Aminocyclopropanecarboxylate oxidase",
- "1.14.18.1 Monophenol monooxygenase",
+ "1.14.18.1 Tyrosinase",
"1.14.18.2 CMP-N-acetylneuraminate monooxygenase",
"1.14.18.3 Methane monooxygenase (particulate)",
+ "1.14.18.4 Phosphatidylcholine 12-monooxygenase",
+ "1.14.18.5 Sphingolipid C4-monooxygenase",
+ "1.14.18.6 4-hydroxysphinganine ceramide fatty acyl 2-hydroxylase",
+ "1.14.18.7 Dihydroceramide fatty acyl 2-hydroxylase",
+ "1.14.18.8 7-alpha-hydroxycholest-4-en-3-one 12-alpha-hydroxylase",
"1.14.19.1 Stearoyl-CoA 9-desaturase",
- "1.14.19.2 Acyl-[acyl-carrier-protein] desaturase",
- "1.14.19.3 Linoleoyl-CoA desaturase",
- "1.14.19.4 Delta(8)-fatty-acid desaturase",
- "1.14.19.5 Delta(11)-fatty-acid desaturase",
- "1.14.19.6 Delta(12)-fatty-acid desaturase",
- "1.14.19.7 (S)-2-hydroxypropylphosphonic acid epoxidase",
+ "1.14.19.2 Stearoyl-[acyl-carrier-protein] 9-desaturase",
+ "1.14.19.3 Acyl-CoA 6-desaturase",
+ "1.14.19.4 Acyl-lipid (11-3)-desaturase",
+ "1.14.19.5 Acyl-CoA 11-(Z)-desaturase",
+ "1.14.19.6 Acyl-CoA (9+3)-desaturase",
+ "1.14.19.8 Pentalenolactone synthase",
+ "1.14.19.9 Tryptophan 7-halogenase",
+ "1.14.19.10 Icosanoyl-CoA 5-desaturase",
+ "1.14.19.11 Acyl-[acyl-carrier-protein] 4-desaturase",
+ "1.14.19.12 Acyl-lipid omega-(9-4) desaturase",
+ "1.14.19.13 Acyl-CoA 15-desaturase",
+ "1.14.19.14 Linoleoyl-lipid Delta(9) conjugase",
+ "1.14.19.15 (11Z)-hexadec-11-enoyl-CoA conjugase",
+ "1.14.19.16 Linoleoyl-lipid Delta(12) conjugase (11E,13Z-forming)",
+ "1.14.19.17 Sphingolipid 4-desaturase",
+ "1.14.19.18 Sphingolipid 8-(E)-desaturase",
+ "1.14.19.19 Sphingolipid 10-desaturase",
+ "1.14.19.20 Delta(7)-sterol 5(6)-desaturase",
+ "1.14.19.21 Cholesterol 7-desaturase",
+ "1.14.19.22 Acyl-lipid omega-6 desaturase (cytochrome b5)",
+ "1.14.19.23 Acyl-lipid (n+3)-(Z)-desaturase (ferredoxin)",
+ "1.14.19.24 Acyl-CoA 11-(E)-desaturase",
+ "1.14.19.25 Acyl-lipid omega-3 desaturase (cytochrome b5)",
+ "1.14.19.26 Acyl-[acyl-carrier-protein] 6-desaturase",
+ "1.14.19.27 sn-2 palmitoyl-lipid 9-desaturase",
+ "1.14.19.28 sn-1 stearoyl-lipid 9-desaturase",
+ "1.14.19.29 Sphingolipid 8-(E/Z)-desaturase",
+ "1.14.19.30 Acyl-lipid (8-3)-desaturase",
+ "1.14.19.31 Acyl-lipid (7-3)-desaturase",
+ "1.14.19.32 Palmitoyl-CoA 14-(E/Z)-desaturase",
+ "1.14.19.33 Delta(12) acyl-lipid conjugase (11E,13E-forming)",
+ "1.14.19.34 Acyl-lipid (9+3)-(E)-desaturase",
+ "1.14.19.35 sn-2 acyl-lipid omega-3 desaturase (ferredoxin)",
+ "1.14.19.36 sn-1 acyl-lipid omega-3 desaturase (ferredoxin)",
+ "1.14.19.37 Acyl-CoA 5-desaturase",
+ "1.14.19.38 Acyl-lipid Delta(6)-acetylenase",
+ "1.14.19.39 Acyl-lipid Delta(12)-acetylenase",
+ "1.14.19.40 Hex-5-enoyl-[acyl-carrier protein] acetylenase",
+ "1.14.19.41 Sterol 22-desaturase",
+ "1.14.19.42 Palmitoyl-[glycerolipid] 7-desaturase",
+ "1.14.19.43 Palmitoyl-[glycerolipid] 3-(E)-desaturase",
+ "1.14.19.44 Acyl-CoA (8-3)-desaturase",
+ "1.14.19.45 sn-1 oleoyl-lipid 12-desaturase",
+ "1.14.19.46 sn-1 linoleoyl-lipid 6-desaturase",
+ "1.14.19.47 Acyl-lipid (9-3)-desaturase",
+ "1.14.19.n4 Stearoyl-CoA 9-desaturase",
+ "1.14.19.n5 Versicolorin B desaturase",
"1.14.20.1 Deacetoxycephalosporin-C synthase",
"1.14.20.2 2,4-dihydroxy-1,4-benzoxazin-3-one-glucoside dioxygenase",
+ "1.14.20.3 (5R)-carbapenem-3-carboxylate synthase",
"1.14.21.1 (S)-stylopine synthase",
"1.14.21.2 (S)-cheilanthifoline synthase",
"1.14.21.3 Berbamunine synthase",
"1.14.21.4 Salutaridine synthase",
"1.14.21.5 (S)-canadine synthase",
- "1.14.21.6 Lathosterol oxidase",
"1.14.21.7 Biflaviolin synthase",
"1.14.21.8 Pseudobaptigenin synthase",
+ "1.14.21.9 Mycocyclosin synthase",
+ "1.14.21.10 Fumitremorgin C synthase",
"1.14.99.1 Prostaglandin-endoperoxide synthase",
"1.14.99.2 Kynurenine 7,8-hydroxylase",
- "1.14.99.3 Heme oxygenase",
"1.14.99.4 Progesterone monooxygenase",
- "1.14.99.9 Steroid 17-alpha-monooxygenase",
- "1.14.99.10 Steroid 21-monooxygenase",
"1.14.99.11 Estradiol 6-beta-monooxygenase",
"1.14.99.12 Androst-4-ene-3,17-dione monooxygenase",
"1.14.99.14 Progesterone 11-alpha-monooxygenase",
@@ -1278,24 +1593,25 @@ static const char* const kECNum_specific[] = {
"1.14.99.24 Steroid 9-alpha-monooxygenase",
"1.14.99.26 2-hydroxypyridine 5-monooxygenase",
"1.14.99.27 Juglone 3-monooxygenase",
- "1.14.99.28 Linalool 8-monooxygenase",
"1.14.99.29 Deoxyhypusine monooxygenase",
- "1.14.99.31 Myristoyl-CoA 11-(E) desaturase",
- "1.14.99.32 Myristoyl-CoA 11-(Z) desaturase",
- "1.14.99.33 Delta(12)-fatty acid dehydrogenase",
"1.14.99.34 Monoprenyl isoflavone epoxidase",
"1.14.99.35 Thiophene-2-carbonyl-CoA monooxygenase",
"1.14.99.36 Beta-carotene 15,15'-monooxygenase",
"1.14.99.37 Taxadiene 5-alpha-hydroxylase",
"1.14.99.38 Cholesterol 25-hydroxylase",
"1.14.99.39 Ammonia monooxygenase",
- "1.14.99.40 5,6-dimethylbenzimidazole synthase",
- "1.14.99.41 All-trans-8'-apo-beta-carotenal 15,15'-oxygenase",
"1.14.99.42 Zeaxanthin 7,8-dioxygenase",
"1.14.99.43 Beta-amyrin 24-hydroxylase",
"1.14.99.44 Diapolycopene oxygenase",
"1.14.99.45 Carotene epsilon-monooxygenase",
- "1.14.99.n2 Beta,beta-carotene 9',10'-oxygenase",
+ "1.14.99.46 Pyrimidine monooxygenase",
+ "1.14.99.47 (+)-larreatricin hydroxylase",
+ "1.14.99.48 Heme oxygenase (staphylobilin-producing)",
+ "1.14.99.49 2-hydroxy-5-methyl-1-naphthoate 7-hydroxylase",
+ "1.14.99.50 Gamma-glutamyl hercynylcysteine S-oxide synthase",
+ "1.14.99.51 Hercynylcysteine S-oxide synthase",
+ "1.14.99.52 L-cysteinyl-L-histidinylsulfoxide synthase",
+ "1.14.99.n4 Carotenoid 9,10-dioxygenase",
"1.15.1.1 Superoxide dismutase",
"1.15.1.2 Superoxide reductase",
"1.16.1.1 Mercury(II) reductase",
@@ -1307,35 +1623,46 @@ static const char* const kECNum_specific[] = {
"1.16.1.7 Ferric-chelate reductase (NADH)",
"1.16.1.8 [Methionine synthase] reductase",
"1.16.1.9 Ferric-chelate reductase (NADPH)",
+ "1.16.1.10 Ferric-chelate reductase (NAD(P)H)",
"1.16.3.1 Ferroxidase",
+ "1.16.3.2 Bacterial non-heme ferritin",
"1.16.5.1 Ascorbate ferrireductase (transmembrane)",
"1.16.8.1 Cob(II)yrinic acid a,c-diamide reductase",
"1.16.9.1 Iron:rusticyanin reductase",
"1.17.1.1 CDP-4-dehydro-6-deoxyglucose reductase",
- "1.17.1.2 4-hydroxy-3-methylbut-2-enyl diphosphate reductase",
"1.17.1.3 Leucoanthocyanidin reductase",
"1.17.1.4 Xanthine dehydrogenase",
"1.17.1.5 Nicotinate dehydrogenase",
- "1.17.1.7 3-oxo-5,6-dehydrosuberyl-CoA semialdehyde dehydrogenase",
+ "1.17.1.8 4-hydroxy-tetrahydrodipicolinate reductase",
"1.17.2.1 Nicotinate dehydrogenase (cytochrome)",
+ "1.17.2.2 Lupanine 17-hydroxylase (cytochrome c)",
"1.17.3.1 Pteridine oxidase",
"1.17.3.2 Xanthine oxidase",
"1.17.3.3 6-hydroxynicotinate dehydrogenase",
"1.17.4.1 Ribonucleoside-diphosphate reductase",
"1.17.4.2 Ribonucleoside-triphosphate reductase",
+ "1.17.4.4 Vitamin-K-epoxide reductase (warfarin-sensitive)",
+ "1.17.4.5 Vitamin-K-epoxide reductase (warfarin-insensitive)",
"1.17.5.1 Phenylacetyl-CoA dehydrogenase",
"1.17.5.2 Caffeine dehydrogenase",
- "1.17.7.1 (E)-4-hydroxy-3-methylbut-2-enyl-diphosphate synthase",
+ "1.17.7.1 (E)-4-hydroxy-3-methylbut-2-enyl-diphosphate synthase (ferredoxin)",
"1.17.7.2 7-hydroxymethyl chlorophyll a reductase",
+ "1.17.7.3 (E)-4-hydroxy-3-methylbut-2-enyl-diphosphate synthase (flavodoxin)",
+ "1.17.7.4 4-hydroxy-3-methylbut-2-enyl diphosphate reductase",
+ "1.17.98.1 Bile-acid 7-alpha-dehydroxylase",
"1.17.99.1 4-methylphenol dehydrogenase (hydroxylating)",
"1.17.99.2 Ethylbenzene hydroxylase",
"1.17.99.3 3-alpha,7-alpha,12-alpha-trihydroxy-5-beta-cholestanoyl-CoA 24-hydroxylase",
"1.17.99.4 Uracil/thymine dehydrogenase",
- "1.17.99.5 Bile-acid 7-alpha-dehydroxylase",
+ "1.17.99.6 Epoxyqueuosine reductase",
"1.18.1.1 Rubredoxin--NAD(+) reductase",
"1.18.1.2 Ferredoxin--NADP(+) reductase",
"1.18.1.3 Ferredoxin--NAD(+) reductase",
"1.18.1.4 Rubredoxin--NAD(P)(+) reductase",
+ "1.18.1.5 Putidaredoxin--NAD(+) reductase",
+ "1.18.1.6 Adrenodoxin-NADP(+) reductase",
+ "1.18.1.7 Ferredoxin--NAD(P)(+) reductase (naphthalene dioxygenase ferredoxin-specific)",
+ "1.18.1.8 Ferredoxin-NAD(+) oxidoreductase (Na(+)-transporting)",
"1.18.6.1 Nitrogenase",
"1.19.6.1 Nitrogenase (flavodoxin)",
"1.20.1.1 Phosphonate dehydrogenase",
@@ -1343,28 +1670,38 @@ static const char* const kECNum_specific[] = {
"1.20.4.1 Arsenate reductase (glutaredoxin)",
"1.20.4.2 Methylarsonate reductase",
"1.20.4.3 Mycoredoxin",
+ "1.20.4.4 Arsenate reductase (thioredoxin)",
"1.20.9.1 Arsenate reductase (azurin)",
"1.20.99.1 Arsenate reductase (donor)",
+ "1.21.1.1 Iodotyrosine deiodinase",
+ "1.21.1.2 2,4-dichlorobenzoyl-CoA reductase",
"1.21.3.1 Isopenicillin-N synthase",
"1.21.3.2 Columbamine oxidase",
"1.21.3.3 Reticuline oxidase",
"1.21.3.4 Sulochrin oxidase ((+)-bisdechlorogeodin-forming)",
"1.21.3.5 Sulochrin oxidase ((-)-bisdechlorogeodin-forming)",
"1.21.3.6 Aureusidin synthase",
+ "1.21.3.7 Tetrahydrocannabinolic acid synthase",
+ "1.21.3.8 Cannabidiolic acid synthase",
"1.21.4.1 D-proline reductase (dithiol)",
"1.21.4.2 Glycine reductase",
"1.21.4.3 Sarcosine reductase",
"1.21.4.4 Betaine reductase",
+ "1.21.98.1 Cyclic dehypoxanthinyl futalosine synthase",
+ "1.21.98.2 Dichlorochromopyrrolate synthase",
"1.21.99.1 Beta-cyclopiazonate dehydrogenase",
- "1.22.1.1 Iodotyrosine deiodinase",
+ "1.21.99.3 Thyroxine 5-deiodinase",
+ "1.21.99.4 Thyroxine 5'-deiodinase",
+ "1.23.1.1 (+)-pinoresinol reductase",
+ "1.23.1.2 (+)-lariciresinol reductase",
+ "1.23.1.3 (-)-pinoresinol reductase",
+ "1.23.1.4 (-)-lariciresinol reductase",
+ "1.23.5.1 Violaxanthin de-epoxidase",
"1.97.1.1 Chlorate reductase",
"1.97.1.2 Pyrogallol hydroxytransferase",
- "1.97.1.3 Sulfur reductase",
"1.97.1.4 [Formate-C-acetyltransferase]-activating enzyme",
"1.97.1.8 Tetrachloroethene reductive dehalogenase",
"1.97.1.9 Selenate reductase",
- "1.97.1.10 Thyroxine 5'-deiodinase",
- "1.97.1.11 Thyroxine 5-deiodinase",
"1.97.1.12 Photosystem I",
"2.1.1.1 Nicotinamide N-methyltransferase",
"2.1.1.2 Guanidinoacetate N-methyltransferase",
@@ -1402,7 +1739,7 @@ static const char* const kECNum_specific[] = {
"2.1.1.41 Sterol 24-C-methyltransferase",
"2.1.1.42 Flavone 3'-O-methyltransferase",
"2.1.1.43 Histone-lysine N-methyltransferase",
- "2.1.1.44 Dimethylhistidine N-methyltransferase",
+ "2.1.1.44 L-histidine N(alpha)-methyltransferase",
"2.1.1.45 Thymidylate synthase",
"2.1.1.46 Isoflavone 4'-O-methyltransferase",
"2.1.1.47 Indolepyruvate C-methyltransferase",
@@ -1412,7 +1749,7 @@ static const char* const kECNum_specific[] = {
"2.1.1.54 Deoxycytidylate C-methyltransferase",
"2.1.1.55 tRNA (adenine-N(6)-)-methyltransferase",
"2.1.1.56 mRNA (guanine-N(7)-)-methyltransferase",
- "2.1.1.57 mRNA (nucleoside-2'-O-)-methyltransferase",
+ "2.1.1.57 Methyltransferase cap1",
"2.1.1.59 [Cytochrome c]-lysine N-methyltransferase",
"2.1.1.60 Calmodulin-lysine N-methyltransferase",
"2.1.1.61 tRNA (5-methylaminomethyl-2-thiouridylate)-methyltransferase",
@@ -1420,7 +1757,6 @@ static const char* const kECNum_specific[] = {
"2.1.1.63 Methylated-DNA--[protein]-cysteine S-methyltransferase",
"2.1.1.64 3-demethylubiquinol 3-O-methyltransferase",
"2.1.1.65 Licodione 2'-O-methyltransferase",
- "2.1.1.66 rRNA (adenosine-2'-O-)-methyltransferase",
"2.1.1.67 Thiopurine S-methyltransferase",
"2.1.1.68 Caffeate O-methyltransferase",
"2.1.1.69 5-hydroxyfuranocoumarin 5-O-methyltransferase",
@@ -1442,7 +1778,7 @@ static const char* const kECNum_specific[] = {
"2.1.1.87 Pyridine N-methyltransferase",
"2.1.1.88 8-hydroxyquercetin 8-O-methyltransferase",
"2.1.1.89 Tetrahydrocolumbamine 2-O-methyltransferase",
- "2.1.1.90 Methanol--5-hydroxybenzimidazolylcobamide Co-methyltransferase",
+ "2.1.1.90 Methanol--corrinoid protein Co-methyltransferase",
"2.1.1.91 Isobutyraldoxime O-methyltransferase",
"2.1.1.94 Tabersonine 16-O-methyltransferase",
"2.1.1.95 Tocopherol O-methyltransferase",
@@ -1474,15 +1810,12 @@ static const char* const kECNum_specific[] = {
"2.1.1.121 6-O-methylnorlaudanosoline 5'-O-methyltransferase",
"2.1.1.122 (S)-tetrahydroprotoberberine N-methyltransferase",
"2.1.1.123 [Cytochrome c]-methionine S-methyltransferase",
- "2.1.1.124 [Cytochrome c]-arginine N-methyltransferase",
- "2.1.1.125 Histone-arginine N-methyltransferase",
- "2.1.1.126 [Myelin basic protein]-arginine N-methyltransferase",
"2.1.1.127 [Ribulose-bisphosphate carboxylase]-lysine N-methyltransferase",
"2.1.1.128 (RS)-norcoclaurine 6-O-methyltransferase",
"2.1.1.129 Inositol 4-methyltransferase",
"2.1.1.130 Precorrin-2 C(20)-methyltransferase",
"2.1.1.131 Precorrin-3B C(17)-methyltransferase",
- "2.1.1.132 Precorrin-6Y C(5,15)-methyltransferase (decarboxylating)",
+ "2.1.1.132 Precorrin-6B C(5,15)-methyltransferase (decarboxylating)",
"2.1.1.133 Precorrin-4 C(11)-methyltransferase",
"2.1.1.136 Chlorophenol O-methyltransferase",
"2.1.1.137 Arsenite methyltransferase",
@@ -1496,7 +1829,6 @@ static const char* const kECNum_specific[] = {
"2.1.1.146 (Iso)eugenol O-methyltransferase",
"2.1.1.147 Corydaline synthase",
"2.1.1.148 Thymidylate synthase (FAD)",
- "2.1.1.149 Myricetin O-methyltransferase",
"2.1.1.150 Isoflavone 7-O-methyltransferase",
"2.1.1.151 Cobalt-factor II C(20)-methyltransferase",
"2.1.1.152 Precorrin-6A synthase (deacetylating)",
@@ -1513,9 +1845,9 @@ static const char* const kECNum_specific[] = {
"2.1.1.163 Demethylmenaquinone methyltransferase",
"2.1.1.164 Demethylrebeccamycin-D-glucose O-methyltransferase",
"2.1.1.165 Methyl halide transferase",
- "2.1.1.166 23S rRNA (uridine(2552)-2'-O-)-methyltransferase",
- "2.1.1.167 27S pre-rRNA (guanosine(2922)-2'-O-)-methyltransferase",
- "2.1.1.168 21S rRNA (uridine(2791)-2'-O-)-methyltransferase",
+ "2.1.1.166 23S rRNA (uridine(2552)-2'-O)-methyltransferase",
+ "2.1.1.167 27S pre-rRNA (guanosine(2922)-2'-O)-methyltransferase",
+ "2.1.1.168 21S rRNA (uridine(2791)-2'-O)-methyltransferase",
"2.1.1.169 Tricetin 3',4',5'-O-trimethyltransferase",
"2.1.1.170 16S rRNA (guanine(527)-N(7))-methyltransferase",
"2.1.1.171 16S rRNA (guanine(966)-N(2))-methyltransferase",
@@ -1532,7 +1864,7 @@ static const char* const kECNum_specific[] = {
"2.1.1.182 16S rRNA (adenine(1518)-N(6)/adenine(1519)-N(6))-dimethyltransferase",
"2.1.1.183 18S rRNA (adenine(1779)-N(6)/adenine(1780)-N(6))-dimethyltransferase",
"2.1.1.184 23S rRNA (adenine(2085)-N(6))-dimethyltransferase",
- "2.1.1.185 23S rRNA (guanine(2251)-2'-O)-methyltransferase",
+ "2.1.1.185 23S rRNA (guanosine(2251)-2'-O)-methyltransferase",
"2.1.1.186 23S rRNA (cytidine(2498)-2'-O)-methyltransferase",
"2.1.1.187 23S rRNA (guanine(745)-N(1))-methyltransferase",
"2.1.1.188 23S rRNA (guanine(748)-N(1))-methyltransferase",
@@ -1542,8 +1874,8 @@ static const char* const kECNum_specific[] = {
"2.1.1.192 23S rRNA (adenine(2503)-C(2))-methyltransferase",
"2.1.1.193 16S rRNA (uracil(1498)-N(3))-methyltransferase",
"2.1.1.195 Cobalt-precorrin-5B (C(1))-methyltransferase",
- "2.1.1.196 Cobalt-precorrin-7 (C(15))-methyltransferase (decarboxylating)",
- "2.1.1.197 Malonyl-CoA O-methyltransferase",
+ "2.1.1.196 Cobalt-precorrin-6B (C(15))-methyltransferase (decarboxylating)",
+ "2.1.1.197 Malonyl-[acyl-carrier protein] O-methyltransferase",
"2.1.1.198 16S rRNA (cytidine(1402)-2'-O)-methyltransferase",
"2.1.1.199 16S rRNA (cytosine(1402)-N(4))-methyltransferase",
"2.1.1.200 tRNA (cytidine(32)/uridine(32)-2'-O)-methyltransferase",
@@ -1557,7 +1889,7 @@ static const char* const kECNum_specific[] = {
"2.1.1.208 23S rRNA (uridine(2479)-2'-O)-methyltransferase",
"2.1.1.209 23S rRNA (guanine(2535)-N(1))-methyltransferase",
"2.1.1.210 Demethylspheroidene O-methyltransferase",
- "2.1.1.211 tRNA(Ser) (uridine(44)-2'-O-)-methyltransferase",
+ "2.1.1.211 tRNA(Ser) (uridine(44)-2'-O)-methyltransferase",
"2.1.1.212 2,7,4'-trihydroxyisoflavanone 4'-O-methyltransferase",
"2.1.1.213 tRNA (guanine(10)-N(2))-dimethyltransferase",
"2.1.1.214 tRNA (guanine(10)-N(2))-methyltransferase",
@@ -1568,7 +1900,7 @@ static const char* const kECNum_specific[] = {
"2.1.1.219 tRNA (adenine(57)-N(1)/adenine(58)-N(1))-methyltransferase",
"2.1.1.220 tRNA (adenine(58)-N(1))-methyltransferase",
"2.1.1.221 tRNA (guanine(9)-N(1))-methyltransferase",
- "2.1.1.222 2-polyprenyl-6-hydroxyphenyl methylase",
+ "2.1.1.222 2-polyprenyl-6-hydroxyphenol methylase",
"2.1.1.223 tRNA(1)(Val) (adenine(37)-N(6))-methyltransferase",
"2.1.1.224 23S rRNA (adenine(2503)-C(8))-methyltransferase",
"2.1.1.225 tRNA:m(4)X modification enzyme",
@@ -1589,12 +1921,93 @@ static const char* const kECNum_specific[] = {
"2.1.1.240 Trans-resveratrol di-O-methyltransferase",
"2.1.1.241 2,4,7-trihydroxy-1,4-benzoxazin-3-one-glucoside 7-O-methyltransferase",
"2.1.1.242 16S rRNA (guanine(1516)-N(2))-methyltransferase",
+ "2.1.1.243 2-ketoarginine methyltransferase",
+ "2.1.1.244 Protein N-terminal methyltransferase",
+ "2.1.1.245 5-methyltetrahydrosarcinapterin:corrinoid/iron-sulfur protein Co-methyltransferase",
+ "2.1.1.246 [Methyl-Co(III) methanol-specific corrinoid protein]:coenzyme M methyltransferase",
+ "2.1.1.247 [Methyl-Co(III) methylamine-specific corrinoid protein]:coenzyme M methyltransferase",
+ "2.1.1.248 [Methylamine--corrinoid protein] Co-methyltransferase",
+ "2.1.1.249 [Dimethylamine--corrinoid protein] Co-methyltransferase",
+ "2.1.1.250 [Trimethylamine--corrinoid protein] Co-methyltransferase",
+ "2.1.1.251 Methylated-thiol--coenzyme M methyltransferase",
+ "2.1.1.252 Tetramethylammonium--corrinoid protein Co-methyltransferase",
+ "2.1.1.253 [Methyl-Co(III) tetramethylammonium-specific corrinoid protein]:coenzyme M methyltransferase",
+ "2.1.1.254 Erythromycin 3''-O-methyltransferase",
+ "2.1.1.255 Geranyl diphosphate 2-C-methyltransferase",
+ "2.1.1.256 tRNA (guanine(6)-N(2))-methyltransferase",
+ "2.1.1.257 tRNA (pseudouridine(54)-N(1))-methyltransferase",
+ "2.1.1.258 5-methyltetrahydrofolate:corrinoid/iron-sulfur protein Co-methyltransferase",
+ "2.1.1.259 [Fructose-bisphosphate aldolase]-lysine N-methyltransferase",
+ "2.1.1.260 rRNA small subunit pseudouridine methyltransferase Nep1",
+ "2.1.1.261 4-dimethylallyltryptophan N-methyltransferase",
+ "2.1.1.262 Squalene methyltransferase",
+ "2.1.1.263 Botryococcene C-methyltransferase",
+ "2.1.1.264 23S rRNA (guanine(2069)-N(7))-methyltransferase",
+ "2.1.1.265 Tellurite methyltransferase",
+ "2.1.1.266 23S rRNA (adenine(2030)-N(6))-methyltransferase",
+ "2.1.1.267 Flavonoid 3',5'-methyltransferase",
+ "2.1.1.268 tRNA(Thr) (cytosine(32)-N(3))-methyltransferase",
+ "2.1.1.269 Dimethylsulfoniopropionate demethylase",
+ "2.1.1.270 (+)-6a-hydroxymaackiain 3-O-methyltransferase",
+ "2.1.1.271 Cobalt-precorrin-4 methyltransferase",
+ "2.1.1.272 Cobalt-factor III methyltransferase",
+ "2.1.1.273 Benzoate O-methyltransferase",
+ "2.1.1.274 Salicylate carboxymethyltransferase",
+ "2.1.1.275 Gibberellin A(9) O-methyltransferase",
+ "2.1.1.276 Gibberellin A(4) carboxyl methyltransferase",
+ "2.1.1.277 Anthranilate O-methyltransferase",
+ "2.1.1.278 Indole-3-acetate O-methyltransferase",
+ "2.1.1.279 Trans-anol O-methyltransferase",
+ "2.1.1.280 Selenocysteine Se-methyltransferase",
+ "2.1.1.281 Phenylpyruvate C(3)-methyltransferase",
+ "2.1.1.282 tRNA(Phe) 7-((3-amino-3-carboxypropyl)-4-demethylwyosine(37)-N(4))-methyltransferase",
+ "2.1.1.283 Emodin O-methyltransferase",
+ "2.1.1.284 8-demethylnovobiocic acid C(8)-methyltransferase",
+ "2.1.1.285 Demethyldecarbamoylnovobiocin O-methyltransferase",
+ "2.1.1.286 25S rRNA (adenine(2142)-N(1))-methyltransferase",
+ "2.1.1.287 25S rRNA (adenine(645)-N(1))-methyltransferase",
+ "2.1.1.288 Aklanonic acid methyltransferase",
+ "2.1.1.289 Cobalt-precorrin-7 (C(5))-methyltransferase",
+ "2.1.1.290 tRNA(Phe) (7-(3-amino-3-carboxypropyl)wyosine(37)-O)-methyltransferase",
+ "2.1.1.291 (R,S)-reticuline 7-O-methyltransferase",
+ "2.1.1.292 Carminomycin 4-O-methyltransferase",
+ "2.1.1.293 6-hydroxytryprostatin B O-methyltransferase",
+ "2.1.1.294 3-O-phospho-polymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol 3-phospho-methyltransferase",
+ "2.1.1.295 2-methyl-6-phytyl-1,4-hydroquinone methyltransferase",
+ "2.1.1.296 Methyltransferase cap2",
+ "2.1.1.297 Peptide chain release factor N(5)-glutamine methyltransferase",
+ "2.1.1.298 Ribosomal protein L3 N(5)-glutamine methyltransferase",
+ "2.1.1.299 Protein N-terminal monomethyltransferase",
+ "2.1.1.300 Pavine N-methyltransferase",
+ "2.1.1.301 Cypemycin N-terminal methyltransferase",
+ "2.1.1.302 3-hydroxy-5-methyl-1-naphthoate 3-O-methyltransferase",
+ "2.1.1.303 2,7-dihydroxy-5-methyl-1-naphthoate 7-O-methyltransferase",
+ "2.1.1.304 L-tyrosine C(3)-methyltransferase",
+ "2.1.1.305 8-demethyl-8-alpha-L-rhamnosyl tetracenomycin-C 2'-O-methyltransferase",
+ "2.1.1.306 8-demethyl-8-(2-methoxy-alpha-L-rhamnosyl)-tetracenomycin-C 3'-O-methyltransferase",
+ "2.1.1.307 8-demethyl-8-(2,3-dimethoxy-alpha-L-rhamnosyl)-tetracenomycin-C 4'-O-methyltransferase",
+ "2.1.1.308 2-hydroxyethylphosphonate methyltransferase",
+ "2.1.1.309 18S rRNA (guanine(1575)-N(7))-methyltransferase",
+ "2.1.1.310 25S rRNA (cytosine(2870)-C(5))-methyltransferase",
+ "2.1.1.311 25S rRNA (cytosine(2278)-C(5))-methyltransferase",
+ "2.1.1.312 25S rRNA (uracil(2843)-N(3))-methyltransferase",
+ "2.1.1.313 25S rRNA (uracil(2634)-N(3))-methyltransferase",
+ "2.1.1.314 Diphthine methyl ester synthase",
+ "2.1.1.315 27-O-demethylrifamycin SV methyltransferase",
+ "2.1.1.316 Mitomycin 6-O-methyltransferase",
+ "2.1.1.317 Sphingolipid C(9)-methyltransferase",
+ "2.1.1.318 [Trehalose-6-phosphate synthase]-L-cysteine S-methyltransferase",
+ "2.1.1.319 Type I protein arginine methyltransferase",
+ "2.1.1.320 Type II protein arginine methyltransferase",
+ "2.1.1.321 Type III protein arginine methyltransferase",
+ "2.1.1.322 Type IV protein arginine methyltransferase",
+ "2.1.1.323 (-)-pluviatolide 4-O-methyltransferase",
"2.1.1.n1 Resorcinol O-methyltransferase",
- "2.1.1.n3 Selenocysteine Se-methyltransferase",
"2.1.1.n4 Thiocyanate methyltransferase",
- "2.1.1.n5 N-terminal protein methyltransferase",
- "2.1.1.n6 Geranyl diphosphate 2-C-methyltransferase",
"2.1.1.n7 5-pentadecatrienyl resorcinol O-methyltransferase",
+ "2.1.1.n8 Small RNA 2'-O-methyltransferase",
+ "2.1.1.n11 Methylphosphotriester-DNA--[protein]-cysteine S-methyltransferase",
+ "2.1.1.n12 2-phytyl-1,4-naphtoquinone methyltransferase",
"2.1.2.1 Glycine hydroxymethyltransferase",
"2.1.2.2 Phosphoribosylglycinamide formyltransferase",
"2.1.2.3 Phosphoribosylaminoimidazolecarboxamide formyltransferase",
@@ -1616,6 +2029,7 @@ static const char* const kECNum_specific[] = {
"2.1.3.9 N-acetylornithine carbamoyltransferase",
"2.1.3.10 Malonyl-S-ACP:biotin-protein carboxyltransferase",
"2.1.3.11 N-succinylornithine carbamoyltransferase",
+ "2.1.3.12 Decarbamoylnovobiocin carbamoyltransferase",
"2.1.4.1 Glycine amidinotransferase",
"2.1.4.2 Scyllo-inosamine-4-phosphate amidinotransferase",
"2.2.1.1 Transketolase",
@@ -1627,6 +2041,9 @@ static const char* const kECNum_specific[] = {
"2.2.1.7 1-deoxy-D-xylulose-5-phosphate synthase",
"2.2.1.8 Fluorothreonine transaldolase",
"2.2.1.9 2-succinyl-5-enolpyruvyl-6-hydroxy-3-cyclohexene-1-carboxylic-acid synthase",
+ "2.2.1.10 2-amino-3,7-dideoxy-D-threo-hept-6-ulosonate synthase",
+ "2.2.1.11 6-deoxy-5-ketofructose 1-phosphate synthase",
+ "2.2.1.12 3-acetyloctanal synthase",
"2.3.1.1 Amino-acid N-acetyltransferase",
"2.3.1.2 Imidazole N-acetyltransferase",
"2.3.1.3 Glucosamine N-acetyltransferase",
@@ -1641,7 +2058,7 @@ static const char* const kECNum_specific[] = {
"2.3.1.12 Dihydrolipoyllysine-residue acetyltransferase",
"2.3.1.13 Glycine N-acyltransferase",
"2.3.1.14 Glutamine N-phenylacetyltransferase",
- "2.3.1.15 Glycerol-3-phosphate O-acyltransferase",
+ "2.3.1.15 Glycerol-3-phosphate 1-O-acyltransferase",
"2.3.1.16 Acetyl-CoA C-acyltransferase",
"2.3.1.17 Aspartate N-acetyltransferase",
"2.3.1.18 Galactoside O-acetyltransferase",
@@ -1667,7 +2084,7 @@ static const char* const kECNum_specific[] = {
"2.3.1.38 [Acyl-carrier-protein] S-acetyltransferase",
"2.3.1.39 [Acyl-carrier-protein] S-malonyltransferase",
"2.3.1.40 Acyl-[acyl-carrier-protein]--phospholipid O-acyltransferase",
- "2.3.1.41 Beta-ketoacyl-acyl-carrier-protein synthase I",
+ "2.3.1.41 Beta-ketoacyl-[acyl-carrier-protein] synthase I",
"2.3.1.42 Glycerone-phosphate O-acyltransferase",
"2.3.1.43 Phosphatidylcholine--sterol O-acyltransferase",
"2.3.1.44 N-acetylneuraminate 4-O-acetyltransferase",
@@ -1685,7 +2102,7 @@ static const char* const kECNum_specific[] = {
"2.3.1.57 Diamine N-acetyltransferase",
"2.3.1.58 2,3-diaminopropionate N-oxalyltransferase",
"2.3.1.59 Gentamicin 2'-N-acetyltransferase",
- "2.3.1.60 Gentamicin 3'-N-acetyltransferase",
+ "2.3.1.60 Gentamicin 3-N-acetyltransferase",
"2.3.1.61 Dihydrolipoyllysine-residue succinyltransferase",
"2.3.1.62 2-acylglycerophosphocholine O-acyltransferase",
"2.3.1.63 1-alkylglycerophosphocholine O-acyltransferase",
@@ -1705,8 +2122,8 @@ static const char* const kECNum_specific[] = {
"2.3.1.78 Heparan-alpha-glucosaminide N-acetyltransferase",
"2.3.1.79 Maltose O-acetyltransferase",
"2.3.1.80 Cysteine-S-conjugate N-acetyltransferase",
- "2.3.1.81 Aminoglycoside N(3')-acetyltransferase",
- "2.3.1.82 Aminoglycoside N(6')-acetyltransferase",
+ "2.3.1.81 Aminoglycoside N(3)-acetyltransferase",
+ "2.3.1.82 Aminoglycoside 6'-N-acetyltransferase",
"2.3.1.83 Phosphatidylcholine--dolichol O-acyltransferase",
"2.3.1.84 Alcohol O-acetyltransferase",
"2.3.1.85 Fatty-acid synthase",
@@ -1728,7 +2145,6 @@ static const char* const kECNum_specific[] = {
"2.3.1.101 Formylmethanofuran--tetrahydromethanopterin N-formyltransferase",
"2.3.1.102 N(6)-hydroxylysine O-acetyltransferase",
"2.3.1.103 Sinapoylglucose--sinapoylglucose O-sinapoyltransferase",
- "2.3.1.104 1-alkenylglycerophosphocholine O-acyltransferase",
"2.3.1.105 Alkylglycerophosphate 2-O-acetyltransferase",
"2.3.1.106 Tartronate O-hydroxycinnamoyltransferase",
"2.3.1.107 Deacetylvindoline O-acetyltransferase",
@@ -1743,7 +2159,6 @@ static const char* const kECNum_specific[] = {
"2.3.1.116 Flavonol-3-O-beta-glucoside O-malonyltransferase",
"2.3.1.117 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase",
"2.3.1.118 N-hydroxyarylamine O-acetyltransferase",
- "2.3.1.119 Icosanoyl-CoA synthase",
"2.3.1.121 1-alkenylglycerophosphoethanolamine O-acyltransferase",
"2.3.1.122 Trehalose O-mycolyltransferase",
"2.3.1.123 Dolichol O-acyltransferase",
@@ -1773,9 +2188,9 @@ static const char* const kECNum_specific[] = {
"2.3.1.148 Glycerophospholipid acyltransferase (CoA-dependent)",
"2.3.1.149 Platelet-activating factor acetyltransferase",
"2.3.1.150 Salutaridinol 7-O-acetyltransferase",
- "2.3.1.151 Benzophenone synthase",
+ "2.3.1.151 2,3',4,6-tetrahydroxybenzophenone synthase",
"2.3.1.152 Alcohol O-cinnamoyltransferase",
- "2.3.1.153 Anthocyanin 5-aromatic acyltransferase",
+ "2.3.1.153 Anthocyanin 5-(6'''-hydroxycinnamoyltransferase)",
"2.3.1.154 Propionyl-CoA C(2)-trimethyltridecanoyltransferase",
"2.3.1.155 Acetyl-CoA C-myristoyltransferase",
"2.3.1.156 Phloroisovalerophenone synthase",
@@ -1799,10 +2214,10 @@ static const char* const kECNum_specific[] = {
"2.3.1.174 3-oxoadipyl-CoA thiolase",
"2.3.1.175 Deacetylcephalosporin-C acetyltransferase",
"2.3.1.176 Propanoyl-CoA C-acyltransferase",
- "2.3.1.177 Biphenyl synthase",
+ "2.3.1.177 3,5-dihydroxybiphenyl synthase",
"2.3.1.178 Diaminobutyrate acetyltransferase",
- "2.3.1.179 Beta-ketoacyl-acyl-carrier-protein synthase II",
- "2.3.1.180 Beta-ketoacyl-acyl-carrier-protein synthase III",
+ "2.3.1.179 Beta-ketoacyl-[acyl-carrier-protein] synthase II",
+ "2.3.1.180 Beta-ketoacyl-[acyl-carrier-protein] synthase III",
"2.3.1.181 Lipoyl(octanoyl) transferase",
"2.3.1.182 (R)-citramalate synthase",
"2.3.1.183 Phosphinothricin acetyltransferase",
@@ -1820,10 +2235,67 @@ static const char* const kECNum_specific[] = {
"2.3.1.195 (Z)-3-hexen-1-ol acetyltransferase",
"2.3.1.196 Benzyl alcohol O-benzoyltransferase",
"2.3.1.197 dTDP-3-amino-3,6-dideoxy-alpha-D-galactopyranose 3-N-acetyltransferase",
+ "2.3.1.198 Glycerol-3-phosphate 2-O-acyltransferase",
+ "2.3.1.199 Very-long-chain 3-oxoacyl-CoA synthase",
+ "2.3.1.200 Lipoyl amidotransferase",
+ "2.3.1.201 UDP-2-acetamido-3-amino-2,3-dideoxy-glucuronate N-acetyltransferase",
+ "2.3.1.202 UDP-4-amino-4,6-dideoxy-N-acetyl-beta-L-altrosamine N-acetyltransferase",
+ "2.3.1.203 UDP-N-acetylbacillosamine N-acetyltransferase",
+ "2.3.1.204 Octanoyl-[GcvH]:protein N-octanoyltransferase",
+ "2.3.1.205 Fumigaclavine B O-acetyltransferase",
+ "2.3.1.206 3,5,7-trioxododecanoyl-CoA synthase",
+ "2.3.1.207 Beta-ketodecanoyl-[acyl-carrier-protein] synthase",
+ "2.3.1.208 4-hydroxycoumarin synthase",
+ "2.3.1.209 dTDP-4-amino-4,6-dideoxy-D-glucose acyltransferase",
+ "2.3.1.210 dTDP-4-amino-4,6-dideoxy-D-galactose acyltransferase",
+ "2.3.1.211 Bisdemethoxycurcumin synthase",
+ "2.3.1.212 Benzalacetone synthase",
+ "2.3.1.213 Cyanidin 3-O-(6-O-glucosyl-2-O-xylosylgalactoside) 6'''-O-hydroxycinnamoyltransferase",
+ "2.3.1.214 Pelargonidin 3-O-(6-caffeoylglucoside) 5-O-(6-O-malonylglucoside) 4'''-malonyltransferase",
+ "2.3.1.215 Anthocyanidin 3-O-glucoside 6''-O-acyltransferase",
+ "2.3.1.216 5,7-dihydroxy-2-methylchromone synthase",
+ "2.3.1.217 Curcumin synthase",
+ "2.3.1.218 Phenylpropanoylacetyl-CoA synthase",
+ "2.3.1.219 Demethoxycurcumin synthase",
+ "2.3.1.220 2,4,6-trihydroxybenzophenone synthase",
+ "2.3.1.221 Noranthrone synthase",
+ "2.3.1.222 Phosphate propanoyltransferase",
+ "2.3.1.223 3-oxo-5,6-didehydrosuberyl-CoA thiolase",
+ "2.3.1.224 Acetyl-CoA-benzylalcohol acetyltransferase",
+ "2.3.1.225 Protein S-acyltransferase",
+ "2.3.1.226 Carboxymethylproline synthase",
+ "2.3.1.227 GDP-perosamine N-acetyltransferase",
+ "2.3.1.228 Isovaleryl-homoserine lactone synthase",
+ "2.3.1.229 4-coumaroyl-homoserine lactone synthase",
+ "2.3.1.230 2-heptyl-4(1H)-quinolone synthase",
+ "2.3.1.231 tRNA(Phe) (7-(3-amino-3-(methoxycarbonyl)propyl)wyosine(37)-N)-methoxycarbonyltransferase",
+ "2.3.1.232 Methanol O-anthraniloyltransferase",
+ "2.3.1.233 1,3,6,8-tetrahydroxynaphthalene synthase",
+ "2.3.1.234 N(6)-L-threonylcarbamoyladenine synthase",
+ "2.3.1.235 Tetracenomycin F2 synthase",
+ "2.3.1.236 5-methylnaphthoic acid synthase",
+ "2.3.1.237 Neocarzinostatin naphthoate synthase",
+ "2.3.1.238 Monacolin J acid methylbutanoate transferase",
+ "2.3.1.239 10-deoxymethynolide syntase",
+ "2.3.1.240 Narbonolide synthase",
+ "2.3.1.241 Kdo(2)-lipid IV(A) lauroyltransferase",
+ "2.3.1.242 Kdo(2)-lipid IV(A) palmitoleoyltransferase",
+ "2.3.1.243 Lauroyl-Kdo(2)-lipid IV(A) myristoyltransferase",
+ "2.3.1.244 2-methylbutanoate polyketide synthase",
+ "2.3.1.245 3-hydroxy-5-phosphonooxypentane-2,4-dione thiolase",
+ "2.3.1.246 3,5-dihydroxyphenylacetyl-CoA synthase",
+ "2.3.1.247 3-keto-5-aminohexanoate cleavage enzyme",
+ "2.3.1.248 Spermidine disinapoyl transferase",
+ "2.3.1.249 Spermidine dicoumaroyl transferase",
+ "2.3.1.250 [Wnt protein] O-palmitoleoyl transferase",
+ "2.3.1.251 Lipid IV(A) palmitoyltransferase",
"2.3.1.n2 Phosphate acyltransferase",
"2.3.1.n3 Glycerol-3-phosphate acyltransferase (acyl-phosphate transferring)",
"2.3.1.n4 1-acyl-sn-glycerol-3-phosphate acyltransferase",
"2.3.1.n5 Glycerol-3-phosphate acyltransferase (acyl-[acyl-carrier-protein]-transferring)",
+ "2.3.1.n6 1-acylglycerophosphoserine O-acyltransferase",
+ "2.3.1.n7 1-acylglycerophosphoethanolamine O-acyltransferase",
+ "2.3.1.n12 Sinapoyl-beta-D-glucose:anthocyanin sinapoyltransferase",
"2.3.2.1 D-glutamyltransferase",
"2.3.2.2 Gamma-glutamyltransferase",
"2.3.2.3 Lysyltransferase",
@@ -1842,6 +2314,16 @@ static const char* const kECNum_specific[] = {
"2.3.2.16 Lipid II:glycine glycyltransferase",
"2.3.2.17 N-acetylmuramoyl-L-alanyl-D-glutamyl-L-lysyl-(N(6)-glycyl)-D-alanyl-D-alanine-diphosphoundecaprenyl-N-acetylglucosamine:glycine glycyltransferase",
"2.3.2.18 N-acetylmuramoyl-L-alanyl-D-glutamyl-L-lysyl-(N(6)-triglycine)-D-alanyl-D-alanine-diphosphoundecaprenyl-N-acetylglucosamine:glycine glycyltransferase",
+ "2.3.2.19 Ribostamycin:4-(gamma-L-glutamylamino)-(S)-2-hydroxybutanoyl-[BtrI acyl-carrier protein] 4-(gamma-L-glutamylamino)-(S)-2-hydroxybutanoate transferase",
+ "2.3.2.20 Cyclo(L-leucyl-L-phenylalanyl) synthase",
+ "2.3.2.21 Cyclo(L-tyrosyl-L-tyrosyl) synthase",
+ "2.3.2.22 Cyclo(L-leucyl-L-leucyl) synthase",
+ "2.3.2.23 E2 ubiquitin-conjugating enzyme",
+ "2.3.2.24 (E3-independent) E2 ubiquitin-conjugating enzyme",
+ "2.3.2.25 N-terminal E2 ubiquitin-conjugating enzyme",
+ "2.3.2.26 HECT-type E3 ubiquitin transferase",
+ "2.3.2.27 RING-type E3 ubiquitin transferase",
+ "2.3.2.28 L-allo-isoleucyltransferase",
"2.3.3.1 Citrate (Si)-synthase",
"2.3.3.2 Decylcitrate synthase",
"2.3.3.3 Citrate (Re)-synthase",
@@ -1857,7 +2339,8 @@ static const char* const kECNum_specific[] = {
"2.3.3.13 2-isopropylmalate synthase",
"2.3.3.14 Homocitrate synthase",
"2.3.3.15 Sulfoacetaldehyde acetyltransferase",
- "2.4.1.1 Phosphorylase",
+ "2.3.3.16 Citrate synthase (unknown stereospecificity)",
+ "2.4.1.1 Glycogen phosphorylase",
"2.4.1.2 Dextrin dextranase",
"2.4.1.4 Amylosucrase",
"2.4.1.5 Dextransucrase",
@@ -1887,7 +2370,7 @@ static const char* const kECNum_specific[] = {
"2.4.1.30 1,3-beta-oligoglucan phosphorylase",
"2.4.1.31 Laminaribiose phosphorylase",
"2.4.1.32 Glucomannan 4-beta-mannosyltransferase",
- "2.4.1.33 Alginate synthase",
+ "2.4.1.33 Mannuronan synthase",
"2.4.1.34 1,3-beta-glucan synthase",
"2.4.1.35 Phenol beta-glucosyltransferase",
"2.4.1.36 Alpha,alpha-trehalose-phosphate synthase (GDP-forming)",
@@ -1948,7 +2431,7 @@ static const char* const kECNum_specific[] = {
"2.4.1.104 o-dihydroxycoumarin 7-O-glucosyltransferase",
"2.4.1.105 Vitexin beta-glucosyltransferase",
"2.4.1.106 Isovitexin beta-glucosyltransferase",
- "2.4.1.109 Dolichyl-phosphate-mannose-protein mannosyltransferase",
+ "2.4.1.109 Dolichyl-phosphate-mannose--protein mannosyltransferase",
"2.4.1.110 tRNA-queuosine beta-mannosyltransferase",
"2.4.1.111 Coniferyl-alcohol glucosyltransferase",
"2.4.1.113 Alpha-1,4-glucan-protein synthase (ADP-forming)",
@@ -1957,7 +2440,6 @@ static const char* const kECNum_specific[] = {
"2.4.1.116 Cyanidin 3-O-rutinoside 5-O-glucosyltransferase",
"2.4.1.117 Dolichyl-phosphate beta-glucosyltransferase",
"2.4.1.118 Cytokinin 7-beta-glucosyltransferase",
- "2.4.1.119 Dolichyl-diphosphooligosaccharide--protein glycotransferase",
"2.4.1.120 Sinapate 1-glucosyltransferase",
"2.4.1.121 Indole-3-acetate beta-glucosyltransferase",
"2.4.1.122 Glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase",
@@ -1967,8 +2449,8 @@ static const char* const kECNum_specific[] = {
"2.4.1.127 Monoterpenol beta-glucosyltransferase",
"2.4.1.128 Scopoletin glucosyltransferase",
"2.4.1.129 Peptidoglycan glycosyltransferase",
- "2.4.1.131 GDP-Man:Man(3)GlcNAc(2)-PP-Dol alpha-1,2-mannosyltransferase",
- "2.4.1.132 GDP-Man:Man(1)GlcNAc(2)-PP-Dol alpha-1,3-mannosyltransferase",
+ "2.4.1.131 GDP-Man:Man(3)GlcNAc(2)-PP-dolichol alpha-1,2-mannosyltransferase",
+ "2.4.1.132 GDP-Man:Man(1)GlcNAc(2)-PP-dolichol alpha-1,3-mannosyltransferase",
"2.4.1.133 Xylosylprotein 4-beta-galactosyltransferase",
"2.4.1.134 Galactosylxylosylprotein 3-beta-galactosyltransferase",
"2.4.1.135 Galactosylgalactosylxylosylprotein 3-beta-glucuronosyltransferase",
@@ -1988,7 +2470,7 @@ static const char* const kECNum_specific[] = {
"2.4.1.149 N-acetyllactosaminide beta-1,3-N-acetylglucosaminyltransferase",
"2.4.1.150 N-acetyllactosaminide beta-1,6-N-acetylglucosaminyl-transferase",
"2.4.1.152 4-galactosyl-N-acetylglucosaminide 3-alpha-L-fucosyltransferase",
- "2.4.1.153 Dolichyl-phosphate alpha-N-acetylglucosaminyltransferase",
+ "2.4.1.153 UDP-N-acetylglucosamine--dolichyl-phosphate N-acetylglucosaminyltransferase",
"2.4.1.155 Alpha-1,6-mannosyl-glycoprotein 6-beta-N-acetylglucosaminyltransferase",
"2.4.1.156 Indolylacetyl-myo-inositol galactosyltransferase",
"2.4.1.157 1,2-diacylglycerol 3-glucosyltransferase",
@@ -2039,7 +2521,7 @@ static const char* const kECNum_specific[] = {
"2.4.1.205 Galactogen 6-beta-galactosyltransferase",
"2.4.1.206 Lactosylceramide 1,3-N-acetyl-beta-D-glucosaminyltransferase",
"2.4.1.207 Xyloglucan:xyloglucosyl transferase",
- "2.4.1.208 Diglucosyl diacylglycerol synthase",
+ "2.4.1.208 Diglucosyl diacylglycerol synthase (1,2-linking)",
"2.4.1.209 Cis-p-coumarate glucosyltransferase",
"2.4.1.210 Limonoid glucosyltransferase",
"2.4.1.211 1,3-beta-galactosyl-N-acetylhexosamine phosphorylase",
@@ -2067,7 +2549,7 @@ static const char* const kECNum_specific[] = {
"2.4.1.234 Kaempferol 3-O-galactosyltransferase",
"2.4.1.236 Flavanone 7-O-glucoside 2''-O-beta-L-rhamnosyltransferase",
"2.4.1.237 Flavonol 7-O-beta-glucosyltransferase",
- "2.4.1.238 Anthocyanin 3'-O-beta-glucosyltransferase",
+ "2.4.1.238 Delphinidin 3,5-di-O-glucoside 3'-O-glucosyltransferase",
"2.4.1.239 Flavonol-3-O-glucoside glucosyltransferase",
"2.4.1.240 Flavonol-3-O-glycoside glucosyltransferase",
"2.4.1.241 Digalactosyldiacylglycerol synthase",
@@ -2083,20 +2565,20 @@ static const char* const kECNum_specific[] = {
"2.4.1.251 GlcA-beta-(1->2)-D-Man-alpha-(1->3)-D-Glc-beta-(1->4)-D-Glc-alpha-1-diphospho-ditrans,octacis-undecaprenol 4-beta-mannosyltransferase",
"2.4.1.252 GDP-mannose:cellobiosyl-diphosphopolyprenol alpha-mannosyltransferase",
"2.4.1.253 Baicalein 7-O-glucuronosyltransferase",
- "2.4.1.254 Cyanidin-3-O-glucoside 2-O-glucuronosyltransferase",
+ "2.4.1.254 Cyanidin-3-O-glucoside 2''-O-glucuronosyltransferase",
"2.4.1.255 Protein O-GlcNAc transferase",
- "2.4.1.256 Dol-P-Glc:Glc(2)Man(9)GlcNAc(2)-PP-Dol alpha-1,2-glucosyltransferase",
- "2.4.1.257 GDP-Man:Man(2)GlcNAc(2)-PP-Dol alpha-1,6-mannosyltransferase",
- "2.4.1.258 Dol-P-Man:Man(5)GlcNAc(2)-PP-Dol alpha-1,3-mannosyltransferase",
- "2.4.1.259 Dol-P-Man:Man(6)GlcNAc(2)-PP-Dol alpha-1,2-mannosyltransferase",
- "2.4.1.260 Dol-P-Man:Man(7)GlcNAc(2)-PP-Dol alpha-1,6-mannosyltransferase",
- "2.4.1.261 Dol-P-Man:Man(8)GlcNAc(2)-PP-Dol alpha-1,2-mannosyltransferase",
+ "2.4.1.256 Dolichyl-P-Glc:Glc(2)Man(9)GlcNAc(2)-PP-dolichol alpha-1,2-glucosyltransferase",
+ "2.4.1.257 GDP-Man:Man(2)GlcNAc(2)-PP-dolichol alpha-1,6-mannosyltransferase",
+ "2.4.1.258 Dolichyl-P-Man:Man(5)GlcNAc(2)-PP-dolichol alpha-1,3-mannosyltransferase",
+ "2.4.1.259 Dolichyl-P-Man:Man(6)GlcNAc(2)-PP-dolichol alpha-1,2-mannosyltransferase",
+ "2.4.1.260 Dolichyl-P-Man:Man(7)GlcNAc(2)-PP-dolichol alpha-1,6-mannosyltransferase",
+ "2.4.1.261 Dolichyl-P-Man:Man(8)GlcNAc(2)-PP-dolichol alpha-1,2-mannosyltransferase",
"2.4.1.262 Soyasapogenol glucuronosyltransferase",
"2.4.1.263 Abscisate beta-glucosyltransferase",
"2.4.1.264 D-man-alpha-(1->3)-D-Glc-beta-(1->4)-D-Glc-alpha-1-diphosphoundecaprenol 2-beta-glucuronyltransferase",
- "2.4.1.265 Dol-P-Glc:Glc(1)Man(9)GlcNAc(2)-PP-Dol alpha-1->3-glucosyltransferase",
+ "2.4.1.265 Dolichyl-P-Glc:Glc(1)Man(9)GlcNAc(2)-PP-dolichol alpha-1,3-glucosyltransferase",
"2.4.1.266 Glucosyl-3-phosphoglycerate synthase",
- "2.4.1.267 Dol-P-Glc:Man(9)GlcNAc(2)-PP-Dol alpha-1->3-glucosyltransferase",
+ "2.4.1.267 Dolichyl-P-Glc:Man(9)GlcNAc(2)-PP-dolichol alpha-1,3-glucosyltransferase",
"2.4.1.268 Glucosylglycerate synthase",
"2.4.1.269 Mannosylglycerate synthase",
"2.4.1.270 Mannosylglucosyl-3-phosphoglycerate synthase",
@@ -2104,12 +2586,70 @@ static const char* const kECNum_specific[] = {
"2.4.1.272 Soyasapogenol B glucuronide galactosyltransferase",
"2.4.1.273 Soyasaponin III rhamnosyltransferase",
"2.4.1.274 Glucosylceramide beta-1,4-galactosyltransferase",
- "2.4.1.275 Lactotriaosylceramide beta-1,4-galactosyltransferase",
+ "2.4.1.275 Neolactotriaosylceramide beta-1,4-galactosyltransferase",
"2.4.1.276 Zeaxanthin glucosyltransferase",
- "2.4.1.277 Glycosyltransferase DesVII",
- "2.4.1.278 Desosaminyl transferase EryCIII",
+ "2.4.1.277 10-deoxymethynolide desosaminyltransferase",
+ "2.4.1.278 3-alpha-mycarosylerythronolide B desosaminyl transferase",
+ "2.4.1.279 Nigerose phosphorylase",
+ "2.4.1.280 N,N'-diacetylchitobiose phosphorylase",
+ "2.4.1.281 4-O-beta-D-mannosyl-D-glucose phosphorylase",
+ "2.4.1.282 3-O-alpha-D-glucosyl-L-rhamnose phosphorylase",
+ "2.4.1.283 2-deoxystreptamine N-acetyl-D-glucosaminyltransferase",
+ "2.4.1.284 2-deoxystreptamine glucosyltransferase",
+ "2.4.1.285 UDP-GlcNAc:ribostamycin N-acetylglucosaminyltransferase",
+ "2.4.1.286 Chalcone 4'-O-glucosyltransferase",
+ "2.4.1.287 Rhamnopyranosyl-N-acetylglucosaminyl-diphospho-decaprenol beta-1,3/1,4-galactofuranosyltransferase",
+ "2.4.1.288 Galactofuranosylgalactofuranosylrhamnosyl-N-acetylglucosaminyl-diphospho-decaprenol beta-1,5/1,6-galactofuranosyltransferase",
+ "2.4.1.289 N-acetylglucosaminyl-diphospho-decaprenol L-rhamnosyltransferase",
+ "2.4.1.290 N,N'-diacetylbacillosaminyl-diphospho-undecaprenol alpha-1,3-N-acetylgalactosaminyltransferase",
+ "2.4.1.291 N-acetylgalactosamine-N,N'-diacetylbacillosaminyl-diphospho-undecaprenol 4-alpha-N-acetylgalactosaminyltransferase",
+ "2.4.1.292 GalNAc-alpha-(1->4)-GalNAc-alpha-(1->3)-diNAcBac-PP-undecaprenol alpha-1,4-N-acetyl-D-galactosaminyltransferase",
+ "2.4.1.293 GalNAc(5)-diNAcBac-PP-undecaprenol beta-1,3-glucosyltransferase",
+ "2.4.1.294 Cyanidin 3-O-galactosyltransferase",
+ "2.4.1.295 Anthocyanin 3-O-sambubioside 5-O-glucosyltransferase",
+ "2.4.1.296 Anthocyanidin 3-O-coumaroylrutinoside 5-O-glucosyltransferase",
+ "2.4.1.297 Anthocyanidin 3-O-glucoside 2''-O-glucosyltransferase",
+ "2.4.1.298 Anthocyanidin 3-O-glucoside 5-O-glucosyltransferase",
+ "2.4.1.299 Cyanidin 3-O-glucoside 5-O-glucosyltransferase (acyl-glucose)",
+ "2.4.1.300 Cyanidin 3-O-glucoside 7-O-glucosyltransferase (acyl-glucose)",
+ "2.4.1.301 2'-deamino-2'-hydroxyneamine 1-alpha-D-kanosaminyltransferase",
+ "2.4.1.302 L-demethylnoviosyl transferase",
+ "2.4.1.303 UDP-Gal:alpha-D-GlcNAc-diphosphoundecaprenol beta-1,3-galactosyltransferase",
+ "2.4.1.304 UDP-Gal:alpha-D-GlcNAc-diphosphoundecaprenol beta-1,4-galactosyltransferase",
+ "2.4.1.305 UDP-Glc:alpha-D-GlcNAc-glucosaminyl-diphosphoundecaprenol beta-1,3-glucosyltransferase",
+ "2.4.1.306 UDP-GalNAc:alpha-D-GalNAc-diphosphoundecaprenol alpha-1,3-N-acetylgalactosaminyltransferase",
+ "2.4.1.307 UDP-Gal:alpha-D-GalNAc-1,3-alpha-D-GalNAc-diphosphoundecaprenol beta-1,3-galactosyltransferase",
+ "2.4.1.308 GDP-Fuc:beta-D-Gal-1,3-alpha-D-GalNAc-1,3-alpha-GalNAc-diphosphoundecaprenol alpha-1,2-fucosyltransferase",
+ "2.4.1.309 UDP-Gal:alpha-L-Fuc-1,2-beta-Gal-1,3-alpha-GalNAc-1,3-alpha-GalNAc-diphosphoundecaprenol alpha-1,3-galactosyltransferase",
+ "2.4.1.310 Vancomycin aglycone glucosyltransferase",
+ "2.4.1.311 dTDP-epi-vancosaminyltransferase",
+ "2.4.1.312 Protein O-mannose beta-1,4-N-acetylglucosaminyltransferase",
+ "2.4.1.313 Protein O-mannose beta-1,3-N-acetylgalactosaminyltransferase",
+ "2.4.1.314 Ginsenoside Rd glucosyltransferase",
+ "2.4.1.315 Diglucosyl diacylglycerol synthase (1,6-linking)",
+ "2.4.1.316 Tylactone mycaminosyltransferase",
+ "2.4.1.317 O-mycaminosyltylonolide 6-deoxyallosyltransferase",
+ "2.4.1.318 Demethyllactenocin mycarosyltransferase",
+ "2.4.1.319 Beta-1,4-mannooligosaccharide phosphorylase",
+ "2.4.1.320 1,4-beta-mannosyl-N-acetylglucosamine phosphorylase",
+ "2.4.1.321 Cellobionic acid phosphorylase",
+ "2.4.1.322 Devancosaminyl-vancomycin vancosaminetransferase",
+ "2.4.1.323 7-deoxyloganetic acid glucosyltransferase",
+ "2.4.1.324 7-deoxyloganetin glucosyltransferase",
+ "2.4.1.325 TDP-N-acetylfucosamine:lipid II N-acetylfucosaminyltransferase",
+ "2.4.1.326 Aklavinone 7-L-rhodosaminyltransferase",
+ "2.4.1.327 Aclacinomycin-T 2-deoxy-L-fucose transferase",
+ "2.4.1.328 Erythronolide mycarosyltransferase",
+ "2.4.1.329 Sucrose 6(F)-phosphate phosphorylase",
+ "2.4.1.330 Beta-D-glucosyl crocetin beta-1,6-glucosyltransferase",
+ "2.4.1.331 8-demethyltetracenomycin C L-rhamnosyltransferase",
+ "2.4.1.332 1,2-alpha-glucosylglycerol phosphorylase",
+ "2.4.1.333 1,2-beta-oligoglucan phosphorylase",
+ "2.4.1.334 1,3-alpha-oligoglucan phosphorylase",
+ "2.4.1.335 Dolichyl N-acetyl-alpha-D-glucosaminyl phosphate 3-beta-D-2,3-diacetamido-2,3-dideoxy-beta-D-glucuronosyltransferase",
+ "2.4.1.336 Monoglucosyldiacylglycerol synthase",
+ "2.4.1.337 1,2-diacylglycerol 3-alpha-glucosyltransferase",
"2.4.1.n2 Loliose synthase",
- "2.4.1.n5 Starch synthase (maltosyl-transferring)",
"2.4.2.1 Purine-nucleoside phosphorylase",
"2.4.2.2 Pyrimidine-nucleoside phosphorylase",
"2.4.2.3 Uridine phosphorylase",
@@ -2120,7 +2660,6 @@ static const char* const kECNum_specific[] = {
"2.4.2.8 Hypoxanthine phosphoribosyltransferase",
"2.4.2.9 Uracil phosphoribosyltransferase",
"2.4.2.10 Orotate phosphoribosyltransferase",
- "2.4.2.11 Nicotinate phosphoribosyltransferase",
"2.4.2.12 Nicotinamide phosphoribosyltransferase",
"2.4.2.14 Amidophosphoribosyltransferase",
"2.4.2.15 Guanosine phosphorylase",
@@ -2131,13 +2670,12 @@ static const char* const kECNum_specific[] = {
"2.4.2.20 Dioxotetrahydropyrimidine phosphoribosyltransferase",
"2.4.2.21 Nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase",
"2.4.2.22 Xanthine phosphoribosyltransferase",
- "2.4.2.23 Deoxyuridine phosphorylase",
"2.4.2.24 1,4-beta-D-xylan synthase",
"2.4.2.25 Flavone apiosyltransferase",
"2.4.2.26 Protein xylosyltransferase",
"2.4.2.27 dTDP-dihydrostreptose--streptidine-6-phosphate dihydrostreptosyltransferase",
"2.4.2.28 S-methyl-5'-thioadenosine phosphorylase",
- "2.4.2.29 tRNA-guanine transglycosylase",
+ "2.4.2.29 tRNA-guanine(34) transglycosylase",
"2.4.2.30 NAD(+) ADP-ribosyltransferase",
"2.4.2.31 NAD(+)--protein-arginine ADP-ribosyltransferase",
"2.4.2.32 Dolichyl-phosphate D-xylosyltransferase",
@@ -2153,6 +2691,21 @@ static const char* const kECNum_specific[] = {
"2.4.2.42 UDP-D-xylose:beta-D-glucoside alpha-1,3-D-xylosyltransferase",
"2.4.2.43 Lipid IV(A) 4-amino-4-deoxy-L-arabinosyltransferase",
"2.4.2.44 S-methyl-5'-thioinosine phosphorylase",
+ "2.4.2.45 Decaprenyl-phosphate phosphoribosyltransferase",
+ "2.4.2.46 Galactan 5-O-arabinofuranosyltransferase",
+ "2.4.2.47 Arabinofuranan 3-O-arabinosyltransferase",
+ "2.4.2.48 tRNA-guanine(15) transglycosylase",
+ "2.4.2.49 Neamine phosphoribosyltransferase",
+ "2.4.2.50 Cyanidin 3-O-galactoside 2''-O-xylosyltransferase",
+ "2.4.2.51 Anthocyanidin 3-O-glucoside 2'''-O-xylosyltransferase",
+ "2.4.2.52 Triphosphoribosyl-dephospho-CoA synthase",
+ "2.4.2.53 Undecaprenyl-phosphate 4-deoxy-4-formamido-L-arabinose transferase",
+ "2.4.2.54 Beta-ribofuranosylphenol 5'-phosphate synthase",
+ "2.4.2.55 Nicotinate D-ribonucleotide:phenol phospho-D-ribosyltransferase",
+ "2.4.2.56 Kaempferol 3-O-xylosyltransferase",
+ "2.4.2.57 AMP phosphorylase",
+ "2.4.2.n2 Glucoside xylosyltransferase",
+ "2.4.2.n3 Xyloside xylosyltransferase",
"2.4.99.1 Beta-galactoside alpha-2,6-sialyltransferase",
"2.4.99.2 Monosialoganglioside sialyltransferase",
"2.4.99.3 Alpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase",
@@ -2165,12 +2718,18 @@ static const char* const kECNum_specific[] = {
"2.4.99.10 Neolactotetraosylceramide alpha-2,3-sialyltransferase",
"2.4.99.11 Lactosylceramide alpha-2,6-N-sialyltransferase",
"2.4.99.12 Lipid IV(A) 3-deoxy-D-manno-octulosonic acid transferase",
- "2.4.99.13 (KDO)-lipid IV(A) 3-deoxy-D-manno-octulosonic acid transferase",
- "2.4.99.14 (KDO)(2)-lipid IV(A) (2-8) 3-deoxy-D-manno-octulosonic acid transferase",
- "2.4.99.15 (KDO)(3)-lipid IV(A) (2-4) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.4.99.13 (Kdo)-lipid IV(A) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.4.99.14 (Kdo)(2)-lipid IV(A) (2-8) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.4.99.15 (Kdo)(3)-lipid IV(A) (2-4) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.4.99.16 Starch synthase (maltosyl-transferring)",
+ "2.4.99.17 S-adenosylmethionine:tRNA ribosyltransferase-isomerase",
+ "2.4.99.18 Dolichyl-diphosphooligosaccharide--protein glycotransferase",
+ "2.4.99.19 Undecaprenyl-diphosphooligosaccharide--protein glycotransferase",
+ "2.4.99.20 2'-phospho-ADP-ribosyl cyclase/2'-phospho-cyclic-ADP-ribose transferase",
+ "2.4.99.21 Dolichyl-phosphooligosaccharide-protein glycotransferase",
"2.5.1.1 Dimethylallyltranstransferase",
"2.5.1.2 Thiamine pyridinylase",
- "2.5.1.3 Thiamine-phosphate diphosphorylase",
+ "2.5.1.3 Thiamine phosphate synthase",
"2.5.1.4 Adenosylmethionine cyclotransferase",
"2.5.1.5 Galactose-6-sulfurylase",
"2.5.1.6 Methionine adenosyltransferase",
@@ -2189,12 +2748,12 @@ static const char* const kECNum_specific[] = {
"2.5.1.24 Discadenine synthase",
"2.5.1.25 tRNA-uridine aminocarboxypropyltransferase",
"2.5.1.26 Alkylglycerone-phosphate synthase",
- "2.5.1.27 Adenylate dimethylallyltransferase",
+ "2.5.1.27 Adenylate dimethylallyltransferase (AMP-dependent)",
"2.5.1.28 Dimethylallylcistransferase",
"2.5.1.29 Geranylgeranyl diphosphate synthase",
"2.5.1.30 Heptaprenyl diphosphate synthase",
"2.5.1.31 Ditrans,polycis-undecaprenyl-diphosphate synthase ((2E,6E)-farnesyl-diphosphate specific)",
- "2.5.1.32 Phytoene synthase",
+ "2.5.1.32 15-cis-phytoene synthase",
"2.5.1.34 4-dimethylallyltryptophan synthase",
"2.5.1.35 Aspulvinone dimethylallyltransferase",
"2.5.1.36 Trihydroxypterocarpan dimethylallyltransferase",
@@ -2257,6 +2816,40 @@ static const char* const kECNum_specific[] = {
"2.5.1.96 4,4'-diapophytoene synthase",
"2.5.1.97 Pseudaminic acid synthase",
"2.5.1.98 Rhizobium leguminosarum exopolysaccharide glucosyl ketal-pyruvate-transferase",
+ "2.5.1.99 All-trans-phytoene synthase",
+ "2.5.1.100 Fumigaclavine A dimethylallyltransferase",
+ "2.5.1.101 N,N'-diacetyllegionaminate synthase",
+ "2.5.1.102 Geranylpyrophosphate--olivetolic acid geranyltransferase",
+ "2.5.1.103 Presqualene diphosphate synthase",
+ "2.5.1.104 N(1)-aminopropylagmatine synthase",
+ "2.5.1.105 7,8-dihydropterin-6-yl-methyl-4-(beta-D-ribofuranosyl)aminobenzene 5'-phosphate synthase",
+ "2.5.1.106 Tryprostatin B synthase",
+ "2.5.1.107 Verruculogen prenyltransferase",
+ "2.5.1.108 2-(3-amino-3-carboxypropyl)histidine synthase",
+ "2.5.1.109 Brevianamide F prenyltransferase (deoxybrevianamide E-forming)",
+ "2.5.1.110 12-alpha,13-alpha-dihydroxyfumitremorgin C prenyltransferase",
+ "2.5.1.111 4-hydroxyphenylpyruvate 3-dimethylallyltransferase",
+ "2.5.1.112 Adenylate dimethylallyltransferase (ADP/ATP-dependent)",
+ "2.5.1.113 [CysO sulfur-carrier protein]-thiocarboxylate-dependent cysteine synthase",
+ "2.5.1.114 tRNA(Phe) (4-demethylwyosine(37)-C(7)) aminocarboxypropyltransferase",
+ "2.5.1.115 Homogentisate phytyltransferase",
+ "2.5.1.116 Homogentisate geranylgeranyltransferase",
+ "2.5.1.117 Homogentisate solanesyltransferase",
+ "2.5.1.118 Beta-(isoxazolin-5-on-2-yl)-L-alanine synthase",
+ "2.5.1.119 Beta-(isoxazolin-5-on-4-yl)-L-alanine synthase",
+ "2.5.1.120 Aminodeoxyfutalosine synthase",
+ "2.5.1.121 5,10-dihydrophenazine-1-carboxylate 9-dimethylallyltransferase",
+ "2.5.1.122 4-O-dimethylallyl-L-tyrosine synthase",
+ "2.5.1.123 Flaviolin linalyltransferase",
+ "2.5.1.124 6-linalyl-2-O,3-dimethylflaviolin synthase",
+ "2.5.1.125 7-geranyloxy-5-hydroxy-2-methoxy-3-methylnaphthalene-1,4-dione synthase",
+ "2.5.1.126 Norspermine synthase",
+ "2.5.1.127 Caldopentamine synthase",
+ "2.5.1.128 N(4)-bis(aminopropyl)spermidine synthase",
+ "2.5.1.129 Flavin prenyltransferase",
+ "2.5.1.130 2-carboxy-1,4-naphthoquinone phytyltransferase",
+ "2.5.1.131 (4-(4-(2-(gamma-L-glutamylamino)ethyl)phenoxymethyl)furan-2-yl)methanamine synthase",
+ "2.5.1.n9 Heptaprenylglyceryl phosphate synthase",
"2.6.1.1 Aspartate transaminase",
"2.6.1.2 Alanine transaminase",
"2.6.1.3 Cysteine transaminase",
@@ -2274,7 +2867,7 @@ static const char* const kECNum_specific[] = {
"2.6.1.16 Glutamine--fructose-6-phosphate transaminase (isomerizing)",
"2.6.1.17 Succinyldiaminopimelate transaminase",
"2.6.1.18 Beta-alanine--pyruvate transaminase",
- "2.6.1.19 4-aminobutyrate transaminase",
+ "2.6.1.19 4-aminobutyrate--2-oxoglutarate transaminase",
"2.6.1.21 D-amino-acid transaminase",
"2.6.1.22 (S)-3-amino-2-methylpropionate transaminase",
"2.6.1.23 4-hydroxyglutamate transaminase",
@@ -2287,7 +2880,7 @@ static const char* const kECNum_specific[] = {
"2.6.1.31 Pyridoxamine--oxaloacetate transaminase",
"2.6.1.32 Valine--3-methyl-2-oxovalerate transaminase",
"2.6.1.33 dTDP-4-amino-4,6-dideoxy-D-glucose transaminase",
- "2.6.1.34 UDP-2-acetamido-4-amino-2,4,6-trideoxyglucose transaminase",
+ "2.6.1.34 UDP-N-acetylbacillosamine transaminase",
"2.6.1.35 Glycine--oxaloacetate transaminase",
"2.6.1.36 L-lysine 6-transaminase",
"2.6.1.37 2-aminoethylphosphonate--pyruvate transaminase",
@@ -2341,11 +2934,28 @@ static const char* const kECNum_specific[] = {
"2.6.1.88 Methionine transaminase",
"2.6.1.89 dTDP-3-amino-3,6-dideoxy-alpha-D-glucopyranose transaminase",
"2.6.1.90 dTDP-3-amino-3,6-dideoxy-alpha-D-galactopyranose transaminase",
- "2.6.1.91 UDP-4-amino-4,6-dideoxy-alpha-D-N-acetyl-D-glucosamine transaminase",
- "2.6.1.92 UDP-4-amino-4,6-dideoxy-L-N-acetyl-beta-L-altrosamine transaminase",
+ "2.6.1.92 UDP-4-amino-4,6-dideoxy-N-acetyl-beta-L-altrosamine transaminase",
+ "2.6.1.93 Neamine transaminase",
+ "2.6.1.94 2'-deamino-2'-hydroxyneamine transaminase",
+ "2.6.1.95 Neomycin C transaminase",
+ "2.6.1.96 4-aminobutyrate--pyruvate transaminase",
+ "2.6.1.97 Archaeosine synthase",
+ "2.6.1.98 UDP-2-acetamido-2-deoxy-ribo-hexuluronate aminotransferase",
+ "2.6.1.99 L-tryptophan--pyruvate aminotransferase",
+ "2.6.1.100 L-glutamine:2-deoxy-scyllo-inosose aminotransferase",
+ "2.6.1.101 L-glutamine:3-amino-2,3-dideoxy-scyllo-inosose aminotransferase",
+ "2.6.1.102 GDP-perosamine synthase",
+ "2.6.1.103 (S)-3,5-dihydroxyphenylglycine transaminase",
+ "2.6.1.104 3-dehydro-glucose-6-phosphate--glutamate transaminase",
+ "2.6.1.105 Lysine--8-amino-7-oxononanoate transaminase",
+ "2.6.1.106 dTDP-3-amino-3,4,6-trideoxy-alpha-D-glucose transaminase",
+ "2.6.1.107 Beta-methylphenylalanine transaminase",
+ "2.6.1.108 (5-formylfuran-3-yl)methyl phosphate transaminase",
+ "2.6.1.109 8-amino-3,8-dideoxy-alpha-D-manno-octulosonate transaminase",
"2.6.3.1 Oximinotransferase",
"2.6.99.1 dATP(dGTP)--DNA purinetransferase",
"2.6.99.2 Pyridoxine 5'-phosphate synthase",
+ "2.6.99.3 O-ureido-L-serine synthase",
"2.7.1.1 Hexokinase",
"2.7.1.2 Glucokinase",
"2.7.1.3 Ketohexokinase",
@@ -2375,7 +2985,7 @@ static const char* const kECNum_specific[] = {
"2.7.1.28 Triokinase",
"2.7.1.29 Glycerone kinase",
"2.7.1.30 Glycerol kinase",
- "2.7.1.31 Glycerate kinase",
+ "2.7.1.31 Glycerate 3-kinase",
"2.7.1.32 Choline kinase",
"2.7.1.33 Pantothenate kinase",
"2.7.1.34 Pantetheine kinase",
@@ -2410,7 +3020,6 @@ static const char* const kECNum_specific[] = {
"2.7.1.66 Undecaprenol kinase",
"2.7.1.67 1-phosphatidylinositol 4-kinase",
"2.7.1.68 1-phosphatidylinositol-4-phosphate 5-kinase",
- "2.7.1.69 Protein-N(pi)-phosphohistidine--sugar phosphotransferase",
"2.7.1.71 Shikimate kinase",
"2.7.1.72 Streptomycin 6-kinase",
"2.7.1.73 Inosine kinase",
@@ -2441,7 +3050,7 @@ static const char* const kECNum_specific[] = {
"2.7.1.103 Viomycin kinase",
"2.7.1.105 6-phosphofructo-2-kinase",
"2.7.1.106 Glucose-1,6-bisphosphate synthase",
- "2.7.1.107 Diacylglycerol kinase",
+ "2.7.1.107 Diacylglycerol kinase (ATP)",
"2.7.1.108 Dolichol kinase",
"2.7.1.113 Deoxyguanosine kinase",
"2.7.1.114 AMP--thymidine kinase",
@@ -2485,10 +3094,42 @@ static const char* const kECNum_specific[] = {
"2.7.1.170 Anhydro-N-acetylmuramic acid kinase",
"2.7.1.171 Protein-fructosamine 3-kinase",
"2.7.1.172 Protein-ribulosamine 3-kinase",
- "2.7.1.n4 Nicotinate riboside kinase",
- "2.7.1.n5 Diacylglycerol kinase (CTP dependent)",
- "2.7.1.n7 UDP-N-acetylglucosamine kinase",
- "2.7.1.n8 Maltokinase",
+ "2.7.1.173 Nicotinate riboside kinase",
+ "2.7.1.174 Diacylglycerol kinase (CTP)",
+ "2.7.1.175 Maltokinase",
+ "2.7.1.176 UDP-N-acetylglucosamine kinase",
+ "2.7.1.177 L-threonine kinase",
+ "2.7.1.178 2-dehydro-3-deoxyglucono/galactono-kinase",
+ "2.7.1.179 Kanosamine kinase",
+ "2.7.1.180 FAD:protein FMN transferase",
+ "2.7.1.181 Polymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol kinase",
+ "2.7.1.182 Phytol kinase",
+ "2.7.1.183 Glycoprotein-mannosyl O(6)-kinase",
+ "2.7.1.184 Sulfofructose kinase",
+ "2.7.1.185 Mevalonate 3-kinase",
+ "2.7.1.186 Mevalonate-3-phosphate 5-kinase",
+ "2.7.1.187 Acarbose 7(IV)-phosphotransferase",
+ "2.7.1.188 2-epi-5-epi-valiolone 7-kinase",
+ "2.7.1.189 Autoinducer-2 kinase",
+ "2.7.1.190 Aminoglycoside 2''-phosphotransferase",
+ "2.7.1.191 Protein-N(pi)-phosphohistidine--D-mannose phosphotransferase",
+ "2.7.1.192 Protein-N(pi)-phosphohistidine--N-acetylmuramate phosphotransferase",
+ "2.7.1.193 Protein-N(pi)-phosphohistidine--N-acetyl-D-glucosamine phosphotransferase",
+ "2.7.1.194 Protein-N(pi)-phosphohistidine--L-ascorbate phosphotransferase",
+ "2.7.1.195 Protein-N(pi)-phosphohistidine--2-O-alpha-mannosyl-D-glycerate phosphotransferase",
+ "2.7.1.196 Protein-N(pi)-phosphohistidine--N,N'-diacetylchitobiose phosphotransferase",
+ "2.7.1.197 Protein-N(pi)-phosphohistidine--D-mannitol phosphotransferase",
+ "2.7.1.198 Protein-N(pi)-phosphohistidine--D-sorbitol phosphotransferase",
+ "2.7.1.199 Protein-N(pi)-phosphohistidine--D-glucose phosphotransferase",
+ "2.7.1.200 Protein-N(pi)-phosphohistidine--galactitol phosphotransferase",
+ "2.7.1.201 Protein-N(pi)-phosphohistidine--trehalose phosphotransferase",
+ "2.7.1.202 Protein-N(pi)-phosphohistidine--D-fructose phosphotransferase",
+ "2.7.1.203 Protein-N(pi)-phosphohistidine--D-glucosaminate phosphotransferase",
+ "2.7.1.204 Protein-N(pi)-phosphohistidine--D-galactose phosphotransferase",
+ "2.7.1.205 Protein-N(pi)-phosphohistidine--D-cellobiose phosphotransferase",
+ "2.7.1.206 Protein-N(pi)-phosphohistidine--L-sorbose phosphotransferase",
+ "2.7.1.207 Protein-N(pi)-phosphohistidine--lactose phosphotransferase",
+ "2.7.1.208 Protein-N(pi)-phosphohistidine--maltose phosphotransferase",
"2.7.2.1 Acetate kinase",
"2.7.2.2 Carbamate kinase",
"2.7.2.3 Phosphoglycerate kinase",
@@ -2536,6 +3177,12 @@ static const char* const kECNum_specific[] = {
"2.7.4.23 Ribose 1,5-bisphosphate phosphokinase",
"2.7.4.24 Diphosphoinositol-pentakisphosphate kinase",
"2.7.4.25 (d)CMP kinase",
+ "2.7.4.26 Isopentenyl phosphate kinase",
+ "2.7.4.27 ([Pyruvate, phosphate dikinase] phosphate) phosphotransferase",
+ "2.7.4.28 ([Pyruvate, water dikinase] phosphate) phosphotransferase",
+ "2.7.4.29 Kdo(2)-lipid A phosphotransferase",
+ "2.7.4.30 Lipid A phosphoethanolamine transferase",
+ "2.7.4.31 (5-(aminomethyl)furan-3-yl)methyl phosphate kinase",
"2.7.6.1 Ribose-phosphate diphosphokinase",
"2.7.6.2 Thiamine diphosphokinase",
"2.7.6.3 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine diphosphokinase",
@@ -2587,20 +3234,17 @@ static const char* const kECNum_specific[] = {
"2.7.7.51 Adenylylsulfate--ammonia adenylyltransferase",
"2.7.7.52 RNA uridylyltransferase",
"2.7.7.53 ATP adenylyltransferase",
- "2.7.7.54 Phenylalanine adenylyltransferase",
- "2.7.7.55 Anthranilate adenylyltransferase",
"2.7.7.56 tRNA nucleotidyltransferase",
"2.7.7.57 N-methylphosphoethanolamine cytidylyltransferase",
"2.7.7.58 (2,3-dihydroxybenzoyl)adenylate synthase",
"2.7.7.59 [Protein-PII] uridylyltransferase",
"2.7.7.60 2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase",
- "2.7.7.61 Citrate lyase holo-[acyl-carrier-protein] synthase",
+ "2.7.7.61 Citrate lyase holo-[acyl-carrier protein] synthase",
"2.7.7.62 Adenosylcobinamide-phosphate guanylyltransferase",
- "2.7.7.63 Lipoate--protein ligase",
"2.7.7.64 UTP-monosaccharide-1-phosphate uridylyltransferase",
"2.7.7.65 Diguanylate cyclase",
- "2.7.7.66 Malonate decarboxylase holo-[acyl-carrier-protein] synthase",
- "2.7.7.67 CDP-archaeol synthase",
+ "2.7.7.66 Malonate decarboxylase holo-[acyl-carrier protein] synthase",
+ "2.7.7.67 CDP-2,3-bis-(O-geranylgeranyl)-sn-glycerol synthase",
"2.7.7.68 2-phospho-L-lactate guanylyltransferase",
"2.7.7.69 GDP-L-galactose phosphorylase",
"2.7.7.70 D-glycero-beta-D-manno-heptose 1-phosphate adenylyltransferase",
@@ -2615,7 +3259,16 @@ static const char* const kECNum_specific[] = {
"2.7.7.79 tRNA(His) guanylyltransferase",
"2.7.7.80 Molybdopterin-synthase adenylyltransferase",
"2.7.7.81 Pseudaminic acid cytidylyltransferase",
+ "2.7.7.82 CMP-N,N'-diacetyllegionaminic acid synthase",
+ "2.7.7.83 UDP-N-acetylgalactosamine diphosphorylase",
+ "2.7.7.84 2'-5' oligoadenylate synthase",
+ "2.7.7.85 Diadenylate cyclase",
+ "2.7.7.86 Cyclic GMP-AMP synthase",
+ "2.7.7.87 L-threonylcarbamoyladenylate synthase",
+ "2.7.7.88 GDP polyribonucleotidyltransferase",
+ "2.7.7.89 [Glutamate--ammonia ligase]-adenylyl-L-tyrosine phosphorylase",
"2.7.7.n1 Adenosine monophosphate-protein transferase",
+ "2.7.7.n6 Guanine phosphate-protein transferase",
"2.7.8.1 Ethanolaminephosphotransferase",
"2.7.8.2 Diacylglycerol cholinephosphotransferase",
"2.7.8.3 Ceramide cholinephosphotransferase",
@@ -2639,16 +3292,23 @@ static const char* const kECNum_specific[] = {
"2.7.8.22 1-alkenyl-2-acylglycerol choline phosphotransferase",
"2.7.8.23 Carboxyvinyl-carboxyphosphonate phosphorylmutase",
"2.7.8.24 Phosphatidylcholine synthase",
- "2.7.8.25 Triphosphoribosyl-dephospho-CoA synthase",
"2.7.8.26 Adenosylcobinamide-GDP ribazoletransferase",
"2.7.8.27 Sphingomyelin synthase",
"2.7.8.28 2-phospho-L-lactate transferase",
"2.7.8.29 L-serine-phosphatidylethanolamine phosphatidyltransferase",
- "2.7.8.30 Undecaprenyl-phosphate 4-deoxy-4-formamido-L-arabinose transferase",
"2.7.8.31 Undecaprenyl-phosphate glucose phosphotransferase",
"2.7.8.32 3-O-alpha-D-mannopyranosyl-alpha-D-mannopyranose xylosylphosphotransferase",
- "2.7.8.33 UDP-GlcNAc:undecaprenyl-phosphate GlcNAc-1-phosphate transferase",
+ "2.7.8.33 UDP-N-acetylglucosamine--undecaprenyl-phosphate N-acetylglucosaminephosphotransferase",
"2.7.8.34 CDP-L-myo-inositol myo-inositolphosphotransferase",
+ "2.7.8.35 UDP-N-acetylglucosamine--decaprenyl-phosphate N-acetylglucosaminephosphotransferase",
+ "2.7.8.36 Undecaprenyl phosphate N,N'-diacetylbacillosamine 1-phosphate transferase",
+ "2.7.8.37 Alpha-D-ribose 1-methylphosphonate 5-triphosphate synthase",
+ "2.7.8.38 Archaetidylserine synthase",
+ "2.7.8.39 Archaetidylinositol phosphate synthase",
+ "2.7.8.40 UDP-N-acetylgalactosamine-undecaprenyl-phosphate N-acetylgalactosaminephosphotransferase",
+ "2.7.8.41 Cardiolipin synthase (CMP-forming)",
+ "2.7.8.42 Kdo(2)-lipid A phosphoethanolamine 7''-transferase",
+ "2.7.8.n3 Ceramide phosphoethanolamine synthase",
"2.7.9.1 Pyruvate, phosphate dikinase",
"2.7.9.2 Pyruvate, water dikinase",
"2.7.9.3 Selenide, water dikinase",
@@ -2687,11 +3347,14 @@ static const char* const kECNum_specific[] = {
"2.7.11.29 [Low-density-lipoprotein receptor] kinase",
"2.7.11.30 Receptor protein serine/threonine kinase",
"2.7.11.31 [Hydroxymethylglutaryl-CoA reductase (NADPH)] kinase",
+ "2.7.11.32 [Pyruvate, phosphate dikinase] kinase",
+ "2.7.11.33 [Pyruvate, water dikinase] kinase",
"2.7.12.1 Dual-specificity kinase",
"2.7.12.2 Mitogen-activated protein kinase kinase",
"2.7.13.1 Protein-histidine pros-kinase",
"2.7.13.2 Protein-histidine tele-kinase",
"2.7.13.3 Histidine kinase",
+ "2.7.14.1 Protein arginine kinase",
"2.7.99.1 Triphosphate--protein phosphotransferase",
"2.8.1.1 Thiosulfate sulfurtransferase",
"2.8.1.2 3-mercaptopyruvate sulfurtransferase",
@@ -2705,6 +3368,8 @@ static const char* const kECNum_specific[] = {
"2.8.1.10 Thiazole synthase",
"2.8.1.11 Molybdopterin-synthase sulfurtransferase",
"2.8.1.12 Molybdopterin synthase",
+ "2.8.1.13 tRNA-uridine 2-sulfurtransferase",
+ "2.8.1.14 tRNA-5-taurinomethyluridine 2-sulfurtransferase",
"2.8.2.1 Aryl sulfotransferase",
"2.8.2.2 Alcohol sulfotransferase",
"2.8.2.3 Amine sulfotransferase",
@@ -2739,12 +3404,14 @@ static const char* const kECNum_specific[] = {
"2.8.2.33 N-acetylgalactosamine 4-sulfate 6-O-sulfotransferase",
"2.8.2.34 Glycochenodeoxycholate sulfotransferase",
"2.8.2.35 Dermatan 4-sulfotransferase",
+ "2.8.2.36 Desulfo-A47934 sulfotransferase",
+ "2.8.2.37 Trehalose 2-sulfotransferase",
+ "2.8.2.n2 Thyroxine sulfotransferase",
"2.8.3.1 Propionate CoA-transferase",
"2.8.3.2 Oxalate CoA-transferase",
"2.8.3.3 Malonate CoA-transferase",
"2.8.3.5 3-oxoacid CoA-transferase",
"2.8.3.6 3-oxoadipate CoA-transferase",
- "2.8.3.7 Succinate--citramalate CoA-transferase",
"2.8.3.8 Acetate CoA-transferase",
"2.8.3.9 Butyrate--acetoacetate CoA-transferase",
"2.8.3.10 Citrate CoA-transferase",
@@ -2755,8 +3422,17 @@ static const char* const kECNum_specific[] = {
"2.8.3.15 Succinyl-CoA:(R)-benzylsuccinate CoA-transferase",
"2.8.3.16 Formyl-CoA transferase",
"2.8.3.17 Cinnamoyl-CoA:phenyllactate CoA-transferase",
+ "2.8.3.18 Succinyl-CoA:acetate CoA-transferase",
+ "2.8.3.19 CoA:oxalate CoA-transferase",
+ "2.8.3.20 Succinyl-CoA--D-citramalate CoA-transferase",
+ "2.8.3.21 L-carnitine CoA-transferase",
+ "2.8.3.22 Succinyl-CoA--L-malate CoA-transferase",
+ "2.8.3.23 Caffeate CoA-transferase",
"2.8.4.1 Coenzyme-B sulfoethylthiotransferase",
"2.8.4.2 Arsenate-mycothiol transferase",
+ "2.8.4.3 tRNA-2-methylthio-N(6)-dimethylallyladenosine synthase",
+ "2.8.4.4 [Ribosomal protein S12] (aspartate(89)-C(3))-methylthiotransferase",
+ "2.8.4.5 tRNA (N(6)-L-threonylcarbamoyladenosine(37)-C(2))-methylthiotransferase",
"2.9.1.1 L-seryl-tRNA(Sec) selenium transferase",
"2.9.1.2 O-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase",
"2.10.1.1 Molybdopterin molybdotransferase",
@@ -2837,12 +3513,21 @@ static const char* const kECNum_specific[] = {
"3.1.1.82 Pheophorbidase",
"3.1.1.83 Monoterpene epsilon-lactone hydrolase",
"3.1.1.84 Cocaine esterase",
- "3.1.1.85 Pimelyl-[acyl-carrier protein] methyl ester esterase",
+ "3.1.1.85 Pimeloyl-[acyl-carrier protein] methyl ester esterase",
"3.1.1.86 Rhamnogalacturonan acetylesterase",
"3.1.1.87 Fumonisin B1 esterase",
"3.1.1.88 Pyrethroid hydrolase",
"3.1.1.89 Protein phosphatase methylesterase-1",
"3.1.1.90 All-trans-retinyl ester 13-cis isomerohydrolase",
+ "3.1.1.91 2-oxo-3-(5-oxofuran-2-ylidene)propanoate lactonase",
+ "3.1.1.92 4-sulfomuconolactone hydrolase",
+ "3.1.1.93 Mycophenolic acid acyl-glucuronide esterase",
+ "3.1.1.94 Versiconal hemiacetal acetate esterase",
+ "3.1.1.95 Aclacinomycin methylesterase",
+ "3.1.1.96 D-aminoacyl-tRNA deacylase",
+ "3.1.1.97 Methylated diphthine methylhydrolase",
+ "3.1.1.98 [Wnt protein] O-palmitoleoyl-L-serine hydrolase",
+ "3.1.1.n2 Protein-S-isoprenylcysteine alpha-carbonyl methylesterase",
"3.1.2.1 Acetyl-CoA hydrolase",
"3.1.2.2 Palmitoyl-CoA hydrolase",
"3.1.2.3 Succinyl-CoA hydrolase",
@@ -2855,7 +3540,6 @@ static const char* const kECNum_specific[] = {
"3.1.2.12 S-formylglutathione hydrolase",
"3.1.2.13 S-succinylglutathione hydrolase",
"3.1.2.14 Oleoyl-[acyl-carrier-protein] hydrolase",
- "3.1.2.15 Ubiquitin thiolesterase",
"3.1.2.16 Citrate-lyase deacetylase",
"3.1.2.17 (S)-methylmalonyl-CoA hydrolase",
"3.1.2.18 ADP-dependent short-chain-acyl-CoA hydrolase",
@@ -2869,6 +3553,8 @@ static const char* const kECNum_specific[] = {
"3.1.2.27 Choloyl-CoA hydrolase",
"3.1.2.28 1,4-dihydroxy-2-naphthoyl-CoA hydrolase",
"3.1.2.29 Fluoroacetyl-CoA thioesterase",
+ "3.1.2.30 (3S)-malyl-CoA thioesterase",
+ "3.1.2.31 Dihydromonacolin L-[lovastatin nonaketide synthase] thioesterase",
"3.1.3.1 Alkaline phosphatase",
"3.1.3.2 Acid phosphatase",
"3.1.3.3 Phosphoserine phosphatase",
@@ -2884,7 +3570,7 @@ static const char* const kECNum_specific[] = {
"3.1.3.13 Bisphosphoglycerate phosphatase",
"3.1.3.14 Methylphosphothioglycerate phosphatase",
"3.1.3.15 Histidinol-phosphatase",
- "3.1.3.16 Phosphoprotein phosphatase",
+ "3.1.3.16 Protein-serine/threonine phosphatase",
"3.1.3.17 [Phosphorylase] phosphatase",
"3.1.3.18 Phosphoglycolate phosphatase",
"3.1.3.19 Glycerol-2-phosphatase",
@@ -2952,7 +3638,19 @@ static const char* const kECNum_specific[] = {
"3.1.3.84 ADP-ribose 1''-phosphate phosphatase",
"3.1.3.85 Glucosyl-3-phosphoglycerate phosphatase",
"3.1.3.86 Phosphatidylinositol-3,4,5-trisphosphate 5-phosphatase",
- "3.1.3.n4 2-hydroxy-3-keto-5-methylthiopentenyl-1-phosphate phosphatase",
+ "3.1.3.87 2-hydroxy-3-keto-5-methylthiopentenyl-1-phosphate phosphatase",
+ "3.1.3.88 5''-phosphoribostamycin phosphatase",
+ "3.1.3.89 5'-deoxynucleotidase",
+ "3.1.3.90 Maltose 6'-phosphate phosphatase",
+ "3.1.3.91 7-methylguanosine nucleotidase",
+ "3.1.3.92 Kanosamine-6-phosphate phosphatase",
+ "3.1.3.93 L-galactose 1-phosphate phosphatase",
+ "3.1.3.94 D-galactose 1-phosphate phosphatase",
+ "3.1.3.95 Phosphatidylinositol-3,5-bisphosphate 3-phosphatase",
+ "3.1.3.96 Pseudouridine 5'-phosphatase",
+ "3.1.3.97 3',5'-nucleoside bisphosphate phosphatase",
+ "3.1.3.98 Geranyl diphosphate phosphohydrolase",
+ "3.1.3.99 IMP-specific 5'-nucleotidase",
"3.1.4.1 Phosphodiesterase I",
"3.1.4.2 Glycerophosphocholine phosphodiesterase",
"3.1.4.3 Phospholipase C",
@@ -2961,7 +3659,6 @@ static const char* const kECNum_specific[] = {
"3.1.4.12 Sphingomyelin phosphodiesterase",
"3.1.4.13 Serine-ethanolaminephosphate phosphodiesterase",
"3.1.4.14 [Acyl-carrier-protein] phosphodiesterase",
- "3.1.4.15 Adenylyl-[glutamate--ammonia ligase] hydrolase",
"3.1.4.16 2',3'-cyclic-nucleotide 2'-phosphodiesterase",
"3.1.4.17 3',5'-cyclic-nucleotide phosphodiesterase",
"3.1.4.35 3',5'-cyclic-GMP phosphodiesterase",
@@ -2982,6 +3679,9 @@ static const char* const kECNum_specific[] = {
"3.1.4.52 Cyclic-guanylate-specific phosphodiesterase",
"3.1.4.53 3',5'-cyclic-AMP phosphodiesterase",
"3.1.4.54 N-acetylphosphatidylethanolamine-hydrolyzing phospholipase D",
+ "3.1.4.55 Phosphoribosyl 1,2-cyclic phosphate phosphodiesterase",
+ "3.1.4.56 7,8-dihydroneopterin 2',3'-cyclic phosphate phosphodiesterase",
+ "3.1.4.57 Phosphoribosyl 1,2-cyclic phosphate 1,2-diphosphodiesterase",
"3.1.5.1 dGTPase",
"3.1.6.1 Arylsulfatase",
"3.1.6.2 Steryl-sulfatase",
@@ -3000,16 +3700,17 @@ static const char* const kECNum_specific[] = {
"3.1.6.16 Monomethyl-sulfatase",
"3.1.6.17 D-lactate-2-sulfatase",
"3.1.6.18 Glucuronate-2-sulfatase",
+ "3.1.6.19 (R)-specific secondary-alkylsulfatase",
"3.1.7.1 Prenyl-diphosphatase",
"3.1.7.2 Guanosine-3',5'-bis(diphosphate) 3'-diphosphatase",
"3.1.7.3 Monoterpenyl-diphosphatase",
- "3.1.7.4 Sclareol cyclase",
"3.1.7.5 Geranylgeranyl diphosphate diphosphatase",
"3.1.7.6 Farnesyl diphosphatase",
"3.1.7.7 Drimenol cyclase",
"3.1.7.8 Tuberculosinol synthase",
"3.1.7.9 Isotuberculosinol synthase",
"3.1.7.10 (13E)-labda-7,13-dien-15-ol synthase",
+ "3.1.7.11 Geranyl diphosphate diphosphatase",
"3.1.8.1 Aryldialkylphosphatase",
"3.1.8.2 Diisopropyl-fluorophosphatase",
"3.1.11.1 Exodeoxyribonuclease I",
@@ -3018,6 +3719,7 @@ static const char* const kECNum_specific[] = {
"3.1.11.4 Exodeoxyribonuclease (phage SP3-induced)",
"3.1.11.5 Exodeoxyribonuclease V",
"3.1.11.6 Exodeoxyribonuclease VII",
+ "3.1.12.1 5' to 3' exodeoxyribonuclease (nucleoside 3'-phosphate-forming)",
"3.1.13.1 Exoribonuclease II",
"3.1.13.2 Exoribonuclease H",
"3.1.13.3 Oligonucleotidase",
@@ -3027,12 +3729,14 @@ static const char* const kECNum_specific[] = {
"3.1.15.1 Venom exonuclease",
"3.1.16.1 Spleen exonuclease",
"3.1.21.1 Deoxyribonuclease I",
- "3.1.21.2 Deoxyribonuclease IV (phage-T(4)-induced)",
+ "3.1.21.2 Deoxyribonuclease IV",
"3.1.21.3 Type I site-specific deoxyribonuclease",
"3.1.21.4 Type II site-specific deoxyribonuclease",
"3.1.21.5 Type III site-specific deoxyribonuclease",
"3.1.21.6 CC-preferring endodeoxyribonuclease",
"3.1.21.7 Deoxyribonuclease V",
+ "3.1.21.8 T(4) deoxyribonuclease II",
+ "3.1.21.9 T(4) deoxyribonuclease IV",
"3.1.22.1 Deoxyribonuclease II",
"3.1.22.2 Aspergillus deoxyribonuclease K(1)",
"3.1.22.4 Crossover junction endodeoxyribonuclease",
@@ -3051,6 +3755,7 @@ static const char* const kECNum_specific[] = {
"3.1.26.11 Ribonuclease Z",
"3.1.26.12 Ribonuclease E",
"3.1.26.13 Retroviral ribonuclease H",
+ "3.1.26.n2 Argonaute-2",
"3.1.27.1 Ribonuclease T(2)",
"3.1.27.2 Bacillus subtilis ribonuclease",
"3.1.27.3 Ribonuclease T(1)",
@@ -3059,7 +3764,6 @@ static const char* const kECNum_specific[] = {
"3.1.27.6 Enterobacter ribonuclease",
"3.1.27.7 Ribonuclease F",
"3.1.27.8 Ribonuclease V",
- "3.1.27.9 tRNA-intron endonuclease",
"3.1.27.10 rRNA endonuclease",
"3.1.30.1 Aspergillus nuclease S(1)",
"3.1.30.2 Serratia marcescens nuclease",
@@ -3108,7 +3812,7 @@ static const char* const kECNum_specific[] = {
"3.2.1.52 Beta-N-acetylhexosaminidase",
"3.2.1.53 Beta-N-acetylgalactosaminidase",
"3.2.1.54 Cyclomaltodextrinase",
- "3.2.1.55 Alpha-N-arabinofuranosidase",
+ "3.2.1.55 Non-reducing end alpha-L-arabinofuranosidase",
"3.2.1.56 Glucuronosyl-disulfoglucosamine glucuronidase",
"3.2.1.57 Isopullulanase",
"3.2.1.58 Glucan 1,3-beta-glucosidase",
@@ -3139,8 +3843,8 @@ static const char* const kECNum_specific[] = {
"3.2.1.85 6-phospho-beta-galactosidase",
"3.2.1.86 6-phospho-beta-glucosidase",
"3.2.1.87 Capsular-polysaccharide endo-1,3-alpha-galactosidase",
- "3.2.1.88 Beta-L-arabinosidase",
- "3.2.1.89 Arabinogalactan endo-1,4-beta-galactosidase",
+ "3.2.1.88 Non-reducing end beta-L-arabinopyranosidase",
+ "3.2.1.89 Arabinogalactan endo-beta-1,4-galactanase",
"3.2.1.91 Cellulose 1,4-beta-cellobiosidase (non-reducing end)",
"3.2.1.92 Peptidoglycan beta-N-acetylmuramidase",
"3.2.1.93 Alpha,alpha-phosphotrehalase",
@@ -3227,6 +3931,21 @@ static const char* const kECNum_specific[] = {
"3.2.1.178 Beta-porphyranase",
"3.2.1.179 Gellan tetrasaccharide unsaturated glucuronyl hydrolase",
"3.2.1.180 Unsaturated chondroitin disaccharide hydrolase",
+ "3.2.1.181 Galactan endo-beta-1,3-galactanase",
+ "3.2.1.182 4-hydroxy-7-methoxy-3-oxo-3,4-dihydro-2H-1,4-benzoxazin-2-yl glucoside beta-D-glucosidase",
+ "3.2.1.183 UDP-N-acetylglucosamine 2-epimerase (hydrolyzing)",
+ "3.2.1.184 UDP-N,N'-diacetylbacillosamine 2-epimerase (hydrolyzing)",
+ "3.2.1.185 Non-reducing end beta-L-arabinofuranosidase",
+ "3.2.1.186 Protodioscin 26-O-beta-D-glucosidase",
+ "3.2.1.187 (Ara-f)(3)-Hyp beta-L-arabinobiosidase",
+ "3.2.1.188 Avenacosidase",
+ "3.2.1.189 Dioscin glycosidase (diosgenin-forming)",
+ "3.2.1.190 Dioscin glycosidase (3-O-beta-D-Glc-diosgenin-forming)",
+ "3.2.1.191 Ginsenosidase type III",
+ "3.2.1.192 Ginsenoside Rb1 beta-glucosidase",
+ "3.2.1.193 Ginsenosidase type I",
+ "3.2.1.194 Ginsenosidase type IV",
+ "3.2.1.195 20-O-multi-glycoside ginsenosidase",
"3.2.1.n1 Blood group B branched chain alpha-1,3-galactosidase",
"3.2.1.n2 Blood group B linear chain alpha-1,3-galactosidase",
"3.2.1.n3 Dictyostelium lysozyme A",
@@ -3234,8 +3953,8 @@ static const char* const kECNum_specific[] = {
"3.2.2.2 Inosine nucleosidase",
"3.2.2.3 Uridine nucleosidase",
"3.2.2.4 AMP nucleosidase",
- "3.2.2.5 NAD(+) nucleosidase",
- "3.2.2.6 NAD(P)(+) nucleosidase",
+ "3.2.2.5 NAD(+) glycohydrolase",
+ "3.2.2.6 ADP-ribosyl cyclase/cyclic ADP-ribose hydrolase",
"3.2.2.7 Adenosine nucleosidase",
"3.2.2.8 Ribosylpyrimidine nucleosidase",
"3.2.2.9 Adenosylhomocysteine nucleosidase",
@@ -3258,6 +3977,8 @@ static const char* const kECNum_specific[] = {
"3.2.2.27 Uracil-DNA glycosylase",
"3.2.2.28 Double-stranded uracil-DNA glycosylase",
"3.2.2.29 Thymine-DNA glycosylase",
+ "3.2.2.30 Aminodeoxyfutalosine nucleosidase",
+ "3.2.2.n1 Cytokinin riboside 5'-monophosphate phosphoribohydrolase",
"3.3.1.1 Adenosylhomocysteinase",
"3.3.1.2 Adenosylmethionine hydrolase",
"3.3.2.1 Isochorismatase",
@@ -3270,6 +3991,9 @@ static const char* const kECNum_specific[] = {
"3.3.2.9 Microsomal epoxide hydrolase",
"3.3.2.10 Soluble epoxide hydrolase",
"3.3.2.11 Cholesterol-5,6-oxide hydrolase",
+ "3.3.2.12 Oxepin-CoA hydrolase",
+ "3.3.2.13 Chorismatase",
+ "3.3.2.14 2,4-dinitroanisole O-demethylase",
"3.4.11.1 Leucyl aminopeptidase",
"3.4.11.2 Membrane alanyl aminopeptidase",
"3.4.11.3 Cystinyl aminopeptidase",
@@ -3313,6 +4037,7 @@ static const char* const kECNum_specific[] = {
"3.4.14.10 Tripeptidyl-peptidase II",
"3.4.14.11 Xaa-Pro dipeptidyl-peptidase",
"3.4.14.12 Xaa-Xaa-Pro tripeptidyl-peptidase",
+ "3.4.14.13 Gamma-D-glutamyl-L-lysine dipeptidyl-peptidase",
"3.4.15.1 Peptidyl-dipeptidase A",
"3.4.15.4 Peptidyl-dipeptidase B",
"3.4.15.5 Peptidyl-dipeptidase Dcp",
@@ -3341,6 +4066,7 @@ static const char* const kECNum_specific[] = {
"3.4.17.21 Glutamate carboxypeptidase II",
"3.4.17.22 Metallocarboxypeptidase D",
"3.4.17.23 Angiotensin-converting enzyme 2",
+ "3.4.17.n1 [CysO]-cysteine peptidase",
"3.4.18.1 Cathepsin X",
"3.4.19.1 Acylaminoacyl-peptidase",
"3.4.19.2 Peptidyl-glycinamidase",
@@ -3418,7 +4144,7 @@ static const char* const kECNum_specific[] = {
"3.4.21.84 Limulus clotting factor C",
"3.4.21.85 Limulus clotting factor B",
"3.4.21.86 Limulus clotting enzyme",
- "3.4.21.88 Repressor lexA",
+ "3.4.21.88 Repressor LexA",
"3.4.21.89 Signal peptidase I",
"3.4.21.90 Togavirin",
"3.4.21.91 Flavivirin",
@@ -3451,6 +4177,7 @@ static const char* const kECNum_specific[] = {
"3.4.21.118 Kallikrein 8",
"3.4.21.119 Kallikrein 13",
"3.4.21.120 Oviductin",
+ "3.4.21.121 Lys-Lys/Arg-Xaa endopeptidase",
"3.4.22.1 Cathepsin B",
"3.4.22.2 Papain",
"3.4.22.3 Ficain",
@@ -3631,6 +4358,8 @@ static const char* const kECNum_specific[] = {
"3.4.24.85 S2P endopeptidase",
"3.4.24.86 ADAM 17 endopeptidase",
"3.4.24.87 ADAMTS13 endopeptidase",
+ "3.4.24.88 Desampylase",
+ "3.4.24.89 Pro-Pro endopeptidase",
"3.4.25.1 Proteasome endopeptidase complex",
"3.4.25.2 HslU--HslV peptidase",
"3.5.1.1 Asparaginase",
@@ -3646,7 +4375,7 @@ static const char* const kECNum_specific[] = {
"3.5.1.11 Penicillin amidase",
"3.5.1.12 Biotinidase",
"3.5.1.13 Aryl-acylamidase",
- "3.5.1.14 Aminoacylase",
+ "3.5.1.14 N-acyl-aliphatic-L-amino acid amidohydrolase",
"3.5.1.15 Aspartoacylase",
"3.5.1.16 Acetylornithine deacetylase",
"3.5.1.17 Acyl-lysine deacylase",
@@ -3659,7 +4388,6 @@ static const char* const kECNum_specific[] = {
"3.5.1.24 Choloylglycine hydrolase",
"3.5.1.25 N-acetylglucosamine-6-phosphate deacetylase",
"3.5.1.26 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase",
- "3.5.1.27 N-formylmethionylaminoacyl-tRNA deformylase",
"3.5.1.28 N-acetylmuramoyl-L-alanine amidase",
"3.5.1.29 2-(acetamidomethylene)succinate hydrolase",
"3.5.1.30 5-aminopentanamidase",
@@ -3675,7 +4403,7 @@ static const char* const kECNum_specific[] = {
"3.5.1.42 Nicotinamide-nucleotide amidase",
"3.5.1.43 Peptidyl-glutaminase",
"3.5.1.44 Protein-glutamine glutaminase",
- "3.5.1.46 6-aminohexanoate-dimer hydrolase",
+ "3.5.1.46 6-aminohexanoate-oligomer exohydrolase",
"3.5.1.47 N-acetyldiaminopimelate deacetylase",
"3.5.1.48 Acetylspermidine deacetylase",
"3.5.1.49 Formamidase",
@@ -3738,8 +4466,17 @@ static const char* const kECNum_specific[] = {
"3.5.1.107 Maleamate amidohydrolase",
"3.5.1.108 UDP-3-O-acyl-N-acetylglucosamine deacetylase",
"3.5.1.109 Sphingomyelin deacylase",
+ "3.5.1.110 Peroxyureidoacrylate/ureidoacrylate amidohydrolase",
+ "3.5.1.111 2-oxoglutaramate amidase",
+ "3.5.1.112 2'-N-acetylparomamine deacetylase",
+ "3.5.1.113 2'''-acetyl-6'''-hydroxyneomycin C deacetylase",
+ "3.5.1.114 N-acyl-aromatic-L-amino acid amidohydrolase",
+ "3.5.1.115 Mycothiol S-conjugate amidase",
+ "3.5.1.116 Ureidoglycolate amidohydrolase",
+ "3.5.1.117 6-aminohexanoate-oligomer endohydrolase",
+ "3.5.1.118 Gamma-glutamyl hercynylcysteine S-oxide hydrolase",
+ "3.5.1.119 Pup amidohydrolase",
"3.5.1.n3 4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase",
- "3.5.1.n4 Peroxyureidoacrylate/ureidoacrylate amidohydrolase",
"3.5.2.1 Barbiturase",
"3.5.2.2 Dihydropyrimidinase",
"3.5.2.3 Dihydroorotase",
@@ -3758,6 +4495,7 @@ static const char* const kECNum_specific[] = {
"3.5.2.17 Hydroxyisourate hydrolase",
"3.5.2.18 Enamidase",
"3.5.2.19 Streptothricin hydrolase",
+ "3.5.2.20 Isatin hydrolase",
"3.5.3.1 Arginase",
"3.5.3.2 Guanidinoacetase",
"3.5.3.3 Creatinase",
@@ -3776,11 +4514,13 @@ static const char* const kECNum_specific[] = {
"3.5.3.16 Methylguanidinase",
"3.5.3.17 Guanidinopropionase",
"3.5.3.18 Dimethylargininase",
- "3.5.3.19 Ureidoglycolate hydrolase",
"3.5.3.20 Diguanidinobutanase",
"3.5.3.21 Methylenediurea deaminase",
"3.5.3.22 Proclavaminate amidinohydrolase",
"3.5.3.23 N-succinylarginine dihydrolase",
+ "3.5.3.24 N(1)-aminopropylagmatine ureohydrolase",
+ "3.5.3.25 N(omega)-hydroxy-L-arginine amidinohydrolase",
+ "3.5.3.26 (S)-ureidoglycine aminohydrolase",
"3.5.4.1 Cytosine deaminase",
"3.5.4.2 Adenine deaminase",
"3.5.4.3 Guanine deaminase",
@@ -3794,7 +4534,6 @@ static const char* const kECNum_specific[] = {
"3.5.4.11 Pterin deaminase",
"3.5.4.12 dCMP deaminase",
"3.5.4.13 dCTP deaminase",
- "3.5.4.14 Deoxycytidine deaminase",
"3.5.4.15 Guanosine deaminase",
"3.5.4.16 GTP cyclohydrolase I",
"3.5.4.17 Adenosine-phosphate deaminase",
@@ -3813,7 +4552,16 @@ static const char* const kECNum_specific[] = {
"3.5.4.30 dCTP deaminase (dUMP-forming)",
"3.5.4.31 S-methyl-5'-thioadenosine deaminase",
"3.5.4.32 8-oxoguanine deaminase",
- "3.5.4.n2 GTP cyclohydrolase (cyclic phosphate forming)",
+ "3.5.4.33 tRNA(adenine(34)) deaminase",
+ "3.5.4.34 tRNA(Ala)(adenine(37)) deaminase",
+ "3.5.4.35 tRNA(cytosine(8)) deaminase",
+ "3.5.4.36 mRNA(cytosine(6666)) deaminase",
+ "3.5.4.37 Double-stranded RNA adenine deaminase",
+ "3.5.4.38 Single-stranded DNA cytosine deaminase",
+ "3.5.4.39 GTP cyclohydrolase IV",
+ "3.5.4.40 Aminodeoxyfutalosine deaminase",
+ "3.5.4.41 5'-deoxyadenosine deaminase",
+ "3.5.4.n3 Melamine deaminase",
"3.5.5.1 Nitrilase",
"3.5.5.2 Ricinine nitrilase",
"3.5.5.4 Cyanoalanine nitrilase",
@@ -3829,11 +4577,13 @@ static const char* const kECNum_specific[] = {
"3.5.99.6 Glucosamine-6-phosphate deaminase",
"3.5.99.7 1-aminocyclopropane-1-carboxylate deaminase",
"3.5.99.8 5-nitroanthranilic acid aminohydrolase",
+ "3.5.99.9 2-nitroimidazole nitrohydrolase",
+ "3.5.99.10 2-iminobutanoate/2-iminopropanoate deaminase",
"3.6.1.1 Inorganic diphosphatase",
"3.6.1.2 Trimetaphosphatase",
"3.6.1.3 Adenosinetriphosphatase",
"3.6.1.5 Apyrase",
- "3.6.1.6 Nucleoside-diphosphatase",
+ "3.6.1.6 Nucleoside diphosphate phosphatase",
"3.6.1.7 Acylphosphatase",
"3.6.1.8 ATP diphosphatase",
"3.6.1.9 Nucleotide diphosphatase",
@@ -3842,7 +4592,7 @@ static const char* const kECNum_specific[] = {
"3.6.1.12 dCTP diphosphatase",
"3.6.1.13 ADP-ribose diphosphatase",
"3.6.1.14 Adenosine-tetraphosphatase",
- "3.6.1.15 Nucleoside-triphosphatase",
+ "3.6.1.15 Nucleoside-triphosphate phosphatase",
"3.6.1.16 CDP-glycerol diphosphatase",
"3.6.1.17 Bis(5'-nucleosyl)-tetraphosphatase (asymmetrical)",
"3.6.1.18 FAD diphosphatase",
@@ -3854,7 +4604,7 @@ static const char* const kECNum_specific[] = {
"3.6.1.24 Nucleoside phosphoacylhydrolase",
"3.6.1.25 Triphosphatase",
"3.6.1.26 CDP-diacylglycerol diphosphatase",
- "3.6.1.27 Undecaprenyl-diphosphatase",
+ "3.6.1.27 Undecaprenyl-diphosphate phosphatase",
"3.6.1.28 Thiamine-triphosphatase",
"3.6.1.29 Bis(5'-adenosyl)-triphosphatase",
"3.6.1.31 Phosphoribosyl-ATP diphosphatase",
@@ -3872,20 +4622,24 @@ static const char* const kECNum_specific[] = {
"3.6.1.56 2-hydroxy-dATP diphosphatase",
"3.6.1.57 UDP-2,4-diacetamido-2,4,6-trideoxy-beta-L-altropyranose hydrolase",
"3.6.1.58 8-oxo-dGDP phosphatase",
- "3.6.1.59 M(7)GpppX diphosphatase",
+ "3.6.1.59 5'-(N(7)-methyl 5'-triphosphoguanosine)-[mRNA] diphosphatase",
"3.6.1.60 Diadenosine hexaphosphate hydrolase (AMP-forming)",
"3.6.1.61 Diadenosine hexaphosphate hydrolase (ATP-forming)",
- "3.6.1.62 M(7)GpppN-mRNA hydrolase",
+ "3.6.1.62 5'-(N(7)-methylguanosine 5'-triphospho)-[mRNA] hydrolase",
+ "3.6.1.63 Alpha-D-ribose 1-methylphosphonate 5-triphosphate diphosphatase",
+ "3.6.1.64 Inosine diphosphate phosphatase",
+ "3.6.1.65 (d)CTP diphosphatase",
+ "3.6.1.66 XTP/dITP diphosphatase",
+ "3.6.1.67 Dihydroneopterin triphosphate diphosphatase",
"3.6.1.n1 D-tyrosyl-tRNA(Tyr) hydrolase",
"3.6.1.n2 L-cysteinyl-tRNA(Pro)",
"3.6.1.n3 L-cysteinyl-tRNA(Cys) hydrolase",
- "3.6.1.n4 Dihydroneopterin triphosphate diphosphatase",
"3.6.2.1 Adenylylsulfatase",
"3.6.2.2 Phosphoadenylylsulfatase",
"3.6.3.1 Phospholipid-translocating ATPase",
"3.6.3.2 Magnesium-importing ATPase",
"3.6.3.3 Cadmium-exporting ATPase",
- "3.6.3.4 Copper-exporting ATPase",
+ "3.6.3.4 Cu(2+)-exporting ATPase",
"3.6.3.5 Zinc-exporting ATPase",
"3.6.3.6 Proton-exporting ATPase",
"3.6.3.7 Sodium-exporting ATPase",
@@ -3933,7 +4687,8 @@ static const char* const kECNum_specific[] = {
"3.6.3.51 Mitochondrial protein-transporting ATPase",
"3.6.3.52 Chloroplast protein-transporting ATPase",
"3.6.3.53 Ag(+)-exporting ATPase",
- "3.6.3.n1 Cu(+) exporting ATPase",
+ "3.6.3.54 Cu(+) exporting ATPase",
+ "3.6.3.55 Tungstate-importing ATPase",
"3.6.4.1 Myosin ATPase",
"3.6.4.2 Dynein ATPase",
"3.6.4.3 Microtubule-severing ATPase",
@@ -3962,16 +4717,18 @@ static const char* const kECNum_specific[] = {
"3.7.1.6 Acetylpyruvate hydrolase",
"3.7.1.7 Beta-diketone hydrolase",
"3.7.1.8 2,6-dioxo-6-phenylhexa-3-enoate hydrolase",
- "3.7.1.9 2-hydroxymuconate-semialdehyde hydrolase",
+ "3.7.1.9 2-hydroxymuconate-6-semialdehyde hydrolase",
"3.7.1.10 Cyclohexane-1,3-dione hydrolase",
"3.7.1.11 Cyclohexane-1,2-dione hydrolase",
"3.7.1.12 Cobalt-precorrin 5A hydrolase",
"3.7.1.13 2-hydroxy-6-oxo-6-(2-aminophenyl)hexa-2,4-dienoate hydrolase",
"3.7.1.14 2-hydroxy-6-oxonona-2,4-dienedioate hydrolase",
- "3.7.1.15 (+)-caryolan-1-ol synthase",
- "3.7.1.16 Oxepin-CoA hydrolase",
"3.7.1.17 4,5-9,10-diseco-3-hydroxy-5,9,17-trioxoandrosta-1(10),2-diene-4-oate hydrolase",
- "3.7.1.n2 3,5/4-trihydroxycyclohexa-1,2-dione hydrolase",
+ "3.7.1.18 6-oxocamphor hydrolase",
+ "3.7.1.19 2,6-dihydroxypseudooxynicotine hydrolase",
+ "3.7.1.20 3-fumarylpyruvate hydrolase",
+ "3.7.1.21 6-oxocyclohex-1-ene-1-carbonyl-CoA hydratase",
+ "3.7.1.22 3D-(3,5/4)-trihydroxycyclohexane-1,2-dione acylhydrolase (decyclizing)",
"3.8.1.1 Alkylhalidase",
"3.8.1.2 (S)-2-haloacid dehalogenase",
"3.8.1.3 Haloacetate dehalogenase",
@@ -3983,6 +4740,7 @@ static const char* const kECNum_specific[] = {
"3.8.1.10 2-haloacid dehalogenase (configuration-inverting)",
"3.8.1.11 2-haloacid dehalogenase (configuration-retaining)",
"3.9.1.1 Phosphoamidase",
+ "3.9.1.2 Protein arginine phosphatase",
"3.10.1.1 N-sulfoglucosamine sulfohydrolase",
"3.10.1.2 Cyclamate sulfohydrolase",
"3.11.1.1 Phosphonoacetaldehyde hydrolase",
@@ -3991,6 +4749,7 @@ static const char* const kECNum_specific[] = {
"3.12.1.1 Trithionate hydrolase",
"3.13.1.1 UDP-sulfoquinovose synthase",
"3.13.1.3 2'-hydroxybiphenyl-2-sulfinate desulfinase",
+ "3.13.1.4 3-sulfinopropanoyl-CoA desulfinase",
"4.1.1.1 Pyruvate decarboxylase",
"4.1.1.2 Oxalate decarboxylase",
"4.1.1.3 Oxaloacetate decarboxylase",
@@ -4063,7 +4822,7 @@ static const char* const kECNum_specific[] = {
"4.1.1.74 Indolepyruvate decarboxylase",
"4.1.1.75 5-guanidino-2-oxopentanoate decarboxylase",
"4.1.1.76 Arylmalonate decarboxylase",
- "4.1.1.77 4-oxalocrotonate decarboxylase",
+ "4.1.1.77 2-oxo-3-hexenedioate decarboxylase",
"4.1.1.78 Acetylenedicarboxylate decarboxylase",
"4.1.1.79 Sulfopyruvate decarboxylase",
"4.1.1.80 4-hydroxyphenylpyruvate decarboxylase",
@@ -4080,8 +4839,15 @@ static const char* const kECNum_specific[] = {
"4.1.1.91 Salicylate decarboxylase",
"4.1.1.92 Indole-3-carboxylate decarboxylase",
"4.1.1.93 Pyrrole-2-carboxylate decarboxylase",
- "4.1.1.n1 2-oxo-4-hydroxy-4-carboxy-5-ureidoimidazoline decarboxylase",
- "4.1.1.n2 Ethylmalonyl-CoA decarboxylase",
+ "4.1.1.94 Ethylmalonyl-CoA decarboxylase",
+ "4.1.1.95 L-glutamyl-[BtrI acyl-carrier protein] decarboxylase",
+ "4.1.1.96 Carboxynorspermidine decarboxylase",
+ "4.1.1.97 2-oxo-4-hydroxy-4-carboxy-5-ureidoimidazoline decarboxylase",
+ "4.1.1.98 4-hydroxy-3-polyprenylbenzoate decarboxylase",
+ "4.1.1.99 Phosphomevalonate decarboxylase",
+ "4.1.1.100 Prephenate decarboxylase",
+ "4.1.1.101 Malolactic enzyme",
+ "4.1.1.102 Phenacrylate decarboxylase",
"4.1.2.2 Ketotetrose-phosphate aldolase",
"4.1.2.4 Deoxyribose-phosphate aldolase",
"4.1.2.5 L-threonine aldolase",
@@ -4116,16 +4882,21 @@ static const char* const kECNum_specific[] = {
"4.1.2.41 Vanillin synthase",
"4.1.2.42 D-threonine aldolase",
"4.1.2.43 3-hexulose-6-phosphate synthase",
- "4.1.2.44 Benzoyl-CoA-dihydrodiol lyase",
+ "4.1.2.44 2,3-epoxybenzoyl-CoA dihydrolase",
"4.1.2.45 Trans-o-hydroxybenzylidenepyruvate hydratase-aldolase",
"4.1.2.46 Aliphatic (R)-hydroxynitrile lyase",
"4.1.2.47 (S)-hydroxynitrile lyase",
"4.1.2.48 Low-specificity L-threonine aldolase",
"4.1.2.49 L-allo-threonine aldolase",
+ "4.1.2.50 6-carboxytetrahydropterin synthase",
+ "4.1.2.51 2-dehydro-3-deoxy-D-gluconate aldolase",
+ "4.1.2.52 4-hydroxy-2-oxoheptanedioate aldolase",
+ "4.1.2.53 2-keto-3-deoxy-L-rhamnonate aldolase",
+ "4.1.2.54 L-threo-3-deoxy-hexylosonate aldolase",
+ "4.1.2.55 2-dehydro-3-deoxy-phosphogluconate/2-dehydro-3-deoxy-6-phosphogalactonate aldolase",
+ "4.1.2.56 2-amino-4,5-dihydroxy-6-oxo-7-(phosphonooxy)heptanoate synthase",
+ "4.1.2.57 Sulfofructosephosphate aldolase",
"4.1.2.n2 2-hydroxyphytanoyl-CoA lyase",
- "4.1.2.n3 2-keto-3-deoxy-L-rhamnonate aldolase",
- "4.1.2.n4 4-hydroxy-2-oxo-heptane-1,7-dioate aldolase",
- "4.1.2.n5 2-amino-3,7-dideoxy-D-threo-hept-6-ulosonate synthase",
"4.1.3.1 Isocitrate lyase",
"4.1.3.3 N-acetylneuraminate lyase",
"4.1.3.4 Hydroxymethylglutaryl-CoA lyase",
@@ -4136,7 +4907,7 @@ static const char* const kECNum_specific[] = {
"4.1.3.17 4-hydroxy-4-methyl-2-oxoglutarate aldolase",
"4.1.3.22 Citramalate lyase",
"4.1.3.24 Malyl-CoA lyase",
- "4.1.3.25 Citramalyl-CoA lyase",
+ "4.1.3.25 (S)-citramalyl-CoA lyase",
"4.1.3.26 3-hydroxy-3-isohexenylglutaryl-CoA lyase",
"4.1.3.27 Anthranilate synthase",
"4.1.3.30 Methylisocitrate lyase",
@@ -4148,22 +4919,27 @@ static const char* const kECNum_specific[] = {
"4.1.3.39 4-hydroxy-2-oxovalerate aldolase",
"4.1.3.40 Chorismate lyase",
"4.1.3.41 3-hydroxy-D-aspartate aldolase",
+ "4.1.3.42 (4S)-4-hydroxy-2-oxoglutarate aldolase",
+ "4.1.3.43 4-hydroxy-2-oxohexanoate aldolase",
+ "4.1.3.44 tRNA 4-demethylwyosine synthase (AdoMet-dependent)",
+ "4.1.3.45 3-hydroxybenzoate synthase",
+ "4.1.3.46 (R)-citramalyl-CoA lyase",
"4.1.99.1 Tryptophanase",
"4.1.99.2 Tyrosine phenol-lyase",
"4.1.99.3 Deoxyribodipyrimidine photo-lyase",
- "4.1.99.5 Octadecanal decarbonylase",
+ "4.1.99.5 Aldehyde oxygenase (deformylating)",
"4.1.99.11 Benzylsuccinate synthase",
"4.1.99.12 3,4-dihydroxy-2-butanone-4-phosphate synthase",
"4.1.99.13 (6-4)DNA photolyase",
"4.1.99.14 Spore photoproduct lyase",
"4.1.99.16 Geosmin synthase",
"4.1.99.17 Phosphomethylpyrimidine synthase",
- "4.1.99.18 Cyclic pyranopterin monophosphate synthase",
+ "4.1.99.18 Cyclic pyranopterin phosphate synthase",
"4.1.99.19 2-iminoacetate synthase",
+ "4.1.99.20 3-amino-4-hydroxybenzoate synthase",
"4.2.1.1 Carbonate dehydratase",
"4.2.1.2 Fumarate hydratase",
"4.2.1.3 Aconitate hydratase",
- "4.2.1.4 Citrate dehydratase",
"4.2.1.5 Arabinonate dehydratase",
"4.2.1.6 Galactonate dehydratase",
"4.2.1.7 Altronate dehydratase",
@@ -4201,16 +4977,12 @@ static const char* const kECNum_specific[] = {
"4.2.1.49 Urocanate hydratase",
"4.2.1.50 Pyrazolylalanine synthase",
"4.2.1.51 Prephenate dehydratase",
- "4.2.1.52 Dihydrodipicolinate synthase",
"4.2.1.53 Oleate hydratase",
"4.2.1.54 Lactoyl-CoA dehydratase",
"4.2.1.55 3-hydroxybutyryl-CoA dehydratase",
"4.2.1.56 Itaconyl-CoA hydratase",
"4.2.1.57 Isohexenylglutaconyl-CoA hydratase",
- "4.2.1.58 Crotonoyl-[acyl-carrier-protein] hydratase",
- "4.2.1.59 3-hydroxyoctanoyl-[acyl-carrier-protein] dehydratase",
- "4.2.1.60 3-hydroxydecanoyl-[acyl-carrier-protein] dehydratase",
- "4.2.1.61 3-hydroxypalmitoyl-[acyl-carrier-protein] dehydratase",
+ "4.2.1.59 3-hydroxyacyl-[acyl-carrier-protein] dehydratase",
"4.2.1.62 5-alpha-hydroxysteroid dehydratase",
"4.2.1.65 3-cyanoalanine hydratase",
"4.2.1.66 Cyanide hydratase",
@@ -4233,7 +5005,6 @@ static const char* const kECNum_specific[] = {
"4.2.1.85 Dimethylmaleate hydratase",
"4.2.1.87 Octopamine dehydratase",
"4.2.1.88 (R)-synephrine",
- "4.2.1.89 Carnitine dehydratase",
"4.2.1.90 L-rhamnonate dehydratase",
"4.2.1.91 Arogenate dehydratase",
"4.2.1.92 Hydroperoxide dehydratase",
@@ -4275,16 +5046,47 @@ static const char* const kECNum_specific[] = {
"4.2.1.129 Squalene--hopanol cyclase",
"4.2.1.130 D-lactate dehydratase",
"4.2.1.131 Carotenoid 1,2-hydratase",
+ "4.2.1.132 2-hydroxyhexa-2,4-dienoate hydratase",
+ "4.2.1.133 Copal-8-ol diphosphate hydratase",
+ "4.2.1.134 Very-long-chain (3R)-3-hydroxyacyl-CoA dehydratase",
+ "4.2.1.135 UDP-N-acetylglucosamine 4,6-dehydratase (configuration-retaining)",
+ "4.2.1.136 ADP-dependent NAD(P)H-hydrate dehydratase",
+ "4.2.1.137 Sporulenol synthase",
+ "4.2.1.138 (+)-caryolan-1-ol synthase",
+ "4.2.1.139 Medicarpin synthase",
+ "4.2.1.140 Gluconate/galactonate dehydratase",
+ "4.2.1.141 2-dehydro-3-deoxy-D-arabinonate dehydratase",
+ "4.2.1.142 5'-oxoaverantin cyclase",
+ "4.2.1.143 Versicolorin B synthase",
+ "4.2.1.144 3-amino-5-hydroxybenzoate synthase",
+ "4.2.1.145 Capreomycidine synthase",
+ "4.2.1.146 L-galactonate dehydratase",
+ "4.2.1.147 5,6,7,8-tetrahydromethanopterin hydro-lyase",
+ "4.2.1.148 2-methylfumaryl-CoA hydratase",
+ "4.2.1.149 Crotonobetainyl-CoA hydratase",
+ "4.2.1.150 Short-chain-enoyl-CoA hydratase",
+ "4.2.1.151 Chorismate dehydratase",
+ "4.2.1.152 Hydroperoxy icosatetraenoate dehydratase",
+ "4.2.1.153 3-methylfumaryl-CoA hydratase",
+ "4.2.1.154 Tetracenomycin F2 cyclase",
+ "4.2.1.155 Methylthioacryloyl-CoA hydratase",
+ "4.2.1.156 L-talarate dehydratase",
+ "4.2.1.157 (R)-2-hydroxyisocaproyl-CoA dehydratase",
+ "4.2.1.158 Galactarate dehydratase (D-threo-forming)",
+ "4.2.1.159 dTDP-4-dehydro-6-deoxy-alpha-D-glucopyranose 2,3-dehydratase",
+ "4.2.1.160 2,5-diamino-6-(5-phospho-D-ribosylamino)pyrimidin-4(3H)-one isomerase/dehydratase",
+ "4.2.1.161 Bisanhydrobacterioruberin hydratase",
+ "4.2.1.162 6-deoxy-6-sulfo-D-gluconate dehydratase",
"4.2.2.1 Hyaluronate lyase",
"4.2.2.2 Pectate lyase",
- "4.2.2.3 Poly(beta-D-mannuronate) lyase",
+ "4.2.2.3 Mannuronate-specific alginate lyase",
"4.2.2.5 Chondroitin AC lyase",
"4.2.2.6 Oligogalacturonide lyase",
"4.2.2.7 Heparin lyase",
"4.2.2.8 Heparin-sulfate lyase",
"4.2.2.9 Pectate disaccharide-lyase",
"4.2.2.10 Pectin lyase",
- "4.2.2.11 Poly(alpha-L-guluronate) lyase",
+ "4.2.2.11 Guluronate-specific alginate lyase",
"4.2.2.12 Xanthan lyase",
"4.2.2.13 Exo-(1->4)-alpha-D-glucan lyase",
"4.2.2.14 Glucuronan lyase",
@@ -4299,6 +5101,7 @@ static const char* const kECNum_specific[] = {
"4.2.2.23 Rhamnogalacturonan endolyase",
"4.2.2.24 Rhamnogalacturonan exolyase",
"4.2.2.25 Gellan lyase",
+ "4.2.2.26 Oligo-alginate lyase",
"4.2.2.n1 Peptidoglycan lytic exotransglycosylase",
"4.2.2.n2 Peptidoglycan lytic endotransglycosylase",
"4.2.3.1 Threonine synthase",
@@ -4314,7 +5117,6 @@ static const char* const kECNum_specific[] = {
"4.2.3.11 Sabinene-hydrate synthase",
"4.2.3.12 6-pyruvoyltetrahydropterin synthase",
"4.2.3.13 (+)-delta-cadinene synthase",
- "4.2.3.14 Pinene synthase",
"4.2.3.15 Myrcene synthase",
"4.2.3.16 (4S)-limonene synthase",
"4.2.3.17 Taxadiene synthase",
@@ -4405,16 +5207,62 @@ static const char* const kECNum_specific[] = {
"4.2.3.102 Sesquithujene synthase",
"4.2.3.103 Ent-isokaurene synthase",
"4.2.3.104 Alpha-humulene synthase",
+ "4.2.3.105 Tricyclene synthase",
+ "4.2.3.106 (E)-beta-ocimene synthase",
+ "4.2.3.107 (+)-car-3-ene synthase",
+ "4.2.3.108 1,8-cineole synthase",
+ "4.2.3.109 (-)-sabinene synthase",
+ "4.2.3.110 (+)-sabinene synthase",
+ "4.2.3.111 (-)-alpha-terpineol synthase",
+ "4.2.3.112 (+)-alpha-terpineol synthase",
+ "4.2.3.113 Terpinolene synthase",
+ "4.2.3.114 Gamma-terpinene synthase",
+ "4.2.3.115 Alpha-terpinene synthase",
+ "4.2.3.116 (+)-camphene synthase",
+ "4.2.3.117 (-)-camphene synthase",
+ "4.2.3.118 2-methylisoborneol synthase",
+ "4.2.3.119 (-)-alpha-pinene synthase",
+ "4.2.3.120 (-)-beta-pinene synthase",
+ "4.2.3.121 (+)-alpha-pinene synthase",
+ "4.2.3.122 (+)-beta-pinene synthase",
+ "4.2.3.123 Beta-sesquiphellandrene synthase",
+ "4.2.3.124 2-deoxy-scyllo-inosose synthase",
+ "4.2.3.125 Alpha-muurolene synthase",
+ "4.2.3.126 Gamma-muurolene synthase",
+ "4.2.3.127 Beta-copaene synthase",
+ "4.2.3.128 Beta-cubebene synthase",
+ "4.2.3.129 (+)-sativene synthase",
+ "4.2.3.130 Tetraprenyl-beta-curcumene synthase",
+ "4.2.3.131 Miltiradiene synthase",
+ "4.2.3.132 Neoabietadiene synthase",
+ "4.2.3.133 Alpha-copaene synthase",
+ "4.2.3.134 5-phosphonooxy-L-lysine phospho-lyase",
+ "4.2.3.135 Delta(6)-protoilludene synthase",
+ "4.2.3.136 Alpha-isocomene synthase",
+ "4.2.3.137 (E)-2-epi-beta-caryophyllene synthase",
+ "4.2.3.138 (+)-epi-alpha-bisabolol synthase",
+ "4.2.3.139 Valerena-4,7(11)-diene synthase",
+ "4.2.3.140 Cis-abienol synthase",
+ "4.2.3.141 Sclareol synthase",
+ "4.2.3.142 7-epizingiberene synthase ((2Z,6Z)-farnesyl diphosphate cyclizing)",
+ "4.2.3.143 Kunzeaol synthase",
+ "4.2.3.144 Geranyllinalool synthase",
+ "4.2.3.145 Ophiobolin F synthase",
+ "4.2.3.146 Cyclooctat-9-en-7-ol synthase",
+ "4.2.3.147 Pimaradiene synthase",
+ "4.2.3.148 Cembrene C synthase",
+ "4.2.3.149 Nephthenol synthase",
+ "4.2.3.150 Cembrene A synthase",
+ "4.2.3.151 Pentamethylcyclopentadecatrienol synthase",
+ "4.2.3.152 2-epi-5-epi-valiolone synthase",
+ "4.2.3.153 (5-formylfuran-3-yl)methyl phosphate synthase",
"4.2.3.n2 Delta-selinene synthase",
- "4.2.3.n4 (-)-camphene synthase",
- "4.2.3.n6 Terpinolene synthase",
- "4.2.3.n7 (-)-alpha-pinene synthase",
"4.2.3.n11 Selinene synthase",
- "4.2.3.n14 2-methylisoborneol synthase",
"4.2.99.12 Carboxymethyloxysuccinate lyase",
"4.2.99.18 DNA-(apurinic or apyrimidinic site) lyase",
"4.2.99.20 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase",
"4.2.99.21 Isochorismate lyase",
+ "4.2.99.22 Tuliposide A-converting enzyme",
"4.3.1.1 Aspartate ammonia-lyase",
"4.3.1.2 Methylaspartate ammonia-lyase",
"4.3.1.3 Histidine ammonia-lyase",
@@ -4436,21 +5284,26 @@ static const char* const kECNum_specific[] = {
"4.3.1.23 Tyrosine ammonia-lyase",
"4.3.1.24 Phenylalanine ammonia-lyase",
"4.3.1.25 Phenylalanine/tyrosine ammonia-lyase",
- "4.3.1.26 Chromopyrrolate synthase",
"4.3.1.27 Threo-3-hydroxy-D-aspartate ammonia-lyase",
+ "4.3.1.28 L-lysine cyclodeaminase",
+ "4.3.1.29 D-glucosaminate-6-phosphate ammonia lyase",
+ "4.3.1.30 dTDP-4-amino-4,6-dideoxy-D-glucose ammonia-lyase",
"4.3.2.1 Argininosuccinate lyase",
"4.3.2.2 Adenylosuccinate lyase",
"4.3.2.3 Ureidoglycolate lyase",
"4.3.2.4 Purine imidazole-ring cyclase",
"4.3.2.5 Peptidylamidoglycolate lyase",
+ "4.3.2.6 Gamma-L-glutamyl-butirosin B gamma-glutamyl cyclotransferase",
"4.3.3.1 3-ketovalidoxylamine C-N-lyase",
"4.3.3.2 Strictosidine synthase",
"4.3.3.3 Deacetylisoipecoside synthase",
"4.3.3.4 Deacetylipecoside synthase",
"4.3.3.5 4'-demethylrebeccamycin synthase",
"4.3.3.6 Pyridoxal 5'-phosphate synthase (glutamine hydrolyzing)",
- "4.3.3.n1 3,4-AHBA synthase",
+ "4.3.3.7 4-hydroxy-tetrahydrodipicolinate synthase",
"4.3.99.2 Carboxybiotin decarboxylase",
+ "4.3.99.3 7-carboxy-7-deazaguanine synthase",
+ "4.3.99.4 Choline trimethylamine-lyase",
"4.4.1.1 Cystathionine gamma-lyase",
"4.4.1.2 Homocysteine desulfhydrase",
"4.4.1.3 Dimethylpropiothetin dethiomethylase",
@@ -4473,6 +5326,14 @@ static const char* const kECNum_specific[] = {
"4.4.1.23 2-hydroxypropyl-CoM lyase",
"4.4.1.24 (2R)-sulfolactate sulfo-lyase",
"4.4.1.25 L-cysteate sulfo-lyase",
+ "4.4.1.26 Olivetolic acid cyclase",
+ "4.4.1.27 Carbon disulfide lyase",
+ "4.4.1.28 L-cysteine desulfidase",
+ "4.4.1.29 Phycobiliprotein cysteine-84 phycobilin lyase",
+ "4.4.1.30 Phycobiliprotein beta-cysteine-155 phycobilin lyase",
+ "4.4.1.31 Phycoerythrocyanin alpha-cysteine-84 phycoviolobilin lyase/isomerase",
+ "4.4.1.32 C-phycocyanin alpha-cysteine-84 phycocyanobilin lyase",
+ "4.4.1.33 R-phycocyanin alpha-cysteine-84 phycourobilin lyase/isomerase",
"4.5.1.1 DDT-dehydrochlorinase",
"4.5.1.2 3-chloro-D-alanine dehydrochlorinase",
"4.5.1.3 Dichloromethane dehalogenase",
@@ -4485,7 +5346,9 @@ static const char* const kECNum_specific[] = {
"4.6.1.13 Phosphatidylinositol diacylglycerol-lyase",
"4.6.1.14 Glycosylphosphatidylinositol diacylglycerol-lyase",
"4.6.1.15 FAD-AMP lyase (cyclizing)",
- "4.99.1.1 Ferrochelatase",
+ "4.6.1.16 tRNA-intron lyase",
+ "4.7.1.1 Alpha-D-ribose 1-methylphosphonate 5-phosphate C-P-lyase",
+ "4.99.1.1 Protoporphyrin ferrochelatase",
"4.99.1.2 Alkylmercury lyase",
"4.99.1.3 Sirohydrochlorin cobaltochelatase",
"4.99.1.4 Sirohydrochlorin ferrochelatase",
@@ -4511,7 +5374,9 @@ static const char* const kECNum_specific[] = {
"5.1.1.16 Protein-serine epimerase",
"5.1.1.17 Isopenicillin-N epimerase",
"5.1.1.18 Serine racemase",
- "5.1.1.n1 L-Ala-D/L-Glu epimerase",
+ "5.1.1.19 O-ureido-serine racemase",
+ "5.1.1.20 L-Ala-D/L-Glu epimerase",
+ "5.1.1.21 Isoleucine 2-epimerase",
"5.1.2.1 Lactate racemase",
"5.1.2.2 Mandelate racemase",
"5.1.2.3 3-hydroxybutyryl-CoA epimerase",
@@ -4531,7 +5396,7 @@ static const char* const kECNum_specific[] = {
"5.1.3.11 Cellobiose epimerase",
"5.1.3.12 UDP-glucuronate 5'-epimerase",
"5.1.3.13 dTDP-4-dehydrorhamnose 3,5-epimerase",
- "5.1.3.14 UDP-N-acetylglucosamine 2-epimerase",
+ "5.1.3.14 UDP-N-acetylglucosamine 2-epimerase (non-hydrolyzing)",
"5.1.3.15 Glucose-6-phosphate 1-epimerase",
"5.1.3.16 UDP-glucosamine 4-epimerase",
"5.1.3.17 Heparosan-N-sulfate-glucuronate 5-epimerase",
@@ -4542,13 +5407,27 @@ static const char* const kECNum_specific[] = {
"5.1.3.22 L-ribulose-5-phosphate 3-epimerase",
"5.1.3.23 UDP-2,3-diacetamido-2,3-dideoxyglucuronic acid 2-epimerase",
"5.1.3.24 N-acetylneuraminate epimerase",
- "5.1.3.n2 L-fucose mutarotase",
- "5.1.3.n3 L-rhamnose mutarotase",
+ "5.1.3.25 dTDP-L-rhamnose 4-epimerase",
+ "5.1.3.26 N-acetyl-alpha-D-glucosaminyl-diphospho-ditrans,octacis-undecaprenol 4-epimerase",
+ "5.1.3.27 dTDP-4-dehydro-6-deoxy-D-glucose 3-epimerase",
+ "5.1.3.28 UDP-N-acetyl-L-fucosamine synthase",
+ "5.1.3.29 L-fucose mutarotase",
+ "5.1.3.30 D-psicose 3-epimerase",
+ "5.1.3.31 D-tagatose 3-epimerase",
+ "5.1.3.32 L-rhamnose mutarotase",
+ "5.1.3.33 2-epi-5-epi-valiolone epimerase",
+ "5.1.3.34 Monoglucosyldiacylglycerol epimerase",
+ "5.1.3.35 2-epi-5-epi-valiolone 7-phosphate 2-epimerase",
+ "5.1.3.36 Heparosan-glucuronate 5-epimerase",
+ "5.1.3.37 Mannuronan 5-epimerase",
"5.1.99.1 Methylmalonyl-CoA epimerase",
"5.1.99.2 16-hydroxysteroid epimerase",
"5.1.99.3 Allantoin racemase",
"5.1.99.4 Alpha-methylacyl-CoA racemase",
"5.1.99.5 Hydantoin racemase",
+ "5.1.99.6 NAD(P)H-hydrate epimerase",
+ "5.1.99.7 Dihydroneopterin triphosphate 2'-epimerase",
+ "5.1.99.8 7,8-dihydroneopterin epimerase",
"5.2.1.1 Maleate isomerase",
"5.2.1.2 Maleylacetoacetate isomerase",
"5.2.1.4 Maleylpyruvate isomerase",
@@ -4559,8 +5438,9 @@ static const char* const kECNum_specific[] = {
"5.2.1.10 2-chloro-4-carboxymethylenebut-2-en-1,4-olide isomerase",
"5.2.1.12 Zeta-carotene isomerase",
"5.2.1.13 Prolycopene isomerase",
+ "5.2.1.14 Beta-carotene isomerase",
"5.3.1.1 Triose-phosphate isomerase",
- "5.3.1.3 Arabinose isomerase",
+ "5.3.1.3 D-arabinose isomerase",
"5.3.1.4 L-arabinose isomerase",
"5.3.1.5 Xylose isomerase",
"5.3.1.6 Ribose-5-phosphate isomerase",
@@ -4582,13 +5462,18 @@ static const char* const kECNum_specific[] = {
"5.3.1.26 Galactose-6-phosphate isomerase",
"5.3.1.27 6-phospho-3-hexuloisomerase",
"5.3.1.28 D-sedoheptulose 7-phosphate isomerase",
- "5.3.1.n1 5-deoxy-glucuronate isomerase",
- "5.3.1.n2 Ribose 1,5-bisphosphate isomerase",
+ "5.3.1.29 Ribose 1,5-bisphosphate isomerase",
+ "5.3.1.30 5-deoxy-glucuronate isomerase",
+ "5.3.1.31 Sulfoquinovose isomerase",
+ "5.3.1.32 (4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase",
"5.3.2.1 Phenylpyruvate tautomerase",
"5.3.2.2 Oxaloacetate tautomerase",
"5.3.2.3 TDP-4-oxo-6-deoxy-alpha-D-glucose-3,4-oxoisomerase (dTDP-3-dehydro-6-deoxy-alpha-D-galactopyranose-forming)",
"5.3.2.4 TDP-4-oxo-6-deoxy-alpha-D-glucose-3,4-oxoisomerase (dTDP-3-dehydro-6-deoxy-alpha-D-glucopyranose-forming)",
- "5.3.2.n1 2,3-diketo-5-methylthiopentyl-1-phosphate enolase",
+ "5.3.2.5 2,3-diketo-5-methylthiopentyl-1-phosphate enolase",
+ "5.3.2.6 2-hydroxymuconate tautomerase",
+ "5.3.2.7 Ascopyrone tautomerase",
+ "5.3.2.8 4-oxalomesaconate tautomerase",
"5.3.3.1 Steroid Delta-isomerase",
"5.3.3.2 Isopentenyl-diphosphate Delta-isomerase",
"5.3.3.3 Vinylacetyl-CoA Delta-isomerase",
@@ -4603,10 +5488,9 @@ static const char* const kECNum_specific[] = {
"5.3.3.12 L-dopachrome isomerase",
"5.3.3.13 Polyenoic fatty acid isomerase",
"5.3.3.14 Trans-2-decenoyl-[acyl-carrier-protein] isomerase",
- "5.3.3.15 Ascopyrone tautomerase",
- "5.3.3.16 4-oxalomesaconate tautomerase",
"5.3.3.17 Trans-2,3-dihydro-3-hydroxyanthranilate isomerase",
"5.3.3.18 2-(1,2-epoxy-1,2-dihydrophenyl)acetyl-CoA isomerase",
+ "5.3.3.19 3-((4R)-4-hydroxycyclohexa-1,5-dien-1-yl)-2-oxopropanoate isomerase",
"5.3.4.1 Protein disulfide-isomerase",
"5.3.99.2 Prostaglandin-D synthase",
"5.3.99.3 Prostaglandin-E synthase",
@@ -4616,11 +5500,12 @@ static const char* const kECNum_specific[] = {
"5.3.99.7 Styrene-oxide isomerase",
"5.3.99.8 Capsanthin/capsorubin synthase",
"5.3.99.9 Neoxanthin synthase",
- "5.3.99.n1 2-keto-myo-inositol isomerase",
+ "5.3.99.10 Thiazole tautomerase",
+ "5.3.99.11 2-keto-myo-inositol isomerase",
"5.4.1.1 Lysolecithin acylmutase",
- "5.4.1.2 Precorrin-8X methylmutase",
- "5.4.2.1 Phosphoglycerate mutase",
- "5.4.2.2 Phosphoglucomutase",
+ "5.4.1.3 2-methylfumaryl-CoA isomerase",
+ "5.4.1.4 D-galactarolactone isomerase",
+ "5.4.2.2 Phosphoglucomutase (alpha-D-glucose-1,6-bisphosphate-dependent)",
"5.4.2.3 Phosphoacetylglucosamine mutase",
"5.4.2.4 Bisphosphoglycerate mutase",
"5.4.2.5 Phosphoglucomutase (glucose-cofactor)",
@@ -4629,6 +5514,8 @@ static const char* const kECNum_specific[] = {
"5.4.2.8 Phosphomannomutase",
"5.4.2.9 Phosphoenolpyruvate mutase",
"5.4.2.10 Phosphoglucosamine mutase",
+ "5.4.2.11 Phosphoglycerate mutase (2,3-diphosphoglycerate-dependent)",
+ "5.4.2.12 Phosphoglycerate mutase (2,3-diphosphoglycerate-independent)",
"5.4.3.2 Lysine 2,3-aminomutase",
"5.4.3.3 Beta-lysine 5,6-aminomutase",
"5.4.3.4 D-lysine 5,6-aminomutase",
@@ -4636,12 +5523,16 @@ static const char* const kECNum_specific[] = {
"5.4.3.6 Tyrosine 2,3-aminomutase",
"5.4.3.7 Leucine 2,3-aminomutase",
"5.4.3.8 Glutamate-1-semialdehyde 2,1-aminomutase",
+ "5.4.3.9 Glutamate 2,3-aminomutase",
+ "5.4.3.10 Phenylalanine aminomutase (L-beta-phenylalanine forming)",
+ "5.4.3.11 Phenylalanine aminomutase (D-beta-phenylalanine forming)",
"5.4.4.1 (Hydroxyamino)benzene mutase",
"5.4.4.2 Isochorismate synthase",
"5.4.4.3 3-(hydroxyamino)phenol mutase",
"5.4.4.4 Geraniol isomerase",
"5.4.4.5 9,12-octadecadienoate 8-hydroperoxide 8R-isomerase",
"5.4.4.6 9,12-octadecadienoate 8-hydroperoxide 8S-isomerase",
+ "5.4.4.7 Hydroperoxy icosatetraenoate isomerase",
"5.4.99.1 Methylaspartate mutase",
"5.4.99.2 Methylmalonyl-CoA mutase",
"5.4.99.3 2-acetolactate mutase",
@@ -4697,6 +5588,12 @@ static const char* const kECNum_specific[] = {
"5.4.99.55 Delta-amyrin synthase",
"5.4.99.56 Tirucalladienol synthase",
"5.4.99.57 Baruol synthase",
+ "5.4.99.58 Methylornithine synthase",
+ "5.4.99.59 dTDP-fucopyranose mutase",
+ "5.4.99.60 Cobalt-precorrin-8 methylmutase",
+ "5.4.99.61 Precorrin-8X methylmutase",
+ "5.4.99.62 D-ribose pyranase",
+ "5.4.99.63 Ethylmalonyl-CoA mutase",
"5.5.1.1 Muconate cycloisomerase",
"5.5.1.2 3-carboxy-cis,cis-muconate cycloisomerase",
"5.5.1.3 Tetrahydroxypteridine cycloisomerase",
@@ -4704,7 +5601,7 @@ static const char* const kECNum_specific[] = {
"5.5.1.5 Carboxy-cis,cis-muconate cyclase",
"5.5.1.6 Chalcone isomerase",
"5.5.1.7 Chloromuconate cycloisomerase",
- "5.5.1.8 Bornyl diphosphate synthase",
+ "5.5.1.8 (+)-bornyl diphosphate synthase",
"5.5.1.9 Cycloeucalenol cycloisomerase",
"5.5.1.10 Alpha-pinene-oxide decyclase",
"5.5.1.11 Dichloromuconate cycloisomerase",
@@ -4717,7 +5614,12 @@ static const char* const kECNum_specific[] = {
"5.5.1.18 Lycopene epsilon-cyclase",
"5.5.1.19 Lycopene beta-cyclase",
"5.5.1.20 Prosolanapyrone-III cycloisomerase",
- "5.5.1.n1 D-ribose pyranase",
+ "5.5.1.22 (-)-bornyl diphosphate synthase",
+ "5.5.1.23 Aklanonic acid methyl ester cyclase",
+ "5.5.1.24 Tocopherol cyclase",
+ "5.5.1.25 3,6-anhydro-L-galactonate cycloisomerase",
+ "5.5.1.26 Nogalonic acid methyl ester cyclase",
+ "5.5.1.27 D-galactarolactone cycloisomerase",
"5.99.1.1 Thiocyanate isomerase",
"5.99.1.2 DNA topoisomerase",
"5.99.1.3 DNA topoisomerase (ATP-hydrolyzing)",
@@ -4745,10 +5647,10 @@ static const char* const kECNum_specific[] = {
"6.1.1.22 Asparagine--tRNA ligase",
"6.1.1.23 Aspartate--tRNA(Asn) ligase",
"6.1.1.24 Glutamate--tRNA(Gln) ligase",
- "6.1.1.25 Lysine--tRNA(Pyl) ligase",
"6.1.1.26 Pyrrolysine--tRNA(Pyl) ligase",
"6.1.1.27 O-phosphoserine--tRNA ligase",
"6.1.2.1 D-alanine--(R)-lactate ligase",
+ "6.1.2.2 Nebramycin 5' synthase",
"6.2.1.1 Acetate--CoA ligase",
"6.2.1.2 Butyrate--CoA ligase",
"6.2.1.3 Long-chain-fatty-acid--CoA ligase",
@@ -4784,6 +5686,15 @@ static const char* const kECNum_specific[] = {
"6.2.1.35 ACP-SH:acetate ligase",
"6.2.1.36 3-hydroxypropionyl-CoA synthase",
"6.2.1.37 3-hydroxybenzoate--CoA ligase",
+ "6.2.1.38 (2,2,3-trimethyl-5-oxocyclopent-3-enyl)acetyl-CoA synthase",
+ "6.2.1.39 [Butirosin acyl-carrier protein]--L-glutamate ligase",
+ "6.2.1.40 4-hydroxybutyrate--CoA ligase",
+ "6.2.1.41 3-((3aS,4S,7aS)-7a-methyl-1,5-dioxo-octahydro-1H-inden-4-yl)propanoate--CoA ligase",
+ "6.2.1.42 3-oxocholest-4-en-26-oate--CoA ligase",
+ "6.2.1.43 2-hydroxy-7-methoxy-5-methyl-1-naphthoate--CoA ligase",
+ "6.2.1.44 3-(methylthio)propionyl--CoA ligase",
+ "6.2.1.45 E1 ubiquitin-activating enzyme",
+ "6.2.1.46 L-allo-isoleucine:holo-[CmaA peptidyl-carrier protein] ligase",
"6.2.1.n2 Amino acid--[acyl-carrier-protein] ligase",
"6.2.1.n3 Malonate--CoA ligase",
"6.3.1.1 Aspartate--ammonia ligase",
@@ -4799,7 +5710,12 @@ static const char* const kECNum_specific[] = {
"6.3.1.12 D-aspartate ligase",
"6.3.1.13 L-cysteine:1D-myo-inositol 2-amino-2-deoxy-alpha-D-glucopyranoside ligase",
"6.3.1.14 Diphthine--ammonia ligase",
- "6.3.2.1 Pantoate--beta-alanine ligase",
+ "6.3.1.15 8-demethylnovobiocic acid synthase",
+ "6.3.1.17 Beta-citrylglutamate synthase",
+ "6.3.1.18 Gamma-glutamylanilide synthase",
+ "6.3.1.19 Prokaryotic ubiquitin-like protein ligase",
+ "6.3.1.20 Lipoate--protein ligase",
+ "6.3.2.1 Pantoate--beta-alanine ligase (AMP-forming)",
"6.3.2.2 Glutamate--cysteine ligase",
"6.3.2.3 Glutathione synthase",
"6.3.2.4 D-alanine--D-alanine ligase",
@@ -4816,15 +5732,12 @@ static const char* const kECNum_specific[] = {
"6.3.2.16 D-alanine--alanyl-poly(glycerolphosphate) ligase",
"6.3.2.17 Tetrahydrofolate synthase",
"6.3.2.18 Gamma-glutamylhistamine synthase",
- "6.3.2.19 Ubiquitin--protein ligase",
"6.3.2.20 Indoleacetate--lysine synthetase",
"6.3.2.21 Ubiquitin--calmodulin ligase",
"6.3.2.23 Homoglutathione synthase",
"6.3.2.24 Tyrosine--arginine ligase",
"6.3.2.25 Tubulin--tyrosine ligase",
"6.3.2.26 N-(5-amino-5-carboxypentanoyl)-L-cysteinyl-D-valine synthase",
- "6.3.2.27 Aerobactin synthase",
- "6.3.2.28 L-amino-acid alpha-ligase",
"6.3.2.29 Cyanophycin synthase (L-aspartate-adding)",
"6.3.2.30 Cyanophycin synthase (L-arginine-adding)",
"6.3.2.31 Coenzyme F420-0:L-glutamate ligase",
@@ -4834,16 +5747,26 @@ static const char* const kECNum_specific[] = {
"6.3.2.35 D-alanine--D-serine ligase",
"6.3.2.36 4-phosphopantoate--beta-alanine ligase",
"6.3.2.37 UDP-N-acetylmuramoyl-L-alanyl-D-glutamate--D-lysine ligase",
- "6.3.2.n2 Pup--protein ligase",
+ "6.3.2.38 N(2)-citryl-N(6)-acetyl-N(6)-hydroxylysine synthase",
+ "6.3.2.39 Aerobactin synthase",
+ "6.3.2.40 Cyclopeptine synthase",
+ "6.3.2.41 N-acetylaspartylglutamate synthase",
+ "6.3.2.42 N-acetylaspartylglutamylglutamate synthase",
+ "6.3.2.43 [Lysine-biosynthesis-protein LysW]--L-2-aminoadipate ligase",
+ "6.3.2.44 Pantoate--beta-alanine ligase (ADP-forming)",
+ "6.3.2.45 UDP-N-acetylmuramate L-alanyl-gamma-D-glutamyl-meso-2,6-diaminoheptanedioate ligase",
+ "6.3.2.46 Fumarate--(S)-2,3-diaminopropanoate ligase",
+ "6.3.2.47 Dapdiamide A synthase",
+ "6.3.2.48 L-arginine-specific L-amino acid ligase",
+ "6.3.2.49 L-alanine--L-anticapsin ligase",
"6.3.2.n3 ISG15--protein ligase",
- "6.3.2.n4 Alpha-aminoadipate--LysW ligase",
- "6.3.2.n5 Pantoate--beta-alanine ligase (ADP-forming)",
"6.3.3.1 Phosphoribosylformylglycinamidine cyclo-ligase",
"6.3.3.2 5-formyltetrahydrofolate cyclo-ligase",
"6.3.3.3 Dethiobiotin synthase",
"6.3.3.4 (Carboxyethyl)arginine beta-lactam-synthase",
- "6.3.4.1 GMP synthase",
- "6.3.4.2 CTP synthase",
+ "6.3.3.5 O-ureido-D-serine cyclo-ligase",
+ "6.3.3.6 Carbapenam-3-carboxylate synthase",
+ "6.3.4.2 CTP synthase (glutamine hydrolyzing)",
"6.3.4.3 Formate--tetrahydrofolate ligase",
"6.3.4.4 Adenylosuccinate synthase",
"6.3.4.5 Argininosuccinate synthase",
@@ -4861,6 +5784,11 @@ static const char* const kECNum_specific[] = {
"6.3.4.17 Formate--dihydrofolate ligase",
"6.3.4.18 5-(carboxyamino)imidazole ribonucleotide synthase",
"6.3.4.19 tRNA(Ile)-lysidine synthetase",
+ "6.3.4.20 7-cyano-7-deazaguanine synthase",
+ "6.3.4.21 Nicotinate phosphoribosyltransferase",
+ "6.3.4.22 tRNA(Ile)(2)-agmatinylcytidine synthase",
+ "6.3.4.23 Formate--phosphoribosylaminoimidazolecarboxamide ligase",
+ "6.3.4.24 Tyramine--L-glutamate ligase",
"6.3.5.1 NAD(+) synthase (glutamine-hydrolyzing)",
"6.3.5.2 GMP synthase (glutamine-hydrolyzing)",
"6.3.5.3 Phosphoribosylformylglycinamidine synthase",
@@ -4882,7 +5810,10 @@ static const char* const kECNum_specific[] = {
"6.5.1.1 DNA ligase (ATP)",
"6.5.1.2 DNA ligase (NAD(+))",
"6.5.1.3 RNA ligase (ATP)",
- "6.5.1.4 RNA-3'-phosphate cyclase",
+ "6.5.1.4 RNA 3'-terminal-phosphate cyclase (ATP)",
+ "6.5.1.5 RNA 3'-terminal-phosphate cyclase (GTP)",
+ "6.5.1.6 DNA ligase (ATP or NAD(+))",
+ "6.5.1.7 DNA ligase (ATP, ADP or GTP)",
"6.6.1.1 Magnesium chelatase",
"6.6.1.2 Cobaltochelatase"
};
diff --git a/api/explore.h b/api/explore.h
index 20044b92..b662d6e8 100644
--- a/api/explore.h
+++ b/api/explore.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/30/98
*
-* $Revision: 6.58 $
+* $Revision: 6.59 $
*
* File Description: Reengineered and optimized exploration functions
* to be used for future code
@@ -121,6 +121,7 @@ typedef struct seqmgrfeatcontext {
Int4 dnaStop;
Boolean partialL;
Boolean partialR;
+ Boolean external;
Boolean farloc;
Boolean bad_order;
Boolean mixed_strand;
diff --git a/api/fdlKludge.h b/api/fdlKludge.h
index 15549383..26b28ce6 100644
--- a/api/fdlKludge.h
+++ b/api/fdlKludge.h
@@ -28,54 +28,12 @@
*
* Version Creation Date: 10/15/01
*
-* $Revision: 6.15 $
+* $Revision: 6.18 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
-* $Log: fdlKludge.h,v $
-* Revision 6.15 2004/09/09 19:39:49 jianye
-* Added gene linkout
-*
-* Revision 6.14 2004/08/11 18:14:55 jianye
-* not turn on gene linkout yet
-*
-* Revision 6.13 2004/08/10 20:02:03 jianye
-* Added gene linkout
-*
-* Revision 6.12 2003/06/11 20:15:45 jianye
-* changed unigene linkout
-*
-* Revision 6.11 2003/06/02 20:02:23 jianye
-* Added geo linkout
-*
-* Revision 6.10 2003/05/05 19:33:54 jianye
-* Change url for structure linkout
-*
-* Revision 6.9 2003/04/21 21:49:37 jianye
-* changed some url
-*
-* Revision 6.8 2003/04/14 20:43:22 jianye
-* Adde geo url and modified structure linkout url
-*
-* Revision 6.7 2002/12/11 16:24:58 jianye
-* added structure linkout
-*
-* Revision 6.6 2002/09/11 19:53:09 jianye
-* Added url defines
-*
-* Revision 6.5 2002/08/22 20:32:35 jianye
-* add parentheses to bit shift
-*
-* Revision 6.4 2002/08/21 21:15:32 camacho
-* Added #define value for structure link bits
-*
-* Revision 6.3 2001/10/19 14:40:41 jianye
-* *** empty log message ***
-*
-* Revision 6.2 2001/10/18 19:20:20 jianye
-* Initial check in
*
*/
@@ -93,14 +51,14 @@
/* url for linkout*/
#define URL_LocusLink "<a href=\"http://www.ncbi.nlm.nih.gov/LocusLink/list.cgi?Q=%d%s\"><img border=0 height=16 width=16 src=\"/blast/images/L.gif\" alt=\"LocusLink info\"></a>"
-#define URL_Unigene "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=unigene&cmd=search&term=%d[Nucleotide+UID]\"><img border=0 height=16 width=16 src=\"/blast/images/U.gif\" alt=\"UniGene info\"></a>"
+#define URL_Unigene "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=unigene&cmd=search&term=%ld[Nucleotide+UID]\"><img border=0 height=16 width=16 src=\"/blast/images/U.gif\" alt=\"UniGene info\"></a>"
-#define URL_Structure "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\"><img border=0 height=16 width=16 src=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/str_link.gif\" alt=\"Related structures\"></a>"
+#define URL_Structure "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%ld&hit=%ld&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\"><img border=0 height=16 width=16 src=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/str_link.gif\" alt=\"Related structures\"></a>"
-#define URL_Structure_Overview "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\">Related Structures</a>"
+#define URL_Structure_Overview "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%ld&hit=%ld&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\">Related Structures</a>"
-#define URL_Geo "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=geo&term=%d[gi]\"><img border=0 height=16 width=16 src=\"/blast/images/E.gif\" alt=\"Geo\"></a>"
+#define URL_Geo "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=geo&term=%ld[gi]\"><img border=0 height=16 width=16 src=\"/blast/images/E.gif\" alt=\"Geo\"></a>"
-#define URL_Gene "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=search&term=%d[%s]\"><img border=0 height=16 width=16 src=\"/blast/images/G.gif\" alt=\"Gene info\"></a>"
+#define URL_Gene "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=search&term=%ld[%s]\"><img border=0 height=16 width=16 src=\"/blast/images/G.gif\" alt=\"Gene info\"></a>"
#endif
diff --git a/api/ffprint.h b/api/ffprint.h
index b3319324..59c2cdb4 100644
--- a/api/ffprint.h
+++ b/api/ffprint.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/15/95
*
-* $Revision: 6.11 $
+* $Revision: 6.14 $
*
* File Description:
*
@@ -42,107 +42,6 @@
* ==========================================================================
*/
-/*************************************
-*
- * $Log: ffprint.h,v $
- * Revision 6.11 2006/07/13 17:06:38 bollin
- * use Uint4 instead of Uint2 for itemID values
- * removed unused variables
- * resolved compiler warnings
- *
- * Revision 6.10 2002/08/26 22:06:57 kans
- * ff_RecalculateLinks (MS) to fix hotlink artifact
- *
- * Revision 6.9 1999/08/31 14:36:39 tatiana
- * ff_print_string_mem() added
- *
- * Revision 6.8 1999/04/09 22:21:53 kans
- * fixed prototype for FFBSPrint
- *
- * Revision 6.7 1999/04/09 21:15:27 bazhin
- * Added function "FFBSPrint()".
- *
- * Revision 6.6 1999/04/06 22:37:07 tatiana
- * www_protein_id() added
- *
- * Revision 6.5 1999/03/30 21:02:24 tatiana
- * www_accession www_taxid added
- *
- * Revision 6.4 1999/03/12 17:34:26 tatiana
- * www_featkey() added
- *
- * Revision 6.3 1999/02/02 17:29:21 kans
- * added ff_MergeString
- *
- * Revision 6.2 1998/07/23 22:43:08 tatiana
- * added www_PrintComment()
- *
- * Revision 6.1 1998/05/28 18:30:57 tatiana
- * changed prototype for head_tail_ff()
- *
- * Revision 6.0 1997/08/25 18:05:35 madden
- * Revision changed to 6.0
- *
- * Revision 5.9 1997/08/04 22:56:28 tatiana
- * init_buff_ex() added
- *
- * Revision 5.8 1997/07/18 15:45:09 tatiana
- * AddLinkLater defined as NLM_EXTERN
- *
- * Revision 5.7 1997/07/16 21:22:49 tatiana
- * add AddPintLater
- *
- * Revision 5.6 1997/06/19 18:37:39 vakatov
- * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
- *
- * Revision 5.5 1997/03/24 20:35:13 shavirin
- * Added protection for usage with C++ compiler
- *
- * Revision 5.4 1997/03/13 19:42:37 tatiana
- * *** empty log message ***
- *
- * Revision 5.3 1997/01/08 23:00:42 kans
- * added ifndef _FFPRINT_ multiple include protection
- *
- * Revision 5.2 1997/01/08 18:52:45 madden
- * Added LIBCALL's.
- *
- * Revision 5.1 1996/06/27 17:18:00 tatiana
- * www_map added
- *
- * Revision 4.12 1996/04/15 18:44:55 tatiana
- * free_buff() added
- *
- * Revision 4.11 1996/04/08 21:52:55 tatiana
- * change in www_featloc
- *
- * Revision 4.10 1996/03/25 15:22:07 tatiana
- * www_featloc added
- *
- * Revision 4.8 1996/02/21 20:11:04 tatiana
- * *** empty log message ***
- *
- * Revision 4.7 1996/01/29 22:45:22 tatiana
- * ChangeStringWithTildes added
- *
- * Revision 4.6 1995/12/20 22:46:19 tatiana
- * Int2 changed to Int4 in www_organism()
- *
- * Revision 4.5 1995/12/13 16:37:46 tatiana
- * www_dbxref added
- *
- * Revision 4.4 1995/11/17 21:52:50 tatiana
- * hot link to genetic code added.c
- *
- * Revision 4.3 1995/11/17 21:28:35 kans
- * asn2ff now uses gather (Tatiana)
- *
- * Revision 1.10 1995/07/17 19:33:20 kans
- * parameters combined into Asn2ffJobPtr structure
- *
-*
-**************************************/
-
#ifndef _FFPRINT_
#define _FFPRINT_
@@ -250,7 +149,7 @@ NLM_EXTERN Boolean LIBCALL ff_PrintLine PROTO((Asn2ffJobPtr ajp, GBEntryPtr gbp,
NLM_EXTERN CharPtr LIBCALL www_featloc PROTO((CharPtr loc));
NLM_EXTERN void LIBCALL GetHelpMsg PROTO((SeqEntryPtr sep));
NLM_EXTERN void LIBCALL www_PrintComment PROTO((CharPtr string, Boolean identifier, Uint1 format));
-NLM_EXTERN Boolean LIBCALL www_featkey PROTO((CharPtr key, Int4 gi, Int2 entityID, Uint4 itemID));
+NLM_EXTERN Boolean LIBCALL www_featkey PROTO((CharPtr key, BIG_ID gi, Int2 entityID, Uint4 itemID));
NLM_EXTERN void LIBCALL www_accession PROTO((CharPtr string));
NLM_EXTERN void LIBCALL ff_RecalculateLinks(Int4 indent);
diff --git a/api/gather.c b/api/gather.c
index baef0f11..cf311e13 100644
--- a/api/gather.c
+++ b/api/gather.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/7/94
*
-* $Revision: 6.57 $
+* $Revision: 6.62 $
*
* File Description:
*
@@ -174,7 +174,7 @@ NLM_EXTERN Boolean SeqLocOffset (SeqLocPtr seq_loc, SeqLocPtr sfp_loc, GatherRan
SeqInt si;
Boolean across_zero;
Int4 toffset, l, r, t;
- Boolean ltrunc, rtrunc;
+ Boolean ltrunc = FALSE, rtrunc = FALSE;
SeqLocPtr tslp;
if (seq_loc == NULL || sfp_loc == NULL || range == NULL) {
@@ -1402,7 +1402,7 @@ static Boolean NEAR GatherSeqFeat(InternalGCCPtr gccp, SeqFeatPtr sfp,
takecit, checkseq=FALSE;
SeqLocPtr slp, head, tslp, target[2];
GatherRangePtr rdp = NULL, trdp, lrdp;
- Int4 offset, totlen, left_end;
+ Int4 offset = 0, totlen, left_end;
Boolean rev, revs[2];
Int2 ctr, max_interval, i, numcheck, j;
GatherRange trange;
@@ -2225,6 +2225,8 @@ NLM_EXTERN AlignDataPtr gather_align_data(SeqLocPtr m_slp, SeqAlignPtr align,
SeqAlignPtr sap;
strand = 0;
+ MemSet ((Pointer) &gr, 0, sizeof (GatherRange));
+ MemSet ((Pointer) &t_range, 0, sizeof (GatherRange));
m_sip = SeqLocId(m_slp);
if(!get_align_ends(align, m_sip, &start, &stop, &c_strand))
return NULL;
@@ -5935,6 +5937,7 @@ typedef struct internalacc {
GatherObjectProc callback;
Pointer userdata;
BoolPtr objMgrFilter;
+ Boolean external;
} InternalACC, PNTR InternalACCPtr;
static void AssignIDs (InternalACCPtr iap, GatherIndexPtr gip, Uint1 itemtype, Uint1 subtype, Pointer parent, Uint2 parenttype, Pointer PNTR prevlink)
@@ -5972,6 +5975,7 @@ static Boolean VisitCallback (InternalACCPtr iap, Pointer dataptr, Uint1 itemtyp
go.parentptr = parent;
go.prevlink = prevlink;
go.userdata = iap->userdata;
+ go.external = iap->external;
if (! iap->callback (&go)) return FALSE;
}
}
@@ -6566,6 +6570,7 @@ static Boolean VisitEntity (
iac.callback = callback;
iac.userdata = userdata;
iac.objMgrFilter = objMgrFilter;
+ iac.external = FALSE;
if (entityID > 0) {
omp = ObjMgrReadLock ();
@@ -6644,6 +6649,7 @@ static Boolean VisitEntity (
for (vnp = extra; vnp != NULL; vnp = vnp->next) {
bsp = (BioseqPtr) vnp->data.ptrvalue;
if (bsp == NULL || bsp->annot == NULL) continue;
+ iac.external = TRUE;
if (! VisitSeqAnnot (&iac, bsp->annot, (Pointer) bsp, OBJ_BIOSEQ, (Pointer PNTR) &(bsp->annot))) return FALSE;
}
@@ -6870,6 +6876,37 @@ static void DeleteMarkedSeqGraph (SeqGraphPtr sgp, Pointer PNTR prevlink)
}
}
+static Boolean NoGenomeAnnotationInAnnotDescr (SeqAnnotPtr sap)
+/* FALSE only when sap has a StructuredComment prefixed ##Genome-Annotation-Data-START## */
+{
+  AnnotDescrPtr descr;
+  UserFieldPtr field;
+  ObjectIdPtr label;
+  UserObjectPtr obj;
+  ObjectIdPtr type;
+
+  if (sap == NULL) return TRUE;
+
+  for (descr = sap->desc; descr != NULL; descr = descr->next) {
+    /* skip everything except user objects whose type is "StructuredComment" */
+    if (descr->choice != Annot_descr_user) continue;
+    obj = (UserObjectPtr) descr->data.ptrvalue;
+    type = (obj != NULL) ? obj->type : NULL;
+    if (type == NULL || StringICmp (type->str, "StructuredComment") != 0) continue;
+    for (field = obj->data; field != NULL; field = field->next) {
+      /* choice 1 is presumably the string-valued field kind -- TODO confirm */
+      if (field->choice != 1) continue;
+      label = field->label;
+      if (label == NULL || StringCmp (label->str, "StructuredCommentPrefix") != 0) continue;
+      if (StringCmp ((CharPtr) field->data.ptrvalue, "##Genome-Annotation-Data-START##") == 0) {
+        return FALSE;
+      }
+    }
+  }
+
+  return TRUE;
+}
+
static void DeleteMarkedSeqAnnot (SeqAnnotPtr sap, Pointer PNTR prevlink)
{
@@ -6892,7 +6929,8 @@ static void DeleteMarkedSeqAnnot (SeqAnnotPtr sap, Pointer PNTR prevlink)
default :
break;
}
- if (sap->data == NULL) {
+ /* now keep empty annot if its annot_descr holds a Genome-Annotation-Data structured comment */
+ if (sap->data == NULL && /* sap->desc == NULL */ NoGenomeAnnotationInAnnotDescr (sap)) {
sap->idx.deleteme = 1;
}
}
diff --git a/api/gather.h b/api/gather.h
index 834243ff..2aeb495d 100644
--- a/api/gather.h
+++ b/api/gather.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/7/94
*
-* $Revision: 6.14 $
+* $Revision: 6.15 $
*
* File Description:
*
@@ -39,6 +39,9 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: gather.h,v $
+* Revision 6.15 2012/07/30 21:52:07 kans
+* JIRA:GPI-1532 Track indexing of external AnnotDesc, assign to most recent Bioseq if sequence component ID not already loaded
+*
* Revision 6.14 2006/07/13 17:06:38 bollin
* use Uint4 instead of Uint2 for itemID values
* removed unused variables
@@ -606,6 +609,7 @@ typedef struct gatherobject {
Pointer parentptr;
Pointer PNTR prevlink;
Pointer userdata;
+ Boolean external;
} GatherObject, PNTR GatherObjectPtr;
typedef Boolean (*GatherObjectProc) (GatherObjectPtr gop);
diff --git a/api/gbfeat.c b/api/gbfeat.c
index 55eeac51..76cf0f3e 100644
--- a/api/gbfeat.c
+++ b/api/gbfeat.c
@@ -3,73 +3,7 @@
* -- all routines for checking genbank feature table
* -- all extern variables are in gbftglob.c
* 10-11-93
-$Revision: 6.9 $
-*
-* $Log: gbfeat.c,v $
-* Revision 6.9 2003/10/09 15:35:51 bazhin
-* Qualifier "rpt_unit" is removed from the list of ones to be splitted
-* by commas.
-*
-* Revision 6.8 2001/12/06 17:00:41 kans
-* TextSave takes size_t, not Int2, otherwise titin protein tries to allocate negative number
-*
-* Revision 6.7 2001/06/08 20:09:53 bazhin
-* From now on "absent" is a legal value for /cons_splice qualifier.
-*
-* Revision 6.6 2000/02/02 22:10:19 kans
-* use TextSave instead of TextSaveEx, which is not available
-*
-* Revision 6.5 2000/02/02 21:03:09 tatiana
-* CkNumberType() added
-*
-* Revision 6.4 1998/06/15 15:00:17 tatiana
-* UNIX compiler warnings fixed
-*
-* Revision 6.3 1998/04/30 21:44:05 tatiana
-* *** empty log message ***
-*
-* Revision 6.2 1998/02/10 17:00:19 tatiana
-* GBQualValidToAdd(0 added
-*
-* Revision 6.1 1998/01/08 23:42:35 tatiana
-* type fixed in GBQual_names_split_ignore
-*
-* Revision 6.0 1997/08/25 18:05:54 madden
-* Revision changed to 6.0
-*
-* Revision 5.5 1997/06/19 18:37:55 vakatov
-* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
-*
-* Revision 5.4 1997/01/21 23:11:36 tatiana
-* error msg changed
-*
- * Revision 5.3 1996/08/28 20:43:42 tatiana
- * skip gsdb_id qualifier in GBQualSemanticValid()
- *
- * Revision 5.3 1996/08/28 20:43:42 tatiana
- * skip gsdb_id qualifier in GBQualSemanticValid()
- *
- * Revision 5.2 1996/07/30 17:28:07 kans
- * ParFlat_... arrays now external in header file
- *
- * Revision 5.1 1996/07/29 19:45:59 tatiana
- * GBQual_names changed to use a structure
- *
- * Revision 4.4 1996/02/26 00:46:18 ostell
- * removed unused local variables and integer size mismatch fusses
- *
- * Revision 4.3 1995/11/08 22:54:38 tatiana
- * case sensitive feature key check
- *
- * Revision 4.2 1995/09/20 20:37:01 tatiana
- * a bug fixed in CkQualText
- *
- * Revision 4.1 1995/08/15 22:04:04 tatiana
- * change check for mandatory /citation and add additional check for sfp-cit
- *
- * Revision 1.23 1995/05/15 21:46:05 ostell
- * added Log line
- *
+$Revision: 6.11 $
*
****************************************************************************/
@@ -788,7 +722,9 @@ NLM_EXTERN int CkQualNote(GBQualPtr PNTR head_gbqp, GBQualPtr gbqp,
for (; *str != '\0'; str++)
if (*str == '\"')
*str = '\'';
+ /*
ConvertEmbedQual(gbqp->val);
+ */
}
return retval;
@@ -1151,7 +1087,7 @@ NLM_EXTERN int CkQualEcnum( GBQualPtr PNTR head_gbqp, GBQualPtr gbqp,
str++;
for (; *str != '\0' && *str != '\"'; str++)
- if (!IS_DIGIT(*str) && *str != '.' && *str != '-') {
+ if (!IS_DIGIT(*str) && *str != '.' && *str != '-' && *str != 'n') {
if (error_msgs){
ErrPostEx(SEV_ERROR, ERR_QUALIFIER_BadECnum,
"At <%c>(%d) /%s=%s",
diff --git a/api/gbftdef.h b/api/gbftdef.h
index 0e131181..8c440938 100644
--- a/api/gbftdef.h
+++ b/api/gbftdef.h
@@ -124,13 +124,17 @@
#define GBQUAL_mobile_element_type 114
#define GBQUAL_gap_type 115
#define GBQUAL_linkage_evidence 116
+#define GBQUAL_altitude 117
+#define GBQUAL_metagenome_source 118
+#define GBQUAL_type_material 119
+#define GBQUAL_regulatory_class 120
-#define ParFlat_TOTAL_GBQUAL 117
+#define ParFlat_TOTAL_GBQUAL 121
#define ParFlat_TOTAL_IntOr 3
#define ParFlat_TOTAL_LRB 3
#define ParFlat_TOTAL_Exp 2
-#define ParFlat_TOTAL_Rpt 7
-#define ParFlat_TOTAL_GBFEAT 73
+#define ParFlat_TOTAL_Rpt 15
+#define ParFlat_TOTAL_GBFEAT 75
#define Class_pos_aa 1
#define Class_text 2
diff --git a/api/gbftglob.c b/api/gbftglob.c
index 90a34b8b..641c4a51 100644
--- a/api/gbftglob.c
+++ b/api/gbftglob.c
@@ -66,8 +66,10 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = {
{"satellite", Class_text}, { "gene_synonym", Class_text},
{"UniProtKB_evidence", Class_text}, {"haplogroup", Class_text},
{"artificial_location", Class_text}, {"non_functional", Class_text},
- {"pseudogene", Class_text}, {"mobile_element_type", Class_text} ,
- {"gap_type", Class_text }, {"linkage_evidence", Class_text }
+ {"pseudogene", Class_text}, {"mobile_element_type", Class_text},
+ {"gap_type", Class_text }, {"linkage_evidence", Class_text },
+ {"altitude", Class_text }, {"metagenome_source", Class_text},
+ {"type_material", Class_text}, {"regulatory_class", Class_text}
};
NLM_EXTERN GbFeatNamePtr x_ParFlat_GBQual_names(void) {
@@ -82,8 +84,22 @@ CharPtr ParFlat_ExpString[ParFlat_TOTAL_Exp] = {
"EXPERIMENTAL", "NOT_EXPERIMENTAL"};
CharPtr ParFlat_RptString[ParFlat_TOTAL_Rpt] = {
- "tandem", "inverted", "flanking", "terminal", "direct",
- "dispersed", "other"};
+ "tandem",
+ "inverted",
+ "flanking",
+ "nested",
+ "terminal",
+ "direct",
+ "dispersed",
+ "long_terminal_repeat",
+ "non_LTR_retrotransposon_polymeric_tract",
+ "X_element_combinatorial_repeat",
+ "Y_prime_element",
+ "telomeric_repeat",
+ "centromeric_repeat",
+ "engineered_foreign_repetitive_element",
+ "other"
+};
static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
{"allele", 0, {-1, -1, -1, -1, -1}, 18,
@@ -1382,7 +1398,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1}},
{"source", 1, {
- GBQUAL_organism, -1, -1, -1, -1}, 62,
+ GBQUAL_organism, -1, -1, -1, -1}, 65,
{
GBQUAL_bio_material,
GBQUAL_cell_line,
@@ -1446,7 +1462,9 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_variety,
GBQUAL_virion,
GBQUAL_haplogroup,
- -1, -1, -1}},
+ GBQUAL_altitude,
+ GBQUAL_metagenome_source,
+ GBQUAL_type_material}},
{"stem_loop", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
@@ -1934,7 +1952,62 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1}}
+ -1, -1, -1, -1}},
+ {"regulatory", 1, {
+ GBQUAL_regulatory_class, -1, -1, -1, -1}, 20,
+ {
+ GBQUAL_allele,
+ GBQUAL_bound_moiety,
+ GBQUAL_citation,
+ GBQUAL_db_xref,
+ GBQUAL_evidence,
+ GBQUAL_experiment,
+ GBQUAL_function,
+ GBQUAL_gene,
+ GBQUAL_gene_synonym,
+ GBQUAL_inference,
+ GBQUAL_locus_tag,
+ GBQUAL_map,
+ GBQUAL_note,
+ GBQUAL_old_locus_tag,
+ GBQUAL_operon,
+ GBQUAL_partial,
+ GBQUAL_phenotype,
+ GBQUAL_pseudo,
+ GBQUAL_pseudogene,
+ GBQUAL_standard_name,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1}},
+ {"propeptide", 0, {-1, -1, -1, -1, -1}, 20,
+ {
+ GBQUAL_allele,
+ GBQUAL_citation,
+ GBQUAL_db_xref,
+ GBQUAL_evidence,
+ GBQUAL_experiment,
+ GBQUAL_function,
+ GBQUAL_gene,
+ GBQUAL_gene_synonym,
+ GBQUAL_inference,
+ GBQUAL_locus_tag,
+ GBQUAL_map,
+ GBQUAL_non_functional,
+ GBQUAL_note,
+ GBQUAL_old_locus_tag,
+ GBQUAL_partial,
+ GBQUAL_product,
+ GBQUAL_pseudo,
+ GBQUAL_pseudogene,
+ GBQUAL_standard_name,
+ GBQUAL_usedin,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1}}
};
NLM_EXTERN SematicFeatPtr x_ParFlat_GBFeat(void) {
diff --git a/api/gbparint.c b/api/gbparint.c
index a54694d6..82b49203 100644
--- a/api/gbparint.c
+++ b/api/gbparint.c
@@ -28,6 +28,9 @@
* Author: Karl Sirotkin
*
* $Log: gbparint.c,v $
+* Revision 6.10 2014/08/01 17:14:01 bazhin
+* Added support for new format (4+2+S+{6|7|8}) WGS scaffolds.
+*
* Revision 6.9 2009/10/02 19:46:00 kans
* address clang static analyzer warnings
*
@@ -1877,6 +1880,9 @@ static int Nlm_gbparse_accprefix(CharPtr acc)
else
ret = 1;
}
+ else if(p[0] != '\0' && p[0] >= '0' && p[0] <= '9' &&
+ p[1] != '\0' && p[1] >= '0' && p[1] <= '9' && p[2] == 'S')
+ ret = 7;
else if(ret != 1 && ret != 2 && ret != 4)
ret = 1;
return(ret);
diff --git a/api/lsqfetch.c b/api/lsqfetch.c
index 4a7a7280..0b739c4e 100644
--- a/api/lsqfetch.c
+++ b/api/lsqfetch.c
@@ -581,6 +581,93 @@ NLM_EXTERN Boolean seqid_to_string(SeqIdPtr sip, CharPtr name, Boolean use_locus
return TRUE;
}
+
+/* Copy src into dst without writing more than dst_size bytes.
+ * When src is longer than dst_size - 1 characters it is truncated and
+ * dst is explicitly NUL-terminated; otherwise the whole string is copied
+ * with StringCpy.  Nothing is written when dst is NULL or dst_size is
+ * not positive.
+ */
+static void SafeStringCopy (CharPtr dst, CharPtr src, Int4 dst_size)
+{
+  Int4 len;
+
+  /* a non-positive size would make dst_size - 1 negative below, giving a
+   * negative copy count and a write in front of the buffer */
+  if (dst == NULL || dst_size < 1) {
+    return;
+  }
+
+  len = StringLen (src);
+  if (len > dst_size - 1) {
+    StringNCpy (dst, src, dst_size - 1);
+    dst[dst_size - 1] = 0;
+  } else {
+    StringCpy (dst, src);
+  }
+}
+
+
+/* Write the decimal text of value into dst, limited to dst_size bytes.
+ * The number is formatted into a local buffer first so that the caller's
+ * buffer is only ever written through SafeStringCopy.
+ */
+static void SafeLongCopy (CharPtr dst, long value, Int4 dst_size)
+{
+  Char digits[32];  /* ample for a 64-bit long with sign and terminator */
+
+  sprintf (digits, "%ld", value);
+  SafeStringCopy (dst, digits, dst_size);
+}
+
+
+/*********************************************************************
+*
+*	Safe_seqid_to_string(sip, name, name_size, use_locus)
+*	print the most important field in Seqid to a string stored in
+*	name.  At most name_size bytes are written to name; longer
+*	text is truncated.
+*
+*	For the text Seq-id types the accession is used, replaced by
+*	the locus name when use_locus is set and a name exists, or
+*	when there is no accession.
+*
+*	Returns FALSE when sip or name is NULL, when name_size is not
+*	positive, or when the Seq-id choice is not handled here;
+*	returns TRUE otherwise.
+*
+**********************************************************************/
+NLM_EXTERN Boolean Safe_seqid_to_string(SeqIdPtr sip, CharPtr name, Int4 name_size, Boolean use_locus)
+{
+  DbtagPtr db_tag;
+  ObjectIdPtr obj_id;
+  TextSeqIdPtr tsip;
+  PDBSeqIdPtr pip;
+  GiimPtr gip;
+
+  if (sip == NULL || name == NULL || name_size < 1) {
+    return FALSE;
+  }
+
+  switch(sip->choice)
+  {
+    case 1: /**local**/
+      obj_id = sip->data.ptrvalue;
+      if(obj_id->str)
+        SafeStringCopy (name, obj_id->str, name_size);
+      else
+        SafeLongCopy (name, (long) obj_id->id, name_size);
+      break;
+
+    case 5: /**genbank**/
+    case 6: /**EMBL**/
+    case 7: /**PIR**/
+    case 8: /**SwissProt**/
+    case 10: /**Other**/
+    case 13: /**DDBJ**/
+    case 14: /**PRF**/
+      tsip = sip->data.ptrvalue;
+      if(tsip->accession)
+        SafeStringCopy (name, tsip->accession, name_size);
+      /* the locus name overrides the accession on request, and is the
+       * only source when there is no accession */
+      if((tsip->name && use_locus) || tsip->accession == NULL)
+        SafeStringCopy(name, tsip->name, name_size);
+
+      break;
+
+    case 11: /**general**/
+      db_tag = sip->data.ptrvalue;
+      obj_id = db_tag->tag;
+      if(obj_id->str)
+        SafeStringCopy(name, obj_id->str, name_size);
+      else
+        SafeLongCopy (name, (long) obj_id->id, name_size);
+      break;
+
+    case 4: /**giim**/
+      gip = sip->data.ptrvalue;
+      SafeLongCopy (name, (long)(gip->id), name_size);
+      break;
+
+    case 2: /*gibbseq*/
+    case 3: /*gibbmt*/
+    case 12: /*gi*/
+      SafeLongCopy (name, (long)(sip->data.intvalue), name_size);
+      break;
+
+    case 15: /*pdb*/
+      pip = sip->data.ptrvalue;
+      SafeStringCopy (name, pip->mol, name_size);
+      break;
+    default:
+      return FALSE;
+  }
+
+  return TRUE;
+}
+
+
/*********************************************************************
*
* FileBioseqFetchEnable(path, ext)
@@ -603,7 +690,7 @@ static Int2 LIBCALLBACK FileBioseqFetchFunc (Pointer data)
SeqIdPtr sip;
OMUserDataPtr omdp;
CharPtr file_name = NULL;
- Char name[100], f_name[100];
+ Char name[100], f_name[PATH_MAX];
CharPtr c_name;
FILE *fp;
AsnIoPtr aip;
@@ -632,7 +719,7 @@ static Int2 LIBCALLBACK FileBioseqFetchFunc (Pointer data)
fbp = sbfp->data.ptrvalue;
if(file_name == NULL)
{
- seqid_to_string(sip, name, fbp->use_locus);
+ Safe_seqid_to_string(sip, name, sizeof(name) / sizeof (Char), fbp->use_locus);
if(fbp->path)
sprintf(f_name, "%s%s", fbp->path, name);
else
@@ -2185,11 +2272,11 @@ static void CreateBinaryAsnIndex (
}
FileClose (ofp);
+
+ ValNodeFreeData (aid.head);
}
AsnIoClose (aip);
-
- ValNodeFreeData (aid.head);
}
static void CreateTextAsnIndex (
@@ -2282,11 +2369,11 @@ static void CreateTextAsnIndex (
}
FileClose (ofp);
+
+ ValNodeFreeData (aid.head);
}
FileClose (ifp);
-
- ValNodeFreeData (aid.head);
}
NLM_EXTERN void CreateAsnIndex (
diff --git a/api/macroapi.c b/api/macroapi.c
index 7141726c..8d9975eb 100755
--- a/api/macroapi.c
+++ b/api/macroapi.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/8/2007
*
-* $Revision: 1.498 $
+* $Revision: 1.598 $
*
* File Description:
*
@@ -64,6 +64,8 @@
#include <valid.h>
#include <objvalid.h>
#include <valapi.h>
+#include <tax3api.h>
+#include <tofasta.h>
/* static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data); */
static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
@@ -98,24 +100,6 @@ static ValNodePtr CollectNucBioseqs (SeqEntryPtr sep)
return vnb.head;
}
-static Boolean IsAllDigits (CharPtr str)
-{
- CharPtr cp;
-
- if (StringHasNoText (str)) return FALSE;
-
- cp = str;
- while (*cp != 0 && isdigit (*cp)) {
- cp++;
- }
- if (*cp == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
static Boolean IsAllCaps (CharPtr str)
{
CharPtr cp;
@@ -171,6 +155,33 @@ static Boolean IsAllPunctuation (CharPtr str)
}
+/* Return a text form of date.
+ * The string produced by PrintDate is returned when there is one.
+ * Otherwise, when data[0] is positive and data[1] (the year field) is
+ * non-zero, a "<month> <year>" string is built, or just "<year>" when
+ * the month field does not hold 1..12.  The fallback string is created
+ * with StringSave.  Returns NULL for a NULL date or when neither form
+ * can be produced.
+ */
+static CharPtr PrintPartialOrCompleteDate(DatePtr date)
+{
+  CharPtr str = NULL;
+  Char year[5];
+  Char result[15];
+  int month;
+
+  if (date == NULL) {
+    return NULL;
+  }
+  str = PrintDate(date);
+  if (str == NULL && date->data[0] > 0 && date->data[1]) {
+    /* year values below 30 are shown as 20xx, all others as 1900 + value */
+    if ((int) (date -> data[1]) < 30) {
+      sprintf(year, "%4d", (int) (date -> data[1] + 2000));
+    } else {
+      sprintf(year, "%4d", (int) (date -> data[1] + 1900));
+    }
+    month = (int) date->data[2];
+    /* only index NCBI_months with a real month number (assumes the table
+     * has twelve entries - TODO confirm); anything else, including 0,
+     * yields the bare year */
+    if (month >= 1 && month <= 12) {
+      sprintf(result, "%s %s", NCBI_months[month - 1], year);
+    } else {
+      StringCpy (result, year);
+    }
+    str = StringSave (result);
+  }
+  return str;
+}
+
+
static Boolean DoesFeatureMatchRnaType (SeqFeatPtr sfp, RnaFeatTypePtr rt);
static Int4 CompareRnaTypes (RnaFeatTypePtr rt1, RnaFeatTypePtr rt2);
@@ -714,7 +725,7 @@ static int CompareSourceQuals (VoidPtr ptr1, VoidPtr ptr2)
static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2);
-static int CompareFieldTypesEx (FieldTypePtr vnp1, FieldTypePtr vnp2, Boolean use_source_qual_sort)
+NLM_EXTERN int CompareFieldTypesEx (FieldTypePtr vnp1, FieldTypePtr vnp2, Boolean use_source_qual_sort)
{
int rval = 0;
FeatureFieldPtr field1, field2;
@@ -1356,7 +1367,8 @@ static FeatTypeFeatDefData feattype_featdef[] = {
{ Macro_feature_type_oriT , FEATDEF_oriT , "oriT" } ,
{ Macro_feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } ,
{ Macro_feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" } ,
- { Macro_feature_type_mobile_element, FEATDEF_mobile_element, "mobile_element" }
+ { Macro_feature_type_mobile_element, FEATDEF_mobile_element, "mobile_element" } ,
+ { Macro_feature_type_regulatory, FEATDEF_regulatory, "regulatory" }
};
#define NUM_feattype_featdef sizeof (feattype_featdef) / sizeof (FeatTypeFeatDefData)
@@ -1477,6 +1489,7 @@ NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list
for (i = 1; i < NUM_feattype_featdef; i++) {
if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue;
if (feattype_featdef[i].feattype == Macro_feature_type_conflict) continue;
+ if (IsRegulatorySubtype(feattype_featdef[i].featdef)) continue;
seqfeattype = FindFeatFromFeatDefType (feattype_featdef[i].featdef);
if (seqfeattype == SEQFEAT_IMP) {
featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype);
@@ -1549,6 +1562,7 @@ NLM_EXTERN void AddAllFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
for (i = 1; i < NUM_feattype_featdef; i++) {
if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue;
+ if (IsRegulatorySubtype(feattype_featdef[i].featdef)) continue;
featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype);
if (featname != NULL) {
ValNodeAddPointer (&tmp_list, feattype_featdef[i].feattype, StringSave (featname));
@@ -1579,7 +1593,7 @@ static FeatQualGBQualData featqual_gbqual[] = {
{ Feat_qual_legal_db_xref , GBQUAL_db_xref , 0, "db-xref" } ,
{ Feat_qual_legal_direction , GBQUAL_direction , 0, "direction" } ,
{ Feat_qual_legal_ec_number , GBQUAL_EC_number , 0, "EC number" } ,
- { Feat_qual_legal_environmental_sample , 0, GBQUAL_environmental_sample , "environmental-sample" } ,
+ { Feat_qual_legal_environmental_sample , GBQUAL_environmental_sample , 0, "environmental-sample" } ,
{ Feat_qual_legal_evidence , GBQUAL_evidence , 0, "evidence" } ,
{ Feat_qual_legal_exception , GBQUAL_exception , 0, "exception" } ,
{ Feat_qual_legal_experiment , GBQUAL_experiment , 0, "experiment" } ,
@@ -1605,12 +1619,14 @@ static FeatQualGBQualData featqual_gbqual[] = {
{ Feat_qual_legal_organism , GBQUAL_organism , 0, "organism" } ,
{ Feat_qual_legal_organelle , GBQUAL_organelle , 0, "organelle" } ,
{ Feat_qual_legal_partial , GBQUAL_partial , 0, "partial" } ,
+ { Feat_qual_legal_pcr_conditions, GBQUAL_PCR_conditions , 0, "pcr-conditions" } ,
{ Feat_qual_legal_phenotype , GBQUAL_phenotype , 0, "phenotype" } ,
{ Feat_qual_legal_plasmid , GBQUAL_plasmid , 0, "plasmid" } ,
{ Feat_qual_legal_product , GBQUAL_product , 0, "product" } ,
{ Feat_qual_legal_protein_id , GBQUAL_protein_id , 0, "protein-id" } ,
{ Feat_qual_legal_pseudo , GBQUAL_pseudogene , 0, "pseudogene" } ,
{ Feat_qual_legal_rearranged , GBQUAL_rearranged , 0, "rearranged" } ,
+ { Feat_qual_legal_regulatory_class , GBQUAL_regulatory_class , 0, "regulatory-class" } ,
{ Feat_qual_legal_replace , GBQUAL_replace , 0, "replace" } ,
{ Feat_qual_legal_rpt_family , GBQUAL_rpt_family , 0, "rpt-family" } ,
{ Feat_qual_legal_rpt_type , GBQUAL_rpt_type , 0, "rpt-type" } ,
@@ -2172,6 +2188,7 @@ typedef struct srcqualscqual {
#define kAllNotesStr "All Notes"
#define kAllQualsStr "All"
+#define kAllPrimersStr "All Primers"
static SrcQualSCQualData srcqual_scqual[] = {
{ Source_qual_acronym , ORGMOD_acronym , IS_ORGMOD , 0 , "acronym" } ,
@@ -2264,6 +2281,7 @@ static SrcQualSCQualData srcqual_scqual[] = {
{ Source_qual_transgenic , SUBSRC_transgenic , IS_SUBSRC , 0 , "transgenic" } ,
{ Source_qual_transposon_name , SUBSRC_transposon_name , IS_SUBSRC , 0 , "transposon-name" } ,
{ Source_qual_type , ORGMOD_type , IS_ORGMOD , 0 , "type" } ,
+ { Source_qual_type_material , ORGMOD_type_material , IS_ORGMOD , 0 , "type-material" } ,
{ Source_qual_variety , ORGMOD_variety , IS_ORGMOD , 0 , "variety" } ,
{ Source_qual_all_notes , 255 , IS_OTHER , 0 , kAllNotesStr } ,
{ Source_qual_all_quals , 0 , IS_OTHER , 0, kAllQualsStr } ,
@@ -2271,6 +2289,8 @@ static SrcQualSCQualData srcqual_scqual[] = {
{ Source_qual_linkage_group , SUBSRC_linkage_group , IS_SUBSRC , 0 , "linkage-group" } ,
{ Source_qual_haplogroup , SUBSRC_haplogroup, IS_SUBSRC, 0, "haplogroup"} ,
{ Source_qual_taxid , 0 , IS_OTHER , 0 , "taxid" } ,
+ { Source_qual_all_primers , 0, IS_OTHER , 0, kAllPrimersStr } ,
+ { Source_qual_altitude , SUBSRC_altitude, IS_SUBSRC , 0 , "altitude"}
};
#define NUM_srcqual_scqual sizeof (srcqual_scqual) / sizeof (SrcQualSCQualData)
@@ -2286,7 +2306,7 @@ static StringAliasData src_qual_alias_list[] = {
};
-static Int4 GetSubSrcQualFromSrcQual (Int4 srcqual, Int4Ptr subfield)
+NLM_EXTERN Int4 GetSubSrcQualFromSrcQual (Int4 srcqual, Int4Ptr subfield)
{
Int4 i;
@@ -2411,6 +2431,11 @@ NLM_EXTERN Int4 GetSourceQualTypeByName (CharPtr qualname)
return Source_qual_variety;
} else if (StringICmp (qualname, "str.") == 0) {
return Source_qual_strain;
+ } else if (StringICmp (qualname, "note") == 0) {
+ return Source_qual_orgmod_note;
+ } else if (Matchnamestring (qualname, "latitude-longitude")
+ || Matchnamestring (qualname, "lat-long")) {
+ return Source_qual_lat_lon;
}
return -1;
}
@@ -2424,10 +2449,12 @@ NLM_EXTERN ValNodePtr GetSourceQualList (Boolean for_remove)
if (for_remove) {
ValNodeAddPointer (&list, 0, StringSave (kAllQualsStr));
last = ValNodeAddPointer (&list, 0, StringSave (kAllNotesStr));
+ last = ValNodeAddPointer (&list, 0, StringSave (kAllPrimersStr));
}
for (i = 0; i < NUM_srcqual_scqual; i++) {
if (srcqual_scqual[i].srcqual != Source_qual_all_notes
- && srcqual_scqual[i].srcqual != Source_qual_all_quals) {
+ && srcqual_scqual[i].srcqual != Source_qual_all_quals
+ && srcqual_scqual[i].srcqual != Source_qual_all_primers) {
ValNodeAddPointer (&tmp, 0, StringSave (srcqual_scqual[i].qualname));
}
}
@@ -2473,7 +2500,7 @@ NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop)
vnp->data.intvalue = Source_qual_lineage;
ValNodeAddPointer (&list, FieldType_source_qual, vnp);
}
- if (!StringHasNoText (biop->org->orgname->lineage)) {
+ if (!StringHasNoText (biop->org->orgname->div)) {
vnp = ValNodeNew (NULL);
vnp->choice = SourceQualChoice_textqual;
vnp->data.intvalue = Source_qual_division;
@@ -2701,6 +2728,25 @@ NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove)
}
+/* Build a field node for the source location named by value.
+ * GenomeFromLocName translates the name; a negative result means the
+ * name is not recognised and NULL is returned.  Otherwise the result is
+ * a FieldType_source_qual node whose data.ptrvalue is a
+ * SourceQualValChoice_location node carrying the genome value.
+ */
+static ValNodePtr SrcLocationFieldFromValue (CharPtr value)
+{
+  ValNodePtr location_qual;
+  ValNodePtr result = NULL;
+  Int4 genome_code = GenomeFromLocName(value);
+
+  if (genome_code >= 0) {
+    location_qual = ValNodeNew (NULL);
+    location_qual->choice = SourceQualValChoice_location;
+    location_qual->data.intvalue = genome_code;
+
+    result = ValNodeNew (NULL);
+    result->choice = FieldType_source_qual;
+    result->data.ptrvalue = location_qual;
+  }
+  return result;
+}
+
+
typedef struct srcorigorigin {
Int4 srcorig;
Int4 origin;
@@ -3620,6 +3666,7 @@ static TechniqueTypeTechData techniquetype_tech[] = {
{ Technique_type_barcode, MI_TECH_barcode, "BARCODE" } ,
{ Technique_type_composite_wgs_htgs, MI_TECH_composite_wgs_htgs, "composite WGS-HTGS" } ,
{ Technique_type_tsa, MI_TECH_tsa, "TSA" } ,
+ { Technique_type_targeted, MI_TECH_targeted, "targeted" } ,
{ Technique_type_other, MI_TECH_other, "other" }
};
@@ -4284,7 +4331,7 @@ NLM_EXTERN ValNodePtr GetSiteTypeList (void)
/* Simple constraints */
static Boolean DisallowCharacter (Char ch, Boolean disallow_slash)
{
- if (isalpha ((Int4) ch) || isdigit ((Int4) ch) || ch == '_')
+ if (isalpha ((Int4) ch) || isdigit ((Int4) ch) || ch == '_' || ch == '-')
{
return TRUE;
}
@@ -4692,14 +4739,14 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
switch (scp->match_location)
{
case String_location_contains:
- if (scp->case_sensitive)
- {
- pFound = StringSearch (search, pattern);
- }
- else
- {
- pFound = StringISearch (search, pattern);
- }
+ if (scp->case_sensitive)
+ {
+ pFound = StringSearch (search, pattern);
+ }
+ else
+ {
+ pFound = StringISearch (search, pattern);
+ }
if (pFound == NULL)
{
rval = FALSE;
@@ -4709,14 +4756,14 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
while (!rval && pFound != NULL)
{
- if (scp->case_sensitive)
- {
- pFound = StringSearch (pFound + 1, pattern);
- }
- else
- {
- pFound = StringISearch (pFound + 1, pattern);
- }
+ if (scp->case_sensitive)
+ {
+ pFound = StringSearch (pFound + 1, pattern);
+ }
+ else
+ {
+ pFound = StringISearch (pFound + 1, pattern);
+ }
if (pFound != NULL)
{
rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
@@ -4729,14 +4776,14 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
}
break;
case String_location_starts:
- if (scp->case_sensitive)
- {
- pFound = StringSearch (search, pattern);
- }
- else
- {
- pFound = StringISearch (search, pattern);
- }
+ if (scp->case_sensitive)
+ {
+ pFound = StringSearch (search, pattern);
+ }
+ else
+ {
+ pFound = StringISearch (search, pattern);
+ }
if (pFound == search)
{
if (scp->whole_word)
@@ -4750,16 +4797,16 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
}
break;
case String_location_ends:
- if (scp->case_sensitive)
- {
- pFound = StringSearch (search, pattern);
- }
- else
- {
- pFound = StringISearch (search, pattern);
- }
+ if (scp->case_sensitive)
+ {
+ pFound = StringSearch (search, pattern);
+ }
+ else
+ {
+ pFound = StringISearch (search, pattern);
+ }
while (pFound != NULL && !rval) {
- char_after = *(pFound + StringLen (pattern));
+ char_after = *(pFound + StringLen (pattern));
if (char_after == 0)
{
if (scp->whole_word)
@@ -4775,14 +4822,14 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
}
else
{
- if (scp->case_sensitive)
- {
- pFound = StringSearch (pFound + 1, pattern);
- }
- else
- {
- pFound = StringISearch (pFound + 1, pattern);
- }
+ if (scp->case_sensitive)
+ {
+ pFound = StringSearch (pFound + 1, pattern);
+ }
+ else
+ {
+ pFound = StringISearch (pFound + 1, pattern);
+ }
}
}
break;
@@ -4848,7 +4895,7 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
pattern = MemFree (pattern);
}
scp->match_text = tmp_match;
- return rval;
+ return rval;
}
@@ -5051,23 +5098,23 @@ NLM_EXTERN Boolean RemoveStringConstraintPortionFromString (CharPtr PNTR str, St
rval = TRUE;
pFound = NULL;
} else {
- if (scp->case_sensitive)
- {
- pFound = StringSearch (pFound + 1, scp->match_text);
- }
- else
- {
- pFound = StringISearch (pFound + 1, scp->match_text);
- }
+ if (scp->case_sensitive)
+ {
+ pFound = StringSearch (pFound + 1, scp->match_text);
+ }
+ else
+ {
+ pFound = StringISearch (pFound + 1, scp->match_text);
+ }
}
break;
}
}
- }
+ }
if (rval && StringHasNoText (*str)) {
*str = MemFree (*str);
}
- return rval;
+ return rval;
}
@@ -5671,6 +5718,11 @@ static CharPtr FindTextMarker(CharPtr str, Int4Ptr len, TextMarkerPtr marker, Bo
CharPtr digits = "0123456789";
CharPtr letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+ if (str == NULL)
+ {
+ return NULL;
+ }
+
if (marker == NULL)
{
if (len != NULL)
@@ -6604,7 +6656,7 @@ static Boolean SetInt2ValueWithString (Int2Ptr val, CharPtr val_str, Uint2 exist
sprintf (num, "%d", *val);
tmp = StringSave (num);
if (SetStringValue (&tmp, val_str, existing_text)
- && IsAllDigits (tmp)) {
+ && StringIsAllDigits (tmp)) {
*val = atoi (tmp);
rval = TRUE;
}
@@ -6645,7 +6697,7 @@ NLM_EXTERN Boolean SetObjectIdString (ObjectIdPtr oip, CharPtr value, Uint2 exis
if (SetStringValue (&tmp, value, existing_text)) {
oip->str = MemFree (oip->str);
oip->id = 0;
- if (IsAllDigits (tmp)) {
+ if (StringIsAllDigits (tmp) && StringLen (tmp) < 8 && *tmp != '0') {
oip->id = atoi (tmp);
} else {
oip->str = tmp;
@@ -6677,9 +6729,20 @@ NLM_EXTERN CharPtr GetObjectIdString (ObjectIdPtr oip)
}
+/* Test an integer against a string constraint by matching its decimal
+ * text.  An empty constraint matches every number.
+ */
+static Boolean DoesNumberMatchStringConstraint (Int4 num, StringConstraintPtr scp)
+{
+  Char digits[15];
+  Boolean matches = TRUE;
+
+  if (!IsStringConstraintEmpty (scp)) {
+    sprintf (digits, "%d", num);
+    matches = DoesStringMatchConstraint(digits, scp);
+  }
+  return matches;
+}
+
+
static Boolean DoesObjectIdMatchStringConstraint (ObjectIdPtr oip, StringConstraintPtr scp)
{
- Char num[15];
Boolean rval = FALSE;
if (oip == NULL) {
@@ -6687,8 +6750,7 @@ static Boolean DoesObjectIdMatchStringConstraint (ObjectIdPtr oip, StringConstra
} else if (IsStringConstraintEmpty (scp)) {
return TRUE;
} else if (oip->id > 0) {
- sprintf (num, "%d", oip->id);
- rval = DoesStringMatchConstraint (num, scp);
+ rval = DoesNumberMatchStringConstraint (oip->id, scp);
} else {
rval = DoesStringMatchConstraint (oip->str, scp);
}
@@ -7185,7 +7247,7 @@ NLM_EXTERN CharPtr GetRNARefProductString (RnaRefPtr rrp, StringConstraintPtr sc
if (rrp->ext.choice == 1) {
str = StringSave (rrp->ext.value.ptrvalue);
} else if (rrp->ext.choice == 2) {
- str = GetTrnaProductString (rrp->ext.value.ptrvalue);
+ str = StringSaveNoNull (GetTrnaProductString (rrp->ext.value.ptrvalue));
} else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL) {
if (!StringHasNoText (rgp->product)) {
str = StringSave (rgp->product);
@@ -7306,7 +7368,7 @@ NLM_EXTERN Boolean SetRNARefProductString (RnaRefPtr rrp, StringConstraintPtr sc
rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
rval = SetStringValue (&(rgp->product), new_val, existing_text);
} else if (rrp->ext.choice == 2) {
- tmp = StringSave (GetTrnaProductString (rrp->ext.value.ptrvalue));
+ tmp = StringSaveNoNull (GetTrnaProductString (rrp->ext.value.ptrvalue));
if (DoesStringMatchConstraint (tmp, scp)
&& SetStringValue (&tmp, new_val, existing_text)) {
@@ -7534,6 +7596,87 @@ static Boolean RemovetRNACodons_Recognized (SeqFeatPtr sfp)
}
+/*
+M A or C
+R A or G
+W A or T
+S C or G
+Y C or T
+K G or T
+V A or C or G
+H A or C or T
+D A or G or T
+B C or G or T
+X G or A or T or C
+N G or A or T or C
+*/
+/* One ambiguity letter together with the letters it may be replaced by. */
+typedef struct ambiguitychar {
+ Char ch; /* the ambiguity letter */
+ CharPtr replacements; /* string listing every letter ch can stand for */
+} AmbiguityCharData, PNTR AmbiguityCharPtr;
+
+
+/* Ambiguity letters and their expansions, as scanned by ExpandWobbleCodon.
+ * The list is closed by a {'\0', NULL} entry, which is where the scan
+ * stops.  Only upper-case letters are listed.
+ */
+static AmbiguityCharData s_AmbiguityChars[] = {
+ {'M', "AC"},
+ {'R', "AG"},
+ {'W', "AT"},
+ {'S', "CG"},
+ {'Y', "CT"},
+ {'K', "GT"},
+ {'V', "ACG"},
+ {'H', "ACT"},
+ {'D', "AGT"},
+ {'B', "CGT"},
+ {'X', "GATC"},
+ {'N', "GATC"},
+ {'\0', NULL}
+};
+
+
+/* Expand a codon that may contain ambiguity letters into every codon it
+ * can stand for, using the s_AmbiguityChars table.
+ * Returns a ValNode list in which each data.ptrvalue is a newly saved
+ * codon string and each choice holds the value IndexForCodon returns for
+ * that string with Seq_code_iupacna.  Returns NULL when codon has no
+ * text.
+ */
+static ValNodePtr ExpandWobbleCodon (CharPtr codon)
+{
+  ValNodePtr expanded = NULL, next_round, node;
+  Int4 pos, entry, codon_len;
+  CharPtr current, options, variant;
+
+  if (StringHasNoText (codon)) {
+    return NULL;
+  }
+  codon_len = StringLen (codon);
+  ValNodeAddPointer (&expanded, 0, StringSave (codon));
+
+  /* one pass per position: every codon collected so far is replaced by
+   * its expansions at that position */
+  for (pos = 0; pos < codon_len; pos++) {
+    next_round = NULL;
+    for (node = expanded; node != NULL; node = node->next) {
+      current = node->data.ptrvalue;
+
+      /* look up the replacement letters for this position, if any */
+      options = NULL;
+      for (entry = 0; s_AmbiguityChars[entry].ch != 0; entry++) {
+        if (current[pos] == s_AmbiguityChars[entry].ch) {
+          options = s_AmbiguityChars[entry].replacements;
+          break;
+        }
+      }
+
+      if (options == NULL) {
+        /* not an ambiguity letter: carry the codon over unchanged */
+        ValNodeAddPointer (&next_round, 0, StringSave (current));
+      } else {
+        for (; *options != 0; options++) {
+          variant = StringSave (current);
+          variant[pos] = *options;
+          ValNodeAddPointer (&next_round, 0, variant);
+        }
+      }
+    }
+    ValNodeFreeData (expanded);
+    expanded = next_round;
+  }
+
+  for (node = expanded; node != NULL; node = node->next) {
+    node->choice = IndexForCodon (node->data.ptrvalue, Seq_code_iupacna);
+  }
+  return expanded;
+}
+
static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Ptr codons)
{
@@ -7541,7 +7684,7 @@ static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Pt
Char ch;
Boolean rval = TRUE;
Uint1 codon[4];
- Uint1 code;
+ ValNodePtr wobble_list, vnp;
if (StringHasNoText (str) || codons == NULL) {
return FALSE;
@@ -7580,12 +7723,19 @@ static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Pt
if (rval) {
codon [q] = 0;
if (q == 3) {
- code = IndexForCodon (codon, Seq_code_iupacna);
- if (code == INVALID_RESIDUE) {
+ wobble_list = ExpandWobbleCodon(codon);
+ for (vnp = wobble_list; vnp != NULL && codon_num < 6 && rval; vnp = vnp->next) {
+ if (vnp->choice == INVALID_RESIDUE) {
+ rval = FALSE;
+ } else {
+ codons [codon_num++] = vnp->choice;
+ }
+ }
+ if (vnp != NULL) {
+ /* too many ambiguities */
rval = FALSE;
- } else {
- codons [codon_num++] = code;
}
+ wobble_list = ValNodeFreeData (wobble_list);
}
str += 3;
while (isspace (*str)) {
@@ -7606,7 +7756,7 @@ static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Pt
}
-static Boolean SettRNACodons_Recognized (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
+NLM_EXTERN Boolean SettRNACodons_Recognized (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
RnaRefPtr rrp;
tRNAPtr trp;
@@ -7719,12 +7869,12 @@ static CharPtr GettRNACodonsRecognized (SeqFeatPtr sfp, StringConstraintPtr scp)
for (j = 0; j < 6; j++) {
if (trp->codon [j] < 64) {
- /* Note - it is important to set the fourth character in the codon array to NULL
- * because CodonForIndex only fills in the three characters of actual codon,
- * so if you StringCpy the codon array and the NULL character is not found after
- * the three codon characters, you will write in memory you did not intend to.
- */
- codon [3] = 0;
+ /* Note - it is important to set the fourth character in the codon array to NULL
+ * because CodonForIndex only fills in the three characters of actual codon,
+ * so if you StringCpy the codon array and the NULL character is not found after
+ * the three codon characters, you will write in memory you did not intend to.
+ */
+ codon [3] = 0;
if (CodonForIndex (trp->codon [j], Seq_code_iupacna, codon)) {
if (buf[0] != 0) {
StringCat (buf, ", ");
@@ -8383,8 +8533,9 @@ static CharPtr GetQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, Stri
}
/* pseudo */
- if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
- || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue)))
+ /* str == NULL has to guard both alternatives; without the extra parentheses
+  * && binds tighter than || and the illegal-qual test would run even when a
+  * value has already been found */
+ if (str == NULL
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue))))
{
str = GetFirstGBQualMatch (sfp->qual, "pseudogene", 0, scp);
if (str == NULL && sfp->pseudo) {
@@ -8512,13 +8663,16 @@ static CharPtr GetQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, Stri
/* coding region fields */
/* transl_except */
- if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except
- && sfp->data.choice == SEQFEAT_CDREGION)
+ if (str == NULL
+ && field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_transl_except
+ && sfp->data.choice == SEQFEAT_CDREGION)
{
str = GetCodeBreakString (sfp);
}
/* transl_table */
- if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
+ if (str == NULL
+ && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
&& sfp->data.choice == SEQFEAT_CDREGION
&& (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL)
{
@@ -8529,7 +8683,8 @@ static CharPtr GetQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, Stri
}
}
/* translation */
- if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_translation
+ if (str == NULL
+ && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_translation
&& sfp->data.choice == SEQFEAT_CDREGION)
{
if (sfp->product != NULL)
@@ -9085,6 +9240,7 @@ static SeqFeatPtr CreateGeneForFeature (SeqFeatPtr sfp)
{
BioseqPtr bsp;
SeqFeatPtr gene = NULL;
+ SeqLocPtr slp_new;
if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) {
return NULL;
@@ -9093,6 +9249,11 @@ static SeqFeatPtr CreateGeneForFeature (SeqFeatPtr sfp)
if (bsp != NULL) {
gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, sfp->location);
if (gene != NULL) {
+ slp_new = SeqLocMerge (bsp, gene->location, NULL, TRUE, FALSE, FALSE);
+ if (slp_new != NULL && slp_new != gene->location) {
+ gene->location = SeqLocFree (gene->location);
+ gene->location = slp_new;
+ }
gene->data.value.ptrvalue = GeneRefNew();
}
}
@@ -9121,7 +9282,7 @@ static Boolean SetCitationTextOnFeature (SeqFeatPtr sfp, StringConstraintPtr scp
return FALSE;
}
- if (!IsAllDigits (value)) {
+ if (!StringIsAllDigits (value)) {
return FALSE;
}
@@ -9365,14 +9526,18 @@ static Boolean SetQualOnFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, String
gene->pseudo = TRUE;
rval = TRUE;
}
- rval |= SetStringInGBQualList (&(gene->qual), field, scp, value, existing_text);
+ if (StringICmp (value, "Unqualified") != 0) {
+ rval |= SetStringInGBQualList (&(gene->qual), field, scp, value, existing_text);
+ }
return rval;
} else {
if (!sfp->pseudo) {
sfp->pseudo = TRUE;
rval = TRUE;
}
- rval |= SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text);
+ if (StringICmp (value, "Unqualified") != 0) {
+ rval |= SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text);
+ }
return rval;
}
}
@@ -9551,7 +9716,7 @@ static Boolean SetQualOnFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, String
if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
&& sfp->data.choice == SEQFEAT_CDREGION
&& (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL
- && IsAllDigits (value))
+ && StringIsAllDigits (value))
{
if (crp->genetic_code != NULL && existing_text == ExistingTextOption_leave_old) {
matched_term = TRUE;
@@ -10833,7 +10998,7 @@ NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoi
vn.data.intvalue = Source_qual_orgmod_note;
str = GetSourceQualFromBioSource (biop, &vn, constraint);
}
- } else if (scp->data.intvalue == Source_qual_all_quals) {
+ } else if (scp->data.intvalue == Source_qual_all_quals || scp->data.intvalue == Source_qual_all_primers) {
/* will not do */
} else if (scp->data.intvalue == Source_qual_fwd_primer_name
|| scp->data.intvalue == Source_qual_fwd_primer_seq
@@ -10986,7 +11151,8 @@ NLM_EXTERN ValNodePtr GetMultipleSourceQualsFromBioSource (BioSourcePtr biop, So
if (str != NULL) {
ValNodeAddPointer (&val_list, 0, str);
}
- } else if (scp->data.intvalue == Source_qual_all_quals) {
+ } else if (scp->data.intvalue == Source_qual_all_quals
+ || scp->data.intvalue == Source_qual_all_primers) {
/* will not do */
} else {
orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield);
@@ -11064,7 +11230,8 @@ static Boolean RemoveAllSourceQualsFromBioSource (BioSourcePtr biop, StringConst
for (i = 0; i < NUM_srcqual_scqual; i++) {
if (srcqual_scqual[i].srcqual != Source_qual_all_quals
- && srcqual_scqual[i].srcqual != Source_qual_all_notes) {
+ && srcqual_scqual[i].srcqual != Source_qual_all_notes
+ && srcqual_scqual[i].srcqual != Source_qual_all_primers) {
vn.data.intvalue = srcqual_scqual[i].srcqual;
rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
}
@@ -11163,6 +11330,17 @@ NLM_EXTERN Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualC
rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
vn.data.intvalue = Source_qual_orgmod_note;
rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
+ } else if (scp->data.intvalue == Source_qual_all_primers) {
+ vn.choice = SourceQualChoice_textqual;
+ vn.data.intvalue = Source_qual_fwd_primer_name;
+ vn.next = NULL;
+ rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
+ vn.data.intvalue = Source_qual_rev_primer_name;
+ rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
+ vn.data.intvalue = Source_qual_fwd_primer_seq;
+ rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
+ vn.data.intvalue = Source_qual_rev_primer_seq;
+ rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
} else if (scp->data.intvalue == Source_qual_all_quals) {
rval |= RemoveAllSourceQualsFromBioSource (biop, constraint);
} else {
@@ -11362,6 +11540,11 @@ NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoice
rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text);
vn.data.intvalue = Source_qual_orgmod_note;
rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text);
+ } else if (scp->data.intvalue == Source_qual_all_primers) {
+ rval = SetPrimerValueInBioSource (biop, Source_qual_fwd_primer_name, constraint, value, existing_text);
+ rval |= SetPrimerValueInBioSource (biop, Source_qual_fwd_primer_seq, constraint, value, existing_text);
+ rval |= SetPrimerValueInBioSource (biop, Source_qual_rev_primer_name, constraint, value, existing_text);
+ rval |= SetPrimerValueInBioSource (biop, Source_qual_rev_primer_seq, constraint, value, existing_text);
} else if (scp->data.intvalue == Source_qual_fwd_primer_name
|| scp->data.intvalue == Source_qual_fwd_primer_seq
|| scp->data.intvalue == Source_qual_rev_primer_name
@@ -11818,7 +12001,7 @@ static CharPtr GetFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstrai
for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
if (sfp != NULL) {
- str = GetFirstGBQualMatch (sfp->qual, "old-locus-tag", 0, scp);
+ str = GetFirstGBQualMatch (sfp->qual, "old_locus_tag", 0, scp);
}
}
break;
@@ -12064,7 +12247,7 @@ static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConst
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
if (sfp != NULL) {
- rval |= RemoveGBQualMatch (&(sfp->qual), "old-locus-tag", 0, scp);
+ rval |= RemoveGBQualMatch (&(sfp->qual), "old_locus_tag", 0, scp);
}
}
break;
@@ -12213,7 +12396,6 @@ static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConst
static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c)
{
SeqFeatPtr gene = NULL, sfp = NULL;
- BioseqPtr bsp;
ValNodePtr vnp;
if (c == NULL) return NULL;
@@ -12224,15 +12406,7 @@ static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c)
for (vnp = c->mrna_list; vnp != NULL && sfp == NULL; vnp = vnp->next) {
sfp = vnp->data.ptrvalue;
}
- if (sfp != NULL) {
- bsp = BioseqFindFromSeqLoc (sfp->location);
- if (bsp != NULL) {
- gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, sfp->location);
- if (gene != NULL) {
- gene->data.value.ptrvalue = GeneRefNew();
- }
- }
- }
+ gene = CreateGeneForFeature (sfp);
return gene;
}
@@ -12337,12 +12511,14 @@ static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraint
}
break;
case CDSGeneProt_field_gene_old_locus_tag:
+ ffield = FeatureFieldFromCDSGeneProtField (field);
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
if (sfp != NULL) {
- rval |= RemoveGBQualMatch (&(sfp->qual), "old-locus-tag", 0, scp);
+ rval |= SetStringInGBQualList (&(sfp->qual), ffield->field, scp, value, existing_text);
}
}
+ ffield = FeatureFieldFree (ffield);
break;
case CDSGeneProt_field_mrna_product:
for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) {
@@ -12765,7 +12941,7 @@ static Boolean SetGenomeProjectIdOnBioseq (BioseqPtr bsp, StringConstraintPtr sc
UserFieldPtr ufp;
Boolean rval = FALSE;
- if (bsp == NULL || !IsAllDigits (value)) {
+ if (bsp == NULL || !StringIsAllDigits (value)) {
return FALSE;
}
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
@@ -12781,7 +12957,7 @@ static Boolean SetGenomeProjectIdOnBioseq (BioseqPtr bsp, StringConstraintPtr sc
sprintf (buf, "%d", ufp->data.intvalue);
if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
tmp = StringSave (buf);
- if (SetStringValue (&tmp, value, existing_text) && IsAllDigits (tmp)) {
+ if (SetStringValue (&tmp, value, existing_text) && StringIsAllDigits (tmp)) {
ufp->data.intvalue = atoi (tmp);
rval = TRUE;
}
@@ -12821,7 +12997,7 @@ NLM_EXTERN CharPtr GetBioProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr
{
ufp = uop->data;
while (ufp != NULL) {
- if (ufp->label != NULL && StringCmp (ufp->label->str, "BioProject") == 0)
+ if (ufp->label != NULL && StringCmp (ufp->label->str, "BioProject") == 0)
{
if (ufp->choice == 1)
{
@@ -12830,15 +13006,15 @@ NLM_EXTERN CharPtr GetBioProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr
{
return StringSave (val);
}
- }
+ }
else if (ufp->choice == 7 && ufp->num > 0 && (cpp = (CharPtr PNTR) ufp->data.ptrvalue) != NULL)
{
- for (i = 0; i < ufp->num; i++)
+ for (i = 0; i < ufp->num; i++)
{
- if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp))
+ if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp))
{
- return StringSave (cpp[i]);
- }
+ return StringSave (cpp[i]);
+ }
}
}
}
@@ -12925,7 +13101,7 @@ static CharPtr DbnameValFromPrefixOrSuffix (CharPtr val)
}
-static Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp)
+NLM_EXTERN Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp)
{
if (ufp == NULL || ufp->label == NULL) {
return FALSE;
@@ -13170,8 +13346,10 @@ static Boolean SetStructuredCommentFieldOnUserObject (UserObjectPtr uop, Structu
curr->data.ptrvalue = StringSave ("");
if (last == NULL) {
ufp = uop->data;
- curr->next = ufp->next;
- ufp->next = curr;
+ if (ufp != NULL) {
+ curr->next = ufp->next;
+ ufp->next = curr;
+ }
} else {
curr->next = last->next;
last->next = curr;
@@ -13193,7 +13371,8 @@ static DBLinkNameData dblink_names[] = {
{ DBLink_field_type_bio_sample , "BioSample" } ,
{ DBLink_field_type_probe_db , "ProbeDB" } ,
{ DBLink_field_type_sequence_read_archve , "Sequence Read Archive" } ,
- { DBLink_field_type_bio_project , "BioProject" }
+ { DBLink_field_type_bio_project , "BioProject" } ,
+ { DBLink_field_type_assembly , "Assembly" }
};
#define NUM_dblinkname sizeof (dblink_names) / sizeof (DBLinkNameData)
@@ -13276,6 +13455,47 @@ static CharPtr GetDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, Stri
}
+/* Collect every value stored under one DBLink field of a user object.
+ * The label for 'field' is looked up once; each user field of 'uop' carrying
+ * that label contributes the entries of its string array (choice 7) or of
+ * its integer array (choice 8, printed in decimal) that pass 'scp'.  An
+ * empty constraint accepts every entry.
+ * Returns a ValNode list of StringSave'd copies, or NULL when 'uop' is not
+ * a DBLink user object, 'field' is below 1, or nothing qualified.
+ */
+static ValNodePtr GetMultipleDBLinkFieldValuesFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp)
+{
+  ValNodePtr   matches = NULL;
+  UserFieldPtr ufp;
+  CharPtr      wanted_label;
+  CharPtr PNTR strs;
+  Int4Ptr      nums;
+  Char         numbuf[15];
+  Int4         k;
+
+  if (!IsUserObjectDBLink(uop) || field < 1) {
+    return NULL;
+  }
+
+  wanted_label = GetDBLinkNameFromDBLinkFieldType (field);
+  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+    /* only user fields labelled with the requested DBLink name matter */
+    if (ufp->label == NULL || StringCmp (ufp->label->str, wanted_label) != 0) {
+      continue;
+    }
+    switch (ufp->choice) {
+      case 7:
+        /* array of strings */
+        if (ufp->num <= 0) {
+          break;
+        }
+        strs = (CharPtr PNTR) ufp->data.ptrvalue;
+        if (strs == NULL) {
+          break;
+        }
+        for (k = 0; k < ufp->num; k++) {
+          if (!IsStringConstraintEmpty (scp) && !DoesStringMatchConstraint (strs[k], scp)) {
+            continue;
+          }
+          ValNodeAddPointer (&matches, 0, StringSave (strs[k]));
+        }
+        break;
+      case 8:
+        /* array of integers, compared and returned as decimal text */
+        if (ufp->num <= 0) {
+          break;
+        }
+        nums = (Int4Ptr) ufp->data.ptrvalue;
+        if (nums == NULL) {
+          break;
+        }
+        for (k = 0; k < ufp->num; k++) {
+          sprintf (numbuf, "%d", nums[k]);
+          if (!IsStringConstraintEmpty (scp) && !DoesStringMatchConstraint (numbuf, scp)) {
+            continue;
+          }
+          ValNodeAddPointer (&matches, 0, StringSave (numbuf));
+        }
+        break;
+      default:
+        break;
+    }
+  }
+  return matches;
+}
+
+
static Boolean RemoveDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp)
{
UserFieldPtr curr, prev_type = NULL, next_type;
@@ -13347,9 +13567,9 @@ static Boolean SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, String
Boolean rval = FALSE;
CharPtr newval;
CharPtr field_name;
- CharPtr PNTR cpp;
+ CharPtr PNTR cpp = NULL;
CharPtr PNTR new_cpp;
- Int4Ptr ipp, new_ipp;
+ Int4Ptr ipp = NULL, new_ipp;
Int4 i;
Char buf[15];
@@ -13374,24 +13594,26 @@ static Boolean SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, String
}
if (!rval && IsStringConstraintEmpty (scp)) {
new_cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (curr->num + 1));
- for (i = 0; i < curr->num; i++) {
- new_cpp[i] = cpp[i];
- cpp[i] = NULL;
+ if (cpp != NULL) {
+ for (i = 0; i < curr->num; i++) {
+ new_cpp[i] = cpp[i];
+ cpp[i] = NULL;
+ }
+ new_cpp[i] = StringSave (value);
}
- new_cpp[i] = StringSave (value);
cpp = MemFree (cpp);
curr->data.ptrvalue = new_cpp;
curr->num++;
rval = TRUE;
}
- } else if (curr->choice == 8 && IsAllDigits (value)) {
+ } else if (curr->choice == 8 && StringIsAllDigits (value)) {
if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL && existing_text != ExistingTextOption_add_qual) {
for (i = 0; i < curr->num; i++) {
sprintf (buf, "%d", ipp[i]);
if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
newval = StringSave (buf);
SetStringValue (&newval, value, existing_text);
- if (IsAllDigits (newval)) {
+ if (StringIsAllDigits (newval)) {
ipp[i] = atoi (newval);
rval = TRUE;
}
@@ -13401,10 +13623,12 @@ static Boolean SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, String
}
if (!rval && IsStringConstraintEmpty (scp)) {
new_ipp = (Int4Ptr) MemNew (sizeof (Int4) * (curr->num + 1));
- for (i = 0; i < curr->num; i++) {
- new_ipp[i] = ipp[i];
+ if (ipp != NULL) {
+ for (i = 0; i < curr->num; i++) {
+ new_ipp[i] = ipp[i];
+ }
+ new_ipp[i] = atoi (value);
}
- new_ipp[i] = atoi (value);
ipp = MemFree (ipp);
curr->data.ptrvalue = new_ipp;
curr->num++;
@@ -13414,7 +13638,7 @@ static Boolean SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, String
}
last = curr;
}
- if (!rval && IsStringConstraintEmpty (scp) && (field != DBLink_field_type_trace_assembly || IsAllDigits (value))) {
+ if (!rval && IsStringConstraintEmpty (scp) && (field != DBLink_field_type_trace_assembly || StringIsAllDigits (value))) {
curr = UserFieldNew ();
curr->label = ObjectIdNew ();
curr->label->str = StringSave (field_name);
@@ -13652,7 +13876,7 @@ static CharPtr GetAuthorListStringEx (AuthListPtr alp, StringConstraintPtr scp,
}
-static CharPtr GetAuthorListString (AuthListPtr alp, StringConstraintPtr scp)
+NLM_EXTERN CharPtr GetAuthorListString (AuthListPtr alp, StringConstraintPtr scp)
{
return GetAuthorListStringEx (alp, scp, FALSE);
}
@@ -13823,7 +14047,7 @@ static NameStdPtr ReadNameFromString (CharPtr str, CharPtr PNTR next_name)
}
-static ValNodePtr ReadNameListFromString (CharPtr value)
+NLM_EXTERN ValNodePtr ReadNameListFromString (CharPtr value)
{
ValNodePtr names = NULL;
AuthorPtr ap;
@@ -14217,52 +14441,52 @@ static Boolean SetAffilPubField (AffilPtr ap, Int4 field, StringConstraintPtr sc
switch (field) {
case Publication_field_affiliation:
- if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) {
+ if (!StringHasNoText (ap->affil) || DoesStringMatchConstraint (ap->affil, scp)) {
rval = SetStringValue (&(ap->affil), value, existing_text);
}
break;
case Publication_field_affil_div:
- if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) {
+ if (!StringHasNoText (ap->div) || DoesStringMatchConstraint (ap->div, scp)) {
rval = SetStringValue (&(ap->div), value, existing_text);
}
break;
case Publication_field_affil_city:
- if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) {
+ if (!StringHasNoText (ap->city) || DoesStringMatchConstraint (ap->city, scp)) {
rval = SetStringValue (&(ap->city), value, existing_text);
}
break;
case Publication_field_affil_sub:
- if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) {
+ if (!StringHasNoText (ap->sub) || DoesStringMatchConstraint (ap->sub, scp)) {
rval = SetStringValue (&(ap->sub), value, existing_text);
}
break;
case Publication_field_affil_country:
- if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) {
+ if (!StringHasNoText (ap->country) || DoesStringMatchConstraint (ap->country, scp)) {
rval = SetStringValue (&(ap->country), value, existing_text);
}
break;
case Publication_field_affil_street:
- if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) {
+ if (!StringHasNoText (ap->street) || DoesStringMatchConstraint (ap->street, scp)) {
rval = SetStringValue (&(ap->street), value, existing_text);
}
break;
case Publication_field_affil_email:
- if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) {
+ if (!StringHasNoText (ap->email) || DoesStringMatchConstraint (ap->email, scp)) {
rval = SetStringValue (&(ap->email), value, existing_text);
}
break;
case Publication_field_affil_fax:
- if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) {
+ if (!StringHasNoText (ap->fax) || DoesStringMatchConstraint (ap->fax, scp)) {
rval = SetStringValue (&(ap->fax), value, existing_text);
}
break;
case Publication_field_affil_phone:
- if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) {
+ if (!StringHasNoText (ap->phone) || DoesStringMatchConstraint (ap->phone, scp)) {
rval = SetStringValue (&(ap->phone), value, existing_text);
}
break;
case Publication_field_affil_zipcode:
- if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) {
+ if (!StringHasNoText (ap->postal_code) || DoesStringMatchConstraint (ap->postal_code, scp)) {
rval = SetStringValue (&(ap->postal_code), value, existing_text);
}
break;
@@ -14293,9 +14517,11 @@ static CharPtr GetPubFieldFromImprint (ImprintPtr imprint, Int4 field, StringCon
}
break;
case Publication_field_date:
- str = PrintDate (imprint->date);
- if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
- str = MemFree (str);
+ if (imprint->date != NULL) {
+ str = PrintPartialOrCompleteDate (imprint->date);
+ if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
+ str = MemFree (str);
+ }
}
break;
}
@@ -14312,7 +14538,7 @@ static Boolean RemovePubDate (DatePtr PNTR pDate, StringConstraintPtr scp)
return FALSE;
}
- str = PrintDate (*pDate);
+ str = PrintPartialOrCompleteDate (*pDate);
if (!StringHasNoText (str) && DoesStringMatchConstraint (str, scp)) {
*pDate = DateFree (*pDate);
rval = TRUE;
@@ -14326,12 +14552,17 @@ static Boolean SetPubDate (DatePtr PNTR pDate, StringConstraintPtr scp, CharPtr
{
CharPtr tmp;
DatePtr dp = NULL;
+ Boolean made_new_date = FALSE;
Boolean rval = FALSE;
if (pDate == NULL) {
return FALSE;
}
- tmp = PrintDate (*pDate);
+ if (*pDate == NULL) {
+ *pDate = DateNew();
+ made_new_date = TRUE;
+ }
+ tmp = PrintPartialOrCompleteDate (*pDate);
if (DoesStringMatchConstraint (tmp, scp)
&& SetStringValue (&tmp, value, existing_text)) {
dp = ReadDateFromString (tmp);
@@ -14342,6 +14573,9 @@ static Boolean SetPubDate (DatePtr PNTR pDate, StringConstraintPtr scp, CharPtr
}
}
tmp = MemFree (tmp);
+ if (!rval && made_new_date) {
+ *pDate = DateFree (*pDate);
+ }
return rval;
}
@@ -14391,12 +14625,12 @@ static Boolean SetPubFieldOnImprint (ImprintPtr imprint, Int4 field, StringConst
}
break;
case Publication_field_issue:
- if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) {
+ if (StringHasNoText (imprint->issue) || DoesStringMatchConstraint (imprint->issue, scp)) {
rval = SetStringValue (&(imprint->issue), value, existing_text);
}
break;
case Publication_field_pages:
- if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) {
+ if (StringHasNoText (imprint->pages) || DoesStringMatchConstraint (imprint->pages, scp)) {
rval = SetStringValue (&(imprint->pages), value, existing_text);
}
break;
@@ -14574,6 +14808,9 @@ static Boolean SetPubFieldOnCitBook (CitBookPtr cbp, Int4 field, StringConstrain
SetValNodeChoices (cbp->title, 1);
break;
case Publication_field_authors:
+ if (cbp->authors == NULL) {
+ cbp->authors = AuthListNew();
+ }
rval = SetAuthorListFromString (cbp->authors, scp, value, existing_text);
break;
case Publication_field_affiliation:
@@ -14586,14 +14823,21 @@ static Boolean SetPubFieldOnCitBook (CitBookPtr cbp, Int4 field, StringConstrain
case Publication_field_affil_fax:
case Publication_field_affil_phone:
case Publication_field_affil_zipcode:
- if (cbp->authors != NULL) {
- rval = SetAffilPubField (cbp->authors->affil, field, scp, value, existing_text);
+ if (cbp->authors == NULL) {
+ cbp->authors = AuthListNew();
+ }
+ if (cbp->authors->affil == NULL) {
+ cbp->authors->affil = AffilNew();
}
+ rval = SetAffilPubField (cbp->authors->affil, field, scp, value, existing_text);
break;
case Publication_field_volume:
case Publication_field_issue:
case Publication_field_pages:
case Publication_field_date:
+ if (cbp->imp == NULL) {
+ cbp->imp = ImprintNew();
+ }
rval = SetPubFieldOnImprint (cbp->imp, field, scp, value, existing_text);
break;
}
@@ -14613,6 +14857,10 @@ NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstra
CharPtr str = NULL;
if (the_pub == NULL || the_pub->data.ptrvalue == NULL) return NULL;
+
+ if (field == Publication_field_pub_class) {
+ return GetPubclassFromPub(the_pub);
+ }
switch (the_pub->choice) {
case PUB_Gen :
@@ -14639,7 +14887,7 @@ NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstra
case Publication_field_affil_fax:
case Publication_field_affil_phone:
case Publication_field_affil_zipcode:
- if (cgp->authors != NULL) {
+ if (cgp->authors != NULL && cgp->authors->affil != NULL) {
str = GetPubFieldFromAffil (cgp->authors->affil, field, scp);
}
break;
@@ -14663,7 +14911,7 @@ NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstra
break;
case Publication_field_date:
if (cgp->date != NULL) {
- str = PrintDate (cgp->date);
+ str = PrintPartialOrCompleteDate (cgp->date);
if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
str = MemFree (str);
}
@@ -14708,7 +14956,7 @@ NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstra
}
break;
case Publication_field_date:
- str = PrintDate (csp->date);
+ str = PrintPartialOrCompleteDate (csp->date);
if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
str = MemFree (str);
}
@@ -14789,6 +15037,12 @@ NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstra
break;
}
break;
+ case PUB_PMid:
+ if (field == Publication_field_pmid) {
+ str = (CharPtr) MemNew (sizeof (Char) * 15);
+ sprintf (str, "%d", the_pub->data.intvalue);
+ }
+ break;
default :
break;
}
@@ -14796,6 +15050,34 @@ NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstra
}
+/* Remove the PubMed article IDs of 'cap' whose number satisfies 'scp'.
+ * Every matching node is unlinked from cap->ids and released with
+ * ArticleIdFree.
+ * Returns TRUE if at least one ID was removed; FALSE otherwise, including
+ * when 'cap' is NULL.
+ */
+static Boolean RemovePMIDOnCitArt (CitArtPtr cap, StringConstraintPtr scp)
+{
+  Boolean    rval = FALSE;
+  ValNodePtr vnp, vnp_prev = NULL, vnp_next;
+
+  if (cap == NULL) {
+    return FALSE;
+  }
+
+  for (vnp = cap->ids; vnp != NULL; vnp = vnp_next) {
+    /* remember the successor now; vnp may be freed below */
+    vnp_next = vnp->next;
+    if (vnp->choice == ARTICLEID_PUBMED && DoesNumberMatchStringConstraint (vnp->data.intvalue, scp)) {
+      if (vnp_prev == NULL) {
+        /* no node has been kept yet, so vnp is the list head: move the
+         * head itself, otherwise cap->ids keeps pointing at the node
+         * that is freed below */
+        cap->ids = vnp_next;
+      } else {
+        vnp_prev->next = vnp_next;
+      }
+      /* detach before freeing (presumably so that ArticleIdFree only
+       * releases this one node -- confirm against its implementation) */
+      vnp->next = NULL;
+      vnp = ArticleIdFree (vnp);
+      rval = TRUE;
+    } else {
+      vnp_prev = vnp;
+    }
+  }
+  return rval;
+}
+
+
static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp)
{
CitGenPtr cgp;
@@ -14807,6 +15089,10 @@ static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstrai
Char num[15];
if (the_pub == NULL) return FALSE;
+
+ if (field == Publication_field_pub_class) {
+ return SetPubclassOnPub(the_pub, "unpublished");
+ }
switch (the_pub->choice) {
case PUB_Gen :
@@ -14910,6 +15196,9 @@ static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstrai
case PUB_Article :
cap = (CitArtPtr) the_pub->data.ptrvalue;
switch (field) {
+ case Publication_field_pmid:
+ rval = RemovePMIDOnCitArt (cap, scp);
+ break;
case Publication_field_title:
rval = RemoveValNodeStringMatch (&(cap->title), scp);
break;
@@ -14975,6 +15264,11 @@ static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstrai
break;
}
break;
+ case PUB_PMid:
+ if (field == Publication_field_pmid) {
+ the_pub->data.intvalue = 0;
+ }
+ break;
default :
break;
}
@@ -14982,6 +15276,32 @@ static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstrai
}
+/* Store a PubMed ID on a Cit-art.
+ * 'value' must consist of digits only.  Existing PubMed IDs are overwritten
+ * only when 'existing_text' is ExistingTextOption_replace_old and the
+ * current number satisfies 'scp'.  If the article has no PubMed ID at all
+ * and the constraint is empty, a new ARTICLEID_PUBMED entry is added to
+ * cap->ids.
+ * Returns TRUE when an ID was replaced or added, FALSE otherwise.
+ */
+static Boolean SetPMIDOnCitArt (CitArtPtr cap, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
+{
+  ValNodePtr id;
+  Boolean    has_pmid = FALSE;
+  Boolean    changed = FALSE;
+
+  if (cap == NULL || !StringIsAllDigits(value)) {
+    return FALSE;
+  }
+
+  for (id = cap->ids; id != NULL; id = id->next) {
+    if (id->choice != ARTICLEID_PUBMED) {
+      continue;
+    }
+    has_pmid = TRUE;
+    if (existing_text != ExistingTextOption_replace_old) {
+      continue;
+    }
+    if (!DoesNumberMatchStringConstraint(id->data.intvalue, scp)) {
+      continue;
+    }
+    id->data.intvalue = atoi (value);
+    changed = TRUE;
+  }
+
+  /* a new ID is only added when none exists and nothing restricts the edit */
+  if (has_pmid || !IsStringConstraintEmpty (scp)) {
+    return changed;
+  }
+  ValNodeAddInt (&(cap->ids), ARTICLEID_PUBMED, atoi (value));
+  return TRUE;
+}
+
+
static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
CitGenPtr cgp;
@@ -14992,6 +15312,10 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
Boolean rval = FALSE;
if (the_pub == NULL || value == NULL) return FALSE;
+
+ if (field == Publication_field_pub_class) {
+ return SetPubclassOnPub(the_pub, value);
+ }
switch (the_pub->choice) {
case PUB_Gen :
@@ -15003,6 +15327,9 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
}
break;
case Publication_field_authors:
+ if (cgp->authors == NULL) {
+ cgp->authors = AuthListNew();
+ }
rval = SetAuthorListFromString (cgp->authors, scp, value, existing_text);
break;
case Publication_field_affiliation:
@@ -15015,9 +15342,13 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
case Publication_field_affil_fax:
case Publication_field_affil_phone:
case Publication_field_affil_zipcode:
- if (cgp->authors != NULL) {
- rval = SetAffilPubField (cgp->authors->affil, field, scp, value, existing_text);
+ if (cgp->authors == NULL) {
+ cgp->authors = AuthListNew();
+ }
+ if (cgp->authors->affil == NULL) {
+ cgp->authors->affil = AffilNew();
}
+ rval = SetAffilPubField (cgp->authors->affil, field, scp, value, existing_text);
break;
case Publication_field_journal:
rval = SetStringsInValNodeStringList (&(cgp->journal), scp, value, existing_text);
@@ -15060,6 +15391,9 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
}
break;
case Publication_field_authors:
+ if (csp->authors == NULL) {
+ csp->authors = AuthListNew();
+ }
rval = SetAuthorListFromString (csp->authors, scp, value, existing_text);
break;
case Publication_field_affiliation:
@@ -15072,9 +15406,13 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
case Publication_field_affil_fax:
case Publication_field_affil_phone:
case Publication_field_affil_zipcode:
- if (csp->authors != NULL) {
- rval = SetAffilPubField (csp->authors->affil, field, scp, value, existing_text);
+ if (csp->authors == NULL) {
+ csp->authors = AuthListNew();
+ }
+ if (csp->authors->affil == NULL) {
+ csp->authors->affil = AffilNew();
}
+ rval = SetAffilPubField (csp->authors->affil, field, scp, value, existing_text);
break;
case Publication_field_date:
rval = SetPubDate (&(csp->date), scp, value, existing_text);
@@ -15084,11 +15422,17 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
case PUB_Article :
cap = (CitArtPtr) the_pub->data.ptrvalue;
switch (field) {
+ case Publication_field_pmid:
+ rval = SetPMIDOnCitArt (cap, scp, value, existing_text);
+ break;
case Publication_field_title:
rval = SetStringsInValNodeStringList (&(cap->title), scp, value, existing_text);
SetValNodeChoices (cap->title, 1);
break;
case Publication_field_authors:
+ if (cap->authors == NULL) {
+ cap->authors = AuthListNew();
+ }
rval = SetAuthorListFromString (cap->authors, scp, value, existing_text);
break;
case Publication_field_affiliation:
@@ -15101,9 +15445,13 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
case Publication_field_affil_fax:
case Publication_field_affil_phone:
case Publication_field_affil_zipcode:
- if (cap->authors != NULL) {
- rval = SetAffilPubField (cap->authors->affil, field, scp, value, existing_text);
+ if (cap->authors == NULL) {
+ cap->authors = AuthListNew();
}
+ if (cap->authors->affil == NULL) {
+ cap->authors->affil = AffilNew();
+ }
+ rval = SetAffilPubField (cap->authors->affil, field, scp, value, existing_text);
break;
default:
if (cap->from == 1) {
@@ -15131,6 +15479,9 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
}
break;
case Publication_field_authors:
+ if (cpp->authors == NULL) {
+ cpp->authors = AuthListNew();
+ }
rval = SetAuthorListFromString (cpp->authors, scp, value, existing_text);
break;
case Publication_field_affiliation:
@@ -15143,12 +15494,23 @@ static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr
case Publication_field_affil_fax:
case Publication_field_affil_phone:
case Publication_field_affil_zipcode:
- if (cpp->authors != NULL) {
- rval = SetAffilPubField (cpp->authors->affil, field, scp, value, existing_text);
+ if (cpp->authors == NULL) {
+ cpp->authors = AuthListNew();
}
+ if (cpp->authors->affil == NULL) {
+ cpp->authors->affil = AffilNew();
+ }
+ rval = SetAffilPubField (cpp->authors->affil, field, scp, value, existing_text);
break;
}
break;
+ case PUB_PMid:
+ if (field == Publication_field_pmid && StringIsAllDigits (value) && DoesNumberMatchStringConstraint(the_pub->data.intvalue, scp)
+ && existing_text == ExistingTextOption_replace_old) {
+ the_pub->data.intvalue = atoi (value);
+ rval = TRUE;
+ }
+ break;
default :
break;
}
@@ -15191,7 +15553,7 @@ static Boolean RemovePubFieldFromObject (Uint1 choice, Pointer data, Int4 field,
{
Boolean rval = FALSE;
PubdescPtr pdp = NULL;
- PubPtr pub;
+ PubPtr pub, pub_prev = NULL, pub_next;
SeqFeatPtr sfp;
SeqDescrPtr sdp;
@@ -15210,8 +15572,22 @@ static Boolean RemovePubFieldFromObject (Uint1 choice, Pointer data, Int4 field,
if (pdp == NULL) return FALSE;
-  for (pub = pdp->pub; pub != NULL; pub = pub->next) {
+  /* Walk the pub list with a saved successor: a PUB_PMid entry whose value
+   * RemovePubFieldFromPub reset to 0 is unlinked from pdp->pub and freed
+   * inside the loop. */
+  pub = pdp->pub;
+  while (pub != NULL) {
+    pub_next = pub->next;
     rval |= RemovePubFieldFromPub (pub, field, scp);
+    if (field == Publication_field_pmid && pub->choice == PUB_PMid && pub->data.intvalue == 0) {
+      if (pub_prev == NULL) {
+        pdp->pub = pub_next;
+      } else {
+        pub_prev->next = pub_next;
+      }
+      pub->next = NULL;
+      pub = PubFree (pub);
+    } else {
+      pub_prev = pub;
+    }
+    /* advance via the saved successor: after the removal branch pub holds
+     * PubFree's return value, not a live node, so pub->next must not be read */
+    pub = pub_next;
   }
return rval;
}
@@ -15223,7 +15599,7 @@ static Boolean SetPubFieldOnObject (Uint1 choice, Pointer data, Int4 field, Stri
PubdescPtr pdp = NULL;
PubPtr pub;
SeqFeatPtr sfp;
- SeqDescrPtr sdp;
+ SeqDescrPtr sdp = NULL;
if (data == NULL) return FALSE;
if (choice == OBJ_SEQFEAT) {
@@ -15243,11 +15619,17 @@ static Boolean SetPubFieldOnObject (Uint1 choice, Pointer data, Int4 field, Stri
for (pub = pdp->pub; pub != NULL; pub = pub->next) {
rval |= SetPubFieldOnPub (pub, field, scp, value, existing_text);
}
+ if (!rval && field == Publication_field_pmid && IsStringConstraintEmpty (scp) && StringIsAllDigits(value)) {
+ /* first, set pub class to published for pre-existing pub */
+ if (pdp->pub != NULL && pdp->pub->choice == PUB_Gen) {
+ SetPubclassOnPub(pdp->pub, "journal");
+ }
+ ValNodeAddInt (&pdp->pub, PUB_PMid, atoi (value));
+ }
return rval;
}
-
NLM_EXTERN Uint1 FieldTypeFromAECRAction (AECRActionPtr action)
{
Uint1 field_type = 0;
@@ -15381,7 +15763,7 @@ static void CaptureRefBlockSerialNumbers
SeqMgrFeatContext fcontext;
SeqMgrDescContext dcontext;
PubSerialNumberPtr psn;
- ValNodePtr ppr;
+ ValNodePtr ppr = NULL;
PubdescPtr pdp = NULL;
if (blocktype != REFERENCE_BLOCK || userdata == NULL) return;
@@ -15712,6 +16094,12 @@ NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
if (sdp != NULL && sdp->choice == Seq_descr_user) {
str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
}
+ } else if (choice == OBJ_BIOSEQ) {
+ for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_user, &context);
+ sdp != NULL && str == NULL;
+ sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_user, &context)) {
+ str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
+ }
}
break;
case FieldType_misc:
@@ -15775,13 +16163,17 @@ NLM_EXTERN CharPtr GetFieldValueForObject (Uint1 choice, Pointer data, FieldType
NLM_EXTERN ValNodePtr GetMultipleFieldValuesForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
{
- CharPtr str = NULL;
- ValNodePtr val_list = NULL;
+ CharPtr str = NULL;
+ ValNodePtr val_list = NULL;
+ SeqDescPtr sdp;
if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE;
if (field->choice == FieldType_source_qual) {
val_list = GetMultipleSourceQualsFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
+ } else if (field->choice == FieldType_dblink && choice == OBJ_SEQDESC
+ && (sdp = (SeqDescPtr) data) != NULL && sdp->choice == Seq_descr_user) {
+ val_list = GetMultipleDBLinkFieldValuesFromUserObject ((UserObjectPtr) sdp->data.ptrvalue, field->data.intvalue, scp);
} else {
str = GetFieldValueForObjectEx (choice, data, field, scp, batch_extra);
if (str != NULL) {
@@ -15868,6 +16260,10 @@ static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypeP
sdp = (SeqDescrPtr) data;
if (sdp != NULL && sdp->choice == Seq_descr_user) {
rval = RemoveDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
+ if (rval && IsEmptyDBLink (sdp->data.ptrvalue)) {
+ ovp = (ObjValNodePtr) sdp;
+ ovp->idx.deleteme = TRUE;
+ }
}
}
break;
@@ -15986,6 +16382,7 @@ NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
sdp = (SeqDescrPtr) data;
if (sdp->choice == Seq_descr_title) {
rval = SetTextDescriptor (sdp, scp, value, existing_text);
+ RemoveAutodefObjectsForDesc(sdp);
}
} else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) {
sdp = (SeqDescrPtr) data;
@@ -16342,8 +16739,8 @@ static void LIBCALLBACK AsnWriteConstraintCallBack (AsnExpOptStructPtr pAEOS)
ohsp = (ObjectHasStringPtr) pAEOS->data;
if (ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp)))
{
- pchSource = (CharPtr) pAEOS->dvp->ptrvalue;
- ohsp->found |= DoesSingleStringMatchConstraint (pchSource, ohsp->scp);
+ pchSource = (CharPtr) pAEOS->dvp->ptrvalue;
+ ohsp->found |= DoesSingleStringMatchConstraint (pchSource, ohsp->scp);
}
}
@@ -17094,15 +17491,20 @@ static void UnmarkFeatureList (ValNodePtr list)
static void FillOutCGPSetForGene (CGPSetPtr c, SeqFeatPtr gene)
{
SeqFeatPtr cds, mrna, prot;
- SeqMgrFeatContext fcontext;
+ SeqMgrFeatContext fcontext, pcontext;
BioseqPtr bsp, protbsp;
Int4 left, right, tmp;
+ ValNodeBlock cds_list, mrna_list, prot_list;
if (c == NULL || gene == NULL || (bsp = BioseqFindFromSeqLoc (gene->location)) == NULL)
{
return;
}
+ InitValNodeBlock(&cds_list, c->cds_list);
+ InitValNodeBlock(&mrna_list, c->mrna_list);
+ InitValNodeBlock(&prot_list, c->prot_list);
+
left = SeqLocStart (gene->location);
right = SeqLocStop (gene->location);
if (left > right) {
@@ -17116,11 +17518,11 @@ static void FillOutCGPSetForGene (CGPSetPtr c, SeqFeatPtr gene)
cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &fcontext))
{
if (gene == GetGeneForFeature (cds)) {
- ValNodeAddPointer (&(c->cds_list), 0, cds);
+ ValNodeAddPointerToEnd (&cds_list, 0, cds);
mrna = GetmRNAforCDS (cds);
if (mrna != NULL)
{
- ValNodeAddPointer (&(c->mrna_list), 0, mrna);
+ ValNodeAddPointerToEnd (&mrna_list, 0, mrna);
}
if (cds->product != NULL)
@@ -17128,18 +17530,18 @@ static void FillOutCGPSetForGene (CGPSetPtr c, SeqFeatPtr gene)
protbsp = BioseqFindFromSeqLoc (cds->product);
if (protbsp != NULL)
{
- prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &fcontext);
+ prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &pcontext);
if (prot != NULL)
{
- ValNodeAddPointer (&(c->prot_list), 0, prot);
+ ValNodeAddPointerToEnd (&prot_list, 0, prot);
}
/* also add in mat_peptides from protein feature */
- prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
+ prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &pcontext);
while (prot != NULL)
{
- ValNodeAddPointer (&(c->prot_list), 0, prot);
- prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
+ ValNodeAddPointerToEnd (&prot_list, 0, prot);
+ prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &pcontext);
}
}
}
@@ -17151,9 +17553,12 @@ static void FillOutCGPSetForGene (CGPSetPtr c, SeqFeatPtr gene)
mrna = SeqMgrGetNextFeature (bsp, mrna, 0, FEATDEF_mRNA, &fcontext))
{
if (gene == GetGeneForFeature (mrna)) {
- ValNodeAddPointer (&(c->mrna_list), 0, mrna);
+ ValNodeAddPointerToEnd (&mrna_list, 0, mrna);
}
}
+ c->cds_list = cds_list.head;
+ c->mrna_list = mrna_list.head;
+ c->prot_list = prot_list.head;
}
@@ -17310,10 +17715,11 @@ static Boolean DoesTextMatchBankItId (SeqIdPtr sip, StringConstraintPtr scp)
Char ch_orig = 0;
DbtagPtr dbtag;
- if (scp == NULL || (text = scp->match_text) == NULL || sip == NULL || sip->choice != SEQID_GENERAL
- || (dbtag = (DbtagPtr) sip->data.ptrvalue) == NULL
- || StringCmp (dbtag->db, "BankIt") != 0
- || dbtag->tag == NULL) {
+ if (scp == NULL || scp->match_text == NULL || sip == NULL || sip->choice != SEQID_GENERAL) {
+ return FALSE;
+ }
+ dbtag = (DbtagPtr) sip->data.ptrvalue;
+ if (dbtag == NULL || StringCmp (dbtag->db, "BankIt") != 0 || dbtag->tag == NULL) {
return FALSE;
}
text = CopyListWithoutBankIt (scp->match_text);
@@ -17336,8 +17742,8 @@ static Boolean DoesTextMatchBankItId (SeqIdPtr sip, StringConstraintPtr scp)
rval = DoesStringMatchConstraint (partial_match, scp);
partial_match = MemFree (partial_match);
}
- text = MemFree (text);
scp->match_text = tmp;
+ text = MemFree (text);
return rval;
}
@@ -17772,67 +18178,72 @@ static Boolean DoesObjectMatchSequenceConstraint (Uint1 choice, Pointer data, Se
}
+/* Pub fields */
+/* Associates one Publication_field_* value with the text label that names it.
+ * Instances live in the pubfield_labels table.
+ */
+typedef struct pubfieldlabel {
+ Int4 pub_field;
+ CharPtr name;
+} PubFieldLabelData, PNTR PubFieldLabelPtr;
+
+
+/* Lookup table between Publication_field_* values and their labels.
+ * GetPubFieldList and MakePubFieldTypeList walk this table front to back,
+ * so the order of rows here is the order in which they emit fields.
+ */
+static PubFieldLabelData pubfield_labels[] = {
+ { Publication_field_cit, "citation" } ,
+ { Publication_field_authors, "authors" } ,
+ { Publication_field_journal, "journal" } ,
+ { Publication_field_volume, "volume" } ,
+ { Publication_field_issue, "issue" } ,
+ { Publication_field_pages, "pages" } ,
+ { Publication_field_date, "date" } ,
+ { Publication_field_serial_number, "serial number" } ,
+ { Publication_field_title, "title" } ,
+ { Publication_field_affiliation, "affiliation" } ,
+ { Publication_field_affil_div, "department" } ,
+ { Publication_field_affil_city, "city" } ,
+ { Publication_field_affil_sub, "state" } ,
+ { Publication_field_affil_country, "country" } ,
+ { Publication_field_affil_street, "street" } ,
+ { Publication_field_affil_email, "email" } ,
+ { Publication_field_affil_fax, "fax" } ,
+ { Publication_field_affil_phone, "phone" } ,
+ { Publication_field_affil_zipcode, "postal code" } ,
+ { Publication_field_pmid, "PMID"} ,
+ { Publication_field_pub_class, "class" }
+};
+
+/* Number of rows in pubfield_labels.  The expansion is parenthesized so the
+ * division stays a single operand wherever the macro appears inside a larger
+ * expression, and the element size is taken from the array itself.
+ */
+#define NUM_pubfield_labels (sizeof (pubfield_labels) / sizeof (pubfield_labels[0]))
+
+
NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field)
{
CharPtr rval = NULL;
- switch (pub_field) {
- case Publication_field_cit:
- rval = "citation";
- break;
- case Publication_field_authors:
- rval = "authors";
- break;
- case Publication_field_journal:
- rval = "journal";
- break;
- case Publication_field_volume:
- rval = "volume";
- break;
- case Publication_field_issue:
- rval = "issue";
- break;
- case Publication_field_pages:
- rval = "pages";
- break;
- case Publication_field_date:
- rval = "date";
- break;
- case Publication_field_serial_number:
- rval = "serial number";
- break;
- case Publication_field_title:
- rval = "title";
- break;
- case Publication_field_affiliation:
- rval = "affiliation";
- break;
- case Publication_field_affil_div:
- rval = "department";
- break;
- case Publication_field_affil_city:
- rval = "city";
- break;
- case Publication_field_affil_sub:
- rval = "state";
- break;
- case Publication_field_affil_country:
- rval = "country";
- break;
- case Publication_field_affil_street:
- rval = "street";
- break;
- case Publication_field_affil_email:
- rval = "email";
- break;
- case Publication_field_affil_fax:
- rval = "fax";
- break;
- case Publication_field_affil_phone:
- rval = "phone";
+ Int4 i;
+
+ for (i = 0; i < NUM_pubfield_labels; i++) {
+ if (pubfield_labels[i].pub_field == pub_field) {
+ rval = pubfield_labels[i].name;
break;
- case Publication_field_affil_zipcode:
- rval = "postal code";
+ }
+ }
+ return rval;
+}
+
+
+/* Maps a field label back to its Publication_field_* value.
+ * If the first 11 characters of label match "publication" (as compared by
+ * StringNICmp), that prefix and any '-' or ' ' characters following it are
+ * skipped.  The remainder is compared against each name in pubfield_labels
+ * with StringsAreEquivalent; the pub_field of the first match is returned.
+ * Returns -1 when no row matches.
+ */
+NLM_EXTERN Int4 GetPubFieldFromLabel(CharPtr label)
+{
+ Int4 rval = -1;
+ Int4 i;
+
+ if (StringNICmp (label, "publication", 11) == 0) {
+ label = label + 11;
+ while (*label == '-' || *label == ' ') {
+ label++;
+ }
+ }
+
+ for (i = 0; i < NUM_pubfield_labels; i++) {
+ if (StringsAreEquivalent(pubfield_labels[i].name, label)) {
+ rval = pubfield_labels[i].pub_field;
break;
+ }
}
return rval;
}
@@ -17841,25 +18252,11 @@ NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field)
NLM_EXTERN ValNodePtr GetPubFieldList (void)
{
ValNodePtr val_list = NULL;
+ Int4 i;
- ValNodeAddPointer (&val_list, Publication_field_title, StringSave ("title"));
- ValNodeAddPointer (&val_list, Publication_field_authors, StringSave ("authors"));
- ValNodeAddPointer (&val_list, Publication_field_journal, StringSave ("journal"));
- ValNodeAddPointer (&val_list, Publication_field_issue, StringSave ("issue"));
- ValNodeAddPointer (&val_list, Publication_field_pages, StringSave ("pages"));
- ValNodeAddPointer (&val_list, Publication_field_serial_number, StringSave ("serial number"));
- ValNodeAddPointer (&val_list, Publication_field_date, StringSave ("date"));
- ValNodeAddPointer (&val_list, Publication_field_cit, StringSave ("citation"));
- ValNodeAddPointer (&val_list, Publication_field_affiliation, StringSave ("affiliation"));
- ValNodeAddPointer (&val_list, Publication_field_affil_div, StringSave ("department"));
- ValNodeAddPointer (&val_list, Publication_field_affil_city, StringSave ("city"));
- ValNodeAddPointer (&val_list, Publication_field_affil_sub, StringSave ("state"));
- ValNodeAddPointer (&val_list, Publication_field_affil_country, StringSave ("country"));
- ValNodeAddPointer (&val_list, Publication_field_affil_street, StringSave ("street"));
- ValNodeAddPointer (&val_list, Publication_field_affil_email, StringSave ("email"));
- ValNodeAddPointer (&val_list, Publication_field_affil_fax, StringSave ("fax"));
- ValNodeAddPointer (&val_list, Publication_field_affil_phone, StringSave ("phone"));
- ValNodeAddPointer (&val_list, Publication_field_affil_zipcode, StringSave ("postal code"));
+ for (i = 0; i < NUM_pubfield_labels; i++) {
+ ValNodeAddPointer (&val_list, pubfield_labels[i].pub_field, StringSave (pubfield_labels[i].name));
+ }
return val_list;
}
@@ -17868,30 +18265,400 @@ NLM_EXTERN ValNodePtr GetPubFieldList (void)
static ValNodePtr MakePubFieldTypeList (void)
{
ValNodePtr field_list = NULL;
+ Int4 i;
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_title);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_authors);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_journal);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_issue);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_pages);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_serial_number);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_date);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_cit);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affiliation);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_div);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_city);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_sub);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_country);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_street);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_email);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_fax);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_phone);
- ValNodeAddInt (&field_list, FieldType_pub, Publication_field_affil_zipcode);
+ for (i = 0; i < NUM_pubfield_labels; i++) {
+ ValNodeAddInt (&field_list, FieldType_pub, pubfield_labels[i].pub_field);
+ }
return field_list;
}
+/* Describes one publication class: the pub choice, the publication status and
+ * the article "from" value that a pub is compared against, together with the
+ * display name of the class.  Instances live in the pub_class_quals table.
+ */
+typedef struct pub_class_qual {
+ Uint1 pub_choice;
+ Int4 status;
+ Uint1 art_from;
+ CharPtr name;
+} PubClassQualData, PNTR PubClassQualPtr;
+
+
+/* Table of the known publication classes.
+ * GetPubclassQualFromPub scans this table from the top and returns the first
+ * row that matches, so the order of the rows is significant.
+ * For PUB_Article rows the third column is the art_from value (1, 2 or 3);
+ * all other rows use 0.
+ */
+static PubClassQualData pub_class_quals[] = {
+ { PUB_Gen, Pub_type_unpublished, 0, "unpublished" } ,
+ { PUB_Sub, Pub_type_in_press, 0, "in-press submission" } ,
+ { PUB_Sub, Pub_type_published, 0, "submission" } ,
+ { PUB_Article, Pub_type_in_press, 1, "in-press journal" } ,
+ { PUB_Article, Pub_type_published, 1, "journal" } ,
+ { PUB_Article, Pub_type_in_press, 2, "in-press book chapter" } ,
+ { PUB_Article, Pub_type_published, 2, "book chapter" } ,
+ { PUB_Article, Pub_type_in_press, 3, "in-press proceedings chapter" } ,
+ { PUB_Article, Pub_type_published, 3, "proceedings chapter" } ,
+ { PUB_Book, Pub_type_in_press, 0, "in-press book" } ,
+ { PUB_Book, Pub_type_published, 0, "book" } ,
+ { PUB_Man, Pub_type_in_press, 0, "in-press thesis" } ,
+ { PUB_Man, Pub_type_published, 0, "thesis" } ,
+ { PUB_Proc, Pub_type_in_press, 0, "in-press proceedings" } ,
+ { PUB_Proc, Pub_type_published, 0, "proceedings" } ,
+ { PUB_Patent, Pub_type_any, 0, "patent" }
+};
+
+/* Number of rows in pub_class_quals.  The expansion is parenthesized so the
+ * division stays a single operand wherever the macro appears inside a larger
+ * expression, and the element size is taken from the array itself.
+ */
+#define NUM_pub_class_quals (sizeof (pub_class_quals) / sizeof (pub_class_quals[0]))
+
+
+
+/* Builds a ValNode list with one node per row of pub_class_quals, in table
+ * order.  Each node has choice Publication_field_pub_class and holds a copy
+ * of the class name made with StringSave.
+ * The parameter list is written (void), as in GetPubFieldList and
+ * MakePubFieldTypeList, so that the definition also serves as a prototype.
+ */
+NLM_EXTERN ValNodePtr GetPubClassList (void)
+{
+  ValNodePtr list = NULL;
+  Int4 i;
+
+  for (i = 0; i < NUM_pub_class_quals; i++) {
+    ValNodeAddPointer (&list, Publication_field_pub_class, StringSave (pub_class_quals[i].name));
+  }
+
+  return list;
+}
+
+
+/* Finds the pub_class_quals row that describes the_pub.
+ * A row matches when its pub_choice equals the pub's choice and both the
+ * status comparison and the art_from comparison pass; in each of those two
+ * comparisons a value of 0 on either side is accepted as a match.
+ * The status of the pub comes from GetPubMLStatus; art_from is read from the
+ * CitArt for PUB_Article pubs and is 0 otherwise.
+ * Returns the first matching row (a pointer into the static table), or NULL
+ * when the_pub is NULL or nothing matches.
+ */
+static PubClassQualPtr GetPubclassQualFromPub (PubPtr the_pub)
+{
+  CitArtPtr art;
+  Int4 ml_class;
+  Int4 art_from = 0;
+  Int4 i;
+
+  if (the_pub == NULL) {
+    return NULL;
+  }
+
+  ml_class = GetPubMLStatus(the_pub);
+  if (the_pub->choice == PUB_Article && (art = (CitArtPtr) the_pub->data.ptrvalue) != NULL) {
+    art_from = art->from;
+  }
+
+  for (i = 0; i < NUM_pub_class_quals; i++) {
+    if (the_pub->choice == pub_class_quals[i].pub_choice
+        && (ml_class == pub_class_quals[i].status || ml_class == 0 || pub_class_quals[i].status == 0)
+        && (art_from == 0 || pub_class_quals[i].art_from == 0 || art_from == pub_class_quals[i].art_from)) {
+      return pub_class_quals + i;
+    }
+  }
+
+  return NULL;
+}
+
+
+/* Returns a StringSave copy of the class name that GetPubclassQualFromPub
+ * selects for the_pub, or NULL when no class is found.
+ */
+NLM_EXTERN CharPtr GetPubclassFromPub (PubPtr the_pub)
+{
+  PubClassQualPtr match = GetPubclassQualFromPub (the_pub);
+
+  return (match != NULL) ? StringSave(match->name) : NULL;
+}
+
+
+/* Looks up a publication class by name.  Each row name in pub_class_quals is
+ * compared to str with StringsAreEquivalent; the first matching row is
+ * returned (a pointer into the static table), or NULL when none matches.
+ */
+static PubClassQualPtr GetPubclassFromString(CharPtr str)
+{
+  Int4 idx;
+
+  for (idx = 0; idx < NUM_pub_class_quals; idx++) {
+    if (StringsAreEquivalent (pub_class_quals[idx].name, str)) {
+      return &pub_class_quals[idx];
+    }
+  }
+  return NULL;
+}
+
+
+/* Releases the type-specific data held by the_pub, storing the value returned
+ * by the matching free function back into data.ptrvalue.
+ * Returns TRUE when the pub's choice is one of the handled types (Gen, Sub,
+ * Article, Journal, Book, Man, Proc, Patent).  Returns FALSE for a NULL pub
+ * or any other choice, in which case the pub is left untouched.
+ */
+static Boolean FreePubDataForConversion (PubPtr the_pub)
+{
+  Boolean handled = TRUE;
+
+  if (the_pub == NULL) {
+    return FALSE;
+  }
+
+  switch (the_pub->choice) {
+    case PUB_Gen:
+      the_pub->data.ptrvalue = CitGenFree (the_pub->data.ptrvalue);
+      break;
+    case PUB_Sub:
+      the_pub->data.ptrvalue = CitSubFree (the_pub->data.ptrvalue);
+      break;
+    case PUB_Article:
+      the_pub->data.ptrvalue = CitArtFree (the_pub->data.ptrvalue);
+      break;
+    case PUB_Journal:
+      the_pub->data.ptrvalue = CitJourFree (the_pub->data.ptrvalue);
+      break;
+    case PUB_Book:
+    case PUB_Man:
+    case PUB_Proc:
+      /* these three choices are all freed with CitBookFree */
+      the_pub->data.ptrvalue = CitBookFree (the_pub->data.ptrvalue);
+      break;
+    case PUB_Patent:
+      the_pub->data.ptrvalue = CitPatFree (the_pub->data.ptrvalue);
+      break;
+    default:
+      handled = FALSE;
+      break;
+  }
+  return handled;
+}
+
+
+/* Sets the "from" value of an article pub and attaches a fresh container for
+ * it: a CitJour with a new Imprint when art_from is 1, a CitBook with a new
+ * Imprint when art_from is 2 or 3.  Other values only set the "from" field.
+ * Does nothing unless the_pub is a non-NULL PUB_Article; a missing CitArt is
+ * created first.
+ * NOTE(review): any fromptr already present on the CitArt is overwritten
+ * without being freed -- confirm callers only pass freshly created articles.
+ */
+static void SetArtFrom(PubPtr the_pub, Uint1 art_from)
+{
+  CitArtPtr  article;
+  CitJourPtr journal;
+  CitBookPtr book;
+
+  if (the_pub == NULL || the_pub->choice != PUB_Article) {
+    return;
+  }
+
+  article = (CitArtPtr)the_pub->data.ptrvalue;
+  if (article == NULL) {
+    article = CitArtNew();
+    the_pub->data.ptrvalue = article;
+  }
+
+  article->from = art_from;
+  if (article->from == 1) {
+    journal = CitJourNew();
+    journal->imp = ImprintNew();
+    article->fromptr = journal;
+  } else if (article->from == 2 || article->from == 3) {
+    book = CitBookNew();
+    book->imp = ImprintNew();
+    article->fromptr = book;
+  }
+}
+
+
+/* Allocates empty type-specific data for the_pub according to its choice and
+ * stores it in data.ptrvalue.
+ *   - PUB_Article: a new CitArt, then SetArtFrom applies art_from.
+ *   - PUB_Book / PUB_Man / PUB_Proc: a new CitBook with a new Imprint and a
+ *     new Date on that imprint.
+ *   - PUB_Gen, PUB_Sub, PUB_Journal, PUB_Patent: the matching *New() object.
+ * art_from is only used for PUB_Article.
+ * Returns TRUE for the choices above; FALSE for a NULL pub or any other
+ * choice, in which case nothing is allocated.
+ */
+static Boolean NewPubDataForConversion (PubPtr the_pub, Uint1 art_from)
+{
+  CitBookPtr book;
+  Boolean    created = TRUE;
+
+  if (the_pub == NULL) {
+    return FALSE;
+  }
+
+  switch (the_pub->choice) {
+    case PUB_Gen:
+      the_pub->data.ptrvalue = CitGenNew();
+      break;
+    case PUB_Sub:
+      the_pub->data.ptrvalue = CitSubNew();
+      break;
+    case PUB_Article:
+      the_pub->data.ptrvalue = CitArtNew();
+      SetArtFrom(the_pub, art_from);
+      break;
+    case PUB_Journal:
+      the_pub->data.ptrvalue = CitJourNew();
+      break;
+    case PUB_Book:
+    case PUB_Man:
+    case PUB_Proc:
+      book = CitBookNew();
+      book->imp = ImprintNew();
+      book->imp->date = DateNew();
+      the_pub->data.ptrvalue = book;
+      break;
+    case PUB_Patent:
+      the_pub->data.ptrvalue = CitPatNew();
+      break;
+    default:
+      created = FALSE;
+      break;
+  }
+  return created;
+}
+
+
+/* Returns the Imprint attached to the_pub, or NULL when the pub or its data
+ * is NULL, the pub type is not handled here, or the article's journal/book
+ * container is missing.
+ * Handled types: PUB_Article (through its journal when from == 1, or its book
+ * when from == 2 or 3), PUB_Journal, PUB_Book and PUB_Man.
+ * The returned pointer refers to the pub's own imprint; it is not a copy.
+ * NOTE(review): PUB_Proc is not handled, although NewPubDataForConversion
+ * builds it as a CitBook with an imprint -- confirm whether it should be.
+ */
+static ImprintPtr GetPubImprint (PubPtr the_pub)
+{
+  CitArtPtr  cap;
+  CitBookPtr cbp;
+  CitJourPtr cjp;
+  ImprintPtr imp = NULL;
+
+  if (the_pub == NULL || the_pub->data.ptrvalue == NULL)
+  {
+    return NULL;
+  }
+
+  switch (the_pub->choice)
+  {
+    case PUB_Article :
+      cap = (CitArtPtr) the_pub->data.ptrvalue;
+      if (cap->from == 1)
+      {
+        cjp = (CitJourPtr) cap->fromptr;
+        if (cjp != NULL)
+        {
+          imp = cjp->imp;
+        }
+      }
+      else if (cap->from == 2 || cap->from == 3)
+      {
+        cbp = (CitBookPtr) cap->fromptr;
+        if (cbp != NULL) {
+          imp = cbp->imp;
+        }
+      }
+      break;
+    case PUB_Journal :
+      cjp = (CitJourPtr) the_pub->data.ptrvalue;
+      imp = cjp->imp;
+      /* must stop here: falling into the book case would reinterpret the
+       * CitJour as a CitBook and read the imprint from the wrong field */
+      break;
+    case PUB_Book :
+    case PUB_Man :
+      cbp = (CitBookPtr) the_pub->data.ptrvalue;
+      imp = cbp->imp;
+      break;
+    default :
+      break;
+
+  }
+  return imp;
+}
+
+
+/* Records a publication status on the_pub.
+ * When the pub has an imprint (see GetPubImprint) the status is stored in
+ * imp->prepub: 255 for unpublished, 0 for published, 2 for in-press and 1 for
+ * submitter-block.  Any other status leaves the imprint unchanged.
+ * When there is no imprint and the pub is a PUB_Gen, the status is encoded in
+ * the citation text instead: "unpublished" is stored for
+ * Pub_type_unpublished, and for any other status an existing "unpublished"
+ * citation is cleared.  A missing CitGen is created first.
+ * Returns TRUE when the status was written to the imprint or the CitGen
+ * citation was changed; FALSE otherwise, including for a NULL pub.
+ */
+static Boolean SetPubStatusOnPub (PubPtr the_pub, Int4 status)
+{
+  ImprintPtr imp;
+  CitGenPtr  cgp;
+  Boolean    rval = FALSE;
+
+  /* GetPubImprint tolerates NULL, but the PUB_Gen test below does not */
+  if (the_pub == NULL) {
+    return FALSE;
+  }
+
+  imp = GetPubImprint(the_pub);
+  if (imp != NULL) {
+    switch (status) {
+      case Pub_type_unpublished:
+        imp->prepub = 255;
+        rval = TRUE;
+        break;
+      case Pub_type_published:
+        imp->prepub = 0;
+        rval = TRUE;
+        break;
+      case Pub_type_in_press:
+        imp->prepub = 2;
+        rval = TRUE;
+        break;
+      case Pub_type_submitter_block:
+        imp->prepub = 1;
+        rval = TRUE;
+        break;
+      default:
+        break;
+    }
+  } else if (the_pub->choice == PUB_Gen) {
+    if ((cgp = (CitGenPtr) the_pub->data.ptrvalue) == NULL) {
+      cgp = CitGenNew();
+      the_pub->data.ptrvalue = cgp;
+    }
+    if (status == Pub_type_unpublished) {
+      cgp->cit = MemFree (cgp->cit);
+      cgp->cit = StringSave("unpublished");
+      rval = TRUE;
+    } else if (StringICmp (cgp->cit, "unpublished") == 0) {
+      cgp->cit = MemFree (cgp->cit);
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+/* Copies pub field values from orig_pub to new_pub.
+ * Every field listed in pubfield_labels is read with GetPubFieldFromPub and,
+ * when it has text, written to new_pub with SetPubFieldOnPub using
+ * ExistingTextOption_replace_old.  Two fields are skipped: the publication
+ * class (the field being changed by the caller) and the authors (copied
+ * separately by the caller).
+ * Does nothing when either pub is NULL.
+ */
+static void CopyRelevantPubDetails (PubPtr orig_pub, PubPtr new_pub)
+{
+  Int4 i;
+  CharPtr val;
+
+  if (orig_pub == NULL || new_pub == NULL) {
+    return;
+  }
+
+  for (i = 0; i < NUM_pubfield_labels; i++) {
+    if (pubfield_labels[i].pub_field != Publication_field_pub_class /* field we are copying now */
+        && pubfield_labels[i].pub_field != Publication_field_authors /* already copying this elsewhere */) {
+      val = GetPubFieldFromPub(orig_pub, pubfield_labels[i].pub_field, NULL);
+      if (!StringHasNoText (val)) {
+        SetPubFieldOnPub(new_pub, pubfield_labels[i].pub_field, NULL, val, ExistingTextOption_replace_old);
+      }
+      /* the getter hands back an allocated string, as the other field
+       * getters in this file do; release it once it has been applied */
+      val = MemFree (val);
+    }
+  }
+}
+
+
+/* Changes the publication class of the_pub to the class named by pub_class
+ * (one of the names in pub_class_quals).
+ * When the pub choice and art_from stay the same, only the publication status
+ * is updated in place.  Otherwise a replacement pub of the new type is built
+ * on the stack, the author list and the remaining pub fields are copied into
+ * it, its status is set, and its data is swapped into the_pub after the old
+ * data has been freed.
+ * Returns TRUE when the_pub was changed.  Returns FALSE when the_pub is NULL,
+ * when either the requested or the current class cannot be determined, when
+ * the pub already has the requested class, or when the change could not be
+ * applied.
+ */
+NLM_EXTERN Boolean SetPubclassOnPub (PubPtr the_pub, CharPtr pub_class)
+{
+  PubClassQualPtr orig_pq = NULL, new_pq = NULL;
+  AuthListPtr PNTR palp;
+  AuthListPtr PNTR new_palp;
+  Boolean rval = FALSE;
+  ValNode new_pub;
+
+  if (the_pub == NULL) {
+    return FALSE;
+  }
+
+  new_pq = GetPubclassFromString(pub_class);
+  orig_pq = GetPubclassQualFromPub(the_pub);
+
+  if (new_pq == NULL || orig_pq == NULL || new_pq == orig_pq) {
+    return FALSE;
+  }
+
+  if (new_pq->pub_choice == the_pub->choice && new_pq->art_from == orig_pq->art_from) {
+    /* only thing changing is in-press/published */
+    if (new_pq->status != orig_pq->status) {
+      rval = SetPubStatusOnPub(the_pub, new_pq->status);
+    }
+  } else {
+    MemSet (&new_pub, 0, sizeof (ValNode));
+    new_pub.choice = new_pq->pub_choice;
+    if (!NewPubDataForConversion(&new_pub, new_pq->art_from)) {
+      /* nothing to swap in; leave the original pub intact */
+      return FALSE;
+    }
+
+    palp = GetAuthListForPub(the_pub);
+    new_palp = GetAuthListForPub(&new_pub);
+    if (palp && *palp && new_palp) {
+      *new_palp = AsnIoMemCopy (*palp, (AsnReadFunc) AuthListAsnRead, (AsnWriteFunc) AuthListAsnWrite);
+    }
+    CopyRelevantPubDetails(the_pub, &new_pub);
+
+    SetPubStatusOnPub(&new_pub, new_pq->status);
+    rval = FreePubDataForConversion(the_pub);
+    if (rval) {
+      the_pub->choice = new_pub.choice;
+      the_pub->data.ptrvalue = new_pub.data.ptrvalue;
+    } else {
+      /* old data could not be released, so discard the replacement */
+      FreePubDataForConversion(&new_pub);
+    }
+  }
+
+  return rval;
+}
+
+
NLM_EXTERN Boolean IsPublicationConstraintEmpty (PublicationConstraintPtr constraint)
{
Boolean rval = FALSE;
@@ -17950,13 +18717,13 @@ NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub)
imp = cjp->imp;
}
}
- else if (cap->from == 2 || cap->from == 3)
- {
+ else if (cap->from == 2 || cap->from == 3)
+ {
cbp = (CitBookPtr) cap->fromptr;
- if (cbp != NULL) {
+ if (cbp != NULL) {
imp = cbp->imp;
- }
- }
+ }
+ }
break;
case PUB_Journal :
cjp = (CitJourPtr) the_pub->data.ptrvalue;
@@ -18356,6 +19123,7 @@ static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, Field
}
break;
case FieldType_misc:
+ case FieldType_dblink:
bsp = GetSequenceForObject (choice, data);
if (bsp != NULL) {
str = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, constraint->field, constraint->string_constraint, NULL);
@@ -18365,6 +19133,7 @@ static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, Field
str = MemFree (str);
}
break;
+
/* TODO LATER */
case FieldType_pub:
break;
@@ -18662,19 +19431,21 @@ static Boolean DoesCodingRegionMatchTranslationConstraint (SeqFeatPtr sfp, Trans
cp1 = actual;
cp2 = translation;
- for (pos = 0; pos < comp_len && rval; pos++) {
- if (*cp1 != *cp2) {
- num++;
- if (constraint->num_mismatches->choice == QuantityConstraint_equals
- && num > constraint->num_mismatches->data.intvalue) {
- rval = FALSE;
- } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than
- && num >= constraint->num_mismatches->data.intvalue) {
- rval = FALSE;
+ if (cp1 != NULL && cp2 != NULL) {
+ for (pos = 0; pos < comp_len && rval; pos++) {
+ if (*cp1 != *cp2) {
+ num++;
+ if (constraint->num_mismatches->choice == QuantityConstraint_equals
+ && num > constraint->num_mismatches->data.intvalue) {
+ rval = FALSE;
+ } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than
+ && num >= constraint->num_mismatches->data.intvalue) {
+ rval = FALSE;
+ }
}
+ cp1++;
+ cp2++;
}
- cp1++;
- cp2++;
}
if (rval) {
if (constraint->num_mismatches->choice == QuantityConstraint_greater_than
@@ -19219,6 +19990,61 @@ static ValNodePtr CollectMiscObjectsForApply (SeqEntryPtr sep, Int4 misc_type, V
}
+/* Appends to dest_list every user descriptor on bsp whose user object is a
+ * DBLink (as judged by IsUserObjectDBLink), each as an OBJ_SEQDESC node.
+ * When the Bioseq has none, a new user descriptor holding an empty DBLink
+ * user object is created on it, flagged for deletion, and appended instead.
+ * Does nothing when bsp or dest_list is NULL.
+ */
+static void AddDBLinkDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
+{
+  SeqDescrPtr sdp;
+  UserObjectPtr uop;
+  SeqMgrDescContext context;
+  Boolean found = FALSE;
+  ObjValNodePtr ovp;
+
+  if (bsp == NULL || dest_list == NULL) {
+    return;
+  }
+
+  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
+       sdp != NULL;
+       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
+    if ((uop = sdp->data.ptrvalue) != NULL
+        && IsUserObjectDBLink (uop)) {
+      ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp);
+      found = TRUE;
+    }
+  }
+  if (!found) {
+    /* if no existing DBLink descriptor, create one, marked for delete.
+     * unmark it for deletion when it gets populated.
+     */
+    sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
+    if (sdp != NULL) {
+      sdp->data.ptrvalue = CreateDBLinkUserObject ();
+      ovp = (ObjValNodePtr) sdp;
+      ovp->idx.deleteme = TRUE;
+      ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp);
+    }
+  }
+}
+
+
+/* Collects the DBLink descriptors that an apply action should write to.
+ * Every Bioseq returned by CollectNucBioseqs that satisfies the constraint
+ * set contributes its DBLink descriptors through
+ * AddDBLinkDescriptorDestinationsForBioseq.
+ * misc_type is accepted to keep the signature parallel with the other
+ * Collect...ForApply helpers; it is not used.
+ * Returns the list of OBJ_SEQDESC nodes, or NULL when sep is NULL or nothing
+ * qualifies.
+ */
+static ValNodePtr CollectDBLinkObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr constraint)
+{
+  ValNodePtr target_list = NULL, bsp_list = NULL, vnp;
+
+  if (sep == NULL) {
+    return NULL;
+  }
+
+  bsp_list = CollectNucBioseqs (sep);
+  for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
+    if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) {
+      AddDBLinkDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list);
+    }
+  }
+  /* only the list nodes are released; the Bioseqs are not owned here */
+  bsp_list = ValNodeFree (bsp_list);
+
+  return target_list;
+}
+
+
static void AddStructuredCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list, ValNodePtr PNTR dest_tail)
{
SeqDescrPtr sdp;
@@ -19307,6 +20133,11 @@ NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionP
&& action->action->choice == ActionChoice_apply
&& (apply = action->action->data.ptrvalue) != NULL) {
ocd.object_list = CollectMiscObjectsForApply (sep, apply->field->data.intvalue, action->constraint);
+ } else if (field_type == FieldType_dblink
+ && action->action != NULL
+ && action->action->choice == ActionChoice_apply
+ && (apply = action->action->data.ptrvalue) != NULL) {
+ ocd.object_list = CollectDBLinkObjectsForApply (sep, apply->field->data.intvalue, action->constraint);
} else if (field_type == FieldType_struc_comment_field) {
ocd.object_list = CollectStructuredCommentsForApply (sep, action->constraint);
} else {
@@ -19436,7 +20267,11 @@ static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_n
protbsp = BioseqFindFromSeqLoc (cds->product);
if (protbsp != NULL)
{
- prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &fcontext);
+ prot = SeqMgrGetBestProteinFeature (protbsp, NULL);
+ if (prot == NULL) {
+ prot = GetBestProteinFeatureUnindexed (cds->product);
+ }
+
/* if there is no full-length protein feature, make one */
if (prot == NULL)
{
@@ -19777,34 +20612,47 @@ static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act, BoolPtr cr
}
-static void AlsoChangeMrnaForObject (Uint1 choice, Pointer data)
+static Boolean AlsoChangeMrnaForObject (Uint1 choice, Pointer data)
{
CharPtr str;
- SeqFeatPtr sfp, mrna;
+ SeqFeatPtr sfp, mrna, cds;
+ BioseqPtr prot;
FeatureField f;
+ Boolean rval = FALSE;
if (choice == 0) {
str = GetFieldValueFromCGPSet (data, CDSGeneProt_field_prot_name, NULL);
- SetFieldValueInCGPSet (data, CDSGeneProt_field_mrna_product, NULL, str, ExistingTextOption_replace_old);
+ rval = SetFieldValueInCGPSet (data, CDSGeneProt_field_mrna_product, NULL, str, ExistingTextOption_replace_old);
str = MemFree (str);
} else if (choice == OBJ_SEQFEAT) {
sfp = (SeqFeatPtr) data;
- if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
- mrna = GetmRNAforCDS (sfp);
+ if (sfp != NULL) {
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
+ mrna = GetmRNAforCDS (sfp);
+ } else if (sfp->data.choice == SEQFEAT_PROT) {
+ prot = BioseqFindFromSeqLoc(sfp->location);
+ cds = SeqMgrGetCDSgivenProduct (prot, NULL);
+ mrna = GetmRNAforCDS (cds);
+ }
if (mrna != NULL) {
- f.type = Macro_feature_type_cds;
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
+ f.type = Macro_feature_type_cds;
+ } else {
+ f.type = Macro_feature_type_prot;
+ }
f.field = ValNodeNew(NULL);
f.field->next = NULL;
f.field->choice = FeatQualChoice_legal_qual;
f.field->data.intvalue = Feat_qual_legal_product;
str = GetQualFromFeature (sfp, &f, NULL);
f.type = Macro_feature_type_mRNA;
- SetQualOnFeature (mrna, &f, NULL, str, ExistingTextOption_replace_old);
+ rval = SetQualOnFeature (mrna, &f, NULL, str, ExistingTextOption_replace_old);
str = MemFree (str);
f.field = ValNodeFree (f.field);
}
}
}
+ return rval;
}
@@ -19948,14 +20796,53 @@ static Boolean NoFieldChange (CharPtr new_val, ValNodePtr vnp, FieldTypePtr fiel
}
+/* Merges the string values in apply into the list *current according to
+ * existing_text.
+ *   - ExistingTextOption_leave_old: nothing is done when *current already
+ *     has values.
+ *   - ExistingTextOption_add_qual: a copy of every apply value is appended.
+ *   - ExistingTextOption_replace_old: *current is freed (nodes and data) and
+ *     then filled with copies of the apply values.
+ *   - anything else: each apply value is combined into every existing entry
+ *     with SetStringValue, or appended as a copy when *current is empty.
+ * Values are copied with StringSave; apply itself is not modified.
+ * Returns TRUE when *current was changed.  Returns FALSE when apply or
+ * current is NULL.
+ */
+static Boolean AddValuesToList(ValNodePtr apply, ValNodePtr PNTR current, Uint2 existing_text)
+{
+  ValNodePtr vnp_a, vnp_c;
+  Boolean rval = FALSE;
+  CharPtr str;
+
+  /* every path below dereferences current, so a NULL list pointer is
+   * rejected up front */
+  if (apply == NULL || current == NULL) {
+    return FALSE;
+  }
+  if (existing_text == ExistingTextOption_leave_old && *current != NULL) {
+    return FALSE;
+  }
+
+  if (existing_text == ExistingTextOption_add_qual
+      || existing_text == ExistingTextOption_replace_old) {
+    if (existing_text == ExistingTextOption_replace_old) {
+      *current = ValNodeFreeData (*current);
+    }
+    for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) {
+      ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue)));
+    }
+    return TRUE;
+  }
+
+  for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) {
+    if (*current == NULL) {
+      ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue)));
+      rval = TRUE;
+    } else {
+      for (vnp_c = *current; vnp_c != NULL; vnp_c = vnp_c->next) {
+        /* SetStringValue may replace the string, so store it back */
+        str = (CharPtr)(vnp_c->data.ptrvalue);
+        rval |= SetStringValue(&str, (CharPtr)(vnp_a->data.ptrvalue), existing_text);
+        vnp_c->data.ptrvalue = str;
+      }
+    }
+  }
+  return rval;
+}
+
+
NLM_EXTERN Int4 DoConvertActionToObjectListEx (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra)
{
ValNodePtr vnp;
- Int4 num_succeed = 0, num_fail = 0;
+ Int4 num_succeed = 0;
CharPtr str, from_val, field_name = NULL;
FieldTypePtr field_from, field_to;
- StringConstraint remove_constraint;
- Boolean fail;
+ Boolean already_added, field_change;
+ ValNodePtr val_list_from, val_list_to, val_vnp;
if (action == NULL || object_list == NULL || action->fields == NULL) return 0;
@@ -19979,37 +20866,36 @@ NLM_EXTERN Int4 DoConvertActionToObjectListEx (ConvertActionPtr action, ValNodeP
} else {
for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
/* there may be multiple qualifiers */
- MemSet (&remove_constraint, 0, sizeof (StringConstraint));
- remove_constraint.case_sensitive = TRUE;
- remove_constraint.match_location = String_location_equals;
- remove_constraint.not_present = FALSE;
- remove_constraint.whole_word = FALSE;
- fail = FALSE;
-
- while ((str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra)) != NULL && !fail) {
- remove_constraint.match_text = StringSave (str);
+ val_list_from = GetMultipleFieldValuesForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra);
+ val_list_to = GetMultipleFieldValuesForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, batch_extra);
+ for (val_vnp = val_list_from; val_vnp != NULL; val_vnp = val_vnp->next) {
+ str = (CharPtr)(val_vnp->data.ptrvalue);
if (action->strip_name) {
RemoveFieldNameFromString (field_name, str);
}
FixCapitalizationInString(&str, action->capitalization, NULL);
-
- if ((SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text, batch_extra)
- || (!action->keep_original && NoFieldChange(str, vnp, field_to, NULL, batch_extra)))
- && (action->keep_original || RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, &remove_constraint))) {
- if (also_change_mrna) {
- AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
- }
- num_succeed ++;
- } else {
- num_fail++;
- fail = TRUE;
+ val_vnp->data.ptrvalue = str;
+ }
+ field_change = AddValuesToList(val_list_from, &val_list_to, action->existing_text);
+ if (field_change) {
+ if (!action->keep_original) {
+ RemoveFieldValueForObject(vnp->choice, vnp->data.ptrvalue, field_from, scp);
}
- str = MemFree (str);
- remove_constraint.match_text = MemFree (remove_constraint.match_text);
- if (action->keep_original) {
- break;
+ RemoveFieldValueForObject(vnp->choice, vnp->data.ptrvalue, field_to, NULL);
+ for (val_vnp = val_list_to; val_vnp != NULL; val_vnp = val_vnp->next) {
+ SetFieldValueForObjectEx(vnp->choice, vnp->data.ptrvalue, field_to,
+ NULL, (CharPtr) (val_vnp->data.ptrvalue),
+ ExistingTextOption_add_qual, batch_extra);
}
}
+ if (also_change_mrna) {
+ field_change |= AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
+ }
+ if (field_change) {
+ num_succeed++;
+ }
+ val_list_from = ValNodeFreeData(val_list_from);
+ val_list_to = ValNodeFreeData(val_list_to);
}
}
@@ -20157,7 +21043,7 @@ NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodeP
}
}
}
- if (!IsTextMarkerEmpty (action->portion->right_marker)
+ if (action->portion != NULL && !IsTextMarkerEmpty (action->portion->right_marker)
&& action->remove_right
&& !action->portion->include_right
&& action->portion != NULL
@@ -20207,13 +21093,27 @@ static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolP
Int4 num_succeed = 0;
FieldTypePtr field_from;
BatchExtraPtr batch_extra;
+ AECRActionPtr act_cpy = NULL;
+ FeatureFieldPtr field_cpy;
if (act == NULL || act->action == NULL) return 0;
+ field_type = FieldTypeFromAECRAction (act);
+ if (field_type == FieldType_cds_gene_prot) {
+ if (act->action->choice == ActionChoice_edit) {
+ act_cpy = AsnIoMemCopy (act, (AsnReadFunc) AECRActionAsnRead, (AsnWriteFunc) AECRActionAsnWrite);
+ e = (EditActionPtr)act_cpy->action->data.ptrvalue;
+ field_cpy = FeatureFieldFromCDSGeneProtField (e->field->data.intvalue);
+ e->field->choice = FieldType_feature_field;
+ e->field->data.ptrvalue = field_cpy;
+ act = act_cpy;
+ field_type = FieldTypeFromAECRAction (act);
+ }
+ }
+
batch_extra = BatchExtraNew ();
InitBatchExtraForAECRAction (batch_extra, act, sep);
- field_type = FieldTypeFromAECRAction (act);
if (field_type == FieldType_cds_gene_prot) {
entityID = ObjMgrGetEntityIDForChoice(sep);
object_list = BuildCGPSetList (entityID, act, created_protein_features);
@@ -20222,13 +21122,16 @@ static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolP
object_list = GetObjectListForAECRActionEx (sep, act, batch_extra);
}
+ if (object_list == NULL) {
+ return 0;
+ }
switch (act->action->choice) {
case ActionChoice_apply:
a = (ApplyActionPtr) act->action->data.ptrvalue;
scp = FindStringConstraintInConstraintSetForField (a->field, act->constraint);
num_succeed = DoApplyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra);
- if (a->field->choice == FieldType_misc) {
+ if (a->field->choice == FieldType_misc || a->field->choice == FieldType_dblink) {
DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
}
break;
@@ -20268,6 +21171,8 @@ static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolP
num_succeed = DoRemoveActionToObjectList (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp);
if (r->field->choice == FieldType_misc) {
DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
+ } else if (r->field->choice == FieldType_dblink) {
+ DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
}
break;
case ActionChoice_parse:
@@ -20276,6 +21181,7 @@ static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolP
}
object_list = FreeObjectList (object_list);
batch_extra = BatchExtraFree (batch_extra);
+ act_cpy = AECRActionFree (act_cpy);
return num_succeed;
}
@@ -20529,7 +21435,7 @@ static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2)
/* Callback function used for sorting and uniqueing */
-static int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2)
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2)
{
ValNodePtr vnp1;
@@ -20546,7 +21452,7 @@ static int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2)
}
-static int LIBCALLBACK SortVnpByFieldTypeAndSourceQualifier (VoidPtr ptr1, VoidPtr ptr2)
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldTypeAndSourceQualifier (VoidPtr ptr1, VoidPtr ptr2)
{
ValNodePtr vnp1;
@@ -20789,16 +21695,13 @@ static ValNodePtr GetRnaQualFieldList (SeqEntryPtr sep)
}
-static void GetStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer data)
+NLM_EXTERN ValNodePtr GetStructuredCommentFieldListFromUserObject (UserObjectPtr uop)
{
- UserObjectPtr uop;
+ ValNodePtr list = NULL;
UserFieldPtr ufp;
ValNodePtr vnp;
- if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user
- && (uop = sdp->data.ptrvalue) != NULL
- && IsUserObjectStructuredComment (uop)) {
-
+ if (uop != NULL && IsUserObjectStructuredComment (uop)) {
ufp = uop->data;
while (ufp != NULL) {
if (ufp->label != NULL && ufp->label->str != NULL
@@ -20807,15 +21710,28 @@ static void GetStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer data)
vnp = ValNodeNew (NULL);
vnp->choice = StructuredCommentField_named;
vnp->data.ptrvalue = StringSave (ufp->label->str);
- ValNodeAddPointer ((ValNodePtr PNTR) data, FieldType_struc_comment_field, vnp);
+ ValNodeAddPointer (&list, FieldType_struc_comment_field, vnp);
}
ufp = ufp->next;
}
}
+ return list;
+}
+
+
+static void GetStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer data)
+{
+ UserObjectPtr uop;
+
+ if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user
+ && (uop = sdp->data.ptrvalue) != NULL
+ && IsUserObjectStructuredComment (uop)) {
+ ValNodeLink ((ValNodePtr PNTR) data, GetStructuredCommentFieldListFromUserObject(uop));
+ }
}
-static ValNodePtr GetStructuredCommentFieldList (SeqEntryPtr sep)
+NLM_EXTERN ValNodePtr GetStructuredCommentFieldList (SeqEntryPtr sep)
{
ValNodePtr field_list = NULL;
ValNodePtr dbname, field_name;
@@ -20953,40 +21869,6 @@ static void CollectStructuredCommentsCallback (SeqDescrPtr sdp, Pointer data)
}
-static void AddDBLinkDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
-{
- SeqDescrPtr sdp;
- UserObjectPtr uop;
- SeqMgrDescContext context;
- Boolean found = FALSE;
- ObjValNodePtr ovp;
-
- if (bsp == NULL || dest_list == NULL) {
- return;
- }
-
- for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
- sdp != NULL;
- sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
- if ((uop = sdp->data.ptrvalue) != NULL
- && IsUserObjectDBLink (uop)) {
- ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp);
- found = TRUE;
- }
- }
- if (!found) {
- /* if no existing comment descriptor, create one, marked for delete.
- * unmark it for deletion when it gets populated.
- */
- sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
- sdp->data.ptrvalue = CreateDBLinkUserObject ();
- ovp = (ObjValNodePtr) sdp;
- ovp->idx.deleteme = TRUE;
- ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp);
- }
-}
-
-
static ValNodePtr CollectDBLinkDescriptors (SeqEntryPtr sep)
{
ValNodePtr seq_list = NULL, vnp, desc_list = NULL;
@@ -21104,6 +21986,21 @@ static ValNodePtr CollectGenbankBlockDescriptors (SeqEntryPtr sep)
}
+static void CollectDblinkCallback (SeqDescPtr sdp, Pointer data)
+{
+ UserObjectPtr uop;
+
+ if (sdp == NULL || data == NULL
+ || sdp->choice != Seq_descr_user
+ || (uop = (UserObjectPtr)sdp->data.ptrvalue) == NULL
+ || uop->type == NULL
+ || StringCmp (uop->type->str, "DBLink") != 0) {
+ return;
+ } else {
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ }
+}
+
NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr sep)
{
@@ -21140,6 +22037,9 @@ NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr s
object_list = CollectNucBioseqs (sep);
ValNodeLink (&object_list, CollectCommentDescriptors (sep));
break;
+ case FieldType_dblink:
+ VisitDescriptorsInSep (sep, &object_list, CollectDblinkCallback);
+ break;
}
return object_list;
}
@@ -21209,6 +22109,13 @@ NLM_EXTERN ValNodePtr GetFieldListForFieldType (Uint1 field_type, SeqEntryPtr se
ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline);
ValNodeAddInt (&fields, FieldType_misc, Misc_field_keyword);
break;
+ case FieldType_dblink:
+ ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_trace_assembly);
+ ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_bio_sample);
+ ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_probe_db);
+ ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_sequence_read_archve);
+ ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_bio_project);
+ break;
}
return fields;
}
@@ -22898,6 +23805,7 @@ static Int4 SetFieldForDestList (ValNodePtr dest_list, ParseDestPtr field, CharP
num_succeeded++;
}
sdp->data.ptrvalue = cp;
+ RemoveAutodefObjectsForDesc(sdp);
}
}
}
@@ -23732,7 +24640,8 @@ static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, S
ConvertAndRemoveFeatureCollectionData d;
ValNodePtr vnp;
SeqFeatPtr sfp;
- Int4 num_deleted = 0;
+ Int4 num_deleted = 0, num_products_deleted = 0;
+ BioseqPtr bsp;
if (action == NULL) return 0;
@@ -23745,12 +24654,17 @@ static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, S
sfp = vnp->data.ptrvalue;
if (sfp != NULL) {
sfp->idx.deleteme = TRUE;
+ if (sfp->product != NULL && (bsp = BioseqFind(SeqLocId(sfp->product))) != NULL) {
+ bsp->idx.deleteme = TRUE;
+ num_products_deleted++;
+ }
num_deleted ++;
}
}
d.feature_list = ValNodeFree (d.feature_list);
DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
- return num_deleted;
+ RenormalizeNucProtSets (sep, TRUE);
+ return num_deleted + num_products_deleted;
}
@@ -23994,9 +24908,9 @@ static Boolean ConvertCommentToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, Conver
}
-static Boolean ConvertGeneToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
+static Boolean ConvertGeneToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
- return ConvertGeneToMiscFeatFunc (sfp, featdef_to);
+ return ConvertGeneToImpFeatFunc (sfp, featdef_to);
}
@@ -24112,7 +25026,7 @@ static Boolean ConvertRegionToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeat
static Boolean ConvertToBond (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
- SeqLocPtr slp;
+ SeqLocPtr slp = NULL;
BioseqPtr bsp;
SeqEntryPtr sep;
Boolean no_cds = FALSE;
@@ -24192,7 +25106,7 @@ static Boolean ConvertToBond (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDst
static Boolean ConvertToSite (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
- SeqLocPtr slp;
+ SeqLocPtr slp = NULL;
BioseqPtr bsp;
SeqEntryPtr sep;
Boolean no_cds = FALSE;
@@ -24340,6 +25254,18 @@ static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_t
}
+static Boolean mRNAToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
+{
+ return ConvertmRNAToCodingRegion (sfp);
+}
+
+
+static Boolean tRNAToGeneConvertFunc(SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
+{
+ return ConverttRNAToGene (sfp);
+}
+
+
static Boolean MiscFeatToGeneConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
return ConvertMiscFeatToGene (sfp);
@@ -24407,12 +25333,18 @@ static ConvertFeatTableData conversion_functions[] = {
{ SEQFEAT_COMMENT, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_misc_feature,
ConvertCommentToMiscFeat,
"Creates a misc_feature with the same note as the original. Note - the flatfile display for the feature is the same." },
- { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_IMP, FEATDEF_misc_feature,
- ConvertGeneToMiscFeat,
- "Creates a misc_feature with the gene description and locus prepended to the original comment, separated by semicolons." },
+ { SEQFEAT_GENE, FEATDEF_GENE, SEQFEAT_IMP, FEATDEF_ANY,
+ ConvertGeneToImpFeat,
+ "Creates an import feature with the gene description and locus prepended to the original comment, separated by semicolons." },
{ SEQFEAT_RNA, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY,
ConvertRNAToImpFeatEx,
"Creates an import feature of the specified subtype and adds the RNA product name to the comment." } ,
+ { SEQFEAT_RNA, FEATDEF_mRNA, SEQFEAT_CDREGION, FEATDEF_CDS,
+ mRNAToCodingRegionConvertFunc,
+ "Convert mRNA to coding region, use mRNA product for protein feature" },
+ { SEQFEAT_RNA, FEATDEF_tRNA, SEQFEAT_GENE, FEATDEF_GENE,
+ tRNAToGeneConvertFunc,
+ "Convert tRNA to gene, use tRNA product for gene description" },
{ SEQFEAT_SITE, FEATDEF_ANY, SEQFEAT_IMP, FEATDEF_ANY,
ConvertSiteToImpFeat,
"Creates an import feature of the specified subtype with the site type name as a /note qualifier." } ,
@@ -24540,6 +25472,9 @@ static Int4 ApplyConvertFeatureActionToSeqEntry (ConvertFeatureActionPtr action,
d.feature_list = NULL;
VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
+ if (d.feature_list == NULL) {
+ return 0;
+ }
for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
sfp = vnp->data.ptrvalue;
if (sfp != NULL) {
@@ -24983,7 +25918,7 @@ static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
clear_partial = !At3EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
break;
case Partial_3_clear_constraint_good_end:
- clear_partial = !HasGoodStopCodon(sfp);
+ clear_partial = HasGoodStopCodon(sfp);
break;
}
if (clear_partial) {
@@ -25078,58 +26013,58 @@ static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr
}
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
- hasNulls = LocationHasNullsBetween (sfp->location);
- switch (convert_location)
- {
- case Convert_location_type_join :
- if (hasNulls)
- {
- slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, FALSE);
- sfp->location = SeqLocFree (sfp->location);
- sfp->location = slp;
- if (bsp->repr == Seq_repr_seg)
- {
- slp = SegLocToPartsEx (bsp, sfp->location, FALSE);
- sfp->location = SeqLocFree (sfp->location);
- sfp->location = slp;
- hasNulls = LocationHasNullsBetween (sfp->location);
- sfp->partial = (sfp->partial || hasNulls);
- }
- FreeAllFuzz (sfp->location);
- SetSeqLocPartial (sfp->location, partial5, partial3);
+ hasNulls = LocationHasNullsBetween (sfp->location);
+ switch (convert_location)
+ {
+ case Convert_location_type_join :
+ if (hasNulls)
+ {
+ slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, FALSE);
+ sfp->location = SeqLocFree (sfp->location);
+ sfp->location = slp;
+ if (bsp->repr == Seq_repr_seg)
+ {
+ slp = SegLocToPartsEx (bsp, sfp->location, FALSE);
+ sfp->location = SeqLocFree (sfp->location);
+ sfp->location = slp;
+ hasNulls = LocationHasNullsBetween (sfp->location);
+ sfp->partial = (sfp->partial || hasNulls);
+ }
+ FreeAllFuzz (sfp->location);
+ SetSeqLocPartial (sfp->location, partial5, partial3);
rval = TRUE;
- }
- break;
- case Convert_location_type_order :
- if (!hasNulls)
- {
- slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, TRUE);
+ }
+ break;
+ case Convert_location_type_order :
+ if (!hasNulls)
+ {
+ slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, TRUE);
sfp->location = SeqLocFree (sfp->location);
- sfp->location = slp;
- if (bsp->repr == Seq_repr_seg)
- {
- slp = SegLocToPartsEx (bsp, sfp->location, TRUE);
- sfp->location = SeqLocFree (sfp->location);
- sfp->location = slp;
- hasNulls = LocationHasNullsBetween (sfp->location);
- sfp->partial = (sfp->partial || hasNulls);
- }
- FreeAllFuzz (sfp->location);
- SetSeqLocPartial (sfp->location, partial5, partial3);
+ sfp->location = slp;
+ if (bsp->repr == Seq_repr_seg)
+ {
+ slp = SegLocToPartsEx (bsp, sfp->location, TRUE);
+ sfp->location = SeqLocFree (sfp->location);
+ sfp->location = slp;
+ hasNulls = LocationHasNullsBetween (sfp->location);
+ sfp->partial = (sfp->partial || hasNulls);
+ }
+ FreeAllFuzz (sfp->location);
+ SetSeqLocPartial (sfp->location, partial5, partial3);
rval = TRUE;
- }
- break;
- case Convert_location_type_merge :
+ }
+ break;
+ case Convert_location_type_merge :
if (sfp->location->choice != SEQLOC_INT) {
- slp = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE);
- sfp->location = SeqLocFree (sfp->location);
- sfp->location = slp;
- SetSeqLocPartial (sfp->location, partial5, partial3);
+ slp = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE);
+ sfp->location = SeqLocFree (sfp->location);
+ sfp->location = slp;
+ SetSeqLocPartial (sfp->location, partial5, partial3);
rval = TRUE;
}
- default:
- break;
- }
+ default:
+ break;
+ }
return rval;
}
@@ -25535,6 +26470,7 @@ static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionP
}
retranslated = FALSE;
if (sfp->data.choice == SEQFEAT_CDREGION && action->retranslate_cds) {
+ SeqMgrIndexFeatures(sfp->idx.entityID, NULL);
retranslated = RetranslateOneCDS (sfp, sfp->idx.entityID, TRUE, TRUE);
}
num_affected++;
@@ -25821,7 +26757,7 @@ NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list)
CharPtr fix, extra_text;
Boolean any_change = FALSE;
- if (fp == NULL || object_list == NULL) return FALSE;
+ if (object_list == NULL) return FALSE;
for (vnp = object_list; vnp != NULL; vnp = vnp->next)
{
@@ -25846,19 +26782,25 @@ NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list)
extra_text++;
}
}
- fprintf (fp, "Corrected %s to %s\n", bad_ssp->name, fix);
+ if (fp != NULL) {
+ fprintf (fp, "Corrected %s to %s\n", bad_ssp->name, fix);
+ }
bad_ssp->name = MemFree (bad_ssp->name);
bad_ssp->name = fix;
if (extra_text != NULL)
{
AddAltitudeToSubSourceNote (biop, extra_text);
- fprintf (fp, "Moved %s to subsource note\n", extra_text);
+ if (fp != NULL) {
+ fprintf (fp, "Moved %s to subsource note\n", extra_text);
+ }
}
any_change = TRUE;
}
else
{
- fprintf (fp, "Unable to correct %s\n", bad_ssp->name);
+ if (fp != NULL) {
+ fprintf (fp, "Unable to correct %s\n", bad_ssp->name);
+ }
}
}
}
@@ -26045,19 +26987,8 @@ static ReplacePairData macro_spell_fixes[] = {
{"Agricultrual", "Agricultural"},
{"Agricultureal", "Agricultural"},
{"Agricultrure", "Agriculture"},
- {"univeristy", "University" },
- {"univerisity", "University" },
- {"univercity", "University" },
- {"uiniversity", "University" },
- {"uinversity", "University" },
- {"univesity", "University" },
- {"uviversity", "University" },
- {"universtiy", "University" },
- {"unvierstity", "University" },
- {"univiersity", "University" },
- {"universtity", "University" },
- {"Unversity", "University" },
- {"protien", "protein" },
+ {"bioremidiation", "bioremediation"},
+ {"Colledge", "College"},
{"Insitiute", "Institute" },
{"Instutite", "Institute" },
{"instute", "Institute" },
@@ -26079,6 +27010,8 @@ static ReplacePairData macro_spell_fixes[] = {
{"hypothtical", "hypothetical" },
{"hypthetical", "hypothetical" },
{"hyptothetical", "hypothetical" },
+ {"idendification", "identification" },
+ {"protien", "protein" },
{"puatative", "putative" },
{"puative", "putative" },
{"puative", "putative" },
@@ -26089,6 +27022,20 @@ static ReplacePairData macro_spell_fixes[] = {
{"putatitve", "putative" },
{"putitive", "putative" },
{"reseach", "research"},
+ {"sequene", "sequence"},
+ {"univeristy", "University" },
+ {"univerisity", "University" },
+ {"univercity", "University" },
+ {"uiniversity", "University" },
+ {"uinversity", "University" },
+ {"univesity", "University" },
+ {"uviversity", "University" },
+ {"universtiy", "University" },
+ {"unvierstity", "University" },
+ {"univiersity", "University" },
+ {"universtity", "University" },
+ {"Unversity", "University" },
+ {"Univresity", "University" },
{NULL, NULL}};
@@ -26278,6 +27225,9 @@ static Int4 ApplyRemoveDescriptorActionToSeqEntry (RemoveDescriptorActionPtr act
d.obj_list = NULL;
VisitDescriptorsInSep (sep, &d, RemoveDescriptorCollectionCallback);
+ if (d.obj_list == NULL) {
+ return 0;
+ }
for (vnp = d.obj_list; vnp != NULL; vnp = vnp->next) {
sdp = vnp->data.ptrvalue;
if (sdp != NULL && sdp->extended != 0) {
@@ -26291,6 +27241,79 @@ static Int4 ApplyRemoveDescriptorActionToSeqEntry (RemoveDescriptorActionPtr act
}
+static void TrimStopsFromCompleteCodingRegionsCallback (SeqFeatPtr sfp, Pointer data)
+{
+ Boolean p5, p3;
+ BioseqPtr protbsp;
+ CharPtr prot_str;
+ Int4 len;
+ /* variables for shortening protein features */
+ SeqFeatPtr prot_sfp;
+ SeqMgrFeatContext fcontext;
+ SeqIntPtr sintp;
+ /* variables for logging */
+ LogInfoPtr lip;
+ Char id_buf[100];
+
+ if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL) {
+ return;
+ }
+
+ CheckSeqLocForPartial (sfp->location, &p5, &p3);
+ if (p3) {
+ return;
+ }
+
+ protbsp = BioseqFindFromSeqLoc (sfp->product);
+ if (protbsp == NULL) {
+ return;
+ }
+
+ prot_str = GetSequenceByBsp (protbsp);
+ if (prot_str == NULL || (len = StringLen (prot_str)) == 0
+ || prot_str[len - 1] != '*') {
+ prot_str = MemFree (prot_str);
+ return;
+ }
+
+ BSSeek ((ByteStorePtr) protbsp->seq_data, -1, SEEK_END);
+ BSDelete ((ByteStorePtr) protbsp->seq_data, 1);
+ protbsp->length -= 1;
+ prot_str = MemFree (prot_str);
+
+ for (prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, 0, &fcontext);
+ prot_sfp != NULL;
+ prot_sfp = SeqMgrGetNextFeature (protbsp, prot_sfp, 0, 0, &fcontext)) {
+ if (prot_sfp->location != NULL
+ && prot_sfp->location->choice == SEQLOC_INT
+ && (sintp = (SeqIntPtr)prot_sfp->location->data.ptrvalue) != NULL) {
+ if (sintp->to > protbsp->length - 1) {
+ sintp->to = protbsp->length - 1;
+ }
+ }
+ }
+
+ lip = (LogInfoPtr) data;
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ SeqIdWrite (SeqIdFindBest (protbsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
+ fprintf (lip->fp, "Trimmed trailing * from %s\n", id_buf);
+ }
+ lip->data_in_log = TRUE;
+ }
+}
+
+
+NLM_EXTERN Boolean TrimStopsFromCompleteCodingRegions (SeqEntryPtr sep, FILE *log_fp)
+{
+ LogInfoData lid;
+ MemSet (&lid, 0, sizeof (LogInfoData));
+ lid.fp = log_fp;
+ VisitFeaturesInSep (sep, &lid, TrimStopsFromCompleteCodingRegionsCallback);
+ return lid.data_in_log;
+}
+
+
static DefLineType DefLineTypeFromAutodefListType(Uint2 list_type)
{
DefLineType deflinetype = DEFLINE_USE_FEATURES;
@@ -26415,7 +27438,7 @@ static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data)
CharPtr orig, tmp;
ValNodePtr pub;
AuthListPtr alp = NULL;
- ValNodePtr names;
+ ValNodePtr names;
AuthorPtr ap, ap_orig;
AffilPtr affil_orig;
@@ -26550,6 +27573,12 @@ static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPt
SeqDescPtr sdp;
CharPtr summ;
Boolean rval = FALSE;
+ AuthListPtr alp;
+ ValNodePtr names;
+ AuthorPtr ap;
+ SeqSubmitPtr ssp;
+ SubmitBlockPtr sbp;
+ CitSubPtr csp;
if (action == NULL || sep == NULL) return FALSE;
@@ -26582,6 +27611,26 @@ static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPt
ApplyFixPubCapsCallback (pdp, &f);
}
+ ssp = FindSeqSubmitForSeqEntry (sep);
+ if (ssp != NULL) {
+ sbp = ssp->sub;
+ if (sbp != NULL) {
+ csp = sbp->cit;
+ if (csp != NULL) {
+ alp = csp->authors;
+ if (alp != NULL && alp->choice == 1) {
+ for (names = alp->names; names != NULL; names = names->next) {
+ ap = names->data.ptrvalue;
+ if (f.action->authors && !f.action->punct_only) {
+ FixCapitalizationInAuthor (ap);
+ f.num_sub_fields++;
+ }
+ }
+ }
+ }
+ }
+ }
+
f.orgnames = ValNodeFree (f.orgnames);
if (f.num_sub_fields > 0 || f.num_pub_fields > 0) {
@@ -26799,6 +27848,309 @@ static Int4 SortFieldsInSeqEntry (SortFieldsActionPtr action, SeqEntryPtr sep)
}
+static Boolean DoStringsMatch (CharPtr str1, CharPtr str2, Boolean case_sensitive)
+{
+ Boolean rval = FALSE;
+
+ if (case_sensitive) {
+ if (StringCmp (str1, str2) == 0) {
+ rval = TRUE;
+ }
+ } else if (StringICmp (str1, str2) == 0) {
+ rval = TRUE;
+ }
+ return rval;
+}
+
+
+static Boolean DoGBQualListsMatch (GBQualPtr gbq1, GBQualPtr gbq2, Boolean case_sensitive)
+{
+ Boolean rval = TRUE;
+
+ while (rval && gbq1 != NULL && gbq2 != NULL) {
+ if (!DoStringsMatch (gbq1->qual, gbq2->qual, case_sensitive)) {
+ rval = FALSE;
+ } else if (!DoStringsMatch (gbq1->val, gbq2->val, case_sensitive)) {
+ rval = FALSE;
+ } else {
+ gbq1 = gbq1->next;
+ gbq2 = gbq2->next;
+ }
+ }
+ if (gbq1 != NULL || gbq2 != NULL) {
+ rval = FALSE;
+ }
+ return rval;
+}
+
+
+static Boolean CheckBioseqForPartial (BioseqPtr bsp, BoolPtr partial5, BoolPtr partial3)
+{
+ SeqMgrDescContext context;
+ SeqDescrPtr sdp;
+ MolInfoPtr mip;
+ Boolean rval = FALSE;
+
+ if (bsp == NULL) {
+ return FALSE;
+ }
+ if (partial5 != NULL) {
+ *partial5 = FALSE;
+ }
+ if (partial3 != NULL) {
+ *partial3 = FALSE;
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
+ if (sdp != NULL && (mip = (MolInfoPtr) sdp->data.ptrvalue) != NULL) {
+ /* partial 5 */
+ if (mip->completeness == 3 || mip->completeness == 5) {
+ if (partial5 != NULL) {
+ *partial5 = TRUE;
+ }
+ rval = TRUE;
+ }
+ /* partial 3 */
+ if (mip->completeness == 4 || mip->completeness == 5) {
+ if (partial3 != NULL) {
+ *partial3 = TRUE;
+ }
+ rval = TRUE;
+ }
+ if (mip->completeness == 2) {
+ rval = TRUE;
+ }
+ }
+ return rval;
+}
+
+
+static Boolean ProductsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean case_sensitive, Boolean ignore_partial)
+{
+ BioseqPtr bsp1, bsp2;
+ Int2 ctr, pos1, pos2;
+ Char buf1[51];
+ Char buf2[51];
+ Int4 len = 50;
+ SeqFeatPtr sfp1, sfp2;
+ SeqMgrFeatContext fcontext1, fcontext2;
+ Boolean partial5_1, partial5_2, partial3_1, partial3_2;
+
+ if (slp1 == NULL && slp2 == NULL) {
+ return TRUE;
+ } else if (slp1 == NULL || slp2 == NULL) {
+ return FALSE;
+ } else if (SeqLocCompare (slp1, slp2) == SLC_A_EQ_B) {
+ return TRUE;
+ } else {
+ bsp1 = BioseqFindFromSeqLoc (slp1);
+ bsp2 = BioseqFindFromSeqLoc (slp2);
+ if (bsp1 == NULL || bsp2 == NULL) {
+ /* can't compare, assume they don't match */
+ return FALSE;
+ } else if (bsp1->length != bsp2->length) {
+ return FALSE;
+ } else {
+ CheckBioseqForPartial (bsp1, &partial5_1, &partial3_1);
+ CheckBioseqForPartial (bsp2, &partial5_2, &partial3_2);
+ if (!ignore_partial
+ && ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2)
+ || (partial3_1 && !partial3_2) || (!partial3_1 && partial3_2))) {
+ return FALSE;
+ }
+ /* check that translation sequences match */
+ pos1 = 0;
+ pos2 = 0;
+ if (ignore_partial) {
+ if (partial5_1 || partial5_2) {
+ pos1++;
+ pos2++;
+ }
+ }
+ while (pos1 < bsp1->length && pos2 < bsp2->length) {
+ ctr = SeqPortStreamInt (bsp1, pos1, MIN(pos1 + len - 1, bsp1->length - 1), Seq_strand_plus,
+ STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
+ (Pointer) buf1, NULL);
+ ctr = SeqPortStreamInt (bsp2, pos2, MIN(pos2 + len - 1, bsp2->length - 1), Seq_strand_plus,
+ STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
+ (Pointer) buf2, NULL);
+ if (StringNCmp (buf1, buf2, ctr) != 0) {
+ return FALSE;
+ }
+ pos1 += len;
+ pos2 += len;
+ }
+
+ /* now check that protein features match */
+ sfp1 = SeqMgrGetNextFeature (bsp1, NULL, 0, 0, &fcontext1);
+ sfp2 = SeqMgrGetNextFeature (bsp2, NULL, 0, 0, &fcontext2);
+ while (sfp1 != NULL && sfp2 != NULL) {
+ if (!DoFeaturesMatch (sfp1, sfp2, TRUE, case_sensitive, ignore_partial)) {
+ return FALSE;
+ }
+ sfp1 = SeqMgrGetNextFeature (bsp1, sfp1, SEQFEAT_PROT, 0, &fcontext1);
+ sfp2 = SeqMgrGetNextFeature (bsp2, sfp2, SEQFEAT_PROT, 0, &fcontext2);
+ }
+ if (sfp1 != NULL || sfp2 != NULL) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+ }
+ }
+}
+
+
+static Boolean DoLocationPartialsMatch (SeqLocPtr slp1, SeqLocPtr slp2)
+{
+ Boolean partial5_1, partial3_1, partial1;
+ Boolean partial5_2, partial3_2, partial2;
+
+ partial1 = CheckSeqLocForPartial (slp1, &partial5_1, &partial3_1);
+ partial2 = CheckSeqLocForPartial (slp2, &partial5_2, &partial3_2);
+ if ((partial1 && !partial2) || (!partial1 && partial2)) {
+ return FALSE;
+ }
+ if ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2)) {
+ return FALSE;
+ }
+ if ((partial3_1 && !partial3_2) || (!partial3_1 && partial3_2)) {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+static Boolean DoLocationsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean allow_different_sequences, Boolean ignore_partial)
+{
+ SeqLocPtr slp_tmp1, slp_tmp2;
+
+ if (slp1 == NULL && slp2 == NULL) {
+ return TRUE;
+ } else if (slp1 == NULL || slp2 == NULL) {
+ return FALSE;
+ }
+
+ if (!ignore_partial && !DoLocationPartialsMatch (slp1, slp2)) {
+ return FALSE;
+ }
+ if (allow_different_sequences) {
+ for (slp_tmp1 = SeqLocFindNext (slp1, NULL), slp_tmp2 = SeqLocFindNext (slp2, NULL);
+ slp_tmp1 != NULL && slp_tmp2 != NULL;
+ slp_tmp1 = SeqLocFindNext (slp1, slp_tmp1), slp_tmp2 = SeqLocFindNext (slp2, slp_tmp2)) {
+ if (SeqLocStart (slp_tmp1) != SeqLocStart (slp_tmp2)
+ || SeqLocStop (slp_tmp1) != SeqLocStop (slp_tmp2)
+ || (!ignore_partial && !DoLocationPartialsMatch (slp_tmp1, slp_tmp2))) {
+ return FALSE;
+ }
+ }
+ } else if (SeqLocCompare (slp1, slp2) != SLC_A_EQ_B) {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+static Boolean DoCdRegionsMatch (CdRegionPtr crp1, CdRegionPtr crp2)
+{
+ if (crp1 == NULL && crp2 == NULL) {
+ return TRUE;
+ } else if (crp1 == NULL || crp2 == NULL) {
+ return FALSE;
+ } else if ((crp1->orf && !crp2->orf) || (!crp1->orf && crp2->orf)){
+ return FALSE;
+ } else if ((crp1->conflict && !crp2->conflict) || (!crp1->conflict && crp2->conflict)){
+ return FALSE;
+ } else if (crp1->gaps != crp2->gaps) {
+ return FALSE;
+ } else if (crp1->mismatch != crp2->mismatch) {
+ return FALSE;
+ } else if (crp1->stops != crp2->stops) {
+ return FALSE;
+ } else if ((crp1->genetic_code == NULL && crp2->genetic_code != NULL)
+ || (crp1->genetic_code != NULL && crp2->genetic_code == NULL)
+ || (crp1->genetic_code != NULL && crp2->genetic_code != NULL
+ && !AsnIoMemComp (crp1->genetic_code, crp2->genetic_code, (AsnWriteFunc) GeneticCodeAsnWrite))) {
+ return FALSE;
+ } else if ((crp1->code_break == NULL && crp2->code_break != NULL)
+ || (crp1->code_break != NULL && crp2->code_break == NULL)
+ || (crp1->code_break != NULL && crp2->code_break != NULL
+ && !AsnIoMemComp (crp1->code_break, crp2->code_break, (AsnWriteFunc) CodeBreakAsnWrite))) {
+ return FALSE;
+ } else if (crp1->frame != crp2->frame) {
+ if ((crp1->frame == 0 || crp1->frame == 1) && (crp2->frame == 0 || crp2->frame == 1)) {
+ /* both effectively frame 1, ignore this difference */
+ } else {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+
+static Boolean DoesSeqFeatDataMatch (ChoicePtr d1, ChoicePtr d2)
+{
+ if (d1 == NULL && d2 == NULL) {
+ return TRUE;
+ } else if (d1 == NULL || d2 == NULL) {
+ return FALSE;
+ } else if (d1->choice != d2->choice) {
+ return FALSE;
+ } else if (d1->choice == SEQFEAT_CDREGION) {
+ return DoCdRegionsMatch(d1->value.ptrvalue, d2->value.ptrvalue);
+ } else {
+ return AsnIoMemComp(d1, d2, (AsnWriteFunc) SeqFeatDataAsnWrite);
+ }
+}
+
+
+NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial)
+{
+ if (sfp1 == NULL && sfp2 == NULL) {
+ return TRUE;
+ } else if (sfp1 == NULL || sfp2 == NULL) {
+ return FALSE;
+ } if (sfp1->data.choice != sfp2->data.choice) {
+ return FALSE;
+ } else if (sfp1->idx.subtype != sfp2->idx.subtype) {
+ return FALSE;
+ } else if (!ignore_partial && ((sfp1->partial && !sfp2->partial) || (!sfp1->partial && sfp2->partial))) {
+ return FALSE;
+ } else if ((sfp1->pseudo && !sfp2->pseudo) || (!sfp1->pseudo && sfp2->pseudo)) {
+ return FALSE;
+ } else if ((sfp1->excpt && !sfp2->excpt) || (!sfp1->excpt && sfp2->excpt)) {
+ return FALSE;
+ } else if (!DoLocationsMatch (sfp1->location, sfp2->location, allow_different_sequences, ignore_partial)) {
+ return FALSE;
+ } else if (!DoStringsMatch (sfp1->comment, sfp2->comment, case_sensitive)) {
+ return FALSE;
+ } else if (!DoStringsMatch (sfp1->title, sfp2->title, case_sensitive)) {
+ return FALSE;
+ } else if (sfp1->ext != NULL || sfp2->ext != NULL) {
+ return FALSE;
+ } else if (sfp1->exts != NULL || sfp2->exts != NULL) {
+ return FALSE;
+ } else if (!DoStringsMatch (sfp1->except_text, sfp2->except_text, case_sensitive)) {
+ return FALSE;
+ } else if (sfp1->exp_ev != sfp2->exp_ev) {
+ return FALSE;
+ } else if (!DoGBQualListsMatch (sfp1->qual, sfp2->qual, case_sensitive)) {
+ return FALSE;
+ } else if ((sfp1->cit != NULL || sfp2->cit != NULL) && PubMatch (sfp1->cit, sfp2->cit) != 0) {
+ return FALSE;
+ } else if (!DbxrefsMatch (sfp1->dbxref, sfp2->dbxref, case_sensitive)) {
+ return FALSE;
+ } else if (!DoesSeqFeatDataMatch(&(sfp1->data), &(sfp2->data))) {
+ return FALSE;
+ } else if (!XrefsMatch (sfp1->xref, sfp2->xref)) {
+ return FALSE;
+ } else if (!ProductsMatch (sfp1->product, sfp2->product, case_sensitive, ignore_partial)) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
+
typedef struct dupfeats {
ValNodePtr delete_list;
RemoveDuplicateFeatureActionPtr action;
@@ -26877,7 +28229,7 @@ NLM_EXTERN void RemoveDuplicateFeaturesInList (ValNodePtr delete_list, Uint2 ent
DeleteMarkedObjects (entityID, 0, NULL);
if (remove_proteins) {
sep = GetTopSeqEntryForEntityID (entityID);
- RenormalizeNucProtSets (sep, TRUE);
+ RenormalizeNucProtSets (sep, TRUE);
}
}
@@ -27459,15 +28811,19 @@ static void GetPubsForAuthorFixFeat (SeqFeatPtr sfp, Pointer data)
static Boolean ApplyAuthorFixToSeqEntry (SeqEntryPtr sep, AuthorFixActionPtr action, FILE *log_fp)
{
- PubCollectData p;
- ValNodePtr vnp, pub;
- PubdescPtr pdp;
- SeqFeatPtr sfp;
- SeqDescPtr sdp;
- AuthListPtr alp;
- ValNodePtr names;
- AuthorPtr ap;
- Int4 num_changed = 0;
+ PubCollectData p;
+ ValNodePtr vnp, pub;
+ PubdescPtr pdp;
+ SeqFeatPtr sfp;
+ SeqDescPtr sdp;
+ AuthListPtr alp;
+ ValNodePtr names;
+ AuthorPtr ap;
+ SeqSubmitPtr ssp;
+ SubmitBlockPtr sbp;
+ ContactInfoPtr cip;
+ CitSubPtr csp;
+ Int4 num_changed = 0;
if (sep == NULL || action == NULL) {
return FALSE;
@@ -27518,6 +28874,63 @@ static Boolean ApplyAuthorFixToSeqEntry (SeqEntryPtr sep, AuthorFixActionPtr act
}
}
}
+ ssp = FindSeqSubmitForSeqEntry (sep);
+ if (ssp != NULL) {
+ sbp = ssp->sub;
+ if (sbp != NULL) {
+ csp = sbp->cit;
+ if (csp != NULL) {
+ alp = csp->authors;
+ if (alp != NULL && alp->choice == 1) {
+ for (names = alp->names; names != NULL; names = names->next) {
+ ap = names->data.ptrvalue;
+ switch (action->fix_type) {
+ case Author_fix_type_truncate_middle_initials:
+ if (TruncateAuthorMiddleInitials(ap)) {
+ num_changed++;
+ }
+ break;
+ case Author_fix_type_strip_suffix:
+ if (StripSuffixFromAuthor(ap)) {
+ num_changed++;
+ }
+ break;
+ case Author_fix_type_move_middle_to_first:
+ if (MoveAuthorMiddleToFirst (ap)) {
+ num_changed++;
+ }
+ break;
+ }
+ }
+ }
+ }
+ cip = sbp->contact;
+ if (cip != NULL) {
+ ap = cip->contact;
+ if (ap != NULL) {
+ /*
+ switch (action->fix_type) {
+ case Author_fix_type_truncate_middle_initials:
+ if (TruncateAuthorMiddleInitials(ap)) {
+ num_changed++;
+ }
+ break;
+ case Author_fix_type_strip_suffix:
+ if (StripSuffixFromAuthor(ap)) {
+ num_changed++;
+ }
+ break;
+ case Author_fix_type_move_middle_to_first:
+ if (MoveAuthorMiddleToFirst (ap)) {
+ num_changed++;
+ }
+ break;
+ }
+ */
+ }
+ }
+ }
+ }
p.list = ValNodeFree (p.list);
if (num_changed > 0) {
@@ -28308,6 +29721,11 @@ static Boolean PerformApplyTableInSeqEntry
fprintf (log_fp, "%s not recognized as qualifier name, unable to apply table from %s\n",
(CharPtr) val->data.ptrvalue, action->filename);
}
+ } else {
+ if (IsFieldTypeCDSProduct(t->field)) {
+ t->match_mrna = action->also_change_mrna;
+ }
+ t->skip_blank = action->skip_blanks;
}
}
}
@@ -28616,10 +30034,40 @@ static Boolean IsStructuredCommentWithPrefix (UserObjectPtr uop, CharPtr prefix)
return FALSE;
}
-
+
+static Boolean TruncateAtLocalId (SeqIdPtr sip_local, CharPtr filename)
+{
+ Char id_buf[20];
+ CharPtr cmp;
+ ObjectIdPtr oip;
+ Boolean removed_id = FALSE;
+ Int4 len, f_len;
+
+ if (filename == NULL || sip_local == NULL
+ || (oip = (ObjectIdPtr) sip_local->data.ptrvalue) == NULL) {
+ return FALSE;
+ }
+ f_len = StringLen (filename);
+
+ if (oip->id > 0) {
+ sprintf (id_buf, "%d", oip->id);
+ cmp = id_buf;
+ } else {
+ cmp = oip->str;
+ }
+ len = StringLen (cmp);
+ if (f_len > len + 1 && filename[f_len - len - 1] == '/'
+ && StringCmp (filename + (f_len - len), cmp) == 0) {
+ filename[f_len - len - 1] = 0;
+ removed_id = TRUE;
+ }
+ return removed_id;
+}
+
+
static void FindSeqTechBsp (BioseqPtr bsp, Pointer data)
{
- SeqIdPtr sip;
+ SeqIdPtr sip, sip_local = NULL;
DbtagPtr dbtag;
PropagateSeqTechPtr p;
CharPtr cp;
@@ -28638,12 +30086,21 @@ static void FindSeqTechBsp (BioseqPtr bsp, Pointer data)
&& (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL
&& (StringICmp (dbtag->db, "NCBIFILE") == 0)) {
p->filename = StringSave (dbtag->tag->str);
- cp = StringRChr (p->filename, '/');
- if (cp != NULL) {
- *cp = 0;
- }
+ } else if (sip->choice == SEQID_LOCAL) {
+ sip_local = sip;
+ }
+ }
+ if (p->filename == NULL) {
+ p = MemFree (p);
+ return;
+ }
+ if (!TruncateAtLocalId(sip_local, p->filename)) {
+ cp = StringRChr (p->filename, '/');
+ if (cp != NULL) {
+ *cp = 0;
}
}
+
for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
sdp != NULL && p->uop == NULL;
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
@@ -28731,6 +30188,367 @@ static Boolean PerformPropagateSequenceTechnology
}
+typedef struct ecrepdata { /* one row of an EC number replacement table */
+ CharPtr before; /* text ahead of the first tab on a table line; used as the sort and lookup key */
+ CharPtr after; /* text following that tab; may itself contain tabs */
+} EcRepData, PNTR EcRepPtr;
+
+
+/* Release an EcRepData along with both of its strings.
+ * A NULL argument is returned unchanged; otherwise the result is whatever
+ * MemFree hands back for the freed record.
+ */
+static EcRepPtr EcRepFree (EcRepPtr e)
+{
+  if (e == NULL) {
+    return e;
+  }
+  e->before = MemFree (e->before);
+  e->after = MemFree (e->after);
+  return (EcRepPtr) MemFree (e);
+}
+
+
+/* ValNodeSort comparator: orders nodes by the "before" string of the
+ * EcRepData each node carries, using StringCmp.  If any pointer on the way
+ * to either string is NULL the two nodes compare as equal.
+ */
+static int LIBCALLBACK SortVnpByEcBefore (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr lhs_node, rhs_node;
+  EcRepPtr   lhs, rhs;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  lhs_node = *((ValNodePtr PNTR) ptr1);
+  rhs_node = *((ValNodePtr PNTR) ptr2);
+  if (lhs_node == NULL || rhs_node == NULL) {
+    return 0;
+  }
+  lhs = (EcRepPtr) lhs_node->data.ptrvalue;
+  rhs = (EcRepPtr) rhs_node->data.ptrvalue;
+  if (lhs == NULL || rhs == NULL) {
+    return 0;
+  }
+  if (lhs->before == NULL || rhs->before == NULL) {
+    return 0;
+  }
+  return StringCmp (lhs->before, rhs->before);
+}
+
+/* Load a tab-delimited EC number replacement table.
+ *
+ * file : table file name, appended to the directory located by
+ *        FindPath ("ncbi", "ncbi", "data", ...)
+ * len  : if not NULL, receives the number of entries in the returned array
+ *        (0 whenever NULL is returned)
+ *
+ * Each non-blank line that contains a tab becomes one EcRepData: the text
+ * before the first tab is saved as "before", everything after it as "after".
+ * Lines are read through a 512-character buffer.  Entries are sorted by
+ * "before" with SortVnpByEcBefore.
+ *
+ * Returns an array of entries (allocated with one spare slot), or NULL when
+ * the path or file cannot be found/opened, no usable line was read, or the
+ * array could not be allocated.  On allocation failure the parsed entries
+ * are freed and the reported length is 0, so callers never see a non-zero
+ * length paired with a NULL table.
+ */
+static EcRepPtr PNTR SetupECReplacementTable (CharPtr file, Int4Ptr len)
+{
+  EcRepPtr      erp;
+  FileCache     fc;
+  FILE          *fp = NULL;
+  Int4          i;
+  ValNodePtr    last = NULL;
+  Char          line [512];
+  Char          path [PATH_MAX];
+  CharPtr       ptr;
+  ErrSev        sev;
+  CharPtr       str;
+  ValNodePtr    vnp;
+  ValNodePtr    ec_rep_list = NULL;
+  EcRepPtr PNTR ec_rep_data = NULL;
+  Int4          ec_rep_len = 0;
+
+  if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
+    FileBuildPath (path, NULL, file);
+    /* message level is switched to SEV_ERROR only for the open attempt */
+    sev = ErrSetMessageLevel (SEV_ERROR);
+    fp = FileOpen (path, "r");
+    ErrSetMessageLevel (sev);
+    if (fp != NULL) {
+      FileCacheSetup (&fc, fp);
+
+      str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
+      while (str != NULL) {
+        if (StringDoesHaveText (str)) {
+          ptr = StringChr (str, '\t');
+          if (ptr != NULL) {
+            /* split the line at the first tab */
+            *ptr = '\0';
+            ptr++;
+            erp = (EcRepPtr) MemNew (sizeof (EcRepData));
+            if (erp != NULL) {
+              erp->before = StringSave (str);
+              erp->after = StringSave (ptr);
+              vnp = ValNodeAddPointer (&last, 0, (Pointer) erp);
+              if (vnp == NULL) {
+                /* could not record the entry: drop it, keep the list tail */
+                erp = EcRepFree (erp);
+              } else {
+                if (ec_rep_list == NULL) {
+                  ec_rep_list = vnp;
+                }
+                last = vnp;
+              }
+            }
+          }
+        }
+        str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
+      }
+
+      FileClose (fp);
+      ec_rep_len = ValNodeLen (ec_rep_list);
+      if (ec_rep_len > 0) {
+        ec_rep_list = ValNodeSort (ec_rep_list, SortVnpByEcBefore);
+        ec_rep_data = (EcRepPtr PNTR) MemNew (sizeof (EcRepPtr) * (ec_rep_len + 1));
+        if (ec_rep_data != NULL) {
+          /* ownership of each entry moves from the list to the array */
+          for (vnp = ec_rep_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
+            erp = (EcRepPtr) vnp->data.ptrvalue;
+            ec_rep_data [i] = erp;
+          }
+        } else {
+          /* no array to hand the entries to: free them and report nothing */
+          for (vnp = ec_rep_list; vnp != NULL; vnp = vnp->next) {
+            vnp->data.ptrvalue = EcRepFree ((EcRepPtr) vnp->data.ptrvalue);
+          }
+          ec_rep_len = 0;
+        }
+      }
+    }
+  }
+  /* frees the list nodes only; the entries live on in ec_rep_data */
+  ec_rep_list = ValNodeFree (ec_rep_list);
+  if (len != NULL) {
+    *len = ec_rep_len;
+  }
+  return ec_rep_data;
+}
+
+
+/* Free the first ec_rep_len entries of a replacement table with EcRepFree,
+ * then free the array itself.  A NULL table is accepted and yields NULL.
+ */
+static EcRepPtr PNTR FreeECReplacementTable (EcRepPtr PNTR ec_rep_data, Int4 ec_rep_len)
+{
+  Int4 idx = 0;
+
+  if (ec_rep_data != NULL) {
+    while (idx < ec_rep_len) {
+      ec_rep_data[idx] = EcRepFree (ec_rep_data[idx]);
+      idx++;
+    }
+    ec_rep_data = MemFree (ec_rep_data);
+  }
+  return ec_rep_data;
+}
+
+
+/* Binary-search a table sorted by "before" for str.
+ *
+ * Returns the entry at the position where the search converges: the first
+ * entry whose "before" is not less than str, or the last entry when every
+ * "before" is less.  The result is therefore only a candidate; the caller
+ * must compare its "before" with str to confirm an exact match.
+ *
+ * Returns NULL when the table is NULL or has no entries (for example when
+ * the replacement file could not be loaded); previously that case indexed
+ * the array at -1.
+ */
+static EcRepPtr GetEcReplacementFromTable (CharPtr str, EcRepPtr PNTR ec_rep_data, Int4 ec_rep_len)
+{
+  Int4     L, R, mid;
+  EcRepPtr erp = NULL;
+
+  if (ec_rep_data == NULL || ec_rep_len < 1) {
+    return NULL;
+  }
+
+  L = 0;
+  R = ec_rep_len - 1;
+  while (L < R) {
+    mid = L + (R - L) / 2;
+    erp = ec_rep_data [(int) mid];
+    /* a NULL slot is treated like "not less than str" */
+    if (erp != NULL && StringCmp (erp->before, str) < 0) {
+      L = mid + 1;
+    } else {
+      R = mid;
+    }
+  }
+  erp = ec_rep_data [(int) R];
+  return erp;
+}
+
+
+typedef struct replaceupdatedec { /* state handed to UpdateECCallback through its userdata pointer */
+ FILE *log_fp; /* log stream; the callback skips logging when this is NULL */
+ UpdateReplacedEcNumbersActionPtr action; /* supplies the delete_* option flags read by the callback */
+ EcRepPtr PNTR ec_rep_data; /* replacement table built by SetupECReplacementTable */
+ Int4 ec_rep_len; /* entry count for ec_rep_data */
+ Int4 num_removed; /* EC numbers deleted so far */
+ Int4 num_replaced; /* EC numbers swapped for their replacement so far */
+} ReplaceUpdatedECData, PNTR ReplaceUpdatedEcPtr;
+
+
+/* Inspect one GeneRef for a usable label.
+ * Returns TRUE when the search is settled by this GeneRef: either the gene
+ * is suppressed (*p_found stays FALSE) or a copy of its locus_tag, or
+ * failing that its locus, was stored in *p_locus_tag (*p_found is TRUE).
+ * Returns FALSE when grp is NULL or carries neither text, meaning the
+ * caller should keep looking.
+ */
+static Boolean ResolveGeneRefLabel (GeneRefPtr grp, CharPtr PNTR p_locus_tag, Boolean PNTR p_found)
+{
+  *p_found = FALSE;
+  if (grp == NULL) {
+    return FALSE;
+  }
+  if (SeqMgrGeneIsSuppressed (grp)) {
+    return TRUE;
+  }
+  if (StringDoesHaveText (grp->locus_tag)) {
+    *p_locus_tag = StringSave (grp->locus_tag);
+    *p_found = TRUE;
+    return TRUE;
+  }
+  if (StringDoesHaveText (grp->locus)) {
+    *p_locus_tag = StringSave (grp->locus);
+    *p_found = TRUE;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Find a gene label for a protein feature and return a newly allocated copy
+ * in *p_locus_tag (the caller frees it).
+ *
+ * Candidates are tried in this order, each preferring locus_tag over locus:
+ *   1. the gene xref on sfp itself
+ *   2. the gene xref on the CDS whose product is the Bioseq found from
+ *      sfp->location
+ *   3. the gene feature overlapping that CDS
+ * A suppressed gene at any step ends the search with FALSE.  FALSE is also
+ * returned when the Bioseq, the CDS or the overlapping gene cannot be found,
+ * or when no candidate has text; *p_locus_tag is then left unmodified.
+ */
+static Boolean GetLocusTagFromProtRef (SeqFeatPtr sfp, CharPtr PNTR p_locus_tag)
+
+{
+  BioseqPtr         bsp;
+  SeqFeatPtr        cds;
+  SeqMgrFeatContext fcontext;
+  SeqFeatPtr        gene;
+  Boolean           found = FALSE;
+
+  if (sfp == NULL || p_locus_tag == NULL) {
+    return FALSE;
+  }
+
+  /* 1: xref directly on the protein feature */
+  if (ResolveGeneRefLabel (SeqMgrGetGeneXref (sfp), p_locus_tag, &found)) {
+    return found;
+  }
+
+  /* 2: xref on the coding region that produces this protein */
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  if (bsp == NULL) {
+    return FALSE;
+  }
+  cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
+  if (cds == NULL) {
+    return FALSE;
+  }
+  if (ResolveGeneRefLabel (SeqMgrGetGeneXref (cds), p_locus_tag, &found)) {
+    return found;
+  }
+
+  /* 3: gene feature overlapping the coding region */
+  gene = SeqMgrGetOverlappingGene (cds->location, &fcontext);
+  if (gene == NULL || gene->data.choice != SEQFEAT_GENE) {
+    return FALSE;
+  }
+  ResolveGeneRefLabel ((GeneRefPtr) gene->data.value.ptrvalue, p_locus_tag, &found);
+  return found;
+}
+
+
+/* VisitFeaturesInSep callback that updates the EC numbers of one protein
+ * feature.  userdata must point to a ReplaceUpdatedECData.
+ *
+ * For every non-blank EC number on the ProtRef:
+ *   - if ValidateECnumber rejects it, it is deleted when
+ *     action->delete_improper_format is set;
+ *   - otherwise, if the replacement table has an exact "before" match, the
+ *     value is replaced by "after" when that is a single value (no tab), or
+ *     deleted when it lists several and action->delete_multiple_replacement
+ *     is set;
+ *   - whatever value remains is deleted when action->delete_unrecognized is
+ *     set and ECnumberNotInList reports it.
+ * A deletion frees the string and leaves the ValNode in prp->ec with a NULL
+ * value.  Counters in the ReplaceUpdatedECData are updated, and each change
+ * is logged to log_fp when that is not NULL.
+ *
+ * Features are ignored when the state or its action is NULL (matching the
+ * other macro callbacks in this file), and the table lookup is skipped when
+ * no table was loaded, so the remaining checks still run.
+ */
+static void UpdateECCallback (SeqFeatPtr sfp, Pointer userdata)
+
+{
+  ProtRefPtr          prp;
+  CharPtr             str;
+  ValNodePtr          vnp;
+  CharPtr             locus_tag = NULL;
+  CharPtr             label;
+  ReplaceUpdatedEcPtr r;
+  EcRepPtr            erp;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
+  prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+  if (prp == NULL || prp->ec == NULL) return;
+  r = (ReplaceUpdatedEcPtr) userdata;
+  if (r == NULL || r->action == NULL) {
+    return;
+  }
+  GetLocusTagFromProtRef (sfp, &locus_tag);
+  /* text used to identify the feature in every log line */
+  label = (locus_tag == NULL) ? "No locus tag" : locus_tag;
+
+  for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
+    str = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (str)) continue;
+
+    if (!ValidateECnumber (str)) {
+      if (r->action->delete_improper_format) {
+        if (r->log_fp != NULL) {
+          fprintf (r->log_fp, "%s: removed %s\n", label, str);
+        }
+        vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+        r->num_removed++;
+      }
+      continue;
+    }
+
+    erp = NULL;
+    if (r->ec_rep_data != NULL && r->ec_rep_len > 0) {
+      erp = GetEcReplacementFromTable (str, r->ec_rep_data, r->ec_rep_len);
+    }
+    /* the lookup returns a candidate only; confirm it is an exact match */
+    if (erp != NULL && StringCmp (erp->before, str) == 0) {
+      if (StringChr (erp->after, '\t') == NULL) {
+        if (r->log_fp != NULL) {
+          fprintf (r->log_fp, "%s:replaced %s with %s\n", label, erp->before, erp->after);
+        }
+        vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+        vnp->data.ptrvalue = StringSave (erp->after);
+        r->num_replaced++;
+      } else if (r->action->delete_multiple_replacement) {
+        if (r->log_fp != NULL) {
+          fprintf (r->log_fp, "%s: removed %s\n", label, erp->before);
+        }
+        vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+        r->num_removed++;
+      }
+    }
+
+    /* re-read: the value may have just been replaced or deleted */
+    str = (CharPtr) vnp->data.ptrvalue;
+    if (str != NULL && r->action->delete_unrecognized && ECnumberNotInList (str)) {
+      if (r->log_fp != NULL) {
+        fprintf (r->log_fp, "%s: deleted %s\n", label, str);
+      }
+      vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+      r->num_removed++;
+    }
+  }
+  locus_tag = MemFree (locus_tag);
+}
+
+
+/* Run UpdateECCallback over every feature in sep using the replacement
+ * table loaded from "ecnum_replaced.txt", then free the table.
+ * Returns TRUE when at least one EC number was removed or replaced.
+ */
+static Boolean ReplaceUpdatedECNumbers (SeqEntryPtr sep, UpdateReplacedEcNumbersActionPtr action, FILE *log_fp)
+{
+  ReplaceUpdatedECData ctx;
+  Boolean              changed;
+
+  MemSet (&ctx, 0, sizeof (ReplaceUpdatedECData));
+  ctx.action = action;
+  ctx.log_fp = log_fp;
+  ctx.ec_rep_data = SetupECReplacementTable ("ecnum_replaced.txt", &(ctx.ec_rep_len));
+
+  VisitFeaturesInSep (sep, (Pointer) &ctx, UpdateECCallback);
+
+  ctx.ec_rep_data = FreeECReplacementTable (ctx.ec_rep_data, ctx.ec_rep_len);
+
+  changed = FALSE;
+  if (ctx.num_removed > 0 || ctx.num_replaced > 0) {
+    changed = TRUE;
+  }
+  return changed;
+}
+
+
+typedef struct retranslatecdscallback { /* state passed to PerformRetranslationsCallback */
+ Int4 num_retranslated; /* incremented each time RetranslateOneCDS returns TRUE */
+ RetranslateCdsActionPtr action; /* supplies obey_stop_codon; the callback does nothing when NULL */
+} RetranslateCDSCallbackData, PNTR RetranslateCDSCallbackPtr;
+
+/* VisitFeaturesInSep callback: retranslate one coding region.
+ * data must point to a RetranslateCDSCallbackData.  Features that are not
+ * coding regions are ignored, as is everything when the state or its action
+ * is NULL.  RetranslateOneCDS receives !obey_stop_codon and obey_stop_codon
+ * as its last two arguments; the counter is bumped when it returns TRUE.
+ */
+static void PerformRetranslationsCallback (SeqFeatPtr sfp, Pointer data)
+{
+  RetranslateCDSCallbackPtr ctx;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
+    return;
+  }
+  ctx = (RetranslateCDSCallbackPtr) data;
+  if (ctx == NULL || ctx->action == NULL) {
+    return;
+  }
+  if (RetranslateOneCDS (sfp, sfp->idx.entityID, !ctx->action->obey_stop_codon, ctx->action->obey_stop_codon)) {
+    ctx->num_retranslated++;
+  }
+}
+
+
+/* Retranslate the coding regions in sep according to action.
+ * Returns TRUE when at least one coding region was retranslated; in that
+ * case the count is written to log_fp if it is not NULL.
+ */
+static Boolean PerformRetranslations (SeqEntryPtr sep, RetranslateCdsActionPtr action, FILE *log_fp)
+{
+  RetranslateCDSCallbackData ctx;
+
+  MemSet (&ctx, 0, sizeof (RetranslateCDSCallbackData));
+  ctx.action = action;
+
+  VisitFeaturesInSep (sep, &ctx, PerformRetranslationsCallback);
+
+  if (ctx.num_retranslated <= 0) {
+    return FALSE;
+  }
+  if (log_fp != NULL) {
+    fprintf (log_fp, "Retranslated %d coding regions\n", ctx.num_retranslated);
+  }
+  return TRUE;
+}
+
+
+typedef struct adjustfeaturesforgapscallback { /* state passed to PerformAdjustFeaturesForGapsCallback */
+ Int4 num_processed; /* features handed to AdjustFeatureForGapsCallback; counted unconditionally */
+ AdjustFeaturesForGapsActionPtr action; /* forwarded to AdjustFeatureForGapsCallback; nothing happens when NULL */
+} AdjustFeaturesForGapCallbackData, PNTR AdjustFeaturesForGapCallbackPtr;
+
+/* VisitFeaturesInSep callback: pass one feature, together with the action,
+ * to AdjustFeatureForGapsCallback and count it.  data must point to an
+ * AdjustFeaturesForGapCallbackData.  Nothing is done when the feature, the
+ * state or its action is NULL.  The counter tracks features handed over,
+ * not features actually altered.
+ */
+static void PerformAdjustFeaturesForGapsCallback (SeqFeatPtr sfp, Pointer data)
+{
+  AdjustFeaturesForGapCallbackPtr ctx;
+
+  if (sfp == NULL) {
+    return;
+  }
+  ctx = (AdjustFeaturesForGapCallbackPtr) data;
+  if (ctx == NULL || ctx->action == NULL) {
+    return;
+  }
+
+  AdjustFeatureForGapsCallback (sfp, ctx->action);
+  ctx->num_processed++;
+}
+
+
+/* Apply the adjust-for-gaps action to every feature in sep.
+ * Returns TRUE when at least one feature was handed to the adjuster; the
+ * count is then written to log_fp if it is not NULL.
+ * NOTE(review): the count covers every feature visited, so TRUE does not
+ * by itself show that any feature was modified - confirm whether callers
+ * rely on that.
+ */
+static Boolean PerformAdjustFeaturesForGaps (SeqEntryPtr sep, AdjustFeaturesForGapsActionPtr action, FILE *log_fp)
+{
+  AdjustFeaturesForGapCallbackData ctx;
+
+  MemSet (&ctx, 0, sizeof (AdjustFeaturesForGapCallbackData));
+  ctx.action = action;
+
+  VisitFeaturesInSep (sep, &ctx, PerformAdjustFeaturesForGapsCallback);
+
+  if (ctx.num_processed <= 0) {
+    return FALSE;
+  }
+  if (log_fp != NULL) {
+    fprintf (log_fp, "Adjusted %d features for gaps\n", ctx.num_processed);
+  }
+  return TRUE;
+}
+
+
NLM_EXTERN CharPtr SummarizePerformAutofixAction (AutofixActionPtr action)
{
DiscrepancyType test_type;
@@ -29603,7 +31421,40 @@ NLM_EXTERN CharPtr SummarizeMacroAction (ValNodePtr vnp)
case MacroActionChoice_autoapply_structured_comments:
str = StringSave ("Autoapply structured comment prefixes");
break;
- default:
+ case MacroActionChoice_reorder_structured_comments:
+ str = StringSave ("Reorder structured comment fields");
+ break;
+ case MacroActionChoice_remove_duplicate_structured_comments:
+ str = StringSave ("Remove duplicate structured comments");
+ break;
+ case MacroActionChoice_lookup_taxonomy:
+ str = StringSave ("Perform taxonomy lookup and correct genetic codes");
+ break;
+ case MacroActionChoice_lookup_pubs:
+ str = StringSave ("Perform pubs lookup");
+ break;
+ case MacroActionChoice_trim_terminal_ns:
+ str = StringSave ("Trim terminal Ns from nucleotide bioseqs");
+ break;
+ case MacroActionChoice_update_replaced_ecnumbers:
+ str = StringSave ("Update Replaced EC_numbers");
+ break;
+ case MacroActionChoice_instantiate_protein_titles:
+ str = StringSave ("Instantiate Protein Titles");
+ break;
+ case MacroActionChoice_retranslate_cds:
+ str = StringSave ("Retranslate coding regions");
+ break;
+ case MacroActionChoice_add_selenocysteine_except:
+ str = StringSave ("Replace selenocysteine stops");
+ break;
+ case MacroActionChoice_join_short_trnas:
+ str = StringSave ("Join short tRNAs");
+ break;
+ case MacroActionChoice_adjust_features_for_gaps:
+ str = StringSave ("Adjust features for gaps");
+ break;
+ default:
str = StringSave ("Invalid action");
break;
}
@@ -29622,6 +31473,7 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryExEx (SeqEntryPtr sep, ValNodePtr macro,
Boolean any_change = FALSE;
Boolean created_protein_features = FALSE;
ValNodePtr list;
+ LogInfoData lid;
entityID = SeqMgrGetEntityIDForSeqEntry(sep);
if (pNumNoOp != NULL) {
@@ -29695,6 +31547,9 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryExEx (SeqEntryPtr sep, ValNodePtr macro,
num = ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) macro->data.ptrvalue, sep, log_fp);
if (num > 0) {
any_change = TRUE;
+ ObjMgrSetDirtyFlag (entityID, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
+ needs_update = FALSE;
} else if (pNumNoOp != NULL) {
(*pNumNoOp)++;
}
@@ -29779,7 +31634,7 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryExEx (SeqEntryPtr sep, ValNodePtr macro,
}
break;
case MacroActionChoice_adjust_for_consensus_splice:
- if (AdjustSeqEntryForConsensusSpliceEx(sep, log_fp)) {
+ if (AdjustSeqEntryForConsensusSpliceEx(sep, log_fp, TRUE)) {
ObjMgrSetDirtyFlag (entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
needs_update = FALSE;
@@ -29971,7 +31826,7 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryExEx (SeqEntryPtr sep, ValNodePtr macro,
if (PropagateMissingOldNames (list)) {
any_change = TRUE;
if (log_fp != NULL) {
- fprintf (log_fp, "Propagated missing old name qualifiers");
+ fprintf (log_fp, "Propagated missing old name qualifiers\n");
}
} else if (pNumNoOp != NULL) {
(*pNumNoOp)++;
@@ -29984,6 +31839,102 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryExEx (SeqEntryPtr sep, ValNodePtr macro,
(*pNumNoOp)++;
}
break;
+ case MacroActionChoice_reorder_structured_comments:
+ if (ReorderStructuredCommentsInSeqEntry (sep)) {
+ if (log_fp != NULL) {
+ fprintf (log_fp, "Reordered structured comment fields\n");
+ }
+ any_change = TRUE;
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_remove_duplicate_structured_comments:
+ if (RemoveDuplicateStructuredCommentsInSeqEntry(sep)) {
+ if (log_fp != NULL) {
+ fprintf (log_fp, "Removed duplicate structured comments\n");
+ }
+ any_change = TRUE;
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_lookup_taxonomy:
+ Taxon3ReplaceOrgInSeqEntry(sep, FALSE);
+ CorrectGenCodes (sep, entityID);
+ if (log_fp != NULL) {
+ fprintf (log_fp, "Performed TaxLookup and corrected genetic codes\n");
+ }
+ any_change = TRUE;
+ break;
+ case MacroActionChoice_lookup_pubs:
+ MemSet (&lid, 0, sizeof (LogInfoData));
+ lid.fp = log_fp;
+ num = LookupPubsInSeqEntry (sep, log_fp == NULL ? NULL : &lid);
+ if (num > 0) {
+ any_change = TRUE;
+ if (log_fp != NULL) {
+ fprintf (log_fp, "Replaced %d pubs during Pub Lookup\n", num);
+ }
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_trim_terminal_ns:
+ MemSet (&lid, 0, sizeof (LogInfoData));
+ lid.fp = log_fp;
+ num = TrimNsFromNucsInSeqEntry (sep, log_fp == NULL ? NULL : &lid);
+ if (num > 0) {
+ any_change = TRUE;
+ if (log_fp != NULL) {
+ fprintf (log_fp, "Trimmed terminal Ns from %d sequences\n", num);
+ }
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_update_replaced_ecnumbers:
+ if (ReplaceUpdatedECNumbers(sep, macro->data.ptrvalue, log_fp)) {
+ any_change = TRUE;
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_instantiate_protein_titles:
+ InstantiateProteinTitles (entityID, NULL);
+ any_change = TRUE;
+ if (log_fp != NULL) {
+ fprintf (log_fp, "Instantiated protein titles\n", num);
+ }
+ break;
+ case MacroActionChoice_retranslate_cds:
+ if (PerformRetranslations (sep, macro->data.ptrvalue, log_fp)) {
+ any_change = TRUE;
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_add_selenocysteine_except:
+ if (ReplaceStopsWithSelenocysteineInSeqEntry(sep, log_fp)) {
+ any_change = TRUE;
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_join_short_trnas:
+ if (JoinShortTrnas(sep, log_fp)) {
+ any_change = TRUE;
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
+ case MacroActionChoice_adjust_features_for_gaps:
+ if (PerformAdjustFeaturesForGaps (sep, macro->data.ptrvalue, log_fp)) {
+ any_change = TRUE;
+ } else if (pNumNoOp != NULL) {
+ (*pNumNoOp)++;
+ }
+ break;
}
macro = macro->next;
}
@@ -30253,6 +32204,9 @@ NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp)
case Publication_field_title:
str = StringSave ("publication title");
break;
+ case Publication_field_pmid:
+ str = StringSave ("PMID");
+ break;
default:
label = GetPubFieldLabel (vnp->data.intvalue);
if (label == NULL) {
@@ -30300,7 +32254,7 @@ NLM_EXTERN FieldTypePtr FieldTypeFromString (CharPtr str)
Int4 qual_type, feat_type = -1;
FieldTypePtr ft = NULL;
FeatureFieldPtr ffp;
- ValNodePtr vnp;
+ ValNodePtr vnp, molfield;
CharPtr cpy, cp;
RnaQualPtr rq;
@@ -30376,6 +32330,51 @@ NLM_EXTERN FieldTypePtr FieldTypeFromString (CharPtr str)
ft->choice = FieldType_misc;
ft->data.intvalue = Misc_field_comment_descriptor;
}
+
+ /* try DBLink fields */
+ if (ft == NULL) {
+ qual_type = GetDBLinkFieldTypeFromDBLinkName (str);
+ if (qual_type > -1) {
+ ft = ValNodeNew (NULL);
+ ft->choice = FieldType_dblink;
+ ft->data.intvalue = qual_type;
+ }
+ }
+
+ /* try publication fields */
+ if (ft == NULL) {
+ qual_type = GetPubFieldFromLabel(str);
+ if (qual_type > -1) {
+ ft = ValNodeNew (NULL);
+ ft->choice = FieldType_pub;
+ ft->data.intvalue = qual_type;
+ }
+ }
+ /* molinfo fields */
+ if (ft == NULL) {
+ if (StringsAreEquivalent(str, "completeness")) {
+ molfield = ValNodeNew (NULL);
+ molfield->choice = MolinfoField_completedness;
+ ft = ValNodeNew (NULL);
+ ft->choice = FieldType_molinfo_field;
+ ft->data.ptrvalue = molfield;
+ } else if (StringsAreEquivalent(str, "topology")) {
+ molfield = ValNodeNew (NULL);
+ molfield->choice = MolinfoField_topology;
+ ft = ValNodeNew (NULL);
+ ft->choice = FieldType_molinfo_field;
+ ft->data.ptrvalue = molfield;
+ }
+ }
+ /* location/genome */
+ if (ft == NULL && StringsAreEquivalent(str, "location") || StringsAreEquivalent(str, "genome")) {
+ vnp = ValNodeNew (NULL);
+ vnp->choice = SourceQualValChoice_location;
+ ft = ValNodeNew (NULL);
+ ft->choice = FieldType_source_qual;
+ ft->data.ptrvalue = vnp;
+ }
+
}
return ft;
}
@@ -31739,7 +33738,7 @@ NLM_EXTERN CharPtr SummarizeWordSubstitution (WordSubstitutionPtr word)
ValNodePtr vnp;
CharPtr summ = NULL;
- if (word == NULL && word->synonyms == NULL) {
+ if (word == NULL || word->synonyms == NULL) {
return NULL;
}
@@ -33062,6 +35061,9 @@ NLM_EXTERN MatchTypePtr MatchTypeFromTableMatchType (TableMatchPtr t)
case TableMatchType_protein_name:
m->choice = eTableMatchProteinName;
break;
+ case TableMatchType_bioproject:
+ m->choice = eTableMatchBioProject;
+ break;
case TableMatchType_any:
m->choice = eTableMatchAny;
break;
@@ -33114,6 +35116,9 @@ NLM_EXTERN TableMatchPtr TableMatchTypeFromMatchType (MatchTypePtr m)
case eTableMatchProteinName:
t->match_type->choice = TableMatchType_protein_name;
break;
+ case eTableMatchBioProject:
+ t->match_type->choice = TableMatchType_bioproject;
+ break;
case eTableMatchAny:
t->match_type->choice = TableMatchType_any;
break;
@@ -33525,7 +35530,7 @@ static BioseqSearchItemPtr BioseqSearchItemNewStr (BioseqPtr bsp, CharPtr str, B
bsi->bsp = bsp;
bsi->str = str;
bsi->free_str = need_free;
- if (IsAllDigits (bsi->str)) {
+ if (StringIsAllDigits (bsi->str)) {
bsi->num = atoi (bsi->str);
}
return bsi;
@@ -33650,9 +35655,14 @@ NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Po
NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list)
{
- if (vnbp->head == NULL) {
+ if (list == NULL) {
+ return;
+ } else if (vnbp->head == NULL) {
vnbp->head = list;
vnbp->tail = list;
+ while (vnbp->tail->next != NULL) {
+ vnbp->tail = vnbp->tail->next;
+ }
} else {
vnbp->tail->next = list;
while (vnbp->tail->next != NULL) {
@@ -33662,6 +35672,19 @@ NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list)
}
+/* Sort the list held by a ValNodeBlock with ValNodeSort and the given
+ * comparator, then walk to the end of the sorted list to re-establish the
+ * tail pointer.  A NULL block or an empty list is left alone.
+ */
+NLM_EXTERN void ValNodeSortBlock (ValNodeBlockPtr vnbp, int (LIBCALLBACK *compar )PROTO ((Nlm_VoidPtr, Nlm_VoidPtr )))
+{
+  ValNodePtr last;
+
+  if (vnbp == NULL || vnbp->head == NULL) {
+    return;
+  }
+  vnbp->head = ValNodeSort (vnbp->head, compar);
+  for (last = vnbp->head; last->next != NULL; last = last->next) {
+    /* advance to the final node */
+  }
+  vnbp->tail = last;
+}
+
+
static SeqIdPtr FindLocalId (SeqIdPtr list)
{
while (list != NULL && list->choice != SEQID_LOCAL) {
@@ -34001,7 +36024,7 @@ static BioseqPtr FindStringInIdListIndex (CharPtr str, BioseqSearchIndexPtr inde
if (index == NULL) {
return NULL;
}
- if (IsAllDigits (str)) {
+ if (StringIsAllDigits (str)) {
match = atoi (str);
imax = index->num_total - 1;
imin = index->num_str;
@@ -34051,6 +36074,7 @@ static ValNodePtr FindListInIdListIndex (Uint1 match_location, CharPtr match_str
Int4 i;
ValNodePtr list = NULL;
StringConstraintPtr scp;
+ Char buf[5000];
if (StringHasNoText (match_str) || index == NULL) {
return NULL;
@@ -34063,6 +36087,12 @@ static ValNodePtr FindListInIdListIndex (Uint1 match_location, CharPtr match_str
ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp);
}
}
+ for (i = index->num_str; i < index->num_str + index->num_int; i++) {
+ sprintf (buf, "%u", index->items[i]->num);
+ if (DoesStringMatchConstraint (buf, scp)) {
+ ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp);
+ }
+ }
scp = StringConstraintFree (scp);
list = ValNodeSort (list, SortVnpByChoiceAndPtrvalue);
@@ -34115,7 +36145,7 @@ static void FindBioseqByProteinName(SeqEntryPtr sep, BioseqByMatchPtr bsp_m, Val
}
else if (IS_Bioseq_set(sep)) {
for (tmp = ((BioseqSetPtr) sep->data.ptrvalue)->seq_set; tmp != NULL; tmp= tmp->next) {
- FindBioseqByProteinName(tmp, bsp_m, match_list);
+ FindBioseqByProteinName(tmp, bsp_m, match_list);
}
}
} /* FindBioseqByProteinName */
@@ -34129,6 +36159,40 @@ static void GetAllBioseqsCallback (BioseqPtr bsp, Pointer data)
}
+typedef struct stringlist { /* search key plus result list for GetBioseqsByBioProjectCallback */
+ CharPtr str; /* BioProject ID to look for; compared with StringICmp */
+ ValNodePtr list; /* matching Bioseqs, appended as OBJ_BIOSEQ nodes */
+} StringListData, PNTR StringListPtr;
+
+
+/* VisitBioseqsInSep callback: append bsp to the StringListData result list
+ * when the BioProject ID obtained from GetBioProjectIdFromBioseq equals the
+ * wanted string under StringICmp.  Protein Bioseqs (ISA_aa) are skipped.
+ * The ID string obtained for the comparison is freed before returning.
+ */
+static void GetBioseqsByBioProjectCallback(BioseqPtr bsp, Pointer data)
+{
+  StringListPtr wanted;
+  CharPtr       project_id;
+
+  if (bsp == NULL || ISA_aa(bsp->mol)) {
+    return;
+  }
+  wanted = (StringListPtr) data;
+  if (wanted == NULL) {
+    return;
+  }
+
+  project_id = GetBioProjectIdFromBioseq(bsp, NULL);
+  if (StringICmp (project_id, wanted->str) == 0) {
+    ValNodeAddPointer (&(wanted->list), OBJ_BIOSEQ, bsp);
+  }
+  project_id = MemFree (project_id);
+}
+
+
+/* Collect the Bioseqs in sep that GetBioseqsByBioProjectCallback accepts for
+ * match_str.  Returns the resulting ValNode list (NULL when nothing was
+ * added); the nodes carry OBJ_BIOSEQ as their choice.
+ */
+static ValNodePtr GetBioseqsByBioProject (SeqEntryPtr sep, CharPtr match_str)
+{
+  StringListData search;
+
+  MemSet (&search, 0, sizeof (StringListData));
+  search.str = match_str;
+
+  VisitBioseqsInSep (sep, &search, GetBioseqsByBioProjectCallback);
+
+  return search.list;
+}
+
+
static ValNodePtr
FindMatchForRowEx
(MatchTypePtr match_type,
@@ -34189,6 +36253,9 @@ FindMatchForRowEx
case eTableMatchAny:
VisitBioseqsInSep (sep, &match_list, GetAllBioseqsCallback);
break;
+ case eTableMatchBioProject:
+ match_list = GetBioseqsByBioProject(sep, match_str);
+ break;
}
return match_list;
}
@@ -34233,6 +36300,7 @@ static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp)
cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
if (cds != NULL)
{
+ sfp = NULL;
if (featdef == FEATDEF_CDS)
{
sfp = cds;
@@ -34315,7 +36383,7 @@ static ValNodePtr GetFeaturesForGene (SeqFeatPtr gene, Uint1 featdef)
sfp != NULL && fcontext.left < stop;
sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
{
- if (fcontext.right >= start && gene == GetGeneForFeature (sfp))
+ if (sfp != gene && fcontext.right >= start && gene == GetGeneForFeature (sfp))
{
ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
}
@@ -34478,6 +36546,7 @@ static ValNodePtr GetFeatureListForRowAndColumn (MatchTypePtr match_type, ValNod
break;
case eTableMatchNucID:
case eTableMatchAny:
+ case eTableMatchBioProject:
for (vnp = match_list; vnp != NULL; vnp = vnp->next) {
ValNodeLink (&feature_list, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), vnp->data.ptrvalue));
}
@@ -34553,6 +36622,7 @@ static ValNodePtr GetBioSourceListForRowAndColumn (MatchTypePtr match_type, ValN
case eTableMatchProteinID:
case eTableMatchNucID:
case eTableMatchAny:
+ case eTableMatchBioProject:
for (vnp = match_list; vnp != NULL; vnp = vnp->next) {
if (vnp->choice == OBJ_BIOSEQ) {
AddBioSourcesForBioseq (vnp->data.ptrvalue, &feature_list);
@@ -34575,6 +36645,51 @@ static ValNodePtr GetBioSourceListForRowAndColumn (MatchTypePtr match_type, ValN
}
+/* Descriptor filter used with PropagateSomeDescriptors: accepts exactly the
+ * descriptor whose address was passed as extradata.
+ */
+Boolean PropagateThisDescriptor (SeqDescPtr sdp, Pointer extradata)
+{
+  Boolean is_target = FALSE;
+
+  if (sdp == (SeqDescPtr) extradata) {
+    is_target = TRUE;
+  }
+  return is_target;
+}
+
+
+/* Propagate set-level publication descriptors that apply to bsp.
+ *
+ * Walks the pub descriptors SeqMgrGetNextDescriptor reports for bsp and
+ * collects those that are extended (ObjValNode) descriptors whose parent is
+ * a Bioseq-set of any class other than nuc-prot.  Each collected descriptor
+ * is then passed, one at a time, to PropagateSomeDescriptors on the top
+ * SeqEntry of bsp's entity, with PropagateThisDescriptor as the filter.
+ * Afterwards marked objects are deleted, the entity is flagged dirty and an
+ * OM_MSG_UPDATE message is sent.  Nothing happens when no descriptor
+ * qualifies.
+ */
+static void PrePropagatePubs (BioseqPtr bsp)
+{
+  ValNodePtr        to_push = NULL, item;
+  SeqEntryPtr       top_sep;
+  SeqDescPtr        sdp;
+  SeqMgrDescContext dcontext;
+  ObjValNodePtr     ovp;
+  BioseqSetPtr      parent_set;
+
+  if (bsp == NULL) {
+    return;
+  }
+
+  /* collect first: the descriptor chain is not edited while it is walked */
+  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
+  while (sdp != NULL) {
+    if (sdp->extended) {
+      ovp = (ObjValNodePtr) sdp;
+      if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
+        parent_set = (BioseqSetPtr) ovp->idx.parentptr;
+        if (parent_set != NULL && parent_set->_class != BioseqseqSet_class_nuc_prot) {
+          ValNodeAddPointer (&to_push, OBJ_SEQDESC, sdp);
+        }
+      }
+    }
+    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext);
+  }
+
+  if (to_push == NULL) {
+    return;
+  }
+
+  top_sep = GetTopSeqEntryForEntityID(bsp->idx.entityID);
+  for (item = to_push; item != NULL; item = item->next) {
+    PropagateSomeDescriptors (top_sep, PropagateThisDescriptor, item->data.ptrvalue);
+  }
+  DeleteMarkedObjects (bsp->idx.entityID, 0, NULL);
+  ObjMgrSetDirtyFlag (bsp->idx.entityID, TRUE);
+  ObjMgrSendMsg (OM_MSG_UPDATE, bsp->idx.entityID, 0, 0);
+  to_push = ValNodeFree (to_push);
+}
+
+
static void AddPubsForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
SeqDescrPtr sdp;
@@ -34584,6 +36699,9 @@ static void AddPubsForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
if (bsp == NULL || feature_list == NULL) return;
+ /* pre-propagate publications descriptors */
+ PrePropagatePubs(bsp);
+
for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
sdp != NULL;
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext)) {
@@ -34684,6 +36802,7 @@ static ValNodePtr GetPubListForRowAndColumn (MatchTypePtr match_type, ValNodePtr
case eTableMatchProteinID:
case eTableMatchNucID:
case eTableMatchAny:
+ case eTableMatchBioProject:
for (vnp = match_list; vnp != NULL; vnp = vnp->next) {
if (vnp->choice == OBJ_BIOSEQ) {
AddPubsForBioseq (vnp->data.ptrvalue, &feature_list);
@@ -34784,6 +36903,7 @@ static ValNodePtr GetSequenceListForRowAndColumn (MatchTypePtr match_type, ValNo
case eTableMatchProteinName: /* J. Chen */
case eTableMatchProteinID:
case eTableMatchNucID:
+ case eTableMatchBioProject:
for (vnp = match_list; vnp != NULL; vnp = vnp->next) {
if (vnp->choice == OBJ_BIOSEQ) {
ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, vnp->data.ptrvalue);
@@ -35048,9 +37168,17 @@ NLM_EXTERN Boolean AdjustmRNAProductToMatchProteinProduct (SeqFeatPtr sfp)
ProtRefPtr prp;
RnaRefPtr rrp;
- if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return FALSE;
-
- prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ if (sfp == NULL) {
+ return FALSE;
+ }
+
+ if (sfp->data.choice == SEQFEAT_PROT) {
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ } else if (sfp->data.choice == SEQFEAT_CDREGION) {
+ prp = GetProtRefForFeature(sfp);
+ } else {
+ return FALSE;
+ }
mrna = GetmRNAForFeature (sfp);
if (mrna == NULL) return FALSE;
@@ -35084,11 +37212,14 @@ NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft)
if (ft == NULL) return FALSE;
if (ft->choice == FieldType_feature_field) {
field = (FeatureFieldPtr) ft->data.ptrvalue;
- if (field != NULL && field->type == Macro_feature_type_cds
- && field->field != NULL
- && field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_product) {
- rval = TRUE;
+ if (field != NULL && field->field != NULL && field->field->choice == FeatQualChoice_legal_qual) {
+ if (field->type == Macro_feature_type_cds
+ && field->field->data.intvalue == Feat_qual_legal_product) {
+ rval = TRUE;
+ } else if (field->type == Macro_feature_type_prot
+ && field->field->data.intvalue == Feat_qual_legal_product) {
+ rval = TRUE;
+ }
}
} else if (ft->choice == FieldType_cds_gene_prot) {
if (ft->data.intvalue == CDSGeneProt_field_prot_name) {
@@ -35159,7 +37290,7 @@ NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr colum
ValNodePtr line_vnp, col_vnp, val_vnp;
Int4 line_num, col_num;
TabColumnConfigPtr t;
- ValNodePtr locus_tag_values = NULL, bad_locus_tags = NULL, vnp, tmp_field;
+ ValNodePtr locus_tag_values = NULL, bad_locus_tags = NULL, vnp, tmp_field, sq;
CharPtr bad_format_fmt = "Locus tag %s has incorrect format";
CharPtr dup_fmt = "Locus tag %s appears in the table more than once";
CharPtr inconsistent_fmt = "Locus tag prefix for %s is inconsistent";
@@ -35205,6 +37336,17 @@ NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr colum
sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue);
ValNodeAddPointer (&err_list, 0, err_msg);
}
+ tmp_field = MolinfoFieldFree(tmp_field);
+ } else if (t->field != NULL && t->field->choice == FieldType_source_qual
+ && (sq = (ValNodePtr)(t->field->data.ptrvalue)) != NULL
+ && sq->choice == SourceQualValChoice_location) {
+ tmp_field = SrcLocationFieldFromValue(val_vnp->data.ptrvalue);
+ if (tmp_field == NULL) {
+ err_msg =(CharPtr) MemNew (sizeof (Char) * (StringLen(bad_molinfo_fmt) + StringLen (val_vnp->data.ptrvalue)));
+ sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue);
+ ValNodeAddPointer (&err_list, 0, err_msg);
+ }
+ tmp_field = FieldTypeFree (tmp_field);
}
}
}
@@ -35390,7 +37532,7 @@ NLM_EXTERN ValNodePtr GetObjectTableForTabTable (SeqEntryPtr sep, ValNodePtr tab
match_type = FindMatchTypeInHeader (columns);
if (match_type == NULL) {
ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No Match Type"));
- } else if (match_type->choice == eTableMatchAny && table->next != NULL) {
+ } else if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) {
if (table->next->next != NULL) {
ValNodeAddPointerToEnd (&vnb, 0, StringSave ("Too many rows for apply to all"));
} else {
@@ -35670,7 +37812,7 @@ NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr
ValNodePtr target_vnp, tmp_field;
TabColumnConfigPtr t;
CharPtr val, qual_name;
- ValNodePtr err_list = NULL, count_list, count_affected_list = NULL, count_vnp, count_tot_vnp;
+ ValNodePtr err_list = NULL, count_list, count_affected_list = NULL, count_vnp, count_tot_vnp, sq;
CharPtr err_msg;
CharPtr bad_col_val_fmt = "Did not set value for column %d, line %d";
CharPtr num_affected_fmt = "%d fields affected";
@@ -35713,6 +37855,7 @@ NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr
success = RemoveFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL);
} else {
if (t->field != NULL && t->field->choice == FieldType_molinfo_field) {
+ /* adjust molinfo fields */
success = FALSE;
if (target_vnp->choice == OBJ_BIOSEQ) {
tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue);
@@ -35721,6 +37864,17 @@ NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr
tmp_field = MolinfoFieldFree(tmp_field);
}
}
+ } else if (t->field != NULL && t->field->choice == FieldType_source_qual
+ && (sq = (ValNodePtr)(t->field->data.ptrvalue)) != NULL
+ && sq->choice == SourceQualValChoice_location) {
+ /* adjust for source location */
+ success = FALSE;
+ tmp_field = SrcLocationFieldFromValue(val_vnp->data.ptrvalue);
+ if (tmp_field != NULL) {
+ success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, tmp_field, NULL,
+ val_vnp->data.ptrvalue, t->existing_text);
+ tmp_field = FieldTypeFree (tmp_field);
+ }
} else {
success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL,
val_vnp->data.ptrvalue, t->existing_text);
@@ -36105,7 +38259,7 @@ NLM_EXTERN ValNodePtr ApplyTableToFeatures (SeqEntryPtr sep, ValNodePtr table, V
match_type = FindMatchTypeInHeader (columns);
- if (match_type->choice == eTableMatchAny && table->next != NULL) {
+ if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) {
/* skip first row, must contain header */
table = table->next;
}
@@ -36219,7 +38373,7 @@ NLM_EXTERN ValNodePtr CheckTableForExistingText (SeqEntryPtr sep, ValNodePtr tab
entityID = SeqMgrGetEntityIDForSeqEntry (sep);
- if (match_type->choice == eTableMatchAny && table->next != NULL) {
+ if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) {
/* skip first row, must contain header */
table = table->next;
}
@@ -36458,8 +38612,10 @@ static void MergePCRPrimersCallback (BioSourcePtr biop, Pointer data)
} else {
pp_f_last->next = ps->forward;
}
- while (pp_f_last->next != NULL) {
- pp_f_last = pp_f_last->next;
+ if (pp_f_last != NULL) {
+ while (pp_f_last->next != NULL) {
+ pp_f_last = pp_f_last->next;
+ }
}
ps->forward = NULL;
}
@@ -36469,8 +38625,10 @@ static void MergePCRPrimersCallback (BioSourcePtr biop, Pointer data)
} else {
pp_r_last->next = ps->reverse;
}
- while (pp_r_last->next != NULL) {
- pp_r_last = pp_r_last->next;
+ if (pp_r_last != NULL) {
+ while (pp_r_last->next != NULL) {
+ pp_r_last = pp_r_last->next;
+ }
}
ps->reverse = NULL;
}
@@ -36794,8 +38952,10 @@ NLM_EXTERN CharPtr SummarizeSuspectRuleEx (SuspectRulePtr rule, Boolean short_ve
{
CharPtr find = NULL, replace = NULL, fix_type = NULL, feat_constraint = NULL, except = NULL;
CharPtr summ = NULL;
+ CharPtr tmp = NULL;
CharPtr butnot = " but not ";
CharPtr desc = " Description: ";
+ CharPtr fatal = "(FATAL)";
Int4 len;
if (rule == NULL) {
@@ -36817,6 +38977,14 @@ NLM_EXTERN CharPtr SummarizeSuspectRuleEx (SuspectRulePtr rule, Boolean short_ve
StringCat (summ, fix_type);
StringCat (summ, ")");
}
+ if (rule->fatal) {
+ len = StringLen (summ) + StringLen (fatal) + 1;
+ tmp = (CharPtr) MemNew (sizeof (Char) * len);
+ StringCpy (tmp, summ);
+ StringCat (tmp, fatal);
+ summ = (CharPtr) MemFree (summ);
+ summ = tmp;
+ }
return summ;
}
@@ -36848,6 +39016,9 @@ NLM_EXTERN CharPtr SummarizeSuspectRuleEx (SuspectRulePtr rule, Boolean short_ve
if (except != NULL) {
len += StringLen (butnot);
}
+ if (rule->fatal) {
+ len += StringLen(fatal);
+ }
summ = (CharPtr) MemNew (sizeof (Char) * len);
StringCpy (summ, find);
if (except != NULL) {
@@ -36875,6 +39046,9 @@ NLM_EXTERN CharPtr SummarizeSuspectRuleEx (SuspectRulePtr rule, Boolean short_ve
StringCat (summ, desc);
StringCat (summ, rule->description);
}
+ if (rule->fatal) {
+ StringCat (summ, fatal);
+ }
find = MemFree (find);
except = MemFree (except);
@@ -38755,3 +40929,201 @@ NLM_EXTERN ValNodePtr ShuffleUpdateBioseqListWithIndex (ValNodePtr PNTR update_b
return unmatched_list;
}
+
+/* SortVnpByInt: comparison callback that orders two ValNode entries by
+ * their data.intvalue, ascending.
+ *
+ * ptr1, ptr2: each is dereferenced as a pointer to a ValNodePtr.
+ * Returns 1 if the first intvalue is larger, -1 if it is smaller, 0 if equal.
+ * Also returns 0 (the pair compares as equal) when either argument, or either
+ * node it points to, is NULL.
+ */
+static int LIBCALLBACK SortVnpByInt (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+
+ if (ptr1 == NULL || ptr2 == NULL) return 0;
+ vnp1 = *((ValNodePtr PNTR) ptr1); /* arguments point at the list nodes, not at the values */
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 == NULL || vnp2 == NULL) return 0;
+
+ if (vnp1->data.intvalue > vnp2->data.intvalue) {
+ return 1;
+ } else if (vnp1->data.intvalue < vnp2->data.intvalue) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/* AddGeneQualifiersToNote: copies the textual content of a gene feature
+ * into *note.
+ *
+ * gene: feature to read; must have data.choice == SEQFEAT_GENE.
+ * note: address of the string to extend.
+ *
+ * Does nothing when gene or note is NULL or the feature is not a gene.
+ * Otherwise, in this order, each of the GeneRef locus, allele, desc, maploc
+ * and locus_tag, then the feature comment, then the val of every GBQual on
+ * the feature, is passed to SetStringValue with
+ * ExistingTextOption_prefix_semi, skipping values for which StringHasNoText
+ * is true.
+ * (Presumably prefix_semi joins the new value to the existing text with a
+ * semicolon -- confirm against SetStringValue.)
+ *
+ * NOTE(review): grp is dereferenced without a NULL check, whereas
+ * LogCDSConversion tests gene->data.value.ptrvalue for NULL -- confirm a
+ * gene feature can never carry a NULL GeneRef here.
+ */
+static void AddGeneQualifiersToNote (SeqFeatPtr gene, CharPtr PNTR note)
+{
+ GeneRefPtr grp;
+ GBQualPtr gbq;
+
+ if (gene == NULL || note == NULL || gene->data.choice != SEQFEAT_GENE) {
+ return;
+ }
+
+ grp = (GeneRefPtr) gene->data.value.ptrvalue;
+ if (!StringHasNoText(grp->locus)) {
+ SetStringValue(note, grp->locus, ExistingTextOption_prefix_semi);
+ }
+ if (!StringHasNoText(grp->allele)) {
+ SetStringValue(note, grp->allele, ExistingTextOption_prefix_semi);
+ }
+ if (!StringHasNoText(grp->desc)) {
+ SetStringValue(note, grp->desc, ExistingTextOption_prefix_semi);
+ }
+ if (!StringHasNoText(grp->maploc)) {
+ SetStringValue(note, grp->maploc, ExistingTextOption_prefix_semi);
+ }
+ if (!StringHasNoText(grp->locus_tag)) {
+ SetStringValue(note, grp->locus_tag, ExistingTextOption_prefix_semi);
+ }
+ if (!StringHasNoText(gene->comment)) {
+ SetStringValue(note, gene->comment, ExistingTextOption_prefix_semi);
+ }
+ for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) { /* only the qualifier values are copied, not their names */
+ if (!StringHasNoText (gbq->val)) {
+ SetStringValue(note, gbq->val, ExistingTextOption_prefix_semi);
+ }
+ }
+
+}
+
+/* LogCDSConversion: writes "<desc> CDS at <location> converted to
+ * misc_feature" to lip->fp and sets lip->data_in_log to TRUE.  No newline
+ * is written here.
+ *
+ * lip:  log destination; the function returns at once if lip or lip->fp is
+ *       NULL.
+ * sfp:  the coding region being converted; its location is printed.
+ * gene: optional gene for the feature.
+ * prp:  optional protein reference for the feature.
+ *
+ * desc is chosen as: gene locus_tag, else gene locus, else the first entry
+ * of prp->name, else the literal "Unknown".  desc always points at storage
+ * owned elsewhere (or a literal), so it is not freed; the string returned by
+ * SeqLocPrint is released with MemFree.
+ *
+ * NOTE(review): sfp is dereferenced without a NULL check, and
+ * gene->data.choice is not verified before data.value.ptrvalue is read as
+ * a GeneRefPtr -- confirm callers guarantee both.
+ */
+static void LogCDSConversion (LogInfoPtr lip, SeqFeatPtr sfp, SeqFeatPtr gene, ProtRefPtr prp)
+{
+ GeneRefPtr grp;
+ CharPtr desc = NULL;
+ CharPtr loc;
+
+ if (lip == NULL || lip->fp == NULL) {
+ return;
+ }
+ if (gene != NULL && (grp = gene->data.value.ptrvalue) != NULL) {
+ if (!StringHasNoText (grp->locus_tag)) {
+ desc = grp->locus_tag;
+ } else if (!StringHasNoText (grp->locus)) {
+ desc = grp->locus;
+ }
+ }
+ if (desc == NULL && prp != NULL) {
+ if (prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
+ desc = prp->name->data.ptrvalue;
+ }
+ }
+ if (desc == NULL) {
+ desc = "Unknown";
+ }
+ loc = SeqLocPrint (sfp->location);
+ fprintf (lip->fp, "%s CDS at %s converted to misc_feature", desc, loc);
+ loc = MemFree (loc);
+ lip->data_in_log = TRUE;
+}
+
+/* LogrRNAConversion: writes "<desc> rRNA at <location> converted to
+ * misc_feature" to lip->fp and sets lip->data_in_log to TRUE.  No newline
+ * is written here.
+ *
+ * lip:  log destination; the function returns at once if lip or lip->fp is
+ *       NULL.
+ * sfp:  the RNA feature being converted; its location is printed.
+ * gene: optional gene for the feature.
+ *
+ * desc is chosen as: a copy of the gene locus_tag, else a copy of the gene
+ * locus, else the result of GetRNAProductString, else a copy of "unknown".
+ * Unlike LogCDSConversion, desc is always separately allocated here and is
+ * released with MemFree, as is the string returned by SeqLocPrint.
+ *
+ * NOTE(review): the fallback text is "unknown" here but "Unknown" in
+ * LogCDSConversion; sfp is dereferenced without a NULL check.
+ */
+static void LogrRNAConversion (LogInfoPtr lip, SeqFeatPtr sfp, SeqFeatPtr gene)
+{
+ GeneRefPtr grp;
+ CharPtr desc = NULL, loc;
+
+ if (lip == NULL || lip->fp == NULL) {
+ return;
+ }
+ if (gene != NULL && (grp = gene->data.value.ptrvalue) != NULL) {
+ if (!StringHasNoText (grp->locus_tag)) {
+ desc = StringSave(grp->locus_tag);
+ } else if (!StringHasNoText (grp->locus)) {
+ desc = StringSave(grp->locus);
+ }
+ }
+ if (desc == NULL) {
+ desc = GetRNAProductString(sfp, NULL);
+ }
+ if (desc == NULL) {
+ desc = StringSave("unknown");
+ }
+ loc = SeqLocPrint (sfp->location);
+ fprintf (lip->fp, "%s rRNA at %s converted to misc_feature", desc, loc);
+ loc = MemFree (loc);
+ desc = MemFree (desc); /* desc is always a private copy in this function */
+ lip->data_in_log = TRUE;
+}
+
+
+/* ConvertListToMiscFeat: for cleaning up bad features identified by
+ * validator or asndisc.
+ *
+ * list:        ValNode list whose data.ptrvalue entries are read as
+ *              SeqFeatPtr.
+ * remove_gene: when TRUE, the gene found for a converted feature is marked
+ *              for deletion if GetFeaturesForGene returns fewer than two
+ *              features for it.
+ * lip:         optional log; one line is written per converted feature.
+ *
+ * Coding regions (SEQFEAT_CDREGION) and RNA features (SEQFEAT_RNA) are
+ * turned in place into import features with key "misc_feature"; features
+ * of any other type are left unchanged.  For a coding region the first
+ * protein name is added to the feature comment, the product Bioseq (if
+ * found) is marked for deletion, and the product location is freed.  For an
+ * RNA the product string is added to the comment.
+ *
+ * After the loop, for every distinct entityID of a converted feature, the
+ * marked objects are deleted, nuc-prot sets are renormalized and features
+ * are reindexed.
+ *
+ * NOTE(review): rna_name returned by GetRNAProductString is never freed
+ * here, while LogrRNAConversion MemFrees the result of the same call --
+ * looks like a leak unless SetStringValue takes ownership; confirm.
+ * NOTE(review): sfp is dereferenced without a NULL check and the result of
+ * ImpFeatNew is used without one.
+ */
+NLM_EXTERN void ConvertListToMiscFeat (ValNodePtr list, Boolean remove_gene, LogInfoPtr lip)
+{
+ ValNodePtr vnp, other_list;
+ SeqFeatPtr sfp, gene;
+ ProtRefPtr prp;
+ BioseqPtr pbsp;
+ ImpFeatPtr ifp;
+ CharPtr rna_name;
+ ValNodePtr entityIDList = NULL;
+ SeqEntryPtr sep;
+ Boolean converted;
+
+ for (vnp = list; vnp != NULL; vnp = vnp->next) {
+ sfp = (SeqFeatPtr) vnp->data.ptrvalue;
+ gene = GetGeneForFeature(sfp); /* looked up before the feature type is changed */
+ converted = FALSE;
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
+ prp = GetProtRefForFeature (sfp);
+ LogCDSConversion(lip, sfp, gene, prp);
+ if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
+ SetStringValue(&(sfp->comment), prp->name->data.ptrvalue, ExistingTextOption_prefix_semi);
+ }
+ pbsp = BioseqFindFromSeqLoc (sfp->product);
+ if (pbsp != NULL) {
+ pbsp->idx.deleteme = TRUE; /* actually removed by DeleteMarkedObjects after the loop */
+ }
+ sfp->data.value.ptrvalue = CdRegionFree (sfp->data.value.ptrvalue);
+ sfp->data.choice = SEQFEAT_IMP;
+ ifp = ImpFeatNew();
+ ifp->key = StringSave("misc_feature");
+ sfp->data.value.ptrvalue = ifp;
+ sfp->product = SeqLocFree (sfp->product);
+ sfp->idx.subtype = 0;
+ ValNodeAddInt (&entityIDList, 0, sfp->idx.entityID);
+ converted = TRUE;
+ } else if (sfp->data.choice == SEQFEAT_RNA) {
+ LogrRNAConversion(lip, sfp, gene);
+ rna_name = GetRNAProductString(sfp, NULL); /* NOTE(review): not freed below -- see header */
+ SetStringValue(&(sfp->comment), rna_name, ExistingTextOption_prefix_semi);
+ sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
+ sfp->data.choice = SEQFEAT_IMP;
+ ifp = ImpFeatNew();
+ ifp->key = StringSave("misc_feature");
+ sfp->data.value.ptrvalue = ifp;
+ sfp->idx.subtype = 0;
+ ValNodeAddInt (&entityIDList, 0, sfp->idx.entityID);
+ converted = TRUE;
+ }
+ if (converted && remove_gene && gene != NULL) {
+ other_list = GetFeaturesForGene(gene, 0);
+ if (ValNodeLen (other_list) < 2) {
+ AddGeneQualifiersToNote(gene, &(sfp->comment)); /* keep the gene's text on the converted feature */
+ gene->idx.deleteme = TRUE;
+ if (lip != NULL && lip->fp != NULL) {
+ fprintf (lip->fp, ", gene deleted");
+ }
+ }
+ other_list = ValNodeFree (other_list);
+ }
+ if (converted && lip != NULL && lip->fp != NULL) {
+ fprintf (lip->fp, "\n"); /* ends the line started by the Log*Conversion call */
+ }
+ }
+ entityIDList = ValNodeSort (entityIDList, SortVnpByInt);
+ ValNodeUnique (&entityIDList, SortVnpByInt, ValNodeFree); /* process each entity once */
+ for (vnp = entityIDList; vnp != NULL; vnp = vnp->next) {
+ /* remove any protein sequences or genes that were marked for deletion */
+ DeleteMarkedObjects (vnp->data.intvalue, 0, NULL);
+ sep = GetTopSeqEntryForEntityID(vnp->data.intvalue);
+ RenormalizeNucProtSets (sep, TRUE);
+ SeqMgrIndexFeatures (vnp->data.intvalue, NULL);
+ }
+ entityIDList = ValNodeFree (entityIDList);
+}
+
+
+
diff --git a/api/macroapi.h b/api/macroapi.h
index d8ff5975..a2e2461e 100644
--- a/api/macroapi.h
+++ b/api/macroapi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/15/2007
*
-* $Revision: 1.154 $
+* $Revision: 1.170 $
*
* File Description:
*
@@ -90,6 +90,7 @@ NLM_EXTERN CharPtr SummarizeFeatQual (ValNodePtr qual);
NLM_EXTERN CharPtr GetSourceQualName (Int4 srcqual);
NLM_EXTERN Int4 GetSourceQualTypeByName (CharPtr qualname);
NLM_EXTERN Int4 GetSrcQualFromSubSrcOrOrgMod (Int4 qual, Boolean is_org_mod);
+NLM_EXTERN Int4 GetSubSrcQualFromSrcQual (Int4 srcqual, Int4Ptr subfield);
NLM_EXTERN Int4 GetOrgModQualFromSrcQual (Int4 srcqual, Int4Ptr subfield);
NLM_EXTERN ValNodePtr GetSourceQualList (Boolean for_remove);
NLM_EXTERN Boolean IsNonTextSourceQual (Int4 srcqual);
@@ -151,6 +152,7 @@ NLM_EXTERN Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice
NLM_EXTERN Int2 FeatureTypeFromFieldType (FieldTypePtr field);
NLM_EXTERN Int4 GetFeatureTypeForRnaType (Int4 rnatype);
NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2);
+NLM_EXTERN int CompareFieldTypesEx (FieldTypePtr vnp1, FieldTypePtr vnp2, Boolean use_source_qual_sort);
NLM_EXTERN Boolean AreAECRActionFieldsEqual (AECRActionPtr action1, AECRActionPtr action2);
NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action);
NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype);
@@ -194,14 +196,22 @@ NLM_EXTERN Boolean SettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp, C
NLM_EXTERN CharPtr GettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp);
NLM_EXTERN Boolean SetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text);
NLM_EXTERN CharPtr GetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp);
+NLM_EXTERN Boolean SettRNACodons_Recognized (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text);
/* Structured Comment functions */
NLM_EXTERN CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp);
NLM_EXTERN Boolean IsUserObjectStructuredComment (UserObjectPtr uop);
+NLM_EXTERN ValNodePtr GetStructuredCommentFieldList (SeqEntryPtr sep);
+NLM_EXTERN ValNodePtr GetStructuredCommentFieldListFromUserObject (UserObjectPtr uop);
+
/* Publication functions */
NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field);
+NLM_EXTERN Int4 GetPubFieldFromLabel(CharPtr label);
NLM_EXTERN ValNodePtr GetPubFieldList (void);
+NLM_EXTERN ValNodePtr GetPubClassList ();
+NLM_EXTERN CharPtr GetPubclassFromPub (PubPtr the_pub);
+NLM_EXTERN Boolean SetPubclassOnPub (PubPtr the_pub, CharPtr pub_class);
NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp);
NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub);
@@ -302,6 +312,8 @@ NLM_EXTERN AECRSamplePtr GetExistingTextForParseAction (ParseActionPtr action, S
NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, SeqEntryPtr sep, FILE *fp);
NLM_EXTERN ValNodePtr GetFieldListForFieldType (Uint1 field_type, SeqEntryPtr sep);
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2);
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldTypeAndSourceQualifier (VoidPtr ptr1, VoidPtr ptr2);
NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop);
NLM_EXTERN void SortUniqueFieldTypeList (ValNodePtr PNTR field_list);
@@ -381,6 +393,7 @@ NLM_EXTERN void InitValNodeBlock (ValNodeBlockPtr vnbp, ValNodePtr list);
NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data);
NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data);
NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list);
+NLM_EXTERN void ValNodeSortBlock (ValNodeBlockPtr vnbp, int (LIBCALLBACK *compar )PROTO ((Nlm_VoidPtr, Nlm_VoidPtr )));
typedef enum {
@@ -392,6 +405,7 @@ typedef enum {
eTableMatchBioSource,
eTableMatchSourceQual,
eTableMatchProteinName, /* J. Chen */
+ eTableMatchBioProject,
eTableMatchAny
} ETableMatchType;
@@ -475,6 +489,8 @@ NLM_EXTERN ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp);
NLM_EXTERN Boolean StripSuffixFromAuthor (AuthorPtr pAuthor);
NLM_EXTERN Boolean TruncateAuthorMiddleInitials (AuthorPtr pAuthor);
+NLM_EXTERN CharPtr GetAuthorListString (AuthListPtr alp, StringConstraintPtr scp);
+NLM_EXTERN ValNodePtr ReadNameListFromString (CharPtr value);
NLM_EXTERN Int4 ConvertLocalIdsToTSAIds (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr tp);
NLM_EXTERN Int4 CreateTSAIDsFromDeflineInSep (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr t);
@@ -534,6 +550,14 @@ NLM_EXTERN ValNodePtr GetBioseqMatchesForSequenceIDs (ValNodePtr query_list, Uin
NLM_EXTERN ValNodePtr ShuffleUpdateBioseqListWithIndex (ValNodePtr PNTR update_bioseq_list, ValNodePtr orig_bioseq_list);
+NLM_EXTERN Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp);
+
+
+NLM_EXTERN void ConvertListToMiscFeat (ValNodePtr list, Boolean remove_gene, LogInfoPtr lip);
+NLM_EXTERN Boolean TrimStopsFromCompleteCodingRegions (SeqEntryPtr sep, FILE *log_fp);
+NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/api/objmgr.c b/api/objmgr.c
index daadfc9b..9e283428 100644
--- a/api/objmgr.c
+++ b/api/objmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.89 $
+* $Revision: 6.90 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -2908,7 +2908,7 @@ NLM_EXTERN void LIBCALL ObjMgrDump (FILE * fp, CharPtr title)
ObjMgrPtr omp;
ObjMgrDataPtr omdp;
Uint4 i;
- Char buf[80];
+ Char buf[128];
BioseqPtr bsp;
Boolean close_it = FALSE;
diff --git a/api/pgppop.c b/api/pgppop.c
index fd8bc75d..6dfdc192 100644
--- a/api/pgppop.c
+++ b/api/pgppop.c
@@ -1,4 +1,4 @@
-/* $Id: pgppop.c,v 6.69 2012/03/30 14:22:03 choi Exp $
+/* $Id: pgppop.c,v 6.73 2016/09/02 14:57:38 ucko Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,262 +29,13 @@
*
* Version Creation Date: 05/03/99
*
-* $Revision: 6.69 $
+* $Revision: 6.73 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
*
-* $Log: pgppop.c,v $
-* Revision 6.69 2012/03/30 14:22:03 choi
-* Fixed bug in DDV_DisplayParaG: write <FONT> tag for minus strand
-* sequences only if html display format was specified.
-*
-* Revision 6.68 2006/07/13 17:06:39 bollin
-* use Uint4 instead of Uint2 for itemID values
-* removed unused variables
-* resolved compiler warnings
-*
-* Revision 6.67 2001/10/03 00:15:47 vakatov
-* Replaced some ASSERTs by VERIFYs
-*
-* Revision 6.66 2001/01/16 17:15:54 hurwitz
-* modified DDV_GetBspCoordGivenDispCoord
-*
-* Revision 6.65 2001/01/10 23:38:39 lewisg
-* fix seqid and various memory leaks
-*
-* Revision 6.64 2000/10/25 01:22:56 bauer
-* fixed DDV display of PDB-Id's in CDD-server
-*
-* Revision 6.63 2000/10/13 19:48:16 hurwitz
-* added functions for getting first VALID disp coord in range of bsp coords
-*
-* Revision 6.62 2000/08/25 18:57:12 shavirin
-* Removed printing of BLAST middle line if characters in 1st and 3d
-* lines are in lower case - used for unaligned regions.
-*
-* Revision 6.61 2000/08/11 20:59:07 shavirin
-* Added default character for a gap in the function DDV_GetSequenceFromParaG().
-*
-* Revision 6.60 2000/07/18 19:59:24 bauer
-* fixed bug in DDV_Print_Sequence
-*
-* Revision 6.59 2000/05/19 14:30:52 wheelan
-* fixed problem with formatting PDB ids
-*
-* Revision 6.58 2000/04/19 12:33:32 durand
-* for HTML output, replaced double quote char. by a spacein the defline
-*
-* Revision 6.57 2000/03/31 21:33:21 durand
-* added new default color schemas for BLAST
-*
-* Revision 6.56 2000/03/29 14:21:56 durand
-* fixed problem when displaying middle line of BLAST SeqAligns
-*
-* Revision 6.54 2000/03/28 13:32:28 durand
-* update DDV_DisplayDefaultAlign to receive a pre-initialized DDV_Disp_Opt data structure
-*
-* Revision 6.53 2000/03/27 14:19:13 durand
-* fixed bugs in BLAST outputs
-*
-* Revision 6.52 2000/03/24 12:58:55 durand
-* fixed a bug in DDV_DisplayDefaultAlign
-*
-* Revision 6.51 2000/03/22 14:14:25 durand
-* updated DDV_DisplayDefaultAlign to get the SeqAlign size correclty
-*
-* Revision 6.50 2000/03/21 19:26:48 durand
-* pgppop now uses AlignMgr
-*
-* Revision 6.49 2000/02/24 16:46:51 thiessen
-* fixed improper acces to freed memory
-*
-* Revision 6.48 2000/02/23 19:49:49 durand
-* use row number instead of SeqId for coloring
-*
-* Revision 6.47 2000/02/22 21:20:11 durand
-* remove the second $LOG line
-*
-* Revision 6.46 2000/02/22 20:55:22 thiessen
-* add null pointer test to DDV_GetBspCoordGivenDispCoord()
-*
-* Revision 6.45 2000/02/17 15:54:35 durand
-* use ~ for an unaligned gap and - for an aligned gap
-*
-* Revision 6.44 2000/02/15 15:31:45 lewisg
-* move DDVRulerDescr to pgppop
-*
-* Revision 6.43 2000/02/14 16:39:55 durand
-* add new output options for BLAST
-*
-* Revision 6.42 2000/02/07 14:16:56 durand
-* replaced BioseqUnlockById by BioseqUnlock
-*
-* Revision 6.41 2000/02/03 14:03:57 durand
-* replaced call to FeatDefLine() by CreateDefLine()
-*
-* Revision 6.40 2000/01/26 15:08:22 durand
-* update DDV_DeleteParaGList function
-*
-* Revision 6.39 1999/12/29 22:55:02 lewisg
-* get rid of seqalign id
-*
-* Revision 6.38 1999/12/20 14:37:53 durand
-* transfer some PopSet Viewer functions from here to wwwddv.c; update the code to better use Color Manager for BLAST outputs
-*
-* Revision 6.37 1999/12/08 22:40:54 durand
-* add the code to produce colored BLAST outputs
-*
-* Revision 6.36 1999/12/07 18:46:33 durand
-* add DDV_GetBspCoordGivenPgpList function
-*
-* Revision 6.35 1999/11/26 15:42:25 vakatov
-* Fixed for the C++ and/or MSVC DLL compilation
-*
-* Revision 6.34 1999/10/29 14:14:24 durand
-* add DDV_GetBspCoordGivenDispCoord() and DDV_GetDispCoordGivenBspCoord()
-*
-* Revision 6.33 1999/10/20 13:13:53 durand
-* add new fields in data structure for DDV
-*
-* Revision 6.32 1999/10/08 17:50:28 durand
-* move DDV_DisplayBlastSAP from pgppop.c to ddvcreate.c due to conflict between api and ddv
-*
-* Revision 6.31 1999/10/07 19:18:46 durand
-* Modified function DDV_DisplayBlastSAP to use AlignMgr
-*
-* Revision 6.30 1999/09/29 17:16:44 shavirin
-* Modified function DDV_DisplayBlastSAP(): added new parameter and printing
-* of the BLAST scores.
-*
-* Revision 6.29 1999/09/29 13:42:27 durand
-* add middle line for BLAST output
-*
-* Revision 6.27 1999/09/28 19:49:30 shavirin
-* Changed definition of the function DDV_DisplayBlastSAP()
-*
-* Revision 6.26 1999/09/28 13:06:39 durand
-* add a first set of functions to display CDS in PopSet Viewer
-*
-* Revision 6.24 1999/09/16 18:52:26 durand
-* redesign the PopSet viewer toolbar
-*
-* Revision 6.23 1999/09/07 13:41:55 durand
-* update Entrez links for PopSet Viewer
-*
-* Revision 6.22 1999/09/02 19:08:56 chappey
-* fixes in PrintSeqAlignCallback
-*
-* Revision 6.21 1999/09/01 21:04:20 durand
-* call SeqAlignSetFree after PairSeqAlign2MultiSeqAlign
-*
-* Revision 6.20 1999/08/31 13:51:30 durand
-* add PubMed link for PopSet Viewer
-*
-* Revision 6.18 1999/08/30 21:24:23 durand
-* display Unpublished study when a PopSet entry doesn't have a title
-*
-* Revision 6.17 1999/08/30 20:29:56 durand
-* make PrintSeqAlignCallback estern function
-*
-* Revision 6.16 1999/08/30 18:20:11 durand
-* update SeqAlignToBS
-*
-* Revision 6.15 1999/08/30 17:57:05 sirotkin
-* changed mydata in PrintSeqAlignCallback
-*
-* Revision 6.14 1999/08/30 14:18:08 durand
-* use ByteStore to format the SeqAlign output
-*
-* Revision 6.12 1999/08/27 14:54:04 durand
-* fix memory leaks in PopSet Viewer
-*
-* Revision 6.11 1999/08/16 18:56:59 lewisg
-* made DDV_GetSeqAlign extern and added prototype to header
-*
-* Revision 6.10 1999/08/08 15:54:35 chappey
-* made DDV_GetSeqAlign static as there is no prototype
-*
-* Revision 6.9 1999/08/07 16:57:48 sicotte
-* fixed compiler warnings
-*
-* Revision 6.8 1999/08/07 16:53:13 sicotte
-* added includes sqnutils.h and alignval.h for new code
-*
-* Revision 6.7 1999/08/06 21:43:17 chappey
-* SeqAlignToBS new function to save in ByteStore structure the text output of the SeqAlign(s) packaged in a SeqEntry
-*
-* Revision 6.6 1999/07/22 13:23:13 durand
-* made DDV_SearchAli external function
-*
-* Revision 6.5 1999/07/21 21:52:12 durand
-* add some functions to display a summary for a PopSet entry
-*
-* Revision 6.4 1999/07/19 21:16:01 durand
-* add DDV_ResetParaGSeqAlignCoord to reset the seqalign coord in the display data structures of DDV
-*
-* Revision 6.3 1999/07/15 18:20:51 durand
-* add display options to support BLAST outputs
-*
-* Revision 6.2 1999/07/13 20:46:56 durand
-* comment out call to PairSeqAlign2MultiSeqAlign to avoid DDV compiling problems
-*
-* Revision 6.1 1999/07/09 13:59:58 durand
-* move pgppop from desktop to api
-*
-* Revision 6.2 1999/07/07 19:12:26 durand
-* fix a tiny bug in DDV_GetSequenceFromParaG
-*
-* Revision 6.1 1999/07/06 20:18:07 kans
-* initial public checkin
-*
-* Revision 1.35 1999/07/06 18:54:27 durand
-* add new features for the display of PopSet viewer
-*
-* Revision 1.34 1999/07/02 13:22:17 durand
-* fix bugs for the display of minus strand sequences
-*
-* Revision 1.32 1999/06/29 16:48:10 shavirin
-* Changed definition of function DDV_ShowSeqAlign()
-*
-* Revision 1.31 1999/06/28 22:07:20 durand
-* add loader functions and clean the code with Lint and Purify
-*
-* Revision 1.30 1999/06/24 20:49:41 shavirin
-* Added new function DDV_ShowSeqAlign().
-*
-* Revision 1.29 1999/06/23 18:11:17 durand
-* fix a variable initialization problem under NT
-*
-* Revision 1.28 1999/06/23 17:24:23 durand
-* use a binary encoding to manage the display styles
-*
-* Revision 1.27 1999/06/21 18:37:56 durand
-* update DDV_DisplayDefaultAlign to produce full text output
-*
-* Revision 1.26 1999/06/19 18:36:13 durand
-* new display procedure
-*
-* Revision 1.25 1999/06/16 13:10:48 durand
-* update/add functions for Vibrant DDV
-*
-* Revision 1.24 1999/06/14 23:49:43 durand
-* add function for Vibrant DDV
-*
-* Revision 1.23 1999/06/11 22:33:01 durand
-* add new functions for Vibrant DDV
-*
-* Revision 1.22 1999/06/11 17:59:40 durand
-* popset viewer uses more code from UDV
-*
-* Revision 1.21 1999/06/09 21:35:30 durand
-* add constructors/destructors for BspInfo struct as well as read seq function
-*
-*
-*
-*
* ==========================================================================
*/
#include <stdio.h>
@@ -577,7 +328,7 @@ Int2 n2,nCompt,
ByteStorePtr bs=NULL;
Boolean bGiForProductOk=FALSE;
SeqIdPtr sip=NULL;
-Int4 gi=0;
+BIG_ID gi=0;
BioseqPtr prot;
/*retrieve the protein sequence; need to be optimized in future release*/
@@ -699,7 +450,7 @@ Int2 n2=0,
ByteStorePtr bs=NULL;
Boolean bGiForProductOk=FALSE;
SeqIdPtr sip=NULL;
-Int4 gi=0;
+BIG_ID gi=0;
BioseqPtr prot;
/*retrieve the protein sequence; need to be optimized in future release*/
@@ -2178,7 +1929,8 @@ static ValNodePtr DDV_DisplayParaG
CharPtr szSequence,szDisp,szFLine=NULL,szTmp,szMiddleLine, idstring;
BioseqPtr bsp;
SeqIdPtr sip;
- Int4 bspLength,size,stop,nCompt2,pos,gi,diff,disp;
+ Int4 bspLength,size,stop,nCompt2,pos,diff,disp;
+ BIG_ID gi;
Int4 bsp_start,bsp_stop, n, j;
Char szBuf4[WWW_SCRIPT_SIZE]={""}; /*Entrez query*/
Uint1 bsp_strand;
@@ -2517,7 +2269,7 @@ static ValNodePtr DDV_DisplayParaG
Return value : -
*******************************************************************************/
-static CharPtr DDV_Nav_Arrows(Int4 gi,Int4 TotalAliLength,
+static CharPtr DDV_Nav_Arrows( BIG_ID gi,Int4 TotalAliLength,
Int4 numBlockAffich,Uint4 disp_format,Int2 LineSize)
{
CharPtr szWide,szFLine=NULL;
@@ -2617,7 +2369,7 @@ Char szWWW[WWW_SCRIPT_SIZE]={""};
text is directly sent to fp.
*******************************************************************************/
-NLM_EXTERN void DDV_AffichageParaG(MsaParaGPopListPtr mpplp,Int4 gi,Int4 from,Int4 to,
+NLM_EXTERN void DDV_AffichageParaG(MsaParaGPopListPtr mpplp, BIG_ID gi,Int4 from,Int4 to,
Int4 TotalAliLength,Int4 numBlockAffich,Uint4 disp_format,Int2 LineSize,
FILE *fp,ByteStorePtr PNTR bspp,Int4Ptr PNTR matrix,DDV_ColorGlobal * gclr,
ValNodePtr mask)
@@ -2998,8 +2750,7 @@ Char DefLine[255];
if (szSeq){
if (!DDV_GetSequenceFromParaG(pgp,&szSeq,bspLength,IsAA,NULL,
NULL,NULL)) continue;
- fprintf(fp,szSeq);
- fprintf(fp,"\n");
+ fprintf(fp, "%s\n", szSeq);
MemFree(szSeq);
}
vnp=vnp->next;
@@ -3412,7 +3163,7 @@ ValNodePtr PNTR vnpp;
Return value : FALSE if failed
*******************************************************************************/
-NLM_EXTERN Boolean DDV_DisplayDefaultAlign(SeqAlignPtr sap,Int4 gi,Int4 from,Int4 to,
+NLM_EXTERN Boolean DDV_DisplayDefaultAlign(SeqAlignPtr sap, BIG_ID gi,Int4 from,Int4 to,
Uint4 disp_format,Pointer disp_data,FILE *fp)
{
SABlockPtr sabp=NULL;/*indexed SeqAlign*/
@@ -3748,7 +3499,7 @@ Boolean nRet;
return(nRet);
}
-NLM_EXTERN Boolean DDV_ShowSeqAlign(SeqAlignPtr seqalign, Int4 gi, Int4 from, Int4 to,
+NLM_EXTERN Boolean DDV_ShowSeqAlign(SeqAlignPtr seqalign, BIG_ID gi, Int4 from, Int4 to,
Uint4 disp_format)
{
@@ -3923,7 +3674,7 @@ SeqAnnotPtr sap;
Purpose : display a summary for a PopSet entry
*******************************************************************************/
-NLM_EXTERN void DDV_PrintPopSetSummary(SeqEntryPtr sep, Int4 gi, FILE *FileOut)
+NLM_EXTERN void DDV_PrintPopSetSummary(SeqEntryPtr sep, BIG_ID gi, FILE *FileOut)
{
ValNodePtr vnp_sap,vnp_biosrc,vnp_biosrc2,vnp,vnp2;
SeqAlignPtr sap;
diff --git a/api/pgppop.h b/api/pgppop.h
index d405747e..d73c0246 100644
--- a/api/pgppop.h
+++ b/api/pgppop.h
@@ -1,4 +1,4 @@
-/* $Id: pgppop.h,v 6.39 2001/01/16 17:15:54 hurwitz Exp $
+/* $Id: pgppop.h,v 6.42 2016/06/21 21:42:36 kans Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -29,178 +29,12 @@
*
* Version Creation Date: 05/03/99
*
-* $Revision: 6.39 $
+* $Revision: 6.42 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
-* $Log: pgppop.h,v $
-* Revision 6.39 2001/01/16 17:15:54 hurwitz
-* modified DDV_GetBspCoordGivenDispCoord
-*
-* Revision 6.38 2000/10/13 19:48:16 hurwitz
-* added functions for getting first VALID disp coord in range of bsp coords
-*
-* Revision 6.37 2000/08/25 18:58:29 shavirin
-* Added parameter Boolean is_aa to the function DDV_GetBLASTCompLine_1().
-*
-* Revision 6.36 2000/07/08 20:43:55 vakatov
-* Get all "#include" out of the 'extern "C" { }' scope; other cleanup...
-*
-* Revision 6.35 2000/05/24 21:42:59 hurwitz
-* getting hide/show rows to work with DDV and DDE together
-*
-* Revision 6.34 2000/05/19 14:30:53 wheelan
-* fixed problem with formatting PDB ids
-*
-* Revision 6.33 2000/03/31 21:33:22 durand
-* added new default color schemas for BLAST
-*
-* Revision 6.32 2000/03/28 13:32:29 durand
-* update DDV_DisplayDefaultAlign to receive a pre-initialized DDV_Disp_Opt data structure
-*
-* Revision 6.31 2000/03/27 14:19:14 durand
-* fixed bugs in BLAST outputs
-*
-* Revision 6.30 2000/03/21 19:26:48 durand
-* pgppop now uses AlignMgr
-*
-* Revision 6.29 2000/02/15 15:31:46 lewisg
-* move DDVRulerDescr to pgppop
-*
-* Revision 6.28 2000/02/14 16:39:56 durand
-* add new output options for BLAST
-*
-* Revision 6.27 1999/12/20 14:37:54 durand
-* transfer some PopSet Viewer functions from here to wwwddv.c; update the code to better use Color Manager for BLAST outputs
-*
-* Revision 6.26 1999/12/08 22:40:54 durand
-* add the code to produce colored BLAST outputs
-*
-* Revision 6.25 1999/12/07 18:46:34 durand
-* add DDV_GetBspCoordGivenPgpList function
-*
-* Revision 6.24 1999/11/26 15:42:26 vakatov
-* Fixed for the C++ and/or MSVC DLL compilation
-*
-* Revision 6.23 1999/11/17 22:42:37 durand
-* add entitiesTbl in MsaParaGPopList data structure
-*
-* Revision 6.22 1999/10/29 14:14:25 durand
-* add DDV_GetBspCoordGivenDispCoord() and DDV_GetDispCoordGivenBspCoord()
-*
-* Revision 6.21 1999/10/08 17:50:29 durand
-* move DDV_DisplayBlastSAP from pgppop.c to ddvcreate.c due to conflict between api and ddv
-*
-* Revision 6.20 1999/09/29 17:16:45 shavirin
-* Modified function DDV_DisplayBlastSAP(): added new parameter and printing
-* of the BLAST scores.
-*
-* Revision 6.19 1999/09/29 13:42:27 durand
-* add middle line for BLAST output
-*
-* Revision 6.18 1999/09/28 19:49:31 shavirin
-* Changed definition of the function DDV_DisplayBlastSAP()
-*
-* Revision 6.17 1999/09/28 13:06:40 durand
-* add a first set of functions to display CDS in PopSet Viewer
-*
-* Revision 6.15 1999/09/16 18:52:27 durand
-* redesign the PopSet viewer toolbar
-*
-* Revision 6.14 1999/09/10 14:27:10 durand
-* add RulerDescr in MsaParaGPopList struct to be used for complex ruler types
-*
-* Revision 6.13 1999/09/02 17:32:23 durand
-* add some defines for new display types in DDV
-*
-* Revision 6.12 1999/08/31 16:45:39 durand
-* add sap in MsaParaGPopList structure
-*
-* Revision 6.11 1999/08/31 13:51:31 durand
-* add PubMed link for PopSet Viewer
-*
-* Revision 6.10 1999/08/30 20:29:56 durand
-* make PrintSeqAlignCallback estern function
-*
-* Revision 6.9 1999/08/30 14:18:09 durand
-* use ByteStore to format the SeqAlign output
-*
-* Revision 6.8 1999/08/16 18:57:00 lewisg
-* made DDV_GetSeqAlign extern and added prototype to header
-*
-* Revision 6.7 1999/08/06 21:43:16 chappey
-* SeqAlignToBS new function to save in ByteStore structure the text output of the SeqAlign(s) packaged in a SeqEntry
-*
-* Revision 6.6 1999/07/22 13:23:14 durand
-* made DDV_SearchAli external function
-*
-* Revision 6.5 1999/07/21 21:52:13 durand
-* add some functions to display a summary for a PopSet entry
-*
-* Revision 6.4 1999/07/20 17:01:48 durand
-* add eID field in URLData structure for PopSet Viewer
-*
-* Revision 6.3 1999/07/19 21:16:02 durand
-* add DDV_ResetParaGSeqAlignCoord to reset the seqalign coord in the display data structures of DDV
-*
-* Revision 6.2 1999/07/15 18:20:51 durand
-* add display options to support BLAST outputs
-*
-* Revision 6.1 1999/07/09 13:59:58 durand
-* move pgppop from desktop to api
-*
-* Revision 6.2 1999/07/06 22:31:24 kans
-* removed whitespace before #define directives
-*
-* Revision 6.1 1999/07/06 20:18:07 kans
-* initial public checkin
-*
-* Revision 1.30 1999/07/06 18:54:27 durand
-* add new features for the display of PopSet viewer
-*
-* Revision 1.29 1999/07/02 13:22:18 durand
-* fix bugs for the display of minus strand sequences
-*
-* Revision 1.28 1999/06/29 16:48:11 shavirin
-* Changed definition of function DDV_ShowSeqAlign()
-*
-* Revision 1.27 1999/06/28 22:07:22 durand
-* add loader functions and clean the code with Lint and Purify
-*
-* Revision 1.26 1999/06/25 14:21:07 durand
-* add a new command-line to wwwddv.cgi
-*
-* Revision 1.25 1999/06/24 20:49:42 shavirin
-* Added new function DDV_ShowSeqAlign().
-*
-* Revision 1.24 1999/06/23 17:24:24 durand
-* use a binary encoding to manage the display styles
-*
-* Revision 1.23 1999/06/21 18:37:57 durand
-* update DDV_DisplayDefaultAlign to produce full text output
-*
-* Revision 1.22 1999/06/19 18:36:13 durand
-* new display procedure
-*
-* Revision 1.21 1999/06/14 23:49:44 durand
-* add function for Vibrant DDV
-*
-* Revision 1.20 1999/06/11 22:33:02 durand
-* add new functions for Vibrant DDV
-*
-* Revision 1.19 1999/06/11 18:03:14 durand
-* add declarations
-*
-* Revision 1.17 1999/06/11 14:01:07 durand
-* add a define _PGPPOP_ to avoid compilation error when mutilple include
-*
-* Revision 1.16 1999/06/09 21:35:30 durand
-* add constructors/destructors for BspInfo struct as well as read seq function
-*
-*
-*
*
* ==========================================================================
*/
@@ -288,7 +122,7 @@ typedef struct ddvrulerdescr{
Command-Line or URL content
*****************************************************************************/
typedef struct urldata{ /*structure containing the decoded URL*/
- Int4 gi; /*gi to retrieve in ID1*/
+ BIG_ID gi; /*gi to retrieve in ID1*/
Int4 from; /*want to display from...*/
Int4 to; /*... to (SeqAlign coord)*/
Uint4 disp_format;/*display format (seebelow)*/
@@ -421,7 +255,7 @@ NLM_EXTERN CharPtr DDV_GetBLASTCompLine_2(ParaGPtr pgpQuery, ParaGPtr pgpSubject
Int4Ptr PNTR matrix);
NLM_EXTERN CharPtr DDV_GetBLASTCompLine_3(CharPtr szQuery, ParaGPtr pgpSubject,
Int4Ptr PNTR matrix);
-NLM_EXTERN void DDV_AffichageParaG(MsaParaGPopListPtr mpplp,Int4 gi,Int4 from,Int4 to,
+NLM_EXTERN void DDV_AffichageParaG(MsaParaGPopListPtr mpplp, BIG_ID gi,Int4 from,Int4 to,
Int4 TotalAliLength,Int4 numBlockAffich,Uint4 disp_format,Int2 LineSize,
FILE *fp,ByteStorePtr PNTR bspp,Int4Ptr PNTR matrix,DDV_ColorGlobal * gclr,
ValNodePtr mask);
@@ -440,7 +274,7 @@ NLM_EXTERN void DDV_PrintStudyName(CharPtr szPopSetName,CharPtr szPopSetAuth,
CharPtr szJournalTitle,Int4 pmid,FILE *fp);
NLM_EXTERN void DDV_GetArticleInfo(SeqEntryPtr sep,CharPtr szPopSetName,
CharPtr szPopSetAuth,CharPtr szJournalTitle,Int4Ptr pmid);
-NLM_EXTERN Boolean DDV_DisplayDefaultAlign(SeqAlignPtr sap,Int4 gi,Int4 from,Int4 to,
+NLM_EXTERN Boolean DDV_DisplayDefaultAlign(SeqAlignPtr sap, BIG_ID gi,Int4 from,Int4 to,
Uint4 disp_format,Pointer disp_data,FILE *fp);
NLM_EXTERN CharPtr DDV_ReadSeqGivenSpp(SeqPortPtr spp,Int4 from,Int4 to,Uint1 strand,
Boolean IsAA,BoolPtr bError);
@@ -451,11 +285,11 @@ NLM_EXTERN BspInfoPtr DDV_BspInfoDelete(BspInfoPtr bip);
NLM_EXTERN BspInfoPtr DDV_BspInfoDeleteList(BspInfoPtr bip);
NLM_EXTERN void DDV_LocateParaG(ValNodePtr PNTR TableHead,Int4 nBsp);
NLM_EXTERN Boolean DDV_CreateDisplay(SeqAlignPtr sap,MsaParaGPopListPtr mpplp);
-NLM_EXTERN Boolean DDV_ShowSeqAlign(SeqAlignPtr seqalign, Int4 gi, Int4 from, Int4 to,
+NLM_EXTERN Boolean DDV_ShowSeqAlign(SeqAlignPtr seqalign, BIG_ID gi, Int4 from, Int4 to,
Uint4 disp_format);
NLM_EXTERN void DDV_GetEntryBioSource(SeqEntryPtr sep,ValNodePtr PNTR vnpp);
-NLM_EXTERN void DDV_PrintPopSetSummary(SeqEntryPtr sep, Int4 gi, FILE *FileOut);
+NLM_EXTERN void DDV_PrintPopSetSummary(SeqEntryPtr sep, BIG_ID gi, FILE *FileOut);
NLM_EXTERN void DDV_SearchAli(SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent);
NLM_EXTERN void DDV_GetSeqAlign(Pointer dataptr, Uint2 datatype,
ValNodePtr PNTR vnp);
diff --git a/api/product_rules.inc b/api/product_rules.inc
index 571173cc..c32cceee 100644
--- a/api/product_rules.inc
+++ b/api/product_rules.inc
@@ -1,4 +1,4 @@
-/* $Id: product_rules.inc,v 1.2 2012/05/03 20:23:11 kans Exp $
+/* $Id: product_rules.inc,v 1.4 2016/07/22 15:39:13 kachalos Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -23,10 +23,10 @@
*
* ===========================================================================
*
- * Author: Jonathan Kans et al.
+ * Author: NCBI developers
*
* File Description:
- * Built-in copy of product_rules.txt.
+ * Built-in copy of product_rules.prt.
*
*/
@@ -35,77 +35,142 @@ static const char* const s_Defaultproductrules[] = {
" {",
" find",
" contains-plural NULL ,",
- " rule-type putative-typo } ,",
+ " rule-type putative-typo ,",
+ " fatal FALSE } ,",
" {",
" find",
- " n-or-more-brackets-or-parentheses 2 ,",
+ " three-numbers NULL ,",
" except",
" string-constraint {",
- " match-text \"1-(5-phosphoribosyl)-5-\" ,",
- " match-location starts ,",
+ " match-text \"methyltransferas\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " word \"methyltransferas\" ,",
+ " synonyms {",
+ " \"F420\" ,",
+ " \"FK506\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word FALSE } } ,",
" whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
- " {",
- " find",
- " three-numbers NULL ,",
- " rule-type database } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"16S rRNA pseudouridine(516) synthase\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \" 23S rRNA pseudouridine(955/2504/2580) synthase\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \"23S rRNA pseudouridine(1911/1915/1917) synthase\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type database ,",
+ " description \"contains three or more numbers together that may be",
+ " identifiers more appropriate in note\" ,",
+ " fatal FALSE } ,",
" {",
" find",
" all-caps NULL ,",
- " rule-type putative-typo } ,",
+ " rule-type putative-typo ,",
+ " fatal FALSE } ,",
" {",
" find",
" unbalanced-paren NULL ,",
- " rule-type inappropriate-symbol } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" too-long 100 ,",
" feat-constraint {",
- " field {",
- " field",
- " feature-field {",
- " type cds ,",
- " field",
- " legal-qual product } ,",
- " string-constraint {",
- " match-text \"bifunctional\" ,",
- " match-location contains ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " field {",
- " field",
- " feature-field {",
- " type cds ,",
- " field",
- " legal-qual product } ,",
- " string-constraint {",
- " match-text \"multifunctional\" ,",
- " match-location contains ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } } } ,",
- " rule-type description } ,",
+ " string {",
+ " match-text \"multifunctional\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \"bifunctional\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \"acetylglucosamine\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type description ,",
+ " description \"Is longer than 100 characters. Remove descriptive phrases or",
+ " synonyms from product names. Keep valid long product names, eg long enzyme",
+ " names\" ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -119,8 +184,93 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \" citochrome\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"cytochrome\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \" cytochome\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"cytochrome\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \" cytochorme\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"cytochrome\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -134,8 +284,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -149,8 +302,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -164,8 +320,47 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type database } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"'\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"()\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -179,8 +374,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type database } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -194,8 +392,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -209,13 +410,16 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \"-\" ,",
- " match-location ends ,",
+ " match-location starts ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -224,13 +428,16 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \"-\" ,",
- " match-location starts ,",
+ " match-location ends ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -239,13 +446,16 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \".\" ,",
- " match-location starts ,",
+ " match-location ends ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -254,13 +464,16 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \".\" ,",
- " match-location ends ,",
+ " match-location starts ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -269,8 +482,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -284,8 +500,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -299,8 +518,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -314,8 +536,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -329,8 +554,85 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"16S ribosomal RNA\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " word \"16S\" ,",
+ " synonyms {",
+ " \"18S\" ,",
+ " \"28S\" ,",
+ " \"5S\" ,",
+ " \"5.8S\" ,",
+ " \"23S\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " description \"equals ribosomal RNA\" ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"16S rRNA\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " word \"16S\" ,",
+ " synonyms {",
+ " \"18S\" ,",
+ " \"28S\" ,",
+ " \"5S\" ,",
+ " \"5.8S\" ,",
+ " \"23S\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " description \"equals rRNA\" ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -344,8 +646,29 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \":\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -359,8 +682,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -374,8 +700,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -389,8 +718,29 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"@\" ,",
+ " match-location contains ,",
+ " case-sensitive TRUE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -404,13 +754,16 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"accessroy\" ,",
- " match-location contains ,",
+ " match-text \"ABC-type polysaccharide\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -419,19 +772,22 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type typo ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"accessory\" ,",
- " whole-string FALSE ,",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"aceytltranferase\" ,",
+ " match-text \"accessroy\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -441,20 +797,23 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"acetyltransferase\" ,",
+ " replace \"accessory\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"active site\" ,",
- " match-location ends ,",
+ " match-text \"aceytltranferase\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -463,11 +822,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type use-protein ,",
- " description \"ends with 'active site' (If this is only the active site,",
- " remove this feature. If this is a translated protein, provide a product name.",
- " Consider adding 'protein' to the end of the name.)\" } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"acetyltransferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -489,7 +855,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -497,7 +865,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -511,7 +880,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -519,7 +890,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"adenylate transferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -533,7 +905,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -541,7 +915,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"adenylyltransferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ADP\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Agrobacterium\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -563,7 +988,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -571,35 +998,119 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"alternate protein name\" ,",
+ " match-text \"alchohol\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"alcohol\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"alpha\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
" whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " feat-constraint {",
- " string {",
- " match-text \"alternative protein name\" ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"alpha-1\" ,",
" match-location equals ,",
- " case-sensitive TRUE ,",
+ " case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
" whole-word FALSE ,",
- " not-present TRUE ,",
+ " not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"alternate gene name\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"alternate name\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -621,7 +1132,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -629,7 +1142,41 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"alternate protein name\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"alternative protein name\" ,",
+ " match-location equals ,",
+ " case-sensitive TRUE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -643,7 +1190,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -651,7 +1200,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"aluminum\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -673,7 +1223,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -681,7 +1233,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"aminomutaseaminotransferase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"aminomutase aminotransferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -695,7 +1273,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"aminotransferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"aminotransferease\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -703,7 +1308,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"aminotransferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"aminotransferease\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"aminotransferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -717,8 +1348,157 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type evolutionary-relationship } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type evolutionary-relationship ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"and related enzyme\" ,",
+ " match-location ends ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"and related enzymes\" ,",
+ " match-location ends ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"and related protein\" ,",
+ " match-location ends ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"and related proteins\" ,",
+ " match-location ends ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"animal\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"anion\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -732,7 +1512,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -740,7 +1522,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"asparaginase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -754,8 +1537,68 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Archaeal\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"archael\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"archaeal\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -769,7 +1612,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -777,7 +1622,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"arginine \" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -799,7 +1645,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -807,7 +1655,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -821,7 +1670,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -829,7 +1680,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"arginine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -851,7 +1703,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -859,7 +1713,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -873,7 +1728,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -881,7 +1738,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"aspartate\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -903,7 +1761,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -911,7 +1771,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"aspartyl\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -925,8 +1811,126 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"assemby\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"assembly\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"assessory\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"accessory\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ATP\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " synonyms {",
+ " \"probable\" ,",
+ " \"putative\" ,",
+ " \"hypothetical\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ATPas\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"ATPase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -940,8 +1944,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -955,8 +1969,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
@@ -970,8 +1987,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Bacilllus\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -985,8 +2037,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1000,8 +2062,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1023,7 +2095,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -1031,7 +2105,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1045,8 +2120,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1068,7 +2153,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -1076,7 +2163,83 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"believed\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"bifuctional\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"bifunctional\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"bifunctional\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1090,7 +2253,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1098,7 +2263,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"bifunctional\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1112,7 +2278,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1120,25 +2288,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"biogenesis\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"binding\" ,",
- " match-location ends ,",
+ " match-text \"binds\" ,",
+ " match-location starts ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type use-protein ,",
- " description \"ends with 'binding' (If this is only the binding site, remove",
- " this feature. If this is a translated protein, provide a product name.",
- " Consider adding 'protein' to the end of the name.)\" } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1152,7 +2328,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1160,7 +2338,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"biotin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1174,7 +2353,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1182,7 +2363,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"biosynthesis\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1196,7 +2378,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1204,7 +2388,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"biosynthesis\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"bis\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1218,7 +2428,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1226,7 +2438,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"binding\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1240,8 +2453,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1255,46 +2478,97 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
- " has-term \"C term\" ,",
- " feat-constraint {",
- " string {",
- " match-text \"C term\" ,",
- " match-location starts ,",
+ " string-constraint {",
+ " match-text \"c-terminal domain protein\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
" whole-word FALSE ,",
- " not-present TRUE ,",
+ " not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
- " has-term \"C-term\" ,",
- " feat-constraint {",
- " string {",
- " match-text \"C-term\" ,",
- " match-location starts ,",
+ " string-constraint {",
+ " match-text \"C-terminus\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
" whole-word FALSE ,",
- " not-present TRUE ,",
+ " not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"C-terminus\" ,",
+ " match-text \"calchone\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"chalcone\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Calcium\" ,",
" match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -1304,7 +2578,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -1312,7 +2588,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1326,8 +2603,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1341,8 +2628,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1356,7 +2653,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"candidate protein\" ,",
@@ -1369,7 +2668,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1377,7 +2678,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1399,7 +2701,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -1407,7 +2711,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1421,7 +2726,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1429,7 +2736,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"carboxylic\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"catalize\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"catalyze\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1443,8 +2776,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type use-protein } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1458,8 +2801,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1473,7 +2819,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1481,7 +2829,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"cell division FtsK/SpoIIIE\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1495,8 +2844,68 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Changed start to match that seen in other orgs\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Changed start to match that seen in other orgs\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1510,7 +2919,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1518,7 +2929,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"characteriz\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1532,7 +2944,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1540,7 +2954,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"characteriz\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1554,7 +2969,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1562,7 +2979,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"CHC2 zinc finger protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1576,7 +2994,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1584,7 +3004,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"chelating\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1598,8 +3019,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1613,13 +3044,48 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"Chloroplast\" ,",
+ " match-text \"chlorAMPhenicol\" ,",
" match-location contains ,",
+ " case-sensitive TRUE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"chloramphenicol \" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"chloroplastic\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -1628,25 +3094,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " feat-constraint {",
- " source {",
- " field1",
- " textqual lineage ,",
- " constraint {",
- " match-text \"Eukaryota\" ,",
- " match-location contains ,",
- " case-sensitive TRUE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " type-constraint any } } ,",
- " rule-type no-organelle-for-prokaryote } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"chroamtid\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"chromatid\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1668,7 +3152,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -1676,7 +3162,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"claster\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"cluster\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1690,8 +3202,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1705,7 +3227,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1713,7 +3237,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"containing\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1727,7 +3252,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1735,11 +3262,27 @@ static const char* const s_Defaultproductrules[] = {
" replace \"coenzyme\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \"COG\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " except",
+ " string-constraint {",
+ " match-text \"COG\" ,",
" match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -1749,33 +3292,41 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type hypothetical ,",
- " replace {",
- " replace-func",
- " simple-replace {",
- " replace \"hypothetical protein\" ,",
- " whole-string TRUE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \"COG\" ,",
- " match-location contains ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " except",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
" string-constraint {",
- " match-text \"COG\" ,",
- " match-location equals ,",
+ " match-text \"COGnitor\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -1784,8 +3335,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type database } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1799,7 +3353,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"adenomatous polyposis coli \" ,",
@@ -1812,8 +3368,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"colour\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type british ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"color\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1827,8 +3418,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1842,7 +3436,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1850,7 +3446,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"component\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1864,7 +3461,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1872,7 +3471,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"component\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1886,7 +3486,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1894,7 +3496,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"component\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1916,7 +3519,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -1924,7 +3529,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1938,7 +3544,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1946,7 +3554,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"conserved\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1960,7 +3569,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -1968,7 +3579,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"conserved\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -1990,7 +3602,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -1998,7 +3612,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2020,7 +3635,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2028,7 +3645,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2042,7 +3660,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2050,7 +3670,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2072,7 +3693,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"conserved hypothetical domain protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2080,7 +3728,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"conserved hypothetical family protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2102,7 +3776,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2110,7 +3786,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2124,7 +3801,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2132,7 +3811,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"conserved predicted protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2146,7 +3851,59 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"conserved protein of unknown function\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"conserved putative protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2154,7 +3911,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2168,7 +3926,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2176,7 +3936,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2190,8 +3951,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2205,7 +3976,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2213,7 +3986,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"conserved\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2227,7 +4001,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2235,7 +4011,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"containing\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2249,7 +4026,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2257,7 +4036,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"containing\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2271,8 +4051,68 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"containinging\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"containing\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"contains\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2286,8 +4126,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2301,7 +4144,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2309,7 +4154,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"conserved\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2323,7 +4169,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2331,7 +4179,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"conserved\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2345,7 +4194,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2353,7 +4204,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"containing\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2367,8 +4219,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2382,8 +4237,36 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"CTP\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2405,7 +4288,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2413,7 +4298,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2435,7 +4321,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2443,7 +4331,41 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"database\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"no significant database hits\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2457,7 +4379,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2465,7 +4389,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"degradation\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2479,8 +4404,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2494,7 +4422,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2502,7 +4432,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"dependent\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"deulfurase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"desulfurase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2516,7 +4472,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2524,7 +4482,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"dehydrogenase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"di\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2538,7 +4522,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2546,7 +4532,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"diacylglycerol\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2560,7 +4547,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -2568,7 +4557,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"dimerization\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2582,7 +4572,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -2590,7 +4582,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"dimerizing\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2604,7 +4597,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2612,7 +4607,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"dioxygenase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"dipeptide\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2626,7 +4647,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2634,7 +4657,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"disulfide\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2648,7 +4672,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2656,7 +4682,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"division\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2678,7 +4705,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2686,7 +4715,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2700,26 +4730,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
- " {",
- " find",
- " string-constraint {",
- " match-text \"domain\" ,",
- " match-location ends ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present FALSE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type use-protein ,",
- " description \"ends with 'domain' (If this is only similar to a domain,",
- " remove this feature. If this is a translated protein, provide a product name.",
- " Consider adding 'protein' to the end of the name.)\" } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2741,7 +4756,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2749,7 +4766,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2763,7 +4781,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2771,7 +4791,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2785,7 +4806,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2793,7 +4816,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"domain\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2815,7 +4839,42 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"domain of unknown function\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " synonyms {",
+ " \"probable\" ,",
+ " \"putative\" ,",
+ " \"hypothetical\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2823,7 +4882,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2837,7 +4897,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"domain of unknown function\" ,",
@@ -2850,7 +4912,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"domain of unknown function family protein\" ,",
" match-location equals ,",
@@ -2862,7 +4926,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"domain of unknown function protein\" ,",
" match-location equals ,",
@@ -2874,7 +4940,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -2882,37 +4950,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein of unknown function\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
- " {",
- " find",
- " string-constraint {",
- " match-text \"domain of unknown function\" ,",
- " match-location equals ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " ignore-words {",
- " {",
- " synonyms {",
- " \"probable\" ,",
- " \"putative\" ,",
- " \"hypothetical\" } ,",
- " case-sensitive FALSE ,",
- " whole-word TRUE } } ,",
- " whole-word FALSE ,",
- " not-present FALSE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type hypothetical ,",
- " replace {",
- " replace-func",
- " simple-replace {",
- " replace \"hypothetical protein\" ,",
- " whole-string TRUE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2926,7 +4965,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2934,7 +4975,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2948,7 +4990,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2956,7 +5000,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -2978,7 +5023,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -2986,7 +5033,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3000,7 +5048,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3008,7 +5058,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3022,7 +5073,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3030,7 +5083,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"domain protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3044,7 +5098,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3052,7 +5108,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3066,7 +5123,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3074,7 +5133,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"domain\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3088,7 +5148,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3096,7 +5158,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"domain\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3110,8 +5173,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
@@ -3133,7 +5199,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3141,7 +5209,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3155,30 +5224,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
- " {",
- " find",
- " string-constraint {",
- " match-text \"DUF\" ,",
- " match-location equals ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present FALSE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type hypothetical ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
" replace {",
" replace-func",
" simple-replace {",
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3192,7 +5249,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" except",
" string-constraint {",
" match-text \"DUF\" ,",
@@ -3205,8 +5264,36 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type database } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"DUF\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3220,15 +5307,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"dehydrogenase\" ,",
+ " replace \"dihydrogenase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3242,15 +5332,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"dihydrogenase\" ,",
+ " replace \"dehydrogenase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3264,8 +5357,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3279,8 +5382,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3294,7 +5407,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3302,7 +5417,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"enantioselective\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3316,8 +5432,68 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"enzymye\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"enzyme\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ErfKYbiSYcfSYnhG\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"ErfK/YbiS/YcfS/YnhG\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3331,8 +5507,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3346,7 +5532,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3354,7 +5542,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3368,7 +5557,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" except",
" string-constraint {",
" match-text \"EST\" ,",
@@ -3381,8 +5572,82 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type database } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"et al\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct TRUE ,",
+ " ignore-words {",
+ " {",
+ " word \"et al\" ,",
+ " synonyms {",
+ " \"unpublished\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } ,",
+ " {",
+ " word \"et al\" ,",
+ " synonyms {",
+ " \"citation\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } ,",
+ " {",
+ " word \"et al\" ,",
+ " synonyms {",
+ " \"published\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " except",
+ " string-constraint {",
+ " match-text \"resuscitation\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " description \"may contain publication reference\" ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"evidenced by\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3396,7 +5661,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3404,7 +5671,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"exported\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"expressed\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3418,7 +5711,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3426,7 +5721,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3440,7 +5736,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3448,7 +5746,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"factor\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3470,7 +5769,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3478,7 +5779,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"faecal\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type british ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"fecal\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3492,7 +5819,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3500,7 +5829,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"flagella\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"faimily\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"family\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"faimly\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"family\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3514,7 +5894,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3522,7 +5904,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"family\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"familiy\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"family\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"family\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3536,7 +5969,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3544,7 +5979,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"family\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3566,7 +6002,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3574,7 +6012,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3588,7 +6027,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -3596,7 +6037,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"fiber\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3618,7 +6060,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3626,7 +6070,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3640,7 +6085,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3648,7 +6095,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"flagellar\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3662,63 +6110,36 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type database } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"fold\" ,",
- " match-location ends ,",
+ " match-text \"formly\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " feat-constraint {",
- " field {",
- " field",
- " feature-field {",
- " type cds ,",
- " field",
- " legal-qual product } ,",
- " string-constraint {",
- " match-text \"folD\" ,",
- " match-location ends ,",
- " case-sensitive TRUE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " field {",
- " field",
- " feature-field {",
- " type cds ,",
- " field",
- " legal-qual product } ,",
- " string-constraint {",
- " match-text \"FolD\" ,",
- " match-location ends ,",
- " case-sensitive TRUE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } } } ,",
- " rule-type use-protein ,",
- " description \"ends with 'fold' (If this is only similar to a protein fold,",
- " remove this feature. If this is a translated protein, provide a product name.",
- " Consider adding 'protein' to the end of the name.)\" } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"formyl\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3732,8 +6153,44 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"frame\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"frame shift\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3747,8 +6204,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3762,7 +6222,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"programmed frameshift\" ,",
@@ -3775,8 +6237,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3790,8 +6255,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"funciton\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"function\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3805,8 +6305,86 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"fungi\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"g:t\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"galactose\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3820,8 +6398,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3835,8 +6423,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type gene } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type gene ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3850,8 +6441,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type gene } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type gene ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3865,7 +6459,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"genome instability\" ,",
@@ -3878,7 +6474,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"genome maintenance\" ,",
" match-location contains ,",
@@ -3890,8 +6488,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3913,7 +6514,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -3921,7 +6524,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Giardia\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3935,7 +6564,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3943,7 +6574,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"glycerol\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3957,7 +6589,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -3965,7 +6599,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"glycosyl\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3979,8 +6614,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -3994,7 +6632,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4002,7 +6642,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"glucosaminyl\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"glutamate\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4024,7 +6690,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4032,7 +6700,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4054,7 +6723,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4062,7 +6733,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4076,7 +6748,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4084,7 +6758,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"glutamine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4098,7 +6773,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4106,7 +6783,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"glycine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4128,7 +6806,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4136,7 +6816,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"glycolsyltransferase\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"glycosyltransferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4150,7 +6856,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4158,12 +6866,13 @@ static const char* const s_Defaultproductrules[] = {
" replace \"glucosyl\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"golgi\" ,",
- " match-location contains ,",
+ " match-text \"GMP\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -4172,11 +6881,23 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " except",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
" string-constraint {",
- " match-text \"golgi family\" ,",
- " match-location contains ,",
+ " match-text \"gp\" ,",
+ " match-location starts ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -4185,25 +6906,94 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
- " source {",
- " field1",
- " textqual lineage ,",
- " constraint {",
- " match-text \"Eukaryota\" ,",
- " match-location contains ,",
- " case-sensitive TRUE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " type-constraint any } } ,",
- " rule-type no-organelle-for-prokaryote } ,",
+ " string {",
+ " match-text \"gph\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \"GPI\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \"GPN\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \"GPH\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type putative-typo ,",
+ " description \"may contain systematic gene product identifiers from phage\" ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"GTP\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4225,7 +7015,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4233,7 +7025,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4247,7 +7040,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"archaemetzincin\" ,",
@@ -4260,12 +7055,15 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type british ,",
" replace {",
" replace-func",
" haem-replace \"haem\" ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4279,8 +7077,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4294,12 +7102,40 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" haem-replace \"haem\" ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"heavychain\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"heavy chain\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4313,8 +7149,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4328,8 +7174,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4351,7 +7207,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4359,7 +7217,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4381,7 +7240,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4389,7 +7250,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4411,7 +7273,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4419,7 +7283,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"helix-turn-helix protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4433,7 +7298,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4441,7 +7308,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hemagglutinin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4455,7 +7323,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4463,7 +7333,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hemolysin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4477,7 +7348,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4485,7 +7358,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hemoglobin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4499,7 +7373,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4507,7 +7383,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hexapeptide\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4521,7 +7398,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4529,7 +7408,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hexapeptide\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4543,8 +7423,29 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"highly similar\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4558,7 +7459,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4566,7 +7469,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"histidine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4580,7 +7484,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4588,7 +7494,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"histidine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4610,7 +7517,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4618,7 +7527,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4632,7 +7542,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4640,7 +7552,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"homoserine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"homlog\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"homolog\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4654,8 +7592,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4669,8 +7617,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4684,7 +7642,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4692,7 +7652,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"homocysteine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4701,13 +7662,59 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type evolutionary-relationship ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Homologue\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type evolutionary-relationship ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"horikoshii\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
" whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type evolutionary-relationship } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4721,7 +7728,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4729,7 +7738,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4743,8 +7753,68 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hy0\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hydolase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hydrolase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4766,7 +7836,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4774,7 +7846,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hydrolas\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hydrolase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4788,7 +7886,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4796,7 +7896,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hydrolase of the alpha/beta superfamily\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4810,7 +7911,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4818,7 +7921,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4832,7 +7936,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4840,7 +7946,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4854,7 +7961,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4862,7 +7971,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4876,7 +7986,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4884,7 +7996,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hyperthetical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4898,7 +8036,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4906,7 +8046,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4920,7 +8061,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hyphothetical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4928,7 +8096,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4950,7 +8119,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -4958,7 +8129,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4972,7 +8144,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -4980,7 +8154,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -4994,7 +8169,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5002,7 +8179,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5016,7 +8194,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5024,7 +8204,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5038,7 +8219,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5046,7 +8229,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5060,7 +8244,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5068,7 +8254,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5082,7 +8269,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5090,7 +8279,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5104,7 +8294,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5112,7 +8304,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5126,7 +8319,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5134,7 +8329,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5148,7 +8344,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5156,7 +8354,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypotherical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypothertical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5170,7 +8419,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5178,7 +8429,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5192,7 +8444,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5200,7 +8454,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5214,7 +8469,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5222,7 +8479,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5236,7 +8494,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypothetial\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5244,7 +8529,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5258,7 +8544,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5266,7 +8554,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5280,7 +8569,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5288,7 +8579,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5310,7 +8602,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -5318,7 +8612,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5332,7 +8627,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"hypothetical protein\" ,",
@@ -5345,7 +8642,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"hypothetical domain protein\" ,",
" match-location equals ,",
@@ -5357,7 +8656,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"hypothetical ORF\" ,",
" match-location equals ,",
@@ -5369,7 +8670,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"hypothetical\" ,",
" match-location equals ,",
@@ -5381,7 +8684,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"Hypothetical conserved protein\" ,",
" match-location equals ,",
@@ -5393,7 +8698,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"hypothetical protein\" ,",
" match-location starts ,",
@@ -5405,7 +8712,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"UPF\" ,",
" match-location contains ,",
@@ -5427,7 +8736,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5435,7 +8746,109 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypothetical protein\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " except",
+ " string-constraint {",
+ " match-text \"hypothetical protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type putative-typo ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Hypothetical protein gene\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space TRUE ,",
+ " ignore-punct TRUE ,",
+ " ignore-words {",
+ " {",
+ " word \"gene\" ,",
+ " synonyms {",
+ " \"sequence\" ,",
+ " \"partial sequence\" ,",
+ " \"complete sequence\" ,",
+ " \"partial\" ,",
+ " \"complete\" ,",
+ " \"gene sequence\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word FALSE } ,",
+ " {",
+ " word \"hypothetical\" ,",
+ " synonyms {",
+ " \"putative\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word FALSE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypotheticala\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5444,12 +8857,14 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5457,7 +8872,108 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypotheticial\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypotheticical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypotheticl\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypotheticla\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5471,7 +8987,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5479,7 +8997,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5493,7 +9012,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5501,7 +9022,83 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hypoyhtetical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hyppothetical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hyprothetical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5515,7 +9112,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hyptohetical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5523,7 +9147,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hyptothetcial\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5537,7 +9187,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"hyputhetical\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5545,7 +9222,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5559,8 +9237,36 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"immunoreactive\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"immunoreactive protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5574,8 +9280,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5589,8 +9298,36 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"incolved\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"involved\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5604,7 +9341,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" except",
" string-constraint {",
" match-text \"incomplete ORF domain protein\" ,",
@@ -5617,8 +9356,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
@@ -5632,7 +9374,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -5640,7 +9384,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"indepedent\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"independent\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5654,7 +9424,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5662,7 +9434,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"inducible\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5676,7 +9449,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5684,7 +9459,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"initiation\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"inorganic phosphate\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5706,7 +9507,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -5714,7 +9517,133 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"insitol\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"inositol\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"intein C-terminal splicing region\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"intein N-terminal splicing region\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"interacts with\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"internal repeat sequences detected\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5728,8 +9657,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5743,7 +9675,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5751,7 +9685,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"initiation\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5765,7 +9700,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5773,7 +9710,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"inversion\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5787,8 +9725,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5810,7 +9758,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -5818,7 +9768,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" prefix-and-numbers \"IS\" ,",
@@ -5829,7 +9780,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"IS ORF\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"IS protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative TRUE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"isation\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type british ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"ization\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5851,7 +9853,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -5859,7 +9863,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5873,7 +9878,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5881,7 +9888,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"isomerase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5895,7 +9903,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5903,7 +9913,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"isomerase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5917,8 +9928,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5932,7 +9953,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5940,7 +9963,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"potassium\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5954,7 +9978,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -5962,7 +9988,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"potassium\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"lacitehtopyh\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5976,8 +10028,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -5999,7 +10061,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6007,12 +10071,38 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \"like\" ,",
- " match-location ends ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"likeity\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -6021,8 +10111,204 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type use-protein } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"likely\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"likely\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"liporotein\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"lipoprotein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"listeria/Bacterioides\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"localisation\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type british ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"localization\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"localised\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type british ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"localized\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"localization of periplasmic protein complexes\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"located in\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6036,8 +10322,50 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"LOW QUALITY PROTEIN:\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Low Quality Protein: \" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6059,7 +10387,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6067,7 +10397,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6081,7 +10412,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -6089,7 +10422,66 @@ static const char* const s_Defaultproductrules[] = {
" replace \"major\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"malate\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " synonyms {",
+ " \"probable\" ,",
+ " \"putative\" ,",
+ " \"hypothetical\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Maltose\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6103,7 +10495,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6111,7 +10505,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"membrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6125,8 +10520,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6140,7 +10545,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6148,7 +10555,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"measure\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"medated\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"mediated\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"mediates\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6170,7 +10628,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6178,7 +10638,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"membrane protein of\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"membrane protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6192,7 +10678,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6200,7 +10688,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"membrane protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6214,7 +10703,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6222,7 +10713,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"membrane transport\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6236,7 +10728,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6244,7 +10738,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"membrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6258,7 +10753,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6266,7 +10763,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"membrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6280,7 +10778,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6288,7 +10788,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"membrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6310,7 +10811,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6318,7 +10821,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6332,7 +10836,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6340,7 +10846,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"methyltransferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6354,7 +10861,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6362,11 +10871,37 @@ static const char* const s_Defaultproductrules[] = {
" replace \"methylase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"miscellaneous\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"Mitochondrial\" ,",
+ " match-text \"miscellaneous\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -6376,25 +10911,51 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " feat-constraint {",
- " source {",
- " field1",
- " textqual lineage ,",
- " constraint {",
- " match-text \"Eukaryota\" ,",
- " match-location contains ,",
- " case-sensitive TRUE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " type-constraint any } } ,",
- " rule-type no-organelle-for-prokaryote } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " except",
+ " string-constraint {",
+ " match-text \"miscellaneous\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"mitchondrial\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"mitochondrial\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6408,7 +10969,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -6416,7 +10979,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"mobilization\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6430,7 +10994,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6438,7 +11004,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"molybdenum\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6452,7 +11019,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6460,7 +11029,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"molybdopterin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6474,7 +11044,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6482,7 +11054,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"molybdopterin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6496,7 +11069,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6504,7 +11079,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"monooxygenase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6518,7 +11094,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6526,7 +11104,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"monoxide\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6540,7 +11119,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6548,25 +11129,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"monooxygenase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
- " {",
- " find",
- " string-constraint {",
- " match-text \"motif\" ,",
- " match-location ends ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present FALSE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type use-protein ,",
- " description \"ends with 'motif' (If this is only similar to a protein",
- " motif, remove this feature. If this is a translated protein, provide a",
- " product name. Consider adding 'protein' to the end of the name.)\" } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6580,8 +11144,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6603,7 +11177,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6611,7 +11187,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6625,7 +11202,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6633,7 +11212,90 @@ static const char* const s_Defaultproductrules[] = {
" replace \"multidrug\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"mulitfunction\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"multifunction\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"MULTISPECIES\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"MULTISPECIES:\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"MULTISPECIES: \" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6647,8 +11309,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6662,7 +11334,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6670,7 +11344,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"mutarotase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"mutlifunction\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"multifunction\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6684,8 +11384,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6699,8 +11409,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6714,7 +11434,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6722,41 +11444,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
- " has-term \"N term\" ,",
- " feat-constraint {",
- " string {",
- " match-text \"N term\" ,",
- " match-location starts ,",
+ " string-constraint {",
+ " match-text \"mythylglyoxyl \" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present TRUE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"methylglyoxyl\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
- " has-term \"N-term\" ,",
- " feat-constraint {",
- " string {",
- " match-text \"N-term\" ,",
- " match-location starts ,",
+ " string-constraint {",
+ " match-text \"n-terminal domain protein\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
" whole-word FALSE ,",
- " not-present TRUE ,",
+ " not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6770,7 +11509,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Na\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6778,7 +11544,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Na+\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6800,7 +11592,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6808,7 +11602,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6822,8 +11617,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6837,7 +11635,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6845,7 +11645,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"uncharacterized\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6867,7 +11668,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6875,7 +11678,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6889,7 +11693,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6897,7 +11703,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"endoribonuclease\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6911,8 +11718,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Ni\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6926,7 +11768,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -6934,7 +11778,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"nickel\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6948,8 +11793,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6971,7 +11826,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -6979,7 +11836,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -6993,8 +11851,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7008,8 +11869,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7031,7 +11895,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7039,24 +11905,152 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"novel\" ,",
+ " match-text \"no function assigned\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"no likeity\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"no significant database hits\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"no significant database matches\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"no significant homology\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"no significant homology\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" except",
" string-constraint {",
- " match-text \"novel protein\" ,",
+ " match-text \"no significant homology\" ,",
" match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -7066,8 +12060,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7081,7 +12078,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"novel protein\" ,",
@@ -7094,7 +12093,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7102,7 +12103,41 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"novel\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " except",
+ " string-constraint {",
+ " match-text \"novel protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7116,7 +12151,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7124,7 +12161,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"integral \" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"nucear\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"nuclear\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7138,7 +12201,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7146,7 +12211,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"nuclear\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7160,7 +12226,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7168,7 +12236,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"nucleotidyl\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"nucletide\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"nucleotide\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7182,7 +12276,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7190,7 +12286,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"nuclear\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"nulceotide\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"nucleotide\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"null\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7212,7 +12359,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7220,7 +12369,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7242,7 +12392,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7250,7 +12402,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7264,7 +12417,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7272,7 +12427,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"cobalt\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"observed by proteomics\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7286,7 +12467,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7294,7 +12477,26 @@ static const char* const s_Defaultproductrules[] = {
" replace \"module\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"of\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7308,15 +12510,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type might-be-nonfunctional ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
" replace {",
" replace-func",
" simple-replace {",
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7330,7 +12535,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"open reading frame\" ,",
@@ -7343,23 +12550,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } } ,",
- " rule-type might-be-nonfunctional } ,",
- " {",
- " find",
- " string-constraint {",
- " match-text \"or related\" ,",
- " match-location contains ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word TRUE ,",
- " not-present FALSE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
@@ -7381,7 +12576,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7389,7 +12586,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7403,7 +12601,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7411,7 +12611,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7433,7 +12634,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7441,7 +12644,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"organise\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type british ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"organize\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7455,7 +12684,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7463,7 +12694,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7477,7 +12709,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"orphan protein\" ,",
@@ -7490,8 +12724,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7500,13 +12737,34 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type evolutionary-relationship } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type evolutionary-relationship ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"orthologue\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type evolutionary-relationship ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7520,7 +12778,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7528,7 +12788,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7542,7 +12803,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7550,7 +12813,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"outer\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Oxalate\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7564,7 +12853,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"oxidoreductase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"oxidoreductase ()\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7572,7 +12888,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"oxidoreductase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7586,7 +12903,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7594,7 +12913,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"oxidoreductase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7608,7 +12928,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7616,7 +12938,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"oxidoreductase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7630,7 +12953,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7638,7 +12963,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"oxidoreductase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7652,7 +12978,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7660,7 +12988,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"oxidoreductase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"oxigenase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"oxygenase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7674,7 +13028,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7682,7 +13038,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"oxidase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7704,7 +13061,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7712,7 +13071,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7734,7 +13094,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7742,7 +13104,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"p-loop protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7751,13 +13114,59 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type evolutionary-relationship ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"paralogue\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type evolutionary-relationship } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type evolutionary-relationship ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"part\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7771,8 +13180,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
@@ -7794,7 +13206,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7802,7 +13216,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"PASTA protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7824,7 +13239,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7832,7 +13249,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7846,7 +13264,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7854,7 +13274,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"peptidoglycan\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"peptidyl-prolyl cis-trans\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7868,7 +13314,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7876,7 +13324,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"periplasmic\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7890,7 +13339,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7898,7 +13349,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"periplasmic\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7912,7 +13364,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7920,7 +13374,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"periplasmic\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7934,8 +13389,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7949,7 +13414,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -7957,7 +13424,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"peptidase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -7979,7 +13447,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -7987,7 +13457,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8009,7 +13480,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8017,7 +13490,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"pheremone\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"pheromone\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8031,7 +13530,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8039,7 +13540,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phosphate\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8053,7 +13555,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8061,7 +13565,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phospho\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8075,7 +13580,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8083,7 +13590,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phosphoserine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8097,7 +13605,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8105,7 +13615,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phosphate\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"phospatase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"phosphatase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"phosphateN\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"phosphate N\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8119,7 +13680,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8127,7 +13690,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phosphatidyltransferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8141,7 +13705,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8149,7 +13715,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phosphotransferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8163,7 +13730,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8171,7 +13740,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phosphopantetheine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8185,7 +13755,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8193,7 +13765,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"phosphatase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8215,7 +13788,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"plasmid-like protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8223,7 +13823,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8237,8 +13838,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8252,7 +13863,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8260,7 +13873,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"polymerase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"polymeris\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type british ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"polymeriz\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8269,13 +13908,48 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " word \"pombe\" ,",
+ " synonyms {",
+ " \"genitalium\" ,",
+ " \"leprae\" ,",
+ " \"crassa\" ,",
+ " \"ciliare\" ,",
+ " \"falciparum\" ,",
+ " \"fumigata\" ,",
+ " \"vinifera\" ,",
+ " \"lipolytica\" ,",
+ " \"ambisexualis\" ,",
+ " \"brasilense\" ,",
+ " \"carbonum\" ,",
+ " \"elegans\" ,",
+ " \"melanogaster\" ,",
+ " \"capricolum\" ,",
+ " \"pneumoniae\" ,",
+ " \"pseudotuberculosis\" ,",
+ " \"histolytica\" ,",
+ " \"influenzae\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word FALSE } } ,",
" whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " description \"contains organism name\" ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8289,7 +13963,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8297,7 +13973,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8311,7 +13988,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8319,7 +13998,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8333,7 +14013,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8341,7 +14023,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"possible\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8355,7 +14038,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"possible protein\" ,",
@@ -8368,7 +14053,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8376,7 +14063,76 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"possibly\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"poteasome\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"proteasome\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"potein\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8390,7 +14146,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"potential protein\" ,",
@@ -8403,7 +14161,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8411,7 +14171,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"precurso\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"precursor\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8433,7 +14219,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8441,7 +14229,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"precusor\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"precursor\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8463,7 +14277,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8471,7 +14287,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8485,7 +14302,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"predicted protein\" ,",
@@ -8498,7 +14317,23 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " string {",
+ " match-text \"Predicted:\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8506,7 +14341,83 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Predicted:\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"predictedprotein\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"predicted protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"predictet\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"predicted\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8520,7 +14431,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8528,7 +14441,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"precursor\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"probabable\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"probable\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8550,7 +14489,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8558,7 +14499,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8572,7 +14514,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"probable protein\" ,",
@@ -8585,7 +14529,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8593,7 +14539,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8607,7 +14554,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8615,7 +14564,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8629,7 +14579,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8637,7 +14589,26 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"probably\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8651,7 +14622,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8659,7 +14632,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8681,7 +14655,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8689,7 +14665,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8703,7 +14680,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"proposed protein\" ,",
@@ -8716,7 +14695,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8724,7 +14705,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8738,7 +14720,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8746,7 +14730,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"proptein\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8760,7 +14770,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8768,7 +14780,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"protazoan\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"protozoan\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8782,7 +14820,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8790,7 +14830,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8804,7 +14845,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"hypothetical protein\" ,",
@@ -8817,7 +14860,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"uncharacterized protein\" ,",
" match-location equals ,",
@@ -8829,7 +14874,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8837,7 +14884,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8851,8 +14899,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8866,7 +14917,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8874,7 +14927,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8896,7 +14950,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8904,7 +14960,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"protein of\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8926,7 +15008,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"protein of unknown function, duf\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " word \"protein of unknown function, duf\" ,",
+ " synonyms {",
+ " \"protein of unknown function duf\" ,",
+ " \"domain of unknown function duf\" ,",
+ " \"domain of unknown function, duf\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8934,7 +15052,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"protein product\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8948,7 +15092,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -8956,7 +15102,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Protein putative protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -8970,7 +15142,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -8978,7 +15152,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9000,7 +15175,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -9008,7 +15185,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9017,12 +15195,14 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9030,7 +15210,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9039,12 +15220,14 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9052,7 +15235,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"proten\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9066,7 +15275,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9074,7 +15285,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9088,7 +15300,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9096,7 +15310,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9110,7 +15325,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9118,7 +15335,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9132,7 +15350,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9140,7 +15360,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9149,20 +15370,14 @@ static const char* const s_Defaultproductrules[] = {
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " ignore-words {",
- " {",
- " synonyms {",
- " \"probable\" ,",
- " \"putative\" ,",
- " \"hypothetical\" } ,",
- " case-sensitive FALSE ,",
- " whole-word TRUE } } ,",
" whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -9170,7 +15385,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9184,14 +15400,42 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"pseudogene\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
" match-text \"pseudogene\" ,",
" match-location contains ,",
- " case-sensitive TRUE ,",
+ " case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
" whole-word TRUE ,",
@@ -9199,8 +15443,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
@@ -9214,8 +15461,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9229,7 +15486,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9237,7 +15496,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9259,7 +15519,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9267,7 +15529,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"PTS system protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"puataive\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9281,7 +15569,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9289,7 +15579,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9303,7 +15594,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9311,7 +15604,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9333,7 +15627,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -9341,7 +15637,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9355,7 +15652,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9363,7 +15662,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9377,7 +15677,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9385,7 +15687,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9399,7 +15702,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9407,7 +15712,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9421,7 +15727,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9429,7 +15737,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9443,7 +15752,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9451,7 +15762,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"putativ\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9473,7 +15810,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -9481,7 +15820,48 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"putative possible\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"possible protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9495,7 +15875,49 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"putative potential\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"possible protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9503,7 +15925,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9517,7 +15940,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9525,7 +15950,48 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"putative probable\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"possible protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9539,7 +16005,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9547,7 +16015,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9561,7 +16030,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9569,7 +16040,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9583,7 +16055,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -9591,7 +16065,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9605,7 +16080,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"putativie\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9613,7 +16115,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"putatuive\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9627,7 +16155,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9635,7 +16165,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9649,7 +16180,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9657,7 +16190,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9671,7 +16205,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9679,7 +16215,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9693,7 +16230,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9701,7 +16240,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9715,7 +16255,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9723,7 +16265,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"outer\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"putitative\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9737,7 +16305,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9745,7 +16315,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"puttive\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"putative\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9759,8 +16355,93 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"pyradoxal\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"pyridoxal\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Pyrococcus\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"pyruvyltransferase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"pyruvyl transferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9774,7 +16455,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9782,7 +16465,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"alcohol\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9796,7 +16480,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9804,7 +16490,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transcriptional\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9818,8 +16505,76 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"raw score\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"raw score\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"raw score\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9833,7 +16588,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9841,7 +16598,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"recognized\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9855,7 +16613,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9863,7 +16623,51 @@ static const char* const s_Defaultproductrules[] = {
" replace \"reductase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"region\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"regulates\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9877,7 +16681,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9885,7 +16691,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"regulator\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9899,7 +16706,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -9907,52 +16716,76 @@ static const char* const s_Defaultproductrules[] = {
" replace \"regulatory\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"related\" ,",
- " match-location ends ,",
+ " match-text \"Related\" ,",
+ " match-location starts ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type use-protein } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"Related\" ,",
- " match-location starts ,",
+ " match-text \"remnant\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"remnant\" ,",
+ " match-text \"reolvase\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"resolvase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -9974,7 +16807,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -9982,25 +16817,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"repeat\" ,",
- " match-location ends ,",
+ " match-text \"repeatl\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type use-protein ,",
- " description \"ends with 'repeat' (If this is only similar to a repeat,",
- " remove this feature. If this is a translated protein, provide a product name.",
- " Consider adding 'protein' to the end of the name.)\" } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"repeat\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"replicaiton\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"replication\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10014,7 +16882,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10022,7 +16892,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"response\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10036,7 +16907,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10044,7 +16917,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"resistance\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10058,8 +16932,68 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ribomal\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"ribosomal\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ribonuleotide\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"ribonucleotide\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10073,7 +17007,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10081,7 +17017,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"ribosomal\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ribosomal RNA\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10095,7 +17057,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10103,7 +17067,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"ribosomal\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ribossomal\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"ribosomal\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10117,8 +17107,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"riobosyltransferase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"ribosyltransferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10140,7 +17165,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -10148,7 +17175,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10162,8 +17190,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10185,7 +17216,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -10193,7 +17226,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10207,7 +17241,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10215,7 +17251,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"response\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10229,8 +17266,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10244,8 +17291,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10267,7 +17324,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -10275,7 +17334,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10297,7 +17357,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -10305,7 +17367,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10319,7 +17382,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10327,7 +17392,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"serine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10341,8 +17407,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10356,14 +17432,67 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"sigm\" ,",
+ " match-text \"shrot\" ,",
" match-location contains ,",
- " case-sensitive TRUE ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"short\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"shrot-chain\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"short-chain\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"shutting\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
" whole-word TRUE ,",
@@ -10371,8 +17500,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type putative-typo } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"shuttling\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"shuttlingfactor\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"shuttling factor\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10394,7 +17558,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -10402,7 +17568,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10424,7 +17591,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10432,7 +17601,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"signal peptide protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10446,7 +17616,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10454,7 +17626,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"signaling\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10468,8 +17641,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"similiar\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"similar\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10483,7 +17691,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10491,7 +17701,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"similar\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10505,8 +17716,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10520,7 +17741,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -10528,7 +17751,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10542,8 +17766,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10557,8 +17784,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10572,7 +17802,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10580,7 +17812,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"specific\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"specific\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10594,8 +17852,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10609,8 +17877,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10624,7 +17902,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10632,7 +17912,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"specific\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10646,8 +17927,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10661,7 +17952,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10669,7 +17962,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"stabilization\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10683,8 +17977,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10698,8 +18002,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10713,8 +18027,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10728,8 +18052,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10743,8 +18077,11 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
@@ -10758,8 +18095,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10773,8 +18120,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10788,8 +18145,79 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"strongly conserved\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"strongly similar\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"structual\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"structural\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10803,8 +18231,36 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"subitilus\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10818,7 +18274,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10826,7 +18284,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"subunit\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10840,8 +18299,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10855,8 +18324,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10878,7 +18357,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"sugar\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -10886,7 +18392,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10900,7 +18407,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10908,7 +18417,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"sugar\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"sulfer\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"sulfur\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10922,7 +18457,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -10930,7 +18467,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"sulfate\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10944,7 +18482,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -10952,7 +18492,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"sulfide\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10966,7 +18507,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10974,7 +18517,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"sulfo\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -10988,7 +18532,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -10996,7 +18542,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"sulfur\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11010,7 +18557,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11018,7 +18567,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"SWIM zinc finger protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11032,7 +18582,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11040,7 +18592,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"synthesis\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11054,7 +18607,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11062,7 +18617,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"synthase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11076,7 +18632,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11084,7 +18642,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"tetracenomycin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11098,8 +18657,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11113,7 +18682,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11121,7 +18692,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"thiamin/thiamine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11135,7 +18707,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11143,7 +18717,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"thiamine S\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11157,7 +18732,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11165,7 +18742,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"thioredoxin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11179,7 +18757,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11187,7 +18767,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"thioredoxin\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11201,7 +18782,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11209,7 +18792,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"threonine\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11231,7 +18815,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -11239,7 +18825,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11261,7 +18848,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -11269,7 +18858,26 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"TPA:\" ,",
+ " match-location starts ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11283,8 +18891,43 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type database } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"trancription\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transcription\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11298,7 +18941,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11306,7 +18951,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transcription\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11320,7 +18966,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11328,7 +18976,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11342,7 +18991,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11350,7 +19001,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transporter\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11364,7 +19016,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11372,7 +19026,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transcribed\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transcirption\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transcription\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transcripitonal\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transcriptional\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11386,7 +19091,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11394,7 +19101,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transcript\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11408,7 +19116,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11416,7 +19126,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transcriptional\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11430,7 +19141,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11438,7 +19151,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transcriptional\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11452,7 +19166,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11460,7 +19176,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transcriptional\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11474,7 +19191,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11482,11 +19201,12 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transmembrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"Transemembrane\" ,",
+ " match-text \"transemembrane\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -11496,7 +19216,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11504,11 +19226,12 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transmembrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"transemembrane\" ,",
+ " match-text \"Transemembrane\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -11518,7 +19241,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11526,7 +19251,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transmembrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11540,7 +19266,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11548,7 +19276,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11562,7 +19291,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11570,7 +19301,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transferase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11584,7 +19316,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11592,11 +19326,12 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transglycosylase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"transmebrane\" ,",
+ " match-text \"Transmebrane\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -11606,7 +19341,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11614,11 +19351,12 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transmembrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"Transmebrane\" ,",
+ " match-text \"transmebrane\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -11628,7 +19366,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transmembrane\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transmemembrane\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11636,7 +19401,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transmembrane\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11650,7 +19416,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11658,7 +19426,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transporter\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11672,7 +19441,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11680,7 +19451,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transposase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11702,7 +19474,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11710,7 +19484,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transport-associated protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11724,7 +19499,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11732,7 +19509,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transporter\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transposase and inactivated derivative\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transposase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transposase and inactivated derivatives\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transposase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11746,7 +19574,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11754,7 +19584,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transposase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11768,7 +19599,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11776,7 +19609,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transposase\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11798,7 +19632,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11806,7 +19642,133 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transposase\" ,",
" whole-string FALSE ,",
" weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transposase transposase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transposase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transposases and inactivated derivative\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transposase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transposases and inactivated derivatives\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transposase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transposases and inactive derivative\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transposase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"transposases and inactive derivatives\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transposase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11828,7 +19790,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -11836,7 +19800,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11850,7 +19815,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11858,7 +19825,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transporter\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11872,7 +19840,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11880,7 +19850,58 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transsulfuration\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"trasnferase\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transferase\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"trasporter\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"transporter\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11902,7 +19923,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -11910,7 +19933,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11924,7 +19948,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -11932,7 +19958,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"transporter\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11946,8 +19973,36 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"trunucated\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"truncated\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11969,7 +20024,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -11977,7 +20034,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"ttg start\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -11991,7 +20074,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"ttg start codon\" ,",
@@ -12004,45 +20089,61 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"ttg start\" ,",
- " match-location equals ,",
+ " match-text \"Tuberculosis\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type hypothetical ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
" replace {",
" replace-func",
" simple-replace {",
" replace \"hypothetical protein\" ,",
- " whole-string FALSE ,",
+ " whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"Tuberculosis\" ,",
+ " match-text \"tumefaciens\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12056,7 +20157,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -12064,7 +20167,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"tumor\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"Type II\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12078,8 +20207,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12093,7 +20232,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -12101,7 +20242,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"type\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12123,7 +20265,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"uknown\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12131,7 +20300,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"uncharacterizaed\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"uncharacterized\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12145,7 +20340,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12153,7 +20350,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12167,7 +20365,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"uncharacterized protein\" ,",
@@ -12180,7 +20380,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"uncharacterized protein conserved in bacteria\" ,",
" match-location equals ,",
@@ -12192,7 +20394,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"uncharacterized conserved protein\" ,",
" match-location equals ,",
@@ -12204,7 +20408,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"uncharacterized\" ,",
" match-location equals ,",
@@ -12216,7 +20422,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"uncharacterized domain 1\" ,",
" match-location equals ,",
@@ -12228,7 +20436,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"uncharacterized protein\" ,",
" match-location starts ,",
@@ -12240,7 +20450,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"UPF\" ,",
" match-location contains ,",
@@ -12262,7 +20474,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -12270,7 +20484,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12284,7 +20499,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12292,7 +20509,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12306,7 +20524,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -12314,7 +20534,66 @@ static const char* const s_Defaultproductrules[] = {
" replace \"undecaprenyl\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"undefined product\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"undefined product\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " feat-constraint {",
+ " string {",
+ " match-text \"undefined product\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present TRUE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12328,7 +20607,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"unique protein\" ,",
@@ -12341,7 +20622,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -12349,7 +20632,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"putative\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12363,7 +20647,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"unknow protein\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12371,7 +20682,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"unknown\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12385,7 +20722,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" field {",
" field",
@@ -12404,7 +20743,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" field {",
" field",
" feature-field {",
@@ -12422,7 +20763,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
" string {",
" match-text \"unknown\" ,",
" match-location equals ,",
@@ -12434,7 +20777,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" string {",
" match-text \"unknown protein\" ,",
" match-location equals ,",
@@ -12446,12 +20791,40 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } } ,",
- " rule-type might-be-nonfunctional } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type might-be-nonfunctional ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"unknown\" ,",
+ " match-text \"unknown domain\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"unknown function\" ,",
" match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -12461,7 +20834,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12469,7 +20844,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12483,7 +20859,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12491,7 +20869,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12505,7 +20884,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
@@ -12513,7 +20894,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"unknown\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"unnamed\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12527,7 +20934,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" feat-constraint {",
" string {",
" match-text \"unnamed\" ,",
@@ -12540,12 +20949,15 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"unnamed\" ,",
+ " match-text \"unnamed protein product\" ,",
" match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -12555,7 +20967,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12563,24 +20977,37 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"UPF\" ,",
- " match-location contains ,",
+ " match-text \"unspecified product\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " except",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
" string-constraint {",
- " match-text \"UPF\" ,",
+ " match-text \"unusual protein\" ,",
" match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -12590,8 +21017,18 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type database } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12605,7 +21042,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12613,7 +21052,41 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"UPF\" ,",
+ " match-location contains ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word TRUE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " except",
+ " string-constraint {",
+ " match-text \"UPF\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12635,7 +21108,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12643,7 +21118,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12657,7 +21133,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -12665,7 +21143,8 @@ static const char* const s_Defaultproductrules[] = {
" replace \"utilization\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12679,7 +21158,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type british ,",
" replace {",
" replace-func",
@@ -12687,7 +21168,33 @@ static const char* const s_Defaultproductrules[] = {
" replace \"utilizing\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " string-constraint {",
+ " match-text \"UTP\" ,",
+ " match-location equals ,",
+ " case-sensitive FALSE ,",
+ " ignore-space FALSE ,",
+ " ignore-punct FALSE ,",
+ " whole-word FALSE ,",
+ " not-present FALSE ,",
+ " is-all-caps FALSE ,",
+ " is-all-lower FALSE ,",
+ " is-all-punct FALSE ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
@@ -12709,7 +21216,9 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type hypothetical ,",
" replace {",
" replace-func",
@@ -12717,26 +21226,37 @@ static const char* const s_Defaultproductrules[] = {
" replace \"hypothetical protein\" ,",
" whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"weakly conserved\" ,",
+ " match-text \"vigtamin\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"vitamin\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"widely conserved\" ,",
+ " match-text \"weakly conserved\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -12746,28 +21266,34 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type description } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"xenopus\" ,",
+ " match-text \"widely conserved\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type description ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"xray structure\" ,",
- " match-location contains ,",
+ " match-text \"xanthine\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -12776,12 +21302,22 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"yeast\" ,",
+ " match-text \"xenopus\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -12791,27 +21327,40 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"Yersinia\" ,",
+ " match-text \"xray structure\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type remove-organism-name } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"ypothetical\" ,",
+ " match-text \"yeast\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -12821,19 +21370,22 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type typo ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"hypothetical\" ,",
- " whole-string FALSE ,",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"ytochrome\" ,",
+ " match-text \"YeeEYedE\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -12843,180 +21395,206 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"cytochrome\" ,",
+ " replace \"YeeE/YedE\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"yypothetical\" ,",
+ " match-text \"Yersinia\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type typo ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type remove-organism-name ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"hypothetical\" ,",
- " whole-string FALSE ,",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"zinc\" ,",
- " match-location equals ,",
+ " match-text \"YjgPYjgQ\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " ignore-words {",
- " {",
- " synonyms {",
- " \"probable\" ,",
- " \"putative\" ,",
- " \"hypothetical\" } ,",
- " case-sensitive FALSE ,",
- " whole-word TRUE } } ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type hypothetical ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"hypothetical protein\" ,",
- " whole-string TRUE ,",
+ " replace \"YjgP/YjgQ\" ,",
+ " whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"zinc finger\" ,",
- " match-location equals ,",
+ " match-text \"ypothetical\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"zinc finger protein\" ,",
+ " replace \"hypothetical\" ,",
" whole-string FALSE ,",
- " weasel-to-putative TRUE } ,",
- " move-to-note FALSE } } ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"\\\\-PA\" ,",
+ " match-text \"ysine\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
- " not-present FALSE ,",
- " is-all-caps FALSE ,",
- " is-all-lower FALSE ,",
- " is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
- " {",
- " find",
- " string-constraint {",
- " match-text \"_\" ,",
- " match-location ends ,",
- " case-sensitive FALSE ,",
- " ignore-space FALSE ,",
- " ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
- " {",
- " find",
- " underscore NULL ,",
- " rule-type database } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"lysine\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"|\" ,",
+ " match-text \"ytochrome\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"cytochrome\" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"homlog\" ,",
+ " match-text \"yypothetical\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"homolog\" ,",
+ " replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"raw score\" ,",
- " match-location contains ,",
+ " match-text \"zinc\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
+ " ignore-words {",
+ " {",
+ " synonyms {",
+ " \"probable\" ,",
+ " \"putative\" ,",
+ " \"hypothetical\" } ,",
+ " case-sensitive FALSE ,",
+ " whole-word TRUE } } ,",
" whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type inappropriate-symbol } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note TRUE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"hyperthetical\" ,",
- " match-location contains ,",
+ " match-text \"zinc finger\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -13025,20 +21603,23 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"hypothetical\" ,",
+ " replace \"zinc finger protein\" ,",
" whole-string FALSE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " weasel-to-putative TRUE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"hypotherical\" ,",
- " match-location contains ,",
+ " match-text \"\\\" ,",
+ " match-location ends ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -13047,19 +21628,15 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type typo ,",
- " replace {",
- " replace-func",
- " simple-replace {",
- " replace \"hypothetical\" ,",
- " whole-string FALSE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"predictet\" ,",
+ " match-text \"\\\\-PA\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -13069,20 +21646,16 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type typo ,",
- " replace {",
- " replace-func",
- " simple-replace {",
- " replace \"predicted\" ,",
- " whole-string FALSE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"predictedprotein\" ,",
- " match-location contains ,",
+ " match-text \"_\" ,",
+ " match-location ends ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
@@ -13091,19 +21664,20 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type putative-typo ,",
- " replace {",
- " replace-func",
- " simple-replace {",
- " replace \"predicted protein\" ,",
- " whole-string FALSE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
+ " {",
+ " find",
+ " underscore NULL ,",
+ " rule-type database ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"enzymye\" ,",
+ " match-text \"|\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -13113,41 +21687,40 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type putative-typo ,",
- " replace {",
- " replace-func",
- " simple-replace {",
- " replace \"enzyme\" ,",
- " whole-string FALSE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type inappropriate-symbol ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"puttive\" ,",
+ " match-text \"familly\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type putative-typo ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"putative\" ,",
+ " replace \"family\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"heavychain\" ,",
+ " match-text \"hypotheitical\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -13157,19 +21730,22 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"heavy chain\" ,",
+ " replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"precurso\" ,",
+ " match-text \"hypopthetical\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -13179,19 +21755,22 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"precursor\" ,",
+ " replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"precusor\" ,",
+ " match-text \"hypothetetical\" ,",
" match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
@@ -13201,117 +21780,152 @@ static const char* const s_Defaultproductrules[] = {
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
" rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"precursor\" ,",
+ " replace \"hypothetical\" ,",
" whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"repeatl\" ,",
- " match-location contains ,",
+ " match-text \"no product string in file\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel FALSE } ,",
- " rule-type typo ,",
- " replace {",
- " replace-func",
- " simple-replace {",
- " replace \"repeat\" ,",
- " whole-string FALSE ,",
- " weasel-to-putative FALSE } ,",
- " move-to-note FALSE } } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type none ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"binds\" ,",
- " match-location starts ,",
+ " match-text \"distantly\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"interacts with\" ,",
- " match-location starts ,",
+ " match-text \"protein distantly\" ,",
+ " match-location equals ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type hypothetical ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"hypothetical protein\" ,",
+ " whole-string TRUE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"located in\" ,",
+ " match-text \"ifunctional \" ,",
" match-location starts ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word TRUE ,",
+ " whole-word FALSE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
+ " replace {",
+ " replace-func",
+ " simple-replace {",
+ " replace \"bifunctional \" ,",
+ " whole-string FALSE ,",
+ " weasel-to-putative FALSE } ,",
+ " move-to-note FALSE } ,",
+ " fatal FALSE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"mediates\" ,",
- " match-location starts ,",
+ " match-text \"refseq\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
- " ignore-punct FALSE ,",
+ " ignore-punct TRUE ,",
" whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type description } ,",
+ " ignore-weasel TRUE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type none ,",
+ " fatal TRUE } ,",
" {",
" find",
" string-constraint {",
- " match-text \"expressed\" ,",
- " match-location equals ,",
+ " match-text \"casette\" ,",
+ " match-location contains ,",
" case-sensitive FALSE ,",
" ignore-space FALSE ,",
" ignore-punct FALSE ,",
- " whole-word FALSE ,",
+ " whole-word TRUE ,",
" not-present FALSE ,",
" is-all-caps FALSE ,",
" is-all-lower FALSE ,",
" is-all-punct FALSE ,",
- " ignore-weasel TRUE } ,",
- " rule-type hypothetical ,",
+ " ignore-weasel FALSE ,",
+ " is-first-cap FALSE ,",
+ " is-first-each-cap FALSE } ,",
+ " rule-type typo ,",
" replace {",
" replace-func",
" simple-replace {",
- " replace \"hypothetical protein\" ,",
- " whole-string TRUE ,",
+ " replace \"cassette\" ,",
+ " whole-string FALSE ,",
" weasel-to-putative FALSE } ,",
- " move-to-note TRUE } } }"
+ " move-to-note FALSE } ,",
+ " fatal FALSE } }"
};
diff --git a/api/samutil.c b/api/samutil.c
index 8deaadb2..e45ebacf 100644
--- a/api/samutil.c
+++ b/api/samutil.c
@@ -1,4 +1,4 @@
-/* $Id: samutil.c,v 1.76 2005/04/26 21:33:52 kans Exp $
+/* $Id: samutil.c,v 1.79 2016/06/21 21:42:36 kans Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -23,256 +23,18 @@
*
* ===========================================================================
*
-* File Name: $Id: samutil.c,v 1.76 2005/04/26 21:33:52 kans Exp $
+* File Name: $Id: samutil.c,v 1.79 2016/06/21 21:42:36 kans Exp $
*
* Author: Lewis Geer
*
* Version Creation Date: 8/12/99
*
-* $Revision: 1.76 $
+* $Revision: 1.79 $
*
* File Description: Utility functions for AlignIds and SeqAlignLocs
*
* Modifications:
* --------------------------------------------------------------------------
-* $Log: samutil.c,v $
-* Revision 1.76 2005/04/26 21:33:52 kans
-* added SEQID_GPIPE
-*
-* Revision 1.75 2001/10/03 00:15:47 vakatov
-* Replaced some ASSERTs by VERIFYs
-*
-* Revision 1.74 2001/08/07 17:22:52 kans
-* added support for third party annotation SeqIDs
-*
-* Revision 1.73 2001/05/25 19:05:36 vakatov
-* Nested comment typo fixed
-*
-* Revision 1.72 2001/02/07 16:51:36 hurwitz
-* bug fix
-*
-* Revision 1.71 2001/02/06 20:52:01 hurwitz
-* memory leak fix
-*
-* Revision 1.70 2001/01/26 23:26:17 hurwitz
-* bug fix for DDE_GetMsaTxtNodeGivenBioseqCoord
-*
-* Revision 1.69 2001/01/24 23:04:01 hurwitz
-* added a couple utility functions
-*
-* Revision 1.68 2001/01/10 23:38:39 lewisg
-* fix seqid and various memory leaks
-*
-* Revision 1.67 2001/01/03 22:16:52 hurwitz
-* fix potential memory leak
-*
-* Revision 1.66 2000/12/29 00:43:20 hurwitz
-* got rid of some asserts
-*
-* Revision 1.65 2000/11/01 20:50:21 hurwitz
-* made functions for get block starts and stops from populated paraGs
-*
-* Revision 1.64 2000/10/16 23:45:02 hurwitz
-* fixed DDE_GetStart for case of no left tail
-*
-* Revision 1.63 2000/10/05 21:27:42 hurwitz
-* bug fix for making ruler, added functions to get bioseq start and len of each aligned block
-*
-* Revision 1.62 2000/09/08 21:47:57 hurwitz
-* added DDE_GetNumResidues function
-*
-* Revision 1.61 2000/08/25 22:10:32 hurwitz
-* added utility function
-*
-* Revision 1.60 2000/08/24 19:15:46 hurwitz
-* bug fix: undo and redo functions cause quit query dialog
-*
-* Revision 1.59 2000/08/23 20:06:49 hurwitz
-* fixes to DDE_PopListFree
-*
-* Revision 1.58 2000/08/07 23:02:39 hurwitz
-* when merging panels fails, scroll to spot where alignments differ
-*
-* Revision 1.57 2000/08/03 17:12:37 hurwitz
-* added functions to check if alignments are mergeable
-*
-* Revision 1.56 2000/07/17 17:49:12 hurwitz
-* fixed bug. when copying ParaG's need to copy all rows so edits can be saved properly
-*
-* Revision 1.55 2000/07/05 19:23:12 lewisg
-* add two panes to ddv, update msvc project files
-*
-* Revision 1.54 2000/07/05 18:42:16 hurwitz
-* added split block function to DDV
-*
-* Revision 1.53 2000/06/30 22:31:51 hurwitz
-* added merge block function for DDV
-*
-* Revision 1.52 2000/06/22 20:56:50 hurwitz
-* assorted bug fixes
-*
-* Revision 1.51 2000/06/21 21:20:56 hurwitz
-* a couple bug fixes
-*
-* Revision 1.50 2000/06/15 20:26:05 hurwitz
-* added left/right/center justify for DDE
-*
-* Revision 1.49 2000/06/08 20:04:39 hurwitz
-* made warning about converting to true multiple alignment into a Message window, and other small fixes
-*
-* Revision 1.48 2000/06/07 19:09:35 hurwitz
-* made DDE_ReMakeRuler work with linked list of ParaGs
-*
-* Revision 1.47 2000/05/31 23:07:26 hurwitz
-* made NoGaps a runtime parameter, fixed bug with vertical scroll of show/hide list, save edits query is not performed if nothing to save
-*
-* Revision 1.46 2000/05/25 21:40:42 hurwitz
-* rows hidden in DDV are hidden in DDE, can save edits when rows are hidden in DDE
-*
-* Revision 1.45 2000/05/19 13:48:32 hurwitz
-* made a version of DDE that doesn't allow aligned gaps, changed wording for adding new rows
-*
-* Revision 1.44 2000/05/16 19:43:02 hurwitz
-* grey out create block, delete block, undo, and redo as needed
-*
-* Revision 1.43 2000/05/12 21:18:13 hurwitz
-* added window asking if user wants to save unsaved edits for dde
-*
-* Revision 1.42 2000/05/10 21:54:54 hurwitz
-* free memory when DDE is closed
-*
-* Revision 1.41 2000/05/08 16:28:25 wheelan
-* fix SAM_ReplaceGI for dense-diag alignments
-*
-* Revision 1.40 2000/05/05 20:24:14 hurwitz
-* some bug fixes, also redraw proper block in DDE after a save operation that causes a merge of 2 blocks
-*
-* Revision 1.39 2000/05/04 22:43:38 hurwitz
-* don't launch DDE on top of DDV, change some wording, redraw DDE after save to AlnMgr
-*
-* Revision 1.38 2000/05/02 19:50:37 hurwitz
-* fixed some bugs with launching DDE from DDV, added new alnMgr fn for positioning DDE on proper column
-*
-* Revision 1.37 2000/04/26 21:53:22 hurwitz
-* added save function to tell AlnMgr about edits made in DDE
-*
-* Revision 1.36 2000/04/21 23:00:51 hurwitz
-* can launch DDE from DDV
-*
-* Revision 1.35 2000/04/13 22:03:32 hurwitz
-* a couple more small bug fixes
-*
-* Revision 1.34 2000/04/13 18:57:03 hurwitz
-* for DDE: many bug fixes, also get rid of columns that are all unaligned gaps
-*
-* Revision 1.33 2000/04/10 20:58:42 hurwitz
-* added GUI controls for DeleteBlock in DDE
-*
-* Revision 1.32 2000/04/07 16:21:08 hurwitz
-* made delete block faster, added delete block to edit menu
-*
-* Revision 1.31 2000/04/05 20:52:35 hurwitz
-* added GUI control for shifting left and right alignment boundaries
-*
-* Revision 1.30 2000/04/03 22:26:31 hurwitz
-* can now shift a row with click and drag
-*
-* Revision 1.29 2000/03/29 20:02:47 hurwitz
-* keep track of master during move row operations
-*
-* Revision 1.28 2000/03/25 00:22:09 hurwitz
-* put DDE_StackPtr in DDV_Main, add to stack inside DDE api's, added insert char, delete char, home and end keyboard control
-*
-* Revision 1.27 2000/03/23 00:00:00 hurwitz
-* DDE api's are called with stack now
-*
-* Revision 1.26 2000/03/20 22:22:48 hurwitz
-* added more checks to verify subroutine, 1 bug fix
-*
-* Revision 1.25 2000/03/16 15:51:31 hurwitz
-* added function to create an aligned block
-*
-* Revision 1.24 2000/03/14 22:08:21 hurwitz
-* undo and redo working properly, restore-original function added
-*
-* Revision 1.23 2000/03/10 23:01:43 hurwitz
-* added undo and redo functions, first pass
-*
-* Revision 1.22 2000/03/09 22:28:40 hurwitz
-* added shift block and delete block, a bug fix too
-*
-* Revision 1.21 2000/03/08 22:02:07 hurwitz
-* added verify function, debugging, handle align_start != 0
-*
-* Revision 1.20 2000/03/06 22:45:58 hurwitz
-* can shift right boundary of an aligned block left and right, DDVRuler updates added
-*
-* Revision 1.19 2000/03/01 22:49:40 lewisg
-* import bioseq, neatlyindex, get rid of dead code
-*
-* Revision 1.18 2000/02/29 21:13:06 hurwitz
-* added low level functions for shifting left and right the left alignment boundary
-*
-* Revision 1.17 2000/02/28 16:28:39 hurwitz
-* added functions for deleting an aligned gap
-*
-* Revision 1.16 2000/02/24 23:37:00 hurwitz
-* added ability to insert gaps
-*
-* Revision 1.15 2000/02/18 16:06:22 hurwitz
-* for editing multiple sequence alignments: shift row right now working
-*
-* Revision 1.14 2000/02/15 17:43:49 hurwitz
-* reverted to 1.12
-*
-* Revision 1.12 2000/02/14 23:09:08 hurwitz
-* got rid of calls to DDV_RulerDescrNew() because of library conflicts
-*
-* Revision 1.11 2000/02/14 20:58:57 hurwitz
-* added functions for editing multiple sequence alignments: hide/show row, move row, shift row left
-*
-* Revision 1.10 2000/01/24 20:54:34 vakatov
-* SAM_ViewString:: made #define to fix for the DLL build on PC
-*
-* Revision 1.9 2000/01/24 16:11:13 lewisg
-* speed up seqid comparison in color manager, make fast windows version of SetColor()
-*
-* Revision 1.8 1999/12/11 01:30:34 lewisg
-* fix bugs with sharing colors between ddv and cn3d
-*
-* Revision 1.7 1999/12/03 23:17:24 lewisg
-* Patrick's new global update msg, argument passing when launching ddv, experimental editing
-*
-* Revision 1.6 1999/11/24 21:24:30 vakatov
-* Fixed for the C++ and/or MSVC DLL compilation
-*
-* Revision 1.5 1999/10/05 23:18:15 lewisg
-* add ddv and udv to cn3d with memory management
-*
-* Revision 1.4 1999/09/27 17:53:08 kans
-* seqalign entityID/itemID/itemtype now in GatherIndex substructure
-*
-* Revision 1.3 1999/09/27 17:49:12 lewisg
-* fix denseseg constructor, bug in valnode loops, add SAM_ValNodeByPosition
-*
-* Revision 1.2 1999/09/21 19:33:53 lewisg
-* fix broken declarations
-*
-* Revision 1.1 1999/09/21 18:09:14 lewisg
-* binary search added to color manager, various bug fixes, etc.
-*
-* Revision 1.4 1999/09/03 23:27:32 lewisg
-* minor speedups by avoiding casts
-*
-* Revision 1.3 1999/09/03 14:01:40 lewisg
-* use faster seqid compare SAM_CompareID
-*
-* Revision 1.2 1999/09/01 23:02:59 lewisg
-* binary search in color functions
-*
-* Revision 1.1 1999/08/13 22:08:16 lewisg
-* color manager updated to use alignment coords
-*
*
* ==========================================================================
*/
@@ -297,7 +59,7 @@ NLM_EXTERN void SAM_ReplaceGI(SeqAlign *salp)
DenseSegPtr dsp;
DenseDiagPtr ddp;
SeqId *sip, *sipBest, *sipPrev, *wholesip;
- Int4 gi;
+ BIG_ID gi;
for(;salp != NULL; salp = salp->next) {
if (salp->segtype == SAS_DENSEG) {
diff --git a/api/seqmgr.c b/api/seqmgr.c
index 0ab376c6..bc6f9b41 100644
--- a/api/seqmgr.c
+++ b/api/seqmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.324 $
+* $Revision: 6.344 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -38,8 +38,6 @@
* Date Name Description of modification
* ------- ---------- -----------------------------------------------------
*
-*
-*
* ==========================================================================
*/
@@ -676,13 +674,13 @@ static BioseqPtr NEAR BioseqFindFunc (SeqIdPtr sid, Boolean reload_from_cache, B
Int4 i, j, num, imin, imax, retval;
SeqIdIndexElementPtr PNTR sipp;
CharPtr tmp;
- Char buf[80];
+ Char buf[128];
Boolean do_return;
SeqMgrPtr smp;
ObjMgrPtr omp;
ObjMgrDataPtr omdp = NULL;
BioseqPtr bsp = NULL, tbsp;
- SeqEntryPtr scope;
+ SeqEntryPtr scope = NULL;
if (sid == NULL)
return NULL;
@@ -760,7 +758,7 @@ done_cache:
/* stringify as in SeqMgrAdd */
- MakeReversedSeqIdString (sid, buf, 79); /* common function to make id, call RevStringUpper */
+ MakeReversedSeqIdString (sid, buf, sizeof (buf) - 1); /* common function to make id, call RevStringUpper */
imin = 0;
@@ -1098,14 +1096,14 @@ NLM_EXTERN BioseqPtr LIBCALL BioseqFetch (SeqIdPtr sid, Uint1 ld_type)
/*****************************************************************************
*
-* GetSeqIdForGI(Int4)
+* GetSeqIdForGI(BIG_ID)
* returns the SeqId for a GI
* returns NULL if can't find it
* The returned SeqId is allocated. Caller must free it.
*
*****************************************************************************/
typedef struct seqidblock {
- Int4 uid;
+ BIG_ID uid;
time_t touch;
SeqIdPtr sip;
CharPtr revstr;
@@ -1125,10 +1123,10 @@ static Int2 seqidunidxcount = 0;
static TNlmRWlock sid_RWlock = NULL;
*/
-NLM_EXTERN void RecordInSeqIdGiCache (Int4 gi, SeqIdPtr sip)
+NLM_EXTERN void RecordInSeqIdGiCache ( BIG_ID gi, SeqIdPtr sip)
{
- Char buf [80];
+ Char buf [128];
ValNodePtr vnp;
SeqIdBlockPtr sibp;
Int4 retval;
@@ -1451,13 +1449,14 @@ static Boolean UpdateSeqIdGiArrays (void)
return TRUE;
}
-NLM_EXTERN Boolean FetchFromSeqIdGiCache (Int4 gi, SeqIdPtr PNTR sipp)
+NLM_EXTERN Boolean FetchFromSeqIdGiCache ( BIG_ID gi, SeqIdPtr PNTR sipp)
{
ValNodePtr vnp;
SeqIdBlockPtr sibp = NULL;
Int2 left, right, mid;
- Int4 compare, ret;
+ BIG_ID compare;
+ Int4 ret;
Boolean done = FALSE;
@@ -1536,7 +1535,7 @@ NLM_EXTERN Boolean FetchFromSeqIdGiCache (Int4 gi, SeqIdPtr PNTR sipp)
return done;
}
-NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI (Int4 gi)
+NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI (BIG_ID gi)
{
BioseqPtr bsp = NULL;
ObjMgrProcPtr ompp;
@@ -1656,10 +1655,10 @@ NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI (Int4 gi)
* returns 0 if can't find it
*
*****************************************************************************/
-NLM_EXTERN Boolean FetchFromGiSeqIdCache (SeqIdPtr sip, Int4Ptr gip)
+NLM_EXTERN Boolean FetchFromGiSeqIdCache (SeqIdPtr sip, BIG_ID_PNTR gip)
{
- Char buf [80];
+ Char buf [128];
ValNodePtr vnp;
SeqIdBlockPtr sibp = NULL;
Int2 left, right, mid;
@@ -1740,14 +1739,14 @@ NLM_EXTERN Boolean FetchFromGiSeqIdCache (SeqIdPtr sip, Int4Ptr gip)
return done;
}
-NLM_EXTERN Int4 LIBCALL GetGIForSeqId (SeqIdPtr sid)
+NLM_EXTERN BIG_ID LIBCALL GetGIForSeqId (SeqIdPtr sid)
{
BioseqPtr bsp = NULL;
ObjMgrProcPtr ompp;
OMProcControl ompc;
Int2 ret;
SeqIdPtr sip;
- Int4 gi = 0;
+ BIG_ID gi = 0;
SeqEntryPtr oldscope = NULL;
@@ -1798,7 +1797,7 @@ NLM_EXTERN Int4 LIBCALL GetGIForSeqId (SeqIdPtr sid)
{
if (sip->choice == SEQID_GI)
{
- gi = sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
SeqIdFree(sip);
RecordInSeqIdGiCache (gi, sid);
return gi;
@@ -2742,6 +2741,7 @@ static Boolean NextLitLength (DeltaSeqPtr next, Int4Ptr lenp)
if (next == NULL || next->choice != 2) return FALSE;
slp = (SeqLitPtr) next->data.ptrvalue;
if (slp == NULL || slp->seq_data == NULL) return FALSE;
+ if (slp->seq_data_type == Seq_code_gap) return FALSE;
*lenp = slp->length;
return TRUE;
}
@@ -2809,7 +2809,7 @@ NLM_EXTERN Boolean LIBCALL CountGapsInDeltaSeq (BioseqPtr bsp, Int4Ptr num_segs,
if (slp == NULL) break;
tlen = slp->length;
len += tlen;
- if (slp->seq_data != NULL)
+ if (slp->seq_data != NULL && slp->seq_data_type != Seq_code_gap)
{
residues += slp->length;
while (NextLitLength (next, &nxtlen)) {
@@ -3059,7 +3059,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
BioseqPtr PNTR bspp, bsp;
Int4 i, total, k, old_BioseqIndexCnt;
SeqIdPtr sip;
- Char buf[80];
+ Char buf[128];
/*
CharPtr tmp;
*/
@@ -3142,7 +3142,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
{
tmp = tsip->accession;
tsip->accession = NULL;
- SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, 79);
+ SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
SeqMgrAddIndexElement(smp, bsp, buf,sort_now);
tsip->accession = tmp;
}
@@ -3150,7 +3150,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
/*
tmp = tsip->name;
tsip->name = NULL;
- SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, 79);
+ SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
*/
@@ -3168,20 +3168,20 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
if (tsi.name != NULL) {
tsi.accession = NULL;
- SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, 79);
+ SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
tsi.accession = tsip->accession;
}
tsi.name = NULL;
- SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, 79);
+ SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
if (version) {
tsi.version = 0;
- SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, 79);
+ SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
/*
tsip->version = 0;
- SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, 79);
+ SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
tsip->version = version;
*/
@@ -3195,7 +3195,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
*/
break;
default:
- SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, 79);
+ SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
break;
}
@@ -4001,11 +4001,13 @@ static void SeqMgrClearIndexesProc (SeqEntryPtr sep, Pointer mydata, Int4 index,
if (IS_Bioseq (sep)) {
bsp = (BioseqPtr) sep->data.ptrvalue;
if (bsp == NULL) return;
+ bsp->seqentry = NULL;
omdp = SeqMgrGetOmdpForBioseq (bsp);
sap = bsp->annot;
} else if (IS_Bioseq_set (sep)) {
bssp = (BioseqSetPtr) sep->data.ptrvalue;
if (bssp == NULL) return;
+ bssp->seqentry = NULL;
omdp = SeqMgrGetOmdpForPointer (bssp);
sap = bssp->annot;
} else return;
@@ -4076,7 +4078,7 @@ NLM_EXTERN Boolean IsNonGappedLiteral (BioseqPtr bsp)
if (dsp->choice != 2) return FALSE; /* not Seq-lit */
slitp = (SeqLitPtr) dsp->data.ptrvalue;
if (slitp == NULL) return FALSE;
- if (slitp->seq_data == NULL || slitp->length == 0) return FALSE; /* gap */
+ if (slitp->seq_data == NULL || slitp->length == 0 || slitp->seq_data_type == Seq_code_gap) return FALSE; /* gap */
}
return TRUE;
@@ -4114,7 +4116,7 @@ static BioseqPtr FindAppropriateBioseq (SeqLocPtr loc, BioseqPtr tryfirst, BoolP
/* first see if this is raw local part of segmented bioseq */
- if (bsp != NULL && (bsp->repr == Seq_repr_raw || IsNonGappedLiteral (bsp))) {
+ if (bsp != NULL && (bsp->repr == Seq_repr_raw || /* IsNonGappedLiteral (bsp) */ bsp->repr == Seq_repr_delta)) {
omdp = SeqMgrGetOmdpForBioseq (bsp);
if (omdp != NULL && omdp->datatype == OBJ_BIOSEQ) {
bspextra = (BioseqExtraPtr) omdp->extradata;
@@ -4223,7 +4225,7 @@ NLM_EXTERN BioseqPtr LIBCALL SeqMgrGetParentOfPart (BioseqPtr bsp,
{
BioseqExtraPtr bspextra;
- Char buf [80];
+ Char buf [128];
Int2 compare;
Uint2 entityID;
Int4 i;
@@ -4794,6 +4796,7 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetDesiredFeature (Uint2 entityID, BioseqPtr
context->dnaStop = item->dnaStop;
context->partialL = item->partialL;
context->partialR = item->partialR;
+ context->external = item->external;
context->farloc = item->farloc;
context->bad_order = item->bad_order;
context->mixed_strand = item->mixed_strand;
@@ -5380,7 +5383,16 @@ static void ProcessFeatureProducts (SeqFeatPtr sfp, Uint4 itemID, GatherObjectPt
}
/* if (omdp->tempload == TL_NOT_TEMP) { */
+ /* check bsp mol against feature type */
+ if (bspextra->cdsOrRnaFeat != NULL) {
+ /* do not override if set */
+ } else if (sfp->data.choice == SEQFEAT_CDREGION && ISA_aa (bsp->mol)) {
+ bspextra->cdsOrRnaFeat = sfp;
+ } else if (sfp->data.choice == SEQFEAT_RNA && ISA_na (bsp->mol)) {
bspextra->cdsOrRnaFeat = sfp;
+ } else if (sfp->data.choice == SEQFEAT_PROT && ISA_aa (bsp->mol)) {
+ bspextra->cdsOrRnaFeat = sfp;
+ }
/* } */
/* add to prodlisthead list for gather by get_feats_product */
@@ -5466,8 +5478,8 @@ static Boolean SimpleIvalsCalculation (SeqLocPtr slp, BioseqPtr bsp, Boolean fli
static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
BioseqPtr bsp, ExtraIndexPtr exindx, SeqFeatPtr sfp,
Int4 left, Int4 right, Uint4 itemID, Uint2 subtype,
- Boolean farloc, Boolean bad_order, Boolean mixed_strand,
- Boolean ignore, Boolean ts_image)
+ Boolean external, Boolean farloc, Boolean bad_order,
+ Boolean mixed_strand, Boolean ignore, Boolean ts_image)
{
Char buf [129];
@@ -5546,6 +5558,7 @@ static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
}
item->dnaStop = -1;
CheckSeqLocForPartial (sfp->location, &(item->partialL), &(item->partialR));
+ item->external = external;
item->farloc = farloc;
item->bad_order = bad_order;
item->mixed_strand = mixed_strand;
@@ -5744,7 +5757,7 @@ static Boolean RecordFeatureOnBioseq (
{
Boolean bad_order;
BioseqExtraPtr bspextra;
- Char buf [81];
+ Char buf [128];
Int2 count;
CharPtr ctmp;
Int4 diff;
@@ -5803,7 +5816,7 @@ static Boolean RecordFeatureOnBioseq (
if (left == -1 || right == -1) {
GatherContext gc;
GatherContextPtr gcp;
- Char lastbspid [41];
+ Char lastbspid [128];
SeqIdPtr sip;
MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
gcp = &gc;
@@ -5913,7 +5926,7 @@ static Boolean RecordFeatureOnBioseq (
}
RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left,
- right, gop->itemID, gop->subtype, usingLocalBsp,
+ right, gop->itemID, gop->subtype, gop->external, usingLocalBsp,
bad_order, mixed_strand, special_case, ts_image);
/* record gene, publication, and biosource features twice if spanning the origin */
@@ -5925,7 +5938,7 @@ static Boolean RecordFeatureOnBioseq (
sfp->idx.subtype == FEATDEF_operon) {
RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left + bsp->length,
- right + bsp->length, gop->itemID, gop->subtype, usingLocalBsp,
+ right + bsp->length, gop->itemID, gop->subtype, gop->external, usingLocalBsp,
bad_order, mixed_strand, TRUE, ts_image);
}
@@ -6070,10 +6083,17 @@ static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
if (sap != NULL && sap->type == 1) {
bsp = NULL;
sfp = (SeqFeatPtr) sap->data;
+ /* if empty Seq-annot with Seq-annot.descr, use last Bioseq */
+ if (sfp == NULL) {
+ bsp = exindx->lastbsp;
+ }
while (sfp != NULL && bsp == NULL) {
slp = sfp->location;
if (slp != NULL) {
bsp = BioseqFindFromSeqLoc (slp);
+ if (bsp == NULL && gop->external) {
+ bsp = exindx->lastbsp;
+ }
}
sfp = sfp->next;
}
@@ -6224,7 +6244,7 @@ static Boolean RecordSegmentsInBioseqs (GatherObjectPtr gop)
{
BioseqPtr bsp = NULL;
BioseqExtraPtr bspextra;
- Char buf [80];
+ Char buf [128];
Dbtag db;
DeltaSeqPtr dsp;
ExtraIndexPtr exindx;
@@ -6294,6 +6314,8 @@ static Boolean RecordSegmentsInBioseqs (GatherObjectPtr gop)
exindx->lastbsp = bsp;
} else if (bsp->repr == Seq_repr_delta) {
exindx->lastbsp = bsp;
+ } else if (bsp->repr == Seq_repr_ref) {
+ exindx->lastbsp = bsp;
} else {
exindx->lastbsp = NULL;
}
@@ -6548,16 +6570,22 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
CdRegionPtr crp1;
CdRegionPtr crp2;
Int2 i;
+ Char id1 [128];
+ Char id2 [128];
Int2 j;
Int2 numivals;
SeqAnnotPtr sap1;
SeqAnnotPtr sap2;
+ SeqIdPtr sip1;
+ SeqIdPtr sip2;
SMFeatItemPtr PNTR spp1 = vp1;
SMFeatItemPtr PNTR spp2 = vp2;
SMFeatItemPtr sp1;
SMFeatItemPtr sp2;
SeqFeatPtr sfp1;
SeqFeatPtr sfp2;
+ SeqLocPtr slp1;
+ SeqLocPtr slp2;
Uint1 subtype1;
Uint1 subtype2;
@@ -6608,6 +6636,14 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
return 1;
}
+ /* precursor RNA before non-coding RNA */
+
+ if (sp1->subtype == FEATDEF_preRNA && sp2->subtype == FEATDEF_ncRNA) {
+ return -1;
+ } else if (sp2->subtype == FEATDEF_preRNA && sp1->subtype == FEATDEF_ncRNA) {
+ return 1;
+ }
+
/* then cds features */
if (sp1->subtype == FEATDEF_CDS && sp2->subtype != FEATDEF_CDS) {
@@ -6674,6 +6710,7 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
sfp1 = sp1->sfp;
sfp2 = sp2->sfp;
if (sfp1 != NULL && sfp2 != NULL) {
+
crp1 = (CdRegionPtr) sfp1->data.value.ptrvalue;
crp2 = (CdRegionPtr) sfp2->data.value.ptrvalue;
if (crp1 != NULL && crp2 != NULL) {
@@ -6688,6 +6725,37 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
}
}
+ /* then compare cds or mRNA product identifiers */
+
+ sfp1 = sp1->sfp;
+ sfp2 = sp2->sfp;
+ if (sfp1 != NULL && sfp2 != NULL) {
+ slp1 = (SeqLocPtr) sfp1->product;
+ slp2 = (SeqLocPtr) sfp2->product;
+ if (slp1 != NULL && slp2 == NULL) {
+ return 1;
+ } else if (slp1 == NULL && slp2 != NULL) {
+ return -1;
+ } else if (slp1 != NULL && slp2 != NULL) {
+ sip1 = SeqLocId (slp1);
+ sip2 = SeqLocId (slp2);
+ if (sip1 != NULL && sip2 == NULL) {
+ return 1;
+ } else if (sip1 == NULL && sip2 != NULL) {
+ return -1;
+ } else if (sip1 != NULL && sip2 != NULL) {
+ SeqIdWrite (sip1, id1, PRINTID_FASTA_LONG, sizeof (id1) - 1);
+ SeqIdWrite (sip2, id2, PRINTID_FASTA_LONG, sizeof (id2) - 1);
+ compare = StringCmp (id1, id2);
+ if (compare > 0) {
+ return 1;
+ } else if (compare < 0) {
+ return -1;
+ }
+ }
+ }
+ }
+
/* then compare feature label */
compare = StringCmp (sp1->label, sp2->label);
@@ -6788,6 +6856,14 @@ static int LIBCALLBACK SortFeatItemListByRev (VoidPtr vp1, VoidPtr vp2)
return 1;
}
+ /* precursor RNA before non-coding RNA */
+
+ if (sp1->subtype == FEATDEF_preRNA && sp2->subtype == FEATDEF_ncRNA) {
+ return -1;
+ } else if (sp2->subtype == FEATDEF_preRNA && sp1->subtype == FEATDEF_ncRNA) {
+ return 1;
+ }
+
/* then cds features */
if (sp1->subtype == FEATDEF_CDS && sp2->subtype != FEATDEF_CDS) {
@@ -7012,7 +7088,7 @@ static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp)
/* check for raw part packaged with segmented bioseq */
- if ((bsp->repr == Seq_repr_raw || IsNonGappedLiteral (bsp)) &&
+ if ((bsp->repr == Seq_repr_raw || /* IsNonGappedLiteral (bsp) */ bsp->repr == Seq_repr_delta) &&
lastsegbsp != NULL && *lastsegbsp != NULL && WithinPartsSet (bsp)) {
omdp = SeqMgrGetOmdpForBioseq (bsp);
if (omdp == NULL) return;
@@ -7030,7 +7106,7 @@ static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp)
return;
}
- if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta) return;
+ if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta && bsp->repr != Seq_repr_ref) return;
omdp = SeqMgrGetOmdpForBioseq (bsp);
if (omdp == NULL) return;
@@ -7361,7 +7437,7 @@ static void IndexFeaturesOnEntity (SeqEntryPtr sep, SMFeatItemPtr PNTR featsByID
if (bspextra == NULL) return;
numfeats = bspextra->numfeats;
- if (bspextra->featsByID != NULL || numfeats > 0) {
+ if (bspextra->featsByID != NULL && numfeats > 0) {
count = *countP;
for (i = 0; i < numfeats; i++, count++) {
@@ -7696,7 +7772,7 @@ static void DoSegmentedProtein (BioseqPtr bsp, Pointer userdata)
if (parent == NULL) return;
}
- omdp = SeqMgrGetOmdpForBioseq (bsp);
+ omdp = SeqMgrGetOmdpForBioseq (bsp);
if (omdp == NULL) return;
bspextra = (BioseqExtraPtr) omdp->extradata;
@@ -8454,6 +8530,8 @@ NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx (SeqFeatPtr sfp, ObjectIdPtr P
*oipP = NULL;
}
if (sfp == NULL) return NULL;
+
+ /* Look for SeqFeatData xref, maybe with Feature ID as well */
xref = sfp->xref;
while (xref != NULL && xref->data.choice != SEQFEAT_GENE) {
xref = xref->next;
@@ -8466,8 +8544,20 @@ NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx (SeqFeatPtr sfp, ObjectIdPtr P
*oipP = oip;
}
}
+ return grp;
}
- return grp;
+
+ /* Look for Feature ID xref on its own */
+ for (xref = sfp->xref; xref != NULL; xref = xref->next) {
+ if (xref->id.choice == 3) {
+ oip = (ObjectIdPtr) xref->id.value.ptrvalue;
+ if (oip != NULL && oipP != NULL) {
+ *oipP = oip;
+ }
+ }
+ }
+
+ return NULL;
}
NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed (GeneRefPtr grp)
@@ -9458,6 +9548,7 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureInIndex (BioseqPtr bsp, VoidPtr fe
context->dnaStop = item->dnaStop;
context->partialL = item->partialL;
context->partialR = item->partialR;
+ context->external = item->external;
context->farloc = item->farloc;
context->bad_order = item->bad_order;
context->mixed_strand = item->mixed_strand;
@@ -9682,6 +9773,7 @@ static SeqFeatPtr LIBCALL SeqMgrGetNextFeatureEx (BioseqPtr bsp, SeqFeatPtr curr
context->dnaStop = item->dnaStop;
context->partialL = item->partialL;
context->partialR = item->partialR;
+ context->external = item->external;
context->farloc = item->farloc;
context->bad_order = item->bad_order;
context->mixed_strand = item->mixed_strand;
@@ -9932,7 +10024,7 @@ NLM_EXTERN Int4 LIBCALL SeqMgrExploreSegments (BioseqPtr bsp, Pointer userdata,
SeqLocPtr slp;
if (bsp == NULL) return 0;
- if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta) return 0;
+ if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta && bsp->repr != Seq_repr_ref) return 0;
omdp = SeqMgrGetOmdpForBioseq (bsp);
if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return 0;
if (userfunc == NULL) return 0;
@@ -10177,6 +10269,7 @@ static Int4 LIBCALL SeqMgrExploreFeaturesInt (BioseqPtr bsp, Pointer userdata,
context.dnaStop = item->dnaStop;
context.partialL = item->partialL;
context.partialR = item->partialR;
+ context.external = item->external;
context.farloc = item->farloc;
context.bad_order = item->bad_order;
context.mixed_strand = item->mixed_strand;
@@ -10342,6 +10435,7 @@ NLM_EXTERN Int2 LIBCALL SeqMgrVisitFeatures (Uint2 entityID, Pointer userdata,
context.dnaStop = item->dnaStop;
context.partialL = item->partialL;
context.partialR = item->partialR;
+ context.external = item->external;
context.farloc = item->farloc;
context.bad_order = item->bad_order;
context.mixed_strand = item->mixed_strand;
@@ -10374,7 +10468,7 @@ static SMSeqIdxPtr BinarySearchPartToSegmentMap (BioseqPtr in, Int4 pos, BioseqP
{
BioseqExtraPtr bspextra;
- Char buf [80];
+ Char buf [128];
Int2 compare;
ObjMgrDataPtr omdp;
SMSeqIdxPtr PNTR partsBySeqId;
@@ -10590,7 +10684,7 @@ NLM_EXTERN SeqLocPtr TrimLocInSegment (
{
BioseqPtr bsp;
BioseqExtraPtr bspextra;
- Char buf [80];
+ Char buf [128];
Int2 compare;
ObjMgrDataPtr omdp;
Boolean partial5;
@@ -11035,7 +11129,7 @@ static void CollectAllSegments (SeqLocPtr slp, Pointer userdata)
SeqLocPtr loc;
SeqIdPtr sip;
TextSeqIdPtr tsip;
- Int4 uid = 0;
+ BIG_ID uid = 0;
ValNodePtr vnp;
if (slp == NULL || userdata == NULL) return;
@@ -11050,7 +11144,7 @@ static void CollectAllSegments (SeqLocPtr slp, Pointer userdata)
}
if (sip == NULL) return;
if (sip->choice == SEQID_GI) {
- uid = (Int4) sip->data.intvalue;
+ uid = (BIG_ID) sip->data.intvalue;
} else {
switch (sip->choice) {
case SEQID_GENBANK :
@@ -11073,6 +11167,9 @@ static void CollectAllSegments (SeqLocPtr slp, Pointer userdata)
}
}
break;
+ case SEQID_GENERAL:
+ uid = 0;
+ break;
default :
break;
}
@@ -11090,7 +11187,7 @@ static void CollectAllSegments (SeqLocPtr slp, Pointer userdata)
}
if (uid < 1) return;
- vnp = ValNodeAddInt (NULL, 0, uid);
+ vnp = ValNodeAddBigInt (NULL, 0, uid);
if (vnp == NULL) return;
/* link in head of uid list */
@@ -11127,6 +11224,11 @@ static void CollectAllBioseqs (BioseqPtr bsp, Pointer userdata)
}
}
}
+ } else if (bsp->repr == Seq_repr_ref) {
+ slp = (SeqLocPtr) bsp->seq_ext;
+ if (slp != NULL && slp->choice != SEQLOC_NULL) {
+ CollectAllSegments (slp, userdata);
+ }
}
}
@@ -11198,7 +11300,7 @@ static void FetchFromUidList (
bsp = (BioseqPtr) vnp->data.ptrvalue;
if (bsp == NULL) continue;
- if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta) continue;
+ if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta && bsp->repr != Seq_repr_ref) continue;
CollectAllBioseqs (bsp, (Pointer) &uidlist);
}
@@ -11298,6 +11400,11 @@ static void LookForNonGiBioseqs (
}
}
}
+ } else if (bsp->repr == Seq_repr_ref) {
+ slp = (SeqLocPtr) bsp->seq_ext;
+ if (slp != NULL && slp->choice != SEQLOC_NULL) {
+ LookForNonGiSegments (slp, NULL, userdata);
+ }
}
}
@@ -11601,7 +11708,7 @@ NLM_EXTERN void LIBCALL SeqMgrSetSeqIdSetFunc (SeqIdSetLookupFunc func)
SeqMgrUnlock ();
}
-NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdSetForGI (Int4 gi)
+NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdSetForGI (BIG_ID gi)
{
SeqIdSetLookupFunc func;
@@ -11880,7 +11987,7 @@ static void SeqMgrReport (void)
ObjMgrPtr omp;
SeqIdIndexElementPtr PNTR sipp;
SeqMgrPtr smp;
- Char str [41];
+ Char str [128];
omp = ObjMgrGet ();
if (omp != NULL) {
diff --git a/api/seqmgr.h b/api/seqmgr.h
index 6895d0eb..08474c3e 100644
--- a/api/seqmgr.h
+++ b/api/seqmgr.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.74 $
+* $Revision: 6.78 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -93,9 +93,9 @@ typedef BioseqPtr (LIBCALLBACK * BSFetchTop)
typedef BioseqPtr (LIBCALLBACK * BSFetch) PROTO((SeqIdPtr sip, Pointer data));
typedef Int4 (LIBCALLBACK * SIDPreCacheFunc) (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, Boolean alignments, Boolean history, Boolean inference, Boolean others);
-typedef Int4 (LIBCALLBACK * SeqLenLookupFunc) (Int4 gi);
-typedef CharPtr (LIBCALLBACK * AccnVerLookupFunc) (Int4 gi);
-typedef SeqIdPtr (LIBCALLBACK * SeqIdSetLookupFunc) (Int4 gi);
+typedef Int4 (LIBCALLBACK * SeqLenLookupFunc) (BIG_ID gi);
+typedef CharPtr (LIBCALLBACK * AccnVerLookupFunc) (BIG_ID gi);
+typedef SeqIdPtr (LIBCALLBACK * SeqIdSetLookupFunc) (BIG_ID gi);
typedef struct seqidindexelement {
CharPtr str; /* PRINTID_FASTA_SHORT string */
@@ -226,23 +226,23 @@ NLM_EXTERN SeqEntryPtr LIBCALL SeqMgrGetSeqEntryForEntityID PROTO((Int2 id));
/*****************************************************************************
*
-* GetSeqIdForGI(Int4)
+* GetSeqIdForGI(BIG_ID)
* returns the SeqId for a GI
* returns NULL if can't find it
* The returned SeqId is allocated. Caller must free it.
*
*****************************************************************************/
-NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI PROTO((Int4 gi));
+NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI PROTO((BIG_ID gi));
/*****************************************************************************
*
-* GetSeqIdSetForGI(Int4)
+* GetSeqIdSetForGI(BIG_ID)
* returns the chain of all SeqIds for a GI
* returns NULL if can't find it
* The returned SeqId chain is allocated. Caller must free it with SeqIdSetFree.
*
*****************************************************************************/
-NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdSetForGI PROTO((Int4 gi));
+NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdSetForGI PROTO((BIG_ID gi));
/*****************************************************************************
@@ -252,7 +252,7 @@ NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdSetForGI PROTO((Int4 gi));
* returns 0 if can't find it
*
*****************************************************************************/
-NLM_EXTERN Int4 LIBCALL GetGIForSeqId PROTO((SeqIdPtr sid));
+NLM_EXTERN BIG_ID LIBCALL GetGIForSeqId PROTO((SeqIdPtr sid));
/*****************************************************************************
*
@@ -671,10 +671,10 @@ NLM_EXTERN Int4 LIBCALL GetUniGeneIDForSeqId PROTO((SeqIdPtr sip));
* Internal functions to cache gi - SeqId associations
*
*****************************************************************************/
-NLM_EXTERN Boolean FetchFromSeqIdGiCache (Int4 gi, SeqIdPtr PNTR sipp);
-NLM_EXTERN Boolean FetchFromGiSeqIdCache (SeqIdPtr sip, Int4Ptr gip);
+NLM_EXTERN Boolean FetchFromSeqIdGiCache (BIG_ID gi, SeqIdPtr PNTR sipp);
+NLM_EXTERN Boolean FetchFromGiSeqIdCache (SeqIdPtr sip, BIG_ID_PNTR gip);
-NLM_EXTERN void RecordInSeqIdGiCache (Int4 gi, SeqIdPtr sip);
+NLM_EXTERN void RecordInSeqIdGiCache (BIG_ID gi, SeqIdPtr sip);
NLM_EXTERN void FreeSeqIdGiCache (void);
@@ -700,6 +700,7 @@ typedef struct smfeatitem {
Int4 dnaStop; /* last stop on protein mapped to DNA coordinate for flatfile */
Boolean partialL; /* left end is partial */
Boolean partialR; /* right end is partial */
+ Boolean external; /* feature is external to sequence record (or derived gap) */
Boolean farloc; /* location has an accession not packaged in entity */
Boolean bad_order; /* location is out of order - possibly trans-spliced */
Boolean mixed_strand; /* location has mixed strands - possibly trans-spliced */
diff --git a/api/seqport.c b/api/seqport.c
index ac8a3617..d2e50343 100644
--- a/api/seqport.c
+++ b/api/seqport.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.188 $
+* $Revision: 6.197 $
*
* File Description: Ports onto Bioseqs
*
@@ -1754,11 +1754,15 @@ byte */
if (spp->smtp == NULL) /* no conversion, check now */
{
- index = (Int2)residue - (Int2)(spp->sctp->start_at);
- if ((index < 0) || (index >= (Int2)(spp->sctp->num)))
- residue = INVALID_RESIDUE;
- else if (*(spp->sctp->names[index]) == '\0')
+ if (spp->sctp != NULL) {
+ index = (Int2)residue - (Int2)(spp->sctp->start_at);
+ if ((index < 0) || (index >= (Int2)(spp->sctp->num)))
+ residue = INVALID_RESIDUE;
+ else if (*(spp->sctp->names[index]) == '\0')
+ residue = INVALID_RESIDUE;
+ } else {
residue = INVALID_RESIDUE;
+ }
}
}
else if (spp->isa_virtual) /* virtual */
@@ -2050,17 +2054,20 @@ static Int4 SeqPortStreamGap (
Boolean is_na,
Boolean is_virt,
Boolean is_known,
+ Boolean is_seq_gap,
StreamDataPtr sdp
)
{
Char buf [4004];
Char ch, gapchar = '-';
- Boolean expand_gaps, many_dashes, many_pluses, single_dash;
+ Boolean expand_gaps, many_dashes, many_pluses, many_tildes, single_dash;
Int4 len;
if (sdp == NULL) return 0;
+ many_tildes = (Boolean) ((sdp->flags & SEQ_GAP_AS_TILDE) != 0);
+
many_pluses = FALSE;
if (is_virt) {
if ((sdp->flags & SUPPRESS_VIRT_SEQ) != 0) return 0;
@@ -2068,6 +2075,10 @@ static Int4 SeqPortStreamGap (
many_pluses = TRUE;
gapchar = '+';
}
+ } else if (is_seq_gap) {
+ if (many_tildes) {
+ gapchar = '~';
+ }
} else if (is_known) {
if ((sdp->flags & KNOWN_GAP_AS_PLUS) != 0) {
many_pluses = TRUE;
@@ -2079,9 +2090,9 @@ static Int4 SeqPortStreamGap (
single_dash = (Boolean) ((sdp->flags & STREAM_GAP_MASK) == GAP_TO_SINGLE_DASH);
many_dashes = (Boolean) ((sdp->flags & STREAM_GAP_MASK) == EXPAND_GAPS_TO_DASHES);
- /* if both gap flags are false, ignore gap */
+ /* if all gap flags are false, ignore gap */
- if ((! expand_gaps) && (! single_dash) && (! many_dashes)) return 0;
+ if ((! expand_gaps) && (! single_dash) && (! many_dashes) && (! many_tildes)) return 0;
if (single_dash) {
@@ -2099,7 +2110,7 @@ static Int4 SeqPortStreamGap (
if (length < 1) return 0;
- if (many_dashes || many_pluses) {
+ if (many_dashes || many_pluses || many_tildes) {
ch = gapchar;
} else if (is_na) {
ch = 'N';
@@ -2320,7 +2331,7 @@ static Int4 SeqPortStreamRaw (
/* support for new Seq-data.gap */
- count += SeqPortStreamGap (stop - start + 1, is_na, FALSE, FALSE, sdp);
+ count += SeqPortStreamGap (stop - start + 1, is_na, FALSE, FALSE, TRUE, sdp);
return count;
}
@@ -2406,15 +2417,28 @@ static Int4 SeqPortStreamSeqLit (
if (slitp->length < 1) return 0;
- if (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap) {
+ if (slitp->seq_data == NULL) {
+
+ /* literal without sequence data is a virtual gap */
+
+ if (slitp->fuzz != NULL) {
+ is_known = FALSE;
+ }
+
+ count += SeqPortStreamGap (stop - start + 1, is_na, FALSE, is_known, FALSE, sdp);
+
+ return count;
+ }
+
+ if (slitp->seq_data_type == Seq_code_gap) {
- /* literal without sequence data is a virtual gap, also handle new gap type */
+ /* also handle new gap type */
if (slitp->fuzz != NULL) {
is_known = FALSE;
}
- count += SeqPortStreamGap (stop - start + 1, is_na, FALSE, is_known, sdp);
+ count += SeqPortStreamGap (stop - start + 1, is_na, FALSE, is_known, TRUE, sdp);
return count;
}
@@ -2556,7 +2580,8 @@ static Int4 SeqPortStreamSeqLoc (
SeqIdWrite (sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
if (parentID != NULL) {
SeqIdWrite (parentID, pid, PRINTID_FASTA_LONG, sizeof (pid) - 1);
- ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream failed to load Bioseq %s component of %s", buf, pid);
+ ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream failed to load Bioseq %s component of %s, size = %d",
+ buf, pid, (int) sizeof (sip->data.intvalue)); /* %d takes an int; sizeof yields size_t */
} else {
ErrPostEx (SEV_ERROR, 0, 0, "SeqPortStream failed to load Bioseq %s", buf);
}
@@ -2764,7 +2789,7 @@ static Int4 SeqPortStreamDelta (
/* process components in correct order */
- for (vnp = head; vnp != NULL; vnp = vnp->next) {
+ for (vnp = head; vnp != NULL && (! sdp->failed); vnp = vnp->next) {
sop = (StreamObjPtr) vnp->data.ptrvalue;
if (sop == NULL) continue;
@@ -2882,7 +2907,7 @@ static Int4 SeqPortStreamSeg (
/* process components in correct order */
- for (vnp = head; vnp != NULL; vnp = vnp->next) {
+ for (vnp = head; vnp != NULL && (! sdp->failed); vnp = vnp->next) {
sop = (StreamObjPtr) vnp->data.ptrvalue;
if (sop == NULL) continue;
@@ -2900,6 +2925,82 @@ static Int4 SeqPortStreamSeg (
return count;
}
+/* SeqPortStreamRef streams positions start..stop of a Bioseq whose seq_ext
+   holds a single Seq-loc, by translating the requested range into coordinates
+   on that location and delegating to SeqPortStreamSeqLoc. Returns the count
+   reported by SeqPortStreamSeqLoc, or 0 if bsp/sdp is NULL, the location is
+   missing or SEQLOC_NULL, its endpoints are negative, or start lies beyond
+   the length of the location. */
+
+static Int4 SeqPortStreamRef (
+ BioseqPtr bsp,
+ Int4 start,
+ Int4 stop,
+ Uint1 strand,
+ StreamDataPtr sdp
+)
+
+{
+ Int4 count = 0, from, to, len;
+ Boolean is_na;
+ Boolean revcomp = FALSE;
+ SeqLocPtr slp;
+
+ if (bsp == NULL || sdp == NULL) return 0;
+
+ is_na = (Boolean) ISA_na (bsp->mol);
+
+ /* remember a minus-strand request on a nucleotide before strand is reused below */
+
+ if (strand == Seq_strand_minus && is_na) {
+ revcomp = TRUE;
+ }
+
+ /* the referenced region is the Seq-loc stored in seq_ext */
+
+ slp = (SeqLocPtr) bsp->seq_ext;
+
+ if (slp == NULL || slp->choice == SEQLOC_NULL) return 0;
+
+ len = 0;
+
+ /* from here on, strand holds the strand of the referenced location, not the caller's request */
+
+ from = SeqLocStart (slp);
+ to = SeqLocStop (slp);
+ strand = SeqLocStrand (slp);
+
+ if (from < 0 || to < 0) return 0;
+
+ len = to - from + 1;
+
+ /* requested start is past the end of the referenced interval */
+
+ if (len <= start) return 0;
+
+ /* adjust from and to if not using entire interval */
+
+ if (strand == Seq_strand_minus) {
+
+ /* on a minus-strand location the requested start trims the high end and the requested stop trims the low end */
+
+ if (start > 0) {
+ to -= start;
+ }
+
+ if (stop < len) {
+ from += len - stop - 1;
+ }
+
+ } else {
+
+ if (start > 0) {
+ from += start;
+ }
+
+ if (stop < len) {
+ to -= len - stop - 1;
+ }
+ }
+
+ /* a reverse-complement request flips whatever strand the location has */
+
+ if (revcomp) {
+ if (strand == Seq_strand_minus) {
+ strand = Seq_strand_plus;
+ } else {
+ strand = Seq_strand_minus;
+ }
+ }
+
+ count += SeqPortStreamSeqLoc (slp, from, to, strand, sdp, bsp->id);
+
+ return count;
+}
+
/* SeqPortStreamWork calls appropriate representation-specific function */
static Int4 SeqPortStreamWork (
@@ -2912,7 +3013,7 @@ static Int4 SeqPortStreamWork (
{
Int4 count = 0;
-
+
if (bsp == NULL || sdp == NULL) return 0;
/* start and stop position reality checks */
@@ -2948,7 +3049,7 @@ static Int4 SeqPortStreamWork (
switch (bsp->repr) {
case Seq_repr_virtual :
- count += SeqPortStreamGap (stop - start + 1, ISA_na (bsp->mol), TRUE, FALSE, sdp);
+ count += SeqPortStreamGap (stop - start + 1, ISA_na (bsp->mol), TRUE, FALSE, FALSE, sdp);
break;
case Seq_repr_raw :
@@ -2968,6 +3069,12 @@ static Int4 SeqPortStreamWork (
}
break;
+ case Seq_repr_ref :
+ if (bsp->seq_ext_type == 2) {
+ count += SeqPortStreamRef (bsp, start, stop, strand, sdp);
+ }
+ break;
+
default :
break;
}
@@ -6127,7 +6234,8 @@ NLM_EXTERN CharPtr ReadCodingRegionBases (SeqLocPtr location, Int4 len, Uint1 fr
*/
bases = MemNew ((size_t) (len + 6));
- if (bases == NULL) return NULL;
+ if (bases == NULL)
+ return NULL;
rcd.tmp = bases;
rcd.max = len;
@@ -8339,66 +8447,25 @@ extern void TestProtSearch (void)
*
*****************************************************************************/
-NLM_EXTERN CharPtr GetSequenceByFeature (SeqFeatPtr sfp)
+NLM_EXTERN CharPtr GetSequenceByFeatureEx (SeqFeatPtr sfp, StreamFlgType flags)
{
- Int4 len;
- CharPtr str = NULL;
- /*
- Int2 actual, cnt;
- BioseqPtr bsp;
- SeqPortPtr spp;
- CharPtr str = NULL, txt;
- */
+ Int4 len;
+ CharPtr str = NULL;
if (sfp == NULL) return NULL;
len = SeqLocLen (sfp->location);
if (len > 0 && len < MAXALLOC) {
str = MemNew (sizeof (Char) * (len + 2));
if (str != NULL) {
- SeqPortStreamLoc (sfp->location, STREAM_EXPAND_GAPS, (Pointer) str, NULL);
-
-#if 0
- spp = SeqPortNewByLoc (sfp->location, Seq_code_iupacna);
- if (spp != NULL) {
-
- bsp = BioseqFindFromSeqLoc (sfp->location);
- if (bsp != NULL) {
- if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_virtual) {
- SeqPortSet_do_virtual (spp, TRUE);
- }
- }
-
- cnt = (Int2) MIN (len, 32000L);
- txt = str;
- actual = 1;
-
- while (cnt > 0 && len > 0 && actual > 0) {
- actual = SeqPortRead (spp, (BytePtr) txt, cnt);
- if (actual < 0) {
- actual = -actual;
- if (actual == SEQPORT_VIRT || actual == SEQPORT_EOS) {
- actual = 1; /* ignore, keep going */
- } else if (actual == SEQPORT_EOF) {
- actual = 0; /* stop */
- }
- } else if (actual > 0) {
- len -= actual;
- txt += actual;
- cnt = (Int2) MIN (len, 32000L);
- }
- }
-
- SeqPortFree (spp);
- }
-#endif
+ SeqPortStreamLoc (sfp->location, flags, (Pointer) str, NULL);
}
}
return str;
}
-NLM_EXTERN CharPtr GetSequenceByLocation (SeqLocPtr slp)
+NLM_EXTERN CharPtr GetSequenceByLocationEx (SeqLocPtr slp, StreamFlgType flags)
{
Int4 len;
@@ -8409,14 +8476,14 @@ NLM_EXTERN CharPtr GetSequenceByLocation (SeqLocPtr slp)
if (len > 0 && len < MAXALLOC) {
str = MemNew (sizeof (Char) * (len + 2));
if (str != NULL) {
- SeqPortStreamLoc (slp, STREAM_EXPAND_GAPS, (Pointer) str, NULL);
+ SeqPortStreamLoc (slp, flags, (Pointer) str, NULL);
}
}
return str;
}
-NLM_EXTERN CharPtr GetSequenceByBsp (BioseqPtr bsp)
+NLM_EXTERN CharPtr GetSequenceByBspEx (BioseqPtr bsp, StreamFlgType flags)
{
CharPtr str = NULL;
@@ -8426,12 +8493,12 @@ NLM_EXTERN CharPtr GetSequenceByBsp (BioseqPtr bsp)
str = MemNew (sizeof (Char) * (bsp->length + 2));
if (str == NULL) return NULL;
- SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) str, NULL);
+ SeqPortStream (bsp, flags, (Pointer) str, NULL);
return str;
}
-NLM_EXTERN CharPtr GetSequenceByIdOrAccnDotVer (SeqIdPtr sip, CharPtr accession, Boolean is_na)
+NLM_EXTERN CharPtr GetSequenceByIdOrAccnDotVerEx (SeqIdPtr sip, CharPtr accession, Boolean is_na, StreamFlgType flags)
{
BioseqPtr bsp;
@@ -8451,7 +8518,7 @@ NLM_EXTERN CharPtr GetSequenceByIdOrAccnDotVer (SeqIdPtr sip, CharPtr accession,
if ((ISA_na (bsp->mol) && is_na) || (ISA_aa (bsp->mol) && (! is_na))) {
if (bsp->length < MAXALLOC) {
- str = GetSequenceByBsp (bsp);
+ str = GetSequenceByBspEx (bsp, flags);
}
}
@@ -8459,6 +8526,30 @@ NLM_EXTERN CharPtr GetSequenceByIdOrAccnDotVer (SeqIdPtr sip, CharPtr accession,
return str;
}
+/* GetSequenceByFeature keeps the original signature and forwards to
+   GetSequenceByFeatureEx with the STREAM_EXPAND_GAPS flag */
+
+NLM_EXTERN CharPtr GetSequenceByFeature (SeqFeatPtr sfp)
+
+{
+ return GetSequenceByFeatureEx (sfp, STREAM_EXPAND_GAPS);
+}
+
+/* GetSequenceByLocation keeps the original signature and forwards to
+   GetSequenceByLocationEx with the STREAM_EXPAND_GAPS flag */
+
+NLM_EXTERN CharPtr GetSequenceByLocation (SeqLocPtr slp)
+
+{
+ return GetSequenceByLocationEx (slp, STREAM_EXPAND_GAPS);
+}
+
+/* GetSequenceByBsp keeps the original signature and forwards to
+   GetSequenceByBspEx with the STREAM_EXPAND_GAPS flag */
+
+NLM_EXTERN CharPtr GetSequenceByBsp (BioseqPtr bsp)
+
+{
+ return GetSequenceByBspEx (bsp, STREAM_EXPAND_GAPS);
+}
+
+/* GetSequenceByIdOrAccnDotVer keeps the original signature and forwards to
+   GetSequenceByIdOrAccnDotVerEx with the STREAM_EXPAND_GAPS flag */
+
+NLM_EXTERN CharPtr GetSequenceByIdOrAccnDotVer (SeqIdPtr sip, CharPtr accession, Boolean is_na)
+
+{
+ return GetSequenceByIdOrAccnDotVerEx (sip, accession, is_na, STREAM_EXPAND_GAPS);
+}
+
/* original convenience function now calls more advanced version that can get proteins */
NLM_EXTERN CharPtr GetDNAbyAccessionDotVersion (CharPtr accession)
diff --git a/api/seqport.h b/api/seqport.h
index 612efbd8..4b6f5b83 100644
--- a/api/seqport.h
+++ b/api/seqport.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.66 $
+* $Revision: 6.67 $
*
* File Description: Ports onto Bioseqs
*
@@ -189,9 +189,10 @@ typedef unsigned long StreamFlgType;
#define EXPAND_GAPS_TO_DASHES 3
#define KNOWN_GAP_AS_PLUS 4
+#define SEQ_GAP_AS_TILDE 8
-#define SUPPRESS_VIRT_SEQ 8
-#define STREAM_VIRT_AS_PLUS 16
+#define SUPPRESS_VIRT_SEQ 16
+#define STREAM_VIRT_AS_PLUS 32
#define STREAM_CORRECT_INVAL 64
@@ -805,6 +806,28 @@ NLM_EXTERN CharPtr GetSequenceByLocation (
SeqLocPtr slp
);
+/* "Ex" variants of the GetSequenceBy... functions: each takes an additional
+   StreamFlgType flags argument (the STREAM_... / gap-handling bits defined
+   in this header) in place of a fixed gap-handling mode */
+
+NLM_EXTERN CharPtr GetSequenceByBspEx (
+ BioseqPtr bsp,
+ StreamFlgType flags
+);
+
+NLM_EXTERN CharPtr GetSequenceByIdOrAccnDotVerEx (
+ SeqIdPtr sip,
+ CharPtr accession,
+ Boolean is_na,
+ StreamFlgType flags
+);
+
+NLM_EXTERN CharPtr GetSequenceByFeatureEx (
+ SeqFeatPtr sfp,
+ StreamFlgType flags
+);
+
+NLM_EXTERN CharPtr GetSequenceByLocationEx (
+ SeqLocPtr slp,
+ StreamFlgType flags
+);
+
NLM_EXTERN CharPtr GetDNAbyAccessionDotVersion (
CharPtr accession
);
diff --git a/api/sequtil.c b/api/sequtil.c
index dd888835..0240c913 100644
--- a/api/sequtil.c
+++ b/api/sequtil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.350 $
+* $Revision: 6.405 $
*
* File Description: Sequence Utilities for objseq and objsset
*
@@ -2657,7 +2657,7 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
char d [2];
CharPtr tmp;
static Uint1 fasta_order[NUM_SEQID] = { /* order for other id FASTA_LONG */
- 33, /* 0 = not set */
+ 33, /* 0 = not set */
20, /* 1 = local Object-id */
15, /* 2 = gibbsq */
16, /* 3 = gibbmt */
@@ -2667,7 +2667,7 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
10, /* 7 = pir */
10, /* 8 = swissprot */
15, /* 9 = patent */
- 12, /* 10 = other TextSeqId */
+ 10, /* 10 = other = refseq */
13, /* 11 = general Dbtag */
255, /* 12 = gi */
10, /* 13 = ddbj */
@@ -2680,7 +2680,7 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
15 /* 20 = nat */
};
static Uint1 tmsmart_order[NUM_SEQID] = { /* order for other id FASTA_LONG */
- 33, /* 0 = not set */
+ 33, /* 0 = not set */
20, /* 1 = local Object-id */
15, /* 2 = gibbsq */
16, /* 3 = gibbmt */
@@ -2690,7 +2690,7 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
10, /* 7 = pir */
10, /* 8 = swissprot */
15, /* 9 = patent */
- 12, /* 10 = other TextSeqId */
+ 10, /* 10 = other = refseq */
29, /* 11 = general Dbtag */
255, /* 12 = gi */
10, /* 13 = ddbj */
@@ -2703,7 +2703,7 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
15 /* 20 = nat */
};
static Uint1 general_order[NUM_SEQID] = { /* order for other id FASTA_LONG */
- 33, /* 0 = not set */
+ 33, /* 0 = not set */
20, /* 1 = local Object-id */
15, /* 2 = gibbsq */
16, /* 3 = gibbmt */
@@ -2713,7 +2713,7 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
10, /* 7 = pir */
10, /* 8 = swissprot */
15, /* 9 = patent */
- 13, /* 10 = other TextSeqId */
+ 10, /* 10 = other = refseq */
12, /* 11 = general Dbtag */
255, /* 12 = gi */
10, /* 13 = ddbj */
@@ -2815,8 +2815,8 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
{
if (sip->choice == SEQID_GI)
{
- sprintf(localbuf, "%s%s%ld", txtid[SEQID_GI], ldelim,
- (long)(sip->data.intvalue));
+ sprintf(localbuf, "%s%s%lld", txtid[SEQID_GI], ldelim,
+ (long long)(sip->data.intvalue));
Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
got_gi = TRUE;
} else if (sip->choice == SEQID_GENERAL) {
@@ -2946,10 +2946,13 @@ NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 b
break;
case SEQID_GIBBSQ:
case SEQID_GIBBMT:
- case SEQID_GI:
sprintf(localbuf, "%ld", (long)sip->data.intvalue);
Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
break;
+ case SEQID_GI:
+ sprintf(localbuf, "%lld", (long long)sip->data.intvalue);
+ Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
+ break;
case SEQID_GIIM:
sprintf(localbuf, "%ld", (long)((GiimPtr)sip->data.ptrvalue)->id);
Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
@@ -3196,8 +3199,8 @@ NLM_EXTERN Int4 SeqIdLabelLen (SeqIdPtr isip, Uint1 format)
{
if (sip->choice == SEQID_GI)
{
- sprintf(localbuf, "%s%s%ld", txtid[SEQID_GI], ldelim,
- (long)(sip->data.intvalue));
+ sprintf(localbuf, "%s%s%lld", txtid[SEQID_GI], ldelim,
+ (long long)(sip->data.intvalue));
label_len += StringLen (localbuf) + 1; /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
got_gi = TRUE;
} else if (sip->choice == SEQID_GENERAL) {
@@ -3326,10 +3329,13 @@ NLM_EXTERN Int4 SeqIdLabelLen (SeqIdPtr isip, Uint1 format)
break;
case SEQID_GIBBSQ:
case SEQID_GIBBMT:
- case SEQID_GI:
sprintf(localbuf, "%ld", (long)sip->data.intvalue);
label_len += StringLen (localbuf) + 1; /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
break;
+ case SEQID_GI:
+ sprintf(localbuf, "%lld", (long long)sip->data.intvalue);
+ label_len += StringLen (localbuf) + 1; /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
+ break;
case SEQID_GIIM:
sprintf(localbuf, "%ld", (long)((GiimPtr)sip->data.ptrvalue)->id);
label_len += StringLen (localbuf) + 1; /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
@@ -3465,7 +3471,7 @@ NLM_EXTERN CharPtr SeqIdWholeLabel (SeqIdPtr isip, Uint1 format)
/* The following function finds either an integer or a string id from
SeqIdPtr */
-Boolean GetAccessionFromSeqId(SeqIdPtr sip, Int4Ptr gi, CharPtr PNTR id)
+Boolean GetAccessionFromSeqId(SeqIdPtr sip, BIG_ID_PNTR gi, CharPtr PNTR id)
{
return GetAccessionVersionFromSeqId(sip, gi, id, FALSE);
}
@@ -3473,7 +3479,7 @@ Boolean GetAccessionFromSeqId(SeqIdPtr sip, Int4Ptr gi, CharPtr PNTR id)
/* Maximal length of a version number in Accession.version identifiers */
#define MAX_VERSION_LENGTH 10
-Boolean GetAccessionVersionFromSeqId(SeqIdPtr sip, Int4Ptr gi,
+Boolean GetAccessionVersionFromSeqId(SeqIdPtr sip, BIG_ID_PNTR gi,
CharPtr PNTR id, Boolean get_version)
{
Boolean numeric_id_type = FALSE;
@@ -3583,7 +3589,7 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
char localbuf[SEQID_PARSE_BUF_SIZE + 2];
char * tmp, *strt, * tokens[6], *chain;
char d;
- long num;
+ long long num;
CharPtr tp;
Int2 numtoken, i, type = 0, j, ctr=0, numdigits; /* ctr is number of OK ids done */
SeqIdPtr sip = NULL, head = NULL, last = NULL, tmpsip;
@@ -3718,10 +3724,10 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
}
if (oip->str == NULL)
{
- sscanf(tokens[0], "%ld", &num);
+ sscanf(tokens[0], "%lld", &num);
oip->id = (Int4)num;
- if (numdigits < 10 ||
- (numdigits == 10 && StringCmp (tokens [0], "2147483647") <= 0)) {
+ if (*tokens[0] != '0' && (numdigits < 10 ||
+ (numdigits == 10 && StringCmp (tokens [0], "2147483647") <= 0))) {
- sscanf(tokens[0], "%ld", &num);
+ sscanf(tokens[0], "%lld", &num); /* num is declared long long in this function; "%ld" expects long * */
oip->id = (Int4)num;
} else {
@@ -3731,18 +3737,23 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
break;
case SEQID_GIBBSQ:
case SEQID_GIBBMT:
+ if (! IS_DIGIT(*tokens[0]))
+ goto erret;
+ sscanf(tokens[0], "%lld", &num);
+ sip->data.intvalue = (BIG_ID)num;
+ break;
case SEQID_GI:
if (! IS_DIGIT(*tokens[0]))
goto erret;
- sscanf(tokens[0], "%ld", &num);
- sip->data.intvalue = (Int4)num;
+ sscanf(tokens[0], "%lld", &num);
+ sip->data.intvalue = (BIG_ID)num;
break;
case SEQID_GIIM:
if (! IS_DIGIT(*tokens[0])) goto erret;
gim = GiimNew();
sip->data.ptrvalue = gim;
- sscanf(tokens[0], "%ld", &num);
- gim->id = (Int4)num;
+ sscanf(tokens[0], "%lld", &num);
+ gim->id = (BIG_ID)num;
break;
case SEQID_GENBANK:
case SEQID_EMBL:
@@ -3770,7 +3781,7 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
if (IS_DIGIT(*(tmp+1)))
{
*tmp = '\0';
- sscanf((tmp+1),"%ld",&num);
+ sscanf((tmp+1),"%lld",&num);
tsip->version =(Int2)num;
}
else
@@ -3815,7 +3826,7 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
} else {
ipp->number = StringSave(tokens[1]);
}
- sscanf(tokens[2], "%ld", &num);
+ sscanf(tokens[2], "%lld", &num);
patsip->seqid = (Int2)num;
break;
case SEQID_GENERAL:
@@ -3835,9 +3846,9 @@ NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
}
if (oip->str == NULL)
{
- if (numdigits < 10 ||
- (numdigits == 10 && StringCmp (tokens [1], "2147483647") <= 0)) {
- sscanf(tokens[1], "%ld", &num);
+ if (*tokens[1] != '0' && (numdigits < 10 ||
+ (numdigits == 10 && StringCmp (tokens [1], "2147483647") <= 0))) {
+ sscanf(tokens[1], "%lld", &num);
oip->id = (Int4)num;
} else {
oip->str = StringSave(tokens[1]);
@@ -3931,6 +3942,16 @@ NLM_EXTERN Boolean SeqIdMatch (SeqIdPtr a, SeqIdPtr b)
return FALSE;
}
+/* GetGiFromSeqIdGeneral
+ * Returns the numeric GI carried by a general Seq-id whose Dbtag database
+ * is "GI" (compared case-insensitively) and whose tag is stored as a string.
+ * Returns 0 for a NULL or non-general Seq-id, a different database, a
+ * missing tag or string, or a string that does not start with a number.
+ * NOTE(review): a tag stored as an integer (tag->str == NULL) yields 0, as
+ * in the original - confirm whether tag->id should be honored as well.
+ */
+static Int8 GetGiFromSeqIdGeneral( SeqIdPtr seq_id)
+{
+ DbtagPtr db_tag;
+ ObjectIdPtr tag;
+ long long num = 0;
+
+ if( seq_id == NULL || seq_id->choice != SEQID_GENERAL) return 0;
+ db_tag = (DbtagPtr) seq_id->data.ptrvalue;
+ if( db_tag == NULL || StringICmp( db_tag->db, "GI") != 0) return 0;
+ tag = db_tag->tag;
+ if( (tag == NULL) || (tag->str == NULL)) return 0;
+ /* scan as long long: atol returns long, which may be narrower than Int8
+    and would then truncate large GI values */
+ if( sscanf( tag->str, "%lld", &num) != 1) return 0;
+ return (Int8) num;
+}
+
/*****************************************************************************
*
* SeqIdComp(a, b)
@@ -3977,6 +3998,19 @@ NLM_EXTERN Uint1 SeqIdComp (SeqIdPtr a, SeqIdPtr b)
return SIC_DIFF;
}
break;
+ case SEQID_GI:
+ {
+ Int8 gi = GetGiFromSeqIdGeneral( b);
+ if( a->data.intvalue == gi) return SIC_YES;
+ return SIC_DIFF;
+ }
+ case SEQID_GENERAL:
+ {
+ if( b->choice != SEQID_GI) return SIC_DIFF;
+ Int8 gi = GetGiFromSeqIdGeneral( a);
+ if( b->data.intvalue == gi) return SIC_YES;
+ return SIC_DIFF;
+ }
default:
return SIC_DIFF;
}
@@ -4885,10 +4919,11 @@ NLM_EXTERN Int4 SeqLocLen (SeqLocPtr anp) /* seqloc */
Int4 len = -1L, tmp;
SeqLocPtr slp;
Boolean locked = FALSE;
+ ErrSev logsev;
Boolean average = FALSE;
Int2 num;
SeqIdPtr sip;
- Int4 gi;
+ BIG_ID gi;
SeqMgrPtr smp;
SeqLenLookupFunc func;
@@ -4910,7 +4945,7 @@ NLM_EXTERN Int4 SeqLocLen (SeqLocPtr anp) /* seqloc */
bsp = BioseqFindCore(sip);
if (bsp == NULL) {
if (sip != NULL && sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
/* try registered service for rapid length lookup */
smp = SeqMgrWriteLock ();
if (smp != NULL) {
@@ -4922,7 +4957,9 @@ NLM_EXTERN Int4 SeqLocLen (SeqLocPtr anp) /* seqloc */
}
}
}
+ logsev = ErrSetLogLevel (SEV_MAX);
bsp = BioseqLockById(sip);
+ ErrSetLogLevel (logsev);
if (bsp != NULL)
locked = TRUE;
}
@@ -5630,7 +5667,9 @@ NLM_EXTERN Int2 SeqLocCompareEx (SeqLocPtr a, SeqLocPtr b, Boolean compare_stran
&& (!compare_strand || DoStrandsMatch (sip->strand, ((SeqPntPtr)b->data.ptrvalue)->strand)))
{
point = ((SeqPntPtr)b->data.ptrvalue)->point;
- if ((point >= sip->from) && (point <= sip->to))
+ if ((point == sip->from) && (point == sip->to))
+ retval = SLC_A_EQ_B;
+ else if ((point >= sip->from) && (point <= sip->to))
retval = SLC_B_IN_A;
}
break;
@@ -6872,7 +6911,7 @@ static Int4 SeqLocCoverage (SeqLocPtr slp)
Int4Ptr ivals;
Int4 numivals = 0;
SeqLocPtr tmp;
- SeqIdPtr sip;
+ SeqIdPtr sip = NULL;
SeqIdPtr PNTR id_list;
Int4 coverage = 0, i = 0, from, to, j;
Int4 i_from, i_to, j_from, j_to;
@@ -7171,7 +7210,6 @@ NLM_EXTERN Uint2 SeqLocPartialCheckEx (SeqLocPtr head, Boolean farFetch)
PackSeqPntPtr pspp;
IntFuzzPtr ifp;
Boolean miss_end;
- BioseqContextPtr bcp;
ValNodePtr vnp, vnp2;
Boolean locked, found_molinfo;
MolInfoPtr mip;
@@ -7368,11 +7406,9 @@ NLM_EXTERN Uint2 SeqLocPartialCheckEx (SeqLocPtr head, Boolean farFetch)
if (bsp != NULL)
locked = TRUE;
}
- if (bsp == NULL) break;
- bcp = BioseqContextNew(bsp);
- if (bcp != NULL) {
+ if (bsp != NULL) {
vnp = NULL;
- while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_molinfo, vnp, NULL)) != NULL)
+ while ((vnp = GetNextDescriptorUnindexed(bsp, Seq_descr_molinfo, vnp)) != NULL)
{
found_molinfo = TRUE;
mip = (MolInfoPtr)(vnp->data.ptrvalue);
@@ -7403,7 +7439,7 @@ NLM_EXTERN Uint2 SeqLocPartialCheckEx (SeqLocPtr head, Boolean farFetch)
}
if (! found_molinfo)
{
- while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_modif, vnp, NULL)) != NULL)
+ while ((vnp = GetNextDescriptorUnindexed(bsp, Seq_descr_modif, vnp)) != NULL)
{
for (vnp2 = (ValNodePtr)(vnp->data.ptrvalue); vnp2 != NULL; vnp2 = vnp2->next)
{
@@ -7432,7 +7468,6 @@ NLM_EXTERN Uint2 SeqLocPartialCheckEx (SeqLocPtr head, Boolean farFetch)
}
}
}
- BioseqContextFree(bcp);
}
if (locked)
BioseqUnlock (bsp);
@@ -10591,6 +10626,16 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
retcode = ACCN_DDBJ_TPA_PROT;
} else if ((StringICmp(temp,"JAA") >= 0) && (StringICmp(temp,"JZZ") <= 0)) {
retcode = ACCN_NCBI_TPA_PROT;
+ } else if ((StringICmp(temp,"KAA") >= 0) && (StringICmp(temp,"KZZ") <= 0)) {
+ retcode = ACCN_NCBI_WGS_PROT;
+ } else if ((StringICmp(temp,"LAA") >= 0) && (StringICmp(temp,"LZZ") <= 0)) {
+ retcode = ACCN_DDBJ_TPA_PROT;
+ } else if ((StringICmp(temp,"OAA") >= 0) && (StringICmp(temp,"OZZ") <= 0)) {
+ retcode = ACCN_NCBI_WGS_PROT;
+ } else if ((StringICmp(temp,"PAA") >= 0) && (StringICmp(temp,"PZZ") <= 0)) {
+ retcode = ACCN_NCBI_WGS_PROT;
+ } else if ((StringICmp(temp,"SAA") >= 0) && (StringICmp(temp,"SZZ") <= 0)) {
+ retcode = ACCN_EMBL_PROT;
} else {
retcode = ACCN_IS_PROTEIN;
retval = TRUE;
@@ -10691,7 +10736,16 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"FA") == 0) ||
(StringICmp(temp,"GG") == 0) ||
(StringICmp(temp,"GL") == 0) ||
- (StringICmp(temp,"JH") == 0)) { /* NCBI segmented set header Bioseq */
+ (StringICmp(temp,"JH") == 0) ||
+ (StringICmp(temp,"KB") == 0) ||
+ (StringICmp(temp,"KD") == 0) ||
+ (StringICmp(temp,"KE") == 0) ||
+ (StringICmp(temp,"KI") == 0) ||
+ (StringICmp(temp,"KK") == 0) ||
+ (StringICmp(temp,"KL") == 0) ||
+ (StringICmp(temp,"KN") == 0) ||
+ (StringICmp(temp,"KQ") == 0) ||
+ (StringICmp(temp,"KV") == 0)) { /* NCBI segmented set header Bioseq */
retcode = ACCN_NCBI_SEGSET;
} else if ((StringICmp(temp,"AS") == 0) ||
(StringICmp(temp,"HR") == 0) ||
@@ -10725,7 +10779,10 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"JJ") == 0) ||
(StringICmp(temp,"JM") == 0) ||
(StringICmp(temp,"JS") == 0) ||
- (StringICmp(temp,"JY") == 0) ) { /* NCBI GSS */
+ (StringICmp(temp,"JY") == 0) ||
+ (StringICmp(temp,"KG") == 0) ||
+ (StringICmp(temp,"KO") == 0) ||
+ (StringICmp(temp,"KS") == 0) ) { /* NCBI GSS */
retcode = ACCN_NCBI_GSS;
} else if ((StringICmp(temp,"AR") == 0) ||
(StringICmp(temp,"DZ") == 0) ||
@@ -10738,7 +10795,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"GZ") == 0) ||
(StringICmp(temp,"HJ") == 0) ||
(StringICmp(temp,"HK") == 0) ||
- (StringICmp(temp,"HL") == 0)) { /* NCBI patent */
+ (StringICmp(temp,"HL") == 0) ||
+ (StringICmp(temp,"KH") == 0)) { /* NCBI patent */
retcode = ACCN_NCBI_PATENT;
} else if((StringICmp(temp,"BC")==0)) { /* NCBI long cDNA project : MGC */
retcode = ACCN_NCBI_cDNA;
@@ -10769,9 +10827,24 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"JU") == 0) ||
(StringICmp(temp,"JV") == 0) ||
(StringICmp(temp,"JW") == 0) ||
- (StringICmp(temp,"JX") == 0)) {
+ (StringICmp(temp,"JX") == 0) ||
+ (StringICmp(temp,"KA") == 0) ||
+ (StringICmp(temp,"KC") == 0) ||
+ (StringICmp(temp,"KF") == 0) ||
+ (StringICmp(temp,"KJ") == 0) ||
+ (StringICmp(temp,"KM") == 0) ||
+ (StringICmp(temp,"KP") == 0) ||
+ (StringICmp(temp,"KR") == 0) ||
+ (StringICmp(temp,"KT") == 0) ||
+ (StringICmp(temp,"KU") == 0) ||
+ (StringICmp(temp,"KX") == 0)) {
retcode = ACCN_NCBI_TSA;
- } else if((StringICmp(temp,"FX") == 0)) {
+ } else if((StringICmp(temp,"FX") == 0) ||
+ (StringICmp(temp,"LA") == 0) ||
+ (StringICmp(temp,"LE") == 0) ||
+ (StringICmp(temp,"LH") == 0) ||
+ (StringICmp(temp,"LI") == 0) ||
+ (StringICmp(temp,"LJ") == 0)) {
retcode = ACCN_DDBJ_TSA;
} else if ((StringICmp(temp,"AJ") == 0) ||
(StringICmp(temp,"AM") == 0) ||
@@ -10784,7 +10857,17 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"HE") == 0) ||
(StringICmp(temp,"HF") == 0) ||
(StringICmp(temp,"HG") == 0) ||
- (StringICmp(temp,"HI") == 0)) { /* EMBL direct submission */
+ (StringICmp(temp,"HI") == 0) ||
+ (StringICmp(temp,"LK") == 0) ||
+ (StringICmp(temp,"LL") == 0) ||
+ (StringICmp(temp,"LM") == 0) ||
+ (StringICmp(temp,"LN") == 0) ||
+ (StringICmp(temp,"LO") == 0) ||
+ (StringICmp(temp,"LP") == 0) ||
+ (StringICmp(temp,"LQ") == 0) ||
+ (StringICmp(temp,"LR") == 0) ||
+ (StringICmp(temp,"LS") == 0) ||
+ (StringICmp(temp,"LT") == 0)) { /* EMBL direct submission */
retcode = ACCN_EMBL_DIRSUB;
} else if ((StringICmp(temp,"AL") == 0) ||
(StringICmp(temp,"BX") == 0)||
@@ -10828,9 +10911,11 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"FS") == 0) ||
(StringICmp(temp,"FY") == 0) ||
(StringICmp(temp,"HX") == 0) ||
- (StringICmp(temp,"HY") == 0)) { /* DDBJ EST's */
+ (StringICmp(temp,"HY") == 0) ||
+ (StringICmp(temp,"LU") == 0)) { /* DDBJ EST's */
retcode = ACCN_DDBJ_EST;
- } else if ((StringICmp(temp,"AB") == 0)) { /* DDBJ direct submission */
+ } else if ((StringICmp(temp,"AB") == 0) ||
+ (StringICmp(temp,"LC") == 0)) { /* DDBJ direct submission */
retcode = ACCN_DDBJ_DIRSUB;
} else if ((StringICmp(temp,"AG") == 0) ||
(StringICmp(temp,"AP") == 0) ||
@@ -10840,7 +10925,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
retcode = ACCN_DDBJ_HTGS;
} else if ((StringICmp(temp,"BA") == 0) ||
(StringICmp(temp,"DF") == 0) ||
- (StringICmp(temp,"DG") == 0)) { /* DDBJ CON division */
+ (StringICmp(temp,"DG") == 0) ||
+ (StringICmp(temp,"LD") == 0)) { /* DDBJ CON division */
retcode = ACCN_DDBJ_CON;
} else if ((StringICmp(temp,"BD") == 0) ||
(StringICmp(temp,"DD") == 0) ||
@@ -10854,12 +10940,17 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"FZ") == 0) ||
(StringICmp(temp,"GB") == 0) ||
(StringICmp(temp,"HV") == 0) ||
- (StringICmp(temp,"HW") == 0)) { /* DDBJ patent division */
+ (StringICmp(temp,"HW") == 0) ||
+ (StringICmp(temp,"HZ") == 0) ||
+ (StringICmp(temp,"LF") == 0) ||
+ (StringICmp(temp,"LG") == 0) ||
+ (StringICmp(temp,"LV") == 0)) { /* DDBJ patent division */
retcode = ACCN_DDBJ_PATENT;
} else if ((StringICmp(temp,"DE") == 0) ||
(StringICmp(temp,"DH") == 0) ||
(StringICmp(temp,"FT") == 0) ||
- (StringICmp(temp,"GA") == 0)) { /* DDBJ GSS */
+ (StringICmp(temp,"GA") == 0) ||
+ (StringICmp(temp,"LB") == 0)) { /* DDBJ GSS */
retcode = ACCN_DDBJ_GSS;
} else {
retcode = ACCN_IS_NT;
@@ -10919,6 +11010,28 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
s++;
}
break;
+ case 10: /* New 10-character accession, three letters +"_"+ 6 digits */
+ if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)))
+ break;
+ if(*(s+3)!='_')
+ break;
+ temp[0] = *s; s++;
+ temp[1] = *s; s++;
+ temp[2] = *s; s++;
+ temp[3] = NULLB; s++;
+
+ if ((StringICmp(temp,"MAP") == 0)) {
+ while (*s) {
+ if (! IS_DIGIT(*s)) {
+ retval = FALSE;
+ break;
+ }
+ s++;
+ }
+ retcode = ACCN_NCBI_OTHER;
+ } else
+ retval = FALSE;
+ break;
case 11: /* New 11-character accession, two letters +"_"+ 8 digits */
if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)))
break;
@@ -10957,9 +11070,37 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
} else if ((StringNICmp(temp,"C", 1) == 0)) {
retcode = ACCN_EMBL_WGS;
} else if ((StringNICmp(temp,"D", 1) == 0)) {
- retcode = ACCN_NCBI_WGS;
+ retcode = ACCN_NCBI_WGS_TPA;
} else if ((StringNICmp(temp,"E", 1) == 0)) {
- retcode = ACCN_DDBJ_WGS;
+ retcode = ACCN_DDBJ_WGS_TPA;
+ } else if ((StringNICmp(temp,"F", 1) == 0)) {
+ retcode = ACCN_EMBL_WGS;
+ } else if ((StringNICmp(temp,"G", 1) == 0)) {
+ retcode = ACCN_NCBI_TSA;
+ } else if ((StringNICmp(temp,"H", 1) == 0)) {
+ retcode = ACCN_EMBL_TSA;
+ } else if ((StringNICmp(temp,"I", 1) == 0)) {
+ retcode = ACCN_DDBJ_TSA;
+ } else if ((StringNICmp(temp,"J", 1) == 0)) {
+ retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"K", 1) == 0)) {
+ retcode = ACCN_NCBI_TARGETED;
+ } else if ((StringNICmp(temp,"L", 1) == 0)) {
+ retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"M", 1) == 0)) {
+ retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"N", 1) == 0)) {
+ retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"O", 1) == 0)) {
+ retcode = ACCN_EMBL_WGS;
+ } else if ((StringNICmp(temp,"P", 1) == 0)) {
+ retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"Q", 1) == 0)) {
+ retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"R", 1) == 0)) {
+ retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"S", 1) == 0)) {
+ retcode = ACCN_NCBI_PATENT;
} else
retval = FALSE;
while (*s) {
@@ -11225,3 +11366,28 @@ NLM_EXTERN Boolean IsSkippableDbtag (DbtagPtr dbt)
}
+NLM_EXTERN Boolean DoesCDSEndWithStopCodon (SeqFeatPtr cds)
+{
+ ByteStorePtr bs;
+ CharPtr prot_str;
+ Boolean retval = FALSE;
+
+ if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) {
+ return FALSE;
+ }
+ bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
+ if (bs == NULL) return FALSE;
+ prot_str = BSMerge (bs, NULL);
+ bs = BSFree (bs);
+ if (prot_str == NULL) return FALSE;
+
+ if (prot_str[StringLen (prot_str) - 1] == '*') {
+ retval = TRUE;
+ } else {
+ retval = FALSE;
+ }
+ prot_str = MemFree (prot_str);
+ return retval;
+}
+
+
diff --git a/api/sequtil.h b/api/sequtil.h
index bb915f95..e5c9e18f 100644
--- a/api/sequtil.h
+++ b/api/sequtil.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.61 $
+* $Revision: 6.65 $
*
* File Description: Sequence Utilities for objseq and objsset
*
@@ -452,9 +452,9 @@ NLM_EXTERN CharPtr SeqIdPrint(SeqIdPtr sip, CharPtr buf, Uint1 format);
NLM_EXTERN CharPtr SeqIdWrite(SeqIdPtr sip, CharPtr buf, Uint1 format, Uint4 buflen);
NLM_EXTERN Int4 SeqIdLabelLen (SeqIdPtr isip, Uint1 format);
NLM_EXTERN CharPtr SeqIdWholeLabel (SeqIdPtr isip, Uint1 format);
-NLM_EXTERN Boolean GetAccessionFromSeqId(SeqIdPtr sip, Int4Ptr gi,
+NLM_EXTERN Boolean GetAccessionFromSeqId(SeqIdPtr sip, BIG_ID_PNTR gi,
CharPtr PNTR id);
-NLM_EXTERN Boolean GetAccessionVersionFromSeqId(SeqIdPtr sip, Int4Ptr gi,
+NLM_EXTERN Boolean GetAccessionVersionFromSeqId(SeqIdPtr sip, BIG_ID_PNTR gi,
CharPtr PNTR id, Boolean get_version);
NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf);
@@ -985,6 +985,17 @@ NLM_EXTERN Boolean LIBCALL NAccnIsDDBJ (CharPtr s);
#define ACCN_REFSEQ_ARTIFICIAL_ASSEMBLY 68
#define ACCN_REFSEQ_WGS 69
+#define ACCN_NCBI_OPTICAL 70
+
+#define ACCN_NCBI_WGS_TPA 71
+#define ACCN_NCBI_WGS_TPA_PROT 72
+#define ACCN_EMBL_WGS_TPA 73
+#define ACCN_EMBL_WGS_TPA_PROT 74
+#define ACCN_DDBJ_WGS_TPA 75
+#define ACCN_DDBJ_WGS_TPA_PROT 76
+
+#define ACCN_NCBI_TARGETED 77
+
/* Some accessions prefix can be either protein or nucleotide
such as NCBI PATENT I, AR .. or segmented set Bioseqs 'AH'
@@ -1000,7 +1011,7 @@ NLM_EXTERN Boolean LIBCALL NAccnIsDDBJ (CharPtr s);
/*
Accession definitively points to a protein record
*/
-#define ACCN_IS_PROT(c) (((c)==ACCN_SWISSPROT) || ( (c)==ACCN_NCBI_PROT) || ((c)== ACCN_EMBL_PROT) || ((c)== ACCN_DDBJ_PROT) || ((c)== ACCN_REFSEQ_PROT) || ((c)== ACCN_IS_PROTEIN) || ((c)== ACCN_REFSEQ_PROT_PREDICTED) || ((c)== ACCN_NCBI_TPA_PROT) || ((c)== ACCN_EMBL_TPA_PROT) || ((c)== ACCN_DDBJ_TPA_PROT) || ((c)== ACCN_NCBI_WGS_PROT) || ((c)== ACCN_EMBL_WGS_PROT) || ((c)== ACCN_DDBJ_WGS_PROT))
+#define ACCN_IS_PROT(c) (((c)==ACCN_SWISSPROT) || ( (c)==ACCN_NCBI_PROT) || ((c)== ACCN_EMBL_PROT) || ((c)== ACCN_DDBJ_PROT) || ((c)== ACCN_REFSEQ_PROT) || ((c)== ACCN_IS_PROTEIN) || ((c)== ACCN_REFSEQ_PROT_PREDICTED) || ((c)== ACCN_NCBI_TPA_PROT) || ((c)== ACCN_EMBL_TPA_PROT) || ((c)== ACCN_DDBJ_TPA_PROT) || ((c)== ACCN_NCBI_WGS_PROT) || ((c)== ACCN_EMBL_WGS_PROT) || ((c)== ACCN_DDBJ_WGS_PROT) || ((c)== ACCN_NCBI_WGS_TPA_PROT) || ((c)== ACCN_EMBL_WGS_TPA_PROT) || ((c)== ACCN_DDBJ_WGS_TPA_PROT))
/*
Accession definitively points to a nucleotide record
@@ -1014,27 +1025,27 @@ NLM_EXTERN Boolean LIBCALL NAccnIsDDBJ (CharPtr s);
Define to detect Genbank's accessions: Genbank-subsumed GSDB accession numbers
are defined to be Genbank's as well as GSDB DIRSUB records.
*/
-#define ACCN_IS_GENBANK(c) ((((c)&65535) == ACCN_NCBI_GSDB) || (((c)&65535)==ACCN_GSDB_DIRSUB) || (((c)&65535) == ACCN_NCBI_EST) || (((c)&65535) == ACCN_NCBI_DIRSUB) || (((c)&65535) == ACCN_NCBI_GENOME) || (((c)&65535) == ACCN_NCBI_PATENT) || (((c)&65535) == ACCN_NCBI_HTGS) || (((c)&65535) == ACCN_NCBI_GSS) || (((c)&65535) == ACCN_NCBI_STS) || (((c)&65535) == ACCN_NCBI_BACKBONE) || (((c)&65535) == ACCN_NCBI_SEGSET) || (((c)&65535) == ACCN_NCBI_WGS) || (((c)&65535) == ACCN_NCBI_OTHER) || (((c)&65535) == ACCN_NCBI_PROT) || (((c)&65535) == ACCN_NCBI_cDNA) || (((c)&65535) == ACCN_NCBI_TSA) || (((c)&65535) == ACCN_NCBI_TSA_PROT) || (((c)&65535) == ACCN_EMBL_GB) || (((c)&65535) == ACCN_EMBL_GB_DDBJ || (((c)&65535) == ACCN_GB_DDBJ)) )
+#define ACCN_IS_GENBANK(c) ((((c)&65535) == ACCN_NCBI_GSDB) || (((c)&65535)==ACCN_GSDB_DIRSUB) || (((c)&65535) == ACCN_NCBI_EST) || (((c)&65535) == ACCN_NCBI_DIRSUB) || (((c)&65535) == ACCN_NCBI_GENOME) || (((c)&65535) == ACCN_NCBI_PATENT) || (((c)&65535) == ACCN_NCBI_HTGS) || (((c)&65535) == ACCN_NCBI_GSS) || (((c)&65535) == ACCN_NCBI_STS) || (((c)&65535) == ACCN_NCBI_BACKBONE) || (((c)&65535) == ACCN_NCBI_SEGSET) || (((c)&65535) == ACCN_NCBI_WGS) || (((c)&65535) == ACCN_NCBI_OTHER) || (((c)&65535) == ACCN_NCBI_OPTICAL) || (((c)&65535) == ACCN_NCBI_PROT) || (((c)&65535) == ACCN_NCBI_cDNA) || (((c)&65535) == ACCN_NCBI_TSA) || (((c)&65535) == ACCN_NCBI_TSA_PROT) || (((c)&65535) == ACCN_EMBL_GB) || (((c)&65535) == ACCN_EMBL_GB_DDBJ || (((c)&65535) == ACCN_GB_DDBJ)) )
/* XM_,NP_,NM_,NT_,NC_ reference sequence records created and curated by NCBI
REFSEQ project
*/
#define ACCN_IS_REFSEQ(c) (((c)== ACCN_REFSEQ_PROT) || ((c)== ACCN_REFSEQ_mRNA) || ((c)== ACCN_REFSEQ_CONTIG) || ((c)== ACCN_REFSEQ_CHROMOSOME) || ((c)== ACCN_REFSEQ_mRNA_PREDICTED) || ((c)== ACCN_REFSEQ_PROT_PREDICTED) || ((c)== ACCN_REFSEQ_GENOMIC) || ((c)== ACCN_REFSEQ_ARTIFICIAL_ASSEMBLY) || ((c)== ACCN_REFSEQ_WGS) || (((c)&65535)== ACCN_REFSEQ) )
-#define ACCN_IS_TPA(c) (((c)== ACCN_NCBI_TPA) || ((c)== ACCN_NCBI_TPA_PROT) || ((c)== ACCN_EMBL_TPA) || ((c)== ACCN_EMBL_TPA_PROT) || ((c)== ACCN_DDBJ_TPA) || ((c)== ACCN_DDBJ_TPA_PROT))
+#define ACCN_IS_TPA(c) (((c)== ACCN_NCBI_TPA) || ((c)== ACCN_NCBI_TPA_PROT) || ((c)== ACCN_EMBL_TPA) || ((c)== ACCN_EMBL_TPA_PROT) || ((c)== ACCN_DDBJ_TPA) || ((c)== ACCN_DDBJ_TPA_PROT) || ((c)== ACCN_NCBI_WGS_TPA) || ((c)== ACCN_NCBI_WGS_TPA_PROT) || ((c)== ACCN_EMBL_WGS_TPA) || ((c)== ACCN_EMBL_WGS_TPA_PROT) || ((c)== ACCN_DDBJ_WGS_TPA) || ((c)== ACCN_DDBJ_WGS_TPA_PROT))
-#define ACCN_IS_WGS(c) (((c)== ACCN_NCBI_WGS) || ((c)== ACCN_NCBI_WGS_PROT) || ((c)== ACCN_EMBL_WGS) || ((c)== ACCN_EMBL_WGS_PROT) || ((c)== ACCN_DDBJ_WGS) || ((c)== ACCN_DDBJ_WGS_PROT) || ((c)== ACCN_REFSEQ_WGS))
+#define ACCN_IS_WGS(c) (((c)== ACCN_NCBI_WGS) || ((c)== ACCN_NCBI_WGS_PROT) || ((c)== ACCN_EMBL_WGS) || ((c)== ACCN_EMBL_WGS_PROT) || ((c)== ACCN_DDBJ_WGS) || ((c)== ACCN_DDBJ_WGS_PROT) || ((c)== ACCN_REFSEQ_WGS) || ((c)== ACCN_NCBI_WGS_TPA) || ((c)== ACCN_NCBI_WGS_TPA_PROT) || ((c)== ACCN_EMBL_WGS_TPA) || ((c)== ACCN_EMBL_WGS_TPA_PROT) || ((c)== ACCN_DDBJ_WGS_TPA) || ((c)== ACCN_DDBJ_WGS_TPA_PROT))
#define ACCN_IS_TSA(c) (((c)== ACCN_NCBI_TSA) || ((c)== ACCN_NCBI_TSA_PROT) || ((c)== ACCN_EMBL_TSA) || ((c)== ACCN_EMBL_TSA_PROT) || ((c)== ACCN_DDBJ_TSA) || ((c)== ACCN_DDBJ_TSA_PROT))
-#define ACCN_IS_NCBI(c) (ACCN_IS_REFSEQ((c)) || ACCN_IS_GENBANK((c)) || ((c)== ACCN_NCBI_TPA) || ((c)== ACCN_NCBI_TPA_PROT) || ((c)== ACCN_NCBI_WGS) || ((c)== ACCN_NCBI_WGS_PROT) || ((c)== ACCN_NCBI_TSA))
+#define ACCN_IS_NCBI(c) (ACCN_IS_REFSEQ((c)) || ACCN_IS_GENBANK((c)) || ((c)== ACCN_NCBI_TPA) || ((c)== ACCN_NCBI_TPA_PROT) || ((c)== ACCN_NCBI_WGS) || ((c)== ACCN_NCBI_WGS_PROT) || ((c)== ACCN_NCBI_TSA) || ((c)== ACCN_NCBI_WGS_TPA) || ((c)== ACCN_NCBI_WGS_TPA_PROT) || ((c)== ACCN_NCBI_TARGETED))
/*
Macro to detect EMBL accession numbers (can also belong to another DB)
*/
-#define ACCN_IS_EMBL(c) ( (((c)&65535) == ACCN_EMBL_EST) || (((c)&65535) == ACCN_EMBL_DIRSUB) || (((c)&65535) == ACCN_EMBL_GENOME) || (((c)&65535) == ACCN_EMBL_PATENT) || (((c)&65535) == ACCN_EMBL_HTGS) || (((c)&65535) == ACCN_EMBL_CON) || (((c)&65535) == ACCN_EMBL_WGS) || (((c)&65535) == ACCN_EMBL_OTHER) || (((c)&65535) == ACCN_EMBL_PROT) || (((c)&65535) == ACCN_EMBL_GB) || (((c)&65535) == ACCN_EMBL_DDBJ) || (((c)&65535) == ACCN_EMBL_GB_DDBJ))
+#define ACCN_IS_EMBL(c) ( (((c)&65535) == ACCN_EMBL_EST) || (((c)&65535) == ACCN_EMBL_DIRSUB) || (((c)&65535) == ACCN_EMBL_GENOME) || (((c)&65535) == ACCN_EMBL_PATENT) || (((c)&65535) == ACCN_EMBL_HTGS) || (((c)&65535) == ACCN_EMBL_CON) || (((c)&65535) == ACCN_EMBL_WGS) || (((c)&65535) == ACCN_EMBL_OTHER) || (((c)&65535) == ACCN_EMBL_PROT) || (((c)&65535) == ACCN_EMBL_GB) || (((c)&65535) == ACCN_EMBL_DDBJ) || (((c)&65535) == ACCN_EMBL_GB_DDBJ) || (((c)&65535) == ACCN_EMBL_WGS_TPA) || (((c)&65535) == ACCN_EMBL_WGS_TPA_PROT))
-#define ACCN_IS_DDBJ(c) ((((c)&65535) == ACCN_DDBJ_EST) || (((c)&65535) == ACCN_DDBJ_DIRSUB) || (((c)&65535) == ACCN_DDBJ_GENOME) || (((c)&65535) == ACCN_DDBJ_PATENT) || (((c)&65535) == ACCN_DDBJ_HTGS) || (((c)&65535) == ACCN_DDBJ_CON) || (((c)&65535) == ACCN_DDBJ_WGS) || (((c)&65535) == ACCN_DDBJ_OTHER) || (((c)&65535) == ACCN_DDBJ_PROT) || (((c)&65535) == ACCN_DDBJ_GSS) || (((c)&65535) == ACCN_GB_DDBJ) || (((c)&65535) == ACCN_EMBL_DDBJ) || (((c)&65535) == ACCN_EMBL_GB_DDBJ))
+#define ACCN_IS_DDBJ(c) ((((c)&65535) == ACCN_DDBJ_EST) || (((c)&65535) == ACCN_DDBJ_DIRSUB) || (((c)&65535) == ACCN_DDBJ_GENOME) || (((c)&65535) == ACCN_DDBJ_PATENT) || (((c)&65535) == ACCN_DDBJ_HTGS) || (((c)&65535) == ACCN_DDBJ_CON) || (((c)&65535) == ACCN_DDBJ_WGS) || (((c)&65535) == ACCN_DDBJ_OTHER) || (((c)&65535) == ACCN_DDBJ_PROT) || (((c)&65535) == ACCN_DDBJ_GSS) || (((c)&65535) == ACCN_GB_DDBJ) || (((c)&65535) == ACCN_EMBL_DDBJ) || (((c)&65535) == ACCN_EMBL_GB_DDBJ) || (((c)&65535) == ACCN_EMBL_WGS_TPA) || (((c)&65535) == ACCN_EMBL_WGS_TPA_PROT))
#define ACCN_IS_SWISSPROT(c) ((c)== ACCN_SWISSPROT)
/*
@@ -1068,6 +1079,7 @@ NLM_EXTERN SeqIdPtr SeqIdDupBestList (SeqIdPtr id_list);
NLM_EXTERN SeqIdPtr SeqIdListfromSeqLoc (ValNodePtr vnpslp);
NLM_EXTERN Boolean IsSkippableDbtag (DbtagPtr dbt);
+NLM_EXTERN Boolean DoesCDSEndWithStopCodon (SeqFeatPtr cds);
#ifdef __cplusplus
diff --git a/api/sqnutil1.c b/api/sqnutil1.c
index 15ce1620..4c03b57b 100644
--- a/api/sqnutil1.c
+++ b/api/sqnutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.716 $
+* $Revision: 6.906 $
*
* File Description:
*
@@ -64,6 +64,8 @@
#include <valid.h>
#include <objvalid.h>
#include <valapi.h>
+#include <findrepl.h>
+
#define NLM_GENERATED_CODE_PROTO
#include <objmacro.h>
@@ -176,68 +178,6 @@ NLM_EXTERN void NormalizeDescriptorOrder (
SeqEntryExplore (sep, NULL, NormalizeDescriptorProc);
}
-NLM_EXTERN DatePtr DateAdvance (DatePtr dp, Uint1 monthsToAdd)
-
-{
- if (dp == NULL) {
- dp = DateCurr ();
- }
- if (dp != NULL && dp->data [0] == 1 && dp->data [1] > 0) {
- while (monthsToAdd > 12) {
- monthsToAdd--;
- (dp->data [1])++;
- }
- if (dp->data [2] < 13 - monthsToAdd) {
- (dp->data [2]) += monthsToAdd;
- } else {
- (dp->data [1])++;
- (dp->data [2]) -= (12 - monthsToAdd);
- }
- if (dp->data [2] == 0) {
- dp->data [2] = 1;
- }
- if (dp->data [3] == 0) {
- switch (dp->data [2]) {
- case 4 :
- case 6 :
- case 9 :
- case 11 :
- dp->data [3] = 30;
- break;
- case 2 :
- dp->data [3] = 28;
- break;
- default :
- dp->data [3] = 31;
- break;
- }
- }
- }
- if (dp != NULL) {
- switch (dp->data [2]) {
- case 4 :
- case 6 :
- case 9 :
- case 11 :
- if (dp->data [3] > 30) {
- dp->data [3] = 30;
- }
- break;
- case 2 :
- if (dp->data [3] > 28) {
- dp->data [3] = 28;
- }
- break;
- default :
- if (dp->data [3] > 31) {
- dp->data [3] = 31;
- }
- break;
- }
- }
- return dp;
-}
-
typedef struct orgscan {
ObjMgrPtr omp;
Int2 nuclCode;
@@ -295,8 +235,11 @@ static Boolean OrgScanGatherFunc (GatherContextPtr gcp)
sfp = (SeqFeatPtr) gcp->thisitem;
switch (subtype) {
case FEATDEF_ORG :
+ //LCOV_EXCL_START
+ //org features are converted to biosrc features in BasicCleanup
orp = (OrgRefPtr) sfp->data.value.ptrvalue;
break;
+ //LCOV_EXCL_STOP
case FEATDEF_BIOSRC :
biop = (BioSourcePtr) sfp->data.value.ptrvalue;
break;
@@ -320,8 +263,11 @@ static Boolean OrgScanGatherFunc (GatherContextPtr gcp)
}
break;
case Seq_descr_org :
+ //LCOV_EXCL_START
+ // org descriptors are converted to biosrc descriptors in basiccleanup
orp = (OrgRefPtr) sdp->data.ptrvalue;
break;
+ //LCOV_EXCL_STOP
case Seq_descr_source :
biop = (BioSourcePtr) sdp->data.ptrvalue;
break;
@@ -383,6 +329,7 @@ static Boolean OrgScanGatherFunc (GatherContextPtr gcp)
return TRUE;
}
+//LCOV_EXCL_START
static Int2 SeqEntryOrEntityIDToGeneticCode (SeqEntryPtr sep, Uint2 entityID, BoolPtr mito,
CharPtr taxname, size_t maxsize,
BioSourcePtr PNTR biopp)
@@ -460,6 +407,7 @@ NLM_EXTERN Int2 SeqEntryToBioSource (SeqEntryPtr sep, BoolPtr mito, CharPtr taxn
return SeqEntryOrEntityIDToGeneticCode (sep, 0, mito, taxname, maxsize, biopp);
}
+
NLM_EXTERN Boolean BioseqToGeneticCode (
BioseqPtr bsp,
Int2Ptr gencodep,
@@ -559,511 +507,6 @@ NLM_EXTERN Boolean BioseqToGeneticCode (
}
-static Boolean FindBspItem (GatherContextPtr gcp)
-
-{
- BioseqPtr PNTR bspp;
-
- bspp = (BioseqPtr PNTR) gcp->userdata;
- if (bspp != NULL && gcp->thistype == OBJ_BIOSEQ) {
- *bspp = (BioseqPtr) gcp->thisitem;
- }
- return TRUE;
-}
-
-NLM_EXTERN BioseqPtr GetBioseqGivenIDs (Uint2 entityID, Uint4 itemID, Uint2 itemtype)
-
-{
- BioseqPtr bsp;
-
- bsp = NULL;
- if (entityID > 0 && itemID > 0 && itemtype == OBJ_BIOSEQ) {
- GatherItem (entityID, itemID, itemtype, (Pointer) (&bsp), FindBspItem);
- }
- return bsp;
-}
-
-NLM_EXTERN BioseqPtr GetBioseqGivenSeqLoc (SeqLocPtr slp, Uint2 entityID)
-
-{
- BioseqPtr bsp;
- SeqEntryPtr sep;
- SeqIdPtr sip;
-
- if (slp == NULL) return NULL;
- bsp = NULL;
- sip = SeqLocId (slp);
- if (sip != NULL) {
- bsp = BioseqFind (sip);
- } else if (entityID > 0) {
- slp = SeqLocFindNext (slp, NULL);
- if (slp != NULL) {
- sip = SeqLocId (slp);
- if (sip != NULL) {
- bsp = BioseqFind (sip);
- if (bsp != NULL) {
- sep = GetBestTopParentForData (entityID, bsp);
- if (sep != NULL) {
- sep = FindNucSeqEntry (sep);
- if (sep != NULL && sep->choice == 1) {
- bsp = (BioseqPtr) sep->data.ptrvalue;
- }
- }
- }
- }
- }
- }
- return bsp;
-}
-
-typedef struct tripletdata {
- Uint2 entityID;
- Uint4 itemID;
- Uint2 itemtype;
- Pointer lookfor;
-} TripletData, PNTR TripletDataPtr;
-
-static Boolean FindIDsFromPointer (GatherContextPtr gcp)
-
-{
- TripletDataPtr tdp;
-
- tdp = (TripletDataPtr) gcp->userdata;
- if (tdp != NULL && gcp->thisitem == tdp->lookfor) {
- tdp->entityID = gcp->entityID;
- tdp->itemID = gcp->itemID;
- tdp->itemtype = gcp->thistype;
- }
- return TRUE;
-}
-
-NLM_EXTERN Uint4 GetItemIDGivenPointer (Uint2 entityID, Uint2 itemtype, Pointer lookfor)
-
-{
- GatherScope gs;
- TripletData td;
-
- if (entityID > 0 && itemtype > 0 && itemtype < OBJ_MAX && lookfor != NULL) {
- td.entityID = 0;
- td.itemID = 0;
- td.itemtype = 0;
- td.lookfor = lookfor;
- MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
- gs.seglevels = 1;
- gs.get_feats_location = FALSE;
- MemSet ((Pointer)(gs.ignore), (int)(FALSE), (size_t)(OBJ_MAX * sizeof(Boolean)));
- /* gs.ignore[itemtype] = FALSE; */
- GatherEntity (entityID, (Pointer) (&td), FindIDsFromPointer, &gs);
- if (td.entityID == entityID && td.itemID > 0 && td.itemtype == itemtype) {
- return td.itemID;
- }
- }
- return 0;
-}
-
-static void AddNucPart (BioseqPtr segseq, BioseqSetPtr parts, SeqEntryPtr addme)
-
-{
- BioseqPtr bsp;
- SeqLocPtr slp;
- SeqEntryPtr tmp;
-
- if (segseq == NULL || addme == NULL) return;
- if (addme->choice != 1 || addme->data.ptrvalue == NULL) return;
- bsp = (BioseqPtr) addme->data.ptrvalue;
-
- slp = ValNodeNew ((ValNodePtr) segseq->seq_ext);
- if (slp == NULL) return;
- if (segseq->seq_ext == NULL) {
- segseq->seq_ext = (Pointer) slp;
- }
- if (bsp->length >= 0) {
- segseq->length += bsp->length;
- slp->choice = SEQLOC_WHOLE;
- slp->data.ptrvalue = (Pointer) SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
- } else {
- slp->choice = SEQLOC_NULL;
- addme = SeqEntryFree (addme);
- return;
- }
-
- if (parts == NULL) {
- addme = SeqEntryFree (addme);
- return;
- }
- if (parts->seq_set != NULL) {
- tmp = parts->seq_set;
- while (tmp->next != NULL) {
- tmp = tmp->next;
- }
- tmp->next = addme;
- } else {
- parts->seq_set = addme;
- }
-}
-
-NLM_EXTERN void GetSeqEntryParent (SeqEntryPtr target, Pointer PNTR parentptr, Uint2Ptr parenttype)
-
-{
- ObjMgrPtr omp;
- ObjMgrDataPtr omdp;
-
- if (parentptr == NULL || parenttype == NULL) return;
- *parenttype = 0;
- *parentptr = NULL;
- if (target == NULL || target->data.ptrvalue == NULL) return;
- omp = ObjMgrGet ();
- if (omp == NULL) return;
- omdp = ObjMgrFindByData (omp, target->data.ptrvalue);
- if (omdp == NULL) return;
- *parenttype = omdp->parenttype;
- *parentptr = omdp->parentptr;
-}
-
-NLM_EXTERN void SaveSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr PNTR omdptopptr, ObjMgrData PNTR omdataptr)
-
-{
- ObjMgrPtr omp;
- ObjMgrDataPtr omdp, omdptop = NULL;
-
- if (target == NULL || omdptopptr == NULL || omdataptr == NULL) return;
- *omdptopptr = NULL;
- MemSet ((Pointer) omdataptr, 0, sizeof (ObjMgrData));
- omp = ObjMgrGet ();
- if (omp == NULL) return;
- omdp = ObjMgrFindByData (omp, target->data.ptrvalue);
- if (omdp == NULL) return;
- omdptop = ObjMgrFindTop (omp, omdp);
- if (omdptop == NULL) return;
- if (omdptop->EntityID == 0) return;
- *omdptopptr = omdptop;
- MemCopy ((Pointer) omdataptr, omdptop, sizeof (ObjMgrData));
- omdptop->userdata = NULL;
-}
-
-extern void ObjMgrRemoveEntityIDFromRecycle (Uint2 entityID, ObjMgrPtr omp);
-extern void ObjMgrRecordOmdpByEntityID (Uint2 entityID, ObjMgrDataPtr omdp);
-NLM_EXTERN void RestoreSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr omdptop, ObjMgrData PNTR omdataptr)
-
-{
- ObjMgrPtr omp;
- ObjMgrDataPtr omdp, omdpnew = NULL;
-
- if (target == NULL || omdptop == NULL || omdataptr == NULL) return;
- if (omdataptr->EntityID == 0) return;
- omp = ObjMgrGet ();
- if (omp == NULL) return;
- omdp = ObjMgrFindByData (omp, target->data.ptrvalue);
- if (omdp == NULL) return;
- omdpnew = ObjMgrFindTop (omp, omdp);
- if (omdpnew == NULL) return;
- if (omdpnew != omdptop) {
- omdpnew->EntityID = omdataptr->EntityID;
- omdptop->EntityID = 0;
- omdpnew->lockcnt = omdataptr->lockcnt;
- omdpnew->tempload = omdataptr->tempload;
- omdpnew->clipboard = omdataptr->clipboard;
- omdpnew->dirty = omdataptr->dirty;
- omdpnew->being_freed = omdataptr->being_freed;
- omdpnew->free = omdataptr->free;
- omdpnew->options = omdataptr->options;
- ObjMgrRemoveEntityIDFromRecycle (omdpnew->EntityID, omp);
- ObjMgrRecordOmdpByEntityID (omdpnew->EntityID, omdpnew);
- }
- omdpnew->userdata = omdataptr->userdata;
-}
-
-NLM_EXTERN void AddSeqEntryToSeqEntry (SeqEntryPtr target, SeqEntryPtr insert, Boolean relink)
-
-{
- SeqEntryPtr first;
- BioseqPtr insertbsp;
- BioseqSetPtr nuc_prot;
- Uint2 parenttype;
- Pointer parentptr;
- BioseqSetPtr parts;
- BioseqPtr seg;
- BioseqSetPtr segs;
- BioseqPtr targetbsp;
- BioseqSetPtr targetbssp;
- SeqEntryPtr the_nuc;
- SeqEntryPtr the_prt;
- SeqEntryPtr tmp;
- ObjMgrDataPtr omdptop;
- ObjMgrData omdata;
-
- if (target == NULL || insert == NULL) return;
- if (target->data.ptrvalue == NULL || insert->data.ptrvalue == NULL) return;
-
- if (relink) {
- SaveSeqEntryObjMgrData (target, &omdptop, &omdata);
- GetSeqEntryParent (target, &parentptr, &parenttype);
- }
-
- if (IS_Bioseq (target) && IS_Bioseq (insert)) {
- targetbsp = (BioseqPtr) target->data.ptrvalue;
- insertbsp = (BioseqPtr) insert->data.ptrvalue;
- if (ISA_na (targetbsp->mol)) {
- if (ISA_na (insertbsp->mol)) {
-
- seg = BioseqNew ();
- if (seg == NULL) return;
- seg->mol = targetbsp->mol;
- seg->repr = Seq_repr_seg;
- seg->seq_ext_type = 1;
- seg->length = 0;
- /* seg->id = MakeSeqID ("SEG_dna"); */
- /* seg->id = MakeNewProteinSeqId (NULL, NULL); */
- seg->id = MakeUniqueSeqID ("segseq_");
- SeqMgrAddToBioseqIndex (seg);
-
- the_nuc = SeqEntryNew ();
- if (the_nuc == NULL) return;
- the_nuc->choice = 1;
- the_nuc->data.ptrvalue = (Pointer) seg;
-
- segs = BioseqSetNew ();
- if (segs == NULL) return;
- segs->_class = 2;
- segs->seq_set = the_nuc;
-
- parts = BioseqSetNew ();
- if (parts == NULL) return;
- parts->_class = 4;
-
- tmp = SeqEntryNew ();
- if (tmp == NULL) return;
- tmp->choice = 2;
- tmp->data.ptrvalue = (Pointer) parts;
- the_nuc->next = tmp;
-
- first = SeqEntryNew ();
- if (first == NULL) return;
- first->choice = 1;
- first->data.ptrvalue = (Pointer) targetbsp;
- target->choice = 2;
- target->data.ptrvalue = (Pointer) segs;
-
- AddNucPart (seg, parts, first);
- AddNucPart (seg, parts, insert);
-
- } else if (ISA_aa (insertbsp->mol)) {
-
- nuc_prot = BioseqSetNew ();
- if (nuc_prot == NULL) return;
- nuc_prot->_class = 1;
-
- the_nuc = SeqEntryNew ();
- if (the_nuc == NULL) return;
- the_nuc->choice = 1;
- the_nuc->data.ptrvalue = (Pointer) targetbsp;
- target->choice = 2;
- target->data.ptrvalue = (Pointer) nuc_prot;
- nuc_prot->seq_set = the_nuc;
-
- the_nuc->next = insert;
-
- }
- } else if (ISA_aa (targetbsp->mol)) {
- if (ISA_na (insertbsp->mol)) {
-
- nuc_prot = BioseqSetNew ();
- if (nuc_prot == NULL) return;
- nuc_prot->_class = 1;
-
- the_prt = SeqEntryNew ();
- if (the_prt == NULL) return;
- the_prt->choice = 1;
- the_prt->data.ptrvalue = (Pointer) targetbsp;
- target->choice = 2;
- target->data.ptrvalue = (Pointer) nuc_prot;
- nuc_prot->seq_set = insert;
-
- the_prt->next = insert->next;
- insert->next = the_prt;
-
- }
- }
- } else if (IS_Bioseq_set (target)) {
- targetbssp = (BioseqSetPtr) target->data.ptrvalue;
- if (targetbssp->_class == 1 && IS_Bioseq (insert)) {
- insertbsp = (BioseqPtr) insert->data.ptrvalue;
- if (ISA_aa (insertbsp->mol)) {
-
- nuc_prot = targetbssp;
- if (nuc_prot->seq_set != NULL) {
- tmp = nuc_prot->seq_set;
- while (tmp->next != NULL) {
- tmp = tmp->next;
- }
- tmp->next = insert;
- } else {
- nuc_prot->seq_set = insert;
- }
-
- }
- } else if (targetbssp->_class == 2 && IS_Bioseq (insert)) {
- insertbsp = (BioseqPtr) insert->data.ptrvalue;
- if (ISA_na (insertbsp->mol)) {
-
- the_nuc = FindNucSeqEntry (target);
- if (the_nuc != NULL && the_nuc->next != NULL) {
- tmp = the_nuc->next;
- if (tmp->choice == 2 && tmp->data.ptrvalue != NULL) {
- parts = (BioseqSetPtr) tmp->data.ptrvalue;
- if (parts->_class == 4 && the_nuc->choice == 1) {
- seg = (BioseqPtr) the_nuc->data.ptrvalue;
- AddNucPart (seg, parts, insert);
- }
- }
- }
-
- } else if (ISA_aa (insertbsp->mol)) {
-
- nuc_prot = BioseqSetNew ();
- if (nuc_prot == NULL) return;
- nuc_prot->_class = 1;
-
- first = SeqEntryNew ();
- if (first == NULL) return;
- first->choice = 2;
- first->data.ptrvalue = (Pointer) targetbssp;
- target->choice = 2;
- target->data.ptrvalue = (Pointer) nuc_prot;
- nuc_prot->seq_set = first;
-
- first->next = insert;
-
- }
- } else if (targetbssp->_class == 7) {
-
- if (targetbssp->seq_set != NULL) {
- tmp = targetbssp->seq_set;
- while (tmp->next != NULL) {
- tmp = tmp->next;
- }
- tmp->next = insert;
- } else {
- targetbssp->seq_set = insert;
- }
- } else if ((targetbssp->_class >= BioseqseqSet_class_mut_set &&
- targetbssp->_class <= BioseqseqSet_class_eco_set) ||
- targetbssp->_class == BioseqseqSet_class_wgs_set ||
- targetbssp->_class == BioseqseqSet_class_small_genome_set) {
-
- if (targetbssp->seq_set != NULL) {
- tmp = targetbssp->seq_set;
- while (tmp->next != NULL) {
- tmp = tmp->next;
- }
- tmp->next = insert;
- } else {
- targetbssp->seq_set = insert;
- }
-
- } else if (targetbssp->_class == BioseqseqSet_class_gen_prod_set) {
-
- if (targetbssp->seq_set != NULL) {
- tmp = targetbssp->seq_set;
- while (tmp->next != NULL) {
- tmp = tmp->next;
- }
- tmp->next = insert;
- } else {
- targetbssp->seq_set = insert;
- }
-
- }
- }
-
- if (relink) {
- SeqMgrLinkSeqEntry (target, parenttype, parentptr);
- RestoreSeqEntryObjMgrData (target, omdptop, &omdata);
- }
-}
-
-NLM_EXTERN void ReplaceSeqEntryWithSeqEntry (SeqEntryPtr target, SeqEntryPtr replaceWith, Boolean relink)
-
-{
- Uint2 parenttype;
- Pointer parentptr;
- ObjMgrDataPtr omdptop;
- ObjMgrData omdata;
-
- if (target == NULL || replaceWith == NULL) return;
-
- if (relink) {
- SaveSeqEntryObjMgrData (target, &omdptop, &omdata);
- GetSeqEntryParent (target, &parentptr, &parenttype);
- }
-
- if (target->choice == 1) {
- BioseqFree ((BioseqPtr) target->data.ptrvalue);
- } else if (target->choice == 2) {
- BioseqSetFree ((BioseqSetPtr) target->data.ptrvalue);
- }
- target->choice = replaceWith->choice;
- target->data.ptrvalue = replaceWith->data.ptrvalue;
- MemFree (replaceWith);
-
- if (relink) {
- SeqMgrLinkSeqEntry (target, parenttype, parentptr);
- RestoreSeqEntryObjMgrData (target, omdptop, &omdata);
- }
-}
-
-static void SeqEntryRemoveLoop (SeqEntryPtr sep, SeqEntryPtr del, SeqEntryPtr PNTR prev)
-
-{
- BioseqSetPtr bssp;
- SeqEntryPtr next;
-
- while (sep != NULL) {
- next = sep->next;
- if (sep == del) {
- *prev = sep->next;
- sep->next = NULL;
- SeqEntryFree (sep);
- } else {
- prev = (SeqEntryPtr PNTR) &(sep->next);
- if (IS_Bioseq_set (sep)) {
- bssp = (BioseqSetPtr) sep->data.ptrvalue;
- if (bssp != NULL) {
- SeqEntryRemoveLoop (bssp->seq_set, del, &(bssp->seq_set));
- }
- }
- }
- sep = next;
- }
-}
-
-NLM_EXTERN void RemoveSeqEntryFromSeqEntry (SeqEntryPtr top, SeqEntryPtr del, Boolean relink)
-
-{
- SeqEntryPtr dummy;
- ObjMgrDataPtr omdptop;
- ObjMgrData omdata;
- Uint2 parenttype;
- Pointer parentptr;
-
- if (top == NULL || del == NULL) return;
- if (top->data.ptrvalue == NULL || del->data.ptrvalue == NULL) return;
-
- if (relink) {
- SaveSeqEntryObjMgrData (top, &omdptop, &omdata);
- GetSeqEntryParent (top, &parentptr, &parenttype);
- }
-
- dummy = NULL;
- SeqEntryRemoveLoop (top, del, &dummy);
-
- if (relink) {
- SeqMgrLinkSeqEntry (top, parenttype, parentptr);
- RestoreSeqEntryObjMgrData (top, omdptop, &omdata);
- }
-}
-
-
typedef struct commontitle {
BioseqPtr bsp;
SeqDescPtr sdp;
@@ -1250,7 +693,7 @@ NLM_EXTERN void PromoteCommonTitlesToSet (SeqEntryPtr sep)
{
VisitSetsInSep (sep, NULL, PromoteCommonTitlesSetCallback);
}
-
+//LCOV_EXCL_STOP
NLM_EXTERN void DeleteMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
@@ -1267,18 +710,26 @@ NLM_EXTERN void DeleteMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 inde
sdp = bsp->descr;
prevsdp = (Pointer PNTR) &(bsp->descr);
} else if (IS_Bioseq_set (sep)) {
+ //LCOV_EXCL_START
+ //cleanup functions only call this during RenormalizeNucProtSets,
+ //and only for Bioseqs
bssp = (BioseqSetPtr) sep->data.ptrvalue;
sdp = bssp->descr;
prevsdp = (Pointer PNTR) &(bssp->descr);
+ //LCOV_EXCL_STOP
} else return;
hastitle = FALSE;
while (sdp != NULL) {
nextsdp = sdp->next;
if (sdp->choice == Seq_descr_title) {
if (hastitle) {
+ //LCOV_EXCL_START
+ //when called from RenormalizeNucProtSets,
+ //extra titles are already gone
*(prevsdp) = sdp->next;
sdp->next = NULL;
SeqDescFree (sdp);
+ //LCOV_EXCL_STOP
} else {
hastitle = TRUE;
prevsdp = (Pointer PNTR) &(sdp->next);
@@ -1351,6 +802,8 @@ NLM_EXTERN Int4 RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink)
sap = bsp->annot;
}
} else if (IS_Bioseq_set (sep)) {
+ //LCOV_EXCL_START
+ //should not have set inside nuc-prot set
bssp = (BioseqSetPtr) sep->data.ptrvalue;
ValNodeLink (&(bssp->descr), descr);
if (bssp->annot == NULL) {
@@ -1359,6 +812,7 @@ NLM_EXTERN Int4 RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink)
} else {
sap = bssp->annot;
}
+ //LCOV_EXCL_STOP
}
if (sap != NULL) {
tmp_sap = sap;
@@ -1383,6 +837,8 @@ NLM_EXTERN Int4 RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink)
}
+//LCOV_EXCL_START
+//only used by RemoveSingleItemSet, which is not used by cleanup
static Boolean SetHasAlignments (BioseqSetPtr bssp)
{
SeqAnnotPtr sap;
@@ -1400,7 +856,8 @@ static Boolean SetHasAlignments (BioseqSetPtr bssp)
}
-NLM_EXTERN Int4 RemoveSingleItemSet (SeqEntryPtr sep, Boolean relink)
+//not used by cleanup
+NLM_EXTERN Int4 RemoveSingleItemSet(SeqEntryPtr sep, Boolean relink)
{
SeqAnnotPtr annot;
BioseqPtr bsp;
@@ -1495,6 +952,21 @@ NLM_EXTERN Int4 RemoveSingleItemSet (SeqEntryPtr sep, Boolean relink)
}
+static Boolean IsExtractableDescriptor (SeqDescPtr sdp)
+{
+ if (sdp == NULL) {
+ return FALSE;
+ }
+ if (sdp->choice == Seq_descr_pub || sdp->choice == Seq_descr_source) {
+ return TRUE;
+ } else if (sdp->choice == Seq_descr_user && IsDBLinkObject(sdp->data.ptrvalue)) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+
NLM_EXTERN ValNodePtr ExtractBioSourceAndPubs (SeqEntryPtr sep)
{
@@ -1521,7 +993,7 @@ NLM_EXTERN ValNodePtr ExtractBioSourceAndPubs (SeqEntryPtr sep)
} else return NULL;
while (sdp != NULL) {
nextsdp = sdp->next;
- if (sdp->choice == Seq_descr_pub || sdp->choice == Seq_descr_source) {
+ if (IsExtractableDescriptor(sdp)) {
*(prevsdp) = sdp->next;
sdp->next = NULL;
if (descr == NULL) {
@@ -1724,6 +1196,7 @@ NLM_EXTERN SeqEntryPtr LIBCALL GetTopSeqEntryForEntityID (Uint2 entityID)
}
return NULL;
}
+//LCOV_EXCL_STOP
NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr)
@@ -2015,6 +1488,7 @@ NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean
SetSeqLocPartialEx (location, partial5, partial3, -1);
}
+//LCOV_EXCL_START
NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location)
{
@@ -2236,6 +1710,7 @@ NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCI (VoidPtr ptr1, VoidPtr ptr2)
return NaturalStringICmp (str1, str2);
}
+//LCOV_EXCL_STOP
NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list)
@@ -2267,6 +1742,7 @@ NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list)
return list;
}
+//LCOV_EXCL_START
NLM_EXTERN ValNodePtr UniqueStringValNodeCS (ValNodePtr list)
{
@@ -2327,6 +1803,28 @@ NLM_EXTERN ValNodePtr UniqueStringValNodeCI (ValNodePtr list)
return list;
}
+NLM_EXTERN int LIBCALLBACK SortByChoice (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+ Uint1 chs1;
+ Uint1 chs2;
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+
+ if (ptr1 == NULL || ptr2 == NULL) return 0;
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 == NULL || vnp2 == NULL) return 0;
+ chs1 = (Uint1) vnp1->choice;
+ chs2 = (Uint1) vnp2->choice;
+ if (chs1 > chs2) {
+ return 1;
+ } else if (chs1 < chs2) {
+ return -1;
+ }
+ return 0;
+}
+
NLM_EXTERN int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2)
{
@@ -3362,6 +2860,7 @@ NLM_EXTERN void PromoteXrefs (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID)
{
PromoteXrefsExEx (sfp, bsp, entityID, TRUE, FALSE, FALSE, FALSE, NULL);
}
+//LCOV_EXCL_STOP
/* begin BasicSeqEntryCleanup section */
@@ -3395,6 +2894,7 @@ static Boolean AlreadyInVnpList (ValNodePtr head, ValNodePtr curr)
return FALSE;
}
+//LCOV_EXCL_START
NLM_EXTERN CharPtr TrimSpacesAndSemicolons (CharPtr str)
{
@@ -3449,6 +2949,7 @@ NLM_EXTERN CharPtr TrimSpacesAndSemicolons (CharPtr str)
}
return str;
}
+//LCOV_EXCL_STOP
NLM_EXTERN CharPtr TrimSpacesAndJunkFromEnds (
CharPtr str,
@@ -3466,8 +2967,8 @@ NLM_EXTERN CharPtr TrimSpacesAndJunkFromEnds (
dst = str;
ptr = str;
ch = *ptr;
- if (ch != '\0' && ch <= ' ') {
- while (ch != '\0' && ch <= ' ') {
+ if (ch != '\0' && (ch <= ' ' || ch == ',' || ch == ';')) {
+ while (ch != '\0' && (ch <= ' ' || ch == ',' || ch == ';')) {
ptr++;
ch = *ptr;
}
@@ -3760,6 +3261,32 @@ static void CleanVisStringList (ValNodePtr PNTR vnpp)
}
}
+/*
+ * Walks the ValNode list at *vnpp, running three in-place string cleaners
+ * over each node's data.ptrvalue (TrimSpacesSemicolonsAndCommas,
+ * TrimSpacesAndJunkFromEnds with TRUE, Asn2gnbkCompressSpaces).  A node is
+ * then unlinked and released with ValNodeFreeData when HasNoText is true for
+ * its string or when AlreadyInVnpList (*vnpp, node) is true; otherwise it
+ * stays in the list.  Does nothing when vnpp is NULL.
+ */
+static void CleanVisStringJunkListAndCompress (ValNodePtr PNTR vnpp)
+
+{
+  ValNodePtr       curr;
+  ValNodePtr       following;
+  ValNodePtr PNTR  link;
+
+  if (vnpp == NULL) return;
+
+  /* link always addresses the pointer that leads to the current node */
+  link = vnpp;
+  for (curr = *vnpp; curr != NULL; curr = following) {
+    /* remember the successor now, the node may be freed below */
+    following = curr->next;
+
+    TrimSpacesSemicolonsAndCommas (curr->data.ptrvalue);
+    TrimSpacesAndJunkFromEnds (curr->data.ptrvalue, TRUE);
+    Asn2gnbkCompressSpaces (curr->data.ptrvalue);
+
+    if (! HasNoText (curr->data.ptrvalue) && ! AlreadyInVnpList (*vnpp, curr)) {
+      /* node survives, so the next candidate hangs off its next field */
+      link = &(curr->next);
+      continue;
+    }
+
+    /* splice the node out, then free it together with its string */
+    *link = following;
+    curr->next = NULL;
+    ValNodeFreeData (curr);
+  }
+}
+
static void CleanVisStringListAndCompress (ValNodePtr PNTR vnpp)
{
@@ -3844,6 +3371,8 @@ static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq)
choice = 4;
} else if (StringICmp (gbq->qual, "old_locus_tag") == 0) {
choice = 5;
+ } else if (StringICmp (gbq->qual, "gene_synonym") == 0) {
+ choice = 6;
}
if (choice > 0) {
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
@@ -3877,6 +3406,9 @@ static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq)
*/
return FALSE;
break;
+ case 6 :
+ if (StringHasNoText (gbq->val)) return FALSE;
+ ValNodeCopyStr (&(grp->syn), 0, gbq->val);
default :
break;
}
@@ -3942,6 +3474,7 @@ static CharPtr SimpleValuePos (CharPtr qval)
return (TextSave(bptr, eptr-bptr));
}
+//LCOV_EXCL_START
extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset);
extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset)
@@ -4060,6 +3593,7 @@ extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset)
return TRUE;
}
+//LCOV_EXCL_STOP
extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset);
extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset)
@@ -4071,15 +3605,18 @@ extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset)
CodeBreakPtr lastcbp;
Boolean locmap;
int num_errs;
+ Boolean packed_int = TRUE;
CharPtr pos;
Boolean pos_range = FALSE;
SeqIntPtr sintp;
SeqIdPtr sip;
Boolean sitesmap;
SeqLocPtr slp;
+ SeqLocPtr slp1, slp2;
SeqPntPtr spp;
Uint1 strand;
Int4 temp;
+ CharPtr tmp;
if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return FALSE;
if (StringHasNoText (val)) return FALSE;
@@ -4108,6 +3645,14 @@ extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset)
"transl_except parsing failed, %s, drop the transl_except", val);
return FALSE;
}
+ if (StringChr (pos, ',') != NULL) {
+ tmp = (CharPtr) MemNew ((StringLen (pos) + 10) * sizeof (Char));
+ if (tmp != NULL) {
+ sprintf (tmp, "join(%s)", pos);
+ MemFree (pos);
+ pos = tmp;
+ }
+ }
cbp->loc = Nlm_gbparseint (pos, &locmap, &sitesmap, &num_errs, sip);
if (cbp->loc == NULL) {
CodeBreakFree (cbp);
@@ -4122,8 +3667,7 @@ extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset)
if (spp != NULL) {
spp->point += offset;
}
- }
- if (cbp->loc->choice == SEQLOC_INT) {
+ } else if (cbp->loc->choice == SEQLOC_INT) {
sintp = cbp->loc->data.ptrvalue;
if (sintp == NULL) {
MemFree (pos);
@@ -4162,6 +3706,56 @@ extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset)
MemFree (pos);
return FALSE;
}
+ } else {
+ slp1 = dnaLoc_to_aaLoc (sfp, cbp->loc, TRUE, NULL, TRUE);
+ if (slp1 != NULL) {
+ slp2 = aaLoc_to_dnaLoc (sfp, slp1);
+ if (slp2 != NULL) {
+ SeqLocFree (cbp->loc);
+ cbp->loc = slp2;
+ }
+ SeqLocFree (slp1);
+ }
+ slp = SeqLocFindNext (cbp->loc, NULL);
+ while (slp != NULL) {
+ if (slp->choice == SEQLOC_PNT) {
+ spp = slp->data.ptrvalue;
+ if (spp != NULL) {
+ sintp = SeqIntNew();
+ if (sintp != NULL) {
+ sintp->id = SeqIdDup (spp->id);
+ sintp->from = spp->point;
+ sintp->to = spp->point;
+ sintp->strand = SeqLocStrand (sfp->location);
+ slp->choice = SEQLOC_INT;
+ slp->data.ptrvalue = sintp;
+ SeqPntFree (spp);
+ }
+ }
+ }
+ if (slp->choice == SEQLOC_INT) {
+ sintp = slp->data.ptrvalue;
+ if (sintp == NULL) {
+ MemFree (pos);
+ return FALSE;
+ }
+ sintp->from += offset;
+ sintp->to += offset;
+ if (sintp->from > sintp->to) {
+ temp = sintp->from;
+ sintp->from = sintp->to;
+ sintp->to = temp;
+ }
+ sintp->strand = SeqLocStrand (sfp->location);
+ } else {
+ packed_int = FALSE;
+ }
+ slp = SeqLocFindNext (cbp->loc, slp);
+ }
+ slp = cbp->loc;
+ if (packed_int && slp->choice == SEQLOC_MIX) {
+ slp->choice = SEQLOC_PACKED_INT;
+ }
}
/* add to code break list */
@@ -4286,21 +3880,24 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
{
Uint1 aa = 0;
- Char codon [16];
Uint1 curraa;
Uint1 from = 0;
- Int2 i;
Int2 j;
Boolean justTrnaText;
- Boolean okayToFree = TRUE;
SeqMapTablePtr smtp;
- CharPtr str;
Uint1 trpcodon [6];
+ /*
+ Char codon [16];
+ Int2 i;
+ Boolean okayToFree = TRUE;
+ CharPtr str;
+ */
/* look for tRNA-OTHER with actual amino acid in comment */
if (trp == NULL) return;
+ /*
if (sfp != NULL && sfp->comment != NULL && trp->codon [0] == 255) {
codon [0] = '\0';
if (StringNICmp (sfp->comment, "codon recognized: ", 18) == 0) {
@@ -4335,6 +3932,7 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
}
}
}
+ */
if (! CodonsAlreadyInOrder (trp)) {
StableMergeSort ((VoidPtr) &(trp->codon), 6, sizeof (Uint1), SortCodons);
@@ -4390,7 +3988,7 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
trp->codon [j] = trpcodon [j];
}
}
- if (StringCmp (sfp->comment, "fMet") != 0) {
+ if (StringCmp (sfp->comment, "fMet") != 0 && StringCmp (sfp->comment, "iMet") != 0) {
sfp->comment = MemFree (sfp->comment);
}
}
@@ -4407,7 +4005,7 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
trp->codon [j] = trpcodon [j];
}
}
- if (StringCmp (sfp->comment, "fMet") != 0) {
+ if (StringCmp (sfp->comment, "fMet") != 0 && StringCmp (sfp->comment, "iMet") != 0) {
sfp->comment = MemFree (sfp->comment);
}
}
@@ -4665,12 +4263,14 @@ static Boolean HandledGBQualOnRNA (SeqFeatPtr sfp, GBQualPtr gbq, Boolean isEmbl
Boolean emptyRNA;
Int4 from;
Boolean is_fMet = FALSE;
+ Boolean is_iMet = FALSE;
Boolean is_std_name = FALSE;
Int2 j;
Boolean justTrnaText;
size_t len;
CharPtr name;
CharPtr ptr;
+ RNAGenPtr rgp;
RnaRefPtr rrp;
SeqIntPtr sintp;
SeqIdPtr sip;
@@ -4718,6 +4318,7 @@ static Boolean HandledGBQualOnRNA (SeqFeatPtr sfp, GBQualPtr gbq, Boolean isEmbl
aa = ParseTRnaString (name, &justTrnaText, codon, FALSE);
if (aa != 0) {
is_fMet = (Boolean) (StringStr (name, "fMet") != NULL);
+ is_iMet = (Boolean) (StringStr (name, "iMet") != NULL);
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
trp = (tRNAPtr) MemNew (sizeof (tRNA));
if (trp != NULL) {
@@ -4747,6 +4348,19 @@ static Boolean HandledGBQualOnRNA (SeqFeatPtr sfp, GBQualPtr gbq, Boolean isEmbl
sfp->comment = str;
}
}
+ if (is_iMet) {
+ if (sfp->comment == NULL) {
+ sfp->comment = StringSave ("iMet");
+ } else {
+ len = StringLen (sfp->comment) + StringLen ("iMet") + 5;
+ str = MemNew (sizeof (Char) * len);
+ StringCpy (str, sfp->comment);
+ StringCat (str, "; ");
+ StringCat (str, "iMet");
+ sfp->comment = MemFree (sfp->comment);
+ sfp->comment = str;
+ }
+ }
}
CleanupTrna (sfp, trp);
}
@@ -4759,11 +4373,23 @@ static Boolean HandledGBQualOnRNA (SeqFeatPtr sfp, GBQualPtr gbq, Boolean isEmbl
if (rrp->type == 3 && rrp->ext.choice == 2) {
trp = (tRNAPtr) rrp->ext.value.ptrvalue;
if (trp != NULL && trp->aatype == 2) {
+ if (trp->aa == 77) {
+ if (StringICmp (gbq->val, "tRNA-fMet") == 0 || StringICmp (gbq->val, "tRNA-iMet") == 0) return FALSE;
+ }
if (trp->aa == ParseTRnaString (gbq->val, NULL, NULL, FALSE)) {
return TRUE;
}
}
}
+ if (rrp->ext.choice == 3) {
+ rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
+ if (rgp == NULL) return FALSE;
+ if (StringHasNoText (rgp->product)) {
+ rgp->product = StringSave (gbq->val);
+ return TRUE;
+ }
+ return FALSE;
+ }
if (rrp->ext.choice != 0 && rrp->ext.choice != 1) return FALSE;
name = (CharPtr) rrp->ext.value.ptrvalue;
if (! HasNoText (name)) {
@@ -5082,15 +4708,26 @@ static void CleanupRptUnitSeq (GBQualPtr gbq)
if (gbq == NULL) return;
if (StringHasNoText (gbq->val)) return;
+
+ /* do not clean if val contains non-sequence characters */
+ ptr = gbq->val;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (StringChr ("ACGTUacgtu", ch) == NULL) return;
+ ptr++;
+ ch = *ptr;
+ }
+
/* lower case, and convert U to T */
ptr = gbq->val;
ch = *ptr;
while (ch != '\0') {
if (IS_UPPER (ch)) {
ch = TO_LOWER (ch);
- if (ch == 'u') {
- ch = 't';
- }
+ *ptr = ch;
+ }
+ if (ch == 'u') {
+ ch = 't';
*ptr = ch;
}
ptr++;
@@ -5170,9 +4807,10 @@ static void CleanupReplace (GBQualPtr gbq)
while (ch != '\0') {
if (IS_UPPER (ch)) {
ch = TO_LOWER (ch);
- if (ch == 'u') {
- ch = 't';
- }
+ *ptr = ch;
+ }
+ if (ch == 'u') {
+ ch = 't';
*ptr = ch;
}
ptr++;
@@ -5248,6 +4886,20 @@ static void CleanupInference (GBQualPtr gbq)
ch = *ptr;
}
*dst = '\0';
+
+ dst = str;
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0') {
+ *dst = ch;
+ if ((ch == ':' || ch == ',') && *(ptr + 1) == '?' && *(ptr + 2) == '|') {
+ ptr += 2;
+ }
+ dst++;
+ ptr++;
+ ch = *ptr;
+ }
+ *dst = '\0';
}
static CharPtr evCategoryNoSpace [] = {
@@ -5558,7 +5210,7 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
prevqual = (GBQualPtr PNTR) &(sfp->qual);
while (gbq != NULL) {
CleanVisString (&(gbq->qual));
- CleanVisString (&(gbq->val));
+ CleanVisStringAndCompress (&(gbq->val));
if (gbq->qual == NULL) {
gbq->qual = StringSave ("");
}
@@ -5687,10 +5339,22 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
CleanupReplace (gbq);
unlink = FALSE;
} else if (StringICmp (gbq->qual, "rpt_unit_seq") == 0) {
- CleanupRptUnitSeq (gbq);
+ if (IsBaseRange (gbq->val)) {
+ gbq->qual = MemFree (gbq->qual);
+ gbq->qual = StringSave ("rpt_unit_range");
+ CleanupRptUnitRange (gbq);
+ } else {
+ CleanupRptUnitSeq (gbq);
+ }
unlink = FALSE;
} else if (StringICmp (gbq->qual, "rpt_unit_range") == 0) {
- CleanupRptUnitRange (gbq);
+ if (! IsBaseRange (gbq->val)) {
+ gbq->qual = MemFree (gbq->qual);
+ gbq->qual = StringSave ("rpt_unit_seq");
+ CleanupRptUnitSeq (gbq);
+ } else {
+ CleanupRptUnitRange (gbq);
+ }
unlink = FALSE;
} else if (sfp->data.choice == SEQFEAT_GENE && HandledGBQualOnGene (sfp, gbq)) {
} else if (sfp->data.choice == SEQFEAT_CDREGION && HandledGBQualOnCDS (sfp, gbq, &afterMe)) {
@@ -5720,6 +5384,15 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
StringICmp (str, "allelic") == 0 ||
StringICmp (str, "unknown") == 0) {
sfp->pseudo = TRUE;
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (IS_UPPER (ch)) {
+ *ptr = TO_LOWER (ch);
+ }
+ ptr++;
+ ch = *ptr;
+ }
}
unlink = FALSE;
} else if (StringICmp (gbq->qual, "ribosomal_slippage") == 0 ||
@@ -5821,8 +5494,27 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
} else {
unlink = FALSE;
}
+
if (StringICmp (gbq->qual, "mobile_element") == 0) {
- if (StringStr (gbq->val, " :") == 0 || StringStr (gbq->val, ": ") == 0) {
+ if (sfp->data.choice == SEQFEAT_IMP) {
+ ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
+ if (ifp != NULL) {
+ if (StringICmp (ifp->key, "repeat_region") == 0 && gbq->val != NULL) {
+ gbq->qual = MemFree (gbq->qual);
+ gbq->qual = StringSave ("mobile_element_type");
+ ifp->key = MemFree (ifp->key);
+ ifp->key = StringSave ("mobile_element");
+ sfp->idx.subtype = FEATDEF_mobile_element;
+ }
+ }
+ }
+ }
+ if (StringICmp (gbq->qual, "mobile_element") == 0) {
+ gbq->qual = MemFree (gbq->qual);
+ gbq->qual = StringSave ("mobile_element_type");
+ }
+ if (StringICmp (gbq->qual, "mobile_element_type") == 0) {
+ if (StringStr (gbq->val, " :") != NULL || StringStr (gbq->val, ": ") != NULL) {
len = StringLen (gbq->val) + 5;
ptr = StringChr (gbq->val, ':');
if (ptr != NULL) {
@@ -5839,31 +5531,19 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
}
}
}
-
- if (StringICmp (gbq->qual, "mobile_element") == 0) {
- if (sfp->data.choice == SEQFEAT_IMP) {
- ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
- if (ifp != NULL) {
- if (StringICmp (ifp->key, "repeat_region") == 0 && gbq->val != NULL) {
- gbq->qual = MemFree (gbq->qual);
- gbq->qual = StringSave ("mobile_element_type");
- ifp->key = MemFree (ifp->key);
- ifp->key = StringSave ("mobile_element");
- sfp->idx.subtype = FEATDEF_mobile_element;
- }
- }
- }
- }
+
if (StringICmp (gbq->qual, "estimated_length") == 0) {
all_digits = TRUE;
ptr = gbq->val;
- ch = *ptr;
- while (ch != '\0') {
- if (! IS_DIGIT (ch)) {
- all_digits = FALSE;
- }
- ptr++;
+ if (ptr != NULL) {
ch = *ptr;
+ while (ch != '\0') {
+ if (! IS_DIGIT (ch)) {
+ all_digits = FALSE;
+ }
+ ptr++;
+ ch = *ptr;
+ }
}
if (! all_digits) {
if (StringICmp (gbq->val, "unknown") != 0) {
@@ -5901,6 +5581,11 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
if (rpt_unit_range != NULL) {
CleanupRptUnit (rpt_unit_range);
}
+
+ if (StringHasNoText (gbq->qual) && StringHasNoText (gbq->val)) {
+ unlink = TRUE;
+ }
+
if (unlink) {
*(prevqual) = gbq->next;
gbq->next = NULL;
@@ -5962,7 +5647,7 @@ static Boolean GBQualsAlreadyInOrder (GBQualPtr list)
return TRUE;
}
-static GBQualPtr SortFeatureGBQuals (GBQualPtr list)
+NLM_EXTERN GBQualPtr SortFeatureGBQuals (GBQualPtr list)
{
size_t count, i;
@@ -5992,7 +5677,7 @@ static GBQualPtr SortFeatureGBQuals (GBQualPtr list)
return list;
}
-static void CleanupDuplicateGBQuals (GBQualPtr PNTR prevgbq)
+NLM_EXTERN void CleanupDuplicateGBQuals (GBQualPtr PNTR prevgbq)
{
GBQualPtr gbq;
@@ -6043,9 +5728,9 @@ static CharPtr illegalGbqualList [NUM_ILLEGAL_QUALS] = {
"protein_id",
"pseudo",
"transcript_id",
+ "translation",
"transl_except",
"transl_table",
- "translation",
};
static Int2 QualifierIsIllegal (CharPtr qualname)
@@ -6242,6 +5927,117 @@ static void RemoveSpaceBeforeAndAfterColon (CharPtr str)
}
}
+/*
+ * Applies a fixed list of literal phrase substitutions to *str through
+ * FindReplaceString (last two arguments FALSE, FALSE).  Every phrase
+ * contains a tilde or, in one case, a dropped author name; the replacement
+ * either doubles the tilde, turns it into a space, removes it, or restores
+ * the missing name.  Substitutions run in table order.  The whole body is
+ * compiled out when OS_MSWIN is defined.
+ */
+static void CorrectTildes (
+  CharPtr PNTR str
+)
+
+{
+#ifndef OS_MSWIN
+  static struct {
+    CharPtr  find;
+    CharPtr  replace;
+  } fixes [] = {
+    { "were ~25 cm in height (~3 weeks)", "were ~~25 cm in height (~~3 weeks)" },
+    { "generally ~3 weeks", "generally ~~3 weeks" },
+    { "sequencing (~4 96-well plates)", "sequencing (~~4 96-well plates)" },
+    { "size distribution (~2 kb)", "size distribution (~~2 kb)" },
+    { "sequencing (~3 96-well plates)", "sequencing (~~3 96-well plates)" },
+    { "vector. 1~2 ul of ligated", "vector. 1~~2 ul of ligated" },
+    /* disabled:
+    { "Lambda FLC I.~Islet cells were provided", "Lambda FLC I.~~Islet cells were provided" },
+    */
+    { "different strains~of mice", "different strains of mice" },
+    { "oligo-dT-NotI primer~(5'-biotin", "oligo-dT-NotI primer (5'-biotin" },
+    { "sizes of 200~800 bp were purified", "sizes of 200~~800 bp were purified" },
+    { "Tween 20 (~50 ml per tree)", "Tween 20 (~~50 ml per tree)" },
+    { "the SMART approach (~http://www.evrogen.com", "the SMART approach (http://www.evrogen.com" },
+    { "the morning (~10 am) with", "the morning (~~10 am) with" },
+    { "(host) sequences (~10%)", "(host) sequences (~~10%)" },
+    /* disabled:
+    { "unidirectionally.~ High quality", "unidirectionally. High quality" },
+    { "onlysubmitted.~ Average", "onlysubmitted. Average" },
+    */
+    { "Plasmid; ~The F03-1270", "Plasmid; The F03-1270" },
+    { "using STS-PCR~from Eb", "using STS-PCR from Eb" },
+    { "specific to~the Eb", "specific to the Eb" },
+    { "side of insert); , M.F., Lennon", "side of insert); Bonaldo, M.F., Lennon" },
+    { "Uni-ZAP XR vector. 1~2 ul of", "Uni-ZAP XR vector. 1~~2 ul of" },
+    { "from diploid~Secale montanum", "from diploid Secale montanum" },
+    { "homology with~U43516,", "homology with U43516," },
+    /* disabled:
+    { "from http//www.biobase.dk/~ddbase", "from http//www.biobase.dk/~~ddbase" },
+    */
+    { "plasmid; ~Assembled EST", "plasmid; Assembled EST" },
+    { "databases.~Different cDNA", "databases. Different cDNA" },
+    { "enzyme PstI.~DH5-alpha", "enzyme PstI. DH5-alpha" },
+    { "as they~were prepared", "as they were prepared" },
+    { "loci in~the genome", "loci in the genome" },
+    { "P{CaSpeR}Cp1~50C (FBti0004219)", "P{CaSpeR}Cp1~~50C (FBti0004219)" },
+    { "seedlings with 2~4 leaves", "seedlings with 2~~4 leaves" },
+    { "tween 20 (~50mLs per tree)", "tween 20 (~~50mLs per tree)" }
+  };
+  size_t  idx;
+
+  for (idx = 0; idx < sizeof (fixes) / sizeof (fixes [0]); idx++) {
+    FindReplaceString (str, fixes [idx].find, fixes [idx].replace, FALSE, FALSE);
+  }
+#endif
+}
+
+/*
+ * Normalizes ATCC and DSM identifiers inside a strain OrgMod.
+ *
+ * omp->subname is split with SplitStringAtSemicolon and each piece is
+ * trimmed.  A piece that begins with "ATCC" or "DSM" (compared with
+ * StringNICmp), optionally followed by one ':' or '/', and whose trimmed
+ * remainder satisfies StringIsAllDigits is rewritten as "<prefix> <digits>".
+ * The pieces are then joined with "; " and the result replaces
+ * omp->subname, so the value is re-joined even when no piece was rewritten.
+ *
+ * Nothing happens when omp is NULL, is not ORGMOD_strain, has no text, or
+ * when the split or the merge yields NULL.
+ *
+ * Ownership: the split list and its strings are temporary and are released
+ * with ValNodeFreeData before returning (previously they were leaked, as was
+ * the digit copy when the replacement buffer could not be allocated).
+ * NOTE(review): this relies on ValNodeMergeStrsEx returning a fresh string
+ * without consuming the list - confirm against ncbimisc.c.
+ */
+static void FixStrainForPrefix (OrgModPtr omp)
+
+{
+  Char        ch;
+  CharPtr     cpy;
+  ValNodePtr  head = NULL;
+  size_t      len;
+  CharPtr     pfx;
+  CharPtr     sfx;
+  CharPtr     str;
+  CharPtr     tmp;
+  ValNodePtr  vnp;
+
+  if (omp == NULL || omp->subtype != ORGMOD_strain) return;
+  str = omp->subname;
+  if (StringHasNoText (str)) return;
+
+  head = SplitStringAtSemicolon (str);
+  if (head == NULL) return;
+
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    str = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (str)) continue;
+    TrimSpacesAroundString (str);
+
+    pfx = NULL;
+    sfx = NULL;
+    if (StringNICmp (str, "ATCC", 4) == 0) {
+      pfx = "ATCC";
+      sfx = str + 4;
+    } else if (StringNICmp (str, "DSM", 3) == 0) {
+      pfx = "DSM";
+      sfx = str + 3;
+    }
+    if (pfx == NULL || sfx == NULL) continue;
+
+    /* allow a single ':' or '/' between the prefix and the number */
+    ch = *sfx;
+    if (ch == ':' || ch == '/') {
+      sfx++;
+    }
+    cpy = StringSave (sfx);
+    TrimSpacesAroundString (cpy);
+    if (! StringIsAllDigits (cpy)) {
+      cpy = MemFree (cpy);
+      continue;
+    }
+
+    /* prefix + one space + digits + terminator (one spare byte) */
+    len = StringLen (pfx) + StringLen (cpy) + 3;
+    tmp = (CharPtr) MemNew (len);
+    if (tmp == NULL) {
+      /* leave this piece untouched, but do not leak the digit copy */
+      cpy = MemFree (cpy);
+      continue;
+    }
+    StringCpy (tmp, pfx);
+    StringCat (tmp, " ");
+    StringCat (tmp, cpy);
+    vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+    vnp->data.ptrvalue = tmp;
+    cpy = MemFree (cpy);
+  }
+
+  tmp = ValNodeMergeStrsEx (head, "; ");
+
+  /* the pieces are no longer needed whether or not the merge succeeded */
+  head = ValNodeFreeData (head);
+
+  if (tmp == NULL) return;
+
+  omp->subname = MemFree (omp->subname);
+  omp->subname = tmp;
+}
static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon)
@@ -6268,13 +6064,18 @@ static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon)
TrimSpacesAndJunkFromEnds (omp->subname, FALSE);
RemoveFlankingQuotes (&(omp->subname));
CleanVisStringAndCompress (&(omp->attrib));
+ if (omp->subtype == ORGMOD_other && StringDoesHaveText (omp->subname)) {
+ CorrectTildes (&(omp->subname));
+ }
if (omp->subtype == ORGMOD_common && StringICmp (omp->subname, orpcommon) == 0) {
+ /*
unlink = TRUE;
+ */
} else if (last != NULL) {
if (HasNoText (omp->subname)) {
unlink = TRUE;
- } else if (last->subtype == omp->subtype &&
- StringICmp (last->subname, omp->subname) == 0 ||
+ } else if ((last->subtype == omp->subtype &&
+ StringICmp (last->subname, omp->subname) == 0) ||
(last->subtype == omp->subtype &&
last->subtype == ORGMOD_other &&
StringStr (last->subname, omp->subname) != NULL)) {
@@ -6305,6 +6106,7 @@ static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon)
omp = next;
}
+
for (omp = *ompp; omp != NULL; omp = omp->next) {
if (omp->subtype != ORGMOD_specimen_voucher &&
omp->subtype != ORGMOD_culture_collection &&
@@ -6338,6 +6140,13 @@ static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon)
omp_gb_anamorph = omp;
} else if (omp->subtype == ORGMOD_other) {
omp_other = omp;
+ } else if (omp->subtype == ORGMOD_nat_host) {
+ if (StringICmp (omp->subname, "human") == 0) {
+ omp->subname = MemFree (omp->subname);
+ omp->subname = StringSave ("Homo sapiens");
+ }
+ } else if (omp->subtype == ORGMOD_strain) {
+ FixStrainForPrefix (omp);
}
}
if (omp_other != NULL && StringNICmp (omp_other->subname, "anamorph:", 9) == 0) {
@@ -6482,6 +6291,7 @@ static SubSourcePtr SortSubSourceList (SubSourcePtr list)
return list;
}
+//LCOV_EXCL_START
static CharPtr TrimParenthesesAndCommasAroundString (CharPtr str)
{
@@ -6544,6 +6354,7 @@ static CharPtr CombineSplitQual (CharPtr origval, CharPtr newval)
MemFree (origval);
return str;
}
+//LCOV_EXCL_STOP
static Uint1 LocationForPlastidText (CharPtr plastid_name)
{
@@ -6568,6 +6379,7 @@ static Uint1 LocationForPlastidText (CharPtr plastid_name)
}
}
+//LCOV_EXCL_START
NLM_EXTERN void StringToLower (CharPtr str)
{
@@ -6581,6 +6393,7 @@ NLM_EXTERN void StringToLower (CharPtr str)
ch = *str;
}
}
+//LCOV_EXCL_STOP
static void CleanPCRPrimerSeq (CharPtr seq)
@@ -6648,9 +6461,40 @@ static void CleanupPCRPrimers (PCRPrimerPtr PNTR pppp)
PCRPrimerPtr next;
PCRPrimerPtr PNTR prev;
PCRPrimerPtr ppp;
+ PCRPrimerPtr pr1, pr2;
if (pppp == NULL) return;
+ ppp = *pppp;
+ while (ppp != NULL) {
+ CleanVisString (&(ppp->seq));
+ CleanPCRPrimerSeq (ppp->seq);
+ CleanVisString (&(ppp->name));
+ Asn2gnbkCompressSpaces (ppp->name);
+ StringToLower (ppp->seq);
+
+ ppp = ppp->next;
+ }
+
+ ppp = *pppp;
+ for (pr1 = ppp; pr1 != NULL; pr1 = pr1->next) {
+ for (pr2 = pr1->next; pr2 != NULL; pr2 = pr2->next) {
+ if (StringCmp (pr1->seq, pr2->seq) == 0 && StringCmp (pr1->name, pr2->name) == 0) {
+ pr2->seq = MemFree (pr2->seq);
+ pr2->name = MemFree (pr2->name);
+ } else if (StringCmp (pr1->name, pr2->name) == 0) {
+ if (StringHasNoText (pr1->seq)) {
+ pr1->seq = MemFree (pr1->seq);
+ pr1->seq = pr2->seq;
+ pr2->seq = NULL;
+ } else if (StringHasNoText (pr2->seq)) {
+ pr2->seq = MemFree (pr2->seq);
+ pr2->name = MemFree (pr2->name);
+ }
+ }
+ }
+ }
+
prev = pppp;
ppp = *pppp;
while (ppp != NULL) {
@@ -6693,22 +6537,76 @@ static void CleanupPCRPrimers (PCRPrimerPtr PNTR pppp)
}
}
+/*
+ * Decides whether two PCR primer lists are considered the same.
+ *
+ * Returns FALSE when either list is NULL or when the lists differ in
+ * length.  Otherwise every (entry of ppp1, entry of ppp2) pair is examined
+ * and a hit is counted whenever both seq and name compare equal with
+ * StringCmp; the lists match when the number of hits equals the list
+ * length.  Because all pairs are counted, the answer is only exact when
+ * neither list contains repeated (seq, name) entries.
+ */
+static Boolean PCRPrimersMatch (PCRPrimerPtr ppp1, PCRPrimerPtr ppp2)
+
+{
+  PCRPrimerPtr  lhs;
+  PCRPrimerPtr  rhs;
+  Int2          count1 = 0;
+  Int2          count2 = 0;
+  Int2          hits = 0;
+
+  if (ppp1 == NULL || ppp2 == NULL) return FALSE;
+
+  lhs = ppp1;
+  while (lhs != NULL) {
+    count1++;
+    lhs = lhs->next;
+  }
+  rhs = ppp2;
+  while (rhs != NULL) {
+    count2++;
+    rhs = rhs->next;
+  }
+  if (count1 != count2) return FALSE;
+
+  for (lhs = ppp1; lhs != NULL; lhs = lhs->next) {
+    for (rhs = ppp2; rhs != NULL; rhs = rhs->next) {
+      if (StringCmp (lhs->seq, rhs->seq) != 0) continue;
+      if (StringCmp (lhs->name, rhs->name) != 0) continue;
+      hits++;
+    }
+  }
+
+  return (hits == count1) ? TRUE : FALSE;
+}
+
+/*
+ * Two reaction sets match when PCRPrimersMatch accepts both their forward
+ * lists and their reverse lists.  Returns FALSE if either set is NULL.
+ * Since PCRPrimersMatch rejects NULL lists, sets with a missing forward or
+ * reverse list never match.
+ */
+static Boolean PCRReactionSetsMatch (PCRReactionSetPtr prp1, PCRReactionSetPtr prp2)
+
+{
+  if (prp1 != NULL && prp2 != NULL &&
+      PCRPrimersMatch (prp1->forward, prp2->forward) &&
+      PCRPrimersMatch (prp1->reverse, prp2->reverse)) {
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
static void CleanupPCRReactionSet (PCRReactionSetPtr PNTR prpp)
{
+ PCRReactionSetPtr curr;
PCRReactionSetPtr next;
PCRReactionSetPtr PNTR prev;
PCRReactionSetPtr prp;
if (prpp == NULL) return;
+ prp = *prpp;
+ while (prp != NULL) {
+ CleanupPCRPrimers (&(prp->forward));
+ CleanupPCRPrimers (&(prp->reverse));
+ prp = prp->next;
+ }
+
prev = prpp;
prp = *prpp;
while (prp != NULL) {
next = prp->next;
- CleanupPCRPrimers (&(prp->forward));
- CleanupPCRPrimers (&(prp->reverse));
+ curr = next;
+ while (curr != NULL) {
+ if (PCRReactionSetsMatch (prp, curr)) {
+ curr->forward = PCRPrimerFree (curr->forward);
+ curr->reverse = PCRPrimerFree (curr->reverse);
+ }
+ curr = curr->next;
+ }
if (prp->forward == NULL && prp->reverse == NULL) {
*prev = next;
@@ -6720,8 +6618,124 @@ static void CleanupPCRReactionSet (PCRReactionSetPtr PNTR prpp)
prp = next;
}
+
+}
+
+/*
+ * Rewrites an altitude value of the form
+ *     [+|-] digits [ . digits ] unit
+ * so that it ends in " m", where unit is one of "m", "m.", " m",
+ * " meters" or " metres".  On success ssp->name is replaced by a newly
+ * allocated string.
+ *
+ * Side effects to be aware of:
+ *  - when the name is longer than two characters and ends in '.', that
+ *    period is removed before any parsing, even if the value then fails
+ *    to parse;
+ *  - the unit is cut off in place before the new buffer is allocated, so
+ *    an allocation failure leaves the name without its unit.
+ *
+ * Does nothing when ssp is NULL or its name has no text.
+ */
+static void CleanupAltitude (SubSourcePtr ssp)
+
+{
+  CharPtr  cursor;
+  CharPtr  fixed;
+  size_t   namelen;
+
+  if (ssp == NULL || StringHasNoText (ssp->name)) return;
+  namelen = StringLen (ssp->name);
+  if (namelen < 1) return;
+
+  cursor = ssp->name;
+
+  /* drop one trailing period */
+  if (namelen > 2 && cursor [namelen - 1] == '.') {
+    cursor [namelen - 1] = '\0';
+  }
+
+  /* optional sign */
+  if (*cursor == '+' || *cursor == '-') {
+    cursor++;
+  }
+
+  /* integer part, at least one digit */
+  if (! IS_DIGIT (*cursor)) return;
+  do {
+    cursor++;
+  } while (IS_DIGIT (*cursor));
+
+  /* optional fraction, at least one digit after the point */
+  if (*cursor == '.') {
+    cursor++;
+    if (! IS_DIGIT (*cursor)) return;
+    do {
+      cursor++;
+    } while (IS_DIGIT (*cursor));
+  }
+
+  /* whatever follows the number must be a recognized unit spelling */
+  if (StringCmp (cursor, "m") != 0 &&
+      StringCmp (cursor, "m.") != 0 &&
+      StringCmp (cursor, " m") != 0 &&
+      StringCmp (cursor, " meters") != 0 &&
+      StringCmp (cursor, " metres") != 0) return;
+
+  /* keep only the number, then append the canonical unit */
+  *cursor = '\0';
+  fixed = (CharPtr) MemNew (namelen + 5);
+  if (fixed == NULL) return;
+  StringCpy (fixed, ssp->name);
+  StringCat (fixed, " m");
+  ssp->name = MemFree (ssp->name);
+  ssp->name = fixed;
+}
+/* Month abbreviations, each with its surrounding hyphens, in the
+   capitalization that CorrectMonthCapitalization writes back. */
+static CharPtr coll_date_month_abbrevs [12] =
+{
+  "-Jan-", "-Feb-", "-Mar-", "-Apr-", "-May-", "-Jun-",
+  "-Jul-", "-Aug-", "-Sep-", "-Oct-", "-Nov-", "-Dec-"
+};
+
+/*
+ * Looks for the table entries in str, in table order, using StringISearch
+ * (presumably a case-insensitive search - confirm), and overwrites the
+ * five characters at the first hit with the table's spelling.  Only one
+ * entry is ever corrected per call; str is modified in place and is left
+ * untouched when no entry is found.
+ */
+static void CorrectMonthCapitalization (CharPtr str)
+
+{
+  CharPtr  hit = NULL;
+  Int2     idx;
+  Int2     pos;
+
+  for (idx = 0; idx < 12; idx++) {
+    hit = StringISearch (str, coll_date_month_abbrevs [idx]);
+    if (hit != NULL) break;
+  }
+  if (hit == NULL) return;
+
+  /* every table entry is exactly five characters long */
+  for (pos = 0; pos < 5; pos++) {
+    hit [pos] = coll_date_month_abbrevs [idx] [pos];
+  }
+}
+/*
+ * StringPair is a simple from/to pair of strings.
+ *
+ * sex_conv lists the rewrites applied to SUBSRC_sex values by
+ * CleanSubSourceList: the value is lower-cased first, then compared with
+ * StringCmp against each "from" entry, and the first exact match is
+ * replaced by a copy of the corresponding "to" text.  All "from" entries
+ * are therefore written in lower case.  The table ends with a
+ * { NULL, NULL } sentinel, which is what stops the lookup loop.
+ */
+typedef struct stringpair {
+ CharPtr from;
+ CharPtr to;
+} StringPair, PNTR StringPairPtr;
+
+static StringPair sex_conv[] = {
+ { "asexual female", "asexual and female" },
+ { "asexual male", "asexual and male" },
+ { "dioecious female", "dioecious and female" },
+ { "dioecious male", "dioecious and male" },
+ { "f and m mixed", "female, male, and mixed" },
+ { "f", "female" },
+ { "f/m", "female and male" },
+ { "female,male", "female and male" },
+ { "female/hermaphrodite", "female and hermaphrodite" },
+ { "female/male mixed", "female, male, and mixed" },
+ { "female/male", "female and male" },
+ { "m and f mixed", "male, female, and mixed" },
+ { "m", "male" },
+ { "m/f", "male and female" },
+ { "male,female", "male and female" },
+ { "male/female mixed", "male, female, and mixed" },
+ { "male/female", "male and female" },
+ { "male/hermaphrodite", "male and hermaphrodite" },
+ { "mixed female and male", "mixed, female, and male" },
+ { "mixed female/male", "mixed, female, and male" },
+ { "mixed male and female", "mixed, male, and female" },
+ { "mixed male/female", "mixed, male, and female" },
+ { NULL, NULL }
+};
+
extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
{
@@ -6730,6 +6744,7 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
Int2 i;
Boolean in_brackets = FALSE;
SubSourcePtr last = NULL;
+ size_t len;
SubSourcePtr next;
SubSourcePtr PNTR prev;
CharPtr ptr;
@@ -6738,9 +6753,11 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
CharPtr str;
CharPtr tmp;
Boolean unlink;
+ /*
FloatHi ns, ew;
Char lon, lat;
Int4 processed;
+ */
/*
SubSourcePtr fwd_seq = NULL, rev_seq = NULL, fwd_name = NULL, rev_name = NULL;
size_t len;
@@ -6762,6 +6779,13 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
}
if (ssp->subtype == SUBSRC_country) {
CleanVisStringJunk (&(ssp->name));
+ len = StringLen (ssp->name);
+ if (len > 2) {
+ str = ssp->name;
+ if (str [len - 1] == ':') {
+ str [len - 1] = '\0';
+ }
+ }
if (StringICmp (ssp->name, "United States") == 0 ||
StringICmp (ssp->name, "United States of America") == 0 ||
StringICmp (ssp->name, "U.S.A.") == 0) {
@@ -6787,15 +6811,60 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
}
} else if (ssp->subtype == SUBSRC_clone) {
CleanVisStringJunk (&(ssp->name));
+ } else if (ssp->subtype == SUBSRC_altitude) {
+ if (ssp->name != NULL && (! AltitudeIsValid (ssp->name))) {
+ CleanupAltitude (ssp);
+ }
} else if (ssp->subtype == SUBSRC_lat_lon) {
+ /*
str = ssp->name;
- if (sscanf (str, "%lf %c, %lf %c%n", &ns, &lat, &ew, &lon, &processed) == 4 && processed == StringLen (str)) {
+ if (str != NULL) {
+ ptr = StringStr (str, " N, ");
+ if (ptr == NULL) {
+ ptr = StringStr (str, " S, ");
+ }
+ if (ptr != NULL) {
+ ptr += 2;
+ *ptr = ' ';
+ Asn2gnbkCompressSpaces (str);
+ }
+ }
+ */
+ /*
+ if (str != NULL && sscanf (str, "%lf %c, %lf %c%n", &ns, &lat, &ew, &lon, &processed) == 4 && processed == StringLen (str)) {
ptr = StringChr (str, ',');
if (ptr != NULL) {
*ptr = ' ';
Asn2gnbkCompressSpaces (str);
}
}
+ */
+ } else if (ssp->subtype == SUBSRC_other && StringDoesHaveText (ssp->name)) {
+ CorrectTildes (&(ssp->name));
+ } else if (ssp->subtype == SUBSRC_sex) {
+ ptr = ssp->name;
+ if (StringDoesHaveText (ptr)) {
+ ch = *ptr;
+ while (ch != '\0') {
+ ch = TO_LOWER(ch);
+ *ptr = ch;
+ ptr++;
+ ch = *ptr;
+ }
+ ptr = ssp->name;
+ for (i = 0; sex_conv[i].from != NULL; i++) {
+ if (StringCmp (ptr, sex_conv[i].from) == 0) {
+ ssp->name = MemFree (ssp->name);
+ ssp->name = StringSave (sex_conv[i].to);
+ break;
+ }
+ }
+ }
+ } else if (ssp->subtype == SUBSRC_collection_date) {
+ ptr = ssp->name;
+ if (StringDoesHaveText (ptr)) {
+ CorrectMonthCapitalization (ptr);
+ }
}
if (ssp->subtype == SUBSRC_fwd_primer_seq ||
ssp->subtype == SUBSRC_rev_primer_seq) {
@@ -6994,6 +7063,7 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
*/
}
+//LCOV_EXCL_START
extern void CleanSubSourcePrimers (SubSourcePtr PNTR sspp)
{
@@ -7122,196 +7192,7 @@ extern void CleanSubSourcePrimers (SubSourcePtr PNTR sspp)
}
}
}
-
-/* if string starts with given prefix, return pointer to remaining text */
-
-static CharPtr StringHasPrefix (CharPtr str, CharPtr pref, Boolean novalneeded, Boolean skippref)
-
-{
- Char ch;
- size_t len;
- Char tmp [64];
- CharPtr val;
-
- if (StringHasNoText (str) || StringHasNoText (pref)) return NULL;
- len = StringLen (pref);
- StringNCpy_0 (tmp, pref, sizeof (tmp));
- if (StringNICmp (str, tmp, len) != 0) {
- /* try after replacing dash with underscore */
- val = tmp;
- ch = *val;
- while (ch != '\0') {
- if (ch == '-') {
- *val = '_';
- }
- val++;
- ch = *val;
- }
- if (StringNICmp (str, tmp, len) != 0) return NULL;
- }
- if (skippref) {
- val = str + len;
- } else {
- val = str;
- }
- if (StringHasNoText (val)) {
- if (novalneeded) return " ";
- return NULL;
- }
- ch = *(str + len);
- if (ch != '=' && ch != ' ' && ch != ':' && ch != '\0') return NULL;
- ch = *val;
- while (ch == '=' || ch == ' ' || ch == ':') {
- val++;
- ch = *val;
- }
- if (StringHasNoText (val)) return NULL;
- return val;
-}
-
-
-Nlm_QualNameAssoc current_orgmod_subtype_alist[] = {
- {" ", 0},
- {"Acronym", ORGMOD_acronym},
- {"Anamorph", ORGMOD_anamorph},
- {"Authority", ORGMOD_authority},
- {"Bio-material", ORGMOD_bio_material},
- {"Biotype", ORGMOD_biotype},
- {"Biovar", ORGMOD_biovar},
- {"Breed", ORGMOD_breed},
- {"Chemovar", ORGMOD_chemovar},
- {"Common", ORGMOD_common},
- {"Cultivar", ORGMOD_cultivar},
- {"Culture-collection", ORGMOD_culture_collection},
- {"Ecotype", ORGMOD_ecotype},
- {"Forma", ORGMOD_forma},
- {"Forma-specialis", ORGMOD_forma_specialis},
- {"Group", ORGMOD_group},
- {"Host", ORGMOD_nat_host},
- {"Isolate", ORGMOD_isolate},
- {"Metagenome-source", ORGMOD_metagenome_source},
- {"Pathovar", ORGMOD_pathovar},
- {"Serogroup", ORGMOD_serogroup},
- {"Serotype", ORGMOD_serotype},
- {"Serovar", ORGMOD_serovar},
- {"Specimen-voucher", ORGMOD_specimen_voucher},
- {"Strain", ORGMOD_strain},
- {"Subgroup", ORGMOD_subgroup},
- {"Sub-species", ORGMOD_sub_species},
- {"Substrain", ORGMOD_substrain},
- {"Subtype", ORGMOD_subtype},
- {"Synonym", ORGMOD_synonym},
- {"Teleomorph", ORGMOD_teleomorph},
- {"Type", ORGMOD_type},
- {"Variety", ORGMOD_variety},
- { NULL, 0 } };
-
-Nlm_QualNameAssoc discouraged_orgmod_subtype_alist[] = {
- {"Old Lineage", ORGMOD_old_lineage},
- {"Old Name", ORGMOD_old_name},
- { NULL, 0 } };
-
-Nlm_QualNameAssoc discontinued_orgmod_subtype_alist[] = {
- {"Dosage", ORGMOD_dosage},
- { NULL, 0 } };
-
-
-Nlm_NameNameAssoc orgmod_aliases[] = {
- {"Sub-species", "subspecies", ORGMOD_sub_species},
- {"Host", "nat-host", ORGMOD_nat_host},
- {"Host", "specific-host", ORGMOD_nat_host},
- {"Substrain", "Sub_strain", ORGMOD_substrain},
- { NULL, NULL, 0 } };
-
-extern CharPtr GetOrgModQualName (Uint1 subtype)
-{
- Int4 i;
-
- if (subtype == ORGMOD_other) {
- return "Note";
- }
- for (i = 0; current_orgmod_subtype_alist[i].name != NULL; i++) {
- if (current_orgmod_subtype_alist[i].value == subtype) {
- return current_orgmod_subtype_alist[i].name;
- }
- }
- for (i = 0; discouraged_orgmod_subtype_alist[i].name != NULL; i++) {
- if (discouraged_orgmod_subtype_alist[i].value == subtype) {
- return discouraged_orgmod_subtype_alist[i].name;
- }
- }
-
- for (i = 0; discontinued_orgmod_subtype_alist[i].name != NULL; i++) {
- if (discontinued_orgmod_subtype_alist[i].value == subtype) {
- return discontinued_orgmod_subtype_alist[i].name;
- }
- }
-
- return NULL;
-}
-
-
-extern void BioSourceHasOldOrgModQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued)
-{
- OrgModPtr mod;
- Boolean discouraged = FALSE, discontinued = FALSE;
- Int4 i;
-
- if (biop != NULL && biop->org != NULL && biop->org->orgname != NULL) {
- mod = biop->org->orgname->mod;
- while (mod != NULL && (!discouraged || !discontinued)) {
- for (i = 0; discouraged_orgmod_subtype_alist[i].name != NULL && !discouraged; i++) {
- if (mod->subtype == discouraged_orgmod_subtype_alist[i].value) {
- discouraged = TRUE;
- }
- }
- for (i = 0; discontinued_orgmod_subtype_alist[i].name != NULL && !discontinued; i++) {
- if (mod->subtype == discontinued_orgmod_subtype_alist[i].value) {
- discontinued = TRUE;
- }
- }
- mod = mod->next;
- }
- }
-
- if (has_discouraged != NULL) {
- *has_discouraged = discouraged;
- }
- if (has_discontinued != NULL) {
- *has_discontinued = discontinued;
- }
-}
-
-
-static void StringHasOrgModPrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref)
-{
- Int2 i;
- CharPtr val = NULL;
- Uint1 subtype_val = 0;
-
- for (i = 0; current_orgmod_subtype_alist[i].name != NULL && subtype_val == 0; i++) {
- if (current_orgmod_subtype_alist[i].value == ORGMOD_nat_host) continue;
- val = StringHasPrefix (str, current_orgmod_subtype_alist [i].name, FALSE, skippref);
- if (val != NULL) {
- subtype_val = current_orgmod_subtype_alist[i].value;
- }
- }
- if (subtype_val == 0) {
- for (i = 0; orgmod_aliases[i].name != NULL && subtype_val == 0; i++) {
- if (orgmod_aliases[i].value == ORGMOD_nat_host) continue;
- val = StringHasPrefix (str, orgmod_aliases [i].alias, FALSE, skippref);
- if (val != NULL) {
- subtype_val = orgmod_aliases[i].value;
- }
- }
- }
- if (pval != NULL) {
- *pval = val;
- }
- if (p_subtypeval != NULL) {
- *p_subtypeval = subtype_val;
- }
-}
+//LCOV_EXCL_STOP
static void OrpModToOrgMod (ValNodePtr PNTR vnpp, OrgModPtr PNTR ompp)
@@ -7371,127 +7252,6 @@ static void OrpModToOrgMod (ValNodePtr PNTR vnpp, OrgModPtr PNTR ompp)
}
}
-Nlm_QualNameAssoc current_subsource_subtype_alist[] = {
- {" ", 0},
- {"Cell-line", SUBSRC_cell_line},
- {"Cell-type", SUBSRC_cell_type},
- {"Chromosome", SUBSRC_chromosome},
- {"Clone", SUBSRC_clone},
- {"Clone-lib", SUBSRC_clone_lib},
- {"Collected-by", SUBSRC_collected_by},
- {"Collection-date", SUBSRC_collection_date},
- {"Country", SUBSRC_country},
- {"Dev-stage", SUBSRC_dev_stage},
- {"Endogenous-virus-name", SUBSRC_endogenous_virus_name},
- {"Environmental-sample", SUBSRC_environmental_sample},
- {"Frequency", SUBSRC_frequency},
- {"Genotype", SUBSRC_genotype},
- {"Germline", SUBSRC_germline},
- {"Haplogroup", SUBSRC_haplogroup},
- {"Haplotype", SUBSRC_haplotype},
- {"Identified-by", SUBSRC_identified_by},
- {"Isolation-source", SUBSRC_isolation_source},
- {"Lab-host", SUBSRC_lab_host},
- {"Lat-Lon", SUBSRC_lat_lon},
- {"Linkage-group", SUBSRC_linkage_group},
- {"Map", SUBSRC_map},
- {"Mating-type", SUBSRC_mating_type},
- {"Metagenomic", SUBSRC_metagenomic},
- {"Plasmid-name", SUBSRC_plasmid_name},
- {"Pop-variant", SUBSRC_pop_variant},
- {"Rearranged", SUBSRC_rearranged},
- {"Segment", SUBSRC_segment},
- {"Sex", SUBSRC_sex},
- {"Subclone", SUBSRC_subclone},
- {"Tissue-lib", SUBSRC_tissue_lib},
- {"Tissue-type", SUBSRC_tissue_type},
- {"Transgenic", SUBSRC_transgenic},
- { NULL, 0 } };
-
-Nlm_QualNameAssoc discouraged_subsource_subtype_alist[] = {
- {"Plastid-name", SUBSRC_plastid_name},
- { NULL, 0 } };
-
-Nlm_QualNameAssoc discontinued_subsource_subtype_alist[] = {
- {"Ins-seq-name", SUBSRC_insertion_seq_name},
- {"Transposon-name", SUBSRC_transposon_name},
- {"Fwd-PCR-primer-name", SUBSRC_fwd_primer_name},
- {"Fwd-PCR-primer-seq", SUBSRC_fwd_primer_seq},
- {"Rev-PCR-primer-name", SUBSRC_rev_primer_name},
- {"Rev-PCR-primer-seq", SUBSRC_rev_primer_seq},
- { NULL, 0 } };
-
-Nlm_NameNameAssoc subsource_aliases[] = {
- {"Fwd-PCR-primer-name", "fwd-primer-name", SUBSRC_fwd_primer_name},
- {"Fwd-PCR-primer-seq", "fwd-primer-seq", SUBSRC_fwd_primer_seq},
- {"Rev-PCR-primer-name", "rev-primer-name", SUBSRC_rev_primer_name},
- {"Rev-PCR-primer-seq", "rev-primer-seq", SUBSRC_rev_primer_seq},
- {"Subclone", "sub-clone", SUBSRC_subclone},
- {"Lat-Lon", "Lat-long", SUBSRC_lat_lon},
- {"Lat-Lon", "Latitude-Longitude", SUBSRC_lat_lon },
- { NULL, NULL, 0 } };
-
-extern CharPtr GetSubsourceQualName (Uint1 subtype)
-{
- Int4 i;
-
- if (subtype == SUBSRC_other) {
- return "Note";
- }
- for (i = 0; current_subsource_subtype_alist[i].name != NULL; i++) {
- if (current_subsource_subtype_alist[i].value == subtype) {
- return current_subsource_subtype_alist[i].name;
- }
- }
-
- for (i = 0; discouraged_subsource_subtype_alist[i].name != NULL; i++) {
- if (discouraged_subsource_subtype_alist[i].value == subtype) {
- return discouraged_subsource_subtype_alist[i].name;
- }
- }
-
- for (i = 0; discontinued_subsource_subtype_alist[i].name != NULL; i++) {
- if (discontinued_subsource_subtype_alist[i].value == subtype) {
- return discontinued_subsource_subtype_alist[i].name;
- }
- }
-
- return NULL;
-}
-
-
-extern void BioSourceHasOldSubSourceQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued)
-{
- SubSourcePtr ssp;
- Boolean discouraged = FALSE, discontinued = FALSE;
- Int4 i;
-
- if (biop != NULL) {
- ssp = biop->subtype;
- while (ssp != NULL && (!discouraged || !discontinued)) {
- for (i = 0; discouraged_subsource_subtype_alist[i].name != NULL && !discouraged; i++) {
- if (ssp->subtype == discouraged_subsource_subtype_alist[i].value) {
- discouraged = TRUE;
- }
- }
- for (i = 0; discontinued_subsource_subtype_alist[i].name != NULL && !discontinued; i++) {
- if (ssp->subtype == discontinued_subsource_subtype_alist[i].value) {
- discontinued = TRUE;
- }
- }
- ssp = ssp->next;
- }
- }
-
- if (has_discouraged != NULL) {
- *has_discouraged = discouraged;
- }
- if (has_discontinued != NULL) {
- *has_discontinued = discontinued;
- }
-}
-
-
static void StringHasSubSourcePrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref)
{
Int2 i;
@@ -7785,6 +7545,7 @@ static CharPtr MergeTildeStrings (ValNodePtr head)
return ptr;
}
+
static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp)
{
@@ -7837,6 +7598,12 @@ static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp)
if (StringHasNoText (str)) {
unlink = TRUE;
}
+ } else if (omp->subtype == ORGMOD_bio_material
+ || omp->subtype == ORGMOD_culture_collection
+ || omp->subtype == ORGMOD_specimen_voucher) {
+ /*
+ FixOrgModVoucher (omp);
+ */
}
if (unlink) {
*prev = omp->next;
@@ -7959,208 +7726,262 @@ static int LIBCALLBACK SortDbxref (VoidPtr ptr1, VoidPtr ptr2)
return 0;
}
-static void FixNumericDbxrefs (ValNodePtr vnp)
+static void FixNumericDbxref (DbtagPtr dbt)
{
- Char ch;
- DbtagPtr dbt;
- Boolean isNum;
- Boolean leadingzero;
- Boolean notallzero;
+ size_t len;
ObjectIdPtr oip;
CharPtr ptr;
long val;
+ if (dbt != NULL) {
+ oip = dbt->tag;
+ if (oip != NULL) {
+ ptr = oip->str;
+ if (ptr != NULL && *ptr != '0' && StringIsAllDigits(ptr)) {
+ len = StringLen (ptr);
+ if (len < 10 || (len == 10 && StringCmp (ptr, "2147483647") <= 0)) {
+ if (sscanf (oip->str, "%ld", &val) == 1) {
+ oip->id = (Int4) val;
+ oip->str = MemFree (oip->str);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void FixNumericDbxrefs (ValNodePtr vnp)
+
+{
+ DbtagPtr dbt;
+
while (vnp != NULL) {
dbt = (DbtagPtr) vnp->data.ptrvalue;
if (dbt != NULL) {
+ FixNumericDbxref (dbt);
+ }
+ vnp = vnp->next;
+ }
+}
+
+static void FixOldDbxref (DbtagPtr dbt)
+
+{
+ Boolean all_digits;
+ Char buf [32];
+ Char ch;
+ CharPtr ident;
+ size_t len;
+ ObjectIdPtr oip;
+ CharPtr ptr;
+ CharPtr str;
+
+ if (dbt != NULL) {
+
+ TrimSpacesAroundString (dbt->db);
+ oip = dbt->tag;
+ if (oip != NULL && oip->str != NULL) {
+ /*
+ TrimSpacesAroundString (oip->str);
+ */
+ TrimSpacesSemicolonsAndCommas (oip->str);
+ }
+
+ if (StringICmp (dbt->db, "SWISS-PROT") == 0 &&
+ StringCmp (dbt->db, "Swiss-Prot") != 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("Swiss-Prot");
+ } else if (StringICmp (dbt->db, "SPTREMBL") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("TrEMBL");
+ } else if (StringICmp (dbt->db, "SUBTILIS") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("SubtiList");
+ } else if (StringICmp (dbt->db, "MGD") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("MGI");
+ } else if (StringCmp (dbt->db, "cdd") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("CDD");
+ } else if (StringCmp (dbt->db, "FlyBase") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("FLYBASE");
+ } else if (StringCmp (dbt->db, "GENEDB") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("GeneDB");
+ } else if (StringCmp (dbt->db, "GreengenesID") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("Greengenes");
+ } else if (StringCmp (dbt->db, "HMPID") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("HMP");
+ }
+ if (StringICmp (dbt->db, "HPRD") == 0) {
oip = dbt->tag;
- if (oip != NULL) {
- ptr = oip->str;
- if (ptr != NULL) {
- leadingzero = FALSE;
- notallzero = FALSE;
- isNum = TRUE;
+ if (oip != NULL && StringDoesHaveText (oip->str)) {
+ str = oip->str;
+ if (str != NULL && StringNICmp (str, "HPRD_", 5) == 0) {
+ str [0] = ' ';
+ str [1] = ' ';
+ str [2] = ' ';
+ str [3] = ' ';
+ str [4] = ' ';
+ TrimSpacesAroundString (str);
+ }
+ }
+ } else if (StringICmp (dbt->db, "MGI") == 0) {
+ oip = dbt->tag;
+ if (oip != NULL && oip->str != NULL && StringDoesHaveText (oip->str)) {
+ str = oip->str;
+ if (StringNICmp (str, "MGI:", 4) == 0 || StringNICmp (str, "MGD:", 4) == 0) {
+ str [0] = ' ';
+ str [1] = ' ';
+ str [2] = ' ';
+ str [3] = ' ';
+ TrimSpacesAroundString (str);
+ } else if (StringNICmp (str, "J:", 2) == 0) {
+ ptr = str + 2;
ch = *ptr;
- if (ch == '0') {
- leadingzero = TRUE;
- }
+ all_digits = TRUE;
while (ch != '\0') {
- if ((! IS_DIGIT (ch)) && (! IS_WHITESP (ch))) {
- isNum = FALSE;
- } else if ('1'<= ch && ch <='9') {
- notallzero = TRUE;
+ if (! IS_DIGIT (ch)) {
+ all_digits = FALSE;
}
ptr++;
ch = *ptr;
}
- if (isNum) {
- if (leadingzero && notallzero) {
- /* suppress conversion */
- } else if (sscanf (oip->str, "%ld", &val) == 1) {
- oip->id = (Int4) val;
+ if (all_digits) {
+ oip->str = MemFree (oip->str);
+ oip->str = StringSave ("");
+ }
+ }
+ }
+ }
+ if (StringICmp (dbt->db, "Swiss-Prot") == 0 ||
+ StringICmp (dbt->db, "SWISSPROT") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("UniProt/Swiss-Prot");
+ } else if (StringICmp (dbt->db, "TrEMBL") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("UniProt/TrEMBL");
+ } else if (StringICmp (dbt->db, "LocusID") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("GeneID");
+ } else if (StringICmp (dbt->db, "MaizeDB") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("MaizeGDB");
+ }
+ if (StringICmp (dbt->db, "UniProt/Swiss-Prot") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("UniProtKB/Swiss-Prot");
+ } else if (StringICmp (dbt->db, "UniProt/TrEMBL") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("UniProtKB/TrEMBL");
+ } else if (StringICmp (dbt->db, "Genew") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("HGNC");
+ } else if (StringICmp (dbt->db, "IFO") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("NBRC");
+ } else if (StringICmp (dbt->db, "BHB") == 0 ||
+ StringICmp (dbt->db, "BioHealthBase") == 0) {
+ dbt->db = MemFree (dbt->db);
+ dbt->db = StringSave ("IRD");
+ }
+
+ oip = dbt->tag;
+ if (oip != NULL && oip->str != NULL) {
+ ident = oip->str;
+ if (StringCmp (dbt->db, "HGNC") == 0 && StringNCmp (ident, "HGNC:", 5) == 0 ) {
+ ident += 5;
+ ptr = StringSave (ident);
+ oip->str = MemFree (oip->str);
+ oip->str = ptr;
+ } else if (StringCmp (dbt->db, "VGNC") == 0 && StringNCmp (ident, "VGNC:", 5) == 0 ) {
+ ident += 5;
+ ptr = StringSave (ident);
+ oip->str = MemFree (oip->str);
+ oip->str = ptr;
+ } else if (StringCmp (dbt->db, "MGI") == 0 && StringNCmp (ident, "MGI:", 4) == 0 ) {
+ ident += 4;
+ ptr = StringSave (ident);
+ oip->str = MemFree (oip->str);
+ oip->str = ptr;
+ } else if (StringCmp (dbt->db, "RGD") == 0 && StringNCmp (ident, "RGD:", 4) == 0 ) {
+ ident += 4;
+ ptr = StringSave (ident);
+ oip->str = MemFree (oip->str);
+ oip->str = ptr;
+ }
+ }
+ if (oip != NULL) {
+ if (StringCmp (dbt->db, "HGNC") == 0 || StringCmp (dbt->db, "VGNC") == 0 || StringCmp (dbt->db, "MGI") == 0) {
+ if (oip->str == NULL && oip->id > 0) {
+ sprintf (buf, "%ld", (long) oip->id);
+ ptr = StringSave (buf);
+ oip->id = 0;
+ oip->str = ptr;
+ }
+ ident = oip->str;
+ if (ident != NULL) {
+ if (StringChr (ident, ':') == NULL) {
+ len = StringLen (dbt->db) + StringLen (ident) + 5;
+ ptr = (CharPtr) MemNew (sizeof (Char) * len);
+ if (ptr != NULL) {
+ sprintf (ptr, "%s:%s", dbt->db, ident);
oip->str = MemFree (oip->str);
+ oip->str = ptr;
}
}
}
}
}
- vnp = vnp->next;
}
}
-static void FixOldDbxrefs (ValNodePtr vnp)
+static void FixOldDbxrefs (ValNodePtr vnp, Boolean isEmblOrDdbj)
{
- Boolean all_digits;
- Char ch;
DbtagPtr dbt;
ObjectIdPtr oip;
CharPtr ptr;
- CharPtr str;
CharPtr tmp;
ValNodePtr vp2;
while (vnp != NULL) {
dbt = (DbtagPtr) vnp->data.ptrvalue;
if (dbt != NULL) {
+ FixOldDbxref (dbt);
- TrimSpacesAroundString (dbt->db);
- oip = dbt->tag;
- if (oip != NULL && oip->str != NULL) {
- /*
- TrimSpacesAroundString (oip->str);
- */
- TrimSpacesSemicolonsAndCommas (oip->str);
- }
-
- if (StringICmp (dbt->db, "SWISS-PROT") == 0 &&
- StringCmp (dbt->db, "Swiss-Prot") != 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("Swiss-Prot");
- } else if (StringICmp (dbt->db, "SPTREMBL") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("TrEMBL");
- } else if (StringICmp (dbt->db, "SUBTILIS") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("SubtiList");
- } else if (StringICmp (dbt->db, "MGD") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("MGI");
- } else if (StringCmp (dbt->db, "cdd") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("CDD");
- } else if (StringCmp (dbt->db, "FlyBase") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("FLYBASE");
- } else if (StringCmp (dbt->db, "GENEDB") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("GeneDB");
- } else if (StringCmp (dbt->db, "GreengenesID") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("Greengenes");
- } else if (StringCmp (dbt->db, "HMPID") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("HMP");
- }
- if (StringICmp (dbt->db, "HPRD") == 0) {
- oip = dbt->tag;
- if (oip != NULL && StringDoesHaveText (oip->str)) {
- str = oip->str;
- if (StringNICmp (str, "HPRD_", 5) == 0) {
- str [0] = ' ';
- str [1] = ' ';
- str [2] = ' ';
- str [3] = ' ';
- str [4] = ' ';
- TrimSpacesAroundString (str);
- }
- }
- } else if (StringICmp (dbt->db, "MGI") == 0) {
- oip = dbt->tag;
- if (oip != NULL && StringDoesHaveText (oip->str)) {
- str = oip->str;
- if (StringNICmp (str, "MGI:", 4) == 0 || StringNICmp (str, "MGD:", 4) == 0) {
- str [0] = ' ';
- str [1] = ' ';
- str [2] = ' ';
- str [3] = ' ';
- TrimSpacesAroundString (str);
- } else if (StringNICmp (str, "J:", 2) == 0) {
- ptr = str + 2;
- ch = *ptr;
- all_digits = TRUE;
- while (ch != '\0') {
- if (! IS_DIGIT (ch)) {
- all_digits = FALSE;
- }
- ptr++;
- ch = *ptr;
- }
- if (all_digits) {
- oip->str = MemFree (oip->str);
- oip->str = StringSave ("");
- }
- }
- }
- }
- if (StringICmp (dbt->db, "Swiss-Prot") == 0 ||
- StringICmp (dbt->db, "SWISSPROT") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("UniProt/Swiss-Prot");
- } else if (StringICmp (dbt->db, "TrEMBL") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("UniProt/TrEMBL");
- } else if (StringICmp (dbt->db, "LocusID") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("GeneID");
- } else if (StringICmp (dbt->db, "MaizeDB") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("MaizeGDB");
- }
- if (StringICmp (dbt->db, "UniProt/Swiss-Prot") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("UniProtKB/Swiss-Prot");
- } else if (StringICmp (dbt->db, "UniProt/TrEMBL") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("UniProtKB/TrEMBL");
- } else if (StringICmp (dbt->db, "Genew") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("HGNC");
- } else if (StringICmp (dbt->db, "IFO") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("NBRC");
- } else if (StringICmp (dbt->db, "BHB") == 0 ||
- StringICmp (dbt->db, "BioHealthBase") == 0) {
- dbt->db = MemFree (dbt->db);
- dbt->db = StringSave ("IRD");
- }
-
- /* expand db_xrefs with colons inside tags */
-
- oip = dbt->tag;
- if (oip != NULL && oip->str != NULL) {
- ptr = StringChr (oip->str, ':');
- if (ptr != NULL) {
- if (StringHasNoText (ptr + 1)) {
- *ptr = '\0';
- } else {
- tmp = dbt->db;
- dbt = DbtagNew ();
- if (dbt != NULL) {
- oip = ObjectIdNew ();
- if (oip != NULL) {
- vp2 = ValNodeNew (NULL);
- if (vp2 != NULL) {
- *ptr = '\0';
- ptr++;
- TrimSpacesAroundString (ptr);
- dbt->db = StringSave (tmp);
- oip->str = StringSave (ptr);
- dbt->tag = oip;
- vp2->data.ptrvalue = (Pointer) dbt;
- vp2->next = vnp->next;
- vnp->next = vp2;
+ if (! isEmblOrDdbj) {
+ if (StringCmp (dbt->db, "HGNC") != 0 && StringCmp (dbt->db, "VGNC") != 0 && StringCmp (dbt->db, "MGI") != 0) {
+ /* expand db_xrefs with colons inside tags */
+ oip = dbt->tag;
+ if (oip != NULL && oip->str != NULL) {
+ ptr = StringChr (oip->str, ':');
+ if (ptr != NULL) {
+ if (StringHasNoText (ptr + 1)) {
+ *ptr = '\0';
+ } else {
+ tmp = dbt->db;
+ dbt = DbtagNew ();
+ if (dbt != NULL) {
+ oip = ObjectIdNew ();
+ if (oip != NULL) {
+ vp2 = ValNodeNew (NULL);
+ if (vp2 != NULL) {
+ *ptr = '\0';
+ ptr++;
+ TrimSpacesAroundString (ptr);
+ dbt->db = StringSave (tmp);
+ oip->str = StringSave (ptr);
+ dbt->tag = oip;
+ vp2->data.ptrvalue = (Pointer) dbt;
+ vp2->next = vnp->next;
+ vnp->next = vp2;
+ }
+ }
}
}
}
@@ -8284,6 +8105,46 @@ static void CleanupObsoleteDbxrefs (ValNodePtr PNTR prevvnp)
}
}
+static void CleanupGoDbxrefs (ValNodePtr vnp)
+
+{
+ DbtagPtr dbt;
+ size_t idx;
+ size_t len;
+ ObjectIdPtr oip;
+ CharPtr ptr;
+ Char tmp [32];
+
+ while (vnp != NULL) {
+ dbt = (DbtagPtr) vnp->data.ptrvalue;
+ if (dbt != NULL) {
+ if (StringICmp (dbt->db, "GO") == 0) {
+ oip = dbt->tag;
+ if (oip != NULL) {
+ if (oip->str == NULL && oip->id > 0) {
+ sprintf (tmp, "%ld", (long) oip->id);
+ oip->str = StringSave (tmp);
+ oip->id = 0;
+ }
+ ptr = oip->str;
+ if (ptr != NULL && StringIsAllDigits(ptr)) {
+ len = StringLen (ptr);
+ if (len < 7) {
+ idx = 7 - len;
+ StringCpy (tmp, "0000000");
+ tmp [idx] = '\0';
+ StringCat (tmp, ptr);
+ oip->str = MemFree (oip->str);
+ oip->str = StringSave (tmp);
+ }
+ }
+ }
+ }
+ }
+ vnp = vnp->next;
+ }
+}
+
static int LIBCALLBACK SortCits (VoidPtr ptr1, VoidPtr ptr2)
{
@@ -8741,19 +8602,9 @@ static NameStdPtr TabbedStringToNameStdPtr (CharPtr txt, Boolean fixInitials)
} else if (StringICmp (suffix, "1st") == 0) {
StringCpy (suffix, "I");
} else if (StringICmp (suffix, "2d") == 0) {
- StringCpy (suffix, "II");
- } else if (StringICmp (suffix, "2nd") == 0) {
- StringCpy (suffix, "II");
+ StringCpy (suffix, "2nd");
} else if (StringICmp (suffix, "3d") == 0) {
- StringCpy (suffix, "III");
- } else if (StringICmp (suffix, "3rd") == 0) {
- StringCpy (suffix, "III");
- } else if (StringICmp (suffix, "4th") == 0) {
- StringCpy (suffix, "IV");
- } else if (StringICmp (suffix, "5th") == 0) {
- StringCpy (suffix, "V");
- } else if (StringICmp (suffix, "6th") == 0) {
- StringCpy (suffix, "VI");
+ StringCpy (suffix, "3rd");
} else if (StringICmp (suffix, "Sr") == 0) {
StringCpy (suffix, "Sr.");
} else if (StringICmp (suffix, "Jr") == 0) {
@@ -8786,20 +8637,29 @@ static AffilPtr CleanAffil (AffilPtr afp)
{
if (afp == NULL) return NULL;
CleanVisStringJunkAndCompress (&(afp->affil));
- CleanVisStringJunkAndCompress (&(afp->div));
- CleanVisStringJunkAndCompress (&(afp->city));
- CleanVisStringJunkAndCompress (&(afp->sub));
- CleanVisStringJunkAndCompress (&(afp->country));
- CleanVisStringJunkAndCompress (&(afp->street));
- CleanVisStringJunkAndCompress (&(afp->email));
- CleanVisStringJunkAndCompress (&(afp->fax));
- CleanVisStringJunkAndCompress (&(afp->phone));
- CleanVisStringJunkAndCompress (&(afp->postal_code));
if (afp->choice == 2) {
- if (StringCmp (afp->country, "U.S.A.") == 0) {
+ CleanVisStringJunkAndCompress (&(afp->div));
+ CleanVisStringJunkAndCompress (&(afp->city));
+ CleanVisStringJunkAndCompress (&(afp->sub));
+ CleanVisStringJunkAndCompress (&(afp->country));
+ CleanVisStringJunkAndCompress (&(afp->street));
+ CleanVisStringJunkAndCompress (&(afp->email));
+ CleanVisStringJunkAndCompress (&(afp->fax));
+ CleanVisStringJunkAndCompress (&(afp->phone));
+ CleanVisStringJunkAndCompress (&(afp->postal_code));
+ TrimSpacesSemicolonsAndCommas (afp->postal_code);
+ if (StringICmp (afp->country, "U.S.A.") == 0) {
afp->country = MemFree (afp->country);
afp->country = StringSave ("USA");
}
+ if (StringICmp (afp->country, "USA") == 0 && StringCmp (afp->country, "USA") != 0) {
+ afp->country = MemFree (afp->country);
+ afp->country = StringSave ("USA");
+ }
+ if (StringCmp (afp->country, "USA") == 0 && afp->sub != NULL) {
+ StripPeriods (afp->sub);
+ TrimSpacesAroundString (afp->sub);
+ }
}
if (afp->affil == NULL &&
afp->div == NULL &&
@@ -8839,6 +8699,7 @@ static void NormalizeAuthors (AuthListPtr alp, Boolean fixInitials)
for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
str = (CharPtr) vnp->data.ptrvalue;
TrimSpacesAroundString (str);
+ TrimSpacesAndJunkFromEnds (str, FALSE);
Asn2gnbkCompressSpaces (str);
}
}
@@ -9179,6 +9040,24 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti
}
}
break;
+ case PUB_Book :
+ cbp = (CitBookPtr) vnp->data.ptrvalue;
+ if (cbp != NULL) {
+ imp = cbp->imp;
+ }
+ break;
+ case PUB_Man :
+ cbp = (CitBookPtr) vnp->data.ptrvalue;
+ if (cbp != NULL) {
+ imp = cbp->imp;
+ if (imp != NULL) {
+ affil = imp->pub;
+ if (affil != NULL && affil->choice == 1) {
+ CleanVisStringJunkAndCompress (&(affil->affil));
+ }
+ }
+ }
+ break;
case PUB_Patent :
cpp = (CitPatPtr) vnp->data.ptrvalue;
if (cpp != NULL) {
@@ -9202,6 +9081,7 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti
}
}
+//LCOV_EXCL_START
NLM_EXTERN void CleanUpPubdescAuthors (PubdescPtr pdp)
{
@@ -9235,29 +9115,82 @@ NLM_EXTERN void CleanUpPubdescAuthors (PubdescPtr pdp)
vnp = next;
}
}
+//LCOV_EXCL_STOP
+
+static int pub_order [] = {
+ 0,
+ 3,
+ 4,
+ 13,
+ 2,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 1
+};
+
+static int LIBCALLBACK SortByPubType (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+ Uint1 chs1;
+ Uint1 chs2;
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+
+ if (ptr1 == NULL || ptr2 == NULL) return 0;
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 == NULL || vnp2 == NULL) return 0;
+ chs1 = (Uint1) vnp1->choice;
+ chs2 = (Uint1) vnp2->choice;
+ if (chs1 < 14 && chs2 < 14) {
+ chs1 = pub_order [chs1];
+ chs2 = pub_order [chs2];
+ }
+ if (chs1 > chs2) {
+ return 1;
+ } else if (chs1 < chs2) {
+ return -1;
+ }
+ return 0;
+}
static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAuthors, ValNodePtr PNTR publist)
{
- ArticleIdPtr aip;
- Int4 artpmid = 0;
- Char buf1 [121];
- Char buf2 [121];
- CitArtPtr cap = NULL;
- CitGenPtr cgp;
- CitJourPtr cjp;
- Boolean fixInitials = TRUE;
- Boolean hasArt = FALSE;
- Boolean hasUid = FALSE;
- ImprintPtr imp;
- ValNodePtr next;
- Int4 pmid = 0;
- ValNodePtr PNTR prev;
- ValNodePtr vnp;
+ ArticleIdPtr aip;
+ Int4 artpmid = 0;
+ Char buf1 [121];
+ Char buf2 [121];
+ CitArtPtr cap = NULL;
+ CitGenPtr cgp;
+ CitJourPtr cjp;
+ Boolean fixInitials = TRUE;
+ Boolean hasArt = FALSE;
+ Boolean hasUid = FALSE;
+ ImprintPtr imp;
+ Int4 lastartpmid = 0;
+ Int4 muid = 0;
+ ValNodePtr next;
+ ArticleIdPtr nextaip;
+ Int4 pmid = 0;
+ ValNodePtr PNTR prev;
+ ArticleIdPtr PNTR prevaip;
+ ValNodePtr vnp;
if (pdp == NULL) return;
CleanVisString (&(pdp->comment));
for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == PUB_Muid) {
+ if (vnp->data.intvalue > 0) {
+ muid = vnp->data.intvalue;
+ }
+ }
if (vnp->choice == PUB_Muid || vnp->choice == PUB_PMid) {
if (vnp->data.intvalue > 0) {
hasUid = TRUE;
@@ -9269,6 +9202,25 @@ static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAut
if (hasArt && hasUid) {
fixInitials = FALSE;
}
+ if (pdp->pub != NULL) {
+ pdp->pub = ValNodeSort (pdp->pub, SortByPubType);
+ }
+
+ /* remove zero muid where there is also a non-zero muid */
+ prev = &(pdp->pub);
+ vnp = pdp->pub;
+ while (vnp != NULL) {
+ next = vnp->next;
+ if (vnp->choice == PUB_Muid && vnp->data.intvalue == 0 && muid != 0) {
+ *prev = vnp->next;
+ vnp->next = NULL;
+ PubFree (vnp);
+ } else {
+ prev = &(vnp->next);
+ }
+ vnp = next;
+ }
+
prev = &(pdp->pub);
vnp = pdp->pub;
if (vnp != NULL && vnp->next == NULL && vnp->choice == PUB_Gen) {
@@ -9332,10 +9284,27 @@ static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAut
}
}
}
- for (aip = cap->ids; aip != NULL; aip = aip->next) {
+ }
+ if (cap != NULL) {
+ aip = cap->ids;
+ prevaip = (ArticleIdPtr PNTR) &(cap->ids);
+ lastartpmid = 0;
+ while (aip != NULL) {
+ nextaip = aip->next;
if (aip->choice == ARTICLEID_PUBMED) {
artpmid = aip->data.intvalue;
+ if (lastartpmid != 0 && lastartpmid == artpmid) {
+ aip->next = NULL;
+ *prevaip = nextaip;
+ ArticleIdFree (aip);
+ } else {
+ prevaip = (ArticleIdPtr PNTR) &(aip->next);
+ }
+ lastartpmid = artpmid;
+ } else {
+ prevaip = (ArticleIdPtr PNTR) &(aip->next);
}
+ aip = nextaip;
}
}
} else if (vnp->choice == PUB_PMid) {
@@ -9363,12 +9332,14 @@ static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAut
}
}
+//LCOV_EXCL_START
NLM_EXTERN void CleanUpPubdescBody (PubdescPtr pdp, Boolean stripSerial)
{
if (pdp == NULL) return;
NormalizePubdesc (pdp, stripSerial, FALSE, NULL);
}
+//LCOV_EXCL_STOP
static Boolean KeywordAlreadyInList (ValNodePtr head, CharPtr kwd)
@@ -9590,13 +9561,14 @@ static void CleanUserStrings (
}
if (ufp->choice == 1) {
if (! StringHasNoText ((CharPtr) ufp->data.ptrvalue)) {
- CleanVisString ((CharPtr PNTR) &(ufp->data.ptrvalue));
+ CleanVisStringAndCompress ((CharPtr PNTR) &(ufp->data.ptrvalue));
}
} else if (ufp->choice == 7) {
cpp = (CharPtr PNTR) ufp->data.ptrvalue;
if (cpp != NULL) {
for (i = 0; i < ufp->num; i++) {
- CleanVisString ((CharPtr PNTR) &(cpp [i]));
+ TrimSpacesSemicolonsAndCommas (cpp [i]);
+ Asn2gnbkCompressSpaces (cpp [i]);
}
}
}
@@ -9619,6 +9591,7 @@ static void CleanUserFields (
VisitUserFieldsInUfp (ufp, userdata, CleanUserStrings);
}
+//LCOV_EXCL_START
NLM_EXTERN UserFieldPtr LIBCALL UserFieldSort (UserFieldPtr list, int (LIBCALLBACK *compar ) PROTO((VoidPtr, VoidPtr)))
{
@@ -9652,6 +9625,7 @@ NLM_EXTERN UserFieldPtr LIBCALL UserFieldSort (UserFieldPtr list, int (LIBCALLBA
return list;
}
+//LCOV_EXCL_STOP
/*
static CharPtr barcodeOrder [] = {
@@ -9778,6 +9752,10 @@ NLM_EXTERN void CleanStructuredComment (
ufp->data.ptrvalue = StringSave ("Noncontiguous Finished");
str = MemFree (str);
}
+ } else if (StringCmp(ufp->label->str, "Assembly Date") == 0) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ ReformatAssemblyDate(&str);
+ ufp->data.ptrvalue = str;
}
}
}
@@ -9792,6 +9770,49 @@ NLM_EXTERN void CleanStructuredComment (
}
+//LCOV_EXCL_START
+// change made as a result of SQD-2399, which will not be implemented for the C++ Toolkit
+// going forward. bad data was generated internally, production process has been fixed.
+static void CleanRefGeneTrackingUserObject (
+ UserObjectPtr uop
+)
+
+{
+ UserFieldPtr asmbly = NULL, entry, tmp, ufp;
+ ObjectIdPtr oip;
+
+ if (uop == NULL) return;
+ oip = uop->type;
+ if (oip == NULL) return;
+ if (StringCmp (oip->str, "RefGeneTracking") != 0) return;
+
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "Assembly") != 0) continue;
+ asmbly = ufp;
+ break;
+ }
+
+ if (asmbly == NULL || asmbly->choice != 11) return;
+ tmp = asmbly->data.ptrvalue;
+ if (tmp == NULL || tmp->choice == 11) return;
+
+ entry = UserFieldNew ();
+ if (entry == NULL) return;
+ oip = ObjectIdNew ();
+ if (oip == NULL) return;
+
+ entry->data.ptrvalue = (Pointer) tmp;
+ entry->choice = 11;
+ entry->label = oip;
+ oip->id = 0;
+
+ asmbly->data.ptrvalue = (Pointer) entry;
+ asmbly->choice = 11;
+}
+//LCOV_EXCL_STOP
+
static void CleanUserObject (
UserObjectPtr uop,
Pointer userdata
@@ -9808,6 +9829,7 @@ static void CleanUserObject (
}
VisitUserFieldsInUop (uop, userdata, CleanUserFields);
CleanStructuredComment (uop);
+ CleanRefGeneTrackingUserObject (uop);
}
static CharPtr bsecSiteList [] = {
@@ -10423,6 +10445,7 @@ static Boolean NotExceptedRibosomalName (
return FALSE;
}
+//LCOV_EXCL_START
NLM_EXTERN void CleanupSubSourceOrgModOtherFeat (
SeqFeatPtr sfp,
Pointer userdata
@@ -10470,6 +10493,8 @@ NLM_EXTERN void CleanupSubSourceOrgModOtherDesc (
}
CleanupSubSourceOther (biop, onp);
}
+//LCOV_EXCL_STOP
+
typedef struct xmltable {
CharPtr code;
@@ -10570,45 +10595,51 @@ static CharPtr BSECDecodeXml (
static void CleanupFeatureStrings (
SeqFeatPtr sfp,
Boolean isJscan,
+ Boolean isEmblOrDdbj,
Boolean stripSerial,
Boolean modernizeFeats,
ValNodePtr PNTR publist
)
{
- Uint1 aa;
- BioSourcePtr biop;
- Char ch;
- Uint1 codon [6];
- GeneRefPtr grp;
- ImpFeatPtr ifp;
- Boolean is_fMet = FALSE;
- Int2 j;
- Boolean justTrnaText;
- size_t len;
- CharPtr name;
- OrgNamePtr onp = NULL;
- OrgRefPtr orp;
- PubdescPtr pdp;
- ProtRefPtr prp;
- CharPtr ptr;
- RnaRefPtr rrp;
- SubSourcePtr ssp;
- CharPtr str;
- CharPtr suff;
- CharPtr temp;
- Char tmp [64];
- Boolean trimming_junk;
- tRNAPtr trp;
- CharPtr val;
- ValNodePtr vnp, vnp2;
- RNAGenPtr rgp;
- RNAQualPtr rqp;
- SeqFeatXrefPtr xref;
+ Uint1 aa;
+ BioSourcePtr biop;
+ Char ch;
+ Uint1 codon [6];
+ GeneNomenclaturePtr gnp;
+ GeneRefPtr grp;
+ ImpFeatPtr ifp;
+ Boolean is_fMet = FALSE;
+ Boolean is_iMet = FALSE;
+ Int2 j;
+ Boolean justTrnaText;
+ size_t len;
+ CharPtr name;
+ ObjectIdPtr oip;
+ OrgNamePtr onp = NULL;
+ OrgRefPtr orp;
+ PubdescPtr pdp;
+ ProtRefPtr prp;
+ CharPtr ptr;
+ RNAGenPtr rgp;
+ RNAQualPtr rqp;
+ RnaRefPtr rrp;
+ SubSourcePtr ssp;
+ CharPtr str;
+ CharPtr suff;
+ CharPtr temp;
+ Char tmp [64];
+ Boolean trimming_junk;
+ tRNAPtr trp;
+ UserFieldPtr ufp;
+ UserObjectPtr uop;
+ CharPtr val;
+ ValNodePtr vnp, vnp2;
+ SeqFeatXrefPtr xref;
if (sfp == NULL) return;
BSECDecodeXml (sfp->comment);
- CleanVisString (&(sfp->comment));
+ CleanVisStringAndCompress (&(sfp->comment));
len = StringLen (sfp->comment);
if (len > 4) {
if (StringCmp (sfp->comment + len - 3, ",..") == 0 ||
@@ -10645,6 +10676,8 @@ static void CleanupFeatureStrings (
if (prp == NULL) continue;
RemoveFlankingQuotes (&(prp->desc));
RemoveFlankingQuotesList (&(prp->name));
+ CleanVisStringAndCompress (&(prp->desc));
+ CleanVisStringListAndCompress (&(prp->name));
}
switch (sfp->data.choice) {
@@ -10716,7 +10749,7 @@ static void CleanupFeatureStrings (
HandleXrefOnGene (sfp);
}
BSECDecodeXml (grp->locus);
- CleanVisString (&(grp->locus));
+ CleanVisStringAndCompress (&(grp->locus));
/*
if (isJscan && StringDoesHaveText (grp->locus)) {
ptr = CleanUpSgml (grp->locus);
@@ -10727,7 +10760,7 @@ static void CleanupFeatureStrings (
}
*/
CleanVisString (&(grp->allele));
- CleanVisString (&(grp->desc));
+ CleanVisStringAndCompress (&(grp->desc));
CleanVisString (&(grp->maploc));
CleanVisString (&(grp->locus_tag));
ExpandGeneSynList (grp);
@@ -10768,23 +10801,29 @@ static void CleanupFeatureStrings (
CleanDoubleQuote (grp->maploc);
CleanDoubleQuote (grp->locus_tag);
CleanDoubleQuoteList (grp->syn);
- FixOldDbxrefs (grp->db);
+ FixOldDbxrefs (grp->db, isEmblOrDdbj);
FixNumericDbxrefs (grp->db);
grp->db = ValNodeSort (grp->db, SortDbxref);
CleanupDuplicateDbxrefs (&(grp->db));
CleanupObsoleteDbxrefs (&(grp->db));
+ CleanupGoDbxrefs (grp->db);
/* now move grp->dbxref to sfp->dbxref */
vnp = grp->db;
grp->db = NULL;
ValNodeLink ((&sfp->dbxref), vnp);
if (grp->locus != NULL && grp->syn != NULL) {
- vnp = grp->syn;
- str = (CharPtr) vnp->data.ptrvalue;
- if (StringCmp (grp->locus, str) == 0) {
- grp->syn = vnp->next;
- vnp->next = NULL;
- ValNodeFreeData (vnp);
+ for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringCmp (grp->locus, str) == 0) {
+ vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+ }
}
+ CleanVisStringListCaseSensitive (&(grp->syn));
+ }
+ gnp = grp->formal_name;
+ if (gnp != NULL) {
+ FixOldDbxref (gnp->source);
+ FixNumericDbxref (gnp->source);
}
/*
if (grp->locus != NULL && sfp->comment != NULL) {
@@ -10833,20 +10872,21 @@ static void CleanupFeatureStrings (
}
BSECDecodeXml (prp->desc);
CleanVisStringAndCompress (&(prp->desc));
- CleanVisStringListAndCompress (&(prp->name));
+ CleanVisStringJunkListAndCompress (&(prp->name));
CleanVisStringList (&(prp->ec));
- CleanVisStringList (&(prp->activity));
+ CleanVisStringJunkListAndCompress (&(prp->activity));
CleanDoubleQuote (prp->desc);
CleanDoubleQuoteList (prp->name);
CleanDoubleQuoteList (prp->ec);
CleanDoubleQuoteList (prp->activity);
RemoveFlankingQuotes (&(prp->desc));
RemoveFlankingQuotesList (&(prp->name));
- FixOldDbxrefs (prp->db);
+ FixOldDbxrefs (prp->db, isEmblOrDdbj);
FixNumericDbxrefs (prp->db);
prp->db = ValNodeSort (prp->db, SortDbxref);
CleanupDuplicateDbxrefs (&(prp->db));
CleanupObsoleteDbxrefs (&(prp->db));
+ CleanupGoDbxrefs (prp->db);
/* now move prp->dbxref to sfp->dbxref */
vnp = prp->db;
prp->db = NULL;
@@ -10871,7 +10911,7 @@ static void CleanupFeatureStrings (
}
}
}
- if ((prp->processed == 1 || prp->processed == 2) && prp->name == NULL) {
+ if ((prp->processed == 1 || prp->processed == 2 || prp->processed == 5) && prp->name == NULL) {
ValNodeCopyStr (&(prp->name), 0, "unnamed");
}
for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
@@ -10900,6 +10940,7 @@ static void CleanupFeatureStrings (
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
if (rrp->ext.choice == 1) {
BSECDecodeXml ((CharPtr) rrp->ext.value.ptrvalue);
+ str = (CharPtr) rrp->ext.value.ptrvalue;
CleanVisStringAndCompress ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
CleanDoubleQuote ((CharPtr) rrp->ext.value.ptrvalue);
RemoveFlankingQuotes ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
@@ -10909,6 +10950,10 @@ static void CleanupFeatureStrings (
name = (CharPtr) rrp->ext.value.ptrvalue;
len = StringLen (name);
if (len > 5) {
+ if (len > 16 && StringNICmp (name + len - 16, " ribosomal RNA .", 14) == 0) {
+ name [len-2] = '\0';
+ len = StringLen (name);
+ }
if (len > 14 && StringNICmp (name + len - 14, " ribosomal rRNA", 14) == 0) {
} else if (StringNICmp (name + len - 5, " rRNA", 5) == 0) {
str = MemNew (len + 10);
@@ -10933,6 +10978,7 @@ static void CleanupFeatureStrings (
aa = ParseTRnaString (name, &justTrnaText, codon, FALSE);
if (aa != 0) {
is_fMet = (Boolean) (StringStr (name, "fMet") != NULL);
+ is_iMet = (Boolean) (StringStr (name, "iMet") != NULL);
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
trp = (tRNAPtr) MemNew (sizeof (tRNA));
if (trp != NULL) {
@@ -10963,6 +11009,19 @@ static void CleanupFeatureStrings (
sfp->comment = str;
}
}
+ if (is_iMet) {
+ if (sfp->comment == NULL) {
+ sfp->comment = StringSave ("iMet");
+ } else {
+ len = StringLen (sfp->comment) + StringLen ("iMet") + 5;
+ str = MemNew (sizeof (Char) * len);
+ StringCpy (str, sfp->comment);
+ StringCat (str, "; ");
+ StringCat (str, "iMet");
+ sfp->comment = MemFree (sfp->comment);
+ sfp->comment = str;
+ }
+ }
}
}
} else if (rrp->ext.choice == 2) {
@@ -10986,7 +11045,8 @@ static void CleanupFeatureStrings (
rrp->ext.choice = 2;
rrp->ext.value.ptrvalue = (Pointer) trp;
if (justTrnaText) {
- if (StringCmp (sfp->comment, "fMet") != 0 &&
+ if (StringCmp (sfp->comment, "tRNA-fMet") != 0 &&
+ StringCmp (sfp->comment, "fMet") != 0 &&
StringCmp (sfp->comment, "fMet tRNA") != 0 &&
StringCmp (sfp->comment, "fMet-tRNA") != 0) {
sfp->comment = MemFree (sfp->comment);
@@ -10994,6 +11054,15 @@ static void CleanupFeatureStrings (
sfp->comment = MemFree (sfp->comment);
sfp->comment = StringSave ("fMet");
}
+ if (StringCmp (sfp->comment, "tRNA-iMet") != 0 &&
+ StringCmp (sfp->comment, "iMet") != 0 &&
+ StringCmp (sfp->comment, "iMet tRNA") != 0 &&
+ StringCmp (sfp->comment, "iMet-tRNA") != 0) {
+ sfp->comment = MemFree (sfp->comment);
+ } else {
+ sfp->comment = MemFree (sfp->comment);
+ sfp->comment = StringSave ("iMet");
+ }
}
}
}
@@ -11001,9 +11070,20 @@ static void CleanupFeatureStrings (
if (rrp->ext.choice == 3) {
rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
if (rgp != NULL) {
+ str = (CharPtr) rgp->product;
CleanVisStringAndCompress (&(rgp->product));
CleanDoubleQuote (rgp->product);
RemoveFlankingQuotes (&(rgp->product));
+ if (StringICmp (rgp->product, "internal transcribed spacer 1 (ITS1)") == 0) {
+ rgp->product = MemFree (rgp->product);
+ rgp->product = StringSave ("internal transcribed spacer 1");
+ } else if (StringICmp (rgp->product, "internal transcribed spacer 2 (ITS2)") == 0) {
+ rgp->product = MemFree (rgp->product);
+ rgp->product = StringSave ("internal transcribed spacer 2");
+ } else if (StringICmp (rgp->product, "internal transcribed spacer 3 (ITS3)") == 0) {
+ rgp->product = MemFree (rgp->product);
+ rgp->product = StringSave ("internal transcribed spacer 3");
+ }
CleanVisStringAndCompress (&(rgp->_class));
CleanDoubleQuote (rgp->_class);
for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
@@ -11270,6 +11350,28 @@ static void CleanupFeatureStrings (
rrp->ext.choice = 1;
rrp->ext.value.ptrvalue = sfp->comment;
sfp->comment = NULL;
+ } else if (StringICmp (sfp->comment, "internal transcribed spacer 1 (ITS1)") == 0 ||
+ StringICmp (sfp->comment, "internal transcribed spacer 2 (ITS2)") == 0 ||
+ StringICmp (sfp->comment, "internal transcribed spacer 3 (ITS3)") == 0) {
+ ptr = StringStr (sfp->comment, " (");
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ rrp->ext.choice = 1;
+ rrp->ext.value.ptrvalue = sfp->comment;
+ sfp->comment = NULL;
+ } else if (StringICmp (sfp->comment, "ITS1") == 0 || StringICmp (sfp->comment, "ITS 1") == 0) {
+ rrp->ext.choice = 1;
+ rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 1");
+ sfp->comment = MemFree (sfp->comment);
+ } else if (StringICmp (sfp->comment, "ITS2") == 0 || StringICmp (sfp->comment, "ITS 2") == 0) {
+ rrp->ext.choice = 1;
+ rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 2");
+ sfp->comment = MemFree (sfp->comment);
+ } else if (StringICmp (sfp->comment, "ITS3") == 0 || StringICmp (sfp->comment, "ITS 3") == 0) {
+ rrp->ext.choice = 1;
+ rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 3");
+ sfp->comment = MemFree (sfp->comment);
}
}
break;
@@ -11291,6 +11393,21 @@ static void CleanupFeatureStrings (
CleanDoubleQuote ((CharPtr) sfp->data.value.ptrvalue);
if (sfp->data.value.ptrvalue == NULL) {
sfp->data.choice = SEQFEAT_COMMENT;
+ } else {
+ if (sfp->ext != NULL) {
+ uop = FindUopByTag (sfp->ext, "cddScoreData");
+ if (uop != NULL) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice != 1) continue;
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ if (StringICmp (oip->str, "definition") == 0) {
+ CleanVisStringAndCompress ((CharPtr PNTR) &(ufp->data.ptrvalue));
+ CleanDoubleQuote ((CharPtr) ufp->data.ptrvalue);
+ }
+ }
+ }
+ }
}
break;
case SEQFEAT_COMMENT :
@@ -11354,13 +11471,14 @@ static void CleanupFeatureStrings (
CleanVisStringAndCompress (&(orp->common));
CleanVisStringList (&(orp->mod));
CleanVisStringList (&(orp->syn));
- FixOldDbxrefs (orp->db);
+ FixOldDbxrefs (orp->db, isEmblOrDdbj);
FixNumericDbxrefs (orp->db);
orp->db = ValNodeSort (orp->db, SortDbxref);
orp->syn = ValNodeSort (orp->syn, SortVnpByString);
orp->syn = UniqueValNode (orp->syn);
CleanupDuplicateDbxrefs (&(orp->db));
CleanupObsoleteDbxrefs (&(orp->db));
+ CleanupGoDbxrefs (orp->db);
onp = orp->orgname;
while (onp != NULL) {
CleanVisString (&(onp->attrib));
@@ -11375,6 +11493,34 @@ static void CleanupFeatureStrings (
}
}
+static ValNodePtr SplitStringsAtSemicolon (ValNodePtr PNTR head)
+/* Splits every string of the list at ';' into consecutive nodes; returns *head, or NULL for an empty list. */
+{
+  ValNodePtr  node, tail;
+  CharPtr     semi;
+
+  if (head == NULL || *head == NULL) return NULL;
+
+  /* a freshly inserted node is the next one visited, */
+  /* so any further semicolons in the remainder are split as well */
+  for (node = *head; node != NULL; node = node->next) {
+    semi = StringChr ((CharPtr) node->data.ptrvalue, ';');
+    if (semi == NULL) continue;
+
+    /* end the current string at the semicolon */
+    *semi = '\0';
+
+    /* the remainder goes into a new node placed right after this one */
+    tail = ValNodeCopyStr (NULL, 0, semi + 1);
+    if (tail == NULL) continue;
+    tail->next = node->next;
+    node->next = tail;
+  }
+
+  return *head;
+}
+
+
static void CleanupDescriptorStrings (
ValNodePtr sdp,
Boolean stripSerial,
@@ -11390,7 +11536,12 @@ static void CleanupDescriptorStrings (
OrgNamePtr onp = NULL;
OrgRefPtr orp;
PubdescPtr pdp;
+ PirBlockPtr pir;
+ PrfBlockPtr prf;
+ SPBlockPtr sp;
SubSourcePtr ssp;
+ CharPtr str;
+ ValNodePtr vnp;
if (sdp == NULL) return;
switch (sdp->choice) {
@@ -11419,6 +11570,7 @@ static void CleanupDescriptorStrings (
break;
case Seq_descr_title :
BSECDecodeXml ((CharPtr) sdp->data.ptrvalue);
+ str = (CharPtr) sdp->data.ptrvalue;
CleanVisStringAndCompress ((CharPtr PNTR) &sdp->data.ptrvalue);
if (sdp->data.ptrvalue == NULL) {
sdp->data.ptrvalue = StringSave ("");
@@ -11440,9 +11592,25 @@ static void CleanupDescriptorStrings (
case Seq_descr_maploc :
break;
case Seq_descr_pir :
+ pir = (PirBlockPtr) sdp->data.ptrvalue;
+ SplitStringsAtSemicolon (&(pir->keywords));
break;
case Seq_descr_genbank :
gbp = (GBBlockPtr) sdp->data.ptrvalue;
+ SplitStringsAtSemicolon (&(gbp->keywords));
+ for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringICmp (str, "TPA:reassembly") == 0) {
+ vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+ vnp->data.ptrvalue = StringSave ("TPA:assembly");
+ } else if (StringICmp (str, "TPA_reassembly") == 0) {
+ vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+ vnp->data.ptrvalue = StringSave ("TPA:assembly");
+ } else if (StringICmp (str, "TPA_assembly") == 0) {
+ vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+ vnp->data.ptrvalue = StringSave ("TPA:assembly");
+ }
+ }
CleanVisStringList (&(gbp->extra_accessions));
gbp->extra_accessions = ValNodeSort (gbp->extra_accessions, SortVnpByString);
gbp->extra_accessions = UniqueValNode (gbp->extra_accessions);
@@ -11478,6 +11646,8 @@ static void CleanupDescriptorStrings (
VisitAllUserObjectsInUop ((UserObjectPtr) sdp->data.ptrvalue, NULL, CleanUserObject);
break;
case Seq_descr_sp :
+ sp = (SPBlockPtr) sdp->data.ptrvalue;
+ SplitStringsAtSemicolon (&(sp->keywords));
break;
case Seq_descr_dbxref :
break;
@@ -11485,6 +11655,7 @@ static void CleanupDescriptorStrings (
ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
CleanVisStringList (&(ebp->extra_acc));
ebp->extra_acc = ValNodeSort (ebp->extra_acc, SortVnpByString);
+ SplitStringsAtSemicolon (&(ebp->keywords));
CleanVisStringListCaseSensitive (&(ebp->keywords));
break;
case Seq_descr_create_date :
@@ -11492,6 +11663,8 @@ static void CleanupDescriptorStrings (
case Seq_descr_update_date :
break;
case Seq_descr_prf :
+ prf = (PrfBlockPtr) sdp->data.ptrvalue;
+ SplitStringsAtSemicolon (&(prf->keywords));
break;
case Seq_descr_pdb :
break;
@@ -11539,13 +11712,14 @@ static void CleanupDescriptorStrings (
CleanVisStringAndCompress (&(orp->common));
CleanVisStringList (&(orp->mod));
CleanVisStringList (&(orp->syn));
- FixOldDbxrefs (orp->db);
+ FixOldDbxrefs (orp->db, isEmblOrDdbj);
FixNumericDbxrefs (orp->db);
orp->db = ValNodeSort (orp->db, SortDbxref);
orp->syn = ValNodeSort (orp->syn, SortVnpByString);
orp->syn = UniqueValNode (orp->syn);
CleanupDuplicateDbxrefs (&(orp->db));
CleanupObsoleteDbxrefs (&(orp->db));
+ CleanupGoDbxrefs (orp->db);
onp = orp->orgname;
while (onp != NULL) {
CleanVisString (&(onp->attrib));
@@ -11620,6 +11794,7 @@ static void AddReplaceQual (SeqFeatPtr sfp, CharPtr p)
*s = '\"';
}
+//LCOV_EXCL_START
NLM_EXTERN Boolean SerialNumberInString (CharPtr str)
{
@@ -11652,6 +11827,7 @@ NLM_EXTERN Boolean SerialNumberInString (CharPtr str)
}
return suspicious;
}
+//LCOV_EXCL_STOP
/* now only strips serials for local, general, refseq, and 2+6 genbank ids */
static void CheckForSwissProtID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
@@ -12006,7 +12182,7 @@ static void CleanupDuplicatedCodeBreaks (CodeBreakPtr PNTR prevcbp)
}
}
-
+//LCOV_EXCL_START
CharPtr ncrnaClassList[] = {
"antisense_RNA",
"autocatalytically_spliced_intron",
@@ -12026,12 +12202,12 @@ CharPtr ncrnaClassList[] = {
"SRP_RNA",
"vault_RNA",
"Y_RNA",
+"lncRNA",
"other",
NULL};
Int4 NcrnaOTHER = sizeof (ncrnaClassList) / sizeof (CharPtr) - 1;
-
extern Boolean IsStringInNcRNAClassList (CharPtr str)
{
CharPtr PNTR p;
@@ -12048,6 +12224,52 @@ extern Boolean IsStringInNcRNAClassList (CharPtr str)
}
+CharPtr regulatoryClassList[] = { /* NULL-terminated table scanned by IsStringInRegulatoryClassList */
+"attenuator",
+"CAAT_signal",
+"DNase_I_hypersensitive_site",
+"enhancer_blocking_element",
+"enhancer",
+"GC_signal",
+"imprinting_control_region",
+"insulator",
+"locus_control_region",
+"matrix_attachment_region",
+"minus_10_signal",
+"minus_35_signal",
+"polyA_signal_sequence",
+"promoter",
+"recoding_stimulatory_region",
+"replication_regulatory_region",
+"response_element",
+"ribosome_binding_site",
+"riboswitch",
+"silencer",
+"TATA_box",
+"terminator",
+"transcriptional_cis_regulatory_region",
+"other",
+NULL};
+
+Int4 RegulatoryOTHER = sizeof (regulatoryClassList) / sizeof (CharPtr) - 1; /* element count (NULL terminator included) minus one */
+
+extern Boolean IsStringInRegulatoryClassList (CharPtr str)
+/* TRUE when str equals, ignoring case, one of the entries of regulatoryClassList. */
+{
+  Int4  idx = 0;
+
+  /* input that StringHasNoText rejects never matches */
+  if (StringHasNoText (str)) return FALSE;
+
+  /* scan up to the NULL sentinel that ends the table */
+  while (regulatoryClassList [idx] != NULL) {
+    if (StringICmp (str, regulatoryClassList [idx]) == 0) return TRUE;
+    idx++;
+  }
+  return FALSE;
+}
+//LCOV_EXCL_STOP
+
static void AddNonCopiedQual (SeqFeatPtr sfp, CharPtr qual, CharPtr class_val)
{
GBQualPtr gbq;
@@ -12537,6 +12759,7 @@ static Boolean IsFeatureCommentRedundant (SeqFeatPtr sfp)
residue = FindTrnaAA3 (comment);
if (residue == aa) {
if (aa == 'M' && StringICmp ("fMet", comment) == 0) return FALSE;
+ if (aa == 'M' && StringICmp ("iMet", comment) == 0) return FALSE;
return TRUE;
}
residue = FindTrnaAA (comment);
@@ -12725,6 +12948,183 @@ static void FixncRNAClass (SeqFeatPtr sfp)
}
+static void MoveBioSourceFeatureNoteToSubSourceNote (SeqFeatPtr sfp)
+{
+  ValNode  qual;
+
+  /* only BioSource features that carry comment text are touched */
+  if (sfp == NULL) return;
+  if (sfp->data.choice != SEQFEAT_BIOSRC) return;
+  if (StringHasNoText (sfp->comment)) return;
+  MemSet (&qual, 0, sizeof (qual));
+  qual.data.intvalue = Source_qual_subsource_note;
+  qual.choice = SourceQualChoice_textqual;
+  /* pass the comment text on with the append_semi option, then clear it on the feature */
+  SetSourceQualInBioSource (sfp->data.value.ptrvalue, &qual, NULL, sfp->comment, ExistingTextOption_append_semi);
+  sfp->comment = MemFree (sfp->comment);
+}
+
+
+NLM_EXTERN void ConsolidateOneLikeSubSourceModifier (
+  SubSourcePtr match_to,
+  Boolean use_semicolon
+)
+{
+  SubSourcePtr  curr, following, last_kept;
+  Int4          needed, merged = 0;
+  CharPtr       joined;
+  CharPtr       sep;
+
+  /*
+   * Folds every later modifier of the same subtype (with a non-NULL name)
+   * into match_to: the names are appended to match_to->name, joined by
+   * "; " or " ", and the absorbed nodes are unlinked and freed.
+   */
+  if (match_to == NULL) return;
+
+  /* pass 1: size the buffer, reserving two separator bytes per absorbed name */
+  needed = StringLen (match_to->name) + 1;
+  for (curr = match_to->next; curr != NULL; curr = curr->next) {
+    if (curr->subtype != match_to->subtype) continue;
+    if (curr->name == NULL) continue;
+    needed += StringLen (curr->name) + 2;
+    merged++;
+  }
+
+  /* match_to is left untouched when there is nothing to fold in */
+  if (merged == 0) return;
+
+  joined = MemNew (needed * sizeof (char));
+  if (joined == NULL) return;
+
+  sep = use_semicolon ? "; " : " ";
+  StringCpy (joined, match_to->name);
+
+  /* pass 2: append, unlink and free; last_kept trails the surviving nodes */
+  last_kept = match_to;
+  for (curr = match_to->next; curr != NULL; curr = following) {
+    following = curr->next;
+    if (curr->subtype != match_to->subtype || curr->name == NULL) {
+      last_kept = curr;
+      continue;
+    }
+    StringCat (joined, sep);
+    StringCat (joined, curr->name);
+    last_kept->next = following;
+    curr->next = NULL;
+    SubSourceFree (curr);
+  }
+
+  /* swap the combined text in for the old name */
+  MemFree (match_to->name);
+  match_to->name = joined;
+}
+
+
+NLM_EXTERN void ConsolidateOneLikeOrganismModifier (
+  OrgModPtr match_to,
+  Boolean use_semicolon
+)
+{
+  OrgModPtr  scan, after, keeper;
+  Int4       total, hits = 0;
+  CharPtr    combined;
+  CharPtr    glue;
+
+  /*
+   * Folds every later OrgMod of the same subtype (with a non-NULL subname)
+   * into match_to: the subnames are appended to match_to->subname, joined
+   * by "; " or " ", and the absorbed nodes are unlinked and freed.
+   */
+  if (match_to == NULL) return;
+
+  /* first sweep: work out the buffer size, two separator bytes per absorbed subname */
+  total = StringLen (match_to->subname) + 1;
+  for (scan = match_to->next; scan != NULL; scan = scan->next) {
+    if (scan->subtype != match_to->subtype) continue;
+    if (scan->subname == NULL) continue;
+    total += StringLen (scan->subname) + 2;
+    hits++;
+  }
+
+  /* with no like modifiers further down the chain, match_to stays as it is */
+  if (hits == 0) return;
+
+  combined = MemNew (total * sizeof (char));
+  if (combined == NULL) return;
+
+  glue = use_semicolon ? "; " : " ";
+  StringCpy (combined, match_to->subname);
+
+  /* second sweep: append, unlink and free; keeper is the last node left in the chain */
+  keeper = match_to;
+  for (scan = match_to->next; scan != NULL; scan = after) {
+    after = scan->next;
+    if (scan->subtype != match_to->subtype || scan->subname == NULL) {
+      keeper = scan;
+      continue;
+    }
+    StringCat (combined, glue);
+    StringCat (combined, scan->subname);
+    keeper->next = after;
+    scan->next = NULL;
+    OrgModFree (scan);
+  }
+
+  /* the combined text replaces the old subname */
+  MemFree (match_to->subname);
+  match_to->subname = combined;
+}
+
+typedef struct reg_feat { /* pairs an import-feature key with a regulatory_class value */
+ CharPtr feat_key; /* matched case-insensitively against the ImpFeat key in CleanUpSeqFeat */
+ CharPtr reg_class; /* saved as the value of the "regulatory_class" qualifier on a match */
+} RegFeatData, PNTR RegFeatPtr;
+
+static RegFeatData reg_feat_keys [] = { /* scanned in order; ends at the { NULL, NULL } entry */
+ { "enhancer", "enhancer" },
+ { "promoter", "promoter" },
+ { "CAAT_signal", "CAAT_signal" },
+ { "TATA_signal", "TATA_box" },
+ { "-35_signal", "minus_35_signal" },
+ { "-10_signal", "minus_10_signal" },
+ { "GC_signal", "GC_signal" },
+ { "RBS", "ribosome_binding_site" },
+ { "polyA_signal", "polyA_signal_sequence" },
+ { "attenuator", "attenuator" },
+ { "terminator", "terminator" },
+ { "misc_signal", "other" },
+ { NULL, NULL }
+};
+
+NLM_EXTERN void ConsolidateBioSourceNotes (BioSourcePtr biop)
+{
+  SubSourcePtr  ssp;
+  OrgModPtr     mod;
+  /* Collapses repeated subtype-255 SubSource and OrgMod entries of biop into one each. */
+  if (biop == NULL) return;
+
+  /* merge all named subtype-255 SubSource entries, joined by "; " */
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
+  {
+    if (ssp->subtype == 255 && ssp->name != NULL)
+    {
+      ConsolidateOneLikeSubSourceModifier (ssp, TRUE);
+    }
+  }
+
+  /* same for subtype-255 OrgMod entries, which need both org and orgname present */
+  if (biop->org == NULL || biop->org->orgname == NULL) return;
+  for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next)
+  {
+    if (mod->subtype == 255 && mod->subname != NULL)
+    {
+      ConsolidateOneLikeOrganismModifier (mod, TRUE);
+    }
+  }
+}
+
+
NLM_EXTERN void CleanUpSeqFeat (
SeqFeatPtr sfp,
Boolean isEmblOrDdbj,
@@ -12740,14 +13140,19 @@ NLM_EXTERN void CleanUpSeqFeat (
CdRegionPtr crp;
GBQualPtr gbq;
Boolean emptyRNA;
+ IntFuzzPtr fuzz;
GeneRefPtr grp;
+ Boolean hasGibbsq;
Boolean hasNulls;
SeqIdPtr id;
ImpFeatPtr ifp;
Int2 j;
+ MolInfoPtr mip;
CharPtr name;
+ CharPtr note;
Boolean partial5;
Boolean partial3;
+ SeqPntPtr pntp;
Uint1 processed;
ProtRefPtr prp;
ValNodePtr psp;
@@ -12756,12 +13161,13 @@ NLM_EXTERN void CleanUpSeqFeat (
RnaRefPtr rrp;
Uint1 rrptype;
CharPtr satellite_type;
+ SeqDescrPtr sdp;
SeqIntPtr sintp;
- SeqPntPtr pntp;
SeqIdPtr sip;
SeqLocPtr slp;
CharPtr str;
Uint1 strand;
+ Boolean sync_mol_info;
tRNAPtr trp;
SeqFeatXrefPtr xref, next, PNTR prevlink;
@@ -12817,6 +13223,17 @@ NLM_EXTERN void CleanUpSeqFeat (
gbq->next = sfp->qual;
sfp->qual = gbq;
}
+ } else if (StringCmp (ifp->key, "LTR") == 0) {
+ ifp->key = MemFree (ifp->key);
+ ifp->key = StringSave ("repeat_region");
+ sfp->idx.subtype = FEATDEF_repeat_region;
+ gbq = GBQualNew ();
+ if (gbq != NULL) {
+ gbq->qual = StringSave ("rpt_type");
+ gbq->val = StringSave ("long_terminal_repeat");
+ gbq->next = sfp->qual;
+ sfp->qual = gbq;
+ }
} else if (StringHasNoText (ifp->loc)) {
rrptype = 0;
if (StringCmp (ifp->key, "precursor_RNA") == 0) {
@@ -12853,6 +13270,8 @@ NLM_EXTERN void CleanUpSeqFeat (
processed = 3;
} else if (StringCmp (ifp->key, "transit_peptide") == 0) {
processed = 4;
+ } else if (StringCmp (ifp->key, "propeptide") == 0 || StringCmp (ifp->key, "pro_peptide") == 0) {
+ processed = 5;
}
if (processed != 0 || StringCmp (ifp->key, "Protein") == 0) {
bsp = BioseqFind (SeqLocId (sfp->location));
@@ -12884,6 +13303,52 @@ NLM_EXTERN void CleanUpSeqFeat (
if (crp != NULL && crp->frame == 0 && (! sfp->pseudo)) {
crp->frame = GetFrameFromLoc (sfp->location);
}
+ if (sfp->data.choice == SEQFEAT_IMP) {
+ ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
+ if (ifp != NULL) {
+ for (j = 0; reg_feat_keys [j].feat_key != NULL; j++) {
+ if (StringICmp (ifp->key, reg_feat_keys [j].feat_key) == 0) {
+ ifp->key = MemFree (ifp->key);
+ ifp->key = StringSave ("regulatory");
+ sfp->idx.subtype = FEATDEF_regulatory;
+ gbq = GBQualNew ();
+ if (gbq != NULL) {
+ gbq->qual = StringSave ("regulatory_class");
+ gbq->val = StringSave (reg_feat_keys [j].reg_class);
+ gbq->next = sfp->qual;
+ sfp->qual = gbq;
+ }
+ break;
+ }
+ }
+ }
+ }
+ if (sfp->data.choice == SEQFEAT_IMP) {
+ ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
+ if (ifp != NULL && StringCmp (ifp->key, "regulatory") == 0) {
+ note = NULL;
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringCmp (gbq->qual, "regulatory_class") != 0) continue;
+ str = StringChr (gbq->val, ':');
+ if (str == NULL) continue;
+ if (StringNCmp (gbq->val, "other:", 6) == 0) continue;
+ *str = '\0';
+ str++;
+ TrimSpacesAroundString (str);
+ if (StringHasNoText (str)) continue;
+ note = str;
+ }
+ if (StringDoesHaveText (note)) {
+ gbq = GBQualNew ();
+ if (gbq != NULL) {
+ gbq->qual = StringSave ("note");
+ gbq->val = StringSave (note);
+ gbq->next = sfp->qual;
+ sfp->qual = gbq;
+ }
+ }
+ }
+ }
if (sfp->data.choice == SEQFEAT_RNA) {
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
if (rrp != NULL) {
@@ -12933,12 +13398,13 @@ NLM_EXTERN void CleanUpSeqFeat (
CleanupDuplicateGBQuals (&(sfp->qual));
CleanupFeatureGBQuals (sfp, isEmblOrDdbj);
sfp->qual = SortIllegalGBQuals (sfp->qual);
- CleanupFeatureStrings (sfp, isJscan, stripSerial, modernizeFeats, publist);
- FixOldDbxrefs (sfp->dbxref);
+ CleanupFeatureStrings (sfp, isJscan, isEmblOrDdbj, stripSerial, modernizeFeats, publist);
+ FixOldDbxrefs (sfp->dbxref, isEmblOrDdbj);
FixNumericDbxrefs (sfp->dbxref);
sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref);
CleanupDuplicateDbxrefs (&(sfp->dbxref));
CleanupObsoleteDbxrefs (&(sfp->dbxref));
+ CleanupGoDbxrefs (sfp->dbxref);
psp = sfp->cit;
if (psp != NULL && psp->data.ptrvalue) {
psp->data.ptrvalue = ValNodeSort ((ValNodePtr) psp->data.ptrvalue, SortCits);
@@ -12955,7 +13421,8 @@ NLM_EXTERN void CleanUpSeqFeat (
grp->pseudo = FALSE;
}
}
- } else if (sfp->data.choice == SEQFEAT_CDREGION) {
+ }
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
if (crp != NULL) {
crp->code_break = SortCodeBreaks (sfp, crp->code_break);
@@ -12976,7 +13443,8 @@ NLM_EXTERN void CleanUpSeqFeat (
}
}
}
- } else if (sfp->data.choice == SEQFEAT_RNA) {
+ }
+ if (sfp->data.choice == SEQFEAT_RNA) {
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
if (rrp != NULL) {
if (rrp->pseudo) {
@@ -13008,7 +13476,61 @@ NLM_EXTERN void CleanUpSeqFeat (
if (sfp->idx.subtype == FEATDEF_ncRNA) {
FixncRNAClass (sfp);
}
- } else if (sfp->data.choice == SEQFEAT_REGION ||
+ }
+ if (sfp->data.choice == SEQFEAT_PROT) {
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ if (prp != NULL && sfp->partial) {
+ CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+ if (! partial5 && ! partial3) {
+ bsp = BioseqFind (SeqLocId (sfp->location));
+ if (bsp != NULL && ISA_aa (bsp->mol)) {
+ hasGibbsq = FALSE;
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GIBBSQ) {
+ hasGibbsq = TRUE;
+ }
+ }
+ if (hasGibbsq) {
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_title, NULL);
+ if (sdp != NULL && sdp->choice == Seq_descr_title) {
+ str = (CharPtr) sdp->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ sync_mol_info = FALSE;
+ if (StringStr (str, "{N-terminal}") != NULL) {
+ partial3 = TRUE;
+ sync_mol_info = TRUE;
+ } else if (StringStr (str, "{C-terminal}") != NULL) {
+ partial5 = TRUE;
+ sync_mol_info = TRUE;
+ }
+ if (sync_mol_info) {
+ SetSeqLocPartial (sfp->location, partial5, partial3);
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
+ if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
+ mip = (MolInfoPtr) sdp->data.ptrvalue;
+ if (mip != NULL) {
+ if (partial5 && partial3) {
+ mip->completeness = 5;
+ } else if (partial5) {
+ mip->completeness = 3;
+ } else if (partial3) {
+ mip->completeness = 4;
+ } else if (sfp->partial) {
+ mip->completeness = 2;
+ } else {
+ mip->completeness = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ if (sfp->data.choice == SEQFEAT_REGION ||
sfp->data.choice == SEQFEAT_SITE ||
sfp->data.choice == SEQFEAT_BOND ||
sfp->data.choice == SEQFEAT_PROT) {
@@ -13033,6 +13555,14 @@ NLM_EXTERN void CleanUpSeqFeat (
}
}
}
+ if (sfp->data.choice == SEQFEAT_BIOSRC) {
+ /* combine multiple orgmod or subsource note qualifiers */
+ ConsolidateBioSourceNotes(sfp->data.value.ptrvalue);
+ /* if a BioSource feature has a comment, move the comment to
+ * a subsource note.
+ */
+ MoveBioSourceFeatureNoteToSubSourceNote(sfp);
+ }
ModernizeFeatureStrings (sfp, isEmblOrDdbj);
@@ -13057,6 +13587,22 @@ NLM_EXTERN void CleanUpSeqFeat (
CleanupDuplicateGBQuals (&(sfp->qual));
sfp->qual = SortIllegalGBQuals (sfp->qual);
+ /* normalize Seq-point fuzz tl to tr and decrement position */
+ slp = SeqLocFindNext (sfp->location, NULL);
+ for (slp = SeqLocFindNext (sfp->location, NULL);
+ slp != NULL;
+ slp = SeqLocFindNext (sfp->location, slp)) {
+ if (slp->choice != SEQLOC_PNT) continue;
+ pntp = (SeqPntPtr) slp->data.ptrvalue;
+ if (pntp == NULL) continue;
+ fuzz = pntp->fuzz;
+ if (fuzz == NULL) continue;
+ if (fuzz->choice == 4 /* lim */ && fuzz->a == 4 /* tl */ && pntp->point > 0) {
+ (pntp->point)--;
+ fuzz->a = 3; /* tr */
+ }
+ }
+
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
hasNulls = LocationHasNullsBetween (sfp->location);
sfp->partial = (sfp->partial || partial5 || partial3 || (hasNulls && ! isEmblOrDdbj));
@@ -13115,9 +13661,39 @@ static void RemoveZeroLengthSeqLits (BioseqPtr bsp)
}
}
+/*
+static Boolean CleanUpObjId (ObjectIdPtr oip)
+
+{
+ size_t len;
+ CharPtr ptr;
+ Boolean rval = FALSE;
+ long val;
+
+ if (oip == NULL) return FALSE;
+ if (StringDoesHaveText (oip->str)) {
+ if (isspace (oip->str[0]) || isspace (oip->str[StringLen (oip->str) - 1])) {
+ TrimSpacesAroundString (oip->str);
+ rval = TRUE;
+ }
+ }
+ ptr = oip->str;
+ if (ptr != NULL && *ptr != '0' && StringIsAllDigits(ptr)) {
+ len = StringLen (ptr);
+ if (len < 10 || (len == 10 && StringCmp (ptr, "2147483647") <= 0)) {
+ if (sscanf (oip->str, "%ld", &val) == 1) {
+ oip->id = (Int4) val;
+ oip->str = MemFree (oip->str);
+ rval = TRUE;
+ }
+ }
+ }
+ return rval;
+}
static Boolean CleanUpSeqIdText (SeqIdPtr sip)
{
+ DbtagPtr dbt;
ObjectIdPtr oip;
Boolean rval = FALSE;
@@ -13125,9 +13701,16 @@ static Boolean CleanUpSeqIdText (SeqIdPtr sip)
if (sip->choice == SEQID_LOCAL) {
oip = (ObjectIdPtr) sip->data.ptrvalue;
if (oip != NULL) {
- if (StringDoesHaveText (oip->str)) {
- if (isspace (oip->str[0]) || isspace (oip->str[StringLen (oip->str) - 1])) {
- TrimSpacesAroundString (oip->str);
+ if (CleanUpObjId (oip)) {
+ rval = TRUE;
+ }
+ }
+ } else if (sip->choice == SEQID_GENERAL) {
+ dbt = (DbtagPtr) sip->data.ptrvalue;
+ if (dbt != NULL) {
+ oip = dbt->tag;
+ if (oip != NULL) {
+ if (CleanUpObjId (oip)) {
rval = TRUE;
}
}
@@ -13135,7 +13718,28 @@ static Boolean CleanUpSeqIdText (SeqIdPtr sip)
}
return rval;
}
+*/
+
+
+static Boolean CleanUpSeqIdText (SeqIdPtr sip)
+{
+  ObjectIdPtr  oip;
+  Boolean      rval = FALSE;
if (sip == NULL) return FALSE;
+  /* Trims blanks around the string of a local Seq-id; returns TRUE when it trimmed. */
+  if (sip->choice == SEQID_LOCAL) {
+    oip = (ObjectIdPtr) sip->data.ptrvalue;
+    if (oip != NULL && StringDoesHaveText (oip->str)) {
+      /* isspace needs an unsigned char value; a negative plain char is undefined behavior */
+      if (isspace ((unsigned char) oip->str[0]) || isspace ((unsigned char) oip->str[StringLen (oip->str) - 1])) {
+        TrimSpacesAroundString (oip->str);
+        rval = TRUE;
+      }
+    }
+  }
+  return rval;
+}
static void CleanUpSeqId (
SeqIdPtr sip,
@@ -13738,6 +14342,7 @@ static void UpdateShortFeatCits (SeqFeatPtr sfp, Pointer userdata)
}
}
+//LCOV_EXCL_START
NLM_EXTERN void BasicSeqAnnotCleanup (SeqAnnotPtr sap)
{
@@ -13762,6 +14367,7 @@ NLM_EXTERN void BasicSeqAnnotCleanup (SeqAnnotPtr sap)
}
}
}
+//LCOV_EXCL_STOP
/*
static CharPtr proteinOrganellePrefix [] = {
@@ -13795,33 +14401,34 @@ static CharPtr proteinOrganellePrefix [] = {
NULL,
NULL,
"chloroplast",
- NULL,
- NULL,
+ "chromoplast",
+ "kinetoplast",
"mitochondrion",
+ "plastid",
+ "macronuclear",
NULL,
+ "plasmid",
NULL,
NULL,
+ "cyanelle",
NULL,
NULL,
+ "nucleomorph",
+ "apicoplast",
+ "leucoplast",
+ "protoplast",
+ "endogenous virus",
+ "hydrogenosome",
NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
+ "chromatophore"
};
static CharPtr TitleEndsInOrganism (
CharPtr title,
CharPtr organism,
CharPtr organelle,
- CharPtr PNTR onlp
+ CharPtr PNTR onlp,
+ BoolPtr case_diffp
)
{
@@ -13832,14 +14439,20 @@ static CharPtr TitleEndsInOrganism (
if (onlp != NULL) {
*onlp = NULL;
}
+ if (case_diffp != NULL) {
+ *case_diffp = FALSE;
+ }
if (StringHasNoText (title) || StringHasNoText (organism)) return NULL;
len1 = StringLen (title);
len2 = StringLen (organism);
- if (len2 + 4 >= len1) return NULL;
+ if (len2 + 4 > len1) return NULL;
tmp = title + len1 - len2 - 3;
if (tmp [0] != ' ' || tmp [1] != '[' || tmp [len2 + 2] != ']') return NULL;
if (StringNICmp (tmp + 2, organism, len2) != 0) return NULL;
+ if (StringNCmp (tmp + 2, organism, len2) != 0 && case_diffp != NULL) {
+ *case_diffp = TRUE;
+ }
if (onlp != NULL) {
len3 = len1 - len2 - 3;
@@ -13859,44 +14472,95 @@ static CharPtr TitleEndsInOrganism (
return tmp;
}
+/*
+ * RemoveOrgFromEndOfProtein is a feature-visit callback.  userdata carries
+ * the organism taxname as a CharPtr.  For a protein feature, every name that
+ * ends in "[taxname]" (compared case-insensitively) is truncated at the
+ * opening bracket and then passed to TrimSpacesAroundString.  Names shorter
+ * than 5 characters, names not ending in ']', and names whose last bracketed
+ * text starts with "[NAD" are left unchanged.
+ */
+static void RemoveOrgFromEndOfProtein (SeqFeatPtr sfp, Pointer userdata)
+
+{
+  CharPtr     cp;
+  size_t      len;
+  ProtRefPtr  prp;
+  CharPtr     str;
+  size_t      taxlen;
+  CharPtr     taxname;
+  ValNodePtr  vnp;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
+  prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+  if (prp == NULL) return;
+
+  taxname = (CharPtr) userdata;
+  if (StringHasNoText (taxname)) return;
+
+  /* the taxname does not change while walking the names, so measure it once */
+  taxlen = StringLen (taxname);
+
+  for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+    str = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (str)) continue;
+    len = StringLen (str);
+    if (len < 5) continue;
+    if (str [len - 1] != ']') continue;
+    cp = StringRChr (str, '[');
+    if (cp == NULL) continue;
+    if (StringNCmp (cp, "[NAD", 4) == 0) continue;
+
+    /* the trailing bracketed text must be exactly '[' + taxname + ']' */
+    if (StringLen (cp) != taxlen + 2) continue;
+
+    /*
+     * compare all taxlen characters; comparing only taxlen - 1 left the
+     * last character of the organism name unchecked
+     */
+    if (StringNICmp (cp + 1, taxname, taxlen) != 0) continue;
+
+    *cp = '\0';
+    TrimSpacesAroundString (str);
+  }
+}
+
static void AddPartialToProteinTitle (
BioseqPtr bsp,
Pointer userdata
)
{
- BioSourcePtr biop;
- int genome = 0;
- size_t len;
- MolInfoPtr mip;
- CharPtr oldname = NULL;
- OrgModPtr omp;
- OrgNamePtr onp;
- CharPtr organelle = NULL;
- OrgRefPtr orp;
- Boolean partial = FALSE;
- CharPtr penult = NULL;
- SeqDescrPtr sdp;
- SeqIdPtr sip;
- CharPtr str;
- CharPtr suffix = NULL;
- CharPtr taxname = NULL;
- CharPtr title;
- CharPtr tmp;
- SeqDescrPtr ttl = NULL;
+ CharPtr binomial = NULL;
+ BioSourcePtr biop;
+ BinomialOrgNamePtr bonp;
+ Boolean case_difference = FALSE;
+ CharPtr first_super_kingdom = NULL;
+ int genome = 0;
+ CharPtr genus = NULL;
+ Boolean is_cross_kingdom = FALSE;
+ Boolean is_wp = FALSE;
+ size_t len;
+ MolInfoPtr mip;
+ Int2 num_super_kingdom = 0;
+ CharPtr oldname = NULL;
+ OrgModPtr omp;
+ OrgNamePtr onp;
+ CharPtr organelle = NULL;
+ OrgRefPtr orp;
+ Boolean partial = FALSE;
+ CharPtr penult = NULL;
+ CharPtr ptr;
+ SeqDescrPtr sdp;
+ CharPtr second_super_kingdom = NULL;
+ SeqIdPtr sip;
+ CharPtr species = NULL;
+ CharPtr str;
+ CharPtr suffix = NULL;
+ Boolean super_kingdoms_different = FALSE;
+ CharPtr taxname = NULL;
+ TaxElementPtr tep;
+ CharPtr title;
+ CharPtr tmp;
+ TextSeqIdPtr tsip;
+ SeqDescrPtr ttl = NULL;
if (bsp == NULL) return;
if (! ISA_aa (bsp->mol)) return;
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_SWISSPROT) return;
+ if (sip->choice == SEQID_OTHER) {
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL && tsip->accession != NULL) {
+ if (StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ is_wp = TRUE;
+ }
+ }
+ }
}
- ttl = BioseqGetSeqDescr (bsp, Seq_descr_title, NULL);
- if (ttl == NULL || ttl->choice != Seq_descr_title) return;
- str = (CharPtr) ttl->data.ptrvalue;
- if (StringHasNoText (str)) return;
-
sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
mip = (MolInfoPtr) sdp->data.ptrvalue;
@@ -13916,11 +14580,20 @@ static void AddPartialToProteinTitle (
orp = biop->org;
if (orp != NULL) {
taxname = orp->taxname;
+ /*
if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0) {
organelle = NULL;
}
+ */
onp = orp->orgname;
if (onp != NULL) {
+ if (onp->choice == 1) {
+ bonp = (BinomialOrgNamePtr) onp->data;
+ if (bonp != NULL) {
+ genus = bonp->genus;
+ species = bonp->species;
+ }
+ }
for (omp = onp->mod; omp != NULL; omp = omp->next) {
if (omp->subtype == ORGMOD_old_name) {
oldname = omp->subname;
@@ -13931,6 +14604,42 @@ static void AddPartialToProteinTitle (
}
}
+ VisitFeaturesOnBsp (bsp, (Pointer) taxname, RemoveOrgFromEndOfProtein);
+
+ ttl = BioseqGetSeqDescr (bsp, Seq_descr_title, NULL);
+ if (ttl == NULL || ttl->choice != Seq_descr_title) return;
+ str = (CharPtr) ttl->data.ptrvalue;
+ if (StringHasNoText (str)) return;
+
+ if (is_wp) {
+ for (sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
+ sdp != NULL;
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, sdp)) {
+ if (sdp->choice != Seq_descr_source) continue;
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop == NULL) continue;
+ orp = biop->org;
+ if (orp == NULL) continue;
+ onp = orp->orgname;
+ if (onp == NULL) continue;
+ if (onp->choice != 5) continue;
+ for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
+ if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
+ num_super_kingdom++;
+ if (first_super_kingdom == NULL) {
+ first_super_kingdom = tep->name;
+ } else if (StringICmp (first_super_kingdom, tep->name) != 0) {
+ second_super_kingdom = tep->name;
+ super_kingdoms_different = TRUE;
+ }
+ if (num_super_kingdom > 1 && super_kingdoms_different) {
+ is_cross_kingdom = TRUE;
+ }
+ }
+ }
+ }
+ }
+
/* search for partial, must be just before parenthesized organelle or bracketed organism */
tmp = StringSearch (str, ", partial [");
if (tmp == NULL) {
@@ -13939,14 +14648,32 @@ static void AddPartialToProteinTitle (
/* find oldname or taxname in brackets at end of protein title */
if (oldname != NULL && taxname != NULL) {
- suffix = TitleEndsInOrganism (str, oldname, organelle, &penult);
+ suffix = TitleEndsInOrganism (str, oldname, organelle, &penult, &case_difference);
}
if (suffix == NULL && taxname != NULL) {
- suffix = TitleEndsInOrganism (str, taxname, organelle, &penult);
- if (suffix != NULL) {
+ suffix = TitleEndsInOrganism (str, taxname, organelle, &penult, &case_difference);
+ if (suffix == NULL && StringDoesHaveText (genus) && StringDoesHaveText (species)) {
+ len = StringLen (genus) + StringLen (species) + 5;
+ binomial = (CharPtr) MemNew (len);
+ if (binomial != NULL) {
+ StringCpy (binomial, genus);
+ StringCat (binomial, " ");
+ StringCat (binomial, species);
+ suffix = TitleEndsInOrganism (str, binomial, organelle, &penult, &case_difference);
+ }
+ }
+ if (suffix == NULL && is_cross_kingdom) {
+ ptr = StringStr (str, "][");
+ if (ptr != NULL) {
+ *(ptr + 1) = '\0';
+ suffix = TitleEndsInOrganism (str, taxname, organelle, &penult, &case_difference);
+ }
+ } else {
if (organelle == NULL && penult != NULL) {
} else if (organelle != NULL && penult == NULL) {
} else if (StringCmp (organelle, penult) != 0) {
+ } else if (binomial != NULL) {
+ } else if (case_difference) {
} else {
/* bail if no need to change partial text (organelle) [organism name] */
if (partial) {
@@ -13958,6 +14685,8 @@ static void AddPartialToProteinTitle (
}
}
+ binomial = MemFree (binomial);
+
/* do not change unless [genus species] was at the end */
if (suffix == NULL) return;
@@ -13976,7 +14705,7 @@ static void AddPartialToProteinTitle (
}
TrimSpacesAroundString (str);
- len = StringLen (str) + StringLen (organelle) + StringLen (suffix) + 20;
+ len = StringLen (str) + StringLen (organelle) + StringLen (suffix) + StringLen (first_super_kingdom) + StringLen (second_super_kingdom) + 20;
title = MemNew (sizeof (Char) * len);
if (title == NULL) return;
@@ -13989,7 +14718,13 @@ static void AddPartialToProteinTitle (
StringCat (title, organelle);
StringCat (title, ")");
}
- if (suffix != NULL) {
+ if (is_cross_kingdom && StringDoesHaveText (first_super_kingdom) && StringDoesHaveText (second_super_kingdom)) {
+ StringCat (title, " [");
+ StringCat (title, first_super_kingdom);
+ StringCat (title, "][");
+ StringCat (title, second_super_kingdom);
+ StringCat (title, "]");
+ } else if (suffix != NULL) {
StringCat (title, " [");
StringCat (title, suffix);
StringCat (title, "]");
@@ -13998,14 +14733,16 @@ static void AddPartialToProteinTitle (
ttl->data.ptrvalue = title;
}
+//LCOV_EXCL_START
NLM_EXTERN void CleanUpProteinTitles (SeqEntryPtr sep)
{
if (sep == NULL) return;
VisitBioseqsInSep (sep, NULL, AddPartialToProteinTitle);
}
+//LCOV_EXCL_STOP
-NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
+static void BasicSeqEntryCleanupEx (SeqEntryPtr sep, Boolean resync)
{
AuthorPtr ap;
@@ -14106,6 +14843,12 @@ NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
}
}
+ if (resync) {
+ ResynchCodingRegionPartials (sep);
+ ResynchMessengerRNAPartials (sep);
+ ResynchProteinPartials (sep);
+ }
+
/*
dynamically add missing partial to already instantiated protein
titles, in between main title and bracketed organism name
@@ -14114,6 +14857,20 @@ NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
VisitBioseqsInSep (sep, NULL, AddPartialToProteinTitle);
}
+/*
+ * BasicSeqEntryCleanup is the public entry point that runs
+ * BasicSeqEntryCleanupEx on sep with the resync argument set to FALSE.
+ */
+NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
+
+{
+ BasicSeqEntryCleanupEx (sep, FALSE);
+}
+
+//LCOV_EXCL_START
+/*
+ * AdvancedSeqEntryCleanup runs BasicSeqEntryCleanupEx on sep with the
+ * resync argument set to TRUE.
+ */
+NLM_EXTERN void AdvancedSeqEntryCleanup (SeqEntryPtr sep)
+
+{
+ BasicSeqEntryCleanupEx (sep, TRUE);
+}
+//LCOV_EXCL_STOP
+
typedef struct bsecsmfedata {
Int4 max;
Int4 num_at_max;
@@ -14223,6 +14980,7 @@ NLM_EXTERN void RemoveUnnecessaryGeneXrefs (
}
}
+//LCOV_EXCL_START
static void SortSeqFeatFields (
SeqFeatPtr sfp,
Pointer userdata
@@ -14289,9 +15047,122 @@ NLM_EXTERN void SortSeqEntryQualifiers (
VisitFeaturesInSep (sep, NULL, SortSeqFeatFields);
VisitBioSourcesInSep (sep, NULL, SortBioSourceFields);
}
+//LCOV_EXCL_STOP
/* end BasicSeqEntryCleanup section */
+/*
+ * CDSPartialsFromTranslation is a feature-visit callback (userdata is not
+ * used).  For a coding region it obtains the protein produced by
+ * ProteinFromCdRegionExEx and inspects the resulting string: a leading '-'
+ * marks the feature 5' partial, and the absence of a trailing '*' marks it
+ * 3' partial.  The location and the feature's partial flag are then set
+ * accordingly.  Nothing is changed when no translation or merged string is
+ * available.
+ */
+NLM_EXTERN void CDSPartialsFromTranslation (SeqFeatPtr sfp, Pointer userdata)
+
+{
+  Int4          last;
+  Boolean       missing_start = FALSE;
+  Boolean       missing_stop = TRUE;
+  CharPtr       residues;
+  ByteStorePtr  translation;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;
+
+  translation = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, NULL, FALSE);
+  if (translation == NULL) return;
+
+  residues = BSMerge (translation, NULL);
+  if (residues != NULL) {
+    /* only the first and last residues matter, so index them directly */
+    last = (Int4) StringLen (residues) - 1;
+    if (last >= 0) {
+      if (residues [0] == '-') {
+        missing_start = TRUE;
+      }
+      if (residues [last] == '*') {
+        missing_stop = FALSE;
+      }
+    }
+
+    MemFree (residues);
+
+    SetSeqLocPartial (sfp->location, missing_start, missing_stop);
+    sfp->partial = (Boolean) (missing_start || missing_stop);
+  }
+
+  BSFree (translation);
+}
+
+/*
+ * CodingRegionPartialsFromTranslation visits the features in sep with
+ * CDSPartialsFromTranslation as the callback and NULL as userdata.
+ */
+NLM_EXTERN void CodingRegionPartialsFromTranslation (SeqEntryPtr sep)
+
+{
+ VisitFeaturesInSep (sep, NULL, CDSPartialsFromTranslation);
+}
+
+/*
+ * ImposeGenePartials is a feature-visit callback (userdata is not used).
+ * For a gene feature it walks the features that follow the gene in the
+ * indexed feature order for as long as their left position does not pass
+ * the gene's right position.  Among those for which TestFeatOverlap with
+ * CONTAINED_WITHIN returns a non-negative value, it keeps the one with the
+ * smallest value (the first such one on ties).  The partial state of that
+ * feature's location is then copied onto the gene location and the gene's
+ * partial flag.
+ * NOTE(review): presumably the smallest overlap value identifies the longest
+ * contained feature - confirm against TestFeatOverlap.
+ */
+NLM_EXTERN void ImposeGenePartials (SeqFeatPtr sfp, Pointer userdata)
+
+{
+  SeqFeatPtr         best = NULL;
+  Int4               best_diff = INT4_MAX;
+  BioseqPtr          bsp;
+  SeqMgrFeatContext  cand_context;
+  SeqFeatPtr         candidate;
+  Int4               diff;
+  SeqMgrFeatContext  gene_context;
+  Boolean            new_partial;
+  Boolean            partial3;
+  Boolean            partial5;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
+
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  if (bsp == NULL) return;
+
+  /* obtain the gene's own feature context; stop if the lookup returns another feature */
+  if (SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &gene_context) != sfp) return;
+
+  for (candidate = SeqMgrGetDesiredFeature (0, bsp, 0, gene_context.index + 1, NULL, &cand_context);
+       candidate != NULL && gene_context.right >= cand_context.left;
+       candidate = SeqMgrGetNextFeature (bsp, candidate, 0, 0, &cand_context)) {
+    diff = TestFeatOverlap (candidate, sfp, CONTAINED_WITHIN);
+    if (diff < 0 || diff >= best_diff) continue;
+    best_diff = diff;
+    best = candidate;
+  }
+
+  if (best == NULL) return;
+
+  CheckSeqLocForPartial (best->location, &partial5, &partial3);
+  new_partial = (Boolean) (best->partial || partial5 || partial3);
+  SetSeqLocPartial (sfp->location, partial5, partial3);
+  sfp->partial = new_partial;
+}
+
+/*
+ * ImposeCDSPartials is a feature-visit callback (userdata is not used).
+ * For a coding region it sets the partial flag when the flag is already set
+ * or when CheckSeqLocForPartial reports the location as 5' or 3' partial.
+ * If GetmRNAforCDS returns an mRNA, the same 5'/3' state and partial flag
+ * are applied to that mRNA.
+ */
+NLM_EXTERN void ImposeCDSPartials (SeqFeatPtr sfp, Pointer userdata)
+
+{
+  Boolean     is_partial;
+  SeqFeatPtr  transcript;
+  Boolean     partial3;
+  Boolean     partial5;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;
+
+  CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+  is_partial = (Boolean) (sfp->partial || partial5 || partial3);
+  sfp->partial = is_partial;
+
+  transcript = GetmRNAforCDS (sfp);
+  if (transcript == NULL) return;
+
+  SetSeqLocPartial (transcript->location, partial5, partial3);
+  transcript->partial = is_partial;
+}
+
+/*
+ * ImposeCodingRegionPartials makes two passes over the features in sep:
+ * first with ImposeCDSPartials, then with ImposeGenePartials, so the gene
+ * pass runs after the coding region pass has finished.
+ */
+NLM_EXTERN void ImposeCodingRegionPartials (SeqEntryPtr sep)
+
+{
+ VisitFeaturesInSep (sep, NULL, ImposeCDSPartials);
+ VisitFeaturesInSep (sep, NULL, ImposeGenePartials);
+}
+
NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
{
@@ -14309,11 +15180,22 @@ NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
LogInfoPtr lip;
CharPtr orig_loc = NULL, new_loc;
Char id_buf[100];
+ Boolean new_partial;
if (sfp->data.choice != SEQFEAT_CDREGION) return;
lip = (LogInfoPtr) userdata;
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
- sfp->partial = (Boolean) (sfp->partial || partial5 || partial3);
+ new_partial = (Boolean) (sfp->partial || partial5 || partial3);
+ if (new_partial != sfp->partial) {
+ sfp->partial = new_partial;
+ if (lip != NULL) {
+ lip->data_in_log = TRUE;
+ if (lip->fp != NULL) {
+ fprintf (lip->fp, "Changed partial flag for coding region\n");
+ }
+ }
+ }
+
/*
slp = SeqLocFindNext (sfp->location, NULL);
if (slp == NULL) return;
@@ -14336,6 +15218,7 @@ NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
prp = (ProtRefPtr) bestprot->data.value.ptrvalue;
slp = bestprot->location;
if (prp != NULL && prp->processed < 2 && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)) {
+
if (lip != NULL) {
orig_loc = SeqLocPrintUseBestID (bestprot->location);
}
@@ -14347,21 +15230,23 @@ NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
if (slp == NULL) {
slp = CreateWholeInterval (sep);
}
- if (slp != NULL) {
+ SetSeqLocPartial (slp, partial5, partial3);
+ if (slp != NULL
+ && (!AsnIoMemComp (slp, bestprot->location, (AsnWriteFunc) SeqLocAsnWrite) || bestprot->partial != sfp->partial)) {
bestprot->location = SeqLocFree (bestprot->location);
bestprot->location = slp;
- }
- SetSeqLocPartial (bestprot->location, partial5, partial3);
- bestprot->partial = sfp->partial;
- if (lip != NULL) {
- new_loc = SeqLocPrintUseBestID (bestprot->location);
- if (StringCmp (orig_loc, new_loc) != 0) {
+
+ bestprot->partial = sfp->partial;
+ if (lip != NULL) {
+ new_loc = SeqLocPrintUseBestID (bestprot->location);
lip->data_in_log = TRUE;
if (lip->fp != NULL) {
fprintf (lip->fp, "Synchronized coding region partials for protein feature location at %s\n", orig_loc/*, new_loc*/);
}
+ new_loc = MemFree (new_loc);
}
- new_loc = MemFree (new_loc);
+ } else {
+ slp = SeqLocFree (slp);
}
orig_loc = MemFree (orig_loc);
}
@@ -14387,73 +15272,71 @@ NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
}
}
}
- if (vnp != NULL) {
- mip = (MolInfoPtr) vnp->data.ptrvalue;
- if (mip != NULL) {
- if (partial5 && partial3) {
- if (mip->completeness != 5) {
- mip->completeness = 5;
- if (lip != NULL) {
- if (lip->fp != NULL) {
- if (id_buf[0] == 0) {
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
- }
- fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
- lip->data_in_log = TRUE;
+
+ if (vnp != NULL && (mip = (MolInfoPtr) vnp->data.ptrvalue) != NULL) {
+ if (partial5 && partial3) {
+ if (mip->completeness != 5) {
+ mip->completeness = 5;
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ if (id_buf[0] == 0) {
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
}
+ fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
+ lip->data_in_log = TRUE;
}
}
- } else if (partial5) {
- if (mip->completeness != 3) {
- mip->completeness = 3;
- if (lip != NULL) {
- if (lip->fp != NULL) {
- if (id_buf[0] == 0) {
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
- }
- fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
+ }
+ } else if (partial5) {
+ if (mip->completeness != 3) {
+ mip->completeness = 3;
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ if (id_buf[0] == 0) {
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
}
- lip->data_in_log = TRUE;
+ fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
}
+ lip->data_in_log = TRUE;
}
- } else if (partial3) {
- if (mip->completeness != 4) {
- mip->completeness = 4;
- if (lip != NULL) {
- if (lip->fp != NULL) {
- if (id_buf[0] == 0) {
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
- }
- fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
+ }
+ } else if (partial3) {
+ if (mip->completeness != 4) {
+ mip->completeness = 4;
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ if (id_buf[0] == 0) {
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
}
- lip->data_in_log = TRUE;
+ fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
}
+ lip->data_in_log = TRUE;
}
- } else if (sfp->partial) {
- if (mip->completeness != 2) {
- mip->completeness = 2;
- if (lip != NULL) {
- if (lip->fp != NULL) {
- if (id_buf[0] == 0) {
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
- }
- fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
+ }
+ } else if (sfp->partial) {
+ if (mip->completeness != 2) {
+ mip->completeness = 2;
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ if (id_buf[0] == 0) {
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
}
- lip->data_in_log = TRUE;
+ fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
}
+ lip->data_in_log = TRUE;
}
- } else {
- if (mip->completeness != 0) {
- mip->completeness = 0;
- if (lip != NULL) {
- if (lip->fp != NULL) {
- if (id_buf[0] == 0) {
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
- }
- fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
+ }
+ } else {
+ if (mip->completeness != 0 && mip->completeness != 1) {
+ mip->completeness = 0;
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ if (id_buf[0] == 0) {
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
}
- lip->data_in_log = TRUE;
+ fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
}
+ lip->data_in_log = TRUE;
}
}
}
@@ -14558,7 +15441,7 @@ NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata)
if (sfp->data.choice != SEQFEAT_PROT) return;
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
if (prp == NULL) return;
- if (prp->processed < 1 || prp->processed > 4) return;
+ if (prp->processed < 1 || prp->processed > 5) return;
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
sfp->partial = (Boolean) (sfp->partial || partial5 || partial3);
/*
@@ -14664,6 +15547,7 @@ NLM_EXTERN SeqIdPtr SeqIdStripLocus (SeqIdPtr sip)
return sip;
}
+//LCOV_EXCL_START
NLM_EXTERN SeqLocPtr StripLocusFromSeqLoc (SeqLocPtr location)
{
@@ -14775,6 +15659,7 @@ NLM_EXTERN void GetRidOfLocusInSeqIds (Uint2 entityID, SeqEntryPtr sep)
if (sep == NULL) return;
SeqEntryExplore (sep, NULL, GetRidOfLocusCallback);
}
+//LCOV_EXCL_STOP
/* Mac can now use static parse tables by using
Make Strings Read-Only and Store Static Data in TOC
@@ -14831,6 +15716,7 @@ static Boolean CheckErrMsgPath (CharPtr dirname, CharPtr subdir)
return (Boolean) (FileExists (dirname, subdir, "valid.msg"));
}
+//LCOV_EXCL_START
static void SetTransientPath (CharPtr dirname, CharPtr subname, CharPtr file,
CharPtr section, CharPtr type)
@@ -14920,278 +15806,6 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void)
return FALSE;
}
-typedef struct miscdata {
- SeqEntryPtr sep;
- Int2 count;
- Int2 desired;
- Uint1 _class;
-} MiscData, PNTR MiscDataPtr;
-
-static void FindNthSeqEntryCallback (SeqEntryPtr sep, Pointer mydata,
- Int4 index, Int2 indent)
-
-{
- MiscDataPtr mdp;
-
- if (sep != NULL && mydata != NULL) {
- mdp = (MiscDataPtr) mydata;
- (mdp->count)++;
- if (mdp->count == mdp->desired) {
- mdp->sep = sep;
- }
- }
-}
-
-NLM_EXTERN SeqEntryPtr LIBCALL FindNthSeqEntry (SeqEntryPtr sep, Int2 seq)
-
-{
- MiscData md;
-
- md.sep = NULL;
- md.count = 0;
- md.desired = seq;
- if (sep != NULL) {
- SeqEntryExplore (sep, (Pointer) (&md), FindNthSeqEntryCallback);
- }
- return md.sep;
-}
-
-NLM_EXTERN SeqEntryPtr LIBCALL FindNthBioseq (SeqEntryPtr sep, Int2 seq)
-
-{
- MiscData md;
-
- md.sep = NULL;
- md.count = 0;
- md.desired = seq;
- if (sep != NULL) {
- BioseqExplore (sep, (Pointer) (&md), FindNthSeqEntryCallback);
- }
- return md.sep;
-}
-
-NLM_EXTERN SeqEntryPtr LIBCALL FindNthSequinEntry (SeqEntryPtr sep, Int2 seq)
-
-{
- MiscData md;
-
- md.sep = NULL;
- md.count = 0;
- md.desired = seq;
- if (sep != NULL) {
- SequinEntryExplore (sep, (Pointer) (&md), FindNthSeqEntryCallback);
- }
- return md.sep;
-}
-
-static void FindNucSeqEntryCallback (SeqEntryPtr sep, Pointer mydata,
- Int4 index, Int2 indent)
-
-{
- BioseqPtr bsp;
- MiscDataPtr mdp;
-
- if (sep != NULL && sep->choice == 1 && mydata != NULL) {
- mdp = (MiscDataPtr) mydata;
- bsp = (BioseqPtr) sep->data.ptrvalue;
- if (bsp != NULL && ISA_na (bsp->mol)) {
- if (mdp->sep == NULL) {
- mdp->sep = sep;
- }
- }
- }
-}
-
-NLM_EXTERN SeqEntryPtr LIBCALL FindNucSeqEntry (SeqEntryPtr sep)
-
-{
- MiscData md;
-
- md.sep = NULL;
- md.count = 0;
- md.desired = 0;
- if (sep != NULL) {
- BioseqExplore (sep, (Pointer) (&md), FindNucSeqEntryCallback);
- }
- return md.sep;
-}
-
-NLM_EXTERN BioseqPtr LIBCALL FindNucBioseq (SeqEntryPtr sep)
-
-{
- BioseqPtr nbsp;
- SeqEntryPtr nsep;
-
- nsep = FindNucSeqEntry (sep);
- if (nsep == NULL) return NULL;
- if (! IS_Bioseq (nsep)) return NULL;
- nbsp = (BioseqPtr) nsep->data.ptrvalue;
- return nbsp;
-}
-
-static void FindBioseqSetByClassCallback (SeqEntryPtr sep, Pointer mydata,
- Int4 index, Int2 indent)
-
-{
- BioseqSetPtr bssp;
- MiscDataPtr mdp;
-
- if (sep != NULL && sep->choice == 2 && mydata != NULL) {
- mdp = (MiscDataPtr) mydata;
- bssp = (BioseqSetPtr) sep->data.ptrvalue;
- if (bssp != NULL && bssp->_class == mdp->_class) {
- if (mdp->sep == NULL) {
- mdp->sep = sep;
- }
- }
- }
-}
-
-NLM_EXTERN SeqEntryPtr LIBCALL FindBioseqSetByClass (SeqEntryPtr sep, Uint1 _class)
-
-{
- MiscData md;
-
- md.sep = NULL;
- md.count = 0;
- md.desired = 0;
- md._class = _class;
- if (sep != NULL) {
- SeqEntryExplore (sep, (Pointer) (&md), FindBioseqSetByClassCallback);
- }
- return md.sep;
-}
-
-typedef struct kinddata {
- Boolean hasNuc;
- Boolean hasProt;
-} KindData, PNTR KindPtr;
-
-static void HasNucOrProtCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
-
-{
- BioseqPtr bsp;
- KindPtr kptr;
-
- if (sep != NULL && sep->choice == 1 && sep->data.ptrvalue != NULL && mydata != NULL) {
- kptr = (KindPtr) mydata;
- bsp = (BioseqPtr) sep->data.ptrvalue;
- if (ISA_na (bsp->mol)) {
- kptr->hasNuc = TRUE;
- } else if (ISA_aa (bsp->mol)) {
- kptr->hasProt = TRUE;
- }
- }
-}
-
-NLM_EXTERN Boolean LIBCALL SeqEntryHasNucs (SeqEntryPtr sep)
-
-{
- KindData kd;
-
- kd.hasNuc = FALSE;
- kd.hasProt = FALSE;
- if (sep != NULL) {
- BioseqExplore (sep, (Pointer) (&kd), HasNucOrProtCallback);
- }
- return kd.hasNuc;
-}
-
-NLM_EXTERN Boolean LIBCALL SeqEntryHasProts (SeqEntryPtr sep)
-
-{
- KindData kd;
-
- kd.hasNuc = FALSE;
- kd.hasProt = FALSE;
- if (sep != NULL) {
- BioseqExplore (sep, (Pointer) (&kd), HasNucOrProtCallback);
- }
- return kd.hasProt;
-}
-
-static Boolean CheckForAlignments (GatherContextPtr gcp)
-
-{
- BoolPtr boolptr;
-
- if (gcp == NULL) return TRUE;
-
- boolptr = (BoolPtr) gcp->userdata;
- if (boolptr == NULL ) return TRUE;
-
- switch (gcp->thistype) {
- case OBJ_SEQALIGN :
- case OBJ_SEQHIST_ALIGN :
- *boolptr = TRUE;
- return TRUE;
- default :
- break;
- }
- return TRUE;
-}
-
-NLM_EXTERN Boolean LIBCALL SeqEntryHasAligns (Uint2 entityID, SeqEntryPtr sep)
-
-{
- GatherScope gs;
- Boolean rsult;
-
- rsult = FALSE;
- if (entityID == 0 || sep == NULL) return FALSE;
- MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
- gs.seglevels = 1;
- MemSet((Pointer) (gs.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean)));
- gs.ignore[OBJ_BIOSEQ] = FALSE;
- gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
- gs.ignore[OBJ_SEQALIGN] = FALSE;
- gs.ignore[OBJ_SEQANNOT] = FALSE;
- gs.ignore[OBJ_SEQHIST] = FALSE;
- gs.ignore[OBJ_SEQHIST_ALIGN] = FALSE;
- gs.scope = sep;
- GatherEntity (entityID, (Pointer) (&rsult), CheckForAlignments, &gs);
- return rsult;
-}
-
-static void FindPowerBLASTAsnCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
-
-{
- AnnotDescrPtr desc;
- ObjectIdPtr oip;
- SeqAnnotPtr sap;
- BoolPtr rsult;
-
- if (sep == NULL || sep->data.ptrvalue == NULL || mydata == NULL) return;
- rsult = (BoolPtr) mydata;
- sap = (IS_Bioseq (sep)) ?
- ((BioseqPtr) sep->data.ptrvalue)->annot :
- ((BioseqSetPtr) sep->data.ptrvalue)->annot;
- while (sap != NULL) {
- if (sap->type == 2) {
- desc = NULL;
- while ((desc = ValNodeFindNext (sap->desc, desc, Annot_descr_user)) != NULL) {
- if (desc->data.ptrvalue != NULL) {
- oip = ((UserObjectPtr) desc->data.ptrvalue)->type;
- if (oip != NULL && StringCmp (oip->str, "Hist Seqalign") == 0) {
- *rsult = TRUE;
- }
- }
- }
- }
- sap = sap->next;
- }
-}
-
-NLM_EXTERN Boolean LIBCALL PowerBLASTASN1Detected (SeqEntryPtr sep)
-
-{
- Boolean rsult;
-
- rsult = FALSE;
- SeqEntryExplore (sep, (Pointer) &rsult, FindPowerBLASTAsnCallback);
- return rsult;
-}
-
NLM_EXTERN SeqLocPtr CreateWholeInterval (SeqEntryPtr sep)
{
@@ -15219,6 +15833,8 @@ NLM_EXTERN SeqLocPtr CreateWholeInterval (SeqEntryPtr sep)
}
return slp;
}
+//LCOV_EXCL_STOP
+
NLM_EXTERN SeqLocPtr WholeIntervalFromSeqId (SeqIdPtr sip)
@@ -15245,6 +15861,7 @@ NLM_EXTERN SeqLocPtr WholeIntervalFromSeqId (SeqIdPtr sip)
return slp;
}
+//LCOV_EXCL_START
NLM_EXTERN void FreeAllFuzz (SeqLocPtr location)
{
@@ -15264,6 +15881,7 @@ NLM_EXTERN void FreeAllFuzz (SeqLocPtr location)
slp = SeqLocFindNext (location, slp);
}
}
+//LCOV_EXCL_STOP
NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location)
@@ -15364,14 +15982,18 @@ NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype)
if (subtype >= FEATDEF_gap && subtype <= FEATDEF_oriT) {
return SEQFEAT_IMP;
}
- if (subtype >= FEATDEF_mobile_element && subtype <= FEATDEF_assembly_gap) {
+ if (subtype >= FEATDEF_mobile_element && subtype <= FEATDEF_regulatory) {
return SEQFEAT_IMP;
}
+ if (subtype == FEATDEF_propeptide) {
+ return SEQFEAT_PROT;
+ }
}
return 0;
}
-NLM_EXTERN SeqIdPtr MakeSeqID (CharPtr str)
+//LCOV_EXCL_START
+NLM_EXTERN SeqIdPtr MakeSeqID(CharPtr str)
{
CharPtr buf;
@@ -15632,6 +16254,7 @@ NLM_EXTERN ValNodePtr CreateNewDescriptor (SeqEntryPtr sep, Uint1 choice)
return vnp;
}
+
NLM_EXTERN ValNodePtr CreateNewDescriptorOnBioseq (BioseqPtr bsp, Uint1 choice)
{
@@ -15643,6 +16266,7 @@ NLM_EXTERN ValNodePtr CreateNewDescriptorOnBioseq (BioseqPtr bsp, Uint1 choice)
return CreateNewDescriptor (sep, choice);
}
+
/* common functions to scan binary ASN.1 file of entire release as Bioseq-set */
static Int4 VisitSeqIdList (SeqIdPtr sip, Pointer userdata, VisitSeqIdFunc callback)
@@ -15994,6 +16618,7 @@ NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, V
index++;
return index;
}
+//LCOV_EXCL_STOP
typedef struct uopdata {
UserObjectPtr rsult;
@@ -16029,6 +16654,7 @@ NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag)
return ud.rsult;
}
+//LCOV_EXCL_START
NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop)
{
@@ -17063,825 +17689,6 @@ NLM_EXTERN Boolean IsPopPhyEtcSet (Uint1 _class)
}
-static Int4 ScanBioseqSetReleaseInt (
- CharPtr inputFile,
- Boolean binary,
- Boolean compressed,
- Pointer userdata,
- ScanBioseqSetFunc callback,
- Boolean freesep,
- TNlmMutexPtr mutex
-)
-
-{
- AsnIoPtr aip;
- AsnModulePtr amp;
- AsnTypePtr atp, atp_bss, atp_se;
- FILE *fp;
- Int4 index = 0;
- SeqEntryPtr sep;
-#ifdef OS_UNIX
- Char cmmd [256];
- CharPtr gzcatprog;
- int ret;
- Boolean usedPopen = FALSE;
-#endif
- if (StringHasNoText (inputFile) || callback == NULL) return index;
-
-#ifndef OS_UNIX
- if (compressed) {
- Message (MSG_ERROR, "Can only decompress on-the-fly on UNIX machines");
- return index;
- }
-#endif
-
- amp = AsnAllModPtr ();
- if (amp == NULL) {
- Message (MSG_ERROR, "Unable to load AsnAllModPtr");
- return index;
- }
-
- atp_bss = AsnFind ("Bioseq-set");
- if (atp_bss == NULL) {
- Message (MSG_ERROR, "Unable to find ASN.1 type Bioseq-set");
- return index;
- }
-
- atp_se = AsnFind ("Bioseq-set.seq-set.E");
- if (atp_se == NULL) {
- Message (MSG_ERROR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
- return index;
- }
-
-#ifdef OS_UNIX
- if (compressed) {
- gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
- if (gzcatprog != NULL) {
- sprintf (cmmd, "%s %s", gzcatprog, inputFile);
- } else {
- ret = system ("gzcat -h >/dev/null 2>&1");
- if (ret == 0) {
- sprintf (cmmd, "gzcat %s", inputFile);
- } else if (ret == -1) {
- Message (MSG_FATAL, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
- return index;
- } else {
- ret = system ("zcat -h >/dev/null 2>&1");
- if (ret == 0) {
- sprintf (cmmd, "zcat %s", inputFile);
- } else if (ret == -1) {
- Message (MSG_FATAL, "Unable to fork or exec zcat in ScanBioseqSetRelease");
- return index;
- } else {
- Message (MSG_FATAL, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
- return index;
- }
- }
- }
- fp = popen (cmmd, /* binary? "rb" : */ "r");
- usedPopen = TRUE;
- } else {
- fp = FileOpen (inputFile, binary? "rb" : "r");
- }
-#else
- fp = FileOpen (inputFile, binary? "rb" : "r");
-#endif
- if (fp == NULL) {
- Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
- return index;
- }
-
- aip = AsnIoNew (binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
- if (aip == NULL) {
- Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", inputFile);
- return index;
- }
-
- atp = atp_bss;
-
- while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
- if (atp == atp_se) {
- if (mutex != NULL) {
- NlmMutexLockEx (mutex);
- }
- SeqMgrHoldIndexing (TRUE);
- sep = SeqEntryAsnRead (aip, atp);
- SeqMgrHoldIndexing (FALSE);
- if (mutex != NULL) {
- NlmMutexUnlock (*mutex);
- }
- callback (sep, userdata);
- if (freesep) {
- SeqEntryFree (sep);
- }
- index++;
- } else {
- AsnReadVal (aip, atp, NULL);
- }
- }
-
- AsnIoFree (aip, FALSE);
-
-#ifdef OS_UNIX
- if (usedPopen) {
- pclose (fp);
- } else {
- FileClose (fp);
- }
-#else
- FileClose (fp);
-#endif
- return index;
-}
-
-NLM_EXTERN Int4 ScanBioseqSetRelease (
- CharPtr inputFile,
- Boolean binary,
- Boolean compressed,
- Pointer userdata,
- ScanBioseqSetFunc callback
-)
-
-{
- return ScanBioseqSetReleaseInt (inputFile, binary, compressed, userdata, callback, TRUE, NULL);
-}
-
-static TNlmMutex scan_bioseq_set_release_mutex = NULL;
-
-NLM_EXTERN Int4 ScanBioseqSetReleaseMT (
- CharPtr inputFile,
- Boolean binary,
- Boolean compressed,
- Pointer userdata,
- ScanBioseqSetFunc callback
-)
-
-{
- return ScanBioseqSetReleaseInt (inputFile, binary, compressed, userdata, callback, FALSE, &scan_bioseq_set_release_mutex);
-}
-
-NLM_EXTERN SeqEntryPtr LIBCALL FreeScanSeqEntryMT (
- SeqEntryPtr sep
-)
-
-{
- if (sep == NULL) return NULL;
-
- NlmMutexLockEx (&scan_bioseq_set_release_mutex);
-
- SeqMgrHoldIndexing (TRUE);
- SeqEntryFree (sep);
- SeqMgrHoldIndexing (FALSE);
-
- NlmMutexUnlock (scan_bioseq_set_release_mutex);
-
- return NULL;
-}
-
-NLM_EXTERN Int4 ScanEntrezgeneSetRelease (
- CharPtr inputFile,
- Boolean binary,
- Boolean compressed,
- Pointer userdata,
- ScanEntrezgeneSetFunc callback
-)
-
-{
- AsnIoPtr aip;
- AsnModulePtr amp;
- AsnTypePtr atp, atp_egs, atp_egse;
- EntrezgenePtr egp;
- FILE *fp;
- Int4 index = 0;
-#ifdef OS_UNIX
- Char cmmd [256];
- CharPtr gzcatprog;
- int ret;
- Boolean usedPopen = FALSE;
-#endif
- if (StringHasNoText (inputFile) || callback == NULL) return index;
-
-#ifndef OS_UNIX
- if (compressed) {
- Message (MSG_ERROR, "Can only decompress on-the-fly on UNIX machines");
- return index;
- }
-#endif
-
- amp = AsnAllModPtr ();
- if (amp == NULL) {
- Message (MSG_ERROR, "Unable to load AsnAllModPtr");
- return index;
- }
-
- atp_egs = AsnFind ("Entrezgene-Set");
- if (atp_egs == NULL) {
- Message (MSG_ERROR, "Unable to find ASN.1 type Entrezgene-Set");
- return index;
- }
-
- atp_egse = AsnFind ("Entrezgene-Set.E");
- if (atp_egse == NULL) {
- Message (MSG_ERROR, "Unable to find ASN.1 type Entrezgene-Set.E");
- return index;
- }
-
-#ifdef OS_UNIX
- if (compressed) {
- gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
- if (gzcatprog != NULL) {
- sprintf (cmmd, "%s %s", gzcatprog, inputFile);
- } else {
- ret = system ("gzcat -h >/dev/null 2>&1");
- if (ret == 0) {
- sprintf (cmmd, "gzcat %s", inputFile);
- } else if (ret == -1) {
- Message (MSG_FATAL, "Unable to fork or exec gzcat in ScanEntrezgeneSetRelease");
- return index;
- } else {
- ret = system ("zcat -h >/dev/null 2>&1");
- if (ret == 0) {
- sprintf (cmmd, "zcat %s", inputFile);
- } else if (ret == -1) {
- Message (MSG_FATAL, "Unable to fork or exec zcat in ScanEntrezgeneSetRelease");
- return index;
- } else {
- Message (MSG_FATAL, "Unable to find zcat or gzcat in ScanEntrezgeneSetRelease - please edit your PATH environment variable");
- return index;
- }
- }
- }
- fp = popen (cmmd, /* binary? "rb" : */ "r");
- usedPopen = TRUE;
- } else {
- fp = FileOpen (inputFile, binary? "rb" : "r");
- }
-#else
- fp = FileOpen (inputFile, binary? "rb" : "r");
-#endif
- if (fp == NULL) {
- Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
- return index;
- }
-
- aip = AsnIoNew (binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
- if (aip == NULL) {
- Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", inputFile);
- return index;
- }
-
- atp = atp_egs;
-
- while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
- if (atp == atp_egse) {
- egp = EntrezgeneAsnRead (aip, atp);
- callback (egp, userdata);
- EntrezgeneFree (egp);
- index++;
- } else {
- AsnReadVal (aip, atp, NULL);
- }
- }
-
- AsnIoFree (aip, FALSE);
-
-#ifdef OS_UNIX
- if (usedPopen) {
- pclose (fp);
- } else {
- FileClose (fp);
- }
-#else
- FileClose (fp);
-#endif
- return index;
-}
-
-
-
-NLM_EXTERN int LIBCALL ObjectIdCompare (ObjectIdPtr a, ObjectIdPtr b)
-{
- int rval = 0;
- Char buf[30];
-
- if (a == b) {
- rval = 0;
- } else if (a == NULL) {
- rval = -1;
- } else if (b == NULL) {
- rval = 1;
- } else if (a->str == NULL && b->str == NULL) {
- if (a->id < b->id) {
- rval = -1;
- } else if (a->id > b->id) {
- rval = 1;
- }
- } else if (a->str == NULL) {
- sprintf (buf, "%d", a->id);
- rval = StringCmp (buf, b->str);
- } else if (b->str == NULL) {
- sprintf (buf, "%d", b->id);
- rval = StringCmp (a->str, buf);
- } else {
- rval = StringCmp (a->str, b->str);
- }
- return rval;
-}
-
-
-/*****************************************************************************
-*
-* DbtagMatch(a, b)
-*
-*****************************************************************************/
-NLM_EXTERN int LIBCALL DbtagCompare (DbtagPtr a, DbtagPtr b)
-{
- int rval = 0;
-
- if (a == b) {
- rval = 0;
- } else if (a == NULL) {
- rval = -1;
- } else if (b == NULL) {
- rval = 1;
- } else if ((rval = StringICmp (a->db, b->db)) == 0) {
- rval = ObjectIdCompare (a->tag, b->tag);
- }
- return rval;
-}
-
-
-static int LIBCALLBACK SortVnpByDbtag (VoidPtr ptr1, VoidPtr ptr2)
-
-{
- ValNodePtr vnp1;
- ValNodePtr vnp2;
-
- if (ptr1 != NULL && ptr2 != NULL) {
- vnp1 = *((ValNodePtr PNTR) ptr1);
- vnp2 = *((ValNodePtr PNTR) ptr2);
- if (vnp1 != NULL && vnp2 != NULL) {
- return DbtagCompare (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
- }
- }
- return 0;
-}
-
-NLM_EXTERN int LIBCALL OrgModSetCompare (OrgModPtr mod1, OrgModPtr mod2)
-{
- int rval = 0;
-
- while (mod1 != NULL && mod2 != NULL && rval == 0)
- {
- if (mod1->subtype < mod2->subtype)
- {
- rval = -1;
- }
- else if (mod1->subtype > mod2->subtype)
- {
- rval = 1;
- }
- else if ((rval = StringCmp (mod1->subname, mod2->subname)) == 0
- && (rval = StringCmp (mod1->attrib, mod2->attrib)) == 0)
- {
- mod1 = mod1->next;
- mod2 = mod2->next;
- }
- }
-
- if (rval == 0)
- {
- if (mod1 == NULL && mod2 == NULL)
- {
- rval = 0;
- }
- else if (mod1 == NULL)
- {
- rval = -1;
- }
- else if (mod2 == NULL)
- {
- rval = 1;
- }
- }
- return rval;
-}
-
-
-NLM_EXTERN int LIBCALL OrgNameCompare (OrgNamePtr onp1, OrgNamePtr onp2)
-{
- int rval = 0;
-
- while (onp1 != NULL && onp2 != NULL && rval == 0)
- {
- if ((rval = OrgModSetCompare(onp1->mod, onp2->mod)) != 0
- || (rval = StringCmp (onp1->lineage, onp2->lineage)) != 0
- || (rval = StringCmp (onp1->div, onp2->div)) != 0
- || (rval = StringCmp (onp1->attrib, onp2->attrib)) != 0)
- {
- /* no further processing */
- }
- else if (onp1->choice < onp2->choice)
- {
- rval = -1;
- }
- else if (onp1->choice > onp2->choice)
- {
- rval = 1;
- }
- else if (onp1->gcode < onp2->gcode)
- {
- rval = -1;
- }
- else if (onp1->gcode > onp2->gcode)
- {
- rval = 1;
- }
- else if (onp1->mgcode < onp2->mgcode)
- {
- rval = -1;
- }
- else if (onp1->mgcode > onp2->mgcode)
- {
- rval = 1;
- }
- else if (onp1->pgcode < onp2->pgcode)
- {
- rval = -1;
- }
- else if (onp1->pgcode > onp2->pgcode)
- {
- rval = 1;
- }
- onp1 = onp1->next;
- onp2 = onp2->next;
- }
- if (rval == 0)
- {
- if (onp1 == NULL && onp2 == NULL)
- {
- rval = 0;
- }
- else if (onp1 == NULL)
- {
- rval = -1;
- }
- else if (onp2 == NULL)
- {
- rval = 1;
- }
- }
- return rval;
-}
-
-
-/*****************************************************************************
-*
-* OrgRefCompare (orp1, orp2)
-*
-*****************************************************************************/
-NLM_EXTERN int LIBCALL OrgRefCompare (OrgRefPtr orp1, OrgRefPtr orp2)
-{
- int rval = 0;
- if (orp1 == NULL && orp2 == NULL)
- {
- return 0;
- }
- else if (orp1 == NULL)
- {
- return -1;
- }
- else if (orp2 == NULL)
- {
- return 1;
- }
- else if ((rval = StringCmp (orp1->taxname, orp2->taxname)) != 0)
- {
- return rval;
- }
- else if ((rval = StringCmp (orp1->common, orp2->common)) != 0)
- {
- return rval;
- }
- else if ((rval = ValNodeCompare (orp1->syn, orp2->syn, SortVnpByString)) != 0)
- {
- return rval;
- }
- else if ((rval = ValNodeCompare (orp1->db, orp2->db, SortVnpByDbtag)) != 0)
- {
- return rval;
- }
- else
- {
- rval = OrgNameCompare (orp1->orgname, orp2->orgname);
- }
- return rval;
-}
-
-
-static Boolean DoStringsMatch (CharPtr str1, CharPtr str2, Boolean case_sensitive)
-{
- Boolean rval = FALSE;
-
- if (case_sensitive) {
- if (StringCmp (str1, str2) == 0) {
- rval = TRUE;
- }
- } else if (StringICmp (str1, str2) == 0) {
- rval = TRUE;
- }
- return rval;
-}
-
-
-static Boolean DoGBQualListsMatch (GBQualPtr gbq1, GBQualPtr gbq2, Boolean case_sensitive)
-{
- Boolean rval = TRUE;
-
- while (rval && gbq1 != NULL && gbq2 != NULL) {
- if (!DoStringsMatch (gbq1->qual, gbq2->qual, case_sensitive)) {
- rval = FALSE;
- } else if (!DoStringsMatch (gbq1->val, gbq2->val, case_sensitive)) {
- rval = FALSE;
- } else {
- gbq1 = gbq1->next;
- gbq2 = gbq2->next;
- }
- }
- if (gbq1 != NULL || gbq2 != NULL) {
- rval = FALSE;
- }
- return rval;
-}
-
-
-static Boolean CheckBioseqForPartial (BioseqPtr bsp, BoolPtr partial5, BoolPtr partial3)
-{
- SeqMgrDescContext context;
- SeqDescrPtr sdp;
- MolInfoPtr mip;
- Boolean rval = FALSE;
-
- if (bsp == NULL) {
- return FALSE;
- }
- if (partial5 != NULL) {
- *partial5 = FALSE;
- }
- if (partial3 != NULL) {
- *partial3 = FALSE;
- }
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
- if (sdp != NULL && (mip = (MolInfoPtr) sdp->data.ptrvalue) != NULL) {
- /* partial 5 */
- if (mip->completeness == 3 || mip->completeness == 5) {
- if (partial5 != NULL) {
- *partial5 = TRUE;
- }
- rval = TRUE;
- }
- /* partial 3 */
- if (mip->completeness == 4 || mip->completeness == 5) {
- if (partial3 != NULL) {
- *partial3 = TRUE;
- }
- rval = TRUE;
- }
- if (mip->completeness == 2) {
- rval = TRUE;
- }
- }
- return rval;
-}
-
-
-static Boolean ProductsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean case_sensitive, Boolean ignore_partial)
-{
- BioseqPtr bsp1, bsp2;
- Int2 ctr, pos1, pos2;
- Char buf1[51];
- Char buf2[51];
- Int4 len = 50;
- SeqFeatPtr sfp1, sfp2;
- SeqMgrFeatContext fcontext1, fcontext2;
- Boolean partial5_1, partial5_2, partial3_1, partial3_2;
-
- if (slp1 == NULL && slp2 == NULL) {
- return TRUE;
- } else if (slp1 == NULL || slp2 == NULL) {
- return FALSE;
- } else if (SeqLocCompare (slp1, slp2) == SLC_A_EQ_B) {
- return TRUE;
- } else {
- bsp1 = BioseqFindFromSeqLoc (slp1);
- bsp2 = BioseqFindFromSeqLoc (slp2);
- if (bsp1 == NULL || bsp2 == NULL) {
- /* can't compare, assume they don't match */
- return FALSE;
- } else if (bsp1->length != bsp2->length) {
- return FALSE;
- } else {
- CheckBioseqForPartial (bsp1, &partial5_1, &partial3_1);
- CheckBioseqForPartial (bsp2, &partial5_2, &partial3_2);
- if (!ignore_partial
- && ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2)
- || (partial3_1 && !partial3_2) || (!partial3_1 && partial3_2))) {
- return FALSE;
- }
- /* check that translation sequences match */
- pos1 = 0;
- pos2 = 0;
- if (ignore_partial) {
- if (partial5_1 || partial5_2) {
- pos1++;
- pos2++;
- }
- }
- while (pos1 < bsp1->length && pos2 < bsp2->length) {
- ctr = SeqPortStreamInt (bsp1, pos1, MIN(pos1 + len - 1, bsp1->length - 1), Seq_strand_plus,
- STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
- (Pointer) buf1, NULL);
- ctr = SeqPortStreamInt (bsp2, pos2, MIN(pos2 + len - 1, bsp2->length - 1), Seq_strand_plus,
- STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
- (Pointer) buf2, NULL);
- if (StringNCmp (buf1, buf2, ctr) != 0) {
- return FALSE;
- }
- pos1 += len;
- pos2 += len;
- }
-
- /* now check that protein features match */
- sfp1 = SeqMgrGetNextFeature (bsp1, NULL, 0, 0, &fcontext1);
- sfp2 = SeqMgrGetNextFeature (bsp2, NULL, 0, 0, &fcontext2);
- while (sfp1 != NULL && sfp2 != NULL) {
- if (!DoFeaturesMatch (sfp1, sfp2, TRUE, case_sensitive, ignore_partial)) {
- return FALSE;
- }
- sfp1 = SeqMgrGetNextFeature (bsp1, sfp1, SEQFEAT_PROT, 0, &fcontext1);
- sfp2 = SeqMgrGetNextFeature (bsp2, sfp2, SEQFEAT_PROT, 0, &fcontext2);
- }
- if (sfp1 != NULL || sfp2 != NULL) {
- return FALSE;
- } else {
- return TRUE;
- }
- }
- }
-}
-
-
-static Boolean DoLocationPartialsMatch (SeqLocPtr slp1, SeqLocPtr slp2)
-{
- Boolean partial5_1, partial3_1, partial1;
- Boolean partial5_2, partial3_2, partial2;
-
- partial1 = CheckSeqLocForPartial (slp1, &partial5_1, &partial3_1);
- partial2 = CheckSeqLocForPartial (slp2, &partial5_2, &partial3_2);
- if ((partial1 && !partial2) || (!partial1 && partial2)) {
- return FALSE;
- }
- if ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2)) {
- return FALSE;
- }
- if ((partial3_1 && !partial3_2) || (!partial3_1 && partial3_2)) {
- return FALSE;
- }
- return TRUE;
-}
-
-
-static Boolean DoLocationsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean allow_different_sequences, Boolean ignore_partial)
-{
- SeqLocPtr slp_tmp1, slp_tmp2;
-
- if (slp1 == NULL && slp2 == NULL) {
- return TRUE;
- } else if (slp1 == NULL || slp2 == NULL) {
- return FALSE;
- }
-
- if (!ignore_partial && !DoLocationPartialsMatch (slp1, slp2)) {
- return FALSE;
- }
- if (allow_different_sequences) {
- for (slp_tmp1 = SeqLocFindNext (slp1, NULL), slp_tmp2 = SeqLocFindNext (slp2, NULL);
- slp_tmp1 != NULL && slp_tmp2 != NULL;
- slp_tmp1 = SeqLocFindNext (slp1, slp_tmp1), slp_tmp2 = SeqLocFindNext (slp2, slp_tmp2)) {
- if (SeqLocStart (slp_tmp1) != SeqLocStart (slp_tmp2)
- || SeqLocStop (slp_tmp1) != SeqLocStop (slp_tmp2)
- || (!ignore_partial && !DoLocationPartialsMatch (slp_tmp1, slp_tmp2))) {
- return FALSE;
- }
- }
- } else if (SeqLocCompare (slp1, slp2) != SLC_A_EQ_B) {
- return FALSE;
- }
- return TRUE;
-}
-
-
-static Boolean DoCdRegionsMatch (CdRegionPtr crp1, CdRegionPtr crp2)
-{
- if (crp1 == NULL && crp2 == NULL) {
- return TRUE;
- } else if (crp1 == NULL || crp2 == NULL) {
- return FALSE;
- } else if ((crp1->orf && !crp2->orf) || (!crp1->orf && crp2->orf)){
- return FALSE;
- } else if ((crp1->conflict && !crp2->conflict) || (!crp1->conflict && crp2->conflict)){
- return FALSE;
- } else if (crp1->gaps != crp2->gaps) {
- return FALSE;
- } else if (crp1->mismatch != crp2->mismatch) {
- return FALSE;
- } else if (crp1->stops != crp2->stops) {
- return FALSE;
- } else if ((crp1->genetic_code == NULL && crp2->genetic_code != NULL)
- || (crp1->genetic_code != NULL && crp2->genetic_code == NULL)
- || (crp1->genetic_code != NULL && crp2->genetic_code != NULL
- && !AsnIoMemComp (crp1->genetic_code, crp2->genetic_code, (AsnWriteFunc) GeneticCodeAsnWrite))) {
- return FALSE;
- } else if ((crp1->code_break == NULL && crp2->code_break != NULL)
- || (crp1->code_break != NULL && crp2->code_break == NULL)
- || (crp1->code_break != NULL && crp2->code_break != NULL
- && !AsnIoMemComp (crp1->code_break, crp2->code_break, (AsnWriteFunc) CodeBreakAsnWrite))) {
- return FALSE;
- } else if (crp1->frame != crp2->frame) {
- if ((crp1->frame == 0 || crp1->frame == 1) && (crp2->frame == 0 || crp2->frame == 1)) {
- /* both effectively frame 1, ignore this difference */
- } else {
- return FALSE;
- }
- }
- return TRUE;
-}
-
-
-static Boolean DoesSeqFeatDataMatch (ChoicePtr d1, ChoicePtr d2)
-{
- if (d1 == NULL && d2 == NULL) {
- return TRUE;
- } else if (d1 == NULL || d2 == NULL) {
- return FALSE;
- } else if (d1->choice != d2->choice) {
- return FALSE;
- } else if (d1->choice == SEQFEAT_CDREGION) {
- return DoCdRegionsMatch(d1->value.ptrvalue, d2->value.ptrvalue);
- } else {
- return AsnIoMemComp(d1, d2, (AsnWriteFunc) SeqFeatDataAsnWrite);
- }
-}
-
-
-NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial)
-{
- if (sfp1 == NULL && sfp2 == NULL) {
- return TRUE;
- } else if (sfp1 == NULL || sfp2 == NULL) {
- return FALSE;
- } if (sfp1->data.choice != sfp2->data.choice) {
- return FALSE;
- } else if (sfp1->idx.subtype != sfp2->idx.subtype) {
- return FALSE;
- } else if (!ignore_partial && ((sfp1->partial && !sfp2->partial) || (!sfp1->partial && sfp2->partial))) {
- return FALSE;
- } else if ((sfp1->pseudo && !sfp2->pseudo) || (!sfp1->pseudo && sfp2->pseudo)) {
- return FALSE;
- } else if ((sfp1->excpt && !sfp2->excpt) || (!sfp1->excpt && sfp2->excpt)) {
- return FALSE;
- } else if (!DoLocationsMatch (sfp1->location, sfp2->location, allow_different_sequences, ignore_partial)) {
- return FALSE;
- } else if (!DoStringsMatch (sfp1->comment, sfp2->comment, case_sensitive)) {
- return FALSE;
- } else if (!DoStringsMatch (sfp1->title, sfp2->title, case_sensitive)) {
- return FALSE;
- } else if (sfp1->ext != NULL || sfp2->ext != NULL) {
- return FALSE;
- } else if (sfp1->exts != NULL || sfp2->exts != NULL) {
- return FALSE;
- } else if (!DoStringsMatch (sfp1->except_text, sfp2->except_text, case_sensitive)) {
- return FALSE;
- } else if (sfp1->exp_ev != sfp2->exp_ev) {
- return FALSE;
- } else if (!DoGBQualListsMatch (sfp1->qual, sfp2->qual, case_sensitive)) {
- return FALSE;
- } else if ((sfp1->cit != NULL || sfp2->cit != NULL) && PubMatch (sfp1->cit, sfp2->cit) != 0) {
- return FALSE;
- } else if (!DbxrefsMatch (sfp1->dbxref, sfp2->dbxref, case_sensitive)) {
- return FALSE;
- } else if (!DoesSeqFeatDataMatch(&(sfp1->data), &(sfp2->data))) {
- return FALSE;
- } else if (!XrefsMatch (sfp1->xref, sfp2->xref)) {
- return FALSE;
- } else if (!ProductsMatch (sfp1->product, sfp2->product, case_sensitive, ignore_partial)) {
- return FALSE;
- } else {
- return TRUE;
- }
-}
-
-
NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep)
{
Boolean stripSerial = FALSE;
@@ -17901,7 +17708,6 @@ NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep)
}
-
NLM_EXTERN void CleanupOneSeqFeat (SeqFeatPtr sfp)
{
Boolean isEmblOrDdbj = FALSE;
@@ -17926,234 +17732,7 @@ NLM_EXTERN void CleanupOneSeqFeat (SeqFeatPtr sfp)
}
ValNodeFreeData (publist);
}
-
-/* special cases for chloroplast genetic code until implemented in taxonomy database */
-
-typedef struct pgorg {
- CharPtr organism;
- Uint1 pgcode;
-} PgOrg;
-
-static PgOrg pgOrgList [] = {
- { "Chromera velia", 4 } ,
- { NULL, 0 }
-};
-
-typedef struct pglin {
- CharPtr lineage;
- Uint1 pgcode;
-} PgLin;
-
-static PgLin pgLinList [] = {
- { "Eukaryota; Alveolata; Apicomplexa; Coccidia; ", 4 } ,
- { NULL, 0 }
-};
-
-NLM_EXTERN Uint1 GetSpecialPlastidGenCode (
- CharPtr taxname,
- CharPtr lineage
-)
-
-{
- Int2 i;
- size_t max;
- Uint1 pgcode = 0;
-
- if (StringDoesHaveText (taxname)) {
- for (i = 0; pgOrgList [i].organism != NULL; i++) {
- if (StringICmp (taxname, pgOrgList [i].organism) != 0) continue;
- pgcode = pgOrgList [i].pgcode;
- }
- }
-
- if (StringDoesHaveText (lineage)) {
- for (i = 0; pgLinList [i].lineage != NULL; i++) {
- max = StringLen (pgLinList [i].lineage);
- if (StringNICmp (lineage, pgLinList [i].lineage, max) != 0) continue;
- pgcode = pgLinList [i].pgcode;
- }
- }
-
- if (pgcode == 11) {
- pgcode = 0;
- }
-
- return pgcode;
-}
-
-
-static void TrimStopsFromCompleteCodingRegionsCallback (SeqFeatPtr sfp, Pointer data)
-{
- Boolean p5, p3;
- BioseqPtr protbsp;
- CharPtr prot_str;
- Int4 len;
- /* variables for shortening protein features */
- SeqFeatPtr prot_sfp;
- SeqMgrFeatContext fcontext;
- SeqIntPtr sintp;
- /* variables for logging */
- LogInfoPtr lip;
- Char id_buf[100];
-
- if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL) {
- return;
- }
-
- CheckSeqLocForPartial (sfp->location, &p5, &p3);
- if (p3) {
- return;
- }
-
- protbsp = BioseqFindFromSeqLoc (sfp->product);
- if (protbsp == NULL) {
- return;
- }
-
- prot_str = GetSequenceByBsp (protbsp);
- if (prot_str == NULL || (len = StringLen (prot_str)) == 0
- || prot_str[len - 1] != '*') {
- prot_str = MemFree (prot_str);
- return;
- }
-
- BSSeek ((ByteStorePtr) protbsp->seq_data, -1, SEEK_END);
- BSDelete ((ByteStorePtr) protbsp->seq_data, 1);
- protbsp->length -= 1;
- prot_str = MemFree (prot_str);
-
- for (prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, 0, &fcontext);
- prot_sfp != NULL;
- prot_sfp = SeqMgrGetNextFeature (protbsp, prot_sfp, 0, 0, &fcontext)) {
- if (prot_sfp->location != NULL
- && prot_sfp->location->choice == SEQLOC_INT
- && (sintp = (SeqIntPtr)prot_sfp->location->data.ptrvalue) != NULL) {
- if (sintp->to > protbsp->length - 1) {
- sintp->to = protbsp->length - 1;
- }
- }
- }
-
- lip = (LogInfoPtr) data;
- if (lip != NULL) {
- if (lip->fp != NULL) {
- SeqIdWrite (SeqIdFindBest (protbsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
- fprintf (lip->fp, "Trimmed trailing * from %s\n", id_buf);
- }
- lip->data_in_log = TRUE;
- }
-}
-
-
-NLM_EXTERN Boolean TrimStopsFromCompleteCodingRegions (SeqEntryPtr sep, FILE *log_fp)
-{
- LogInfoData lid;
- MemSet (&lid, 0, sizeof (LogInfoData));
- lid.fp = log_fp;
- VisitFeaturesInSep (sep, &lid, TrimStopsFromCompleteCodingRegionsCallback);
- return lid.data_in_log;
-}
-
-
-NLM_EXTERN void
-FixCapitalizationInTitle
-(CharPtr PNTR pTitle,
- Boolean first_is_upper,
- ValNodePtr org_names)
-{
- if (pTitle == NULL) return;
- ResetCapitalization (first_is_upper, *pTitle);
- FixAbbreviationsInElement (pTitle);
- FixOrgNamesInString (*pTitle, org_names);
-}
-
-
-typedef struct structuredcommentconversion {
- Int4 num_converted;
- Int4 num_unable_to_convert;
-} StructuredCommentConversionData, PNTR StructuredCommentConversionPtr;
-
-static void CommentWithSpacesToStructuredCommentCallback (SeqDescPtr sdp, Pointer userdata)
-{
- UserObjectPtr uop;
- CharPtr str, start, stop;
- Int4 len;
- UserFieldPtr ufp = NULL, prev_ufp = NULL;
- StructuredCommentConversionPtr sd;
-
- if (sdp == NULL || sdp->choice != Seq_descr_comment || StringHasNoText (sdp->data.ptrvalue)) {
- return;
- }
-
- uop = UserObjectNew ();
- uop->type = ObjectIdNew ();
- uop->type->str = StringSave ("StructuredComment");
-
- start = sdp->data.ptrvalue;
- while (*start != 0) {
- stop = start + StringCSpn (start, " ~");
- while (*stop != 0 && *stop != '~' && !isspace (*(stop + 1)) && *(stop + 1) != 0) {
- stop = stop + 1 + StringCSpn (stop + 1, " ~");
- }
- len = 1 + stop - start;
- str = (CharPtr) MemNew (sizeof (Char) * len);
- StringNCpy (str, start, len - 1);
- str[len - 1] = 0;
- if (ufp == NULL) {
- /* add new field */
- ufp = UserFieldNew ();
- if (prev_ufp == NULL) {
- uop->data = ufp;
- } else {
- prev_ufp->next = ufp;
- }
- ufp->label = ObjectIdNew ();
- ufp->label->str = str;
- } else {
- /* add value to last field */
- ufp->choice = 1;
- ufp->data.ptrvalue = str;
- prev_ufp = ufp;
- ufp = NULL;
- }
- if (*stop == 0) {
- start = stop;
- } else {
- start = stop + 1 + StringSpn (stop + 1, " ");
- }
- }
-
- if (prev_ufp == NULL) {
- uop = UserObjectFree (uop);
- return;
- }
- sd = (StructuredCommentConversionPtr) userdata;
- if (ufp == NULL) {
- sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
- sdp->data.ptrvalue = uop;
- sdp->choice = Seq_descr_user;
- if (sd != NULL) {
- sd->num_converted++;
- }
- } else {
- uop = UserObjectFree (uop);
- if (sd != NULL) {
- sd->num_unable_to_convert++;
- }
- }
-}
-
-
-NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEntryPtr sep)
-{
- StructuredCommentConversionData sd;
-
- MemSet (&sd, 0, sizeof (StructuredCommentConversionData));
- VisitDescriptorsInSep (sep, &sd, CommentWithSpacesToStructuredCommentCallback);
-
- return sd.num_unable_to_convert;
-}
-
+//LCOV_EXCL_STOP
NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2)
{
@@ -18246,2603 +17825,740 @@ NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp)
}
}
+/* basic cleanup code from sqnutil3.c */
-static void MakeFeatureXrefsFromProteinIdQualsCallback (SeqFeatPtr sfp, Pointer data)
-{
- GBQualPtr gbq;
- SeqIdPtr sip;
- BioseqPtr pbsp;
- SeqFeatPtr cds;
- CharPtr product;
- ProtRefPtr prp;
- SeqEntryPtr sep;
-
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) {
- return;
- }
-
- for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
- if (StringICmp (gbq->qual, "protein_id") == 0 || StringICmp (gbq->qual, "orig_protein_id") == 0) {
- sip = CreateSeqIdFromText (gbq->val, sep);
- pbsp = BioseqFind (sip);
- cds = SeqMgrGetCDSgivenProduct (pbsp, NULL);
- if (cds != NULL) {
- LinkTwoFeatures (cds, sfp);
- LinkTwoFeatures (sfp, cds);
- product = GetRNAProductString(sfp, NULL);
- if (StringHasNoText (product)) {
- prp = GetProtRefForFeature (cds);
- if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
- SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old);
- }
- }
- product = MemFree (product);
- }
- }
- }
-}
-
-
-NLM_EXTERN void MakeFeatureXrefsFromProteinIdQuals (SeqEntryPtr sep)
-{
- /* assign feature IDs, so that we can create xrefs that use them */
- AssignFeatureIDs (sep);
-
- VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromProteinIdQualsCallback);
-}
-
-
-static void MakeFeatureXrefsFromTranscriptIdQualsCallback (SeqFeatPtr sfp, Pointer data)
-{
- GBQualPtr gbq;
- SeqIdPtr sip;
- BioseqPtr pbsp;
- SeqFeatPtr cds;
- CharPtr product;
- ProtRefPtr prp;
- SeqEntryPtr sep;
-
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) {
- return;
- }
-
- for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
- if (StringICmp (gbq->qual, "transcript_id") == 0 || StringICmp (gbq->qual, "orig_transcript_id") == 0) {
- sip = CreateSeqIdFromText (gbq->val, sep);
- pbsp = BioseqFind (sip);
- cds = SeqMgrGetCDSgivenProduct (pbsp, NULL);
- if (cds != NULL) {
- LinkTwoFeatures (cds, sfp);
- LinkTwoFeatures (sfp, cds);
- product = GetRNAProductString(sfp, NULL);
- if (StringHasNoText (product)) {
- prp = GetProtRefForFeature (cds);
- if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
- SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old);
- }
- }
- product = MemFree (product);
- }
- }
- }
-}
-
-
-NLM_EXTERN void MakeFeatureXrefsFromTranscriptIdQuals (SeqEntryPtr sep)
-{
- /* assign feature IDs, so that we can create xrefs that use them */
- AssignFeatureIDs (sep);
-
- VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromTranscriptIdQualsCallback);
-}
-
-
-static void FinishHalfXrefsCallback (SeqFeatPtr sfp, Pointer data)
-{
- SeqFeatPtr other;
- SeqFeatXrefPtr xref, xref_other;
- Boolean has_other_xref;
-
- if (sfp == NULL) {
- return;
- }
-
- xref = sfp->xref;
- while (xref != NULL) {
- if (xref->id.choice == 3) {
- other = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL);
- if (other != NULL) {
- xref_other = other->xref;
- has_other_xref = FALSE;
- while (xref_other != NULL && !has_other_xref) {
- if (xref_other->id.choice == 3) {
- has_other_xref = TRUE;
- }
- xref_other = xref_other->next;
- }
- if (!has_other_xref) {
- LinkTwoFeatures (sfp, other);
- }
- }
- }
- xref = xref->next;
- }
-}
-
-
-NLM_EXTERN void FinishHalfXrefs (SeqEntryPtr sep)
-{
- VisitFeaturesInSep (sep, (Pointer) sep, FinishHalfXrefsCallback);
-}
-
-
-NLM_EXTERN Uint1 GetAaFromtRNA (tRNAPtr trp)
-{
- Uint1 aa;
- Uint1 from;
- SeqMapTablePtr smtp;
-
- if (trp == NULL) {
- return 0;
- }
-
- aa = 0;
- if (trp->aatype == 2) {
- aa = trp->aa;
- } else {
- from = 0;
- switch (trp->aatype) {
- case 0:
- from = 0;
- break;
- case 1:
- from = Seq_code_iupacaa;
- break;
- case 2:
- from = Seq_code_ncbieaa;
- break;
- case 3:
- from = Seq_code_ncbi8aa;
- break;
- case 4:
- from = Seq_code_ncbistdaa;
- break;
- default:
- break;
- }
- smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
- if (smtp != NULL) {
- aa = SeqMapTableConvert (smtp, trp->aa);
- }
- }
- return aa;
-}
-
-
-NLM_EXTERN CharPtr GetCodesFortRNA (SeqFeatPtr sfp, Int2 *pCode)
-{
- BioseqPtr bsp;
- Int2 code = 0;
- GeneticCodePtr gncp;
- ValNodePtr vnp;
- CharPtr codes = NULL;
-
- if (sfp == NULL) {
- return NULL;
- }
-
- /* find genetic code table */
-
- bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID);
- BioseqToGeneticCode (bsp, &code, NULL, NULL, NULL, 0, NULL);
-
- gncp = GeneticCodeFind (code, NULL);
- if (gncp == NULL) {
- gncp = GeneticCodeFind (1, NULL);
- code = 1;
- }
- if (gncp != NULL) {
- for (vnp = (ValNodePtr) gncp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
- if (vnp->choice != 3) continue;
- codes = (CharPtr) vnp->data.ptrvalue;
- break;
- }
- }
- if (pCode != NULL) {
- *pCode = code;
- }
- return codes;
-}
-
-
-static Boolean DoesCodonMatchAminoAcid (Uint1 aa, Uint1 index, CharPtr codes)
-{
- Uint1 taa;
- Boolean rval = FALSE;
-
- if (aa == 0 || aa == 255 || codes == NULL)
- {
- return TRUE;
- }
- taa = codes [index];
-
- if (taa == aa)
- {
- rval = TRUE;
- }
- /* selenocysteine normally uses TGA (14), so ignore without requiring exception in record */
- else if (aa == 'U' && taa == '*' && index == 14)
- {
- rval = TRUE;
- }
- /* pyrrolysine normally uses TAG (11) in archaebacteria, ignore without requiring exception */
- else if (aa == 'O' && taa == '*' && index == 11) {
- rval = TRUE;
- }
- /* TAA (10) is not yet known to be used for an exceptional amino acid, but the night is young */
-
- return rval;
-}
-
-
-static Boolean IsATGC (Char ch)
-{
- if (ch == 'A' || ch == 'T' || ch == 'G' || ch == 'C') {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
-static Char s_comp (Char ch)
-{
- if (ch == 'A') {
- return 'T';
- } else if (ch == 'G') {
- return 'C';
- } else if (ch == 'C') {
- return 'G';
- } else if (ch == 'T') {
- return 'A';
- } else {
- return 'N';
- }
-}
-
+extern void ConvertSourceFeatDescProc (SeqFeatPtr sfp, Pointer userdata)
-static CharPtr GetFlipCodonLoggingInfo (SeqFeatPtr sfp)
{
- SeqFeatPtr gene = NULL;
- GeneRefPtr grp = NULL;
- ValNode vn;
- CharPtr txt = NULL;
+ BioSourcePtr biop;
+ BioseqPtr bsp;
+ SubSourcePtr lastssp;
+ ObjValNodePtr ovp;
+ SeqDescPtr sdp;
+ SeqEntryPtr sep;
+ SeqIdPtr sip;
+ SubSourcePtr ssp;
+ ValNode vn;
+ ValNodePtr last_dbxref;
- GetGeneInfoForFeature (sfp, &grp, &gene);
- if (grp != NULL && !StringHasNoText (grp->locus_tag)) {
- txt = StringSave (grp->locus_tag);
+ /* look for biosource features */
+ if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC) return;
+ /* get bioseq by feature location */
+ sip = SeqLocId (sfp->location);
+ bsp = BioseqFind (sip);
+ if (bsp == NULL) return;
+ sip = SeqIdFindBest(bsp->id, 0);
+ if (sip == NULL) return;
+ vn.choice = SEQLOC_WHOLE;
+ vn.extended = 0;
+ vn.data.ptrvalue = (Pointer) sip;
+ vn.next = NULL;
+ /* is feature full length? */
+ if (SeqLocCompare (sfp->location, &vn) != SLC_A_EQ_B) return;
+ sep = SeqMgrGetSeqEntryForData (bsp);
+ if (sep == NULL) return;
+ sdp = CreateNewDescriptor (sep, Seq_descr_source);
+ if (sdp == NULL) return;
+ /* move biosource from feature to descriptor */
+ sdp->data.ptrvalue = sfp->data.value.ptrvalue;
+ if (sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+ ovp->idx.subtype = Seq_descr_source;
+ }
+ sfp->data.value.ptrvalue = NULL;
+ /* flag old feature for removal */
+ sfp->idx.deleteme = TRUE;
+ /* move comment to subsource note */
+ if (sfp->comment == NULL) return;
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop == NULL) return;
+ ssp = SubSourceNew ();
+ if (ssp == NULL) return;
+ ssp->subtype = SUBSRC_other;
+ ssp->name = sfp->comment;
+ sfp->comment = NULL;
+ /* link in at end, since BasicSeqEntry will have sorted this list */
+ if (biop->subtype == NULL) {
+ biop->subtype = ssp;
} else {
- MemSet (&vn, 0, sizeof (ValNode));
- vn.choice = OBJ_SEQFEAT;
- vn.data.ptrvalue = sfp;
- txt = GetDiscrepancyItemText (&vn);
- }
- return txt;
-}
-
-
-static Int4 CountCodonsRecognized (tRNAPtr trp)
-{
- Int4 num = 0, i;
-
- if (trp == NULL) {
- return 0;
- }
- for (i = 0; i < 6; i++) {
- if (trp->codon [i] < 64) {
- num++;
+ lastssp = biop->subtype;
+ while (lastssp->next != NULL) {
+ lastssp = lastssp->next;
}
+ lastssp->next = ssp;
}
- return num;
-}
-
-
-static Int4 CountMatchingCodons (tRNAPtr trp, Uint1 aa, CharPtr codes)
-{
- Int4 num = 0, i;
- if (trp == NULL) {
- return 0;
- }
- for (i = 0; i < 6; i++) {
- if (trp->codon [i] < 64) {
- if (DoesCodonMatchAminoAcid (aa, trp->codon[i], codes)) {
- num++;
- }
+ /* move dbxrefs on feature to source */
+ if (sfp->dbxref != NULL) {
+ if (biop->org == NULL) {
+ biop->org = OrgRefNew();
}
- }
-
- return num;
-}
-
-
-static Int4 CountFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code)
-{
- Int4 num = 0, i;
- Int2 index;
- Uint1 codon [4];
- Uint1 rcodon [4];
-
- if (trp == NULL) {
- return 0;
- }
- /* Note - it is important to set the fourth character in the codon array to NULL
- * because CodonForIndex only fills in the three characters of actual codon,
- * so if you StringCpy the codon array and the NULL character is not found after
- * the three codon characters, you will write in memory you did not intend to.
- */
- codon [3] = 0;
- rcodon [3] = 0;
- for (i = 0; i < 6; i++)
- {
- if (trp->codon [i] < 64
- && !DoesCodonMatchAminoAcid (aa, trp->codon[i], codes)
- && CodonForIndex (trp->codon [i], Seq_code_iupacna, codon)
- && IsATGC(codon[0])
- && IsATGC(codon[1])
- && IsATGC(codon[2]))
- {
- rcodon[0] = s_comp(codon[2]);
- rcodon[1] = s_comp(codon[1]);
- rcodon[2] = s_comp(codon[0]);
- index = IndexForCodon (rcodon, code);
- if (index < 64 && DoesCodonMatchAminoAcid(aa, index, codes))
- {
- num++;
- }
+ last_dbxref = biop->org->db;
+ while (last_dbxref != NULL && last_dbxref->next != NULL) {
+ last_dbxref = last_dbxref->next;
}
- }
-
- return num;
-}
-
-
-static Int4 FlipFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code)
-{
- Int4 num = 0, i;
- Int2 index;
- Uint1 codon [4];
- Uint1 rcodon [4];
-
- if (trp == NULL) {
- return 0;
- }
- /* Note - it is important to set the fourth character in the codon array to NULL
- * because CodonForIndex only fills in the three characters of actual codon,
- * so if you StringCpy the codon array and the NULL character is not found after
- * the three codon characters, you will write in memory you did not intend to.
- */
- codon [3] = 0;
- rcodon [3] = 0;
- for (i = 0; i < 6; i++)
- {
- if (trp->codon [i] < 64
- && !DoesCodonMatchAminoAcid (aa, trp->codon[i], codes)
- && CodonForIndex (trp->codon [i], Seq_code_iupacna, codon)
- && IsATGC(codon[0])
- && IsATGC(codon[1])
- && IsATGC(codon[2]))
- {
- rcodon[0] = s_comp(codon[2]);
- rcodon[1] = s_comp(codon[1]);
- rcodon[2] = s_comp(codon[0]);
- index = IndexForCodon (rcodon, code);
- if (index < 64 && DoesCodonMatchAminoAcid(aa, index, codes))
- {
- trp->codon[i] = index;
- num++;
- }
- }
- }
-
- return num;
-}
-
-
-static Boolean IgnoretRNACodonRecognized (SeqFeatPtr sfp)
-{
- if (sfp == NULL
- || StringISearch (sfp->except_text, "RNA editing") != NULL
- || StringISearch (sfp->except_text, "modified codon recognition") != NULL)
- {
- return TRUE;
- }
- else
- {
- return FALSE;
- }
-}
-
-
-static void FlipCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data)
-{
- RnaRefPtr rrp;
- tRNAPtr trp;
- Uint1 aa;
- CharPtr txt;
- LogInfoPtr lip;
- Int2 code = 0;
- CharPtr codes = NULL;
- Int4 num_codons, num_match, num_flippable;
-
- if (IgnoretRNACodonRecognized(sfp)
- || sfp->idx.subtype != FEATDEF_tRNA
- || (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) == NULL
- || rrp->ext.choice != 2
- || (trp = (tRNAPtr)(rrp->ext.value.ptrvalue)) == NULL)
- {
- return;
- }
-
- num_codons = CountCodonsRecognized (trp);
- if (num_codons == 0) {
- return;
- }
-
- lip = (LogInfoPtr) data;
-
- aa = GetAaFromtRNA (trp);
-
- /* find genetic code table */
- codes = GetCodesFortRNA (sfp, &code);
-
- if (codes == NULL) return;
-
- num_match = CountMatchingCodons (trp, aa, codes);
- if (num_codons == num_match) {
- return;
- } else if (num_codons > 1) {
- if (lip != NULL)
- {
- if (lip->fp != NULL)
- {
- /* text for log */
- txt = GetFlipCodonLoggingInfo (sfp);
- fprintf (lip->fp, "Unable to flip bad codon_recognized for %s\n", txt);
- txt = MemFree (txt);
- }
- lip->data_in_log = TRUE;
- }
- } else {
- num_flippable = CountFlippableCodons(trp, aa, codes, code);
- if (num_flippable == num_codons) {
- FlipFlippableCodons (trp, aa, codes, code);
+ if (last_dbxref == NULL) {
+ biop->org->db = sfp->dbxref;
} else {
- if (lip != NULL)
- {
- if (lip->fp != NULL)
- {
- /* text for log */
- txt = GetFlipCodonLoggingInfo (sfp);
- fprintf (lip->fp, "Unable to flip bad codon_recognized for %s\n", txt);
- txt = MemFree (txt);
- }
- lip->data_in_log = TRUE;
- }
+ last_dbxref->next = sfp->dbxref;
}
+ sfp->dbxref = NULL;
}
}
+extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata)
-NLM_EXTERN void FlipCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
{
- VisitFeaturesInSep (sep, lip, FlipCodonRecognizedCallback);
-}
-
+ MolInfoPtr mip;
+ SeqDescrPtr sdp;
+ Boolean is_mrna = FALSE, is_master_seq = FALSE, has_nulls = FALSE;
+ SeqFeatPtr gene = NULL;
+ SeqFeatPtr sfp;
+ SeqMgrFeatContext context;
+ Int4 num_cds = 0;
+ Int4 num_mrna = 0;
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+ Boolean partial5, partial3;
+ BioSourcePtr biop;
+ OrgRefPtr orp;
+ BioseqSetPtr bssp;
-static void RemoveBadCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data)
-{
- RnaRefPtr rrp;
- tRNAPtr trp;
- Int2 j, k;
- Uint1 aa;
- Uint1 codon [4];
- Uint1 rcodon [4];
- CharPtr txt;
- LogInfoPtr lip;
- Int2 code = 0;
- CharPtr codes = NULL;
- Int4 num_codons, num_match;
-
- if (IgnoretRNACodonRecognized(sfp)
- || sfp->idx.subtype != FEATDEF_tRNA
- || (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) == NULL
- || rrp->ext.choice != 2
- || (trp = (tRNAPtr)(rrp->ext.value.ptrvalue)) == NULL)
- {
+ if (bsp == NULL || bsp->length == 0
+ || !ISA_na (bsp->mol)) {
return;
}
- num_codons = CountCodonsRecognized (trp);
- if (num_codons == 0) {
- return;
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
+ if (sdp != NULL) {
+ mip = (MolInfoPtr) sdp->data.ptrvalue;
+ if (mip != NULL && mip->biomol == MOLECULE_TYPE_MRNA) {
+ is_mrna = TRUE;
+ }
}
-
- lip = (LogInfoPtr) data;
-
- aa = GetAaFromtRNA (trp);
-
- /* find genetic code table */
- codes = GetCodesFortRNA (sfp, &code);
-
- if (codes == NULL) return;
-
- num_match = CountMatchingCodons (trp, aa, codes);
- if (num_match == num_codons) {
+ if (!is_mrna) {
return;
}
- /* Note - it is important to set the fourth character in the codon array to NULL
- * because CodonForIndex only fills in the three characters of actual codon,
- * so if you StringCpy the codon array and the NULL character is not found after
- * the three codon characters, you will write in memory you did not intend to.
- */
- codon [3] = 0;
- rcodon [3] = 0;
-
- for (j = 0; j < 6; j++)
- {
- if (trp->codon [j] < 64)
- {
- if (DoesCodonMatchAminoAcid (aa, trp->codon[j], codes))
- {
- /* already ok - skip it */
- }
- else if (CodonForIndex (trp->codon [j], Seq_code_iupacna, codon)
- && IsATGC(codon[0])
- && IsATGC(codon[1])
- && IsATGC(codon[2]))
- {
- for (k = j + 1; k < 6; k++)
- {
- trp->codon[k - 1] = trp->codon[k];
- }
- trp->codon[5] = 255;
- if (lip != NULL)
- {
- if (lip->fp != NULL)
- {
- /* text for log */
- txt = GetFlipCodonLoggingInfo (sfp);
- fprintf (lip->fp, "Removed codon_recognized '%s' for %s\n", codon, txt);
- txt = MemFree (txt);
- }
- lip->data_in_log = TRUE;
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
+ if (sdp != NULL) {
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop != NULL) {
+ if (biop->origin == ORG_ARTIFICIAL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ if (StringICmp (orp->taxname, "synthetic construct") == 0) return;
}
- /* push index down, so we don't skip over a codon */
- j--;
}
}
}
-}
-
-NLM_EXTERN void RemoveBadCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
-{
- VisitFeaturesInSep (sep, lip, RemoveBadCodonRecognizedCallback);
-}
-
-
-NLM_EXTERN void ReverseBioseqInAlignment (SeqAlignPtr salp, Pointer userdata)
-{
- BioseqPtr bsp;
- SeqIdPtr sip;
- Boolean found = FALSE;
- Int4 order;
-
- if (salp == NULL || userdata == NULL) return;
-
- bsp = (BioseqPtr) userdata;
-
- for (sip = bsp->id; sip != NULL && ! found; sip = sip->next)
- {
- order = SeqIdOrderInBioseqIdList(sip, SeqIdPtrFromSeqAlign (salp));
- if (order > 0) {
- AlnMgr2IndexSeqAlignEx(salp, FALSE);
- ReverseAlignmentStrand (salp, order);
- SeqAlignIndexFree(salp->saip);
- salp->saip = NULL;
- found = TRUE;
- }
- }
-}
-
-
-/* need to reverse the order of the segments and flip the strands */
-NLM_EXTERN void FlipAlignment (SeqAlignPtr salp)
-{
- DenseSegPtr dsp;
- Int4 row, seg, swap_start, swap_len, opp_seg;
- Score swap_score;
- Uint1 swap_strand;
-
- if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL)
- {
- return;
- }
-
- dsp = (DenseSegPtr) salp->segs;
- if (dsp->strands == NULL) {
- dsp->strands = (Uint1Ptr) MemNew (dsp->numseg * dsp->dim * sizeof (Uint1));
- MemSet (dsp->strands, Seq_strand_plus, dsp->numseg * dsp->dim * sizeof (Uint1));
- }
-
- for (seg = 0; seg < dsp->numseg / 2; seg++) {
- /* swap segments to reverse order */
- opp_seg = dsp->numseg - 1 - seg;
- /* swap lens */
- swap_len = dsp->lens[seg];
- dsp->lens[seg] = dsp->lens[opp_seg];
- dsp->lens[opp_seg] = swap_len;
- /* swap scores */
- if (dsp->scores != NULL) {
- swap_score = dsp->scores[seg];
- dsp->scores[seg] = dsp->scores[opp_seg];
- dsp->scores[opp_seg] = swap_score;
- }
- for (row = 0; row < dsp->dim; row++) {
- /* swap strands */
- swap_strand = dsp->strands[dsp->dim * seg + row];
- dsp->strands[dsp->dim * seg + row] = dsp->strands[dsp->dim * opp_seg + row];
- dsp->strands[dsp->dim * opp_seg + row] = swap_strand;
-
- /* swap starts */
- swap_start = dsp->starts[dsp->dim * seg + row];
- dsp->starts[dsp->dim * seg + row] = dsp->starts[dsp->dim * opp_seg + row];
- dsp->starts[dsp->dim * opp_seg + row] = swap_start;
+ if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
+ bssp = (BioseqSetPtr) bsp->idx.parentptr;
+ if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset) {
+ is_master_seq = TRUE;
}
}
- /* reverse segments */
- for (seg = 0; seg < dsp->numseg; seg++) {
- for (row = 0; row < dsp->dim; row++) {
- if (dsp->strands[dsp->dim * seg + row] == Seq_strand_minus) {
- dsp->strands[dsp->dim * seg + row] = Seq_strand_plus;
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
+ sfp != NULL;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) {
+ if (sfp->data.choice == SEQFEAT_GENE) {
+ /* skip this sequence if it has more than one gene */
+ if (gene == NULL) {
+ gene = sfp;
} else {
- dsp->strands[dsp->dim * seg + row] = Seq_strand_minus;
+ return;
}
- }
- }
- SAIndex2Free2(salp->saip);
- salp->saip = NULL;
-}
-
-
-NLM_EXTERN void FlipEntireAlignmentIfAllSequencesFlipped (SeqAnnotPtr sap, Pointer userdata)
-{
- SeqAlignPtr salp;
- ValNodePtr vnp;
- BioseqPtr bsp;
- SeqIdPtr sip;
- Boolean found;
- Int4 row, num_rows;
-
- if (sap == NULL || sap->type != 2 || userdata == NULL) return;
- salp = (SeqAlignPtr) sap->data;
- if (salp == NULL || salp->idx.deleteme) return;
-
-
- AlnMgr2IndexSingleChildSeqAlign(salp);
- num_rows = AlnMgr2GetNumRows(salp);
- for (row = 1; row <= num_rows; row++) {
- sip = AlnMgr2GetNthSeqIdPtr(salp, row);
- found = FALSE;
- vnp = (ValNodePtr)userdata;
- while (vnp != NULL && !found) {
- bsp = (BioseqPtr) vnp->data.ptrvalue;
- if (SeqIdOrderInBioseqIdList (sip, bsp->id) > 0) {
- found = TRUE;
+ } else if (sfp->data.choice == SEQFEAT_CDREGION) {
+ num_cds++;
+ /* skip this sequence if it has more than one coding region */
+ if (num_cds > 1 && !is_master_seq) {
+ return;
}
- vnp = vnp->next;
- }
- if (!found) return;
- }
-
- FlipAlignment(salp);
-}
-
-
-NLM_EXTERN ValNodePtr ListSequencesWithAlignments (ValNodePtr bsp_list)
-{
- BioseqPtr bsp;
- ValNodePtr vnp, aln_bsp = NULL;
-
- for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
- bsp = (BioseqPtr) vnp->data.ptrvalue;
- if (bsp != NULL && IsBioseqInAnyAlignment (bsp, bsp->idx.entityID)) {
- ValNodeAddPointer (&aln_bsp, 0, bsp);
+ } else if (sfp->idx.subtype == FEATDEF_mRNA) {
+ num_mrna++;
+ /* skip this sequence if it has more than one mRNA */
+ if (num_mrna > 1) return;
}
}
- return aln_bsp;
-}
-
-NLM_EXTERN void RevCompBioseqList (ValNodePtr bsp_list,
- Uint2 entityID,
- BioseqFunc func,
- Boolean revCompFeats,
- Boolean check_for_aln)
-{
- SeqEntryPtr sep;
- BioseqPtr bsp;
- ValNodePtr vnp;
-
- sep = GetTopSeqEntryForEntityID (entityID);
-
- for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
- bsp = (BioseqPtr) vnp->data.ptrvalue;
- if (func != NULL) {
- func (bsp);
- if (check_for_aln) {
- VisitAlignmentsInSep (sep, (Pointer) bsp, ReverseBioseqInAlignment);
- }
- }
- if (revCompFeats) {
- if (bsp->repr == Seq_repr_raw || bsp->repr == Seq_repr_const) {
-
- if (sep != NULL) {
- SeqEntryExplore (sep, (Pointer) bsp, RevCompFeats);
- }
+ if (gene != NULL && gene->location != NULL) {
+ slp = gene->location;
+ if (slp->choice != SEQLOC_INT) {
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ /* skip this sequence if it is multi-interval and EMBL or DDBJ */
+ if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) return;
}
}
}
-}
-
-
-typedef struct bioseqinalignmentdata {
- Boolean found;
- BioseqPtr lookingfor;
-} BioseqInAlignmentData, PNTR BioseqInAlignmentPtr;
-
-static Boolean IsBioseqInThisAlignment (SeqAlignPtr salp, BioseqPtr bsp)
-{
- SeqIdPtr sip;
- Boolean found = FALSE;
-
- for (sip = bsp->id; sip != NULL && ! found; sip = sip->next)
- {
- found = SeqAlignFindSeqId (salp, sip);
- }
- return found;
-}
-
-static void FindAlignmentCallback (SeqAnnotPtr sap, Pointer userdata)
-{
- BioseqInAlignmentPtr biap;
- SeqAlignPtr salp;
-
- if (sap == NULL || sap->type != 2 || userdata == NULL)
- {
- return;
- }
- biap = (BioseqInAlignmentPtr) userdata;
- if (biap->found) return;
- salp = (SeqAlignPtr) sap->data;
- if (salp == NULL) return;
- biap->found = IsBioseqInThisAlignment (salp, biap->lookingfor);
-
-}
-
-NLM_EXTERN Boolean IsBioseqInAnyAlignment (BioseqPtr bsp, Uint2 input_entityID)
-{
- SeqEntryPtr topsep;
- BioseqInAlignmentData biad;
-
- topsep = GetTopSeqEntryForEntityID (input_entityID);
- biad.found = FALSE;
- biad.lookingfor = bsp;
- VisitAnnotsInSep (topsep, &biad, FindAlignmentCallback);
- return biad.found;
-}
-
-
-static void RemoveAlignmentsWithSequenceCallback (SeqAnnotPtr sap, Pointer userdata)
-{
- SeqAlignPtr salp;
- SeqIdPtr sip;
-
- if (sap == NULL || sap->type != 2 || userdata == NULL) return;
- salp = (SeqAlignPtr) sap->data;
- if (salp == NULL || salp->idx.deleteme) return;
- sip = (SeqIdPtr) userdata;
- while (sip != NULL && !sap->idx.deleteme) {
- if (FindSeqIdinSeqAlign (salp, sip)) {
- sap->idx.deleteme = TRUE;
- }
- sip = sip->next;
- }
-}
-
-NLM_EXTERN void RemoveAlignmentsWithSequence (BioseqPtr bsp, Uint2 input_entityID)
-{
- SeqEntryPtr topsep;
-
- if (bsp == NULL) return;
- topsep = GetTopSeqEntryForEntityID (input_entityID);
-
- VisitAnnotsInSep (topsep, bsp->id, RemoveAlignmentsWithSequenceCallback);
-}
-
-
-/* assumes locations on same Bioseq */
-static Boolean OutOfOrder (SeqLocPtr slp_prev, SeqLocPtr slp_next)
-{
- Uint1 strand_p, strand_n;
- Boolean rval = FALSE;
- Int4 start_p, start_n, stop_p, stop_n;
-
- if (slp_prev == NULL || slp_next == NULL)
- {
- return FALSE;
- }
-
- strand_p = SeqLocStrand (slp_prev);
- strand_n = SeqLocStrand (slp_next);
- if (strand_p == Seq_strand_minus)
- {
- if (strand_n != Seq_strand_minus)
- {
- /* mixed strand, not necessarily out of order */
- rval = FALSE;
- } else {
- start_p = SeqLocStart (slp_prev);
- stop_p = SeqLocStop (slp_prev);
- start_n = SeqLocStart (slp_next);
- stop_n = SeqLocStop (slp_next);
- if (start_p < start_n || stop_p < stop_n)
- {
- rval = TRUE;
- }
- }
- } else {
- if (strand_n == Seq_strand_minus)
- {
- /* mixed strand, not necessarily out of order */
- rval = FALSE;
- } else {
- start_p = SeqLocStart (slp_prev);
- stop_p = SeqLocStop (slp_prev);
- start_n = SeqLocStart (slp_next);
- stop_n = SeqLocStop (slp_next);
- if (start_p > start_n || stop_p > stop_n)
- {
- rval = TRUE;
- }
+ if (gene != NULL && BioseqFindFromSeqLoc (gene->location) == bsp) {
+ CheckSeqLocForPartial (gene->location, &partial5, &partial3);
+ has_nulls = LocationHasNullsBetween (gene->location);
+ /* gene should cover entire length of sequence */
+ slp = SeqLocIntNew (0, bsp->length - 1, SeqLocStrand (gene->location), SeqIdFindBest (bsp->id, 0));
+ SetSeqLocPartial (slp, partial5, partial3);
+ gene->location = SeqLocFree (gene->location);
+ gene->location = slp;
+ if (is_master_seq) {
+ MergeFeatureIntervalsToParts (gene, has_nulls);
}
}
- return rval;
}
+//LCOV_EXCL_START
+static DbtagPtr DbtagParse (
+ CharPtr str
+)
-/* assumes locations on same Bioseq and in order on same strand*/
-static Boolean TooFarApartForTransSplicing (SeqLocPtr slp_prev, SeqLocPtr slp_next)
{
- Boolean rval = FALSE;
- Int4 start_n, start_p, stop_n, stop_p;
-
- if (slp_prev == NULL || slp_next == NULL)
- {
- return FALSE;
- }
+ Boolean all_digits = TRUE;
+ Char ch;
+ DbtagPtr dbt;
+ long num;
+ Int2 num_digits = 0;
+ ObjectIdPtr oip;
+ CharPtr ptr;
+ CharPtr tmp;
- if (SeqLocStrand (slp_prev) == Seq_strand_minus)
- {
- start_p = SeqLocStart (slp_prev);
- stop_n = SeqLocStop (slp_next);
- if (start_p - stop_n > 10000)
- {
- rval = TRUE;
- }
- } else {
- stop_p = SeqLocStop (slp_prev);
- start_n = SeqLocStart (slp_next);
- if (start_n - stop_p > 10000)
- {
- rval = TRUE;
- }
- }
- return rval;
-}
+ if (StringHasNoText (str)) return NULL;
+ ptr = StringChr (str, ':');
+ if (ptr == NULL) return NULL;
+ dbt = DbtagNew ();
+ oip = ObjectIdNew ();
+ if (dbt == NULL || oip == NULL) return NULL;
-NLM_EXTERN SeqLocPtr MakeGeneLocForFeatureLoc (SeqLocPtr floc, Uint2 entityID, Boolean trans_spliced)
-{
- /* in the age of small-set genomes, we're going to pretend that segmented sets do not exist.
- * A gene location for a feature location that includes multiple bioseqs should include
- * one interval per bioseq that covers all locations of the feature that occur on that bioseq.
- */
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
+ }
- SeqLocPtr slp_new = NULL, slp_tmp, slp_last = NULL, add_slp;
- SeqLocPtr PNTR pAddSlp = NULL;
- BioseqPtr bsp, last_bsp = NULL;
- Boolean partial5 = FALSE, partial3 = FALSE;
- Uint2 strand, last_strand = Seq_strand_plus;
+ dbt->db = StringSave (str);
+ dbt->tag = oip;
- pAddSlp = &slp_new;
- for (slp_tmp = SeqLocFindNext (floc, NULL);
- slp_tmp != NULL;
- slp_tmp = SeqLocFindNext (floc, slp_tmp))
- {
- bsp = GetBioseqGivenSeqLoc (slp_tmp, entityID);
- strand = SeqLocStrand (slp_tmp);
- if (bsp != last_bsp || strand != last_strand
- || (trans_spliced && OutOfOrder (slp_last, slp_tmp))
- || (trans_spliced && TooFarApartForTransSplicing(slp_last, slp_tmp))) {
- add_slp = SeqLocMerge (bsp, slp_tmp, NULL, TRUE, FALSE, FALSE);
- if (slp_last == NULL) {
- slp_new = add_slp;
- } else {
- slp_last->next = add_slp;
- pAddSlp = &(slp_last->next);
- }
- slp_last = add_slp;
- last_bsp = bsp;
- last_strand = strand;
+ tmp = ptr;
+ ch = *tmp;
+ while (ch != '\0') {
+ if (IS_DIGIT (ch)) {
+ num_digits++;
} else {
- add_slp = SeqLocMerge (bsp, *pAddSlp, slp_tmp, TRUE, FALSE, FALSE);
- *pAddSlp = SeqLocFree (*pAddSlp);
- *pAddSlp = add_slp;
- slp_last = add_slp;
+ all_digits = FALSE;
}
- }
- if (slp_new != NULL && slp_new->next != NULL) {
- slp_tmp = ValNodeNew (NULL);
- slp_tmp->choice = SEQLOC_MIX;
- slp_tmp->data.ptrvalue = slp_new;
- slp_new = slp_tmp;
- }
- if (slp_new != NULL) {
- CheckSeqLocForPartial (floc, &partial5, &partial3);
- SetSeqLocPartial (slp_new, partial5, partial3);
- }
-
- return slp_new;
-}
-
-
-/* code for resolving conflicting IDs */
-typedef struct {
- CharPtr oldStr;
- SeqIdPtr newSip;
-} ReplaceIDStruct, PNTR ReplaceIDStructPtr;
-
-
-/********************************************************************
-*
-* SeqLocReplaceLocalID
-* replaces the Seq-Id in a Seq-Loc (slp) with a new Seq-Id (new_sip)
-* only if the Seq-Id is a local one.
-*
-**********************************************************************/
-
-static SeqLocPtr SeqLocReplaceLocalID (SeqLocPtr slp,
- SeqIdPtr new_sip)
-{
- SeqLocPtr curr;
- PackSeqPntPtr pspp;
- SeqIntPtr target_sit;
- SeqPntPtr spp;
- SeqIdPtr currId;
-
- switch (slp->choice) {
- case SEQLOC_PACKED_INT :
- case SEQLOC_MIX :
- case SEQLOC_EQUIV :
- curr = NULL;
- while ((curr = SeqLocFindNext (slp, curr)) != NULL) {
- curr = SeqLocReplaceLocalID (curr, new_sip);
- }
- break;
- case SEQLOC_PACKED_PNT :
- pspp = (PackSeqPntPtr) slp->data.ptrvalue;
- if ((pspp != NULL) && (pspp->id->choice == SEQID_LOCAL)) {
- SeqIdFree (pspp->id);
- pspp->id = SeqIdDup (new_sip);
- }
- break;
- case SEQLOC_EMPTY :
- case SEQLOC_WHOLE :
- currId = (SeqIdPtr) slp->data.ptrvalue;
- if (currId->choice == SEQID_LOCAL)
- {
- SeqIdFree (currId);
- slp->data.ptrvalue = (Pointer) SeqIdDup (new_sip);
- }
- break;
- case SEQLOC_INT :
- target_sit = (SeqIntPtr) slp->data.ptrvalue;
- if (target_sit->id->choice == SEQID_LOCAL)
- {
- SeqIdFree (target_sit->id);
- target_sit->id = SeqIdDup (new_sip);
- }
- break;
- case SEQLOC_PNT :
- spp = (SeqPntPtr)slp->data.ptrvalue;
- if (spp->id->choice == SEQID_LOCAL)
- {
- SeqIdFree(spp->id);
- spp->id = SeqIdDup(new_sip);
- }
- break;
- default :
- break;
- }
- return slp;
-}
-
-static void ReplaceIdForFeature (SeqFeatPtr sfp, SeqIdPtr sip)
-{
- CdRegionPtr crp;
- CodeBreakPtr cbp;
- RnaRefPtr rrp;
- tRNAPtr trp;
-
- if (sfp == NULL || sip == NULL) {
- return;
- }
- /* replace local ID in location */
- if (sfp->location != NULL) {
- SeqLocReplaceLocalID (sfp->location, sip);
+ tmp++;
+ ch = *tmp;
}
- /* also replace local ID in code breaks */
- if (sfp->data.choice == SEQFEAT_CDREGION
- && (crp = (CdRegionPtr)sfp->data.value.ptrvalue) != NULL
- && crp->code_break != NULL) {
- for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
- SeqLocReplaceLocalID (cbp->loc, sip);
+ if (all_digits && *ptr != '0') {
+ if (num_digits < 10 || (num_digits == 10 && StringCmp (ptr, "2147483647") <= 0)) {
+ sscanf (ptr, "%ld", &num);
+ oip->id = (Int4) num;
+ return dbt;
}
}
- /* also replace local ID in anticodons */
- if (sfp->data.choice == SEQFEAT_RNA
- && (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) != NULL
- && rrp->type == 3 && rrp->ext.choice == 2
- && (trp = (tRNAPtr) rrp->ext.value.ptrvalue) != NULL
- && trp->anticodon != NULL) {
- SeqLocReplaceLocalID (trp->anticodon, sip);
- }
-}
-
-
-static void ReplaceLocalIdOnLoc_callback (SeqFeatPtr sfp, Pointer userdata)
-{
- SeqIdPtr sip;
-
- if (sfp == NULL) {
- return;
- }
+ oip->str = StringSave (ptr);
- sip = (SeqIdPtr) userdata;
- ReplaceIdForFeature (sfp, sip);
+ return dbt;
}
+//LCOV_EXCL_STOP
+static void GetNomenclatureUOP (
+ UserObjectPtr uop,
+ Pointer userdata
+)
-static void CheckFeatForNuclID_callback (SeqFeatPtr sfp, Pointer userdata)
-{
- SeqIdPtr featSip = NULL;
- ReplaceIDStructPtr idsPtr;
- ObjectIdPtr oip;
- Char tmpIdStr [128];
-
- if (NULL == sfp)
- return;
-
- /* Get the old Seq Id and the new */
- /* one that it was changed to. */
-
- idsPtr = (ReplaceIDStructPtr) userdata;
- if ((NULL == idsPtr) ||
- (NULL == idsPtr->oldStr) ||
- (NULL == idsPtr->newSip))
- return;
-
- /* Get the location Seq ID for this CDS feature */
-
- featSip = SeqLocId (sfp->location);
- if (featSip == NULL) return;
- oip = (ObjectIdPtr) featSip->data.ptrvalue;
-
- /* If the location Seq ID matches the old Seq Id */
- /* then change the location to point to the new. */
-
- if (NULL == oip->str) {
- sprintf (tmpIdStr, "%d", oip->id);
- if (StringCmp (tmpIdStr, idsPtr->oldStr) == 0) {
- ReplaceIdForFeature (sfp, idsPtr->newSip);
- }
- } else if (StringCmp (oip->str, idsPtr->oldStr) == 0){
- ReplaceIdForFeature (sfp, idsPtr->newSip);
- }
-}
-
-
-static void CheckFeatForProductID_callback (SeqFeatPtr sfp, Pointer userdata)
{
- SeqIdPtr featSip = NULL;
- ReplaceIDStructPtr idsPtr;
ObjectIdPtr oip;
- Char tmpIdStr [128];
-
- if (NULL == sfp)
- return;
-
- if ((sfp->data.choice == SEQFEAT_CDREGION) &&
- (sfp->product != NULL)) {
+ UserObjectPtr PNTR uopp;
- /* Get the old Seq Id and the new */
- /* one that it was changed to. */
-
- idsPtr = (ReplaceIDStructPtr) userdata;
- if ((NULL == idsPtr) ||
- (NULL == idsPtr->oldStr) ||
- (NULL == idsPtr->newSip))
- return;
-
- /* Get the product Seq ID for this CDS feature */
-
- featSip = SeqLocId (sfp->product);
- oip = (ObjectIdPtr) featSip->data.ptrvalue;
-
- /* If the product Seq ID matches the old Seq Id */
- /* then change the product to point to the new. */
-
- if (NULL == oip->str) {
- sprintf (tmpIdStr, "%d", oip->id);
- if (StringCmp (tmpIdStr, idsPtr->oldStr) == 0)
- SeqLocReplaceLocalID (sfp->product, idsPtr->newSip);
- }
- if (StringCmp (oip->str, idsPtr->oldStr) == 0)
- SeqLocReplaceLocalID (sfp->product, idsPtr->newSip);
-
- }
+ if (uop == NULL || userdata == NULL) return;
+ oip = uop->type;
+ if (oip == NULL) return;
+ if (StringCmp (oip->str, "OfficialNomenclature") != 0) return;
+ uopp = (UserObjectPtr PNTR) userdata;
+ *uopp = uop;
}
-static void ReplaceLocalID (BioseqPtr bsp,
- SeqIdPtr sip,
- CharPtr key,
- Int2 count)
+//LCOV_EXCL_START
+NLM_EXTERN void ModernizeGeneFields (
+ SeqFeatPtr sfp
+)
{
- ObjectIdPtr oip;
- Char str [64];
- Char tmp [70];
- BioseqSetPtr bssp = NULL;
- ReplaceIDStruct ids;
- BioseqPtr siblingBsp;
- SeqEntryPtr sep;
- Int2 parentType;
-
- if (bsp == NULL || sip == NULL || StringHasNoText (key)) return;
- oip = (ObjectIdPtr) sip->data.ptrvalue;
- if (oip == NULL) return;
-
- /* Create the new ID string */
-
- StringNCpy_0 (str, key, sizeof (str));
- sprintf (tmp, "%s__%d", str, (int) count);
-
- /* Save the original SeqId for later passing */
- /* to CheckSetForNuclID_callback () and */
- /* CheckSetForProductId_callback (). */
-
- if (NULL != oip->str)
- ids.oldStr = StringSave (oip->str);
- else {
- ids.oldStr = (CharPtr) MemNew (32);
- sprintf (ids.oldStr, "%d", oip->id);
- }
-
-
- /* Update the Seq ID with the new string */
-
- oip->str = StringSave (tmp);
- ids.newSip = sip;
- SeqMgrReplaceInBioseqIndex (bsp);
-
- /* Replace the local ID on all the features of the bioseq */
+ GeneNomenclaturePtr gnp;
+ GeneRefPtr grp;
+ ObjectIdPtr oip;
+ CharPtr str;
+ CharPtr symbol = NULL, name = NULL, source = NULL;
+ Uint2 status = 0;
+ UserFieldPtr ufp;
+ UserObjectPtr uop = NULL;
+ UserObjectPtr curr, next;
+ UserObjectPtr PNTR prev;
- VisitFeaturesOnBsp (bsp, (Pointer) sip, ReplaceLocalIdOnLoc_callback);
+ if (sfp == NULL) return;
+ if (sfp->data.choice != SEQFEAT_GENE) return;
- /* Check the parent (and grandparent, etc.) BioseqSet */
- /* for features that use the changed ID. */
+ grp = (GeneRefPtr) sfp->data.value.ptrvalue;
+ if (grp == NULL) return;
- parentType = bsp->idx.parenttype;
- if (parentType == OBJ_BIOSEQSET)
- bssp = (BioseqSetPtr) bsp->idx.parentptr;
+ if (grp->formal_name != NULL) return;
- while (parentType == OBJ_BIOSEQSET) {
+ if (sfp->ext == NULL) return;
+ VisitUserObjectsInUop (sfp->ext, (Pointer) &uop, GetNomenclatureUOP);
+ if (uop == NULL) return;
- if ((bssp != NULL) && (bssp->_class == 1)) {
-
- /* Check features that are attached to */
- /* the parent set itself. */
-
- if (ISA_na(bsp->mol))
- VisitFeaturesOnSet (bssp, (Pointer) &ids,
- CheckFeatForNuclID_callback);
- else if (ISA_aa(bsp->mol))
- VisitFeaturesOnSet (bssp, (Pointer) &ids,
- CheckFeatForProductID_callback);
-
- /* Check features that are attached to */
- /* other Bioseqs in the set. */
-
- sep = bssp->seqentry;
- while (NULL != sep) {
- if (sep->choice == 1) { /* bioseq */
- siblingBsp = (BioseqPtr) sep->data.ptrvalue;
- if (ISA_na(bsp->mol))
- VisitFeaturesOnBsp (siblingBsp, (Pointer) sip,
- CheckFeatForNuclID_callback);
- else if (ISA_aa(bsp->mol))
- VisitFeaturesOnBsp (siblingBsp, (Pointer) sip,
- CheckFeatForProductID_callback);
- }
- sep = sep->next;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || oip->str == NULL) continue;
+ if (StringICmp (oip->str, "Symbol") == 0) {
+ if (ufp->choice == 1) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (str != NULL) {
+ symbol = str;
+ }
}
-
- sep = bssp->seq_set;
- while (NULL != sep) {
- if (sep->choice == 1) { /* bioseq */
- siblingBsp = (BioseqPtr) sep->data.ptrvalue;
- if (ISA_na(bsp->mol))
- VisitFeaturesOnBsp (siblingBsp, (Pointer) sip,
- CheckFeatForNuclID_callback);
- else if (ISA_aa(bsp->mol))
- VisitFeaturesOnBsp (siblingBsp, (Pointer) sip,
- CheckFeatForProductID_callback);
- }
- sep = sep->next;
+ } else if (StringICmp (oip->str, "Name") == 0) {
+ if (ufp->choice == 1) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (str != NULL) {
+ name = str;
+ }
+ }
+ } else if (StringICmp (oip->str, "DataSource") == 0) {
+ if (ufp->choice == 1) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (str != NULL) {
+ source = str;
+ }
+ }
+ } else if (StringICmp (oip->str, "Status") == 0) {
+ if (ufp->choice == 1) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (str != NULL) {
+ if (StringICmp (str, "Official") == 0) {
+ status = 1;
+ } else if (StringICmp (str, "Interim") == 0) {
+ status = 2;
+ }
+ }
}
}
- parentType = bssp->idx.parenttype;
- bssp = (BioseqSetPtr) bssp->idx.parentptr;
}
+ if (symbol == NULL && name == NULL && source == NULL && status == 0) return;
- /* Clean up before exiting */
-
- MemFree (ids.oldStr);
+ gnp = GeneNomenclatureNew ();
+ if (gnp == NULL) return;
-}
-
-
-static void BuildLclTree (LclIdListPtr PNTR head, BioseqPtr bsp, CharPtr x, SeqIdPtr sip)
+ gnp->status = status;
+ gnp->symbol = StringSaveNoNull (symbol);
+ gnp->name = StringSaveNoNull (name);
+ gnp->source = DbtagParse (source);
-{
- Int2 comp;
- LclIdListPtr idlist;
+ grp->formal_name = gnp;
- if (*head != NULL) {
- idlist = *head;
- comp = StringICmp (idlist->key, x);
- if (comp < 0) {
- BuildLclTree (&(idlist->right), bsp, x, sip);
- } else if (comp > 0) {
- BuildLclTree (&(idlist->left), bsp, x, sip);
+ prev = (UserObjectPtr PNTR) &(sfp->ext);
+ curr = sfp->ext;
+ while (curr != NULL) {
+ next = curr->next;
+ if (uop == curr) {
+ *(prev) = curr->next;
+ curr->next = NULL;
+ UserObjectFree (curr);
} else {
- if (idlist->firstbsp != NULL && idlist->firstsip != NULL) {
- ReplaceLocalID (idlist->firstbsp, idlist->firstsip, x, 1);
- idlist->count = 2;
- idlist->firstbsp = NULL;
- idlist->firstsip = NULL;
- }
- ReplaceLocalID (bsp, sip, x, idlist->count);
- (idlist->count)++;
- }
- } else {
- idlist = MemNew (sizeof (LclIdList));
- if (idlist != NULL) {
- *head = idlist;
- idlist->firstbsp = bsp;
- idlist->firstsip = sip;
- idlist->count = 1;
- idlist->key = StringSave (x);
- idlist->left = NULL;
- idlist->right = NULL;
+ prev = (UserObjectPtr PNTR) &(curr->next);
}
+ curr = next;
}
}
+//LCOV_EXCL_STOP
-NLM_EXTERN void FreeLclTree (LclIdListPtr PNTR head)
-{
- LclIdListPtr idlist;
+/* PCR_primer manipulation functions */
- if (head != NULL && *head != NULL) {
- idlist = *head;
- FreeLclTree (&(idlist->left));
- FreeLclTree (&(idlist->right));
- MemFree (idlist->key);
- MemFree (idlist);
- }
-}
-
-
-NLM_EXTERN void ResolveExistingIDsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
+static ValNodePtr ParsePCRComponent (
+ CharPtr strs
+)
{
- BioseqPtr bsp;
- LclIdListPtr PNTR head;
- SeqIdPtr sip;
- Char str [64];
+ ValNodePtr head = NULL;
+ size_t len;
+ CharPtr ptr, str, tmp;
- head = (LclIdListPtr PNTR) mydata;
- if (sep == NULL || head == NULL) return;
- if (IS_Bioseq (sep)) {
- bsp = (BioseqPtr) sep->data.ptrvalue;
- if (bsp != NULL) {
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
- if (sip->choice == SEQID_LOCAL) {
- SeqIdWrite (sip, str, PRINTID_REPORT, sizeof (str));
- BuildLclTree (head, bsp, str, sip);
- }
- }
- }
- }
-}
+ if (StringHasNoText (strs)) return NULL;
+ tmp = StringSave (strs);
+ if (tmp == NULL) return NULL;
-static Boolean DoesIdListHaveLocal (SeqIdPtr sip)
-{
- while (sip != NULL) {
- if (sip->choice == SEQID_LOCAL) {
- return TRUE;
- }
- sip = sip->next;
+ str = tmp;
+ len = StringLen (str);
+ if (len > 1 && *str == '(' && str [len - 1] == ')' && StringChr (str + 1, '(') == NULL) {
+ str [len - 1] = '\0';
+ str++;
}
- return FALSE;
-}
-
-static Boolean DoesSeqLocListHaveLocalId (SeqLocPtr slp)
-{
- SeqLocPtr loc;
- PackSeqPntPtr psp;
- SeqBondPtr sbp;
- SeqIntPtr sinp;
- SeqIdPtr sip;
- SeqPntPtr spp;
- Boolean has_local = FALSE;
-
- while (slp != NULL) {
- switch (slp->choice) {
- case SEQLOC_NULL :
- break;
- case SEQLOC_EMPTY :
- case SEQLOC_WHOLE :
- sip = (SeqIdPtr) slp->data.ptrvalue;
- has_local = DoesIdListHaveLocal (sip);
- break;
- case SEQLOC_INT :
- sinp = (SeqIntPtr) slp->data.ptrvalue;
- if (sinp != NULL) {
- sip = sinp->id;
- has_local = DoesIdListHaveLocal (sip);
- }
- break;
- case SEQLOC_PNT :
- spp = (SeqPntPtr) slp->data.ptrvalue;
- if (spp != NULL) {
- sip = spp->id;
- has_local = DoesIdListHaveLocal (sip);
- }
- break;
- case SEQLOC_PACKED_PNT :
- psp = (PackSeqPntPtr) slp->data.ptrvalue;
- if (psp != NULL) {
- sip = psp->id;
- has_local = DoesIdListHaveLocal (sip);
- }
- break;
- case SEQLOC_PACKED_INT :
- case SEQLOC_MIX :
- case SEQLOC_EQUIV :
- loc = (SeqLocPtr) slp->data.ptrvalue;
- while (loc != NULL && !has_local) {
- has_local = DoesSeqLocListHaveLocalId(loc);
- loc = loc->next;
- }
- break;
- case SEQLOC_BOND :
- sbp = (SeqBondPtr) slp->data.ptrvalue;
- if (sbp != NULL) {
- spp = (SeqPntPtr) sbp->a;
- if (spp != NULL) {
- sip = spp->id;
- has_local = DoesIdListHaveLocal (sip);
- }
- spp = (SeqPntPtr) sbp->b;
- if (spp != NULL) {
- sip = spp->id;
- has_local = DoesIdListHaveLocal (sip);
- }
- }
- break;
- case SEQLOC_FEAT :
- break;
- default :
- break;
+ while (StringDoesHaveText (str)) {
+ ptr = StringChr (str, ',');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
}
- slp = slp->next;
- }
- return FALSE;
-}
+ TrimSpacesAroundString (str);
+ ValNodeCopyStr (&head, 0, str);
-static void SeqEntryHasAlignmentsWithLocalIDsCallback (SeqAnnotPtr sap, Pointer userdata)
-{
- DenseDiagPtr ddp;
- DenseSegPtr dsp;
- PackSegPtr psp;
- SeqAlignPtr salp;
- StdSegPtr ssp;
- Boolean has_local = FALSE;
- BoolPtr bp;
-
- if (sap == NULL || sap->type != 2 || userdata == NULL) return;
- salp = (SeqAlignPtr) sap->data;
- if (salp != NULL)
- {
- switch (salp->segtype) {
- case SAS_DENDIAG :
- for (ddp = salp->segs; ddp != NULL && !has_local; ddp = ddp->next) {
- has_local = DoesIdListHaveLocal (ddp->id);
- }
- break;
- case SAS_DENSEG :
- dsp = salp->segs;
- if (dsp != NULL) {
- has_local = DoesIdListHaveLocal (dsp->ids);
- }
- break;
- case SAS_STD :
- for (ssp = salp->segs; ssp != NULL && !has_local; ssp = ssp->next) {
- has_local = DoesIdListHaveLocal (ssp->ids);
- if (!has_local) {
- has_local = DoesSeqLocListHaveLocalId (ssp->loc);
- }
- }
- break;
- case SAS_PACKED :
- psp = (PackSegPtr) salp->segs;
- if (psp != NULL) {
- has_local = DoesIdListHaveLocal (psp->ids);
- }
- break;
- default :
- break;
- }
+ str = ptr;
}
- bp = (BoolPtr) userdata;
- *bp |= has_local;
-}
-
-
-NLM_EXTERN Boolean HasAlignmentsWithLocalIDs (SeqEntryPtr sep)
-{
- Boolean has_alignments = FALSE;
-
- VisitAnnotsInSep (sep, (Pointer) &has_alignments, SeqEntryHasAlignmentsWithLocalIDsCallback);
-
- return has_alignments;
-}
-
-NLM_EXTERN int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2)
-
-{
- ValNodePtr vnp1;
- ValNodePtr vnp2;
-
- if (ptr1 == NULL || ptr2 == NULL) return 0;
- vnp1 = *((ValNodePtr PNTR) ptr1);
- vnp2 = *((ValNodePtr PNTR) ptr2);
- if (vnp1 == NULL || vnp2 == NULL) return 0;
-
- if (vnp1->choice > vnp2->choice) {
- return 1;
- } else if (vnp1->choice < vnp2->choice) {
- return -1;
- } else if (vnp1->data.ptrvalue > vnp2->data.ptrvalue) {
- return 1;
- } else if (vnp1->data.ptrvalue < vnp2->data.ptrvalue) {
- return -1;
- } else {
- return 0;
- }
+ MemFree (tmp);
+ return head;
}
+NLM_EXTERN ValNodePtr ParsePCRStrings (
+ CharPtr fwd_primer_seq,
+ CharPtr rev_primer_seq,
+ CharPtr fwd_primer_name,
+ CharPtr rev_primer_name
+)
-NLM_EXTERN CharPtr GetRepliconChromosomeName (BioSourcePtr biop)
{
- SubSourcePtr ssp;
+ ValNodePtr curr_fwd_name;
+ ValNodePtr curr_fwd_seq;
+ ValNodePtr curr_rev_name;
+ ValNodePtr curr_rev_seq;
+ CharPtr fwd_name;
+ CharPtr fwd_seq;
+ CharPtr rev_name;
+ CharPtr rev_seq;
+ ValNodePtr fwd_name_list = NULL;
+ ValNodePtr fwd_seq_list = NULL;
+ ValNodePtr rev_name_list = NULL;
+ ValNodePtr rev_seq_list = NULL;
+ ValNodePtr head = NULL;
+ Boolean okay;
+ Int2 orig_order = 0;
+ PcrSetPtr psp;
+
+ fwd_seq_list = ParsePCRComponent (fwd_primer_seq);
+ rev_seq_list = ParsePCRComponent (rev_primer_seq);
+ fwd_name_list = ParsePCRComponent (fwd_primer_name);
+ rev_name_list = ParsePCRComponent (rev_primer_name);
+
+ curr_fwd_seq = fwd_seq_list;
+ curr_rev_seq = rev_seq_list;
+ curr_fwd_name = fwd_name_list;
+ curr_rev_name = rev_name_list;
- if (biop == NULL) {
- return NULL;
- } else if (biop->genome == GENOME_mitochondrion) {
- return StringSave ("MT");
- }
+ while (curr_fwd_seq != NULL || curr_rev_seq != NULL || curr_fwd_name != NULL || curr_rev_name != NULL) {
+ fwd_seq = NULL;
+ rev_seq = NULL;
+ fwd_name = NULL;
+ rev_name = NULL;
+ okay = FALSE;
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_plasmid_name) {
- return StringSave(ssp->name);
+ if (curr_fwd_seq != NULL) {
+ fwd_seq = (CharPtr) curr_fwd_seq->data.ptrvalue;
+ curr_fwd_seq = curr_fwd_seq->next;
+ okay = TRUE;
}
- }
- if (biop->genome == GENOME_chromosome) {
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_linkage_group) {
- return StringSave(ssp->name);
- }
+ if (curr_rev_seq != NULL) {
+ rev_seq = (CharPtr) curr_rev_seq->data.ptrvalue;
+ curr_rev_seq = curr_rev_seq->next;
+ okay = TRUE;
}
- }
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_chromosome) {
- return StringSave(ssp->name);
+ if (curr_fwd_name != NULL) {
+ fwd_name = (CharPtr) curr_fwd_name->data.ptrvalue;
+ curr_fwd_name = curr_fwd_name->next;
+ okay = TRUE;
}
- }
-
- /* no other name found */
- switch (biop->genome) {
- case GENOME_plasmid:
- return StringSave("unnamed");
- break;
- case GENOME_chromosome:
- return StringSave("ANONYMOUS");
- break;
- case GENOME_kinetoplast:
- return StringSave("kinetoplast");
- break;
- case GENOME_plastid :
- case GENOME_chloroplast:
- case GENOME_chromoplast:
- case GENOME_apicoplast :
- case GENOME_leucoplast :
- case GENOME_proplastid :
- return StringSave("Pltd");
- break;
- }
-
- return NULL;
-}
-
-
-NLM_EXTERN CharPtr GetRepliconType (BioSourcePtr biop)
-{
- SubSourcePtr ssp;
- CharPtr type_str = NULL;
-
- if (biop == NULL) {
- return type_str;
- }
- if (biop->genome == GENOME_plasmid) {
- return StringSave("ePlasmid");
- }
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_plasmid_name) {
- type_str = StringSave ("ePlasmid");
- return type_str;
+ if (curr_rev_name != NULL) {
+ rev_name = (CharPtr) curr_rev_name->data.ptrvalue;
+ curr_rev_name = curr_rev_name->next;
+ okay = TRUE;
}
- }
- if (biop->genome == GENOME_chromosome) {
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_linkage_group) {
- type_str = StringSave("eLinkageGroup");
- return type_str;
+ if (okay) {
+ psp = (PcrSetPtr) MemNew (sizeof (PcrSet));
+ if (psp != NULL) {
+ psp->fwd_seq = StringSaveNoNull (fwd_seq);
+ psp->rev_seq = StringSaveNoNull (rev_seq);
+ psp->fwd_name = StringSaveNoNull (fwd_name);
+ psp->rev_name = StringSaveNoNull (rev_name);
+ orig_order++;
+ psp->orig_order = orig_order;
+ ValNodeAddPointer (&head, 0, (Pointer) psp);
}
}
}
- type_str = StringSave ("eChromosome");
- return type_str;
-}
-
-
-NLM_EXTERN CharPtr GetRepliconLocation (BioSourcePtr biop)
-{
- if (biop == NULL) {
- return NULL;
- }
- if (biop->genome == GENOME_chromosome || StringCmp (GetRepliconType (biop), "ePlasmid") == 0) {
- return StringSave("eNuclearProkaryote");
- }
+ ValNodeFreeData (fwd_seq_list);
+ ValNodeFreeData (rev_seq_list);
+ ValNodeFreeData (fwd_name_list);
+ ValNodeFreeData (rev_name_list);
- switch (biop->genome) {
- case GENOME_unknown:
- case GENOME_genomic:
- return StringSave("eNuclearProkaryote");
- break;
- case GENOME_mitochondrion:
- case GENOME_kinetoplast :
- return StringSave("eMitochondrion");
- break;
- case GENOME_chromosome:
- return StringSave("eChromosome");
- break;
- case GENOME_chloroplast:
- return StringSave("eChloroplast");
- break;
- case GENOME_chromoplast:
- return StringSave("eChromoplast");
- break;
- case GENOME_plastid :
- return StringSave("ePlastid");
- break;
- case GENOME_macronuclear :
- return StringSave("eMacronuclear");
- break;
- case GENOME_extrachrom :
- return StringSave("eExtrachromosomal");
- break;
- case GENOME_cyanelle :
- return StringSave("eCyanelle");
- break;
- case GENOME_proviral :
- return StringSave("eProviral");
- break;
- case GENOME_virion :
- return StringSave("eVirion");
- break;
- case GENOME_nucleomorph :
- return StringSave("eNucleomorph");
- break;
- case GENOME_apicoplast :
- return StringSave("eApicoplast");
- break;
- case GENOME_leucoplast :
- return StringSave("eLeucoplast");
- break;
- case GENOME_proplastid :
- return StringSave("eProplastid");
- break;
- case GENOME_endogenous_virus :
- return StringSave("eEndogenous-virus");
- break;
- case GENOME_hydrogenosome :
- return StringSave("eHydrogenosome");
- break;
- case GENOME_chromatophore :
- return StringSave("eChromatophore");
- break;
- }
-
- return NULL;
+ return head;
}
+NLM_EXTERN ValNodePtr ParsePCRSet (
+ BioSourcePtr biop
+)
-NLM_EXTERN CharPtr GetDefinitionLineFASTAModifiers (BioseqPtr bsp, Boolean include_subsource)
{
- SeqMgrDescContext dcontext;
- SeqMgrFeatContext fcontext;
- SeqDescPtr sdp;
- SeqFeatPtr sfp;
- OrgModPtr mod;
- BioSourcePtr biop = NULL;
- SubSourcePtr ssp;
- Int4 len = 1;
- CharPtr summ, val;
- ValNodePtr vals = NULL, vnp;
- GeneRefPtr grp;
- Boolean geneFound = FALSE;
+ CharPtr fwd_primer_seq = NULL;
+ CharPtr rev_primer_seq = NULL;
+ CharPtr fwd_primer_name = NULL;
+ CharPtr rev_primer_name = NULL;
+ SubSourcePtr ssp;
- if (bsp == NULL) {
- return NULL;
- }
+ if (biop == NULL) return NULL;
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- if (sdp != NULL && (biop = (BioSourcePtr) sdp->data.ptrvalue) != NULL) {
- if (biop->org != NULL && !StringHasNoText (biop->org->taxname)) {
- ValNodeAddPointer (&vals, 0, "org");
- ValNodeAddPointer (&vals, 0, biop->org->taxname);
- len += StringLen (biop->org->taxname) + 6;
- }
- if (include_subsource) {
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- val = GetSourceQualName (GetSrcQualFromSubSrcOrOrgMod (ssp->subtype, FALSE));
- ValNodeAddPointer (&vals, 0, val);
- ValNodeAddPointer (&vals, 0, ssp->name);
- len += StringLen (val) + StringLen (ssp->name) + 3;
- }
- }
- if (biop->org != NULL && biop->org->orgname != NULL) {
- for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
- val = GetSourceQualName (GetSrcQualFromSubSrcOrOrgMod (mod->subtype, TRUE));
- ValNodeAddPointer (&vals, 0, val);
- ValNodeAddPointer (&vals, 0, mod->subname);
- len += StringLen (val) + StringLen (mod->subname) + 3;
- }
- }
- }
- for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
- sfp != NULL;
- sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, 0, &fcontext)) {
- if ((grp = (GeneRefPtr) sfp->data.value.ptrvalue) != NULL
- && !StringHasNoText (grp->locus)) {
- ValNodeAddPointer (&vals, 0, "gene");
- ValNodeAddPointer (&vals, 0, grp->locus);
- len += StringLen (grp->locus) + 7;
- geneFound = TRUE;
- }
- }
- if (!geneFound)
- {
- for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, 0, &fcontext);
- sfp != NULL;
- sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, 0, &fcontext)) {
- CharPtr str = GetRNAProductString (sfp, NULL);
- if (str != NULL && !StringHasNoText (str)) {
- ValNodeAddPointer (&vals, 0, "product");
- ValNodeAddPointer (&vals, 0, str);
- len += StringLen (str) + 10;
- }
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+ if (ssp->subtype == SUBSRC_fwd_primer_seq) {
+ fwd_primer_seq = ssp->name;
+ } else if (ssp->subtype == SUBSRC_rev_primer_seq) {
+ rev_primer_seq = ssp->name;
+ } else if (ssp->subtype == SUBSRC_fwd_primer_name) {
+ fwd_primer_name = ssp->name;
+ } else if (ssp->subtype == SUBSRC_rev_primer_name) {
+ rev_primer_name = ssp->name;
}
}
- summ = (CharPtr) MemNew (sizeof (Char) * (len));
- vnp = vals;
- while (vnp != NULL && vnp->next != NULL) {
- StringCat (summ, "[");
- StringCat (summ, (CharPtr) vnp->data.ptrvalue);
- StringCat (summ, "=");
- StringCat (summ, (CharPtr) vnp->next->data.ptrvalue);
- StringCat (summ, "]");
- vnp = vnp->next->next;
- }
- vals = ValNodeFree (vals);
- return summ;
-}
-
-
-/* code for finding frameshifts in alignments */
-typedef struct exoninterval {
- Int4 start;
- Int4 stop;
-} ExonIntervalData, PNTR ExonIntervalPtr;
+ return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
+}
+static ValNodePtr ParsePCRColonString (
+ CharPtr strs
+)
-static ExonIntervalPtr ExonIntervalNew (Int4 start, Int4 stop)
{
- ExonIntervalPtr p = (ExonIntervalPtr) MemNew (sizeof (ExonIntervalData));
- if (start < stop) {
- p->start = start;
- p->stop = stop;
- } else {
- p->start = stop;
- p->stop = start;
- }
- return p;
-}
-
+ ValNodePtr head = NULL;
+ size_t len;
+ CharPtr ptr, str, tmp;
-static int LIBCALLBACK SortExonIntervals (VoidPtr ptr1, VoidPtr ptr2)
+ if (StringHasNoText (strs)) return NULL;
-{
- ValNodePtr vnp1, vnp2;
- ExonIntervalPtr p1, p2;
-
- if (ptr1 != NULL && ptr2 != NULL) {
- vnp1 = *((ValNodePtr PNTR) ptr1);
- vnp2 = *((ValNodePtr PNTR) ptr2);
- if (vnp1 != NULL && vnp2 != NULL) {
- p1 = (ExonIntervalPtr) vnp1->data.ptrvalue;
- p2 = (ExonIntervalPtr) vnp2->data.ptrvalue;
- if (p1 != NULL && p2 != NULL) {
- if (p1->start < p2->start)
- {
- return -1;
- }
- else if (p1->start > p2->start)
- {
- return 1;
- }
- else if (p1->stop < p2->stop)
- {
- return -1;
- }
- else if (p1->stop > p2->stop)
- {
- return 1;
- }
- else
- {
- return 0;
- }
+ tmp = StringSave (strs);
+ str = tmp;
+ len = StringLen (str);
+ if (len > 1 && StringChr (str, ':') != NULL) {
+ while (StringDoesHaveText (str)) {
+ ptr = StringChr (str, ':');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
}
+ TrimSpacesAroundString (str);
+ ValNodeCopyStr (&head, 0, str);
+ str = ptr;
}
- }
- return 0;
-}
-
-
-typedef struct exonintervallist {
- ExonIntervalPtr intervals;
- Int4 num_intervals;
-} ExonIntervalListData, PNTR ExonIntervalListPtr;
-
-
-static ExonIntervalListPtr ExonIntervalListFree (ExonIntervalListPtr list)
-{
- if (list != NULL) {
- list->intervals = MemFree (list->intervals);
- list = MemFree (list);
- }
- return list;
-}
-
-
-static ExonIntervalListPtr ExonIntervalListNew (ValNodePtr interval_list)
-{
- ExonIntervalListPtr list = NULL;
- ExonIntervalPtr exint;
- ValNodePtr vnp;
- Int4 i;
-
- list = (ExonIntervalListPtr) MemNew (sizeof (ExonIntervalListData));
- list->num_intervals = ValNodeLen (interval_list);
- if (list->num_intervals == 0) {
- list->intervals = NULL;
} else {
- list->intervals = (ExonIntervalPtr) MemNew (sizeof (ExonIntervalData) * list->num_intervals);
- for (vnp = interval_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
- exint = (ExonIntervalPtr) vnp->data.ptrvalue;
- list->intervals[i].start = exint->start;
- list->intervals[i].stop = exint->stop;
- }
- }
- return list;
-}
-
-
-static ExonIntervalListPtr GetExonIntervalsForBioseq (BioseqPtr bsp)
-{
- SeqFeatPtr sfp;
- SeqMgrFeatContext fcontext;
- ExonIntervalListPtr list = NULL;
- ValNodePtr unsorted_list = NULL;
- SeqLocPtr slp;
- Int4 num_intervals = 0;
-
- if (bsp == NULL || ISA_aa (bsp->mol)) {
- return NULL;
- }
-
- for (sfp = SeqMgrGetNextFeature(bsp, NULL, 0, FEATDEF_CDS, &fcontext);
- sfp != NULL;
- sfp = SeqMgrGetNextFeature(bsp, sfp, 0, FEATDEF_CDS, &fcontext)) {
- for (slp = SeqLocFindNext (sfp->location, NULL);
- slp != NULL;
- slp = SeqLocFindNext (sfp->location, slp)) {
- ValNodeAddPointer (&unsorted_list, 0, ExonIntervalNew (SeqLocStart (slp), SeqLocStop (slp)));
- num_intervals++;
- }
- }
-
- for (sfp = SeqMgrGetNextFeature(bsp, NULL, 0, FEATDEF_exon, &fcontext);
- sfp != NULL;
- sfp = SeqMgrGetNextFeature(bsp, sfp, 0, FEATDEF_exon, &fcontext)) {
- for (slp = SeqLocFindNext (sfp->location, NULL);
- slp != NULL;
- slp = SeqLocFindNext (sfp->location, slp)) {
- ValNodeAddPointer (&unsorted_list, 0, ExonIntervalNew (SeqLocStart (slp), SeqLocStop (slp)));
- num_intervals++;
- }
+ ValNodeCopyStr (&head, 0, str);
}
- if (num_intervals > 0) {
- unsorted_list = ValNodeSort (unsorted_list, SortExonIntervals);
- ValNodeUnique (&unsorted_list, SortExonIntervals, ValNodeFreeData);
- list = ExonIntervalListNew(unsorted_list);
- unsorted_list = ValNodeFreeData (unsorted_list);
- }
- return list;
+ MemFree (tmp);
+ return head;
}
+//LCOV_EXCL_START
+static CharPtr FusePrimerNames(
+ CharPtr first,
+ CharPtr second
+)
-static Boolean IsPointInExon (Int4 pos, ExonIntervalListPtr list)
{
- Int4 i = 0;
- Boolean found = FALSE;
-
- if (list == NULL) {
- return FALSE;
- }
+ size_t len;
+ CharPtr str;
- /* looking for interval that contains pos */
- while (i < list->num_intervals && !found && list->intervals[i].start <= pos) {
- if (list->intervals[i].stop >= pos) {
- found = TRUE;
- }
- i++;
- }
- return found;
-}
+ if (first == NULL) return second;
+ if (second == NULL) return first;
+ len = StringLen (first) + StringLen (second) + 5;
+ str = MemNew (len);
+ if (str == NULL) return NULL;
-static ExonIntervalListPtr PNTR GetExonIntervalLists (DenseSegPtr dsp, Int4 examine_dim)
-{
- SeqIdPtr sip;
- BioseqPtr bsp;
- ExonIntervalListPtr PNTR exon_lists;
- Int4 i;
+ StringCpy (str, first);
+ StringCat (str, ":");
+ StringCat (str, second);
- if (dsp == NULL || examine_dim < 1) {
- return NULL;
- }
- exon_lists = (ExonIntervalListPtr PNTR) MemNew (sizeof (ExonIntervalListPtr) * examine_dim);
- for (sip = dsp->ids, i = 0; sip != NULL && i < examine_dim; sip = sip->next, i++) {
- bsp = BioseqLockById (sip);
- exon_lists[i] = GetExonIntervalsForBioseq(bsp);
- BioseqUnlock (bsp);
- }
- return exon_lists;
+ return str;
}
+static PCRPrimerPtr ModernizePCRPrimerHalf (
+ CharPtr seq,
+ CharPtr name
+)
-static ExonIntervalListPtr PNTR FreeExonIntervalLists (ExonIntervalListPtr PNTR exon_lists, Int4 examine_dim)
{
- Int4 i;
- for (i = 0; i < examine_dim; i++) {
- exon_lists[i] = ExonIntervalListFree(exon_lists[i]);
- }
- exon_lists = MemFree (exon_lists);
- return exon_lists;
-}
+ CharPtr curr_name = NULL, curr_seq = NULL, fused_name;
+ PCRPrimerPtr curr_primer = NULL, last_primer = NULL, primer_set = NULL;
+ ValNodePtr name_list, seq_list, name_vnp, seq_vnp;
+ seq_list = ParsePCRColonString (seq);
+ name_list = ParsePCRColonString (name);
-/* note - we have already determined that this alignment position is in at least one
- * exon. The question here is, if some sequences are in gaps at this point and others
- * are not, do all of the sequences in one group have an exon at this position and all
- * of the others do not?
- */
-static Boolean IsShiftInExon (SeqAlignPtr salp, Int4 pos, Int4 examine_dim)
-{
- Int4 i;
- Int4 num_in_gap_with_exon = 0;
- Int4 num_in_gap_no_exon = 0;
- Int4 num_not_gap_with_exon = 0;
- Int4 num_not_gap_no_exon = 0;
- Int4 num_gap, num_no_gap;
- Int4 seq_pos = 0, j, before_pos, after_pos, aln_len;
- DenseSegPtr dsp;
- ExonIntervalListPtr PNTR exon_lists;
- Boolean in_exon;
- Boolean rval = FALSE;
-
- if (salp == NULL || pos < 0 || salp->segtype != SAS_DENSEG || (dsp = (DenseSegPtr) salp->segs) == NULL) {
- return FALSE;
- }
-
- exon_lists = GetExonIntervalLists(dsp, examine_dim);
-
- AlnMgr2IndexSeqAlign (salp);
- aln_len = SeqAlignLength (salp);
-
- for (i = 0;
- i < examine_dim
- && (num_in_gap_with_exon == 0
- || num_in_gap_no_exon == 0
- || num_not_gap_with_exon == 0
- || num_not_gap_no_exon == 0);
- i++) {
- seq_pos = AlnMgr2MapSeqAlignToBioseq (salp, pos, i + 1);
- if (seq_pos < 0) {
- j = pos - 1;
- before_pos = -1;
- while (j > -1 && before_pos < 0) {
- before_pos = AlnMgr2MapSeqAlignToBioseq (salp, j, i + 1);
- j--;
- }
- j = pos + 1;
- after_pos = -1;
- while (j < aln_len && after_pos < 0) {
- after_pos = AlnMgr2MapSeqAlignToBioseq (salp, j, i + 1);
- j++;
- }
- in_exon = FALSE;
- if (before_pos == after_pos - 1) {
- if (IsPointInExon(before_pos, exon_lists[i]) && IsPointInExon(after_pos, exon_lists[i])) {
- in_exon = TRUE;
- }
- }
-
- if (in_exon) {
- num_in_gap_with_exon++;
- } else {
- num_in_gap_no_exon++;
- }
- } else {
- in_exon = IsPointInExon(seq_pos, exon_lists[i]);
- if (in_exon) {
- num_not_gap_with_exon++;
- } else {
- num_not_gap_no_exon++;
- }
- }
- }
- exon_lists = FreeExonIntervalLists(exon_lists, examine_dim);
+ seq_vnp = seq_list;
+ name_vnp = name_list;
- /* are we looking at an insertion or a deletion? */
- num_gap = num_in_gap_with_exon + num_in_gap_no_exon;
- num_no_gap = num_not_gap_with_exon + num_not_gap_no_exon;
- if (num_gap > num_no_gap) {
- /* this is an insertion */
- if (num_not_gap_with_exon > 0) {
- rval = TRUE;
- }
- } else if (num_gap < num_no_gap) {
- /* this is a deletion */
- if (num_in_gap_with_exon > 0) {
- rval = TRUE;
+ while (seq_vnp != NULL /* || name_vnp != NULL */) {
+ if (seq_vnp != NULL) {
+ curr_seq = (CharPtr) seq_vnp->data.ptrvalue;
+ seq_vnp = seq_vnp->next;
}
- } else {
- /* evenly divided - no way to tell */
- if (num_in_gap_with_exon > 0 || num_not_gap_with_exon > 0) {
- rval = TRUE;
+ if (name_vnp != NULL) {
+ curr_name = (CharPtr) name_vnp->data.ptrvalue;
+ name_vnp = name_vnp->next;
+ } else {
+ curr_name = NULL;
}
- }
- return rval;
-}
-
-
-static ExonIntervalListPtr GetAlignedExons
-(DenseSegPtr dsp,
- Int4 examine_dim)
-{
- Int4 seg, i, j;
- Int4 aln_pos = 1, start = -1;
- Boolean in_exon = FALSE;
- ValNodePtr align_intervals = NULL;
- ExonIntervalListPtr list = NULL;
- ExonIntervalListPtr PNTR exon_lists;
- /* create lists of exons for individual sequences */
- exon_lists = GetExonIntervalLists(dsp, examine_dim);
+ curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
+ if (curr_primer != NULL) {
+ curr_primer->seq = StringSaveNoNull (curr_seq);
+ curr_primer->name = StringSaveNoNull (curr_name);
- for (seg = 0; seg < dsp->numseg; seg++) {
- for (j = 0; j < dsp->lens[seg]; j++) {
- in_exon = FALSE;
- for (i = 0; i < examine_dim && !in_exon; i++) {
- if (dsp->starts[seg * dsp->dim + i] != -1
- && IsPointInExon(dsp->starts[seg * dsp->dim + i] + j, exon_lists[i])) {
- in_exon = TRUE;
- }
+ if (primer_set == NULL) {
+ primer_set = curr_primer;
}
- if (in_exon) {
- if (start < 0) {
- /* found the beginning of an interval */
- start = aln_pos;
- }
- } else {
- if (start > -1) {
- /* found the end of an interval */
- ValNodeAddPointer (&align_intervals, 0, ExonIntervalNew(start, aln_pos - 1));
- start = -1;
- }
+ if (last_primer != NULL) {
+ last_primer->next = curr_primer;
}
- aln_pos++;
+ last_primer = curr_primer;
}
}
- if (start > -1) {
- /* end of interval is same as end of alignment */
- ValNodeAddPointer (&align_intervals, 0, ExonIntervalNew(start, aln_pos - 1));
- start = -1;
- }
- /* free individual sequence exon lists */
- exon_lists = FreeExonIntervalLists(exon_lists, examine_dim);
-
- if (align_intervals != NULL) {
- list = ExonIntervalListNew (align_intervals);
- align_intervals = ValNodeFreeData (align_intervals);
+ while (name_vnp != NULL && last_primer != NULL) {
+ curr_name = (CharPtr) name_vnp->data.ptrvalue;
+ fused_name = FusePrimerNames (last_primer->name, curr_name);
+ MemFree (last_primer->name);
+ last_primer->name = StringSaveNoNull (fused_name);
+ name_vnp = name_vnp->next;
}
-
- return list;
-}
-
-static CharPtr FrameShiftReportString (EFrameShiftReport flag, Int4 aln_pos, Int4 gap, Int4 non_gap, Int4Ptr report, BoolPtr ignore, Int4 len, CharPtr fmt, CharPtr ids, Boolean possible_error)
-{
- CharPtr msg = NULL;
- Int4 num_items = 0, i, msg_len, num_flag = 0, num_normal = 0;
- Boolean first = TRUE, show_flag;
- CharPtr gap_fmt = "Gap: %d Non-gap: %d\n";
- CharPtr possible_error_msg = "(Shift occurs at alignment position where exons exist on other sequences, but may not actually be in exon for this sequence)";
+ while (name_vnp != NULL && last_primer == NULL) {
+ curr_name = (CharPtr) name_vnp->data.ptrvalue;
+ curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
+ if (curr_primer != NULL) {
+ curr_primer->name = StringSaveNoNull (curr_name);
- for (i = 0; i < len; i++) {
- if (!ignore[i]) {
- if (report[i] == flag) {
- num_flag++;
- } else {
- num_normal++;
+ if (primer_set == NULL) {
+ primer_set = curr_primer;
}
- }
- }
-
- if (num_flag == 0 || num_normal == 0) {
- return NULL;
- }
-
- if (num_flag <= num_normal) {
- num_items = num_flag;
- show_flag = TRUE;
- } else {
- num_items = num_normal;
- show_flag = FALSE;
- }
-
- msg_len = StringLen (fmt) + StringLen (gap_fmt) + 30 + (num_items * 204);
- if (possible_error) {
- msg_len += StringLen (possible_error_msg) + 1;
- }
- msg = (CharPtr) MemNew (sizeof (CharPtr) * msg_len);
- sprintf (msg, fmt, aln_pos);
- sprintf (msg + StringLen (msg), gap_fmt, gap, non_gap);
- num_items = 0;
- for (i = 0; i < len; i++) {
- if (!ignore[i] && ((show_flag && report[i] == flag) || (!show_flag && report[i] != flag))) {
- if (!first) {
- StringCat (msg, ", ");
- if (num_items % 10 == 0) {
- StringCat (msg, "\n");
- }
+ if (last_primer != NULL) {
+ last_primer->next = curr_primer;
}
- StringCat (msg, ids + (200 * i));
- first = FALSE;
- num_items++;
+ last_primer = curr_primer;
}
+ name_vnp = name_vnp->next;
}
- if (possible_error) {
- StringCat (msg, possible_error_msg);
- }
- return msg;
-}
+ ValNodeFreeData (seq_list);
+ ValNodeFreeData (name_list);
-static CharPtr FrameShiftReportMult (Int4 aln_pos, Int4Ptr report, BoolPtr ignore, Int4 len, CharPtr fmt, CharPtr ids)
-{
- CharPtr msg = NULL;
- Int4 num_items = 0, i, msg_len, num_flag = 0, num_normal = 0;
- Boolean first = TRUE, show_flag;
-
- for (i = 0; i < len; i++) {
- if (!ignore[i]) {
- if (report[i] == eFrameShiftReport_ExonMult3) {
- num_flag++;
- } else {
- num_normal++;
- }
- }
- }
-
- if (num_flag == 0 || num_normal == 0) {
- return NULL;
- }
-
- if (num_flag <= num_normal) {
- num_items = num_flag;
- show_flag = TRUE;
- } else {
- num_items = num_normal;
- show_flag = FALSE;
- }
-
- msg_len = StringLen (fmt) + (num_items * 204);
- msg = (CharPtr) MemNew (sizeof (CharPtr) * msg_len);
- sprintf (msg, fmt, aln_pos);
- num_items = 0;
- for (i = 0; i < len; i++) {
- if (!ignore[i]
- && ((show_flag && report[i] == eFrameShiftReport_ExonMult3)
- || (!show_flag && report[i] != eFrameShiftReport_ExonMult3))) {
- if (!first) {
- StringCat (msg, ", ");
- if (num_items % 10 == 0) {
- StringCat (msg, "\n");
- }
- }
- StringCat (msg, ids + (200 * i));
- first = FALSE;
- num_items++;
- }
- }
- return msg;
-}
-
-
-static FrameShiftReportPtr FrameShiftReportNew (CharPtr msg, Int4 aln_pos, Int4 first_related_seq)
-{
- FrameShiftReportPtr r = (FrameShiftReportPtr) MemNew (sizeof (FrameShiftReportData));
- r->msg = msg;
- r->aln_pos = aln_pos;
- r->first_related_seq = first_related_seq;
- return r;
+ return primer_set;
}
+NLM_EXTERN void ModernizePCRPrimers (
+ BioSourcePtr biop
+)
-static FrameShiftReportPtr FrameShiftReportFree (FrameShiftReportPtr r)
{
- if (r != NULL) {
- r->msg = MemFree (r->msg);
- r = MemFree (r);
- }
- return r;
-}
-
+ PCRReactionSetPtr curr_reaction, last_reaction = NULL, reaction_set = NULL;
+ PCRPrimerPtr forward, reverse;
+ PcrSetPtr psp;
+ ValNodePtr pset, vnp;
+ SubSourcePtr nextssp;
+ SubSourcePtr PNTR prevssp;
+ SubSourcePtr ssp;
+ Boolean unlink;
-NLM_EXTERN ValNodePtr FrameShiftReportListFree (ValNodePtr vnp)
-{
- ValNodePtr tmp;
+ if (biop == NULL) return;
+ /* if (biop->pcr_primers != NULL) return; */
- while (vnp != NULL) {
- tmp = vnp->next;
- vnp->next = NULL;
- vnp->data.ptrvalue = FrameShiftReportFree (vnp->data.ptrvalue);
- vnp = ValNodeFree (vnp);
- vnp = tmp;
- }
- return vnp;
-}
+ pset = ParsePCRSet (biop);
+ if (pset == NULL) return;
+ for (vnp = pset; vnp != NULL; vnp = vnp->next) {
+ psp = (PcrSetPtr) vnp->data.ptrvalue;
+ if (psp == NULL) continue;
-static int FrameShiftReportCompare (FrameShiftReportPtr r1, FrameShiftReportPtr r2)
-{
- if (r1 == NULL && r2 == NULL) {
- return 0;
- } else if (r1 == NULL) {
- return -1;
- } else if (r2 == NULL) {
- return 1;
- } else if (r1->aln_pos < r2->aln_pos) {
- return -1;
- } else if (r1->aln_pos > r2->aln_pos) {
- return 1;
- } else {
- return StringCmp (r1->msg, r2->msg);
- }
-}
+ forward = ModernizePCRPrimerHalf (psp->fwd_seq, psp->fwd_name);
+ reverse = ModernizePCRPrimerHalf (psp->rev_seq, psp->rev_name);
+ if (forward != NULL || reverse != NULL) {
-static int LIBCALLBACK SortFrameShiftReports (VoidPtr ptr1, VoidPtr ptr2)
+ curr_reaction = (PCRReactionSetPtr) MemNew (sizeof (PCRReactionSet));
+ if (curr_reaction != NULL) {
+ curr_reaction->forward = forward;
+ curr_reaction->reverse = reverse;
-{
- ValNodePtr vnp1, vnp2;
-
- if (ptr1 != NULL && ptr2 != NULL) {
- vnp1 = *((ValNodePtr PNTR) ptr1);
- vnp2 = *((ValNodePtr PNTR) ptr2);
- if (vnp1 != NULL && vnp2 != NULL) {
- if (vnp1->choice == vnp2->choice) {
- return FrameShiftReportCompare(vnp1->data.ptrvalue, vnp2->data.ptrvalue);
- } else if (vnp1->choice == eFrameShiftReport_Exon) {
- return -1;
- } else if (vnp2->choice == eFrameShiftReport_Exon) {
- return 1;
- } else if (vnp1->choice == eFrameShiftReport_Intron) {
- return -1;
- } else if (vnp2->choice == eFrameShiftReport_Intron) {
- return 1;
- } else if (vnp1->choice == eFrameShiftReport_ExonMult3) {
- return -1;
- } else if (vnp2->choice == eFrameShiftReport_ExonMult3) {
- return 1;
- }
- }
- }
- return 0;
-}
-
-
-NLM_EXTERN void PrintFrameShiftReportList (ValNodePtr list, Boolean has_exons, LogInfoPtr lip)
-{
- ValNodePtr vnp;
- FrameShiftReportPtr r;
- EFrameShiftReport section = eFrameShiftReport_NoReport;
-
- for (vnp = list; vnp != NULL; vnp = vnp->next) {
- if (vnp->choice != section) {
- if (vnp->choice == eFrameShiftReport_Exon) {
- fprintf (lip->fp, "FRAMESHIFTS IN EXONS\n\n");
- } else if (vnp->choice == eFrameShiftReport_Intron) {
- if (has_exons) {
- fprintf (lip->fp, "FRAMESHIFTS IN INTRONS\n\n");
- } else {
- fprintf (lip->fp, "FRAMESHIFTS\n\n");
+ if (reaction_set == NULL) {
+ reaction_set = curr_reaction;
}
- } else if (vnp->choice == eFrameShiftReport_ExonMult3) {
- fprintf (lip->fp, "MULTIPLES OF THREE ARE IGNORED\n\n");
+ if (last_reaction != NULL) {
+ last_reaction->next = curr_reaction;
+ }
+ last_reaction = curr_reaction;
}
- section = vnp->choice;
- }
- if ((r = (FrameShiftReportPtr) vnp->data.ptrvalue) != NULL) {
- fprintf (lip->fp, "%s\n\n", r->msg);
- lip->data_in_log = TRUE;
}
}
-}
-
-
-static Int4 LenBeforeBoundary (Int4 i, Int4 seg, Int4 offset, Int4 aln_pos,
- DenseSegPtr dsp, ExonIntervalListPtr exon_intervals)
-{
- Int4 len = 1;
- Boolean is_gap;
- Boolean is_exon;
- Boolean found_boundary = FALSE;
- if (dsp == NULL) {
- return 1;
- }
+ FreePCRSet (pset);
- if (dsp->starts[dsp->dim * seg + i] == -1) {
- is_gap = TRUE;
- } else {
- is_gap = FALSE;
- }
+ if (reaction_set != NULL) {
+ if (last_reaction != NULL) {
+ /* merge with existing structured pcr_primers */
+ last_reaction->next = biop->pcr_primers;
+ }
+ biop->pcr_primers = reaction_set;
- is_exon = IsPointInExon (aln_pos, exon_intervals);
+ ssp = biop->subtype;
+ prevssp = (SubSourcePtr PNTR) &(biop->subtype);
+ while (ssp != NULL) {
+ nextssp = ssp->next;
+ unlink= FALSE;
- offset++;
- aln_pos++;
- while (seg < dsp->numseg && !found_boundary) {
- while (offset < dsp->lens[seg] && !found_boundary) {
- if (IsPointInExon(aln_pos, exon_intervals) != is_exon) {
- found_boundary = TRUE;
- } else {
- len++;
- offset++;
- aln_pos++;
+ if (ssp->subtype == SUBSRC_fwd_primer_seq ||
+ ssp->subtype == SUBSRC_rev_primer_seq ||
+ ssp->subtype == SUBSRC_fwd_primer_name ||
+ ssp->subtype == SUBSRC_rev_primer_name) {
+ unlink = TRUE;
}
- }
- if (!found_boundary) {
- seg++;
- offset = 0;
- if (seg < dsp->numseg) {
- if (dsp->starts[dsp->dim * seg + i] == -1 && !is_gap) {
- found_boundary = TRUE;
- } else if (dsp->starts[dsp->dim * seg + i] != -1 && is_gap) {
- found_boundary = TRUE;
- }
+ if (unlink) {
+ *prevssp = ssp->next;
+ ssp->next = NULL;
+ SubSourceFree (ssp);
+ } else {
+ prevssp = (SubSourcePtr PNTR) &(ssp->next);
}
+ ssp = nextssp;
}
}
-
- return len;
}
+//LCOV_EXCL_STOP
+NLM_EXTERN Boolean PubIsEffectivelyEmpty (PubdescPtr pdp)
-static Int4
-FindFirstSeqWithProblem
-(Int4Ptr report, BoolPtr current_gap_ignore, Int4 num,
- EFrameShiftReport report_type, Int4 num_gap, Int4 num_non_gap)
{
- Int4 i;
+ ValNodePtr vnp;
- if (num_non_gap >= num_gap) {
- for (i = 0; i < num; i++) {
- if (current_gap_ignore[i]) {
- /* don't report this one */
- } else if (report[i] == report_type) {
- return i;
- }
- }
- } else {
- /* look for transition between problem/not-problem */
- for (i = 0; i < num; i++) {
- if (report[i] == report_type) {
- if (i < num - 1 && report[i + 1] != report_type) {
- return i;
- } else if (i > 0 && report[i - 1] != report_type) {
- return i;
- }
- }
+ if (pdp == NULL) return FALSE;
+ vnp = pdp->pub;
+ if (vnp != NULL && vnp->next == NULL && vnp->choice == PUB_Gen) {
+ if (empty_citgen ((CitGenPtr) vnp->data.ptrvalue)) {
+ return TRUE;
}
}
-
- return -1;
+ return FALSE;
}
-NLM_EXTERN ValNodePtr FindFrameShiftsInAlignment (SeqAlignPtr salp, BoolPtr has_exons)
-{
- DenseSegPtr dsp;
- Int4 seg, i, j, aln_pos = 1, len_gap, extend;
- Int4 num_gap, num_non_gap;
- BoolPtr current_gap_ignore = NULL;
- Int4Ptr current_gap_examined = NULL, gap_mult3 = NULL;
- Int4Ptr report = NULL;
- Boolean any_report;
- Int4 num_mult;
- CharPtr ids = NULL;
- ValNodePtr report_list = NULL;
- SeqIdPtr sip;
- Int4 examine_dim;
- CharPtr msg;
- CharPtr exon_insert_fmt = "Insertion in exon at position %d:\n";
- CharPtr exon_delete_fmt = "Deletion in exon at position %d:\n";
- CharPtr intron_insert_fmt = "Insertion at position %d:\n";
- CharPtr intron_delete_fmt = "Deletion at position %d:\n";
- CharPtr mult_fmt = "Ignored multiple of 3 at %d:\n";
- ExonIntervalListPtr exon_intervals;
- Int4 first_related_seq;
- Boolean possible_error;
-
- if (salp == NULL) {
- return NULL;
- }
-
- if (salp->segtype != SAS_DENSEG || (dsp = (DenseSegPtr) salp->segs) == NULL) {
- return NULL;
- }
- ids = (CharPtr) MemNew (sizeof (Char) * dsp->dim * 200);
- for (sip = dsp->ids, i = 0; sip != NULL; sip = sip->next, i++) {
- SeqIdWrite (sip, ids + (200 * i), PRINTID_REPORT, 199);
- }
- if (StringCmp (ids + (200 * (dsp->dim - 1)), "Consensus") == 0) {
- examine_dim = dsp->dim - 1;
- } else {
- examine_dim = dsp->dim;
- }
-
- current_gap_examined = (Int4Ptr) MemNew (sizeof (Int4) * examine_dim);
- gap_mult3 = (Int4Ptr) MemNew (sizeof (Int4Ptr) * examine_dim);
- current_gap_ignore = (BoolPtr) MemNew (sizeof (Boolean) * examine_dim);
- report = (Int4Ptr) MemNew (sizeof (Int4) * examine_dim);
- for (i = 0; i < examine_dim; i++) {
- current_gap_examined[i] = 0;
- gap_mult3[i] = 0;
- current_gap_ignore[i] = FALSE;
- }
-
- exon_intervals = GetAlignedExons (dsp, examine_dim);
- if (has_exons != NULL) {
- if (exon_intervals == NULL) {
- *has_exons = FALSE;
- } else {
- *has_exons = TRUE;
- }
- }
-
- for (seg = 0; seg < dsp->numseg; seg++) {
- num_gap = 0;
- num_non_gap = 0;
- for (i = 0; i < examine_dim; i++) {
- if (dsp->starts[seg * dsp->dim + i] == -1) {
- if (!current_gap_ignore[i] && gap_mult3[i] == 0) {
- if (seg == 0) {
- /* ignore - beginning gap */
- current_gap_ignore[i] = TRUE;
- } else {
- /* check to see if gap goes to end */
- extend = seg + 1;
- while (extend < dsp->numseg && dsp->starts[extend * dsp->dim + i] == -1) {
- extend++;
- }
- if (extend == dsp->numseg) {
- /* ignore - gap extends to end of alignment */
- current_gap_ignore[i] = TRUE;
- }
- }
- if (!current_gap_ignore[i]) {
- num_gap ++;
- }
- }
- } else {
- current_gap_ignore[i] = FALSE;
- num_non_gap++;
- }
- }
-
- if (num_gap > 0) {
- /* report for each position in seg */
- for (j = 0; j < dsp->lens[seg]; j++) {
- MemSet (report, eFrameShiftReport_NoReport, sizeof (Int4) * examine_dim);
- num_mult = 0;
- any_report = FALSE;
- if (IsPointInExon (aln_pos + j, exon_intervals)) {
- possible_error = FALSE;
- for (i = 0; i < examine_dim; i++) {
- if (gap_mult3[i] > 0) {
- gap_mult3[i]--;
- current_gap_examined[i] --;
- } else if (!current_gap_ignore[i] && dsp->starts[dsp->dim * seg + i] == -1) {
- len_gap = 1;
- if (current_gap_examined[i] > 0) {
- current_gap_examined[i] --;
- } else {
- /* check for multiple of 3 */
- len_gap = LenBeforeBoundary (i, seg, j, aln_pos + j, dsp, exon_intervals);
- current_gap_examined[i] = len_gap - 1;
- }
- if (len_gap % 3 == 0) {
- report[i] = eFrameShiftReport_ExonMult3;
- gap_mult3[i] = len_gap - 1;
- num_mult++;
- num_gap--;
- } else {
- report[i] = eFrameShiftReport_Exon;
- possible_error = ! IsShiftInExon (salp, aln_pos + j, examine_dim);
- any_report = TRUE;
- }
- }
- }
- if (any_report) {
- msg = FrameShiftReportString(eFrameShiftReport_Exon, aln_pos + j, num_gap, num_non_gap, report, current_gap_ignore, examine_dim,
- num_gap > num_non_gap ? exon_insert_fmt : exon_delete_fmt, ids, possible_error);
- first_related_seq = FindFirstSeqWithProblem(report, current_gap_ignore, examine_dim, eFrameShiftReport_Exon, num_gap, num_non_gap);
- ValNodeAddPointer (&report_list, eFrameShiftReport_Exon, FrameShiftReportNew (msg, aln_pos + j, first_related_seq));
- }
- } else {
- /* point is not in exon */
- for (i = 0; i < examine_dim; i++) {
- if (gap_mult3[i] > 0) {
- gap_mult3[i]--;
- current_gap_examined[i] --;
- } else if (!current_gap_ignore[i] && dsp->starts[dsp->dim * seg + i] == -1) {
- len_gap = 1;
- if (current_gap_examined[i] > 0) {
- current_gap_examined[i] --;
- } else {
- /* check for multiple of 3 */
- len_gap = LenBeforeBoundary (i, seg, j, aln_pos + j, dsp, exon_intervals);
- current_gap_examined[i] = len_gap - 1;
- }
- if (len_gap % 3 == 0) {
- report[i] = eFrameShiftReport_ExonMult3;
- gap_mult3[i] = len_gap - 1;
- num_mult++;
- num_gap--;
- } else {
- report[i] = eFrameShiftReport_Intron;
- }
- }
- }
- /* report introns later */
- msg = FrameShiftReportString(eFrameShiftReport_Intron, aln_pos + j, num_gap, num_non_gap, report, current_gap_ignore, examine_dim,
- num_gap > num_non_gap ? intron_insert_fmt : intron_delete_fmt, ids, FALSE);
- if (msg != NULL) {
- first_related_seq = FindFirstSeqWithProblem(report, current_gap_ignore, examine_dim, eFrameShiftReport_Intron, num_gap, num_non_gap);
- ValNodeAddPointer (&report_list, eFrameShiftReport_Intron, FrameShiftReportNew(msg, aln_pos + j, first_related_seq));
- }
- }
- /* report multiples of 3 later */
- if (num_mult > 0) {
- msg = FrameShiftReportMult (aln_pos + j, report, current_gap_ignore, examine_dim, mult_fmt, ids);
- first_related_seq = FindFirstSeqWithProblem(report, current_gap_ignore, examine_dim, eFrameShiftReport_ExonMult3, num_gap, num_non_gap);
- ValNodeAddPointer (&report_list, eFrameShiftReport_ExonMult3, FrameShiftReportNew(msg, aln_pos + j, first_related_seq));
- }
- }
- /* finished reporting for each position in seg */
- }
- aln_pos += dsp->lens[seg];
- }
-
- exon_intervals = ExonIntervalListFree (exon_intervals);
-
- report_list = ValNodeSort (report_list, SortFrameShiftReports);
-
- ids = MemFree (ids);
- current_gap_examined = MemFree (current_gap_examined);
- current_gap_ignore = MemFree (current_gap_ignore);
- report = MemFree (report);
- return report_list;
-}
diff --git a/api/sqnutil2.c b/api/sqnutil2.c
index 95572fe3..6c435bae 100644
--- a/api/sqnutil2.c
+++ b/api/sqnutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.538 $
+* $Revision: 6.633 $
*
* File Description:
*
@@ -59,6 +59,18 @@
#include <alignval.h>
#include <objvalid.h>
#include <valapi.h>
+#include <salstruc.h>
+
+/* for publookup */
+#include <mla2api.h>
+#include <pmfapi.h>
+
+/* for SUC */
+#include <asn2gnbp.h>
+
+/* for country list */
+#include <valid.h>
+
#define NLM_GENERATED_CODE_PROTO
#include <objmacro.h>
#include <macroapi.h>
@@ -106,6 +118,8 @@ static CharPtr SqnTrimSpacesAroundString (CharPtr str)
return str;
}
+
+//LCOV_EXCL_START
static CharPtr SqnStringSave (CharPtr from)
{
@@ -444,7 +458,6 @@ static SeqLocRangePtr SeqLocRangeFree (SeqLocRangePtr slrp)
return NULL;
}
-
static Boolean IsLocationOnCircularBioseq (SeqLocPtr slp)
{
BioseqPtr bsp;
@@ -1016,6 +1029,7 @@ NLM_EXTERN Boolean SeqLocMixedStrands (BioseqPtr bsp, SeqLocPtr slp)
SeqLocRangeFree (head);
return FALSE;
}
+//LCOV_EXCL_STOP
static void ConvertToFeatsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent, Boolean toProts)
@@ -1941,8 +1955,10 @@ NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource (
)
{
+ Char ch;
DbtagPtr db;
Int2 i;
+ size_t len;
ObjectIdPtr oip;
OrgNamePtr onp;
OrgRefPtr orp;
@@ -2094,6 +2110,23 @@ NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource (
ssp->name = StringSave (str);
ssp->next = biop->subtype;
biop->subtype = ssp;
+
+ /* convert angle brackets to square brackets in source notes */
+ str = ssp->name;
+ len = StringLen (str);
+ if (len > 0 && str [0] == '<' && str [len - 1] == '>') {
+ ch = *str;
+ while (ch != '\0') {
+ if (ch == '<') {
+ *str = '[';
+ } else if (ch == '>') {
+ *str = ']';
+ }
+ str++;
+ ch = *str;
+ }
+ }
+
}
}
@@ -2289,6 +2322,9 @@ NLM_EXTERN ProtRefPtr ParseTitleIntoProtRef (
if (str == NULL) {
str = SqnTagFind (stp, "prot");
}
+ if (str == NULL) {
+ str = SqnTagFind (stp, "product");
+ }
if (str != NULL) {
ValNodeCopyStr (&(prp->name), 0, str);
}
@@ -2491,24 +2527,6 @@ NLM_EXTERN GBBlockPtr ParseTitleIntoGenBank (
}
-static Boolean IsAllDigits (CharPtr str)
-{
- CharPtr cp;
-
- if (StringHasNoText (str)) return FALSE;
-
- cp = str;
- while (*cp != 0 && isdigit (*cp)) {
- cp++;
- }
- if (*cp == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
static void AddStringToSeqHist (
SeqHistPtr shp,
CharPtr str
@@ -2534,10 +2552,10 @@ static void AddStringToSeqHist (
tsip->accession = StringSave (str + 8);
sip->data.ptrvalue = tsip;
sip->choice = SEQID_OTHER;
- } else if (StringNICmp (str, "gi|", 3) == 0 && IsAllDigits (str + 3)) {
+ } else if (StringNICmp (str, "gi|", 3) == 0 && StringIsAllDigits (str + 3)) {
sip->data.intvalue = atoi (str + 3);
sip->choice = SEQID_GI;
- } else if (IsAllDigits (str)) {
+ } else if (StringIsAllDigits (str)) {
sip->data.intvalue = atoi (str);
sip->choice = SEQID_GI;
} else {
@@ -2911,7 +2929,7 @@ NLM_EXTERN void AddFieldStringToDbLinkUserObject (
i++;
}
if (i > 0) {
- AddFieldsToDBLinkUserObject (uop, field_name, i, cpp);
+ AddStringListFieldToDBLinkUserObject(uop, i, cpp, field_name);
}
}
MemFree (cpp);
@@ -3242,8 +3260,8 @@ NLM_EXTERN SeqGraphPtr ReadPhrapQualityFC (FileCachePtr fcp, BioseqPtr bsp)
{
ByteStorePtr bs = NULL;
- Char buf [256];
- Uint1 bytes [128];
+ Char buf [2048];
+ Uint1 bytes [2048];
Char ch;
Boolean goOn = TRUE;
Int2 i;
@@ -3253,7 +3271,7 @@ NLM_EXTERN SeqGraphPtr ReadPhrapQualityFC (FileCachePtr fcp, BioseqPtr bsp)
CharPtr p;
Int4 pos;
CharPtr q;
- Char prefix [256];
+ Char prefix [2048];
size_t prefixlen;
SeqGraphPtr sgp = NULL;
SeqIntPtr sintp;
@@ -3274,7 +3292,8 @@ NLM_EXTERN SeqGraphPtr ReadPhrapQualityFC (FileCachePtr fcp, BioseqPtr bsp)
prefix [0] = '\0';
prefixlen = StringLen (buf);
pos = FileCacheTell (fcp);
- if (NULL == FileCacheReadLine (fcp, buf + prefixlen, sizeof (buf) - prefixlen, &nonewline)) {
+ str = FileCacheReadLine (fcp, buf + prefixlen, sizeof (buf) - prefixlen, &nonewline);
+ if (HasNoText (str)) {
goOn = FALSE;
} else {
/* above function returned prefix characters past buf start */
@@ -3971,6 +3990,8 @@ NLM_EXTERN Int4 ReadSequenceAsnFile (
SeqEntrySetScope (NULL);
}
+ } else if (! is_bioseq_set) {
+ Message (MSG_POSTERR, "Unable to read format of input file '%s'", inputFile);
}
AsnIoFree (aip, FALSE);
@@ -4221,11 +4242,13 @@ static void ProcessOneContigLine (ValNodePtr line, BioseqPtr segseq, Int4 lineNu
ValNodePtr vnp;
if (line == NULL || segseq == NULL) return;
+
+ for (i = 0; i < 5; i++) {
+ field [i] = NULL;
+ }
+
vnp = line->data.ptrvalue;
if (vnp != NULL) {
- for (i = 0; i < 5; i++) {
- field [i] = NULL;
- }
start = -1;
stop = -1;
size = -1;
@@ -5129,6 +5152,7 @@ NLM_EXTERN Uint1 FindTrnaAA3 (CharPtr str)
}
}
if (StringICmp ("fMet", tmp) == 0) return (Uint1) 'M';
+ if (StringICmp ("iMet", tmp) == 0) return (Uint1) 'M';
if (StringICmp ("OTHER", tmp) == 0) return (Uint1) 'X';
if (StringICmp ("Aspartate", tmp) == 0) return (Uint1) 'D';
if (StringICmp ("Aspartic", tmp) == 0) return (Uint1) 'D';
@@ -5159,6 +5183,7 @@ NLM_EXTERN Uint1 FindTrnaAA (CharPtr str)
}
}
if (StringICmp ("fMet", tmp) == 0) return (Uint1) 'M';
+ if (StringICmp ("iMet", tmp) == 0) return (Uint1) 'M';
if (StringICmp ("OTHER", tmp) == 0) return (Uint1) 'X';
if (StringICmp ("Aspartate", tmp) == 0) return (Uint1) 'D';
if (StringICmp ("Aspartic", tmp) == 0) return (Uint1) 'D';
@@ -5185,6 +5210,7 @@ NLM_EXTERN CharPtr FindTrnaAAIndex (CharPtr str)
}
}
if (StringICmp ("fMet", tmp) == 0) return "Methionine";
+ if (StringICmp ("iMet", tmp) == 0) return "Methionine";
if (StringICmp ("OTHER", tmp) == 0) return "Selenocysteine";
if (StringICmp ("Aspartate", tmp) == 0) return "Aspartic Acid";
if (StringICmp ("Glutamate", tmp) == 0) return "Glutamic Acid";
@@ -5229,7 +5255,10 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c
Uint1 curraa;
ValNodePtr head;
Int2 i;
+ Boolean is_A = FALSE;
+ Boolean is_ambig = FALSE;
Boolean justt = TRUE;
+ size_t len;
CharPtr str;
tRNA tr;
ValNodePtr vnp;
@@ -5251,15 +5280,23 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c
aa = 0;
head = TokenizeTRnaString (strx);
- for (vnp = head; (aa == 0 || aa == 'A') && vnp != NULL; vnp = vnp->next) {
+
+ for (vnp = head; vnp != NULL; vnp = vnp->next) {
str = (CharPtr) vnp->data.ptrvalue;
+ len = StringLen (str);
+ if (len < 1) continue;
curraa = FindTrnaAA (str);
- if (noSingleLetter && StringLen (str) == 1) {
+ if (noSingleLetter && len == 1) {
curraa = 0;
}
- if (curraa != 0) {
- if (aa == 0 || aa == 'A') {
+ if (curraa == 'A' && len == 1) {
+ is_A = TRUE;
+ curraa = 0;
+ } else if (curraa != 0) {
+ if (aa == 0) {
aa = curraa;
+ } else if (curraa != aa) {
+ is_ambig = TRUE;
}
} else if (StringICmp ("tRNA", str) != 0 &&
StringICmp ("transfer", str) != 0 &&
@@ -5287,37 +5324,16 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c
}
}
}
- for (; vnp != NULL; vnp = vnp->next) {
- str = (CharPtr) vnp->data.ptrvalue;
- curraa = FindTrnaAA (str);
- if (curraa != 0) {
- } else if (StringICmp ("tRNA", str) != 0 &&
- StringICmp ("transfer", str) != 0 &&
- StringICmp ("RNA", str) != 0 &&
- StringICmp ("product", str) != 0) {
- if (cdP != NULL && StringLen (str) == 3) {
- StringCpy (codon, str);
- for (i = 0; i < 3; i++) {
- if (codon [i] == 'U') {
- codon [i] = 'T';
- }
- }
- if (ParseDegenerateCodon (&tr, (Uint1Ptr) codon)) {
- /*
- for (i = 0; i < 6; i++) {
- cdP [i] = tr.codon [i];
- }
- */
- justt = FALSE;
- } else {
- justt = FALSE;
- }
- } else {
- justt = FALSE;
- }
- }
- }
+
ValNodeFreeData (head);
+
+ if (is_A && aa == 0) {
+ aa = 'A';
+ }
+ if (is_ambig) {
+ aa = 0;
+ }
+
if (justt) {
str = strx;
ch = *str;
@@ -5332,6 +5348,7 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c
if (justTrnaText != NULL) {
*justTrnaText = justt;
}
+
return aa;
}
@@ -6389,11 +6406,18 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
} else if (StringICmp (qual, "Affil") == 0 || StringICmp (qual, "Affiliation") == 0) {
isAffil = TRUE;
}
+ } else if (StringICmp (qual, "product_id") == 0) {
+ qnum = GBQUAL_protein_id;
+ qual = "protein_id";
}
}
if (qnum == GBQUAL_evidence) {
qnum = -1; /* no longer legal */
}
+ if (qnum == GBQUAL_gene_synonym) {
+ qnum = GBQUAL_gene;
+ isGeneSyn = TRUE;
+ }
if (qnum <= -1) {
bail = TRUE;
if (sfp->data.choice == SEQFEAT_IMP) {
@@ -6811,11 +6835,25 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
for (j = 0; j < 6; j++) {
trna->codon [j] = codon [j];
}
+ } else {
+ if (sfp->comment == NULL) {
+ sfp->comment = StringSave (val);
+ } else {
+ len = StringLen (sfp->comment) + StringLen (val) + 5;
+ str = MemNew (sizeof (Char) * len);
+ StringCpy (str, sfp->comment);
+ StringCat (str, "; ");
+ StringCat (str, val);
+ sfp->comment = MemFree (sfp->comment);
+ sfp->comment = str;
+ }
}
trna->aa = aa;
}
if (aa == 'M') {
if (StringStr (val, "fMet") != NULL) {
+ val = "tRNA-fMet";
+ /*
if (sfp->comment == NULL) {
sfp->comment = StringSave ("fMet");
} else {
@@ -6827,6 +6865,9 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
sfp->comment = MemFree (sfp->comment);
sfp->comment = str;
}
+ */
+ } else if (StringStr (val, "iMet") != NULL) {
+ val = "tRNA-iMet";
}
}
} else {
@@ -6873,6 +6914,17 @@ static void AddQualifierToFeatureEx (SeqFeatPtr sfp, CharPtr qual, CharPtr val,
if (ParseQualIntoBioSource (sfp, qual, val)) return;
}
+ /* only allow protein_id on CDS and mRNA */
+ if (qnum == GBQUAL_protein_id) {
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
+ } else if (sfp->data.choice == SEQFEAT_RNA) {
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (rrp == NULL || rrp->type != RNA_TYPE_mRNA) return;
+ } else {
+ return;
+ }
+ }
+
gbq = GBQualNew ();
if (gbq == NULL) return;
gbq->qual = StringSave (qual);
@@ -7409,6 +7461,10 @@ static SeqAnnotPtr ReadFeatureTableEx (FileCachePtr fcp, CharPtr seqid, CharPtr
rnatype = 255;
rrp->ext.choice = 1;
rrp->ext.value.ptrvalue = StringSave ("tmRNA");
+ } else {
+ /* unrecognized RNA type, mark feature for deletion */
+ sfp->idx.deleteme = TRUE;
+ ErrPostEx (SEV_ERROR, ERR_SEQ_FEAT_UnknownImpFeatKey, "Unknown feature %s", feat);
}
rrp->type = rnatype;
}
@@ -7580,6 +7636,8 @@ static SeqAnnotPtr ReadFeatureTableEx (FileCachePtr fcp, CharPtr seqid, CharPtr
}
}
NormalizeNullsBetween (sfp->location);
+
+
} else {
if (StringICmp (qual, "note") == 0) {
isnote = TRUE;
@@ -8096,7 +8154,7 @@ static ByteStorePtr ReadUidList (FileCachePtr fcp, Boolean nucdb, Boolean lastRe
CharPtr ptr;
CharPtr str;
TextSeqId tsid;
- Int4 uid;
+ BIG_ID uid;
long int val;
ValNode vn;
@@ -8137,8 +8195,8 @@ static ByteStorePtr ReadUidList (FileCachePtr fcp, Boolean nucdb, Boolean lastRe
ch = *ptr;
}
if (allDigits && sscanf (line, "%ld", &val) == 1) {
- uid = (Int4) val;
- BSWrite (bs, &uid, sizeof (Int4));
+ uid = (BIG_ID) val;
+ BSWrite (bs, &uid, sizeof (BIG_ID));
} else if (nucdb) {
tsid.name = NULL;
tsid.accession = line;
@@ -8148,7 +8206,7 @@ static ByteStorePtr ReadUidList (FileCachePtr fcp, Boolean nucdb, Boolean lastRe
vn.data.ptrvalue = (Pointer) (&tsid);
uid = GetGIForSeqId (&vn);
if (uid > 0) {
- BSWrite (bs, &uid, sizeof (Int4));
+ BSWrite (bs, &uid, sizeof (BIG_ID));
} else if (lastResortSeqIDs) {
abort = TRUE;
}
@@ -8717,7 +8775,9 @@ static BioseqSetPtr BioseqSetPartialRead (AsnIoPtr aip, AsnTypePtr PNTR orig, Se
ret:
- AsnUnlinkType(*orig); /* unlink local tree */
+ if (orig != NULL) {
+ AsnUnlinkType(*orig); /* unlink local tree */
+ }
return bsp;
erret:
aip->io_failure = TRUE;
@@ -9181,7 +9241,9 @@ static SeqEntryPtr BioseqSetCopyReplace (AsnIoPtr aip_in, AsnIoPtr aip_out, SeqE
}
ret:
- AsnUnlinkType(*orig); /* unlink local tree */
+ if (orig != NULL) {
+ AsnUnlinkType(*orig); /* unlink local tree */
+ }
return sep_return;
erret:
aip_in->io_failure = TRUE;
@@ -10274,9 +10336,11 @@ NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Bool
}
/* set up on-all */
- tmp = SeqIdListToValNodeSeqIdList (*sip_list);
- SetOnAllValsForDescStreamList(sr.desc_list.head, tmp);
- tmp = ValNodeSeqIdListFree (tmp);
+ if (sip_list != NULL) {
+ tmp = SeqIdListToValNodeSeqIdList (*sip_list);
+ SetOnAllValsForDescStreamList(sr.desc_list.head, tmp);
+ tmp = ValNodeSeqIdListFree (tmp);
+ }
return sr.desc_list.head;
}
@@ -10634,9 +10698,9 @@ static Boolean StreamingReadWriteSeqSubmit (ValNodePtr desc_stream_list, AsnIoPt
if (AsnReadVal(aip_in, atp, &av) <= 0) goto erret; /* end struct */
if (! AsnCloseStruct(aip_out, atp, (Pointer)ssp)) goto erret;
-ret:
ssp = SeqSubmitFree (ssp);
- return TRUE;
+ return TRUE;
+
erret:
aip_in->io_failure = TRUE;
ssp = SeqSubmitFree(ssp);
@@ -10765,7 +10829,7 @@ NLM_EXTERN Pointer ReadAsnFastaOrFlatFileEx (
Boolean isProt = FALSE;
Int4 j;
long len;
- Char line [4096];
+ Char line [10000];
Boolean mayBeAccessionList = TRUE;
Boolean mayBePlainFasta = TRUE;
SeqFeatPtr nextsfp;
@@ -12019,7 +12083,7 @@ extern BioseqPtr ReadFastaOnly (FILE *fp,
* FASTA other than numbers.
*/
-static ValNodePtr ReadDeltaLits (FileCachePtr fcp, BoolPtr perr, CharPtr idstr)
+static ValNodePtr ReadDeltaLits (FileCachePtr fcp, BoolPtr perr, BoolPtr cerr, CharPtr idstr)
{
ByteStorePtr bs = NULL;
@@ -12092,6 +12156,9 @@ static ValNodePtr ReadDeltaLits (FileCachePtr fcp, BoolPtr perr, CharPtr idstr)
{
*perr |= error_flag;
}
+ if (cerr != NULL) {
+ *cerr |= fcp->failed;
+ }
}
}
@@ -12109,7 +12176,7 @@ static ValNodePtr ReadDeltaLits (FileCachePtr fcp, BoolPtr perr, CharPtr idstr)
/* perrors is set to TRUE if characters other than digits had to be stripped
* from the FASTA sequence characters.
*/
-static BioseqPtr ReadDeltaSet (FileCachePtr fcp, BoolPtr perrors, CharPtr idstr)
+static BioseqPtr ReadDeltaSet (FileCachePtr fcp, BoolPtr perrors, BoolPtr cerrors, CharPtr idstr)
{
ByteStorePtr bs;
@@ -12121,7 +12188,7 @@ static BioseqPtr ReadDeltaSet (FileCachePtr fcp, BoolPtr perrors, CharPtr idstr)
if (fcp == NULL) return NULL;
- head = ReadDeltaLits (fcp, perrors, idstr);
+ head = ReadDeltaLits (fcp, perrors, cerrors, idstr);
if (head == NULL) return NULL;
if (head->next == NULL && head->choice == 1) {
@@ -12188,7 +12255,7 @@ static BioseqPtr ReadDeltaSet (FileCachePtr fcp, BoolPtr perrors, CharPtr idstr)
return bsp;
}
-NLM_EXTERN BioseqPtr ReadDeltaFastaEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped)
+NLM_EXTERN BioseqPtr ReadDeltaFastaExEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped, BoolPtr cache_failed)
{
Int4 begin, pos;
@@ -12283,38 +12350,46 @@ NLM_EXTERN BioseqPtr ReadDeltaFastaEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr c
title = StringSaveNoNull (tmp);
}
- bsp = ReadDeltaSet (&fc, chars_stripped, seqid);
+ bsp = ReadDeltaSet (&fc, chars_stripped, cache_failed, seqid);
if (bsp != NULL) {
sep = SeqEntryNew ();
- if (sep != NULL) {
+ if (sep == NULL) {
+ Message (MSG_POSTERR, "Out of memory!");
+ bsp = BioseqFree (bsp);
+ } else {
sep->choice = 1;
sep->data.ptrvalue = (Pointer) bsp;
SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
- }
- if (title != NULL) {
- SeqDescrAddPointer (&(bsp->descr), Seq_descr_title, (Pointer) title);
- }
+ if (title != NULL) {
+ SeqDescrAddPointer (&(bsp->descr), Seq_descr_title, (Pointer) title);
+ }
- if (StringNICmp (seqid, "lcl|", 4) == 0
- ||StringNICmp (seqid, "gnl|", 4) == 0) {
- bsp->id = SeqIdParse (seqid);
- }
- if (bsp->id == NULL) {
- bsp->id = MakeSeqID (seqid);
- }
- SeqMgrAddToBioseqIndex (bsp);
+ if (StringNICmp (seqid, "lcl|", 4) == 0
+ ||StringNICmp (seqid, "gnl|", 4) == 0) {
+ bsp->id = SeqIdParse (seqid);
+ }
+ if (bsp->id == NULL) {
+ bsp->id = MakeSeqID (seqid);
+ }
+ if (bsp->id == NULL) {
+ Message (MSG_POSTERR, "Unable to make sequence identifier from '%s'", seqid);
+ bsp = BioseqFree (bsp);
+ } else {
+ SeqMgrAddToBioseqIndex (bsp);
- if (entityIDptr != NULL) {
- *entityIDptr = ObjMgrRegister (OBJ_BIOSEQ, (Pointer) bsp);
- }
+ if (entityIDptr != NULL) {
+ *entityIDptr = ObjMgrRegister (OBJ_BIOSEQ, (Pointer) bsp);
+ }
+ }
- pos = FileCacheTell (&fc);
- FileCacheSetup (&fc, fp);
- FileCacheSeek (&fc, pos);
- fseek (fp, pos, SEEK_SET);
+ pos = FileCacheTell (&fc);
+ FileCacheSetup (&fc, fp);
+ FileCacheSeek (&fc, pos);
+ fseek (fp, pos, SEEK_SET);
+ }
return bsp;
}
@@ -12335,12 +12410,21 @@ NLM_EXTERN BioseqPtr ReadDeltaFastaEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr c
return NULL;
}
+NLM_EXTERN BioseqPtr ReadDeltaFastaEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped)
+
+{
+ Boolean cache_failed = FALSE;
+
+ return ReadDeltaFastaExEx (fp, entityIDptr, chars_stripped, &cache_failed);
+}
+
NLM_EXTERN BioseqPtr ReadDeltaFasta (FILE *fp, Uint2Ptr entityIDptr)
{
+ Boolean cache_failed = FALSE;
Boolean chars_stripped = FALSE;
- return ReadDeltaFastaEx (fp, entityIDptr, &chars_stripped);
+ return ReadDeltaFastaExEx (fp, entityIDptr, &chars_stripped, &cache_failed);
}
NLM_EXTERN BioseqPtr ReadDeltaFastaWithEmptyDefline (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped)
@@ -12348,6 +12432,7 @@ NLM_EXTERN BioseqPtr ReadDeltaFastaWithEmptyDefline (FILE *fp, Uint2Ptr entityID
{
Int4 begin, pos;
BioseqPtr bsp = NULL;
+ Boolean cache_failed = FALSE;
FileCache fc;
Char line [4096], seqid [2048];
SeqEntryPtr sep;
@@ -12374,7 +12459,7 @@ NLM_EXTERN BioseqPtr ReadDeltaFastaWithEmptyDefline (FILE *fp, Uint2Ptr entityID
TrimSpacesAroundString (line);
if (line [0] == '>' && line [1] == 0)
{
- bsp = ReadDeltaSet (&fc, chars_stripped, NULL);
+ bsp = ReadDeltaSet (&fc, chars_stripped, &cache_failed, NULL);
if (bsp != NULL) {
@@ -13013,7 +13098,7 @@ NLM_EXTERN void PrintQualityScoresToBuffer (BioseqPtr bsp, Boolean gapIsZero, Po
if (max == INT4_MIN) {
max = 0;
}
- sprintf (buf, ">%s %s (Length:%ld, Min: %ld, Max: %ld)\n", id, title,
+ sprintf (buf, ">%s %s (Length: %ld, Min: %ld, Max: %ld)\n", id, title,
(long) len, (long) min, (long) max);
callback (buf, sizeof (buf), userdata);
@@ -14373,6 +14458,7 @@ NLM_EXTERN void FixAffiliationShortWordsInElement (CharPtr PNTR pEl)
ReplaceItemPair KnownAbbreviationList[] = {
+ { "Northwest a&F University", "Northwest A&F University" },
{ "po box", "PO Box" },
{ "Pobox", "PO Box" },
{ "P.O box", "P.O. Box" },
@@ -14559,7 +14645,7 @@ NLM_EXTERN SeqIdPtr CreateSeqIdFromText (CharPtr id_str, SeqEntryPtr sep)
}
}
- if (bsp == NULL && IsAllDigits (id_str)) {
+ if (bsp == NULL && StringIsAllDigits (id_str)) {
sprintf (tmpstr, "gi|%s", id_str);
sip = SeqIdParse (tmpstr);
if (sip != NULL) {
@@ -14620,6 +14706,30 @@ NLM_EXTERN Int4 GetDeltaSeqLen (DeltaSeqPtr dsp)
}
+NLM_EXTERN DeltaSeqPtr GetDeltaSeqForPosition(Int4 pos, BioseqPtr bsp, Int4Ptr pStart)
+{
+ DeltaSeqPtr dsp;
+ Int4 offset = 0;
+ Int4 len;
+
+ if (bsp == NULL || bsp->repr != Seq_repr_delta) {
+ return NULL;
+ }
+
+ for (dsp = (DeltaSeqPtr) (bsp->seq_ext); dsp != NULL; dsp = dsp->next) {
+ len = GetDeltaSeqLen(dsp);
+ if (offset + len > pos) {
+ if (pStart != NULL) {
+ *pStart = offset;
+ }
+ return dsp;
+ }
+ offset += len;
+ }
+ return NULL;
+}
+
+
/* The following section of code is used for retranslating a CDS and updating
* the protein features based on an alignment between the old and new protein
* sequences.
@@ -15279,6 +15389,68 @@ LocationContainsGaps
}
+NLM_EXTERN void SetPartialsAfterSplittingAtGap (SeqLocPtr before, SeqLocPtr after, Boolean set_partial_ends, Boolean partial5, Boolean partial3)
+{
+ Uint1 strand;
+
+ if (before == NULL && after == NULL) {
+ return;
+ } else if (before == NULL) {
+ strand = SeqLocStrand (after);
+ } else {
+ strand = SeqLocStrand (before);
+ }
+
+ if (strand == Seq_strand_minus)
+ {
+ if (before == NULL)
+ {
+ /* truncated at 3' end */
+ SetSeqLocPartial (after, partial5, set_partial_ends);
+ }
+ else
+ {
+ SetSeqLocPartial (after, partial5, TRUE);
+ }
+
+ }
+ else
+ {
+ if (before == NULL)
+ {
+ /* truncated at 5' end*/
+ SetSeqLocPartial (after, set_partial_ends, partial3);
+ }
+ else
+ {
+ SetSeqLocPartial (after, TRUE, partial3);
+ }
+ }
+
+ if (strand == Seq_strand_minus)
+ {
+ if (after == NULL)
+ {
+ /* truncated at 5' end*/
+ SetSeqLocPartial (before, set_partial_ends, partial3);
+ } else {
+ SetSeqLocPartial (before, TRUE, partial3);
+ }
+ }
+ else
+ {
+ if (after == NULL)
+ {
+ /* truncated */
+ SetSeqLocPartial (before, partial5, set_partial_ends);
+ } else {
+ SetSeqLocPartial (before, partial5, TRUE);
+ }
+ }
+
+}
+
+
static SeqLocPtr
RemoveGapsFromDeltaLocation
(SeqLocPtr slp,
@@ -15384,55 +15556,7 @@ RemoveGapsFromDeltaLocation
seq_offset, bsp->length,
FALSE, &changed, &partial5, &partial3);
- /* handle partialness for ends */
- CheckSeqLocForPartial (after, &partial5, &partial3);
- if (strand == Seq_strand_minus)
- {
- if (before == NULL)
- {
- /* truncated at 3' end */
- SetSeqLocPartial (after, partial5, set_partial_ends);
- }
- else
- {
- SetSeqLocPartial (after, partial5, TRUE);
- }
-
- }
- else
- {
- if (before == NULL)
- {
- /* truncated at 5' end*/
- SetSeqLocPartial (after, set_partial_ends, partial3);
- }
- else
- {
- SetSeqLocPartial (after, TRUE, partial3);
- }
- }
-
- CheckSeqLocForPartial (before, &partial5, &partial3);
- if (strand == Seq_strand_minus)
- {
- if (after == NULL)
- {
- /* truncated at 5' end*/
- SetSeqLocPartial (before, set_partial_ends, partial3);
- } else {
- SetSeqLocPartial (before, TRUE, partial3);
- }
- }
- else
- {
- if (after == NULL)
- {
- /* truncated */
- SetSeqLocPartial (before, partial5, set_partial_ends);
- } else {
- SetSeqLocPartial (before, partial5, TRUE);
- }
- }
+ SetPartialsAfterSplittingAtGap(before, after, set_partial_ends, partial5, partial3);
/* we're done with these IDs now */
after_sip = SeqIdFree (after_sip);
@@ -15550,7 +15674,7 @@ NLM_EXTERN void AdjustFrame (SeqFeatPtr sfp, BioseqPtr oldprot)
{
best_frame = crp->frame;
}
- else
+ else if (newprot_str != NULL)
{
newprot_str [StringLen (newprot_str) - 1] = 0;
if (StringSearch (oldprot_str, newprot_str) != NULL
@@ -15814,7 +15938,7 @@ static SeqLocPtr MakeMixedLocFromLocList (SeqLocPtr loc_list)
NLM_EXTERN void AdjustFeatureForGapsCallback (SeqFeatPtr sfp, Pointer data)
{
AdjustFeatForGapPtr afgp;
- BioseqPtr protbsp = NULL, new_protbsp;
+ BioseqPtr protbsp = NULL, new_protbsp = NULL;
SeqFeatPtr new_sfp, tmp, gene, mrna;
Boolean partial5, partial3;
Uint2 entityID;
@@ -16175,39 +16299,6 @@ static ValNodePtr MakeTranscriptomeIDTokensFromLine (CharPtr line)
}
-NLM_EXTERN Boolean HasExistingSeqHistAssembly (ValNodePtr list)
-{
- TranscriptomeIdsPtr t;
- Boolean has_tables = FALSE;
-
- while (list != NULL && !has_tables) {
- t = list->data.ptrvalue;
- if (t != NULL && t->consensus_bsp != NULL
- && t->consensus_bsp->hist != NULL
- && t->consensus_bsp->hist->assembly != NULL) {
- has_tables = TRUE;
- }
- list = list->next;
- }
- return has_tables;
-}
-
-
-NLM_EXTERN void DeleteSeqHistAssembliesForList (ValNodePtr list)
-{
- TranscriptomeIdsPtr t;
-
- while (list != NULL) {
- t = (TranscriptomeIdsPtr) list->data.ptrvalue;
- if (t != NULL && t->consensus_bsp != NULL && t->consensus_bsp->hist != NULL
- && t->consensus_bsp->hist->assembly != NULL) {
- t->consensus_bsp->hist->assembly = SeqAlignFree (t->consensus_bsp->hist->assembly);
- }
- list = list->next;
- }
-}
-
-
static void AddTSARangeError (ValNodePtr PNTR range_list, CharPtr id, Int4 start, Int4 stop)
{
CharPtr big_range_fmt = "%s: Large gap in coverage (>50) from %d to %d";
@@ -16314,282 +16405,6 @@ NLM_EXTERN ValNodePtr ReportCoverageForBioseqSeqHist (BioseqPtr bsp)
}
-static Boolean IsSingleEstCoverage (BioseqPtr bsp)
-{
- SeqAlignPtr salp;
- SeqIdPtr sip;
-
- if (bsp == NULL || bsp->hist == NULL || bsp->hist->assembly == NULL) {
- return FALSE;
- }
- if (bsp->hist->assembly->next == NULL) {
- return TRUE;
- } else {
- sip = AlnMgr2GetNthSeqIdPtr (bsp->hist->assembly, 2);
- for (salp = bsp->hist->assembly->next; salp != NULL; salp = salp->next) {
- if (SeqIdComp (AlnMgr2GetNthSeqIdPtr (salp, 2), sip) == SIC_NO) {
- return FALSE;
- }
- }
- return TRUE;
- }
-}
-
-
-NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr list)
-{
- ValNodePtr range_list = NULL, new_list;
- TranscriptomeIdsPtr t;
- Char id_str[255];
- CharPtr good_fmt = "Coverage is complete for %s";
- CharPtr msg;
- Int4 num_single = 0, num_mult = 0;
- CharPtr single_fmt = "%d records are covered by a single EST";
- CharPtr mult_fmt = "%d records are covered by multiple ESTs";
-
- while (list != NULL) {
- t = (TranscriptomeIdsPtr) list->data.ptrvalue;
- if (t != NULL && t->consensus_bsp != NULL) {
- new_list = ReportCoverageForBioseqSeqHist (t->consensus_bsp);
- if (new_list == NULL) {
- SeqIdWrite (SeqIdFindBest (t->consensus_bsp->id, SEQID_GENBANK), id_str, PRINTID_REPORT, sizeof (id_str) - 1);
- msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (good_fmt) + StringLen (id_str)));
- sprintf (msg, good_fmt, id_str);
- ValNodeAddPointer (&range_list, 0, msg);
- } else {
- ValNodeLink (&range_list, new_list);
- }
- if (IsSingleEstCoverage(t->consensus_bsp)) {
- num_single++;
- } else {
- num_mult++;
- }
- }
- list = list->next;
- }
- msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (single_fmt) + 15));
- sprintf (msg, single_fmt, num_single);
- ValNodeAddPointer (&range_list, 0, msg);
- msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mult_fmt) + 15));
- sprintf (msg, mult_fmt, num_mult);
- ValNodeAddPointer (&range_list, 0, msg);
-
- return range_list;
-}
-
-
-NLM_EXTERN ValNodePtr ReportGapsInSeqHistAlignmentForBioseq (BioseqPtr bsp)
-{
- ValNodePtr err_list = NULL;
- SeqAlignPtr salp;
- DenseSegPtr dsp;
- Char id_buf[255];
- Char id_buf2[255];
- CharPtr err_msg;
- CharPtr gaps_fmt = "Alignment between %s and %s contains gaps";
- Int4 j, max_gap;
- Boolean has_gaps;
-
- if (bsp == NULL || bsp->hist == NULL || bsp->hist->assembly == NULL) return NULL;
-
- max_gap = bsp->length / 10;
- if (max_gap > 50) {
- max_gap = 50;
- }
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
- for (salp = bsp->hist->assembly; salp != NULL; salp = salp->next) {
- if (salp->segtype == SAS_DENSEG && (dsp = (DenseSegPtr) salp->segs) != NULL && dsp->numseg > 1) {
- has_gaps = FALSE;
- for (j = 0; j < dsp->numseg && !has_gaps; j++) {
- if (dsp->lens[j] >= max_gap) {
- if (dsp->starts[j * dsp->dim] == -1 || dsp->starts[(j * dsp->dim) + 1] == -1) {
- has_gaps = TRUE;
- }
- }
- }
- if (has_gaps) {
- SeqIdWrite (AlnMgr2GetNthSeqIdPtr (salp, 2), id_buf2, PRINTID_REPORT, sizeof (id_buf2) - 1);
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (gaps_fmt) + StringLen (id_buf) + StringLen (id_buf2)));
- sprintf (err_msg, gaps_fmt, id_buf, id_buf2);
- ValNodeAddPointer (&err_list, 0, err_msg);
- }
- }
- }
- return err_list;
-}
-
-
-NLM_EXTERN ValNodePtr ReportGapsInSeqHistAlignmentsForIdsList (ValNodePtr list)
-{
- TranscriptomeIdsPtr t;
- ValNodePtr errs = NULL;
-
- while (list != NULL) {
- t = (TranscriptomeIdsPtr) list->data.ptrvalue;
- if (t != NULL && t->consensus_bsp != NULL) {
- ValNodeLink (&errs, ReportGapsInSeqHistAlignmentForBioseq (t->consensus_bsp));
- }
- list = list->next;
- }
- return errs;
-}
-
-
-static ValNodePtr GetSeqHistAlignmentSummaryForRange (Int4 cons_start, Int4 cons_stop, SeqAlignPtr salp_list)
-{
- Int4 aln_start, aln_stop;
- Uint1Ptr buf1 = NULL, buf2 = NULL;
- Int4 buf_size = -1;
- Int4 aln_len = 0;
- SeqIdPtr sip;
- Char id1[255], id2[255];
- CharPtr aln_msg;
- ValNodePtr summary = NULL;
- Boolean show_consensus = TRUE;
-
- if (salp_list == NULL || cons_start < 0 || cons_stop < cons_start) {
- return NULL;
- }
-
- while (salp_list != NULL) {
- sip = AlnMgr2GetNthSeqIdPtr (salp_list, 1);
- SeqIdWrite (sip, id1, PRINTID_REPORT, sizeof (id1) - 1);
- sip = SeqIdFree (sip);
- sip = AlnMgr2GetNthSeqIdPtr (salp_list, 2);
- SeqIdWrite (sip, id2, PRINTID_REPORT, sizeof (id2) - 1);
- sip = SeqIdFree (sip);
-
- aln_start = AlnMgr2MapBioseqToSeqAlign(salp_list, cons_start, 1);
- aln_stop = AlnMgr2MapBioseqToSeqAlign(salp_list, cons_stop, 1);
-
- if (aln_start >= 0 && aln_stop >= 0) {
- if (buf_size < aln_stop - aln_start + 2) {
- buf1 = MemFree (buf1);
- buf2 = MemFree (buf2);
- buf_size = aln_stop - aln_start + 2;
- buf1 = (Uint1Ptr) MemNew (sizeof (Uint1) * buf_size);
- buf2 = (Uint1Ptr) MemNew (sizeof (Uint1) * buf_size);
- }
- if (show_consensus) {
- AlignmentIntervalToString (salp_list, 1, aln_start, aln_stop, 1, FALSE,
- buf1, buf2, &aln_len, FALSE);
- aln_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (id1) + aln_len + 3));
- StringCpy (aln_msg, id1);
- StringCat (aln_msg, " ");
- StringNCat (aln_msg, (CharPtr) buf2, aln_len);
- ValNodeAddPointer (&summary, 0, aln_msg);
- if (aln_start == aln_stop) {
- show_consensus = FALSE;
- }
- }
- AlignmentIntervalToString (salp_list, 2, aln_start, aln_stop, 2, FALSE,
- buf1, buf2, &aln_len, FALSE);
- aln_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (id2) + aln_len + 3));
- StringCpy (aln_msg, id2);
- StringCat (aln_msg, " ");
- StringNCat (aln_msg, (CharPtr) buf2, aln_len);
- if (show_consensus) {
- StringCat (aln_msg, "\n");
- }
- ValNodeAddPointer (&summary, 0, aln_msg);
- }
- salp_list = salp_list->next;
- }
- buf1 = MemFree (buf1);
- buf2 = MemFree (buf2);
- return summary;
-}
-
-
-NLM_EXTERN ValNodePtr ReportConsensusMatchForBioseqSeqHist (BioseqPtr bsp)
-{
- ValNodePtr err_list = NULL;
- SeqAlignPtr salp;
- Int4 assembly_from = 0, assembly_to = 0;
- Int4 aln_pos, i, read_pos, pct;
- Int4Ptr coverage;
- Int4Ptr match;
- Char id_buf[255];
- CharPtr err_msg;
- CharPtr err_fmt = "Consensus sequence %s matches less than half of reads at position %d";
- CharPtr err_range_fmt = "Consensus sequence %s matches less than half of reads at positions %d-%d";
- SeqIdPtr sip;
- BioseqPtr read_bsp;
- Char buf1[2], buf2[2];
- Uint1 read_strand;
- Int4 start_range = -1;
-
- if (bsp == NULL || bsp->hist == NULL || bsp->hist->assembly == NULL) return NULL;
-
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
- coverage = (Int4Ptr) MemNew (sizeof (Int4) * bsp->length);
- MemSet (coverage, 0, sizeof (Int4) * bsp->length);
- match = (Int4Ptr) MemNew (sizeof (Int4) * bsp->length);
- MemSet (match, 0, sizeof (Int4) * bsp->length);
-
- for (salp = bsp->hist->assembly; salp != NULL; salp = salp->next) {
- sip = AlnMgr2GetNthSeqIdPtr (salp, 2);
- read_bsp = BioseqLockById (sip);
- sip = SeqIdFree (sip);
- read_strand = SeqAlignStrand (salp, 1);
- AlnMgr2GetNthSeqRangeInSA(salp, 1, &assembly_from, &assembly_to);
- for (i = assembly_from; i <= assembly_to; i++) {
- aln_pos = AlnMgr2MapBioseqToSeqAlign(salp, i, 1);
- if ((read_pos = AlnMgr2MapSeqAlignToBioseq (salp, aln_pos, 2)) > -1) {
- coverage[i] ++;
- SeqPortStreamInt (bsp, i, i, Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf1, NULL);
- SeqPortStreamInt (read_bsp, read_pos, read_pos, read_strand, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf2, NULL);
- if (buf1[0] == buf2[0]) {
- match[i] ++;
- }
- }
- }
- BioseqUnlock (read_bsp);
- }
-
- for (i = assembly_from; i <= assembly_to; i++) {
- if (coverage[i] > 0) {
- pct = (100 * match[i]) / coverage[i];
- if (pct < 50) {
- if (start_range < 0) {
- start_range = i;
- }
- } else {
- if (start_range > -1) {
- if (i > start_range + 1) {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (err_range_fmt) + StringLen (id_buf) + 30));
- sprintf (err_msg, err_range_fmt, id_buf, start_range + 1, i);
- } else {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (err_fmt) + StringLen (id_buf) + 15));
- sprintf (err_msg, err_fmt, id_buf, i);
- }
- ValNodeAddPointer (&err_list, 0, err_msg);
- ValNodeLink (&err_list, GetSeqHistAlignmentSummaryForRange (start_range, i - 1, bsp->hist->assembly));
- start_range = -1;
- }
- }
- }
- }
-
- if (start_range > -1) {
- if (i > start_range + 1) {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (err_range_fmt) + StringLen (id_buf) + 30));
- sprintf (err_msg, err_range_fmt, id_buf, start_range + 1, i);
- } else {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (err_fmt) + StringLen (id_buf) + 15));
- sprintf (err_msg, err_fmt, id_buf, i);
- }
- ValNodeAddPointer (&err_list, 0, err_msg);
- ValNodeLink (&err_list, GetSeqHistAlignmentSummaryForRange (start_range, i - 1, bsp->hist->assembly));
- start_range = -1;
- }
-
- coverage = MemFree (coverage);
- match = MemFree (match);
- return err_list;
-}
-
-
static int LIBCALLBACK SortAlignmentByRange (VoidPtr ptr1, VoidPtr ptr2)
{
@@ -16700,373 +16515,6 @@ extern void ReverseAlignmentStrand (SeqAlignPtr salp, Int4 nth)
}
-NLM_EXTERN ValNodePtr
-MakeTranscriptomeAssemblySeqHist
-(TranscriptomeIdsPtr t,
- LocalAlignFunc aln_func,
- Nlm_ChangeNotifyProc change_notify,
- Pointer change_userdata)
-{
- BioseqPtr read_bsp;
- ValNodePtr salp_list = NULL;
- SeqAlignPtr salp, salp_prev;
- ValNodePtr vnp;
- SeqIdPtr sip;
- Boolean dirty;
- ValNodePtr err_list = NULL;
- CharPtr err_msg;
- CharPtr no_aln_fmt = "No alignment between %s and consensus sequence %s";
- CharPtr invalid_aln_fmt = "Alignment between %s and consensus sequence %s is invalid";
- CharPtr not_replaced_fmt = "Existing assembly for %s was not replaced";
- CharPtr no_download_fmt = "Unable to download %s";
- Char id_buf[255];
- Char consensus_id_buf[255];
- ErrSev old_sev;
-
- if (t == NULL || t->consensus_bsp == NULL || t->token_list == NULL) return NULL;
-
- SeqIdWrite (SeqIdFindBest (t->consensus_bsp->id, SEQID_GENBANK),
- consensus_id_buf, PRINTID_REPORT, sizeof (consensus_id_buf) - 1);
-
- if (t->consensus_bsp->hist != NULL && t->consensus_bsp->hist->assembly != NULL) {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (not_replaced_fmt) + StringLen (consensus_id_buf)));
- sprintf (err_msg, not_replaced_fmt, id_buf);
- ValNodeAddPointer (&err_list, 0, err_msg);
- return err_list;
- }
-
-
- for (vnp = t->token_list; vnp != NULL; vnp = vnp->next) {
- if (StringChr (vnp->data.ptrvalue, '|') == NULL) {
- sprintf (id_buf, "gb|%s", (char *) vnp->data.ptrvalue);
- } else {
- sprintf (id_buf, "%s", (char *) vnp->data.ptrvalue);
- }
- sip = MakeSeqID (id_buf);
- read_bsp = BioseqLockById (sip);
- if (read_bsp == NULL) {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_download_fmt) + StringLen (id_buf)));
- sprintf (err_msg, no_download_fmt, id_buf);
- ValNodeAddPointer (&err_list, 0, err_msg);
- }
- sip = SeqIdFree (sip);
-
- if (read_bsp == NULL) continue;
- salp = aln_func (t->consensus_bsp, read_bsp);
-
- old_sev = ErrSetMessageLevel (SEV_INFO);
- SeqIdWrite (SeqIdFindBest (read_bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
- if (salp == NULL)
- {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_aln_fmt) + StringLen (id_buf) + StringLen (consensus_id_buf)));
- sprintf (err_msg, no_aln_fmt, id_buf, consensus_id_buf);
- ValNodeAddPointer (&err_list, 0, err_msg);
- }
- else if (! ValidateSeqAlign (salp, t->consensus_bsp->idx.entityID, FALSE, FALSE, TRUE, FALSE, FALSE, &dirty))
- {
- /* if the new alignment wasn't valid, don't add to multiple alignment */
- salp = SeqAlignFree (salp);
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (invalid_aln_fmt) + StringLen (id_buf) + StringLen (consensus_id_buf)));
- sprintf (err_msg, invalid_aln_fmt, id_buf, consensus_id_buf);
- ValNodeAddPointer (&err_list, 0, err_msg);
- }
- else
- {
- ValNodeAddPointer (&salp_list, OBJ_SEQALIGN, salp);
- }
- ErrSetMessageLevel (old_sev);
-
- if (change_notify) {
- change_notify (change_userdata);
- }
- }
-
- salp_list = ValNodeSort (salp_list, SortAlignmentByRange);
-
- if (salp_list != NULL) {
- if (t->consensus_bsp->hist == NULL) {
- t->consensus_bsp->hist = SeqHistNew ();
- }
- if (t->consensus_bsp->hist->assembly != NULL) {
- t->consensus_bsp->hist->assembly = SeqAlignFree (t->consensus_bsp->hist->assembly);
- }
- t->consensus_bsp->hist->assembly = salp_list->data.ptrvalue;
- salp_prev = t->consensus_bsp->hist->assembly;
- for (vnp = salp_list->next; vnp != NULL; vnp = vnp->next) {
- salp_prev->next = vnp->data.ptrvalue;
- salp_prev = salp_prev->next;
- }
- }
-
- return err_list;
-}
-
-
-NLM_EXTERN ValNodePtr
-ApplyTranscriptomeIdsListToSeqEntrySeqHist
-(ValNodePtr list,
- LocalAlignFunc aln_func,
- Nlm_ChangeNotifyProc change_notify,
- Pointer change_userdata)
-{
- ValNodePtr err_list = NULL;
-
- while (list != NULL) {
- ValNodeLink (&err_list, MakeTranscriptomeAssemblySeqHist (list->data.ptrvalue, aln_func, change_notify, change_userdata));
- list = list->next;
- }
- return err_list;
-}
-
-
-NLM_EXTERN TranscriptomeIdsPtr TranscriptomeIdsNew (BioseqPtr bsp, ValNodePtr token_list)
-{
- TranscriptomeIdsPtr t;
-
- t = (TranscriptomeIdsPtr) MemNew (sizeof (TranscriptomeIdsData));
- t->consensus_bsp = bsp;
- t->token_list = token_list;
- return t;
-}
-
-
-NLM_EXTERN TranscriptomeIdsPtr TranscriptomeIdsFree (TranscriptomeIdsPtr t)
-{
- if (t != NULL) {
- t->token_list = ValNodeFreeData (t->token_list);
- t = MemFree (t);
- }
- return t;
-}
-
-
-NLM_EXTERN ValNodePtr TranscriptomeIdsListFree (ValNodePtr list)
-{
- ValNodePtr list_next;
- while (list != NULL) {
- list_next = list->next;
- list->next = NULL;
- list->data.ptrvalue = TranscriptomeIdsFree (list->data.ptrvalue);
- list = ValNodeFree (list);
- list = list_next;
- }
- return list;
-}
-
-
-static BioseqPtr GetTranscriptomeBioseqFromStringId (CharPtr str)
-{
- BioseqPtr bsp = NULL, tbsp;
- CharPtr rev_id;
- Int4 len;
- SeqMgrPtr smp;
- Int4 i, j, num = -1, imin = 0, imax;
- SeqIdIndexElementPtr PNTR sipp;
- SeqEntryPtr scope;
- Boolean found;
-
- if (StringHasNoText (str)) {
- return NULL;
- }
-
- scope = SeqEntryGetScope();
-
-/* SeqMgrProcessNonIndexedBioseq(FALSE); */
-
- /* reverse the string we're looking for */
- len = StringLen (str);
- rev_id = (CharPtr) MemNew (sizeof (Char) * (len + 1));
- for (i = 0; i < len; i++) {
- rev_id[i] = toupper (str[len - i - 1]);
- }
- rev_id[len] = 0;
-
- smp = SeqMgrReadLock();
- imax = smp->BioseqIndexCnt - 1;
- sipp = smp->BioseqIndex;
-
- while (imax >= imin) {
- i = (imax + imin)/2;
- if (StringLen (sipp[i]->str) > len && StringNCmp (sipp[i]->str, rev_id, len) == 0) {
- if (sipp[i]->str[len] == '|') {
- num = i;
- break;
- } else {
- /* search down the list */
- j = i - 1;
- found = FALSE;
- while (!found && j > -1 && StringLen (sipp[j]->str) > len
- && StringNCmp (sipp[j]->str, rev_id, len) == 0) {
- if (sipp[j]->str[len] == '|') {
- found = TRUE;
- } else {
- j--;
- }
- }
- if (found) {
- num = j;
- break;
- }
- /* search up the list */
- j = i + 1;
- while (!found && j < smp->BioseqIndexCnt && StringLen (sipp[j]->str) > len
- && StringNCmp (sipp[j]->str, rev_id, len) == 0) {
- if (sipp[j]->str[len] == '|') {
- found = TRUE;
- } else {
- j++;
- }
- }
- if (found) {
- num = j;
- break;
- } else {
- break;
- }
- }
- } else if ((j = StringCmp (sipp[i]->str, rev_id)) > 0) {
- imax = i - 1;
- } else if (j < 0) {
- imin = i + 1;
- } else {
- num = i;
- break;
- }
- }
-
- if (num > -1) {
- if (scope == NULL) {
- /* no scope set, take the first one found */
- bsp = sipp[num]->omdp->dataptr;
- } else {
- /* check in scope */
- tbsp = (BioseqPtr)(sipp[num]->omdp->dataptr);
- if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
- {
- bsp = tbsp;
- }
- else
- { /* not in scope, could be duplicate SeqId */
- i = num-1;
- while ((i >= 0) && (bsp == NULL)
- && (StringLen (sipp[i]->str) > len
- && sipp[i]->str[len] == '|'
- && StringNCmp(sipp[i]->str, rev_id, len) == 0)) /* back up */
- {
- tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr);
- if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
- {
- bsp = tbsp;
- }
- i--;
- }
- i = num + 1;
- imax = smp->BioseqIndexCnt - 1;
- while ((bsp == NULL) && (i <= imax)
- && (StringLen (sipp[i]->str) > len
- && sipp[i]->str[len] == '|'
- && StringNCmp(sipp[i]->str, rev_id, len) == 0))
- {
- tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr);
- if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
- {
- bsp = tbsp;
- }
- i++;
- }
- }
- }
- }
-
- SeqMgrUnlock();
- return bsp;
-}
-
-
-NLM_EXTERN ValNodePtr GetTranscriptomeIdsList (FILE *fp, SeqEntryPtr sep, ValNodePtr PNTR err_list)
-{
- ReadBufferData rbd;
- ValNodePtr list = NULL, token_list;
- SeqIdPtr sip;
- BioseqPtr bsp;
- CharPtr line, err_str;
- CharPtr bad_id_fmt = "Unable to make SeqId from %s";
-
- rbd.current_data = NULL;
- rbd.fp = fp;
-
- line = AbstractReadFunction (&rbd);
-
- while (line != NULL && line[0] != EOF) {
- if (!StringHasNoText (line)) {
- token_list = MakeTranscriptomeIDTokensFromLine (line);
- if (token_list != NULL && token_list->next != NULL) {
- sip = CreateSeqIdFromText (token_list->data.ptrvalue, sep);
- bsp = BioseqFind (sip);
- sip = SeqIdFree (sip);
- if (bsp == NULL) {
- bsp = GetTranscriptomeBioseqFromStringId (token_list->data.ptrvalue);
- }
- if (bsp == NULL) {
- err_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (token_list->data.ptrvalue) + StringLen (bad_id_fmt)));
- sprintf (err_str, bad_id_fmt, token_list->data.ptrvalue);
- ValNodeAddPointer (err_list, 0, err_str);
- } else {
- ValNodeAddPointer (&list, 0, TranscriptomeIdsNew(bsp, token_list->next));
- token_list->next = NULL;
- }
- }
- token_list = ValNodeFreeData (token_list);
- }
- line = MemFree (line);
- line = AbstractReadFunction (&rbd);
- }
- return list;
-}
-
-
-static void GetExistingTSATableIdsCallback (BioseqPtr bsp, Pointer userdata)
-{
- TranscriptomeIdsPtr t;
- ValNodePtr token_list = NULL;
- SeqAlignPtr salp;
- Char buf[255];
- DenseSegPtr dsp;
-
- if (bsp == NULL || userdata == NULL || ISA_aa (bsp->mol) || bsp->hist == NULL || bsp->hist->assembly == NULL) {
- return;
- }
-
- salp = bsp->hist->assembly;
- while (salp != NULL) {
- if (salp->segtype == SAS_DENSEG && salp->segs != NULL) {
- dsp = (DenseSegPtr) salp->segs;
- if (dsp->dim == 2 && dsp->ids != NULL && dsp->ids->next != NULL) {
- SeqIdWrite (dsp->ids->next, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
- ValNodeAddPointer (&token_list, 0, StringSave (buf));
- }
- }
- salp = salp->next;
- }
-
- if (token_list != NULL) {
- t = TranscriptomeIdsNew (bsp, token_list);
- if (t != NULL) {
- ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, t);
- }
- }
-}
-
-
-NLM_EXTERN ValNodePtr GetExistingTSATableIds (SeqEntryPtr sep)
-{
- ValNodePtr ids_list = NULL;
-
- VisitBioseqsInSep (sep, &ids_list, GetExistingTSATableIdsCallback);
- return ids_list;
-}
-
-
static Int4 ReadNumberFromPortionOfString (CharPtr str, Int4 len)
{
CharPtr num_buf;
@@ -17097,8 +16545,8 @@ static Boolean IsStringInSpan (CharPtr str, CharPtr first, CharPtr second)
return FALSE;
}
- if (IsAllDigits (first)) {
- if (IsAllDigits (str) && IsAllDigits (second)) {
+ if (StringIsAllDigits (first)) {
+ if (StringIsAllDigits (str) && StringIsAllDigits (second)) {
str_num = atoi (str);
first_num = atoi (first);
second_num = atoi (second);
@@ -17107,15 +16555,15 @@ static Boolean IsStringInSpan (CharPtr str, CharPtr first, CharPtr second)
rval = TRUE;
}
}
- } else if (IsAllDigits(second)) {
+ } else if (StringIsAllDigits(second)) {
cp = first;
while (!isdigit (*cp)) {
prefix_len ++;
cp++;
}
if (StringNCmp (str, first, prefix_len) == 0
- && IsAllDigits (str + prefix_len)
- && IsAllDigits (first + prefix_len)) {
+ && StringIsAllDigits (str + prefix_len)
+ && StringIsAllDigits (first + prefix_len)) {
first_num = atoi (cp);
second_num = atoi (second);
str_num = atoi (str);
@@ -17136,7 +16584,7 @@ static Boolean IsStringInSpan (CharPtr str, CharPtr first, CharPtr second)
if (*cp1 != 0 && *cp2 != 0
&& isdigit (*cp1) && isdigit (*cp2)
&& StringNCmp (str, first, prefix_len) == 0) {
- if (IsAllDigits (cp1) && IsAllDigits (cp2) && IsAllDigits (str + prefix_len)) {
+ if (StringIsAllDigits (cp1) && StringIsAllDigits (cp2) && StringIsAllDigits (str + prefix_len)) {
first_num = atoi (cp1);
second_num = atoi (cp2);
str_num = atoi (str + prefix_len);
@@ -17170,6 +16618,7 @@ static Boolean GetSpanFromHyphenInString (CharPtr str, CharPtr hyphen, CharPtr P
{
CharPtr cp;
Int4 len;
+ Boolean rval;
*first = NULL;
*second = NULL;
@@ -17211,7 +16660,19 @@ static Boolean GetSpanFromHyphenInString (CharPtr str, CharPtr hyphen, CharPtr P
StringNCpy (*second, hyphen + 1, len);
(*second)[len] = 0;
TrimSpacesAroundString (*second);
- return TRUE;
+
+ rval = TRUE;
+ if (StringHasNoText (*first) || StringHasNoText (*second)) {
+ rval = FALSE;
+ } else if (!isdigit ((*first)[StringLen (*first) - 1]) || !isdigit ((*second)[StringLen (*second) - 1])) {
+ /* if this is a span, then neither end point can end with anything other than a number */
+ rval = FALSE;
+ }
+ if (!rval) {
+ *first = MemFree (*first);
+ *second = MemFree (*second);
+ }
+ return rval;
}
@@ -17624,6 +17085,7 @@ NLM_EXTERN ValNodePtr FlipTabTableAxes (ValNodePtr row_list)
{
ValNodePtr vnp, vnp_c;
ValNodePtr new_table = NULL, vnp_new_row = NULL, vnp_new;
+ Int4 expected_columns = 0, this_row_columns;
if (row_list == NULL) {
return NULL;
@@ -17633,6 +17095,7 @@ NLM_EXTERN ValNodePtr FlipTabTableAxes (ValNodePtr row_list)
for (vnp = row_list; vnp != NULL; vnp = vnp->next) {
vnp_c = vnp->data.ptrvalue;
vnp_new_row = new_table;
+ this_row_columns = 0;
while (vnp_c != NULL) {
if (vnp_new_row == NULL) {
vnp_new_row = ValNodeNew (new_table);
@@ -17642,8 +17105,24 @@ NLM_EXTERN ValNodePtr FlipTabTableAxes (ValNodePtr row_list)
vnp_new_row->data.ptrvalue = vnp_new;
vnp_new_row = vnp_new_row->next;
vnp_c = vnp_c->next;
+ this_row_columns++;
+ }
+ if (expected_columns < this_row_columns) {
+ expected_columns = this_row_columns;
+ } else {
+ while (this_row_columns < expected_columns) {
+ if (vnp_new_row == NULL) {
+ vnp_new_row = ValNodeNew (new_table);
+ }
+ vnp_new = vnp_new_row->data.ptrvalue;
+ ValNodeAddPointer (&vnp_new, 0, StringSave (""));
+ vnp_new_row->data.ptrvalue = vnp_new;
+ vnp_new_row = vnp_new_row->next;
+ this_row_columns++;
+ }
}
}
+ RemoveEmptyColumnsFromTabTable (&new_table);
return new_table;
}
@@ -18589,159 +18068,6 @@ static ValNodePtr AuthAffilListFree (ValNodePtr list)
}
-typedef struct splitpub {
- BioseqPtr bsp;
- ValNodePtr auth_affil_list;
-} SplitPubData, PNTR SplitPubPtr;
-
-
-static SplitPubPtr SplitPubNew (BioseqPtr bsp)
-{
- SplitPubPtr s;
-
- s = (SplitPubPtr) MemNew (sizeof (SplitPubData));
- s->bsp = bsp;
- s->auth_affil_list = NULL;
- return s;
-}
-
-
-static SplitPubPtr SplitPubFree (SplitPubPtr s)
-{
- if (s != NULL) {
- s->auth_affil_list = AuthAffilListFree (s->auth_affil_list);
- s = MemFree (s);
- }
- return s;
-}
-
-
-NLM_EXTERN ValNodePtr SplitPubListFree (ValNodePtr list)
-{
- ValNodePtr list_next;
-
- while (list != NULL) {
- list_next = list->next;
- list->next = NULL;
- list->data.ptrvalue = SplitPubFree (list->data.ptrvalue);
- list = ValNodeFree (list);
- list = list_next;
- }
- return list;
-}
-
-
-static int LIBCALLBACK SortBySplitPubTabTableRow (VoidPtr ptr1, VoidPtr ptr2)
-
-{
- ValNodePtr vnp1;
- ValNodePtr vnp2;
- ValNodePtr col1, col2;
- int rval = 0;
-
- if (ptr1 == NULL || ptr2 == NULL) return 0;
- vnp1 = *((ValNodePtr PNTR) ptr1);
- vnp2 = *((ValNodePtr PNTR) ptr2);
- if (vnp1 == NULL || vnp2 == NULL) return 0;
- col1 = vnp1->data.ptrvalue;
- col2 = vnp2->data.ptrvalue;
- while (col1 != NULL && col2 != NULL && rval == 0) {
- rval = StringCmp (col1->data.ptrvalue, col2->data.ptrvalue);
- col1 = col1->next;
- col2 = col2->next;
- }
- if (rval == 0) {
- if (col1 == NULL && col2 != NULL) {
- rval = -1;
- } else if (col1 != NULL && col2 == NULL) {
- rval = 1;
- }
- }
- return rval;
-}
-
-
-NLM_EXTERN ValNodePtr MakeSplitPubListFromTabList (ValNodePtr PNTR tab_table, SeqEntryPtr sep, ValNodePtr PNTR err_list)
-{
- ValNodePtr row, col;
- SeqIdPtr sip;
- BioseqPtr bsp = NULL;
- CharPtr last_id = NULL;
- CharPtr not_found_fmt = "Unable to locate sequence for %s";
- CharPtr err_msg;
- ValNodePtr split_pub_list = NULL;
- SplitPubPtr s = NULL;
- AuthAffilPtr a = NULL;
- CharPtr cp;
-
- if (tab_table == NULL || sep == NULL) {
- return NULL;
- }
-
- *tab_table = ValNodeSort (*tab_table, SortBySplitPubTabTableRow);
-
- for (row = *tab_table; row != NULL; row = row->next) {
- col = row->data.ptrvalue;
- if (col == NULL) {
- continue;
- }
- if (last_id == NULL || StringCmp (last_id, col->data.ptrvalue) != 0) {
- sip = CreateSeqIdFromText (col->data.ptrvalue, sep);
- bsp = BioseqFind (sip);
- sip = SeqIdFree (sip);
- if (bsp == NULL) {
- if (err_list != NULL) {
- err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (not_found_fmt) + StringLen (col->data.ptrvalue)));
- sprintf (err_msg, not_found_fmt, col->data.ptrvalue);
- ValNodeAddPointer (err_list, 0, err_msg);
- }
- s = NULL;
- } else {
- s = SplitPubNew (bsp);
- ValNodeAddPointer (&split_pub_list, 0, s);
- }
- a = NULL;
- last_id = col->data.ptrvalue;
- }
- if (s == NULL) {
- continue;
- }
- col = col->next;
- if (col == NULL) {
- continue;
- }
- if (a == NULL || StringCmp (a->affil, col->data.ptrvalue) != 0) {
- a = AuthAffilNew (col->data.ptrvalue);
- ValNodeAddPointer (&(s->auth_affil_list), 0, a);
- }
- col = col->next;
- if (col != NULL && col->data.ptrvalue != NULL) {
- /* skip dr, mr, ms, mrs title */
- cp = col->data.ptrvalue;
- if (StringNICmp (cp, "dr ", 3) == 0
- || StringNICmp (cp, "dr.", 3) == 0
- || StringNICmp (cp, "mr ", 3) == 0
- || StringNICmp (cp, "mr.", 3) == 0
- || StringNICmp (cp, "ms ", 3) == 0
- || StringNICmp (cp, "ms.", 3) == 0) {
- cp += 3;
- cp += StringSpn (cp, " ");
- } else if (StringNICmp (cp, "mrs ", 4) == 0
- || StringNICmp (cp, "mrs.", 4) == 0) {
- cp += 4;
- cp += StringSpn (cp, " ");
- } else if (StringNICmp (cp, "prof ", 5) == 0
- || StringNICmp (cp, "prof.", 5) == 0) {
- cp += 4;
- cp += StringSpn (cp, " ");
- }
- ValNodeAddPointer (&a->authors, 0, StringSave (cp));
- }
- }
- return split_pub_list;
-}
-
-
NLM_EXTERN AuthListPtr GetAuthorListForPub (PubPtr the_pub)
{
CitGenPtr cgp;
@@ -18782,227 +18108,6 @@ NLM_EXTERN AuthListPtr GetAuthorListForPub (PubPtr the_pub)
}
-static void ReplaceAuthorListForPub (PubPtr the_pub, AuthListPtr new_alp)
-{
- CitGenPtr cgp;
- CitSubPtr csp;
- CitArtPtr cap;
- CitBookPtr cbp;
- CitPatPtr cpp;
- AuthListPtr alp = NULL;
-
- if (the_pub == NULL) return;
-
- switch (the_pub->choice) {
- case PUB_Gen :
- cgp = (CitGenPtr) the_pub->data.ptrvalue;
- alp = cgp->authors;
- cgp->authors = new_alp;
- break;
- case PUB_Sub :
- csp = (CitSubPtr) the_pub->data.ptrvalue;
- alp = csp->authors;
- csp->authors = new_alp;
- break;
- case PUB_Article :
- cap = (CitArtPtr) the_pub->data.ptrvalue;
- alp = cap->authors;
- cap->authors = new_alp;
- break;
- case PUB_Book :
- case PUB_Man :
- cbp = (CitBookPtr) the_pub->data.ptrvalue;
- alp = cbp->authors;
- cbp->authors = new_alp;
- break;
- case PUB_Patent :
- cpp = (CitPatPtr) the_pub->data.ptrvalue;
- alp = cpp->authors;
- cpp->authors = new_alp;
- break;
- default :
- break;
- }
- alp = AuthListFree (alp);
-}
-
-
-static Boolean DoesStringMatchAuthor (CharPtr str, AuthorPtr author)
-{
- Boolean rval = FALSE;
- NameStdPtr nsp;
- CharPtr cp;
- Int4 len;
-
- if (StringHasNoText (str) || author == NULL || author->name == NULL) {
- return FALSE;
- }
- switch (author->name->choice) {
- case 2:
- nsp = (NameStdPtr) author->name->data;
- if (nsp != NULL) {
- cp = StringRChr (str, ' ');
- if (cp == NULL) {
- cp = StringRChr (str, '.');
- }
- if (cp != NULL && StringCmp (cp + 1, nsp->names[0]) == 0) {
- len = StringCSpn (str, " .");
- if (StringNCmp (str, nsp->names[1], len) == 0) {
- rval = TRUE;
- }
- }
- }
- break;
- case 4:
- case 5:
- if (StringCmp (author->name->data, str) == 0) {
- rval = TRUE;
- }
- break;
- }
- return rval;
-}
-
-
-static AuthListPtr RemoveAuthorsFromAuthList (AuthListPtr alp, ValNodePtr auth_list)
-{
- AuthListPtr removed = NULL;
- ValNodePtr vnp_a, vnp_next, vnp_remove, vnp_prev;
- ValNodePtr extract_list = NULL;
-
- if (alp == NULL || auth_list == NULL || alp->names == NULL || alp->choice != 1) {
- return NULL;
- }
-
- for (vnp_remove = auth_list; vnp_remove != NULL; vnp_remove = vnp_remove->next) {
- vnp_prev = NULL;
- for (vnp_a = alp->names; vnp_a != NULL; vnp_a = vnp_next) {
- vnp_next = vnp_a->next;
- if ((vnp_a->choice == 1 && DoesStringMatchAuthor (vnp_remove->data.ptrvalue, vnp_a->data.ptrvalue))
- || (vnp_a->choice == 2 && StringCmp (vnp_remove->data.ptrvalue, vnp_a->data.ptrvalue))) {
- if (vnp_prev == NULL) {
- alp->names = vnp_a->next;
- } else {
- vnp_prev->next = vnp_a->next;
- }
- vnp_a->next = NULL;
- ValNodeLink (&extract_list, vnp_a);
- } else {
- vnp_prev = vnp_a;
- }
- }
- }
-
- if (extract_list != NULL) {
- removed = AuthListNew ();
- removed->choice = 1;
- removed->names = extract_list;
- }
- return removed;
-}
-
-
-static AuthListPtr RemoveAuthorsFromPub (PubdescPtr pub, ValNodePtr auth_list)
-{
- AuthListPtr alp, removed_alp;
-
- if (pub == NULL || auth_list == NULL) return NULL;
- alp = GetAuthorListForPub (pub->pub);
- removed_alp = RemoveAuthorsFromAuthList (alp, auth_list);
- return removed_alp;
-}
-
-
-typedef struct newpub {
- SeqDescrPtr PNTR descr;
- PubdescPtr pub;
-} NewPubData, PNTR NewPubPtr;
-
-
-static NewPubPtr NewPubNew (SeqDescrPtr PNTR descr, PubdescPtr pub)
-{
- NewPubPtr n;
-
- n = (NewPubPtr) MemNew (sizeof (NewPubData));
- n->descr = descr;
- n->pub = pub;
- return n;
-}
-
-
-static void SplitPubsForBioseq (SplitPubPtr s)
-{
- SeqDescrPtr sdp, new_sdp;
- SeqDescrPtr PNTR descr;
- SeqMgrDescContext context;
- AuthListPtr alp;
- PubdescPtr pubdesc, pubdesc_new;
- ValNodePtr vnp;
- AuthAffilPtr aa;
- ValNodePtr new_pubs = NULL;
- ObjValNodePtr ovp;
- BioseqPtr p_bsp;
- BioseqSetPtr p_bssp;
- NewPubPtr n;
-
- if (s == NULL || s->bsp == NULL || s->auth_affil_list == NULL) {
- return;
- }
-
- for (sdp = SeqMgrGetNextDescriptor (s->bsp, NULL, Seq_descr_pub, &context);
- sdp != NULL;
- sdp = SeqMgrGetNextDescriptor (s->bsp, sdp, Seq_descr_pub, &context)) {
- pubdesc = sdp->data.ptrvalue;
- for (vnp = s->auth_affil_list; vnp != NULL; vnp = vnp->next) {
- aa = (AuthAffilPtr) vnp->data.ptrvalue;
- if (aa != NULL) {
- alp = RemoveAuthorsFromPub (pubdesc, aa->authors);
- if (alp != NULL) {
- pubdesc_new = AsnIoMemCopy (pubdesc, (AsnReadFunc) PubdescAsnRead, (AsnWriteFunc) PubdescAsnWrite);
- alp->affil = AffilNew ();
- alp->affil->affil = StringSave (aa->affil);
- ReplaceAuthorListForPub (pubdesc_new->pub, alp);
- descr = &(s->bsp->descr);
- if (sdp->extended == 1) {
- ovp = (ObjValNodePtr) sdp;
- if (ovp->idx.parenttype == OBJ_BIOSEQ) {
- p_bsp = (BioseqPtr) ovp->idx.parentptr;
- descr = &(p_bsp->descr);
- } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
- p_bssp = (BioseqSetPtr) ovp->idx.parentptr;
- descr = &(p_bssp->descr);
- }
- }
- ValNodeAddPointer (&new_pubs, 0, NewPubNew (descr, pubdesc_new));
- }
- }
- }
- }
- for (vnp = new_pubs; vnp != NULL; vnp = vnp->next) {
- n = (NewPubPtr) vnp->data.ptrvalue;
-
- new_sdp = SeqDescrNew (*(n->descr));
- new_sdp->choice = Seq_descr_pub;
- new_sdp->data.ptrvalue = n->pub;
- if (n->descr == NULL) {
- *(n->descr) = new_sdp;
- }
- }
- new_pubs = ValNodeFreeData (new_pubs);
-
-}
-
-
-NLM_EXTERN void SplitPubsByList (ValNodePtr split_list)
-{
- ValNodePtr vnp;
-
- for (vnp = split_list; vnp != NULL; vnp = vnp->next) {
- SplitPubsForBioseq (vnp->data.ptrvalue);
- }
-}
-
-
static void AddStructuredCommentCallback (BioseqPtr bsp, Pointer data)
{
UserObjectPtr uop;
@@ -19023,12 +18128,23 @@ static CharPtr official_prefix_list[] = {
"MIMS-Data",
"MIENS-Data",
"MIGS:3.0-Data",
+ "MIGS:4.0-Data",
+ "MIMS:3.0-Data",
+ "MIMS:4.0-Data",
+ "MIMARKS:3.0-Data",
+ "MIMARKS:4.0-Data",
"GISAID_EpiFlu(TM)Data",
"FluData",
"EpifluData",
"International Barcode of Life (iBOL)Data",
"Assembly-Data",
"Genome-Assembly-Data",
+ "Genome-Annotation-Data",
+ "RefSeq-Attributes",
+ "HCVDataBaseData",
+ "Evidence-Data",
+ "BWP:1.0",
+ "Taxonomic-Update-Statistics",
NULL
};
@@ -19154,6 +18270,45 @@ static CharPtr MakeStructuredCommentSuffixFromString (CharPtr orig)
}
+/* Sets the StructuredCommentPrefix and StructuredCommentSuffix fields of a
+ * "StructuredComment" user object from one root string.
+ *
+ * uop    - user object to modify; nothing happens unless its type string is
+ *          "StructuredComment" (compared case-insensitively)
+ * string - root text handed to MakeStructuredCommentPrefixFromString and
+ *          MakeStructuredCommentSuffixFromString; nothing happens when it
+ *          has no text
+ *
+ * String-valued (choice 1) prefix/suffix fields that already hold a value are
+ * overwritten in place.  A field that was not found is appended with
+ * AddItemStructuredCommentUserObject.
+ */
+NLM_EXTERN void SetStructuredCommentPrefixAndSuffix (UserObjectPtr uop, CharPtr string)
+{
+  CharPtr       new_str, str;
+  UserFieldPtr  ufp;
+  Boolean       found_prefix = FALSE, found_suffix = FALSE;
+
+  if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0 || StringHasNoText (string)) {
+    return;
+  }
+  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+    if (ufp->label != NULL
+        && ufp->choice == 1
+        && (str = (CharPtr) ufp->data.ptrvalue) != NULL) {
+      if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0) {
+        /* replace the old value with the freshly formatted prefix */
+        new_str = MakeStructuredCommentPrefixFromString (string);
+        str = MemFree (str);
+        ufp->data.ptrvalue = new_str;
+        found_prefix = TRUE;
+      } else if (StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) {
+        /* replace the old value with the freshly formatted suffix */
+        new_str = MakeStructuredCommentSuffixFromString (string);
+        str = MemFree (str);
+        ufp->data.ptrvalue = new_str;
+        found_suffix = TRUE;
+      }
+    }
+  }
+  if (!found_prefix) {
+    new_str = MakeStructuredCommentPrefixFromString (string);
+    /* store the formatted prefix, not the raw root string, so an appended
+     * field holds the same kind of value the replace-in-place branch writes */
+    AddItemStructuredCommentUserObject (uop, "StructuredCommentPrefix", new_str);
+    /* NOTE(review): presumably the helper keeps its own copy of the string
+     * (the temporary was always freed right here) - confirm */
+    new_str = MemFree (new_str);
+  }
+  if (!found_suffix) {
+    new_str = MakeStructuredCommentSuffixFromString (string);
+    /* same as above: append the formatted suffix, not the raw root string */
+    AddItemStructuredCommentUserObject (uop, "StructuredCommentSuffix", new_str);
+    new_str = MemFree (new_str);
+  }
+}
+
+
/* This function reads in a tab-delimited table. The first line is a header.
* If not apply_to_all, the first column must contain sequence IDs. The remaining cells in the first
* line are the names of fields to create in structured comments.
@@ -20017,6 +19172,49 @@ NLM_EXTERN Boolean RemoveDuplicateNestedSetsForEntityID (Uint2 entityID)
}
+/* One row of the keyword <-> structured-comment-prefix lookup table.
+ * keyword - keyword text (may hold several keywords joined by ';')
+ * prefix  - full StructuredCommentPrefix value, including the ## markers
+ */
+typedef struct keywordstruccomm {
+ CharPtr keyword;
+ CharPtr prefix;
+} KeywordStrucCommData, PNTR KeywordStrucCommPtr;
+
+/* Lookup table searched by KeywordForStructuredCommentPrefix and
+ * StructuredCommentPrefixForKeyword; the {NULL, NULL} row terminates it.
+ * NOTE(review): the 3.0 rows join their two keywords with ';' while the 4.0
+ * rows use ':' after "GSC:MIxS".  SplitStringAtSemicolon only splits on ';',
+ * so the 4.0 rows are treated as a single keyword - confirm that is intended.
+ */
+static KeywordStrucCommData s_StructuredCommentKeywords[] = {
+ {"GSC:MIGS:2.1", "##MIGS-Data-START##"},
+ {"GSC:MIMS:2.1", "##MIMS-Data-START##"},
+ {"GSC:MIENS:2.1", "##MIENS-Data-START##"},
+ {"GSC:MIxS;MIGS:3.0", "##MIGS:3.0-Data-START##"},
+ {"GSC:MIxS;MIMS:3.0", "##MIMS:3.0-Data-START##"},
+ {"GSC:MIxS;MIMARKS:3.0", "##MIMARKS:3.0-Data-START##"},
+ {"GSC:MIxS:MIGS:4.0", "##MIGS:4.0-Data-START##" },
+ {"GSC:MIxS:MIMS:4.0", "##MIMS:4.0-Data-START##" },
+ {"GSC:MIxS:MIMARKS:4.0", "##MIMARKS:4.0-Data-START##" },
+ { NULL, NULL} };
+
+/* Looks up the keyword that goes with a structured-comment prefix.
+ * The comparison is case-insensitive.  Returns a pointer into the static
+ * s_StructuredCommentKeywords table (not a copy), or NULL when no row matches.
+ */
+NLM_EXTERN CharPtr KeywordForStructuredCommentPrefix (CharPtr prefix)
+{
+  KeywordStrucCommPtr  row;
+
+  /* walk the table until the NULL-prefix sentinel row */
+  for (row = s_StructuredCommentKeywords; row->prefix != NULL; row++) {
+    if (StringICmp (prefix, row->prefix) != 0) {
+      continue;
+    }
+    return row->keyword;
+  }
+  return NULL;
+}
+
+
+/* Looks up the structured-comment prefix that goes with a keyword.
+ * The comparison is case-insensitive and uses the whole table keyword string.
+ * Returns a pointer into the static s_StructuredCommentKeywords table (not a
+ * copy), or NULL when no row matches.
+ */
+NLM_EXTERN CharPtr StructuredCommentPrefixForKeyword (CharPtr keyword)
+{
+  KeywordStrucCommPtr  row;
+
+  /* walk the table until the NULL-keyword sentinel row */
+  for (row = s_StructuredCommentKeywords; row->keyword != NULL; row++) {
+    if (StringICmp (keyword, row->keyword) != 0) {
+      continue;
+    }
+    return row->prefix;
+  }
+  return NULL;
+}
+
+
NLM_EXTERN CharPtr KeywordForStructuredCommentName (UserObjectPtr uop)
{
UserFieldPtr ufp;
@@ -20035,21 +19233,7 @@ NLM_EXTERN CharPtr KeywordForStructuredCommentName (UserObjectPtr uop)
}
}
- if (prefix == NULL) {
- keyword = NULL;
- } else if (StringCmp (prefix, "##MIGS-Data-START##") == 0) {
- keyword = StringSave ("GSC:MIGS:2.1");
- } else if (StringCmp (prefix, "##MIMS-Data-START##") == 0) {
- keyword = StringSave ("GSC:MIMS:2.1");
- } else if (StringCmp (prefix, "##MIENS-Data-START##") == 0) {
- keyword = StringSave ("GSC:MIENS:2.1");
- } else if (StringCmp (prefix, "##MIGS:3.0-Data-START##") == 0) {
- keyword = StringSave ("GSC:MIxS;MIGS:3.0");
- } else if (StringCmp (prefix, "##MIMS:3.0-Data-START##") == 0) {
- keyword = StringSave ("GSC:MIxS;MIMS:3.0");
- } else if (StringCmp (prefix, "##MIMARKS:3.0-Data-START##") == 0) {
- keyword = StringSave ("GSC:MIxS;MIMARKS:3.0");
- }
+ keyword = StringSave(KeywordForStructuredCommentPrefix(prefix));
return keyword;
}
@@ -20060,7 +19244,8 @@ static Boolean HasKeyword (BioseqPtr bsp, CharPtr keyword)
SeqDescPtr sdp;
SeqMgrDescContext context;
GBBlockPtr gb;
- Boolean has_keyword = FALSE;
+ Boolean has_keyword = FALSE;
+ CharPtr str;
ValNodePtr vnp;
for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context);
@@ -20068,7 +19253,8 @@ static Boolean HasKeyword (BioseqPtr bsp, CharPtr keyword)
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &context)) {
if ((gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) {
for (vnp = gb->keywords; vnp != NULL && !has_keyword; vnp = vnp->next) {
- if (StringCmp (vnp->data.ptrvalue, keyword) == 0) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringCmp (str, keyword) == 0) {
has_keyword = TRUE;
}
}
@@ -20151,6 +19337,127 @@ static void RemoveKeywordFromBioseq (BioseqPtr bsp, CharPtr keyword)
}
}
+/* Splits a string at every ';' and returns the pieces as a ValNode list of
+ * string copies (added with ValNodeCopyStrEx), in their original order.
+ * Empty pieces are kept.  Returns NULL when the input has no text or the
+ * working copy cannot be made.  The input string itself is not modified.
+ */
+NLM_EXTERN ValNodePtr SplitStringAtSemicolon (CharPtr keyword)
+
+{
+  ValNodePtr  first = NULL, last = NULL;
+  CharPtr     scratch, piece, rest;
+
+  if (StringHasNoText (keyword)) {
+    return NULL;
+  }
+
+  /* cut up a private copy so the caller's string stays intact */
+  scratch = StringSave (keyword);
+  if (scratch == NULL) {
+    return NULL;
+  }
+
+  for (piece = scratch; piece != NULL; piece = rest) {
+    rest = StringChr (piece, ';');
+    if (rest != NULL) {
+      /* terminate this piece and step past the separator */
+      *rest = '\0';
+      rest++;
+    }
+    ValNodeCopyStrEx (&first, &last, 0, piece);
+  }
+
+  MemFree (scratch);
+
+  return first;
+}
+
+
+/* Builds a ValNode list of string copies covering every keyword in
+ * s_StructuredCommentKeywords.  Each table keyword is added as-is; when it
+ * contains a ';' its individual pieces (from SplitStringAtSemicolon) are
+ * appended right after it as well.
+ */
+NLM_EXTERN ValNodePtr GetAllStructuredCommentKeywords (void)
+
+{
+  ValNodePtr           first = NULL, last = NULL, pieces, end_of_pieces;
+  KeywordStrucCommPtr  row;
+
+  for (row = s_StructuredCommentKeywords; row->prefix != NULL; row++) {
+    ValNodeCopyStrEx (&first, &last, 0, row->keyword);
+    if (StringChr (row->keyword, ';') != NULL) {
+      pieces = SplitStringAtSemicolon (row->keyword);
+      if (pieces != NULL && last != NULL) {
+        /* splice the piece list onto the tail and move the tail to its end */
+        last->next = pieces;
+        for (end_of_pieces = pieces; end_of_pieces->next != NULL; end_of_pieces = end_of_pieces->next) {
+          continue;
+        }
+        last = end_of_pieces;
+      }
+    }
+  }
+
+  return first;
+}
+
+/* Removes a keyword from a Bioseq via RemoveKeywordFromBioseq.  When the
+ * keyword contains ';' each non-blank piece between semicolons is removed
+ * as well, after the removal attempt for the whole string.
+ */
+static void RemoveKeywordFromBioseqEx (BioseqPtr bsp, CharPtr keyword)
+
+{
+  ValNodePtr  pieces, node;
+  CharPtr     piece;
+
+  RemoveKeywordFromBioseq (bsp, keyword);
+
+  if (StringChr (keyword, ';') != NULL) {
+    pieces = SplitStringAtSemicolon (keyword);
+    if (pieces != NULL) {
+      node = pieces;
+      while (node != NULL) {
+        piece = (CharPtr) node->data.ptrvalue;
+        if (! StringHasNoText (piece)) {
+          RemoveKeywordFromBioseq (bsp, piece);
+        }
+        node = node->next;
+      }
+      ValNodeFreeData (pieces);
+    }
+  }
+}
+
+
+/* Returns TRUE when every non-blank ';'-separated piece of keyword is
+ * reported present on the Bioseq by HasKeyword.  Returns FALSE for a NULL
+ * Bioseq or a keyword with no text.
+ */
+NLM_EXTERN Boolean HasAllKeywordsForStructuredComment (BioseqPtr bsp, CharPtr keyword)
+
+{
+  ValNodePtr  pieces, node;
+  Boolean     all_present = TRUE;
+  CharPtr     piece;
+
+  if (bsp == NULL || StringHasNoText (keyword)) {
+    return FALSE;
+  }
+
+  pieces = SplitStringAtSemicolon (keyword);
+  node = pieces;
+  while (node != NULL) {
+    piece = (CharPtr) node->data.ptrvalue;
+    /* blank pieces are ignored; every other piece is checked */
+    if (! StringHasNoText (piece)) {
+      if (! HasKeyword (bsp, piece)) {
+        all_present = FALSE;
+      }
+    }
+    node = node->next;
+  }
+
+  ValNodeFreeData (pieces);
+
+  return all_present;
+}
+
+
+/* Returns TRUE when at least one non-blank ';'-separated piece of keyword
+ * is reported present on the Bioseq by HasKeyword.  Returns FALSE for a NULL
+ * Bioseq or a keyword with no text.
+ */
+NLM_EXTERN Boolean HasAnyKeywordForStructuredComment (BioseqPtr bsp, CharPtr keyword)
+
+{
+  ValNodePtr  pieces, node;
+  Boolean     any_present = FALSE;
+  CharPtr     piece;
+
+  if (bsp == NULL || StringHasNoText (keyword)) {
+    return FALSE;
+  }
+
+  pieces = SplitStringAtSemicolon (keyword);
+  node = pieces;
+  while (node != NULL) {
+    piece = (CharPtr) node->data.ptrvalue;
+    /* blank pieces are ignored; every other piece is checked */
+    if (! StringHasNoText (piece)) {
+      if (HasKeyword (bsp, piece)) {
+        any_present = TRUE;
+      }
+    }
+    node = node->next;
+  }
+
+  ValNodeFreeData (pieces);
+
+  return any_present;
+}
+
NLM_EXTERN Boolean HasKeywordForStructuredCommentName (BioseqPtr bsp, UserObjectPtr uop)
{
@@ -20210,12 +19517,36 @@ NLM_EXTERN void AddStructuredCommentKeywords (Uint2 entityID)
}
+/* Collects copies (StringSave) of every keyword found in the GenBank-block
+ * descriptors returned for the Bioseq by SeqMgrGetNextDescriptor.
+ * Returns a ValNode list holding the copies, or NULL when there are none.
+ */
+static ValNodePtr ListKeywordsOnBioseq (BioseqPtr bsp)
+{
+  ValNodePtr         collected = NULL;
+  ValNodePtr         kw;
+  SeqDescPtr         desc;
+  SeqMgrDescContext  dctx;
+  GBBlockPtr         gbblock;
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dctx);
+  while (desc != NULL) {
+    gbblock = (GBBlockPtr) desc->data.ptrvalue;
+    if (gbblock != NULL) {
+      kw = gbblock->keywords;
+      while (kw != NULL) {
+        ValNodeAddPointer (&collected, 0, StringSave (kw->data.ptrvalue));
+        kw = kw->next;
+      }
+    }
+    desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_genbank, &dctx);
+  }
+  return collected;
+}
+
+
static void RemoveStructuredCommentKeywordsCallback (BioseqPtr bsp, Pointer data)
{
SeqDescPtr sdp;
SeqMgrDescContext context;
UserObjectPtr uop;
- CharPtr keyword;
+ CharPtr keyword, prefix;
+ ValNodePtr keyword_list, prefix_list = NULL, vnp_k, vnp_p;
+ Boolean found;
if (bsp == NULL || ISA_aa (bsp->mol)) {
return;
@@ -20225,14 +19556,40 @@ static void RemoveStructuredCommentKeywordsCallback (BioseqPtr bsp, Pointer data
sdp != NULL;
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) {
+ continue;
+ }
keyword = KeywordForStructuredCommentName(uop);
if (keyword != NULL) {
if (IsStructuredCommentValid (uop, NULL, NULL) != eFieldValid_Valid) {
- RemoveKeywordFromBioseq(bsp, keyword);
+ RemoveKeywordFromBioseqEx (bsp, keyword);
+ } else {
+ ValNodeAddPointer (&prefix_list, 0, StringSave (GetStructuredCommentPrefix(uop)));
}
}
keyword = MemFree (keyword);
}
+
+ /* find keywords on the Bioseq */
+ keyword_list = ListKeywordsOnBioseq(bsp);
+ for (vnp_k = keyword_list; vnp_k != NULL; vnp_k = vnp_k->next) {
+ keyword = vnp_k->data.ptrvalue;
+ prefix = StructuredCommentPrefixForKeyword(keyword);
+ if (prefix != NULL) {
+ found = FALSE;
+ for (vnp_p = prefix_list; vnp_p != NULL && !found; vnp_p = vnp_p->next) {
+ if (StringICmp (prefix, vnp_p->data.ptrvalue) == 0) {
+ found = TRUE;
+ }
+ }
+ if (!found) {
+ RemoveKeywordFromBioseqEx (bsp, keyword);
+ }
+ }
+ }
+
+ prefix_list = ValNodeFreeData (prefix_list);
+ keyword_list = ValNodeFreeData (keyword_list);
}
@@ -20253,6 +19610,45 @@ NLM_EXTERN void RemoveStructuredCommentKeywords (Uint2 entityID)
}
+/* Bioseq visitor: removes every non-blank keyword in the ValNode list passed
+ * as userdata from the Bioseq.  Does nothing for a NULL Bioseq, a Bioseq for
+ * which ISA_aa (bsp->mol) is true, or a NULL list.
+ */
+static void RemoveAllStrucCommKeywordsCallback (BioseqPtr bsp, Pointer userdata)
+
+{
+  ValNodePtr  node;
+  CharPtr     word;
+
+  if (bsp == NULL || ISA_aa (bsp->mol) || userdata == NULL) {
+    return;
+  }
+
+  node = (ValNodePtr) userdata;
+  while (node != NULL) {
+    word = (CharPtr) node->data.ptrvalue;
+    if (! StringHasNoText (word)) {
+      RemoveKeywordFromBioseq (bsp, word);
+    }
+    node = node->next;
+  }
+}
+
+
+/* Removes every keyword listed by GetAllStructuredCommentKeywords from the
+ * Bioseqs of the entity, calls DeleteMarkedObjects, then marks the entity
+ * dirty and sends an update message.  Does nothing when the entity has no
+ * top SeqEntry.
+ */
+NLM_EXTERN void RemoveAllStructuredCommentKeywords (Uint2 entityID)
+
+{
+  SeqEntryPtr  top_sep;
+  ValNodePtr   all_keywords;
+
+  top_sep = GetTopSeqEntryForEntityID (entityID);
+  if (top_sep != NULL) {
+    all_keywords = GetAllStructuredCommentKeywords ();
+
+    VisitBioseqsInSep (top_sep, (Pointer) all_keywords, RemoveAllStrucCommKeywordsCallback);
+    DeleteMarkedObjects (entityID, 0, NULL);
+
+    /* the keyword list is only needed for the visit above */
+    ValNodeFreeData (all_keywords);
+
+    ObjMgrSetDirtyFlag (entityID, TRUE);
+    ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
+  }
+}
+
+
static Boolean StartsWith(CharPtr str, CharPtr start)
{
Int4 str_len, start_len;
@@ -20415,7 +19811,8 @@ static Boolean IsUSA (CharPtr country)
|| StringICmp (country, "United States of America") == 0
|| StringICmp (country, "United States") == 0
|| StringICmp (country, "U.S.A.") == 0
- || StringICmp (country, "U S A") == 0) {
+ || StringICmp (country, "U S A") == 0
+ || StringCmp (country, "US") == 0) {
return TRUE;
} else {
return FALSE;
@@ -20466,6 +19863,10 @@ NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp)
SeqEntryPtr sep;
LogInfoData lid;
SeqSubmitPtr ssp;
+ SubmitBlockPtr sbp;
+ ContactInfoPtr cip;
+ CitSubPtr csp;
+ AuthorPtr ap;
sep = GetTopSeqEntryForEntityID (entityID);
if (sep == NULL)
@@ -20476,9 +19877,23 @@ NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp)
VisitPubdescsInSep (sep, &lid, AbbreviateCitSubAffilStatesCallback);
ssp = FindSeqSubmitForSeqEntry (sep);
- if (ssp != NULL && ssp->sub != NULL && ssp->sub->cit != NULL) {
- FixStateAbbreviationsInCitSub (ssp->sub->cit, &lid);
+ if (ssp != NULL) {
+ sbp = ssp->sub;
+ if (sbp != NULL) {
+ csp = sbp->cit;
+ if (csp != NULL) {
+ FixStateAbbreviationsInCitSub (csp, &lid);
+ }
+ cip = sbp->contact;
+ if (cip != NULL) {
+ ap = cip->contact;
+ if (ap != NULL) {
+ FixStateAbbreviationsInAffil (ap->affil, NULL);
+ }
+ }
+ }
}
+
return lid.data_in_log;
}
@@ -20655,7 +20070,7 @@ static void ReportExonLocationChanges (ExonLocListPtr el, LogInfoPtr lip)
static Boolean AdjustedSpliceSitePairIsOk (CharPtr first, CharPtr last)
{
- if (first[0] == 'G' && (first[1] = 'T' || first[1] == 'C')
+ if (first[0] == 'G' && (first[1] == 'T' || first[1] == 'C')
&& last[0] == 'A' && last[1] == 'G')
{
return TRUE;
@@ -20738,13 +20153,13 @@ AdjustSeqLocPairBack
if (slp->choice == SEQLOC_PNT) {
spp = (SeqPntPtr) slp->data.ptrvalue;
sint = SeqIntNew ();
- sint->id = spp->id;
- spp->id = NULL;
+ sint->id = (SeqIdPtr)AsnIoMemCopy(spp->id, (AsnReadFunc) SeqIdAsnRead, (AsnWriteFunc) SeqIdAsnWrite);
sint->strand = spp->strand;
sint->to = spp->point;
sint->from = spp->point;
spp = SeqPntFree (spp);
slp->data.ptrvalue = sint;
+ slp->choice = SEQLOC_INT;
}
sint = (SeqIntPtr) slp->data.ptrvalue;
if (sint->strand == Seq_strand_minus) {
@@ -20773,9 +20188,117 @@ AdjustSeqLocPairBack
}
+/* Re-translates a coding region with ProteinFromCdRegionEx and compares the
+ * result to orig_prot_str.
+ * Returns FALSE when sfp is NULL or not a coding region, TRUE when no
+ * translation is produced or the translation differs (StringCmp) from
+ * orig_prot_str, FALSE otherwise.
+ */
+static Boolean HasProteinChanged (SeqFeatPtr sfp, CharPtr orig_prot_str)
+{
+  ByteStorePtr  translation;
+  CharPtr       current_prot;
+  Boolean       differs;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
+    return FALSE;
+  }
+
+  translation = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
+  if (translation == NULL) {
+    /* no translation at all counts as a change */
+    return TRUE;
+  }
+
+  current_prot = BSMerge (translation, NULL);
+  translation = BSFree (translation);
+  differs = (Boolean) (StringCmp (orig_prot_str, current_prot) != 0);
+  current_prot = MemFree (current_prot);
+
+  return differs;
+}
+
+
+/* Exchanges the choice and data.ptrvalue of two SeqLoc nodes in place.
+ * The next links of both nodes are left as they are.
+ */
+static void SwapSeqLocContents (SeqLocPtr a, SeqLocPtr b)
+{
+  ValNode  held;
+
+  /* park b's payload, copy a into b, then move the parked payload into a */
+  held.choice = b->choice;
+  held.data.ptrvalue = b->data.ptrvalue;
+
+  b->choice = a->choice;
+  b->data.ptrvalue = a->data.ptrvalue;
+
+  a->choice = held.choice;
+  a->data.ptrvalue = held.data.ptrvalue;
+}
+
+
+/* Applies to an mRNA the same intron shifts that were made to its coding
+ * region.
+ *
+ * cds_loc        - coding-region location after adjustment
+ * cds_loc_before - coding-region location before adjustment
+ * mrna           - mRNA feature whose location intervals are adjusted in place
+ *
+ * Nothing is done unless all three locations are SEQLOC_MIX or
+ * SEQLOC_PACKED_INT.  The three interval lists are walked in lock step, so
+ * the i-th mRNA interval pair is adjusted from the i-th coding-region pair.
+ * NOTE(review): this presumes the mRNA and coding-region intervals line up
+ * one-to-one from the first interval - confirm against callers.
+ */
+static void AlsoAdjustmRNA (SeqLocPtr cds_loc, SeqLocPtr cds_loc_before, SeqFeatPtr mrna)
+{
+ SeqLocPtr slp_c, slp_cb, slp_m;
+ Int4 b_intron_left, b_intron_right, a_intron_left, a_intron_right;
+ Int4 diff;
+ Uint1 strand;
+
+ if (cds_loc == NULL || (cds_loc->choice != SEQLOC_MIX && cds_loc->choice != SEQLOC_PACKED_INT)
+ || cds_loc_before == NULL || (cds_loc_before->choice != SEQLOC_MIX && cds_loc_before->choice != SEQLOC_PACKED_INT)
+ || mrna == NULL || mrna->location == NULL || (mrna->location->choice != SEQLOC_MIX && mrna->location->choice != SEQLOC_PACKED_INT)) {
+ return;
+ }
+
+ strand = SeqLocStrand (cds_loc);
+
+ /* visit each pair of neighbouring intervals; stop as soon as any of the three lists runs out of pairs */
+ for (slp_c = cds_loc->data.ptrvalue, slp_cb = cds_loc_before->data.ptrvalue, slp_m = mrna->location->data.ptrvalue;
+ slp_c != NULL && slp_c->next != NULL && slp_cb != NULL && slp_cb->next != NULL && slp_m != NULL && slp_m->next != NULL;
+ slp_c = slp_c->next, slp_cb = slp_cb->next, slp_m = slp_m->next) {
+ /* b_* = intron bounds before the adjustment, a_* = bounds after it; on the minus strand the next interval supplies the left bound */
+ if (strand == Seq_strand_minus) {
+ b_intron_left = SeqLocStop (slp_cb->next) + 1;
+ b_intron_right = SeqLocStart (slp_cb) - 1;
+ a_intron_left = SeqLocStop (slp_c->next) + 1;
+ a_intron_right = SeqLocStart (slp_c) - 1;
+ } else {
+ b_intron_left = SeqLocStop (slp_cb) + 1;
+ b_intron_right = SeqLocStart (slp_cb->next) - 1;
+ a_intron_left = SeqLocStop (slp_c) + 1;
+ a_intron_right = SeqLocStart (slp_c->next) - 1;
+ }
+ diff = a_intron_left - b_intron_left;
+ /* only act when both intron ends moved by the same non-zero amount */
+ if (diff != 0 && diff == a_intron_right - b_intron_right) {
+ /* NOTE(review): both branches hand a negative offset to the helpers (diff when diff < 0, -diff when diff > 0); confirm the sign convention AdjustSeqLocPairBack / AdjustLocPairForward expect */
+ if (diff < 0) {
+ if (strand == Seq_strand_minus) {
+ AdjustSeqLocPairBack (slp_m, slp_m->next, NULL, NULL, diff);
+ } else {
+ AdjustLocPairForward (slp_m, slp_m->next, NULL, NULL, diff);
+ }
+ } else {
+ if (strand == Seq_strand_minus) {
+ AdjustLocPairForward (slp_m, slp_m->next, NULL, NULL, -diff);
+ } else {
+ AdjustSeqLocPairBack (slp_m, slp_m->next, NULL, NULL, -diff);
+ }
+ }
+ }
+ }
+}
+
+
+/* Returns the number of positions strictly between two neighbouring
+ * intervals: SeqLocStart of the downstream one minus SeqLocStop of the
+ * upstream one, minus 1.  Which interval counts as upstream is decided by
+ * the strand of slp_last.  Returns 0 when either location is NULL.
+ */
+static Int4 IntronLength (SeqLocPtr slp_last, SeqLocPtr slp)
+{
+  SeqLocPtr  left_exon, right_exon;
+  Int4       gap_begin, gap_end;
+
+  if (slp_last == NULL || slp == NULL) {
+    return 0;
+  }
+
+  /* on the minus strand the current interval lies to the left of the previous one */
+  if (SeqLocStrand (slp_last) == Seq_strand_minus) {
+    left_exon = slp;
+    right_exon = slp_last;
+  } else {
+    left_exon = slp_last;
+    right_exon = slp;
+  }
+
+  gap_begin = SeqLocStop (left_exon);
+  gap_end = SeqLocStart (right_exon);
+
+  return gap_end - gap_begin - 1;
+}
+
+
static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
{
SeqLocPtr slp, slp_last = NULL, slp_unchanged;
+ SeqLocPtr slp_before, slp_last_before;
SeqIdPtr sip;
BioseqPtr bsp;
Boolean partial5, partial3, partial5_last, partial3_last, first = TRUE;
@@ -20791,9 +20314,10 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
LogInfoPtr lip;
ByteStorePtr bs;
CharPtr orig_prot_str, new_prot_str;
+ SeqFeatPtr mrna;
if (sfp == NULL
- || (sfp->data.choice != SEQFEAT_CDREGION && sfp->idx.subtype != FEATDEF_mRNA)
+ || (sfp->data.choice != SEQFEAT_CDREGION)
|| sfp->location == NULL
|| (sfp->location->choice != SEQLOC_MIX && sfp->location->choice != SEQLOC_PACKED_INT)
|| (sip = SeqLocId (sfp->location)) == NULL
@@ -20824,11 +20348,18 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
}
bs = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
- if (bs == NULL) return;
+ if (bs == NULL) {
+ BioseqUnlock (bsp);
+ return;
+ }
orig_prot_str = BSMerge (bs, NULL);
bs = BSFree (bs);
- if (orig_prot_str == NULL) return;
+ if (orig_prot_str == NULL) {
+ BioseqUnlock (bsp);
+ return;
+ }
slp_unchanged = SeqLocCopy (sfp->location);
+ mrna = GetmRNAforCDS (sfp);
if ((lip = (LogInfoPtr)data) != NULL && lip->fp != NULL) {
orig_loc = SeqLocPrintUseBestID (sfp->location);
@@ -20843,7 +20374,8 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
if (!first && !partial5 && !partial3_last
&& (slp_last->choice == SEQLOC_INT || slp_last->choice == SEQLOC_PNT)
- && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT)) {
+ && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT)
+ && IntronLength (slp_last, slp) > 9) {
/* check for donor and acceptor pair */
/* maximum search space is beginning of previous exon to end of current exon */
@@ -20857,11 +20389,13 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
stop = MAX (this_pos, prev_pos);
len = stop - start + 1;
buf = (CharPtr) MemNew (sizeof (Char) * (len + 1));
- SeqPortStreamInt (bsp, start, stop, strand, EXPAND_GAPS_TO_DASHES, (Pointer) buf, NULL);
+ SeqPortStreamInt (bsp, start, stop, strand, EXPAND_GAPS_TO_DASHES | STREAM_CORRECT_INVAL, (Pointer) buf, NULL);
if (AdjustedSpliceSitePairIsOk(buf + exon_len_last, buf + len - exon_len - 2)) {
/* already have donor acceptor pair */
} else {
match = FALSE;
+ slp_before = SeqLocCopy (slp);
+ slp_last_before = SeqLocCopy (slp_last);
/* search forward */
if ((slp_last->choice == SEQLOC_INT || slp_last->choice == SEQLOC_PNT)
&& slp->choice == SEQLOC_INT) {
@@ -20875,7 +20409,6 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
}
if (match) {
AdjustLocPairForward (slp_last, slp, last_exon_list, this_exon_list, diff);
- changed = TRUE;
}
}
/* search backward */
@@ -20891,9 +20424,20 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
}
if (match) {
AdjustSeqLocPairBack (slp_last, slp, last_exon_list, this_exon_list, diff);
+ }
+ }
+
+ if (match) {
+ /* check to make sure protein hasn't changed. If it has, roll back the change, otherwise set changed to TRUE */
+ if (HasProteinChanged(sfp, orig_prot_str)) {
+ SwapSeqLocContents (slp_before, slp);
+ SwapSeqLocContents (slp_last_before, slp_last);
+ } else {
changed = TRUE;
}
}
+ slp_before = SeqLocFree (slp_before);
+ slp_last_before = SeqLocFree (slp_last_before);
}
buf = MemFree (buf);
@@ -20926,7 +20470,9 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
}
new_prot_str = MemFree (new_prot_str);
}
- if (!changed) {
+ if (changed) {
+ AlsoAdjustmRNA(sfp->location, slp_unchanged, mrna);
+ } else {
sfp->location = MemFree (sfp->location);
sfp->location = slp_unchanged;
slp_unchanged = NULL;
@@ -20952,6 +20498,11 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
}
+/* Bundle passed as callback data to AdjustSeqEntryForConsensusSpliceBioseqCallback.
+ * lip    - log info forwarded to AdjustForConsensusSpliceCallback
+ * strict - when TRUE the callback skips Bioseqs whose lineage contains
+ *          "viruses" or whose BioSource fails HasTaxonomyID
+ */
+typedef struct consensusspliceadjustment {
+ LogInfoPtr lip;
+ Boolean strict;
+} ConsensusSpliceAdjustmentData, PNTR ConsensusSpliceAdjustmentPtr;
+
static void AdjustSeqEntryForConsensusSpliceBioseqCallback (BioseqPtr bsp, Pointer data)
{
SeqDescPtr sdp;
@@ -20959,31 +20510,39 @@ static void AdjustSeqEntryForConsensusSpliceBioseqCallback (BioseqPtr bsp, Point
BioSourcePtr biop;
SeqFeatPtr sfp;
SeqMgrFeatContext fcontext;
+ ConsensusSpliceAdjustmentPtr csap;
- if (bsp == NULL || ISA_aa (bsp->mol)) {
+ if (bsp == NULL || ISA_aa (bsp->mol) || (csap = (ConsensusSpliceAdjustmentPtr) data) == NULL) {
return;
}
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
if (sdp == NULL || (biop = (BioSourcePtr)sdp->data.ptrvalue) == NULL
- || (biop->genome != GENOME_genomic && biop->genome != GENOME_unknown)
- || (biop->org != NULL && biop->org->orgname != NULL && StringISearch (biop->org->orgname->lineage, "viruses") != NULL)
- || !HasTaxonomyID(biop))
+ || (biop->genome != GENOME_genomic && biop->genome != GENOME_unknown))
{
return;
}
+ if (csap->strict) {
+ if ((biop->org != NULL && biop->org->orgname != NULL && StringISearch (biop->org->orgname->lineage, "viruses") != NULL)
+ || !HasTaxonomyID(biop))
+ {
+ return;
+ }
+ }
+
for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
sfp != NULL;
sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext))
{
- AdjustForConsensusSpliceCallback (sfp, data);
+ AdjustForConsensusSpliceCallback (sfp, csap->lip);
}
}
-NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *log_fp)
+NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *log_fp, Boolean strict)
{
+ ConsensusSpliceAdjustmentData csad;
LogInfoData lid;
if (sep == NULL) {
@@ -20991,14 +20550,16 @@ NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *lo
}
MemSet (&lid, 0, sizeof (LogInfoData));
lid.fp = log_fp;
+ csad.lip = &lid;
+ csad.strict = strict;
- VisitBioseqsInSep (sep, &lid, AdjustSeqEntryForConsensusSpliceBioseqCallback);
+ VisitBioseqsInSep (sep, &csad, AdjustSeqEntryForConsensusSpliceBioseqCallback);
return lid.data_in_log;
}
NLM_EXTERN void AdjustSeqEntryForConsensusSplice (SeqEntryPtr sep)
{
- AdjustSeqEntryForConsensusSpliceEx (sep, NULL);
+ AdjustSeqEntryForConsensusSpliceEx (sep, NULL, TRUE);
}
@@ -21495,3 +21056,6468 @@ NLM_EXTERN Int2 GetGenCodeForBsp (
}
+
+/* Feature visitor: forces the genetic code of a coding region to the value
+ * pointed to by userdata.
+ *
+ * sfp      - feature to fix; ignored unless it is a coding region with data
+ * userdata - pointer to an Int2 holding the genetic code to store
+ *
+ * Features whose exception text contains kAllowManualGenCodeException are
+ * left alone.  If the existing genetic code holds a single integer element
+ * (choice 2) that element is updated in place; if it holds no element, one is
+ * created and attached; anything else is replaced by a fresh GeneticCode.
+ * NOTE(review): 254 and 2 are presumably the GeneticCode set choice and its
+ * integer "id" element - confirm against the object loader.
+ */
+static void CorrectGenCodeIndexedCallback (SeqFeatPtr sfp, Pointer userdata)
+{
+  CdRegionPtr     crp;
+  GeneticCodePtr  gc;
+  Int2Ptr         pGenCode;
+  ValNodePtr      vnp;
+  Boolean         need_replacement = FALSE;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION
+      || sfp->data.value.ptrvalue == NULL
+      || userdata == NULL) {
+    return;
+  }
+  if (sfp->excpt && StringISearch (sfp->except_text, kAllowManualGenCodeException) != NULL) {
+    /* do not correct if this exception present */
+    return;
+  }
+
+  pGenCode = (Int2Ptr) userdata;
+  crp = (CdRegionPtr) sfp->data.value.ptrvalue;
+  if (crp->genetic_code != NULL
+      && crp->genetic_code->choice == 254) {
+    if (crp->genetic_code->data.ptrvalue == NULL) {
+      /* empty element list: create the element AND hook it into the
+       * genetic code; previously the new node was filled in but never
+       * attached, so it leaked and the code stayed unset */
+      vnp = ValNodeNew (NULL);
+      if (vnp != NULL) {
+        vnp->choice = 2;
+        vnp->data.intvalue = (Int4) *pGenCode;
+        crp->genetic_code->data.ptrvalue = vnp;
+      }
+    } else {
+      vnp = crp->genetic_code->data.ptrvalue;
+      if (vnp->next == NULL && vnp->choice == 2) {
+        /* exactly one integer element: overwrite it */
+        vnp->data.intvalue = (Int4) *pGenCode;
+      } else {
+        need_replacement = TRUE;
+      }
+    }
+  } else {
+    need_replacement = TRUE;
+  }
+  if (need_replacement) {
+    /* allocate the replacement first so the old code survives a failed allocation */
+    gc = GeneticCodeNew ();
+    if (gc == NULL) return;
+    crp->genetic_code = GeneticCodeFree (crp->genetic_code);
+    vnp = ValNodeNew (NULL);
+    gc->data.ptrvalue = vnp;
+    if (vnp != NULL) {
+      vnp->choice = 2;
+      vnp->data.intvalue = (Int4) *pGenCode;
+    }
+    crp->genetic_code = gc;
+  }
+}
+
+/* Bioseq visitor: runs CorrectGenCodeIndexedCallback on every CDS feature
+ * that SeqMgrGetNextFeature returns for the Bioseq, passing userdata
+ * through unchanged.  Does nothing when either argument is NULL.
+ */
+static void CorrectGenCodesBioseqCallback (BioseqPtr bsp, Pointer userdata)
+{
+  SeqMgrFeatContext  fctx;
+  SeqFeatPtr         cds;
+
+  if (bsp == NULL || userdata == NULL) {
+    return;
+  }
+
+  cds = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, FEATDEF_CDS, &fctx);
+  while (cds != NULL) {
+    CorrectGenCodeIndexedCallback (cds, userdata);
+    cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, FEATDEF_CDS, &fctx);
+  }
+}
+
+
+/* Accumulator filled while scanning a SeqEntry for its genetic code.
+ * mito, plastid, hydrogenosome - location flags set by JustGetGenCodeFromBiop
+ *                                from the BioSource genome value
+ * nuclCode, mitoCode, pstdCode - copied by JustGetGenCodeFromOrgRef from the
+ *                                OrgName gcode, mgcode and pgcode fields
+ * already_found                - set once a BioSource has been processed
+ */
+typedef struct gencodescan {
+ Boolean mito;
+ Boolean plastid;
+ Boolean hydrogenosome;
+ Int2 nuclCode;
+ Int2 mitoCode;
+ Int2 pstdCode;
+ Boolean already_found;
+} GenCodeScanData, PNTR GenCodeScanPtr;
+
+/* Copies the gcode, mgcode and pgcode values of the OrgRef's OrgName into
+ * the scan record.  Does nothing when the OrgRef, its OrgName or the record
+ * is NULL, or when the record is already marked as found.
+ */
+static void JustGetGenCodeFromOrgRef (OrgRefPtr orp, GenCodeScanPtr gp)
+{
+  OrgNamePtr  name;
+
+  if (orp == NULL || gp == NULL) {
+    return;
+  }
+  name = orp->orgname;
+  if (name == NULL || gp->already_found) {
+    return;
+  }
+
+  gp->nuclCode = name->gcode;
+  gp->mitoCode = name->mgcode;
+  gp->pstdCode = name->pgcode;
+}
+
+/* Records the organelle flags and genetic codes of a BioSource in the scan
+ * record and marks the record as found.  Once a source has been recorded,
+ * later sources are ignored unless they have is_focus set.
+ * NOTE(review): for a later focus source the flags are overwritten here, but
+ * JustGetGenCodeFromOrgRef returns early because already_found is TRUE, so
+ * the codes stay those of the first source - confirm that is intended.
+ */
+static void JustGetGenCodeFromBiop (BioSourcePtr biop, GenCodeScanPtr gp)
+{
+  if (biop == NULL || gp == NULL) {
+    return;
+  }
+  if (gp->already_found && !biop->is_focus) {
+    return;
+  }
+
+  /* start from "nothing set" and raise the flags the genome value calls for */
+  gp->mito = FALSE;
+  gp->plastid = FALSE;
+  gp->hydrogenosome = FALSE;
+
+  switch (biop->genome) {
+    case GENOME_kinetoplast:
+    case GENOME_mitochondrion:
+      gp->mito = TRUE;
+      break;
+    case GENOME_hydrogenosome:
+      /* a hydrogenosome raises the mito flag as well as its own */
+      gp->mito = TRUE;
+      gp->hydrogenosome = TRUE;
+      break;
+    case GENOME_chloroplast:
+    case GENOME_chromoplast:
+    case GENOME_plastid:
+    case GENOME_cyanelle:
+    case GENOME_apicoplast:
+    case GENOME_leucoplast:
+    case GENOME_proplastid:
+    case GENOME_chromatophore:
+      gp->plastid = TRUE;
+      break;
+    default:
+      break;
+  }
+
+  JustGetGenCodeFromOrgRef (biop->org, gp);
+  gp->already_found = TRUE;
+}
+
+
+/* Feature visitor: forwards the BioSource of a BioSource feature to
+ * JustGetGenCodeFromBiop, with userdata as the GenCodeScanPtr.  Other
+ * features, and NULL arguments, are ignored.
+ */
+static void JustGetGenCodeFromFeat (SeqFeatPtr sfp, Pointer userdata)
+{
+  GenCodeScanPtr  scan;
+
+  if (sfp != NULL && userdata != NULL && sfp->data.choice == SEQFEAT_BIOSRC) {
+    scan = (GenCodeScanPtr) userdata;
+    JustGetGenCodeFromBiop (sfp->data.value.ptrvalue, scan);
+  }
+}
+
+/* Descriptor visitor: forwards the BioSource of a source descriptor to
+ * JustGetGenCodeFromBiop, with userdata as the GenCodeScanPtr.  Other
+ * descriptors, and NULL arguments, are ignored.
+ */
+static void JustGetGenCodeFromDesc (SeqDescrPtr sdp, Pointer userdata)
+{
+  GenCodeScanPtr  scan;
+
+  if (sdp != NULL && userdata != NULL && sdp->choice == Seq_descr_source) {
+    scan = (GenCodeScanPtr) userdata;
+    JustGetGenCodeFromBiop (sdp->data.ptrvalue, scan);
+  }
+}
+
+/* Determines the genetic code for a SeqEntry from its BioSource descriptors
+ * and BioSource features.
+ *
+ * Returns the plastid code (11 when that code is not positive) for a plastid
+ * source, the mitochondrial code for a mito or hydrogenosome source, and the
+ * nuclear code otherwise.  Returns 0 when no BioSource supplied any code.
+ */
+static Int2 JustGetGenCodeForSeqEntry (SeqEntryPtr sep)
+{
+  GenCodeScanData  gd;
+
+  /* clear every field up front: hydrogenosome was previously left
+   * uninitialized yet is read below when no BioSource is visited */
+  MemSet ((Pointer) &gd, 0, sizeof (GenCodeScanData));
+
+  VisitDescriptorsInSep (sep, &gd, JustGetGenCodeFromDesc);
+  VisitFeaturesInSep (sep, &gd, JustGetGenCodeFromFeat);
+
+  if (gd.plastid) {
+    if (gd.pstdCode > 0) {
+      return gd.pstdCode;
+    } else {
+      /* no plastid code recorded: fall back to 11 */
+      return 11;
+    }
+  } else if (gd.mito) {
+    return gd.mitoCode;
+  } else if (gd.hydrogenosome) {
+    /* JustGetGenCodeFromBiop also raises mito for a hydrogenosome, so this
+     * branch is only a safety net */
+    return gd.mitoCode;
+  } else {
+    return gd.nuclCode;
+  }
+}
+
+
+/* Sets the genetic code of the coding regions in a SeqEntry to the code
+ * derived from that entry's BioSource (JustGetGenCodeForSeqEntry).
+ * A set of class 7, or one for which IsPopPhyEtcSet is true, is not
+ * processed as a unit: each of its members is handled separately by
+ * recursion.  entityID is only passed along to the recursive calls.
+ */
+NLM_EXTERN void CorrectGenCodes (SeqEntryPtr sep, Uint2 entityID)
+
+{
+  BioseqSetPtr  bssp;
+  SeqEntryPtr   member;
+  Int2          genCode;
+
+  if (sep == NULL) {
+    return;
+  }
+
+  if (IS_Bioseq_set (sep)) {
+    bssp = (BioseqSetPtr) sep->data.ptrvalue;
+    if (bssp != NULL && (bssp->_class == 7 ||
+                         (IsPopPhyEtcSet (bssp->_class)))) {
+      /* wrapper set: each member gets its own genetic code */
+      member = bssp->seq_set;
+      while (member != NULL) {
+        CorrectGenCodes (member, entityID);
+        member = member->next;
+      }
+      return;
+    }
+  }
+
+  genCode = JustGetGenCodeForSeqEntry (sep);
+  VisitFeaturesInSep (sep, &genCode, CorrectGenCodeIndexedCallback);
+  VisitBioseqsInSep (sep, &genCode, CorrectGenCodesBioseqCallback);
+}
+
+
+/* Result record filled by FlankingGeneSMFEProc.
+ * firstgene - first gene feature handed to the callback
+ * lastgene  - most recent gene feature handed to the callback
+ */
+typedef struct flankgenedata {
+ SeqFeatPtr firstgene;
+ SeqFeatPtr lastgene;
+} FlankingGeneData, PNTR FlankingGenePtr;
+
+/* Feature callback used by FindFlankingGenes: remembers the first and the
+ * most recent gene feature in the FlankingGeneData found in
+ * context->userdata.  Returns TRUE on every path.
+ */
+static Boolean LIBCALLBACK FlankingGeneSMFEProc (
+  SeqFeatPtr sfp,
+  SeqMgrFeatContextPtr context
+)
+
+
+{
+  FlankingGenePtr  flank;
+
+  if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && context != NULL) {
+    flank = (FlankingGenePtr) context->userdata;
+    if (flank != NULL) {
+      /* the first gene is recorded once, the last one on every call */
+      if (flank->firstgene == NULL) {
+        flank->firstgene = sfp;
+      }
+      flank->lastgene = sfp;
+    }
+  }
+
+  return TRUE;
+}
+
+/* Finds the first and last gene features reported for a location by
+ * SeqMgrGetAllOverlappingFeatures (FEATDEF_GENE, LOCATION_SUBSET).
+ *
+ * location - location to search
+ * firstP   - receives the first gene; set to NULL when none is found
+ * lastP    - receives the last gene; set to NULL when none is found
+ *
+ * On the minus strand the two results are swapped.  Returns TRUE when a gene
+ * was found, FALSE otherwise or when any argument is NULL (in which case the
+ * outputs are not touched).
+ */
+NLM_EXTERN Boolean FindFlankingGenes (SeqLocPtr location, SeqFeatPtr PNTR firstP, SeqFeatPtr PNTR lastP)
+
+{
+  Int2              found;
+  FlankingGeneData  flank;
+  Boolean           minus;
+
+  if (location == NULL || firstP == NULL || lastP == NULL) {
+    return FALSE;
+  }
+  *firstP = NULL;
+  *lastP = NULL;
+
+  MemSet ((Pointer) &flank, 0, sizeof (FlankingGeneData));
+  found = SeqMgrGetAllOverlappingFeatures (location, FEATDEF_GENE, NULL, 0, LOCATION_SUBSET,
+                                           (Pointer) &flank, FlankingGeneSMFEProc);
+  if (found == 0 || flank.firstgene == NULL) {
+    return FALSE;
+  }
+
+  /* report the genes in the orientation of the location */
+  minus = (Boolean) (SeqLocStrand (location) == Seq_strand_minus);
+  *firstP = minus ? flank.lastgene : flank.firstgene;
+  *lastP = minus ? flank.firstgene : flank.lastgene;
+
+  return TRUE;
+}
+
+/* Gives a feature a gene xref copied from a gene feature.
+ * The first existing gene xref on sfp (if any) is unlinked and freed, then a
+ * copy of the gene's GeneRef (made with AsnIoMemCopy) is placed in a new xref
+ * at the head of sfp->xref.  The old xref is removed even when the gene has
+ * no data or a later allocation fails.
+ */
+NLM_EXTERN void AssignGeneXrefToFeat (SeqFeatPtr sfp, SeqFeatPtr gene)
+
+{
+  GeneRefPtr           source_ref, ref_copy;
+  SeqFeatXrefPtr       stale, fresh;
+  SeqFeatXrefPtr PNTR  link;
+
+  if (sfp == NULL || gene == NULL) {
+    return;
+  }
+
+  /* find the first gene xref through the link that points at it, so the
+   * unlink is the same whether it is at the head or further down the list */
+  for (link = &(sfp->xref); *link != NULL; link = &((*link)->next)) {
+    if ((*link)->data.choice == SEQFEAT_GENE) {
+      stale = *link;
+      *link = stale->next;
+      stale->next = NULL;
+      SeqFeatXrefFree (stale);
+      break;
+    }
+  }
+
+  source_ref = (GeneRefPtr) gene->data.value.ptrvalue;
+  if (source_ref == NULL) {
+    return;
+  }
+  ref_copy = AsnIoMemCopy (source_ref, (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite);
+  if (ref_copy == NULL) {
+    return;
+  }
+
+  fresh = SeqFeatXrefNew ();
+  if (fresh == NULL) {
+    return;
+  }
+  fresh->data.choice = SEQFEAT_GENE;
+  fresh->data.value.ptrvalue = ref_copy;
+  fresh->next = sfp->xref;
+  sfp->xref = fresh;
+}
+
+
+/* Fills a GapLoc record from a position, a length and the qualifiers of a
+ * gap feature.
+ *
+ * glp  - record to fill; start and length are always set
+ * sfp  - feature whose estimated_length, gap_type and linkage_evidence
+ *        qualifiers are read (qualifiers with no text are skipped)
+ * left - value stored in glp->start
+ * len  - value stored in glp->length
+ *
+ * The string fields of glp point at the qualifier values themselves; no
+ * copies are made.  unknown_length is set when estimated_length is
+ * "unknown" or "unknown_length".
+ */
+void PopulateGapLocQuals(GapLocPtr glp, SeqFeatPtr sfp, Int4 left, Int4 len)
+{
+  GBQualPtr  qual;
+  CharPtr    value;
+
+  glp->start = left;
+  glp->length = len;
+
+  qual = sfp->qual;
+  while (qual != NULL) {
+    value = qual->val;
+    if (! StringHasNoText (value)) {
+      if (StringsAreEquivalent (qual->qual, "estimated_length")) {
+        glp->estimated_length = value;
+        if (StringsAreEquivalent(value, "unknown")
+            || StringsAreEquivalent(value, "unknown_length")) {
+          glp->unknown_length = TRUE;
+        }
+      } else if (StringsAreEquivalent (qual->qual, "gap_type")) {
+        glp->gap_type = value;
+      } else if (StringsAreEquivalent (qual->qual, "linkage_evidence")) {
+        glp->linkage_evidence = value;
+      }
+    }
+    qual = qual->next;
+  }
+}
+
+
+/* Allocates a GapLoc record for a gap feature and fills it with
+ * PopulateGapLocQuals, using left as the start and the length of the
+ * feature location as the length.
+ * Returns the new record (the caller frees it), or NULL when sfp is NULL or
+ * the allocation fails.
+ */
+GapLocPtr GapLocFromSeqFeat(SeqFeatPtr sfp, Int4 left)
+{
+  GapLocPtr  glp;
+
+  /* PopulateGapLocQuals dereferences both pointers, so refuse bad input here;
+   * GetGapLocListFromBioseq already copes with a NULL result */
+  if (sfp == NULL) {
+    return NULL;
+  }
+  glp = (GapLocPtr) MemNew (sizeof (GapLocData));
+  if (glp == NULL) {
+    return NULL;
+  }
+  PopulateGapLocQuals(glp, sfp, left, SeqLocLen(sfp->location));
+  return glp;
+}
+
+
+/* gap_type qualifier strings, terminated by NULL.  Entry i corresponds to
+ * gapTypeValues [i].  Several strings appear more than once; the lookups in
+ * IncompatibleGapFeatQuals and SeqGapFromGapLoc scan the whole table without
+ * stopping at the first hit, so the LAST matching entry supplies the value.
+ */
+static CharPtr gapTypeStrings [] = {
+ "unknown",
+ "within scaffold",
+ "within scaffold",
+ "between scaffolds",
+ "short_arm",
+ "heterochromatin",
+ "centromere",
+ "telomere",
+ "repeat within scaffold",
+ "repeat between scaffolds",
+ "between scaffold",
+ "between scaffolds",
+ "within scaffold",
+ "other",
+ NULL
+};
+
+/* Numeric gap type for each entry of gapTypeStrings.  There is no sentinel
+ * here, so the two tables must keep the same number of entries and order.
+ */
+static Int4 gapTypeValues [] = {
+ 0,
+ 1,
+ 2,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 7,
+ 8,
+ 8,
+ 9,
+ 255
+};
+
+/* linkage_evidence strings, terminated by NULL.  Entry i corresponds to
+ * linkEvValues [i].
+ * NOTE(review): presumably matched against the linkage_evidence qualifier in
+ * SeqGapFromGapLoc - confirm against that function.
+ */
+static CharPtr linkEvStrings [] = {
+ "paired-ends",
+ "align genus",
+ "align xgenus",
+ "align trnscpt",
+ "within clone",
+ "clone contig",
+ "map",
+ "strobe",
+ "unspecified",
+ "pcr",
+ "other",
+ NULL
+};
+
+/* Numeric linkage-evidence type for each entry of linkEvStrings.  There is
+ * no sentinel here, so the two tables must keep the same number of entries
+ * and order.
+ */
+static Int4 linkEvValues [] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 255
+};
+
+/* Reports whether the qualifiers of a gap feature contradict each other.
+ *
+ * Side effect: sfp->qual is sorted (SortFeatureGBQuals) and de-duplicated
+ * (CleanupDuplicateGBQuals) before it is examined.
+ *
+ * Returns TRUE when, among qualifiers that have text,
+ *   - estimated_length or gap_type occurs more than once, or
+ *   - linkage_evidence is present and the gap type maps to 3, 4, 5, 6, 8 or 255, or
+ *   - linkage_evidence is absent and the gap type maps to 7 or 9.
+ * Returns FALSE otherwise, and for a NULL feature.  A gap_type string not
+ * found in gapTypeStrings leaves the type at 0.
+ */
+Boolean IncompatibleGapFeatQuals (SeqFeatPtr sfp)
+
+{
+  GBQualPtr  qual;
+  CharPtr    est_length = NULL, gap_type = NULL, link_evidence = NULL;
+  int        idx;
+  Int4       type_code = 0;
+
+  if (sfp == NULL) {
+    return FALSE;
+  }
+
+  sfp->qual = SortFeatureGBQuals (sfp->qual);
+  CleanupDuplicateGBQuals (&(sfp->qual));
+
+  qual = sfp->qual;
+  while (qual != NULL) {
+    if (! StringHasNoText (qual->val)) {
+      if (StringsAreEquivalent (qual->qual, "estimated_length")) {
+        if (est_length != NULL) {
+          return TRUE;
+        }
+        est_length = qual->val;
+      } else if (StringsAreEquivalent (qual->qual, "gap_type")) {
+        if (gap_type != NULL) {
+          return TRUE;
+        }
+        gap_type = qual->val;
+      } else if (StringsAreEquivalent (qual->qual, "linkage_evidence")) {
+        link_evidence = qual->val;
+      }
+    }
+    qual = qual->next;
+  }
+
+  if (StringDoesHaveText (gap_type)) {
+    /* scan the whole table: for a repeated string the last match wins */
+    idx = 0;
+    while (gapTypeStrings [idx] != NULL) {
+      if (StringCmp (gap_type, gapTypeStrings [idx]) == 0) {
+        type_code = gapTypeValues [idx];
+      }
+      idx++;
+    }
+  }
+
+  if (link_evidence != NULL) {
+    switch (type_code) {
+      case 3:
+      case 4:
+      case 5:
+      case 6:
+      case 8:
+      case 255:
+        return TRUE;
+      default:
+        break;
+    }
+  } else if (type_code == 9 || type_code == 7) {
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+ /*
+  * GetGapLocListFromBioseq
+  *
+  * Walks every assembly_gap feature indexed on bsp, builds a GapLocData record
+  * for each one (via GapLocFromSeqFeat, using the indexed left position) and
+  * appends it to a ValNode list.  Each visited feature is flagged with
+  * idx.deleteme, whether or not a record could be built for it.
+  *
+  * Returns the head of the list (NULL when no records were collected).
+  */
+ static ValNodePtr GetGapLocListFromBioseq (BioseqPtr bsp)
+ {
+   SeqMgrFeatContext  fcontext;
+   ValNodePtr         first = NULL, last = NULL;
+   SeqFeatPtr         gap_feat;
+   GapLocPtr          gap;
+
+   for (gap_feat = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_assembly_gap, &fcontext);
+        gap_feat != NULL;
+        gap_feat = SeqMgrGetNextFeature (bsp, gap_feat, 0, FEATDEF_assembly_gap, &fcontext)) {
+     gap = GapLocFromSeqFeat(gap_feat, fcontext.left);
+     if (gap != NULL) {
+       ValNodeAddPointerEx (&first, &last, 0, (Pointer) gap);
+     }
+     gap_feat->idx.deleteme = TRUE;
+   }
+
+   return first;
+ }
+
+
+ /*
+  * SeqGapFromGapLoc
+  *
+  * Builds a Seq-literal that carries a Seq-gap describing glp:
+  *   - the literal length is glp->length;
+  *   - when glp->unknown_length is set, an IntFuzz with choice 4 is attached
+  *     (no other fuzz field is set here);
+  *   - the gap type is looked up in gapTypeStrings/gapTypeValues with an exact
+  *     StringCmp match, last matching entry winning;
+  *   - when linkage evidence text is present, linkage is set to 1 and one
+  *     LinkageEvidence entry is added per matching linkEvStrings entry.
+  *
+  * Returns the new literal, or NULL when glp is NULL or when the literal or
+  * the Seq-gap cannot be allocated (previously a literal without gap data was
+  * returned in the latter case, and a failed IntFuzzNew was dereferenced).
+  */
+ static SeqLitPtr SeqGapFromGapLoc(GapLocPtr glp)
+ {
+   SeqLitPtr           slitp;
+   SeqGapPtr           sgp;
+   Int4                i;
+   LinkageEvidencePtr  lep;
+   IntFuzzPtr          ifp;
+
+   if (glp == NULL) return NULL;
+
+   slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+   if (slitp == NULL) return NULL;
+
+   sgp = SeqGapNew ();
+   if (sgp == NULL) {
+     /* do not hand back a literal that claims to be a gap but has no gap data */
+     MemFree (slitp);
+     return NULL;
+   }
+
+   slitp->seq_data_type = Seq_code_gap;
+   slitp->seq_data = (SeqDataPtr) sgp;
+   slitp->length = glp->length;
+
+   if (glp->unknown_length) {
+     ifp = IntFuzzNew();
+     if (ifp != NULL) {
+       ifp->choice = 4;
+       slitp->fuzz = ifp;
+     }
+   }
+
+   if (StringDoesHaveText (glp->gap_type)) {
+     /* no break: the last matching table entry determines the type */
+     for (i = 0; gapTypeStrings [i] != NULL; i++) {
+       if (StringCmp (glp->gap_type, gapTypeStrings [i]) == 0) {
+         sgp->type = gapTypeValues [i];
+       }
+     }
+   }
+
+   if (StringDoesHaveText (glp->linkage_evidence)) {
+     sgp->linkage = 1;
+     for (i = 0; linkEvStrings [i] != NULL; i++) {
+       if (StringsAreEquivalent (glp->linkage_evidence, linkEvStrings [i])) {
+         lep = LinkageEvidenceNew ();
+         if (lep != NULL) {
+           lep->type = linkEvValues [i];
+           ValNodeAddPointer (&sgp->linkage_evidence, 0, (Pointer) lep);
+         }
+       }
+     }
+   }
+
+   return slitp;
+ }
+
+
+ /*
+  * BioseqToDeltaByGapFeat
+  *
+  * Rebuilds a raw or delta nucleotide Bioseq as a delta sequence whose gap
+  * segments come from its assembly_gap features.
+  *
+  * Steps:
+  *   1. Collect a GapLocData record per assembly_gap feature (this also flags
+  *      those features with idx.deleteme).
+  *   2. Fetch the full sequence text and walk the gap records in list order,
+  *      emitting an IUPACna literal for the bases preceding each gap followed
+  *      by a Seq-gap literal for the gap itself.
+  *   3. Emit a final literal for any bases after the last gap.
+  *   4. Replace the Bioseq's data with the new delta chain, pack it, and run
+  *      AdjustCDSLocationsForUnknownGapsCallback over all features of the
+  *      top-level entry, passing userdata through.
+  *
+  * Nothing is done for NULL, protein, or non raw/delta Bioseqs.  Gap records
+  * whose start is below 1 or beyond bsp->length are skipped.
+  *
+  * The gap record list is now released on every exit path; it used to leak
+  * when the sequence text could not be obtained.
+  */
+ void BioseqToDeltaByGapFeat (BioseqPtr bsp, Pointer userdata)
+
+ {
+   CharPtr      bases;
+   Int4         gap_start, len = 0, orig_seq_offset;
+   GapLocPtr    glp;
+   ValNodePtr   head = NULL, seq_ext = NULL, vnp;
+   SeqEntryPtr  sep;
+   SeqLitPtr    slitp;
+   Char         tmp_ch;
+
+   if (bsp == NULL || (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_delta) || ISA_aa (bsp->mol)) return;
+
+   head = GetGapLocListFromBioseq(bsp);
+   bases = GetSequenceByBsp (bsp);
+   if (bases == NULL) {
+     ValNodeFreeData (head);
+     return;
+   }
+
+   orig_seq_offset = 0;
+   for (vnp = head; vnp != NULL; vnp = vnp->next) {
+     glp = (GapLocPtr) vnp->data.ptrvalue;
+     if (glp == NULL) continue;
+
+     gap_start = glp->start;
+     if (gap_start < 1 || gap_start > bsp->length) continue;
+
+     /* add data since last gap */
+     if (gap_start - orig_seq_offset > 0) {
+       slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+       if (slitp != NULL) {
+         slitp->length = gap_start - orig_seq_offset;
+         ValNodeAddPointer (&(seq_ext), (Int2) 2, (Pointer) slitp);
+         slitp->seq_data = (SeqDataPtr) BSNew (slitp->length);
+         slitp->seq_data_type = Seq_code_iupacna;
+         /* temporarily terminate the text at the gap so only the preceding bases are copied */
+         tmp_ch = bases [gap_start];
+         bases [gap_start] = 0;
+         AddBasesToByteStore ((ByteStorePtr) slitp->seq_data, bases + orig_seq_offset);
+         bases [gap_start] = tmp_ch;
+         len += slitp->length;
+         orig_seq_offset += slitp->length;
+       }
+     }
+
+     /* add gap */
+     slitp = SeqGapFromGapLoc(glp);
+     if (slitp != NULL) {
+       len += slitp->length;
+       ValNodeAddPointer ((ValNodePtr PNTR) &(seq_ext), (Int2) 2, (Pointer) slitp);
+     }
+     orig_seq_offset += glp->length;
+   }
+
+   /* add remaining data after last gap to end */
+   if (bsp->length - orig_seq_offset > 0) {
+     slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+     if (slitp != NULL) {
+       slitp->length = bsp->length - orig_seq_offset;
+       ValNodeAddPointer (&(seq_ext), (Int2) 2, (Pointer) slitp);
+       slitp->seq_data = (SeqDataPtr) BSNew (slitp->length);
+       slitp->seq_data_type = Seq_code_iupacna;
+       AddBasesToByteStore ((ByteStorePtr) slitp->seq_data, bases + orig_seq_offset);
+       len += slitp->length;
+     }
+   }
+
+   MemFree (bases);
+
+   /* swap the old sequence data for the new delta chain */
+   bsp->seq_data = SeqDataFree (bsp->seq_data, bsp->seq_data_type);
+   bsp->seq_data_type = 0;
+   bsp->repr = Seq_repr_delta;
+   bsp->seq_ext_type = 4;
+   bsp->seq_ext = seq_ext;
+   bsp->length = len;
+
+   BioseqPack (bsp);
+
+   /* now adjust features for insertion */
+   /*
+   orig_seq_offset = 0;
+   for (vnp = head; vnp != NULL; vnp = vnp->next) {
+     glp = (GapLocPtr) vnp->data.ptrvalue;
+     if (glp == NULL) continue;
+
+     gap_start = glp->start;
+     if (gap_start < 1 || gap_start > bsp->length) continue;
+
+     AdjustFeaturesForInsertion (bsp, bsp->id,
+                                 gap_start + orig_seq_offset,
+                                 glp->length, FALSE);
+     orig_seq_offset += glp->length;
+   }
+   */
+
+   sep = GetTopSeqEntryForEntityID (bsp->idx.entityID);
+   VisitFeaturesInSep (sep, userdata, AdjustCDSLocationsForUnknownGapsCallback);
+
+   ValNodeFreeData (head);
+ }
+
+
+ /*
+  * ValidateAssemblyGapFeat
+  *
+  * Checks that an assembly_gap feature sits exactly on a run of N residues.
+  * A copy of the feature location is widened by one position on each side and
+  * its sequence text fetched; the check passes only when the first and last
+  * characters are letters other than 'N' and every character in between is 'N'.
+  *
+  * Returns FALSE for NULL arguments, for locations that are not a single
+  * Seq-interval, for intervals touching either end of bsp (there is no
+  * flanking residue to inspect), and when the fetched text length does not
+  * match the widened location length.
+  */
+ static Boolean ValidateAssemblyGapFeat (SeqFeatPtr sfp, BioseqPtr bsp)
+
+ {
+   Boolean    flanked = FALSE;
+   Char       first, last;
+   size_t     count;
+   SeqIntPtr  interval;
+   int        pos;
+   CharPtr    residues;
+   SeqLocPtr  widened;
+
+   if (sfp == NULL || sfp->location == NULL || bsp == NULL) return FALSE;
+
+   /* work on a private copy so the feature's own location is untouched */
+   widened = (SeqLocPtr) AsnIoMemCopy ((Pointer) sfp->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
+   if (widened == NULL) return FALSE;
+
+   interval = (widened->choice == SEQLOC_INT) ? (SeqIntPtr) widened->data.ptrvalue : NULL;
+
+   if (interval != NULL && interval->from > 0 && interval->to < bsp->length - 1) {
+     (interval->from)--;
+     (interval->to)++;
+     residues = GetSequenceByLocation (widened);
+     count = (residues != NULL) ? StringLen (residues) : 0;
+     if (count > 0 && count == SeqLocLen (widened)) {
+       first = residues [0];
+       last = residues [count - 1];
+       if (IS_ALPHA (first) && first != 'N' && IS_ALPHA (last) && last != 'N') {
+         flanked = TRUE;
+         for (pos = 1; pos < count - 1; pos++) {
+           if (residues [pos] != 'N') {
+             flanked = FALSE;
+             break;
+           }
+         }
+       }
+     }
+     MemFree (residues);
+   }
+
+   SeqLocFree (widened);
+
+   return flanked;
+ }
+
+ /*
+  * gap_type qualifier text indexed directly by numeric gap type.
+  * InstantiateAssemblyGapFeats reads entries 1 through 9 by index, except
+  * that types 2 and 7 are handled separately there (the text depends on
+  * whether the gap is linked); type 255 is also handled separately, so the
+  * trailing "other" entry is not reached through the index lookup.
+  */
+ static CharPtr gapTypeVals [] = {
+ "unknown",
+ "within scaffold",
+ "between scaffolds",
+ "short_arm",
+ "heterochromatin",
+ "centromere",
+ "telomere",
+ "repeat within scaffold",
+ "between scaffolds",
+ "within scaffold",
+ "other",
+ NULL
+ };
+
+ /*
+  * linkage_evidence qualifier text indexed directly by numeric linkage
+  * evidence type.  InstantiateAssemblyGapFeats reads entries 0 through 9 by
+  * index and handles type 255 ("other") separately.
+  */
+ static CharPtr linkEvVals [] = {
+ "paired-ends",
+ "align genus",
+ "align xgenus",
+ "align trnscpt",
+ "within clone",
+ "clone contig",
+ "map",
+ "strobe",
+ "unspecified",
+ "pcr",
+ "other",
+ NULL
+ };
+
+ /*
+  * Creates one generated assembly_gap import feature on bsp covering
+  * from..to on sip, flags it with excpt = TRUE (the marker other code in this
+  * file uses to recognize generated gap features) and adds an estimated_length
+  * qualifier holding length.  Returns NULL if the feature cannot be created;
+  * the ImpFeat is released in that case instead of being leaked.
+  */
+ static SeqFeatPtr NewGeneratedAssemblyGapFeat (BioseqPtr bsp, SeqIdPtr sip, Int4 from, Int4 to, Int4 length)
+
+ {
+   Char        buf [128];
+   ImpFeatPtr  ifp;
+   SeqFeatPtr  sfp;
+
+   ifp = ImpFeatNew ();
+   if (ifp == NULL) return NULL;
+   ifp->key = StringSave ("assembly_gap");
+
+   sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_IMP, NULL);
+   if (sfp == NULL) {
+     ImpFeatFree (ifp);
+     return NULL;
+   }
+   sfp->data.value.ptrvalue = (Pointer) ifp;
+   sfp->excpt = TRUE;
+   sfp->location = AddIntervalToLocation (NULL, sip, from, to, FALSE, FALSE);
+
+   sprintf (buf, "%ld", (long) length);
+   AddQualifierToFeature (sfp, "estimated_length", buf);
+
+   return sfp;
+ }
+
+ /*
+  * InstantiateAssemblyGapFeats
+  *
+  * Walks the delta chain of bsp and creates a generated assembly_gap feature
+  * for every non-empty literal that is a gap:
+  *   - a literal with no sequence data gets gap_type "unknown";
+  *   - a literal carrying a Seq-gap gets gap_type and linkage_evidence
+  *     qualifiers derived from the Seq-gap's type, linkage and evidence list.
+  * Literals holding real sequence data are skipped.  Positions are tracked by
+  * summing segment lengths (Seq-loc segments via SeqLocLen, literals via their
+  * length field).
+  *
+  * Does nothing unless bsp is a delta Bioseq with a usable Seq-id.
+  */
+ static void InstantiateAssemblyGapFeats (BioseqPtr bsp)
+
+ {
+   Int4                currpos = 0, lastpos, linktype, type;
+   ValNodePtr          evidvnp, vnp;
+   Boolean             gap_is_linked;
+   LinkageEvidencePtr  lep;
+   SeqLitPtr           litp;
+   SeqFeatPtr          sfp;
+   SeqGapPtr           sgp;
+   SeqIdPtr            sip;
+   SeqLocPtr           slp;
+
+   if (bsp == NULL || bsp->repr != Seq_repr_delta) return;
+
+   sip = SeqIdFindBest (bsp->id, 0);
+   if (sip == NULL) return;
+
+   for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
+     if (vnp->choice == 1) {
+       slp = (SeqLocPtr) vnp->data.ptrvalue;
+       if (slp == NULL) continue;
+       currpos += SeqLocLen (slp);
+     }
+     if (vnp->choice == 2) {
+       litp = (SeqLitPtr) vnp->data.ptrvalue;
+       if (litp == NULL) continue;
+       lastpos = currpos;
+       currpos += litp->length;
+
+       if (litp->length == 0 ) continue;
+
+       if (litp->seq_data == NULL) {
+         /* literal with no data at all: gap of unknown type */
+         sfp = NewGeneratedAssemblyGapFeat (bsp, sip, lastpos, currpos - 1, litp->length);
+         if (sfp == NULL) continue;
+         AddQualifierToFeature (sfp, "gap_type", "unknown");
+         continue;
+       }
+
+       if (litp->seq_data_type != Seq_code_gap) continue;
+       sgp = (SeqGapPtr) litp->seq_data;
+       if (sgp == NULL) continue;
+
+       sfp = NewGeneratedAssemblyGapFeat (bsp, sip, lastpos, currpos - 1, litp->length);
+       if (sfp == NULL) continue;
+
+       gap_is_linked = FALSE;
+       if (sgp->linkage == 1 || sgp->linkage_evidence != NULL) {
+         gap_is_linked = TRUE;
+       }
+
+       /* types 2 and 7 map to different text depending on linkage */
+       type = sgp->type;
+       if (type == 2) {
+         AddQualifierToFeature (sfp, "gap_type", gap_is_linked ? "within scaffold" : "between scaffolds");
+       } else if (type == 7) {
+         AddQualifierToFeature (sfp, "gap_type", gap_is_linked ? "repeat within scaffold" : "repeat between scaffolds");
+       } else if (type >= 1 && type <= 9) {
+         AddQualifierToFeature (sfp, "gap_type", gapTypeVals [type]);
+       } else if (sgp->type == 255) {
+         AddQualifierToFeature (sfp, "gap_type", "other");
+       }
+
+       for (evidvnp = sgp->linkage_evidence; evidvnp; evidvnp = evidvnp->next) {
+         lep = (LinkageEvidencePtr) evidvnp->data.ptrvalue;
+         if (lep == NULL) continue;
+         linktype = lep->type;
+         if (linktype >= 0 && linktype <= 9) {
+           AddQualifierToFeature (sfp, "linkage_evidence", linkEvVals [linktype]);
+         } else if (linktype == 255) {
+           AddQualifierToFeature (sfp, "linkage_evidence", "other");
+         }
+       }
+     }
+   }
+ }
+
+ /*
+  * BioseqToDeltaMergeGapFeat
+  *
+  * Rebuilds a raw or literal-only delta nucleotide Bioseq as a delta sequence
+  * whose gaps combine the existing Seq-gap segments with the assembly_gap
+  * features annotated on it.
+  *
+  * Steps:
+  *   1. Return early if there are no assembly_gap features, or if any of them
+  *      fails ValidateAssemblyGapFeat (each failure is posted as an error).
+  *   2. Turn existing Seq-gap segments into generated assembly_gap features
+  *      (marked with excpt) and reindex.
+  *   3. Merge generated and real features that share a location.  If the
+  *      merge reports incompatible qualifiers, delete the generated features,
+  *      reindex and return without changing the sequence.
+  *   4. Otherwise delete the features flagged during the merge, collect the
+  *      remaining gap features as gap records and rebuild the delta chain
+  *      from the sequence text, exactly as BioseqToDeltaByGapFeat does.
+  *
+  * userdata is not used by the active code.
+  *
+  * The gap record list is now released on every exit path; it used to leak
+  * when the sequence text could not be obtained.
+  */
+ void BioseqToDeltaMergeGapFeat (BioseqPtr bsp, Pointer userdata)
+
+ {
+   CharPtr            bases;
+   SeqMgrFeatContext  context;
+   Boolean            failed = FALSE;
+   Int4               gap_start, len = 0, orig_seq_offset;
+   GapLocPtr          glp;
+   ValNodePtr         head = NULL, seq_ext = NULL, vnp;
+   SeqFeatPtr         sfp;
+   SeqLitPtr          slitp;
+   Char               tmp_ch;
+
+   if (bsp == NULL || (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_delta) || ISA_aa (bsp->mol)) return;
+
+   if (bsp->repr == Seq_repr_delta) {
+     if (! DeltaLitOnly (bsp)) return;
+   }
+
+   /* Ensure that assembly_gap features are above Ns */
+
+   sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_assembly_gap, &context);
+
+   /* skip if there are no assembly_gap features */
+
+   if (sfp == NULL) return;
+
+   while (sfp != NULL) {
+     if (! ValidateAssemblyGapFeat (sfp, bsp)) {
+       Message (MSG_POSTERR, "ValidateAssemblyGapFeat failed for %ld..%ld",
+                (long) (context.left + 1), (long) (context.right + 1));
+       failed = TRUE;
+     }
+     sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_assembly_gap, &context);
+   }
+
+   if (failed) return;
+
+   /* Now instantiate Seq-gaps into transient assembly_gap features */
+
+   InstantiateAssemblyGapFeats (bsp);
+
+   /* Reindex to pick up real and generated assembly_gap features */
+
+   SeqMgrIndexFeatures (bsp->idx.entityID, NULL);
+
+   /* Merge qualifiers in actual and generated assembly_gap features with same location, bail if incompatible */
+
+   if (! MergeAssemblyGapFeats (bsp)) {
+     /* undo: remove every generated (excpt-marked) feature again */
+     sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_assembly_gap, &context);
+     while (sfp != NULL) {
+       if (sfp->excpt) {
+         sfp->idx.deleteme = TRUE;
+       }
+       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_assembly_gap, &context);
+     }
+     DeleteMarkedObjects (bsp->idx.entityID, 0, NULL);
+     SeqMgrClearFeatureIndexes (bsp->idx.entityID, NULL);
+     SeqMgrIndexFeatures (bsp->idx.entityID, NULL);
+     return;
+   }
+
+   DeleteMarkedObjects (bsp->idx.entityID, 0, NULL);
+   SeqMgrClearFeatureIndexes (bsp->idx.entityID, NULL);
+   SeqMgrIndexFeatures (bsp->idx.entityID, NULL);
+
+   head = GetGapLocListFromBioseq(bsp);
+
+   /* Now reconstruct delta using old Seq-gap components and new assembly_gap features */
+
+   bases = GetSequenceByBsp (bsp);
+   if (bases == NULL) {
+     ValNodeFreeData (head);
+     return;
+   }
+
+   orig_seq_offset = 0;
+   for (vnp = head; vnp != NULL; vnp = vnp->next) {
+     glp = (GapLocPtr) vnp->data.ptrvalue;
+     if (glp == NULL) continue;
+
+     gap_start = glp->start;
+     if (gap_start < 1 || gap_start > bsp->length) continue;
+
+     /* add data since last gap */
+     if (gap_start - orig_seq_offset > 0) {
+       slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+       if (slitp != NULL) {
+         slitp->length = gap_start - orig_seq_offset;
+         ValNodeAddPointer (&(seq_ext), (Int2) 2, (Pointer) slitp);
+         slitp->seq_data = (SeqDataPtr) BSNew (slitp->length);
+         slitp->seq_data_type = Seq_code_iupacna;
+         /* temporarily terminate the text at the gap so only the preceding bases are copied */
+         tmp_ch = bases [gap_start];
+         bases [gap_start] = 0;
+         AddBasesToByteStore ((ByteStorePtr) slitp->seq_data, bases + orig_seq_offset);
+         bases [gap_start] = tmp_ch;
+         len += slitp->length;
+         orig_seq_offset += slitp->length;
+       }
+     }
+
+     /* add gap */
+     slitp = SeqGapFromGapLoc(glp);
+     if (slitp != NULL) {
+       len += slitp->length;
+       ValNodeAddPointer ((ValNodePtr PNTR) &(seq_ext), (Int2) 2, (Pointer) slitp);
+     }
+
+     orig_seq_offset += glp->length;
+   }
+
+   /* add remaining data after last gap to end */
+   if (bsp->length - orig_seq_offset > 0) {
+     slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
+     if (slitp != NULL) {
+       slitp->length = bsp->length - orig_seq_offset;
+       ValNodeAddPointer (&(seq_ext), (Int2) 2, (Pointer) slitp);
+       slitp->seq_data = (SeqDataPtr) BSNew (slitp->length);
+       slitp->seq_data_type = Seq_code_iupacna;
+       AddBasesToByteStore ((ByteStorePtr) slitp->seq_data, bases + orig_seq_offset);
+       len += slitp->length;
+     }
+   }
+
+   MemFree (bases);
+
+   /* swap the old sequence data for the new delta chain */
+   bsp->seq_data = SeqDataFree (bsp->seq_data, bsp->seq_data_type);
+   bsp->seq_data_type = 0;
+   bsp->repr = Seq_repr_delta;
+   bsp->seq_ext_type = 4;
+   bsp->seq_ext = seq_ext;
+   bsp->length = len;
+
+   BioseqPack (bsp);
+
+   /* now adjust features for insertion */
+   /*
+   orig_seq_offset = 0;
+   for (vnp = head; vnp != NULL; vnp = vnp->next) {
+     glp = (GapLocPtr) vnp->data.ptrvalue;
+     if (glp == NULL) continue;
+
+     gap_start = glp->start;
+     if (gap_start < 1 || gap_start > bsp->length) continue;
+
+     AdjustFeaturesForInsertion (bsp, bsp->id,
+                                 gap_start + orig_seq_offset,
+                                 glp->length, FALSE);
+     orig_seq_offset += glp->length;
+   }
+   */
+
+   /*
+   sep = GetTopSeqEntryForEntityID (bsp->idx.entityID);
+   VisitFeaturesInSep (sep, (Pointer) Cln_GlobalAlign2Seq, AdjustCDSLocationsForUnknownGapsCallback);
+   */
+
+   ValNodeFreeData (head);
+ }
+
+
+ /*
+  * MoveGBQualList
+  *
+  * Transfers the entire qualifier list of src to the end of dst's qualifier
+  * list and leaves src with no qualifiers.  No qualifier is copied or freed.
+  * Does nothing when either feature is NULL.
+  */
+ static void MoveGBQualList (SeqFeatPtr dst, SeqFeatPtr src)
+
+ {
+   GBQualPtr PNTR  link;
+
+   if (dst == NULL || src == NULL) return;
+
+   /* advance to the NULL link that terminates dst's list (or dst->qual itself) */
+   link = &(dst->qual);
+   while (*link != NULL) {
+     link = &((*link)->next);
+   }
+
+   *link = src->qual;
+   src->qual = NULL;
+ }
+
+
+ /*
+  * MergeAssemblyGapFeats
+  *
+  * Scans the assembly_gap features indexed on bsp in index order.  Whenever a
+  * feature has the same left/right extent as the one visited just before it
+  * and one of the two is flagged with excpt, the excpt-flagged feature's
+  * qualifiers are moved onto the other one and the excpt-flagged feature is
+  * marked idx.deleteme.  If both are flagged, the earlier one is the donor.
+  *
+  * Returns FALSE when bsp is NULL or when any merged feature ends up with
+  * incompatible gap qualifiers (per IncompatibleGapFeatQuals); the scan still
+  * runs to the end in the latter case.  Returns TRUE otherwise.
+  */
+ Boolean MergeAssemblyGapFeats (BioseqPtr bsp)
+
+ {
+   SeqMgrFeatContext  fcontext;
+   Boolean            compatible = TRUE;
+   SeqFeatPtr         curr, prev = NULL;
+   Int4               prev_left = 0, prev_right = 0;
+   Boolean            same_span;
+
+   if (bsp == NULL) return FALSE;
+
+   for (curr = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_assembly_gap, &fcontext);
+        curr != NULL;
+        curr = SeqMgrGetNextFeature (bsp, curr, 0, FEATDEF_assembly_gap, &fcontext)) {
+
+     same_span = (Boolean) (prev != NULL && fcontext.left == prev_left && fcontext.right == prev_right);
+
+     if (same_span && prev->excpt) {
+       MoveGBQualList (curr, prev);
+       if (IncompatibleGapFeatQuals (curr)) {
+         compatible = FALSE;
+       }
+       prev->idx.deleteme = TRUE;
+     } else if (same_span && curr->excpt) {
+       MoveGBQualList (prev, curr);
+       if (IncompatibleGapFeatQuals (prev)) {
+         compatible = FALSE;
+       }
+       curr->idx.deleteme = TRUE;
+     }
+
+     /* remember this feature and its extent for the next comparison */
+     prev = curr;
+     prev_left = fcontext.left;
+     prev_right = fcontext.right;
+   }
+
+   return compatible;
+ }
+
+
+ /*
+  * DeltaLitOnly
+  *
+  * Returns TRUE when bsp is a delta Bioseq whose chain contains no segment
+  * with choice 1 (the choice value the rest of this file treats as a Seq-loc
+  * segment, as opposed to choice 2 for literals).  Returns FALSE for NULL or
+  * non-delta Bioseqs.
+  */
+ Boolean DeltaLitOnly (
+   BioseqPtr bsp
+ )
+
+ {
+   ValNodePtr  seg;
+
+   if (bsp == NULL) return FALSE;
+   if (bsp->repr != Seq_repr_delta) return FALSE;
+
+   /* stop at the first choice-1 segment, if any */
+   seg = (ValNodePtr)(bsp->seq_ext);
+   while (seg != NULL && seg->choice != 1) {
+     seg = seg->next;
+   }
+
+   return (Boolean) (seg == NULL);
+ }
+
+
+/* begin code moved from sqnutil1.c which is not part of cleanup */
+ /*
+  * Day count DateAdvance uses for a month number.  February is always 28
+  * (leap years are not considered); any value other than 2, 4, 6, 9 or 11
+  * yields 31.
+  */
+ static Uint1 DateAdvanceMonthLength (Uint1 month)
+
+ {
+   switch (month) {
+     case 4 :
+     case 6 :
+     case 9 :
+     case 11 :
+       return 30;
+     case 2 :
+       return 28;
+     default :
+       break;
+   }
+   return 31;
+ }
+
+ /*
+  * DateAdvance
+  *
+  * Moves dp forward by monthsToAdd months, in place.  When dp is NULL a new
+  * date is obtained from DateCurr and advanced instead.
+  *
+  * The month arithmetic is applied only when data [0] == 1 and data [1] > 0
+  * (presumably a structured date with a year set; data [1], data [2] and
+  * data [3] are used here as year, month and day -- confirm against the Date
+  * definition).  In that case a month of 0 becomes 1 and a day of 0 becomes
+  * the last day of the resulting month.  In all cases the day is finally
+  * clamped to the length of the month.
+  *
+  * Returns dp (or the newly created date); NULL only if dp was NULL and
+  * DateCurr failed.
+  *
+  * Fix: each full year removed from monthsToAdd now subtracts 12 months.  The
+  * previous loop subtracted a single month per added year, so e.g. adding 14
+  * months advanced the date by three years.
+  */
+ NLM_EXTERN DatePtr DateAdvance (DatePtr dp, Uint1 monthsToAdd)
+
+ {
+   Uint1  max_day;
+
+   if (dp == NULL) {
+     dp = DateCurr ();
+   }
+   if (dp == NULL) return NULL;
+
+   if (dp->data [0] == 1 && dp->data [1] > 0) {
+     /* convert whole years first so that at most 12 months remain */
+     while (monthsToAdd > 12) {
+       monthsToAdd -= 12;
+       (dp->data [1])++;
+     }
+     if (dp->data [2] < 13 - monthsToAdd) {
+       (dp->data [2]) += monthsToAdd;
+     } else {
+       /* wraps past December: month + monthsToAdd - 12 in the next year */
+       (dp->data [1])++;
+       (dp->data [2]) -= (12 - monthsToAdd);
+     }
+     if (dp->data [2] == 0) {
+       dp->data [2] = 1;
+     }
+     if (dp->data [3] == 0) {
+       dp->data [3] = DateAdvanceMonthLength (dp->data [2]);
+     }
+   }
+
+   /* never leave a day beyond the end of the month */
+   max_day = DateAdvanceMonthLength (dp->data [2]);
+   if (dp->data [3] > max_day) {
+     dp->data [3] = max_day;
+   }
+
+   return dp;
+ }
+
+
+/* special cases for chloroplast genetic code until implemented in taxonomy database */
+
+ /* one special case keyed by organism name: organism -> plastid genetic code */
+ typedef struct pgorg {
+ CharPtr organism;
+ Uint1 pgcode;
+ } PgOrg;
+
+ /* organism-name special cases, terminated by an entry whose organism is NULL */
+ static PgOrg pgOrgList [] = {
+ { "Chromera velia", 4 } ,
+ { NULL, 0 }
+ };
+
+ /* one special case keyed by lineage prefix: lineage -> plastid genetic code */
+ typedef struct pglin {
+ CharPtr lineage;
+ Uint1 pgcode;
+ } PgLin;
+
+ /* lineage-prefix special cases, terminated by an entry whose lineage is NULL */
+ static PgLin pgLinList [] = {
+ { "Eukaryota; Alveolata; Apicomplexa; Coccidia; ", 4 } ,
+ { NULL, 0 }
+ };
+
+ /*
+  * GetSpecialPlastidGenCode
+  *
+  * Looks up a special-case plastid genetic code, first by organism name
+  * (case-insensitive full match against pgOrgList) and then by lineage
+  * (case-insensitive prefix match against pgLinList).  A lineage match
+  * overrides an organism match, and within each table the last matching
+  * entry wins.
+  *
+  * Returns the code found, or 0 when nothing matches.  A result of 11 is also
+  * reported as 0.
+  */
+ NLM_EXTERN Uint1 GetSpecialPlastidGenCode (
+   CharPtr taxname,
+   CharPtr lineage
+ )
+
+ {
+   Uint1       result = 0;
+   PgOrg PNTR  org;
+   PgLin PNTR  lin;
+
+   if (StringDoesHaveText (taxname)) {
+     for (org = pgOrgList; org->organism != NULL; org++) {
+       if (StringICmp (taxname, org->organism) == 0) {
+         result = org->pgcode;
+       }
+     }
+   }
+
+   if (StringDoesHaveText (lineage)) {
+     for (lin = pgLinList; lin->lineage != NULL; lin++) {
+       /* compare only as many characters as the table entry has */
+       if (StringNICmp (lineage, lin->lineage, StringLen (lin->lineage)) == 0) {
+         result = lin->pgcode;
+       }
+     }
+   }
+
+   return (Uint1) ((result == 11) ? 0 : result);
+ }
+
+
+ /*
+  * FixCountryCapitalization
+  *
+  * Runs FindReplaceString over *str once for every name in the list returned
+  * by GetValidCountryList, replacing the name with itself (presumably a
+  * case-insensitive match, so occurrences take on the list's capitalization --
+  * confirm against FindReplaceString's flag meanings).
+  * Does nothing when str is NULL or *str has no text.
+  */
+ static void FixCountryCapitalization (CharPtr PNTR str)
+ {
+   CharPtr PNTR  entry;
+
+   if (str == NULL) return;
+   if (StringHasNoText (*str)) return;
+
+   for (entry = GetValidCountryList (); *entry != NULL; entry++) {
+     FindReplaceString (str, *entry, *entry, FALSE, TRUE);
+   }
+ }
+
+
+ /*
+  * FixCapitalizationInTitle
+  *
+  * Normalizes the capitalization of the title string *pTitle by applying, in
+  * order: ResetCapitalization (with first_is_upper), FixAbbreviationsInElement,
+  * FixOrgNamesInString (with org_names) and FixCountryCapitalization.
+  * The string is modified through pTitle.  Does nothing when pTitle is NULL.
+  */
+ NLM_EXTERN void
+ FixCapitalizationInTitle
+ (CharPtr PNTR pTitle,
+  Boolean      first_is_upper,
+  ValNodePtr   org_names)
+ {
+   if (pTitle != NULL) {
+     ResetCapitalization (first_is_upper, *pTitle);
+     FixAbbreviationsInElement (pTitle);
+     FixOrgNamesInString (*pTitle, org_names);
+     FixCountryCapitalization (pTitle);
+   }
+ }
+
+
+/* for converting "fake" structured comments to real structured comments */
+ /*
+  * Counters shared across one conversion pass:
+  *   num_converted          - comment descriptors replaced by a user object
+  *   num_unable_to_convert  - comment descriptors examined but left unchanged
+  *                            because the text did not split into complete pairs
+  */
+ typedef struct structuredcommentconversion {
+ Int4 num_converted;
+ Int4 num_unable_to_convert;
+ } StructuredCommentConversionData, PNTR StructuredCommentConversionPtr;
+
+ /*
+  * CommentWithSpacesToStructuredCommentCallback
+  *
+  * Descriptor callback that tries to turn a free-text comment into a
+  * "StructuredComment" user object.  The text is split into tokens; a token
+  * ends at a '~', at the end of the text, or at a space that is followed by
+  * whitespace or the end of the text (a single space followed by a
+  * non-whitespace character stays inside the token).  Tokens are consumed
+  * alternately as field label and field value.
+  *
+  * Outcome:
+  *   - no complete label/value pair: descriptor untouched, nothing counted;
+  *   - every label has a value: the comment text is freed, the descriptor
+  *     becomes a user descriptor holding the new object, num_converted++;
+  *   - a trailing label has no value: descriptor untouched,
+  *     num_unable_to_convert++.
+  *
+  * userdata may be NULL or point to a StructuredCommentConversionData.
+  *
+  * Changes from the previous version: the character passed to isspace is cast
+  * to unsigned char (passing a negative char is undefined), and allocation
+  * failures no longer lead to NULL dereferences -- the descriptor is left
+  * untouched and, once the object exists, counted as unable to convert.
+  */
+ static void CommentWithSpacesToStructuredCommentCallback (SeqDescPtr sdp, Pointer userdata)
+ {
+   UserObjectPtr  uop;
+   CharPtr        str, start, stop;
+   Int4           len;
+   UserFieldPtr   ufp = NULL, prev_ufp = NULL;
+   Boolean        ok = TRUE;
+   StructuredCommentConversionPtr  sd;
+
+   if (sdp == NULL || sdp->choice != Seq_descr_comment || StringHasNoText (sdp->data.ptrvalue)) {
+     return;
+   }
+
+   sd = (StructuredCommentConversionPtr) userdata;
+
+   uop = UserObjectNew ();
+   if (uop == NULL) return;
+   uop->type = ObjectIdNew ();
+   if (uop->type == NULL) {
+     uop = UserObjectFree (uop);
+     return;
+   }
+   uop->type->str = StringSave ("StructuredComment");
+
+   start = sdp->data.ptrvalue;
+   while (ok && *start != 0) {
+     /* find the end of the current token */
+     stop = start + StringCSpn (start, " ~");
+     while (*stop != 0 && *stop != '~' && !isspace ((unsigned char) *(stop + 1)) && *(stop + 1) != 0) {
+       stop = stop + 1 + StringCSpn (stop + 1, " ~");
+     }
+     len = 1 + stop - start;
+     str = (CharPtr) MemNew (sizeof (Char) * len);
+     if (str == NULL) {
+       ok = FALSE;
+       break;
+     }
+     StringNCpy (str, start, len - 1);
+     str[len - 1] = 0;
+     if (ufp == NULL) {
+       /* add new field */
+       ufp = UserFieldNew ();
+       if (ufp == NULL) {
+         str = MemFree (str);
+         ok = FALSE;
+         break;
+       }
+       if (prev_ufp == NULL) {
+         uop->data = ufp;
+       } else {
+         prev_ufp->next = ufp;
+       }
+       ufp->label = ObjectIdNew ();
+       if (ufp->label == NULL) {
+         str = MemFree (str);
+         ok = FALSE;
+         break;
+       }
+       ufp->label->str = str;
+     } else {
+       /* add value to last field */
+       ufp->choice = 1;
+       ufp->data.ptrvalue = str;
+       prev_ufp = ufp;
+       ufp = NULL;
+     }
+     if (*stop == 0) {
+       start = stop;
+     } else {
+       /* step over the separator and any run of spaces after it */
+       start = stop + 1 + StringSpn (stop + 1, " ");
+     }
+   }
+
+   if (! ok) {
+     /* ran out of memory part-way: discard the partial object */
+     uop = UserObjectFree (uop);
+     if (sd != NULL) {
+       sd->num_unable_to_convert++;
+     }
+     return;
+   }
+
+   if (prev_ufp == NULL) {
+     uop = UserObjectFree (uop);
+     return;
+   }
+   if (ufp == NULL) {
+     sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
+     sdp->data.ptrvalue = uop;
+     sdp->choice = Seq_descr_user;
+     if (sd != NULL) {
+       sd->num_converted++;
+     }
+   } else {
+     /* a label was left without a value */
+     uop = UserObjectFree (uop);
+     if (sd != NULL) {
+       sd->num_unable_to_convert++;
+     }
+   }
+ }
+
+
+ /*
+  * ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry
+  *
+  * Applies CommentWithSpacesToStructuredCommentCallback to every descriptor
+  * in sep and returns how many comments it reported as unable to convert.
+  */
+ NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEntryPtr sep)
+ {
+   StructuredCommentConversionData  counts;
+
+   counts.num_converted = 0;
+   counts.num_unable_to_convert = 0;
+
+   VisitDescriptorsInSep (sep, &counts, CommentWithSpacesToStructuredCommentCallback);
+
+   return counts.num_unable_to_convert;
+ }
+
+
+/* for feature xrefs */
+ /*
+  * MakeFeatureXrefsFromProteinIdQualsCallback
+  *
+  * Feature callback for mRNA features; data must be the SeqEntryPtr used to
+  * resolve ids.  For every protein_id or orig_protein_id qualifier, the
+  * qualifier value is turned into a Seq-id, the Bioseq it names is looked up,
+  * and the CDS that has that Bioseq as its product is fetched.  When a CDS is
+  * found, the two features are linked in both directions, and if the mRNA has
+  * no product name it is given the first name of the CDS's protein reference.
+  *
+  * Fix: the Seq-id made by CreateSeqIdFromText is now released after use; it
+  * was never freed before.  NOTE(review): this assumes CreateSeqIdFromText
+  * returns a Seq-id owned by the caller -- confirm.
+  */
+ static void MakeFeatureXrefsFromProteinIdQualsCallback (SeqFeatPtr sfp, Pointer data)
+ {
+   GBQualPtr    gbq;
+   SeqIdPtr     sip;
+   BioseqPtr    pbsp;
+   SeqFeatPtr   cds;
+   CharPtr      product;
+   ProtRefPtr   prp;
+   SeqEntryPtr  sep;
+
+   if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) {
+     return;
+   }
+
+   for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+     if (StringICmp (gbq->qual, "protein_id") == 0 || StringICmp (gbq->qual, "orig_protein_id") == 0) {
+       sip = CreateSeqIdFromText (gbq->val, sep);
+       pbsp = BioseqFind (sip);
+       sip = SeqIdFree (sip);
+       cds = SeqMgrGetCDSgivenProduct (pbsp, NULL);
+       if (cds != NULL) {
+         LinkTwoFeatures (cds, sfp);
+         LinkTwoFeatures (sfp, cds);
+         product = GetRNAProductString(sfp, NULL);
+         if (StringHasNoText (product)) {
+           /* borrow the protein name as the mRNA product name */
+           prp = GetProtRefForFeature (cds);
+           if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
+             SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old);
+           }
+         }
+         product = MemFree (product);
+       }
+     }
+   }
+ }
+
+
+ /*
+  * Assigns feature IDs throughout sep, then visits every feature in sep with
+  * MakeFeatureXrefsFromProteinIdQualsCallback, passing sep itself as the
+  * callback data.
+  */
+ NLM_EXTERN void MakeFeatureXrefsFromProteinIdQuals (SeqEntryPtr sep)
+ {
+ /* assign feature IDs, so that we can create xrefs that use them */
+ AssignFeatureIDs (sep);
+
+ VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromProteinIdQualsCallback);
+ }
+
+
+ /*
+  * MakeFeatureXrefsFromTranscriptIdQualsCallback
+  *
+  * Feature callback for mRNA features; data must be the SeqEntryPtr used to
+  * resolve ids.  For every transcript_id or orig_transcript_id qualifier, the
+  * qualifier value is turned into a Seq-id, the Bioseq it names is looked up,
+  * and the CDS that has that Bioseq as its product is fetched.  When a CDS is
+  * found, the two features are linked in both directions, and if the mRNA has
+  * no product name it is given the first name of the CDS's protein reference.
+  *
+  * Fix: the Seq-id made by CreateSeqIdFromText is now released after use; it
+  * was never freed before.  NOTE(review): this assumes CreateSeqIdFromText
+  * returns a Seq-id owned by the caller -- confirm.
+  */
+ static void MakeFeatureXrefsFromTranscriptIdQualsCallback (SeqFeatPtr sfp, Pointer data)
+ {
+   GBQualPtr    gbq;
+   SeqIdPtr     sip;
+   BioseqPtr    pbsp;
+   SeqFeatPtr   cds;
+   CharPtr      product;
+   ProtRefPtr   prp;
+   SeqEntryPtr  sep;
+
+   if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) {
+     return;
+   }
+
+   for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+     if (StringICmp (gbq->qual, "transcript_id") == 0 || StringICmp (gbq->qual, "orig_transcript_id") == 0) {
+       sip = CreateSeqIdFromText (gbq->val, sep);
+       pbsp = BioseqFind (sip);
+       sip = SeqIdFree (sip);
+       cds = SeqMgrGetCDSgivenProduct (pbsp, NULL);
+       if (cds != NULL) {
+         LinkTwoFeatures (cds, sfp);
+         LinkTwoFeatures (sfp, cds);
+         product = GetRNAProductString(sfp, NULL);
+         if (StringHasNoText (product)) {
+           /* borrow the protein name as the mRNA product name */
+           prp = GetProtRefForFeature (cds);
+           if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
+             SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old);
+           }
+         }
+         product = MemFree (product);
+       }
+     }
+   }
+ }
+
+
+ /*
+  * Assigns feature IDs throughout sep, then visits every feature in sep with
+  * MakeFeatureXrefsFromTranscriptIdQualsCallback, passing sep itself as the
+  * callback data.
+  */
+ NLM_EXTERN void MakeFeatureXrefsFromTranscriptIdQuals (SeqEntryPtr sep)
+ {
+ /* assign feature IDs, so that we can create xrefs that use them */
+ AssignFeatureIDs (sep);
+
+ VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromTranscriptIdQualsCallback);
+ }
+
+
+ /*
+  * FinishHalfXrefsCallback
+  *
+  * Feature callback.  For each xref on sfp whose id.choice is 3, looks up the
+  * feature that xref refers to (SeqMgrGetFeatureByFeatID).  If that feature
+  * exists and carries no xref with id.choice 3 of its own, LinkTwoFeatures is
+  * called with (sfp, that feature).  The data argument is not used.
+  */
+ static void FinishHalfXrefsCallback (SeqFeatPtr sfp, Pointer data)
+ {
+   SeqFeatPtr      partner;
+   SeqFeatXrefPtr  fwd, back;
+
+   if (sfp == NULL) return;
+
+   for (fwd = sfp->xref; fwd != NULL; fwd = fwd->next) {
+     if (fwd->id.choice != 3) continue;
+
+     partner = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, fwd, NULL);
+     if (partner == NULL) continue;
+
+     /* does the partner already have any xref of this kind? */
+     for (back = partner->xref; back != NULL; back = back->next) {
+       if (back->id.choice == 3) break;
+     }
+     if (back == NULL) {
+       LinkTwoFeatures (sfp, partner);
+     }
+   }
+ }
+
+
+ /* Visits every feature in sep with FinishHalfXrefsCallback (sep is passed as callback data). */
+ NLM_EXTERN void FinishHalfXrefs (SeqEntryPtr sep)
+ {
+ VisitFeaturesInSep (sep, (Pointer) sep, FinishHalfXrefsCallback);
+ }
+
+
+/* for fixing tRNA codons_recognized values */
+
+ /*
+  * GetAaFromtRNA
+  *
+  * Returns the amino acid of a tRNA as an ncbieaa code.
+  *   - aatype 2: trp->aa is returned unchanged;
+  *   - aatype 1, 3, 4: trp->aa is converted from iupacaa, ncbi8aa or ncbistdaa
+  *     respectively through a SeqMapTable;
+  *   - any other aatype: the lookup is attempted with a source code of 0.
+  * Returns 0 when trp is NULL or no conversion table is found.
+  */
+ NLM_EXTERN Uint1 GetAaFromtRNA (tRNAPtr trp)
+ {
+   Uint1           aa = 0;
+   Uint1           source_code;
+   SeqMapTablePtr  smtp;
+
+   if (trp == NULL) return 0;
+
+   /* already in the target alphabet */
+   if (trp->aatype == 2) return trp->aa;
+
+   switch (trp->aatype) {
+     case 1:
+       source_code = Seq_code_iupacaa;
+       break;
+     case 3:
+       source_code = Seq_code_ncbi8aa;
+       break;
+     case 4:
+       source_code = Seq_code_ncbistdaa;
+       break;
+     default:
+       source_code = 0;
+       break;
+   }
+
+   smtp = SeqMapTableFind (Seq_code_ncbieaa, source_code);
+   if (smtp != NULL) {
+     aa = SeqMapTableConvert (smtp, trp->aa);
+   }
+   return aa;
+ }
+
+
+ /*
+  * GetCodesFortRNA
+  *
+  * Finds the genetic code that applies to the Bioseq under sfp's location and
+  * returns the string stored in the first choice-3 element of that genetic
+  * code's data list (the table the callers index by codon number).  Falls
+  * back to genetic code 1 when the Bioseq's code cannot be found.
+  *
+  * When pCode is not NULL it receives the genetic code id that was used.
+  * Returns NULL when sfp is NULL or no such string exists.  The returned
+  * pointer refers into the genetic code data and is not a copy.
+  */
+ NLM_EXTERN CharPtr GetCodesFortRNA (SeqFeatPtr sfp, Int2 *pCode)
+ {
+   Int2            gcode = 0;
+   CharPtr         result = NULL;
+   BioseqPtr       target;
+   GeneticCodePtr  table;
+   ValNodePtr      item;
+
+   if (sfp == NULL) return NULL;
+
+   /* find genetic code table */
+
+   target = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID);
+   BioseqToGeneticCode (target, &gcode, NULL, NULL, NULL, 0, NULL);
+
+   table = GeneticCodeFind (gcode, NULL);
+   if (table == NULL) {
+     table = GeneticCodeFind (1, NULL);
+     gcode = 1;
+   }
+
+   if (table != NULL) {
+     item = (ValNodePtr) table->data.ptrvalue;
+     while (item != NULL && item->choice != 3) {
+       item = item->next;
+     }
+     if (item != NULL) {
+       result = (CharPtr) item->data.ptrvalue;
+     }
+   }
+
+   if (pCode != NULL) {
+     *pCode = gcode;
+   }
+   return result;
+ }
+
+
+ /*
+  * DoesCodonMatchAminoAcid
+  *
+  * Reports whether codon number index translates to amino acid aa according
+  * to the table codes (one character per codon index).
+  *
+  * Always TRUE when aa is 0 or 255 or when codes is NULL (nothing to check).
+  * Besides an exact table match, two stop-codon reassignments are accepted:
+  *   - 'U' (selenocysteine) at index 14 (TGA),
+  *   - 'O' (pyrrolysine) at index 11 (TAG),
+  * both only when the table has '*' at that index.
+  * TAA (10) is not yet known to be used for an exceptional amino acid.
+  */
+ static Boolean DoesCodonMatchAminoAcid (Uint1 aa, Uint1 index, CharPtr codes)
+ {
+   Uint1  translated;
+
+   if (aa == 0 || aa == 255 || codes == NULL) return TRUE;
+
+   translated = codes [index];
+   if (translated == aa) return TRUE;
+
+   /* only stop codons can stand in for an exceptional amino acid */
+   if (translated != '*') return FALSE;
+
+   return (Boolean) ((aa == 'U' && index == 14) || (aa == 'O' && index == 11));
+ }
+
+
+ /* TRUE when ch is one of the upper-case letters A, T, G or C; FALSE otherwise. */
+ static Boolean IsATGC (Char ch)
+ {
+   switch (ch) {
+     case 'A' :
+     case 'T' :
+     case 'G' :
+     case 'C' :
+       return TRUE;
+     default :
+       break;
+   }
+   return FALSE;
+ }
+
+
+ /*
+  * Returns the complement of an upper-case nucleotide letter
+  * (A<->T, G<->C); any other character yields 'N'.
+  */
+ static Char s_comp (Char ch)
+ {
+   switch (ch) {
+     case 'A' :
+       return 'T';
+     case 'T' :
+       return 'A';
+     case 'G' :
+       return 'C';
+     case 'C' :
+       return 'G';
+     default :
+       break;
+   }
+   return 'N';
+ }
+
+
+ /*
+  * GetFlipCodonLoggingInfo
+  *
+  * Produces the text used to identify sfp in log messages: a copy of the
+  * locus_tag of the gene reference found for the feature when it has text,
+  * otherwise whatever GetDiscrepancyItemText returns for the feature.
+  * Callers in this file release the result with MemFree.
+  */
+ static CharPtr GetFlipCodonLoggingInfo (SeqFeatPtr sfp)
+ {
+   SeqFeatPtr  gene_feat = NULL;
+   GeneRefPtr  gene_ref = NULL;
+   ValNode     item;
+
+   GetGeneInfoForFeature (sfp, &gene_ref, &gene_feat);
+
+   if (gene_ref == NULL || StringHasNoText (gene_ref->locus_tag)) {
+     /* no usable locus_tag: describe the feature itself */
+     MemSet (&item, 0, sizeof (ValNode));
+     item.choice = OBJ_SEQFEAT;
+     item.data.ptrvalue = sfp;
+     return GetDiscrepancyItemText (&item);
+   }
+
+   return StringSave (gene_ref->locus_tag);
+ }
+
+
+ /*
+  * Counts how many of the six codon slots of trp hold a value below 64
+  * (values of 64 and above are treated as unused slots).  Returns 0 for NULL.
+  */
+ static Int4 CountCodonsRecognized (tRNAPtr trp)
+ {
+   Int4  slot = 0, used = 0;
+
+   if (trp == NULL) return 0;
+
+   while (slot < 6) {
+     used += (trp->codon [slot] < 64) ? 1 : 0;
+     slot++;
+   }
+   return used;
+ }
+
+
+ /*
+  * Counts how many of the used codon slots of trp (value below 64) translate
+  * to aa according to DoesCodonMatchAminoAcid.  Returns 0 for NULL.
+  */
+ static Int4 CountMatchingCodons (tRNAPtr trp, Uint1 aa, CharPtr codes)
+ {
+   Int4  slot, matched = 0;
+
+   if (trp == NULL) return 0;
+
+   for (slot = 0; slot < 6; slot++) {
+     if (trp->codon [slot] >= 64) continue;
+     if (! DoesCodonMatchAminoAcid (aa, trp->codon[slot], codes)) continue;
+     matched++;
+   }
+
+   return matched;
+ }
+
+
+ /*
+  * CountFlippableCodons
+  *
+  * Counts the used codon slots of trp that do not match aa as stored but
+  * whose reverse complement does.  Only codons made entirely of A, T, G and C
+  * are considered.  trp is not modified.  Returns 0 for NULL.
+  *
+  * NOTE(review): IndexForCodon is given the genetic code id here while
+  * CodonForIndex is given Seq_code_iupacna -- confirm the second argument of
+  * IndexForCodon is meant to be the genetic code.
+  */
+ static Int4 CountFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code)
+ {
+   Int4   slot, flippable = 0;
+   Int2   flipped;
+   Uint1  fwd [4];
+   Uint1  rev [4];
+
+   if (trp == NULL) return 0;
+
+   /* CodonForIndex fills in only three characters, so terminate both
+    * buffers up front to keep them valid as C strings.
+    */
+   fwd [3] = 0;
+   rev [3] = 0;
+
+   for (slot = 0; slot < 6; slot++) {
+     if (trp->codon [slot] >= 64) continue;
+     if (DoesCodonMatchAminoAcid (aa, trp->codon[slot], codes)) continue;
+     if (! CodonForIndex (trp->codon [slot], Seq_code_iupacna, fwd)) continue;
+     if (! (IsATGC(fwd[0]) && IsATGC(fwd[1]) && IsATGC(fwd[2]))) continue;
+
+     /* reverse complement */
+     rev[0] = s_comp(fwd[2]);
+     rev[1] = s_comp(fwd[1]);
+     rev[2] = s_comp(fwd[0]);
+
+     flipped = IndexForCodon (rev, code);
+     if (flipped < 64 && DoesCodonMatchAminoAcid(aa, flipped, codes)) {
+       flippable++;
+     }
+   }
+
+   return flippable;
+ }
+
+
+ /*
+  * FlipFlippableCodons
+  *
+  * For every used codon slot of trp that does not match aa as stored but
+  * whose reverse complement does, replaces the slot with the index of the
+  * reverse complement.  Only codons made entirely of A, T, G and C are
+  * considered.  Returns the number of slots rewritten (0 for NULL).
+  *
+  * NOTE(review): IndexForCodon is given the genetic code id here while
+  * CodonForIndex is given Seq_code_iupacna -- confirm the second argument of
+  * IndexForCodon is meant to be the genetic code.
+  */
+ static Int4 FlipFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code)
+ {
+   Int4   slot, rewritten = 0;
+   Int2   flipped;
+   Uint1  fwd [4];
+   Uint1  rev [4];
+
+   if (trp == NULL) return 0;
+
+   /* CodonForIndex fills in only three characters, so terminate both
+    * buffers up front to keep them valid as C strings.
+    */
+   fwd [3] = 0;
+   rev [3] = 0;
+
+   for (slot = 0; slot < 6; slot++) {
+     if (trp->codon [slot] >= 64) continue;
+     if (DoesCodonMatchAminoAcid (aa, trp->codon[slot], codes)) continue;
+     if (! CodonForIndex (trp->codon [slot], Seq_code_iupacna, fwd)) continue;
+     if (! (IsATGC(fwd[0]) && IsATGC(fwd[1]) && IsATGC(fwd[2]))) continue;
+
+     /* reverse complement */
+     rev[0] = s_comp(fwd[2]);
+     rev[1] = s_comp(fwd[1]);
+     rev[2] = s_comp(fwd[0]);
+
+     flipped = IndexForCodon (rev, code);
+     if (flipped < 64 && DoesCodonMatchAminoAcid(aa, flipped, codes)) {
+       trp->codon[slot] = flipped;
+       rewritten++;
+     }
+   }
+
+   return rewritten;
+ }
+
+
+ /*
+  * TRUE when the codons recognized by this feature should not be checked:
+  * sfp is NULL, or its exception text mentions "RNA editing" or
+  * "modified codon recognition" (case-insensitive search).
+  */
+ static Boolean IgnoretRNACodonRecognized (SeqFeatPtr sfp)
+ {
+   if (sfp == NULL) return TRUE;
+   if (StringISearch (sfp->except_text, "RNA editing") != NULL) return TRUE;
+   return (Boolean) (StringISearch (sfp->except_text, "modified codon recognition") != NULL);
+ }
+
+
+//LCOV_EXCL_START
+ /*
+  * FlipCodonRecognizedCallback
+  *
+  * Feature callback for tRNA features; data is an optional LogInfoPtr.
+  * If the tRNA lists codons that do not all match its amino acid:
+  *   - with exactly one codon listed whose reverse complement matches, the
+  *     codon is replaced by its reverse complement;
+  *   - in every other case nothing is changed and, when a log is supplied,
+  *     an "Unable to flip" line is written (if the log has a file) and
+  *     data_in_log is set.
+  * Features exempted by IgnoretRNACodonRecognized, tRNAs with no codons
+  * listed, and features for which no genetic code table is found are skipped.
+  */
+ static void FlipCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data)
+ {
+   RnaRefPtr   rrp;
+   tRNAPtr     trp;
+   Uint1       aa;
+   CharPtr     label;
+   LogInfoPtr  lip;
+   Int2        code = 0;
+   CharPtr     codes = NULL;
+   Int4        total;
+
+   if (IgnoretRNACodonRecognized(sfp)) return;
+   if (sfp->idx.subtype != FEATDEF_tRNA) return;
+
+   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+   if (rrp == NULL || rrp->ext.choice != 2) return;
+
+   trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
+   if (trp == NULL) return;
+
+   total = CountCodonsRecognized (trp);
+   if (total == 0) return;
+
+   lip = (LogInfoPtr) data;
+   aa = GetAaFromtRNA (trp);
+
+   /* find genetic code table */
+   codes = GetCodesFortRNA (sfp, &code);
+   if (codes == NULL) return;
+
+   /* everything already matches */
+   if (CountMatchingCodons (trp, aa, codes) == total) return;
+
+   /* a flip is only attempted for a single listed codon */
+   if (total == 1 && CountFlippableCodons(trp, aa, codes, code) == total) {
+     FlipFlippableCodons (trp, aa, codes, code);
+     return;
+   }
+
+   if (lip == NULL) return;
+
+   if (lip->fp != NULL) {
+     /* text for log */
+     label = GetFlipCodonLoggingInfo (sfp);
+     fprintf (lip->fp, "Unable to flip bad codon_recognized for %s\n", label);
+     label = MemFree (label);
+   }
+   lip->data_in_log = TRUE;
+ }
+
+
+ /* Visits every feature in sep with FlipCodonRecognizedCallback, passing lip as the callback data. */
+ NLM_EXTERN void FlipCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
+ {
+ VisitFeaturesInSep (sep, lip, FlipCodonRecognizedCallback);
+ }
+
+
+ /*
+  * RemoveBadCodonRecognizedCallback
+  *
+  * Feature callback for tRNA features; data is an optional LogInfoPtr.
+  * Removes from the tRNA every listed codon that does not match its amino
+  * acid, provided the codon can be spelled out and consists only of A, T, G
+  * and C.  Removal shifts the later slots down by one and marks the last
+  * slot unused (255).  Each removal is reported through the log when one is
+  * supplied.
+  * Features exempted by IgnoretRNACodonRecognized, tRNAs with no codons
+  * listed, tRNAs whose codons all match, and features for which no genetic
+  * code table is found are left alone.
+  */
+ static void RemoveBadCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data)
+ {
+   RnaRefPtr   rrp;
+   tRNAPtr     trp;
+   Int2        slot, mover;
+   Uint1       aa;
+   Uint1       codon [4];
+   CharPtr     label;
+   LogInfoPtr  lip;
+   Int2        code = 0;
+   CharPtr     codes = NULL;
+   Int4        total;
+
+   if (IgnoretRNACodonRecognized(sfp)) return;
+   if (sfp->idx.subtype != FEATDEF_tRNA) return;
+
+   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+   if (rrp == NULL || rrp->ext.choice != 2) return;
+
+   trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
+   if (trp == NULL) return;
+
+   total = CountCodonsRecognized (trp);
+   if (total == 0) return;
+
+   lip = (LogInfoPtr) data;
+   aa = GetAaFromtRNA (trp);
+
+   /* find genetic code table */
+   codes = GetCodesFortRNA (sfp, &code);
+   if (codes == NULL) return;
+
+   if (CountMatchingCodons (trp, aa, codes) == total) return;
+
+   /* CodonForIndex fills in only three characters; terminate the buffer
+    * so it can be printed as a C string.
+    */
+   codon [3] = 0;
+
+   slot = 0;
+   while (slot < 6) {
+     if (trp->codon [slot] >= 64
+         || DoesCodonMatchAminoAcid (aa, trp->codon[slot], codes)
+         || ! CodonForIndex (trp->codon [slot], Seq_code_iupacna, codon)
+         || ! IsATGC(codon[0])
+         || ! IsATGC(codon[1])
+         || ! IsATGC(codon[2])) {
+       /* unused, already ok, or not removable - move on */
+       slot++;
+       continue;
+     }
+
+     /* close the gap left by the removed codon */
+     for (mover = slot + 1; mover < 6; mover++) {
+       trp->codon[mover - 1] = trp->codon[mover];
+     }
+     trp->codon[5] = 255;
+
+     if (lip != NULL) {
+       if (lip->fp != NULL) {
+         /* text for log */
+         label = GetFlipCodonLoggingInfo (sfp);
+         fprintf (lip->fp, "Removed codon_recognized '%s' for %s\n", codon, label);
+         label = MemFree (label);
+       }
+       lip->data_in_log = TRUE;
+     }
+     /* slot is deliberately not advanced: it now holds the next codon */
+   }
+ }
+
+
+NLM_EXTERN void RemoveBadCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
+{
+ VisitFeaturesInSep (sep, lip, RemoveBadCodonRecognizedCallback);
+}
+//LCOV_EXCL_STOP
+
+/* for finding sequences that are part of alignments */
+/* Alignment callback; userdata is the BioseqPtr that was reversed.
+ * The first id of the Bioseq that SeqIdOrderInBioseqIdList locates among
+ * the alignment's ids gives the row handed to ReverseAlignmentStrand.
+ * The alignment is indexed for that call and the index is discarded
+ * afterwards.
+ */
+NLM_EXTERN void ReverseBioseqInAlignment (SeqAlignPtr salp, Pointer userdata)
+{
+  BioseqPtr target;
+  SeqIdPtr  id;
+  Int4      row;
+
+  if (salp == NULL || userdata == NULL) return;
+
+  target = (BioseqPtr) userdata;
+  for (id = target->id; id != NULL; id = id->next) {
+    row = SeqIdOrderInBioseqIdList (id, SeqIdPtrFromSeqAlign (salp));
+    if (row <= 0) {
+      continue;
+    }
+    AlnMgr2IndexSeqAlignEx (salp, FALSE);
+    ReverseAlignmentStrand (salp, row);
+    SeqAlignIndexFree (salp->saip);
+    salp->saip = NULL;
+    break;   /* only the first matching id is processed */
+  }
+}
+
+
+/* Flips a dense-seg alignment in place: the order of the segments is
+ * reversed (lens, scores, strands and starts are swapped end-for-end) and
+ * then every strand entry is toggled (minus becomes plus, anything else
+ * becomes minus).  Alignments that are NULL, not SAS_DENSEG, or without
+ * segs are left untouched.  The alignment index is freed at the end.
+ *
+ * If the alignment has no strands array, one is allocated and filled with
+ * Seq_strand_plus first; if that allocation fails the function returns
+ * without modifying the alignment instead of dereferencing NULL.
+ *
+ * NOTE(review): dsp->scores is indexed per segment like an array here -
+ * confirm that matches how the scores are stored.
+ */
+NLM_EXTERN void FlipAlignment (SeqAlignPtr salp)
+{
+  DenseSegPtr dsp;
+  Int4        row, seg, swap_start, swap_len, opp_seg;
+  Int4        num_cells, near_idx, far_idx;
+  Score       swap_score;
+  Uint1       swap_strand;
+
+  if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL)
+  {
+    return;
+  }
+
+  dsp = (DenseSegPtr) salp->segs;
+  if (dsp->strands == NULL) {
+    num_cells = dsp->numseg * dsp->dim;
+    dsp->strands = (Uint1Ptr) MemNew (num_cells * sizeof (Uint1));
+    if (dsp->strands != NULL) {
+      MemSet (dsp->strands, Seq_strand_plus, num_cells * sizeof (Uint1));
+    } else if (num_cells > 0) {
+      /* allocation failed: the loops below would dereference NULL */
+      return;
+    }
+  }
+
+  for (seg = 0; seg < dsp->numseg / 2; seg++) {
+    /* swap segments to reverse order */
+    opp_seg = dsp->numseg - 1 - seg;
+    /* swap lens */
+    swap_len = dsp->lens[seg];
+    dsp->lens[seg] = dsp->lens[opp_seg];
+    dsp->lens[opp_seg] = swap_len;
+    /* swap scores */
+    if (dsp->scores != NULL) {
+      swap_score = dsp->scores[seg];
+      dsp->scores[seg] = dsp->scores[opp_seg];
+      dsp->scores[opp_seg] = swap_score;
+    }
+    for (row = 0; row < dsp->dim; row++) {
+      near_idx = dsp->dim * seg + row;
+      far_idx = dsp->dim * opp_seg + row;
+
+      /* swap strands */
+      swap_strand = dsp->strands[near_idx];
+      dsp->strands[near_idx] = dsp->strands[far_idx];
+      dsp->strands[far_idx] = swap_strand;
+
+      /* swap starts */
+      swap_start = dsp->starts[near_idx];
+      dsp->starts[near_idx] = dsp->starts[far_idx];
+      dsp->starts[far_idx] = swap_start;
+    }
+  }
+
+  /* reverse segments: toggle the strand of every cell */
+  for (seg = 0; seg < dsp->numseg; seg++) {
+    for (row = 0; row < dsp->dim; row++) {
+      near_idx = dsp->dim * seg + row;
+      if (dsp->strands[near_idx] == Seq_strand_minus) {
+        dsp->strands[near_idx] = Seq_strand_plus;
+      } else {
+        dsp->strands[near_idx] = Seq_strand_minus;
+      }
+    }
+  }
+  /* any existing index no longer describes the alignment */
+  SAIndex2Free2(salp->saip);
+  salp->saip = NULL;
+}
+
+
+/* Annotation callback; userdata is a ValNode list whose data.ptrvalue
+ * entries are BioseqPtrs.  For an alignment annotation (sap->type == 2)
+ * whose alignment is not marked deleteme, FlipAlignment is called only if
+ * every row of the alignment belongs to one of the listed Bioseqs.
+ * NULL Bioseq entries in the list are skipped rather than dereferenced.
+ *
+ * NOTE(review): ownership of the SeqIdPtr returned by
+ * AlnMgr2GetNthSeqIdPtr is not visible here; if it is a copy it is never
+ * freed - confirm against the alignment manager.
+ */
+NLM_EXTERN void FlipEntireAlignmentIfAllSequencesFlipped (SeqAnnotPtr sap, Pointer userdata)
+{
+  SeqAlignPtr salp;
+  ValNodePtr  vnp;
+  BioseqPtr   bsp;
+  SeqIdPtr    sip;
+  Boolean     found;
+  Int4        row, num_rows;
+
+  if (sap == NULL || sap->type != 2 || userdata == NULL) return;
+  salp = (SeqAlignPtr) sap->data;
+  if (salp == NULL || salp->idx.deleteme) return;
+
+  AlnMgr2IndexSingleChildSeqAlign(salp);
+  num_rows = AlnMgr2GetNumRows(salp);
+  for (row = 1; row <= num_rows; row++) {
+    sip = AlnMgr2GetNthSeqIdPtr(salp, row);
+    found = FALSE;
+    vnp = (ValNodePtr)userdata;
+    while (vnp != NULL && !found) {
+      bsp = (BioseqPtr) vnp->data.ptrvalue;
+      if (bsp != NULL && SeqIdOrderInBioseqIdList (sip, bsp->id) > 0) {
+        found = TRUE;
+      }
+      vnp = vnp->next;
+    }
+    /* a row that is not in the list means the alignment stays as it is */
+    if (!found) return;
+  }
+
+  FlipAlignment(salp);
+}
+
+
+/* Returns a new ValNode list (choice 0) holding those Bioseqs from
+ * bsp_list for which IsBioseqInAnyAlignment is true within their own
+ * entity.  NULL entries are ignored.  The Bioseq pointers are shared with
+ * bsp_list, not copied.
+ */
+NLM_EXTERN ValNodePtr ListSequencesWithAlignments (ValNodePtr bsp_list)
+{
+  ValNodePtr aligned = NULL;
+  ValNodePtr node = bsp_list;
+  BioseqPtr  candidate;
+
+  while (node != NULL) {
+    candidate = (BioseqPtr) node->data.ptrvalue;
+    if (candidate != NULL) {
+      if (IsBioseqInAnyAlignment (candidate, candidate->idx.entityID)) {
+        ValNodeAddPointer (&aligned, 0, candidate);
+      }
+    }
+    node = node->next;
+  }
+  return aligned;
+}
+
+
+/* Applies func to every Bioseq in bsp_list (when func is not NULL) and,
+ * if check_for_aln is set, follows each call with a pass of
+ * ReverseBioseqInAlignment over the alignments of the top Seq-entry for
+ * entityID.  When revCompFeats is set, raw and const Bioseqs additionally
+ * get RevCompFeats run over that Seq-entry.
+ */
+NLM_EXTERN void RevCompBioseqList (ValNodePtr bsp_list,
+                                   Uint2 entityID,
+                                   BioseqFunc func,
+                                   Boolean revCompFeats,
+                                   Boolean check_for_aln)
+{
+  SeqEntryPtr top;
+  BioseqPtr   cur;
+  ValNodePtr  node;
+
+  top = GetTopSeqEntryForEntityID (entityID);
+
+  node = bsp_list;
+  while (node != NULL) {
+    cur = (BioseqPtr) node->data.ptrvalue;
+
+    if (func != NULL) {
+      func (cur);
+      /* alignments are only adjusted when a sequence function was run */
+      if (check_for_aln) {
+        VisitAlignmentsInSep (top, (Pointer) cur, ReverseBioseqInAlignment);
+      }
+    }
+
+    if (revCompFeats
+        && (cur->repr == Seq_repr_raw || cur->repr == Seq_repr_const)
+        && top != NULL) {
+      SeqEntryExplore (top, (Pointer) cur, RevCompFeats);
+    }
+
+    node = node->next;
+  }
+}
+
+
+/* State shared between IsBioseqInAnyAlignment and FindAlignmentCallback. */
+typedef struct bioseqinalignmentdata {
+ Boolean found;        /* set once an alignment containing lookingfor is seen */
+ BioseqPtr lookingfor; /* the Bioseq being searched for */
+} BioseqInAlignmentData, PNTR BioseqInAlignmentPtr;
+
+/* Returns the first non-FALSE result of SeqAlignFindSeqId over the ids of
+ * bsp, or FALSE if no id is found in salp.  A NULL salp or bsp yields
+ * FALSE (callers in this file may pass list entries that were never
+ * checked for NULL).
+ */
+static Boolean IsBioseqInThisAlignment (SeqAlignPtr salp, BioseqPtr bsp)
+{
+  SeqIdPtr sip;
+  Boolean  found = FALSE;
+
+  if (salp == NULL || bsp == NULL) {
+    return FALSE;
+  }
+
+  for (sip = bsp->id; sip != NULL && ! found; sip = sip->next)
+  {
+    found = SeqAlignFindSeqId (salp, sip);
+  }
+  return found;
+}
+
+/* Annotation callback for IsBioseqInAnyAlignment.  userdata is a
+ * BioseqInAlignmentPtr; once its found flag is set, later annotations are
+ * skipped.  Only alignment annotations (sap->type == 2) are examined.
+ */
+static void FindAlignmentCallback (SeqAnnotPtr sap, Pointer userdata)
+{
+  BioseqInAlignmentPtr search;
+  SeqAlignPtr          aln;
+
+  if (sap == NULL || userdata == NULL || sap->type != 2) return;
+
+  search = (BioseqInAlignmentPtr) userdata;
+  if (search->found) return;
+
+  aln = (SeqAlignPtr) sap->data;
+  if (aln != NULL) {
+    search->found = IsBioseqInThisAlignment (aln, search->lookingfor);
+  }
+}
+
+/* Reports whether FindAlignmentCallback finds bsp in an alignment
+ * annotation visited under the top Seq-entry of input_entityID.
+ */
+NLM_EXTERN Boolean IsBioseqInAnyAlignment (BioseqPtr bsp, Uint2 input_entityID)
+{
+  BioseqInAlignmentData search;
+  SeqEntryPtr           top;
+
+  search.lookingfor = bsp;
+  search.found = FALSE;
+
+  top = GetTopSeqEntryForEntityID (input_entityID);
+  VisitAnnotsInSep (top, &search, FindAlignmentCallback);
+
+  return search.found;
+}
+
+
+/* State shared between AreAnyElementsOfSetInAnyAlignment and
+ * FindListInAlignmentCallback.
+ */
+typedef struct bioseqlistinalignmentdata {
+ Boolean found;         /* set once any listed Bioseq is seen in an alignment */
+ ValNodePtr lookingfor; /* list whose data.ptrvalue entries are the Bioseqs sought */
+} BioseqListInAlignmentData, PNTR BioseqListInAlignmentPtr;
+
+/* Appends to *list (choice OBJ_BIOSEQ) the data pointer of every Bioseq
+ * entry found in bssp, descending into entries that are not Bioseqs as if
+ * they were nested sets.
+ */
+static void ListBioseqsInSet (BioseqSetPtr bssp, ValNodePtr PNTR list)
+{
+  SeqEntryPtr member;
+
+  if (bssp == NULL) return;
+
+  member = bssp->seq_set;
+  while (member != NULL) {
+    if (!IS_Bioseq (member)) {
+      ListBioseqsInSet (member->data.ptrvalue, list);
+    } else {
+      ValNodeAddPointer (list, OBJ_BIOSEQ, member->data.ptrvalue);
+    }
+    member = member->next;
+  }
+}
+
+
+/* Annotation callback for AreAnyElementsOfSetInAnyAlignment.  userdata is
+ * a BioseqListInAlignmentPtr; the found flag is set as soon as one of the
+ * listed Bioseqs is in the alignment of an alignment annotation
+ * (sap->type == 2), and later annotations are then skipped.
+ */
+static void FindListInAlignmentCallback (SeqAnnotPtr sap, Pointer userdata)
+{
+  BioseqListInAlignmentPtr search;
+  SeqAlignPtr              aln;
+  ValNodePtr               node;
+  Boolean                  hit;
+
+  if (sap == NULL || userdata == NULL || sap->type != 2) return;
+
+  search = (BioseqListInAlignmentPtr) userdata;
+  if (search->found) return;
+
+  aln = (SeqAlignPtr) sap->data;
+  if (aln == NULL) return;
+
+  for (node = search->lookingfor; node != NULL; node = node->next) {
+    hit = IsBioseqInThisAlignment (aln, node->data.ptrvalue);
+    if (hit) {
+      search->found = hit;
+      break;
+    }
+  }
+}
+
+
+/* Reports whether any Bioseq collected from bssp by ListBioseqsInSet is
+ * found in an alignment annotation under the top Seq-entry of
+ * input_entityID.  The temporary list is released with ValNodeFree.
+ */
+NLM_EXTERN Boolean AreAnyElementsOfSetInAnyAlignment (BioseqSetPtr bssp, Uint2 input_entityID)
+{
+  BioseqListInAlignmentData search;
+  SeqEntryPtr               top;
+
+  search.found = FALSE;
+  search.lookingfor = NULL;
+
+  top = GetTopSeqEntryForEntityID (input_entityID);
+  ListBioseqsInSet (bssp, &(search.lookingfor));
+
+  VisitAnnotsInSep (top, &search, FindListInAlignmentCallback);
+
+  search.lookingfor = ValNodeFree (search.lookingfor);
+  return search.found;
+}
+
+
+/* Annotation callback for RemoveAlignmentsWithSequence.  userdata is the
+ * head of a SeqId list.  For an alignment annotation (sap->type == 2), the
+ * annotation is flagged via sap->idx.deleteme as soon as
+ * FindSeqIdinSeqAlign locates one of those ids in the alignment at
+ * sap->data.
+ * NOTE(review): the guard tests salp->idx.deleteme but the flag is set on
+ * sap, whereas RemoveAlignmentsWithElementsOfSetCallback sets
+ * salp->idx.deleteme - confirm which object is meant to be marked.
+ */
+static void RemoveAlignmentsWithSequenceCallback (SeqAnnotPtr sap, Pointer userdata)
+{
+ SeqAlignPtr salp;
+ SeqIdPtr sip;
+
+ if (sap == NULL || sap->type != 2 || userdata == NULL) return;
+ salp = (SeqAlignPtr) sap->data;
+ if (salp == NULL || salp->idx.deleteme) return;
+ sip = (SeqIdPtr) userdata;
+ /* stop at the first id that is present in the alignment */
+ while (sip != NULL && !sap->idx.deleteme) {
+ if (FindSeqIdinSeqAlign (salp, sip)) {
+ sap->idx.deleteme = TRUE;
+ }
+ sip = sip->next;
+ }
+}
+
+/* Runs RemoveAlignmentsWithSequenceCallback, with the id list of bsp as
+ * its userdata, over the annotations under the top Seq-entry of
+ * input_entityID.  Does nothing for a NULL bsp.  This function only sets
+ * deleteme flags; it does not itself delete anything.
+ */
+NLM_EXTERN void RemoveAlignmentsWithSequence (BioseqPtr bsp, Uint2 input_entityID)
+{
+ SeqEntryPtr topsep;
+
+ if (bsp == NULL) return;
+ topsep = GetTopSeqEntryForEntityID (input_entityID);
+
+ VisitAnnotsInSep (topsep, bsp->id, RemoveAlignmentsWithSequenceCallback);
+}
+
+
+/* for segregating sets */
+/* Returns non-FALSE when a Bioseq in bssp, or in a set nested inside it,
+ * is reported by IsBioseqInThisAlignment as present in salp.  The search
+ * stops at the first hit.
+ */
+static Boolean IsElementOfSetInAlignment (BioseqSetPtr bssp, SeqAlignPtr salp)
+{
+  SeqEntryPtr member;
+  Boolean     hit = FALSE;
+
+  if (bssp == NULL || salp == NULL) return FALSE;
+
+  for (member = bssp->seq_set; member != NULL; member = member->next) {
+    if (IS_Bioseq (member)) {
+      hit = IsBioseqInThisAlignment (salp, member->data.ptrvalue);
+    } else if (IS_Bioseq_set (member)) {
+      hit = IsElementOfSetInAlignment (member->data.ptrvalue, salp);
+    }
+    if (hit) {
+      return hit;
+    }
+  }
+  return hit;
+}
+
+
+/* Annotation callback for RemoveAlignmentsWithElementsOfSet.  userdata is
+ * a BioseqSetPtr.  The alignment of an alignment annotation
+ * (sap->type == 2) is flagged via salp->idx.deleteme when
+ * IsElementOfSetInAlignment reports a member of the set in it.
+ */
+static void RemoveAlignmentsWithElementsOfSetCallback (SeqAnnotPtr sap, Pointer userdata)
+{
+  SeqAlignPtr aln;
+
+  if (sap == NULL || sap->type != 2 || userdata == NULL) return;
+
+  aln = (SeqAlignPtr) sap->data;
+  if (aln == NULL) return;
+  if (aln->idx.deleteme) return;
+
+  if (IsElementOfSetInAlignment ((BioseqSetPtr) userdata, aln)) {
+    aln->idx.deleteme = TRUE;
+  }
+}
+
+
+/* Runs RemoveAlignmentsWithElementsOfSetCallback, with bssp as its
+ * userdata, over the annotations under the top Seq-entry of
+ * input_entityID.  Does nothing for a NULL bssp.  This function only sets
+ * deleteme flags; it does not itself delete anything.
+ */
+NLM_EXTERN void RemoveAlignmentsWithElementsOfSet (BioseqSetPtr bssp, Uint2 input_entityID)
+{
+ SeqEntryPtr topsep;
+
+ if (bssp == NULL) return;
+ topsep = GetTopSeqEntryForEntityID (input_entityID);
+
+ VisitAnnotsInSep (topsep, bssp, RemoveAlignmentsWithElementsOfSetCallback);
+}
+
+
+/* code for creating a location for a gene based on location of feature */
+/* assumes locations on same Bioseq */
+/* Returns TRUE when slp_next does not follow slp_prev in strand order:
+ * on the minus strand when the start or stop of slp_prev is lower than
+ * that of slp_next, otherwise when it is higher.  A pair in which exactly
+ * one interval is minus-strand is never reported as out of order, and a
+ * NULL argument yields FALSE.
+ */
+static Boolean OutOfOrder (SeqLocPtr slp_prev, SeqLocPtr slp_next)
+{
+  Boolean prev_minus, next_minus;
+  Int4    from_prev, to_prev, from_next, to_next;
+
+  if (slp_prev == NULL || slp_next == NULL) return FALSE;
+
+  prev_minus = (Boolean) (SeqLocStrand (slp_prev) == Seq_strand_minus);
+  next_minus = (Boolean) (SeqLocStrand (slp_next) == Seq_strand_minus);
+
+  /* mixed strand, not necessarily out of order */
+  if (prev_minus != next_minus) return FALSE;
+
+  from_prev = SeqLocStart (slp_prev);
+  to_prev = SeqLocStop (slp_prev);
+  from_next = SeqLocStart (slp_next);
+  to_next = SeqLocStop (slp_next);
+
+  if (prev_minus) {
+    return (from_prev < from_next || to_prev < to_next) ? TRUE : FALSE;
+  }
+  return (from_prev > from_next || to_prev > to_next) ? TRUE : FALSE;
+}
+
+
+/* Assumes both locations are on the same Bioseq, in order, on the same
+ * strand.  Returns TRUE when the gap between slp_prev and slp_next is
+ * greater than 10000: measured from the stop of slp_next to the start of
+ * slp_prev when slp_prev is minus-strand, otherwise from the stop of
+ * slp_prev to the start of slp_next.  A NULL argument yields FALSE.
+ */
+static Boolean TooFarApartForTransSplicing (SeqLocPtr slp_prev, SeqLocPtr slp_next)
+{
+  Int4 gap;
+
+  if (slp_prev == NULL || slp_next == NULL) return FALSE;
+
+  if (SeqLocStrand (slp_prev) == Seq_strand_minus) {
+    gap = SeqLocStart (slp_prev) - SeqLocStop (slp_next);
+  } else {
+    gap = SeqLocStart (slp_next) - SeqLocStop (slp_prev);
+  }
+
+  if (gap > 10000) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Builds a new location for a gene from the feature location floc.
+ * The pieces of floc are walked with SeqLocFindNext.  A new interval is
+ * started whenever the Bioseq or strand differs from the previous piece
+ * or, when trans_spliced is set, whenever the piece is out of order or
+ * more than the trans-splicing distance from the previous interval;
+ * otherwise the piece is merged into the current interval with
+ * SeqLocMerge.  Multiple intervals are wrapped in a SEQLOC_MIX.  The
+ * partial flags of floc are copied onto the result.  Returns NULL when no
+ * interval was produced.
+ */
+NLM_EXTERN SeqLocPtr MakeGeneLocForFeatureLoc (SeqLocPtr floc, Uint2 entityID, Boolean trans_spliced)
+{
+ /* in the age of small-set genomes, we're going to pretend that segmented sets do not exist.
+ * A gene location for a feature location that includes multiple bioseqs should include
+ * one interval per bioseq that covers all locations of the feature that occur on that bioseq.
+ */
+
+ SeqLocPtr slp_new = NULL, slp_tmp, slp_last = NULL, add_slp;
+ SeqLocPtr PNTR pAddSlp = NULL;
+ BioseqPtr bsp, last_bsp = NULL;
+ Boolean partial5 = FALSE, partial3 = FALSE;
+ Uint2 strand, last_strand = Seq_strand_plus;
+
+ /* pAddSlp always addresses the link that holds the interval currently being grown */
+ pAddSlp = &slp_new;
+ for (slp_tmp = SeqLocFindNext (floc, NULL);
+ slp_tmp != NULL;
+ slp_tmp = SeqLocFindNext (floc, slp_tmp))
+ {
+ bsp = GetBioseqGivenSeqLoc (slp_tmp, entityID);
+ strand = SeqLocStrand (slp_tmp);
+ if (bsp != last_bsp || strand != last_strand
+ || (trans_spliced && OutOfOrder (slp_last, slp_tmp))
+ || (trans_spliced && TooFarApartForTransSplicing(slp_last, slp_tmp))) {
+ /* start a new interval and append it to the chain */
+ add_slp = SeqLocMerge (bsp, slp_tmp, NULL, TRUE, FALSE, FALSE);
+ if (slp_last == NULL) {
+ slp_new = add_slp;
+ } else {
+ slp_last->next = add_slp;
+ pAddSlp = &(slp_last->next);
+ }
+ slp_last = add_slp;
+ last_bsp = bsp;
+ last_strand = strand;
+ } else {
+ /* extend the current interval: merge, then replace the old one in the chain */
+ add_slp = SeqLocMerge (bsp, *pAddSlp, slp_tmp, TRUE, FALSE, FALSE);
+ *pAddSlp = SeqLocFree (*pAddSlp);
+ *pAddSlp = add_slp;
+ slp_last = add_slp;
+ }
+ }
+ /* more than one interval: wrap the chain in a mix location */
+ if (slp_new != NULL && slp_new->next != NULL) {
+ slp_tmp = ValNodeNew (NULL);
+ slp_tmp->choice = SEQLOC_MIX;
+ slp_tmp->data.ptrvalue = slp_new;
+ slp_new = slp_tmp;
+ }
+ if (slp_new != NULL) {
+ CheckSeqLocForPartial (floc, &partial5, &partial3);
+ SetSeqLocPartial (slp_new, partial5, partial3);
+ }
+
+ return slp_new;
+}
+
+
+/* code for resolving conflicting IDs */
+/* Passed as userdata to CheckFeatForNuclID_callback and
+ * CheckFeatForProductID_callback.
+ */
+typedef struct {
+ CharPtr oldStr;  /* text of the local id before it was renamed */
+ SeqIdPtr newSip; /* the Seq-id that replaces it */
+} ReplaceIDStruct, PNTR ReplaceIDStructPtr;
+
+
+/********************************************************************
+*
+* SeqLocReplaceLocalID
+*   replaces the Seq-Id in a Seq-Loc (slp) with a copy of a new Seq-Id
+*   (new_sip), only where the existing Seq-Id is a local one.
+*   Packed-int, mix and equiv locations are handled by visiting each
+*   component returned by SeqLocFindNext.  Other location types
+*   (including bonds) are left untouched.
+*
+*   A NULL slp or new_sip, and locations whose data or id pointer is
+*   NULL, are left as they are instead of being dereferenced.
+*
+*   Returns slp.
+*
+**********************************************************************/
+
+static SeqLocPtr SeqLocReplaceLocalID (SeqLocPtr slp,
+                                       SeqIdPtr new_sip)
+{
+  SeqLocPtr     curr;
+  PackSeqPntPtr pspp;
+  SeqIntPtr     target_sit;
+  SeqPntPtr     spp;
+  SeqIdPtr      currId;
+
+  if (slp == NULL || new_sip == NULL) {
+    return slp;
+  }
+
+  switch (slp->choice) {
+    case SEQLOC_PACKED_INT :
+    case SEQLOC_MIX :
+    case SEQLOC_EQUIV :
+      curr = NULL;
+      while ((curr = SeqLocFindNext (slp, curr)) != NULL) {
+        curr = SeqLocReplaceLocalID (curr, new_sip);
+      }
+      break;
+    case SEQLOC_PACKED_PNT :
+      pspp = (PackSeqPntPtr) slp->data.ptrvalue;
+      if (pspp != NULL && pspp->id != NULL
+          && pspp->id->choice == SEQID_LOCAL) {
+        SeqIdFree (pspp->id);
+        pspp->id = SeqIdDup (new_sip);
+      }
+      break;
+    case SEQLOC_EMPTY :
+    case SEQLOC_WHOLE :
+      currId = (SeqIdPtr) slp->data.ptrvalue;
+      if (currId != NULL && currId->choice == SEQID_LOCAL) {
+        SeqIdFree (currId);
+        slp->data.ptrvalue = (Pointer) SeqIdDup (new_sip);
+      }
+      break;
+    case SEQLOC_INT :
+      target_sit = (SeqIntPtr) slp->data.ptrvalue;
+      if (target_sit != NULL && target_sit->id != NULL
+          && target_sit->id->choice == SEQID_LOCAL) {
+        SeqIdFree (target_sit->id);
+        target_sit->id = SeqIdDup (new_sip);
+      }
+      break;
+    case SEQLOC_PNT :
+      spp = (SeqPntPtr) slp->data.ptrvalue;
+      if (spp != NULL && spp->id != NULL
+          && spp->id->choice == SEQID_LOCAL) {
+        SeqIdFree (spp->id);
+        spp->id = SeqIdDup (new_sip);
+      }
+      break;
+    default :
+      break;
+  }
+  return slp;
+}
+
+/* Replaces local ids with sip in every location that belongs to sfp:
+ * the feature location, the locations of a coding region's code breaks,
+ * and the anticodon location of a tRNA.  Does nothing when sfp or sip is
+ * NULL.  Code breaks without a location are skipped, because
+ * SeqLocReplaceLocalID is not handed a NULL location anywhere else in
+ * this function either.
+ */
+static void ReplaceIdForFeature (SeqFeatPtr sfp, SeqIdPtr sip)
+{
+  CdRegionPtr  crp;
+  CodeBreakPtr cbp;
+  RnaRefPtr    rrp;
+  tRNAPtr      trp;
+
+  if (sfp == NULL || sip == NULL) {
+    return;
+  }
+  /* replace local ID in location */
+  if (sfp->location != NULL) {
+    SeqLocReplaceLocalID (sfp->location, sip);
+  }
+
+  /* also replace local ID in code breaks */
+  if (sfp->data.choice == SEQFEAT_CDREGION
+      && (crp = (CdRegionPtr)sfp->data.value.ptrvalue) != NULL) {
+    for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
+      if (cbp->loc != NULL) {
+        SeqLocReplaceLocalID (cbp->loc, sip);
+      }
+    }
+  }
+
+  /* also replace local ID in anticodons */
+  if (sfp->data.choice == SEQFEAT_RNA
+      && (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) != NULL
+      && rrp->type == 3 && rrp->ext.choice == 2
+      && (trp = (tRNAPtr) rrp->ext.value.ptrvalue) != NULL
+      && trp->anticodon != NULL) {
+    SeqLocReplaceLocalID (trp->anticodon, sip);
+  }
+}
+
+
+/* Feature callback: userdata is the SeqIdPtr handed on to
+ * ReplaceIdForFeature for every non-NULL feature.
+ */
+static void ReplaceLocalIdOnLoc_callback (SeqFeatPtr sfp, Pointer userdata)
+{
+  if (sfp != NULL) {
+    ReplaceIdForFeature (sfp, (SeqIdPtr) userdata);
+  }
+}
+
+
+/* Feature callback: userdata is a ReplaceIDStructPtr.  When the Seq-id of
+ * the feature location is a local id whose text (or whose number, printed
+ * in decimal) equals idsPtr->oldStr, the local ids of the feature are
+ * replaced with idsPtr->newSip.
+ *
+ * The id payload is only read as an ObjectId after the id has been
+ * confirmed to be SEQID_LOCAL.  ReplaceIdForFeature touches local ids
+ * only, so features with other id types could never be changed here.
+ */
+static void CheckFeatForNuclID_callback (SeqFeatPtr sfp, Pointer userdata)
+{
+  SeqIdPtr           featSip = NULL;
+  ReplaceIDStructPtr idsPtr;
+  ObjectIdPtr        oip;
+  Char               tmpIdStr [128];
+
+  if (NULL == sfp)
+    return;
+
+  /* Get the old Seq Id and the new */
+  /* one that it was changed to.    */
+
+  idsPtr = (ReplaceIDStructPtr) userdata;
+  if ((NULL == idsPtr)         ||
+      (NULL == idsPtr->oldStr) ||
+      (NULL == idsPtr->newSip))
+    return;
+
+  /* Get the location Seq ID for this feature */
+
+  featSip = SeqLocId (sfp->location);
+  if (featSip == NULL || featSip->choice != SEQID_LOCAL) return;
+  oip = (ObjectIdPtr) featSip->data.ptrvalue;
+  if (oip == NULL) return;
+
+  /* If the location Seq ID matches the old Seq Id  */
+  /* then change the location to point to the new.  */
+
+  if (NULL == oip->str) {
+    sprintf (tmpIdStr, "%ld", (long) oip->id);
+    if (StringCmp (tmpIdStr, idsPtr->oldStr) == 0) {
+      ReplaceIdForFeature (sfp, idsPtr->newSip);
+    }
+  } else if (StringCmp (oip->str, idsPtr->oldStr) == 0) {
+    ReplaceIdForFeature (sfp, idsPtr->newSip);
+  }
+}
+
+
+/* Feature callback: userdata is a ReplaceIDStructPtr.  For a coding region
+ * with a product, when the product's Seq-id is a local id whose text (or
+ * whose number, printed in decimal) equals idsPtr->oldStr, the local id
+ * in the product location is replaced with idsPtr->newSip.
+ *
+ * The two comparisons are mutually exclusive: replacing the id frees the
+ * Seq-id that oip was read from, so oip must not be consulted again after
+ * a replacement.
+ */
+static void CheckFeatForProductID_callback (SeqFeatPtr sfp, Pointer userdata)
+{
+  SeqIdPtr           featSip = NULL;
+  ReplaceIDStructPtr idsPtr;
+  ObjectIdPtr        oip;
+  Char               tmpIdStr [128];
+
+  if (NULL == sfp)
+    return;
+
+  if (sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL)
+    return;
+
+  /* Get the old Seq Id and the new */
+  /* one that it was changed to.    */
+
+  idsPtr = (ReplaceIDStructPtr) userdata;
+  if ((NULL == idsPtr)         ||
+      (NULL == idsPtr->oldStr) ||
+      (NULL == idsPtr->newSip))
+    return;
+
+  /* Get the product Seq ID for this CDS feature; only a */
+  /* local id carries an ObjectId payload.               */
+
+  featSip = SeqLocId (sfp->product);
+  if (featSip == NULL || featSip->choice != SEQID_LOCAL) return;
+  oip = (ObjectIdPtr) featSip->data.ptrvalue;
+  if (oip == NULL) return;
+
+  /* If the product Seq ID matches the old Seq Id  */
+  /* then change the product to point to the new.  */
+
+  if (NULL == oip->str) {
+    sprintf (tmpIdStr, "%ld", (long) oip->id);
+    if (StringCmp (tmpIdStr, idsPtr->oldStr) == 0) {
+      SeqLocReplaceLocalID (sfp->product, idsPtr->newSip);
+    }
+  } else if (StringCmp (oip->str, idsPtr->oldStr) == 0) {
+    SeqLocReplaceLocalID (sfp->product, idsPtr->newSip);
+  }
+}
+
+
+/* Renames the local id sip of bsp to "<key>__<count>" (key truncated to 63
+ * characters) and repoints features at the renamed id:
+ *   - all features on bsp get their local ids replaced with sip;
+ *   - for every ancestor set of class 1, features on the set and on the
+ *     Bioseqs listed next to and inside the set are checked against the
+ *     old id text, by location for nucleotides and by product for
+ *     proteins.
+ *
+ * Both check callbacks interpret their userdata as a ReplaceIDStructPtr,
+ * so &ids is passed on every visit.  The previous id string is released
+ * before it is replaced.
+ */
+static void ReplaceLocalID (BioseqPtr bsp,
+                            SeqIdPtr sip,
+                            CharPtr key,
+                            Int2 count)
+
+{
+  ObjectIdPtr      oip;
+  Char             str [64];
+  Char             tmp [80];   /* 63 key chars + "__" + Int2 digits + NUL */
+  BioseqSetPtr     bssp = NULL;
+  ReplaceIDStruct  ids;
+  BioseqPtr        siblingBsp;
+  SeqEntryPtr      sep;
+  Int2             parentType;
+
+  if (bsp == NULL || sip == NULL || StringHasNoText (key)) return;
+  oip = (ObjectIdPtr) sip->data.ptrvalue;
+  if (oip == NULL) return;
+
+  /* Create the new ID string */
+
+  StringNCpy_0 (str, key, sizeof (str));
+  sprintf (tmp, "%s__%d", str, (int) count);
+
+  /* Save the original SeqId text for later passing */
+  /* to CheckFeatForNuclID_callback () and          */
+  /* CheckFeatForProductID_callback ().             */
+
+  if (NULL != oip->str)
+    ids.oldStr = StringSave (oip->str);
+  else {
+    ids.oldStr = (CharPtr) MemNew (32);
+    if (ids.oldStr != NULL) {
+      sprintf (ids.oldStr, "%ld", (long) oip->id);
+    }
+  }
+
+  /* Update the Seq ID with the new string; the old   */
+  /* string was copied above, so it can be released.  */
+
+  oip->str = MemFree (oip->str);
+  oip->str = StringSave (tmp);
+  ids.newSip = sip;
+  SeqMgrReplaceInBioseqIndex (bsp);
+
+  /* Replace the local ID on all the features of the bioseq */
+
+  VisitFeaturesOnBsp (bsp, (Pointer) sip, ReplaceLocalIdOnLoc_callback);
+
+  /* Check the parent (and grandparent, etc.) BioseqSet */
+  /* for features that use the changed ID.              */
+
+  parentType = bsp->idx.parenttype;
+  if (parentType == OBJ_BIOSEQSET)
+    bssp = (BioseqSetPtr) bsp->idx.parentptr;
+
+  while (bssp != NULL && parentType == OBJ_BIOSEQSET) {
+
+    if (bssp->_class == 1) {
+
+      /* Check features that are attached to */
+      /* the parent set itself.              */
+
+      if (ISA_na(bsp->mol))
+        VisitFeaturesOnSet (bssp, (Pointer) &ids,
+                            CheckFeatForNuclID_callback);
+      else if (ISA_aa(bsp->mol))
+        VisitFeaturesOnSet (bssp, (Pointer) &ids,
+                            CheckFeatForProductID_callback);
+
+      /* Check features that are attached to Bioseqs  */
+      /* chained from the set's own Seq-entry.        */
+
+      sep = bssp->seqentry;
+      while (NULL != sep) {
+        if (sep->choice == 1) { /* bioseq */
+          siblingBsp = (BioseqPtr) sep->data.ptrvalue;
+          if (ISA_na(bsp->mol))
+            VisitFeaturesOnBsp (siblingBsp, (Pointer) &ids,
+                                CheckFeatForNuclID_callback);
+          else if (ISA_aa(bsp->mol))
+            VisitFeaturesOnBsp (siblingBsp, (Pointer) &ids,
+                                CheckFeatForProductID_callback);
+        }
+        sep = sep->next;
+      }
+
+      /* Check features that are attached to */
+      /* the Bioseqs inside the set.         */
+
+      sep = bssp->seq_set;
+      while (NULL != sep) {
+        if (sep->choice == 1) { /* bioseq */
+          siblingBsp = (BioseqPtr) sep->data.ptrvalue;
+          if (ISA_na(bsp->mol))
+            VisitFeaturesOnBsp (siblingBsp, (Pointer) &ids,
+                                CheckFeatForNuclID_callback);
+          else if (ISA_aa(bsp->mol))
+            VisitFeaturesOnBsp (siblingBsp, (Pointer) &ids,
+                                CheckFeatForProductID_callback);
+        }
+        sep = sep->next;
+      }
+    }
+    parentType = bssp->idx.parenttype;
+    bssp = (BioseqSetPtr) bssp->idx.parentptr;
+  }
+
+  /* Clean up before exiting */
+
+  MemFree (ids.oldStr);
+
+}
+
+
+/* Inserts the local id text x into the binary tree at *head (ordered by
+ * case-insensitive comparison of the keys).
+ *   - A key seen for the first time gets a node that remembers bsp and
+ *     sip, with count 1.
+ *   - When the key is seen again, the remembered first Bioseq (if still
+ *     recorded) is renamed with suffix 1 and the count becomes 2; then
+ *     the current Bioseq is renamed with the current count, which is
+ *     incremented afterwards.
+ */
+static void BuildLclTree (LclIdListPtr PNTR head, BioseqPtr bsp, CharPtr x, SeqIdPtr sip)
+
+{
+  LclIdListPtr PNTR link = head;
+  LclIdListPtr      node;
+  Int2              comp;
+
+  /* walk down to the matching node or to the empty link where x belongs */
+  while (*link != NULL) {
+    node = *link;
+    comp = StringICmp (node->key, x);
+    if (comp < 0) {
+      link = &(node->right);
+      continue;
+    }
+    if (comp > 0) {
+      link = &(node->left);
+      continue;
+    }
+
+    /* duplicate key */
+    if (node->firstbsp != NULL && node->firstsip != NULL) {
+      ReplaceLocalID (node->firstbsp, node->firstsip, x, 1);
+      node->count = 2;
+      node->firstbsp = NULL;
+      node->firstsip = NULL;
+    }
+    ReplaceLocalID (bsp, sip, x, node->count);
+    (node->count)++;
+    return;
+  }
+
+  node = MemNew (sizeof (LclIdList));
+  if (node == NULL) return;
+
+  *link = node;
+  node->firstbsp = bsp;
+  node->firstsip = sip;
+  node->count = 1;
+  node->key = StringSave (x);
+  node->left = NULL;
+  node->right = NULL;
+}
+
+/* Frees the whole tree rooted at *head (children first, then the key and
+ * the node itself) and resets *head to NULL so the caller is not left
+ * holding a pointer to freed memory.  A NULL head or empty tree is a
+ * no-op.
+ */
+NLM_EXTERN void FreeLclTree (LclIdListPtr PNTR head)
+
+{
+  LclIdListPtr idlist;
+
+  if (head == NULL || *head == NULL) {
+    return;
+  }
+
+  idlist = *head;
+  FreeLclTree (&(idlist->left));
+  FreeLclTree (&(idlist->right));
+  idlist->key = MemFree (idlist->key);
+  MemFree (idlist);
+  *head = NULL;
+}
+
+
+/* Seq-entry callback; mydata is the address of the root of a LclIdList
+ * tree.  Every local id of a Bioseq entry is written out with SeqIdWrite
+ * (PRINTID_REPORT, at most 63 characters) and passed to BuildLclTree.
+ * index and indent are not used.
+ */
+NLM_EXTERN void ResolveExistingIDsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
+
+{
+  LclIdListPtr PNTR root;
+  BioseqPtr         seq;
+  SeqIdPtr          id;
+  Char              label [64];
+
+  root = (LclIdListPtr PNTR) mydata;
+  if (sep == NULL || root == NULL) return;
+  if (!IS_Bioseq (sep)) return;
+
+  seq = (BioseqPtr) sep->data.ptrvalue;
+  if (seq == NULL) return;
+
+  id = seq->id;
+  while (id != NULL) {
+    if (id->choice == SEQID_LOCAL) {
+      SeqIdWrite (id, label, PRINTID_REPORT, sizeof (label));
+      BuildLclTree (root, seq, label, id);
+    }
+    id = id->next;
+  }
+}
+
+
+/* Returns TRUE when any Seq-id in the chain starting at sip is a local
+ * id; FALSE for an empty chain.
+ */
+static Boolean DoesIdListHaveLocal (SeqIdPtr sip)
+{
+  SeqIdPtr id;
+
+  for (id = sip; id != NULL; id = id->next) {
+    if (id->choice == SEQID_LOCAL) return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Returns TRUE when any location in the chain starting at slp refers to a
+ * local Seq-id, descending into packed-int, mix and equiv locations and
+ * checking both ends of a bond.  The scan stops at the first local id, so
+ * a hit is never overwritten by a later location, and the result is
+ * actually returned to the caller.
+ */
+static Boolean DoesSeqLocListHaveLocalId (SeqLocPtr slp)
+{
+  PackSeqPntPtr psp;
+  SeqBondPtr    sbp;
+  SeqIntPtr     sinp;
+  SeqPntPtr     spp;
+  Boolean       has_local = FALSE;
+
+  while (slp != NULL && !has_local) {
+    switch (slp->choice) {
+      case SEQLOC_NULL :
+        break;
+      case SEQLOC_EMPTY :
+      case SEQLOC_WHOLE :
+        has_local = DoesIdListHaveLocal ((SeqIdPtr) slp->data.ptrvalue);
+        break;
+      case SEQLOC_INT :
+        sinp = (SeqIntPtr) slp->data.ptrvalue;
+        if (sinp != NULL) {
+          has_local = DoesIdListHaveLocal (sinp->id);
+        }
+        break;
+      case SEQLOC_PNT :
+        spp = (SeqPntPtr) slp->data.ptrvalue;
+        if (spp != NULL) {
+          has_local = DoesIdListHaveLocal (spp->id);
+        }
+        break;
+      case SEQLOC_PACKED_PNT :
+        psp = (PackSeqPntPtr) slp->data.ptrvalue;
+        if (psp != NULL) {
+          has_local = DoesIdListHaveLocal (psp->id);
+        }
+        break;
+      case SEQLOC_PACKED_INT :
+      case SEQLOC_MIX :
+      case SEQLOC_EQUIV :
+        /* the recursive call walks the whole component chain itself */
+        has_local = DoesSeqLocListHaveLocalId ((SeqLocPtr) slp->data.ptrvalue);
+        break;
+      case SEQLOC_BOND :
+        sbp = (SeqBondPtr) slp->data.ptrvalue;
+        if (sbp != NULL) {
+          spp = (SeqPntPtr) sbp->a;
+          if (spp != NULL) {
+            has_local = DoesIdListHaveLocal (spp->id);
+          }
+          spp = (SeqPntPtr) sbp->b;
+          if (!has_local && spp != NULL) {
+            has_local = DoesIdListHaveLocal (spp->id);
+          }
+        }
+        break;
+      case SEQLOC_FEAT :
+        break;
+      default :
+        break;
+    }
+    slp = slp->next;
+  }
+  return has_local;
+}
+
+
+/* Annotation callback for HasAlignmentsWithLocalIDs; userdata points at a
+ * Boolean that accumulates the result.  For an alignment annotation
+ * (sap->type == 2), the id lists of the alignment's segments are checked
+ * for local ids according to the segment type; std segments also have
+ * their locations checked.
+ */
+static void SeqEntryHasAlignmentsWithLocalIDsCallback (SeqAnnotPtr sap, Pointer userdata)
+{
+  DenseDiagPtr diag;
+  DenseSegPtr  dense;
+  PackSegPtr   packed;
+  SeqAlignPtr  aln;
+  StdSegPtr    std;
+  Boolean      local_seen = FALSE;
+  BoolPtr      result;
+
+  if (sap == NULL || sap->type != 2 || userdata == NULL) return;
+
+  aln = (SeqAlignPtr) sap->data;
+  if (aln != NULL)
+  {
+    if (aln->segtype == SAS_DENDIAG) {
+      diag = aln->segs;
+      while (diag != NULL && !local_seen) {
+        local_seen = DoesIdListHaveLocal (diag->id);
+        diag = diag->next;
+      }
+    } else if (aln->segtype == SAS_DENSEG) {
+      dense = aln->segs;
+      if (dense != NULL) {
+        local_seen = DoesIdListHaveLocal (dense->ids);
+      }
+    } else if (aln->segtype == SAS_STD) {
+      std = aln->segs;
+      while (std != NULL && !local_seen) {
+        local_seen = DoesIdListHaveLocal (std->ids);
+        if (!local_seen) {
+          local_seen = DoesSeqLocListHaveLocalId (std->loc);
+        }
+        std = std->next;
+      }
+    } else if (aln->segtype == SAS_PACKED) {
+      packed = (PackSegPtr) aln->segs;
+      if (packed != NULL) {
+        local_seen = DoesIdListHaveLocal (packed->ids);
+      }
+    }
+  }
+
+  /* accumulate: an earlier hit is never cleared */
+  result = (BoolPtr) userdata;
+  *result |= local_seen;
+}
+
+
+/* Returns the flag accumulated by
+ * SeqEntryHasAlignmentsWithLocalIDsCallback over the annotations visited
+ * in sep: non-FALSE when that callback detected a local id in at least
+ * one alignment annotation.
+ */
+NLM_EXTERN Boolean HasAlignmentsWithLocalIDs (SeqEntryPtr sep)
+{
+ Boolean has_alignments = FALSE;
+
+ VisitAnnotsInSep (sep, (Pointer) &has_alignments, SeqEntryHasAlignmentsWithLocalIDsCallback);
+
+ return has_alignments;
+}
+
+/* Sort callback over pointers to ValNodePtr: orders by choice first and,
+ * for equal choices, by the data.ptrvalue address.  NULL arguments or
+ * NULL nodes compare as equal.
+ */
+NLM_EXTERN int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr lhs;
+  ValNodePtr rhs;
+
+  if (ptr1 == NULL || ptr2 == NULL) return 0;
+  lhs = *((ValNodePtr PNTR) ptr1);
+  rhs = *((ValNodePtr PNTR) ptr2);
+  if (lhs == NULL || rhs == NULL) return 0;
+
+  if (lhs->choice != rhs->choice) {
+    return (lhs->choice > rhs->choice) ? 1 : -1;
+  }
+  if (lhs->data.ptrvalue > rhs->data.ptrvalue) return 1;
+  if (lhs->data.ptrvalue < rhs->data.ptrvalue) return -1;
+  return 0;
+}
+
+
+/* for GenColl and replicon app */
+/* Returns a newly saved name for the replicon described by biop, or NULL.
+ * Order of preference:
+ *   1. "MT" for a mitochondrion;
+ *   2. the name of the first plasmid-name subsource;
+ *   3. for a chromosome genome, the name of the first linkage-group
+ *      subsource;
+ *   4. the name of the first chromosome subsource;
+ *   5. a fixed default chosen by genome ("unnamed", "ANONYMOUS",
+ *      "kinetoplast" or "Pltd").
+ */
+NLM_EXTERN CharPtr GetRepliconChromosomeName (BioSourcePtr biop)
+{
+  SubSourcePtr ssp;
+  SubSourcePtr first_plasmid = NULL;
+  SubSourcePtr first_linkage = NULL;
+  SubSourcePtr first_chromosome = NULL;
+
+  if (biop == NULL) return NULL;
+  if (biop->genome == GENOME_mitochondrion) return StringSave ("MT");
+
+  /* one pass: remember the first subsource of each interesting kind */
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+    if (ssp->subtype == SUBSRC_plasmid_name) {
+      if (first_plasmid == NULL) first_plasmid = ssp;
+    } else if (ssp->subtype == SUBSRC_linkage_group) {
+      if (first_linkage == NULL) first_linkage = ssp;
+    } else if (ssp->subtype == SUBSRC_chromosome) {
+      if (first_chromosome == NULL) first_chromosome = ssp;
+    }
+  }
+
+  if (first_plasmid != NULL) {
+    return StringSave (first_plasmid->name);
+  }
+  if (biop->genome == GENOME_chromosome && first_linkage != NULL) {
+    return StringSave (first_linkage->name);
+  }
+  if (first_chromosome != NULL) {
+    return StringSave (first_chromosome->name);
+  }
+
+  /* no other name found */
+  switch (biop->genome) {
+    case GENOME_plasmid:
+      return StringSave ("unnamed");
+    case GENOME_chromosome:
+      return StringSave ("ANONYMOUS");
+    case GENOME_kinetoplast:
+      return StringSave ("kinetoplast");
+    case GENOME_plastid :
+    case GENOME_chloroplast:
+    case GENOME_chromoplast:
+    case GENOME_apicoplast :
+    case GENOME_leucoplast :
+    case GENOME_proplastid :
+      return StringSave ("Pltd");
+  }
+
+  return NULL;
+}
+
+
+/* Returns a newly saved replicon type string for biop, or NULL when biop
+ * is NULL: "ePlasmid" for a plasmid genome or when a plasmid-name
+ * subsource exists, "eLinkageGroup" for a chromosome genome with a
+ * linkage-group subsource, and "eChromosome" otherwise.
+ */
+NLM_EXTERN CharPtr GetRepliconType (BioSourcePtr biop)
+{
+  SubSourcePtr ssp;
+
+  if (biop == NULL) return NULL;
+
+  if (biop->genome == GENOME_plasmid) {
+    return StringSave ("ePlasmid");
+  }
+
+  ssp = biop->subtype;
+  while (ssp != NULL) {
+    if (ssp->subtype == SUBSRC_plasmid_name) {
+      return StringSave ("ePlasmid");
+    }
+    ssp = ssp->next;
+  }
+
+  if (biop->genome == GENOME_chromosome) {
+    ssp = biop->subtype;
+    while (ssp != NULL) {
+      if (ssp->subtype == SUBSRC_linkage_group) {
+        return StringSave ("eLinkageGroup");
+      }
+      ssp = ssp->next;
+    }
+  }
+
+  return StringSave ("eChromosome");
+}
+
+
+/* Returns a newly saved replicon location string for biop, or NULL when
+ * biop is NULL or its genome has no mapping below.  A chromosome genome
+ * and anything GetRepliconType classifies as "ePlasmid" map to
+ * "eNuclearProkaryote"; other genomes map by the switch.
+ *
+ * The string returned by GetRepliconType is a fresh copy, so it is freed
+ * here after the comparison.
+ */
+NLM_EXTERN CharPtr GetRepliconLocation (BioSourcePtr biop)
+{
+  CharPtr type_str;
+  Boolean is_plasmid = FALSE;
+
+  if (biop == NULL) {
+    return NULL;
+  }
+
+  /* the replicon type is only consulted when the genome is not a chromosome */
+  if (biop->genome != GENOME_chromosome) {
+    type_str = GetRepliconType (biop);
+    is_plasmid = (Boolean) (StringCmp (type_str, "ePlasmid") == 0);
+    type_str = MemFree (type_str);
+  }
+
+  if (biop->genome == GENOME_chromosome || is_plasmid) {
+    return StringSave("eNuclearProkaryote");
+  }
+
+  /* GENOME_chromosome cannot reach this switch - it was handled above */
+  switch (biop->genome) {
+    case GENOME_unknown:
+    case GENOME_genomic:
+      return StringSave("eNuclearProkaryote");
+    case GENOME_mitochondrion:
+    case GENOME_kinetoplast :
+      return StringSave("eMitochondrion");
+    case GENOME_chloroplast:
+      return StringSave("eChloroplast");
+    case GENOME_chromoplast:
+      return StringSave("eChromoplast");
+    case GENOME_plastid :
+      return StringSave("ePlastid");
+    case GENOME_macronuclear :
+      return StringSave("eMacronuclear");
+    case GENOME_extrachrom :
+      return StringSave("eExtrachromosomal");
+    case GENOME_cyanelle :
+      return StringSave("eCyanelle");
+    case GENOME_proviral :
+      return StringSave("eProviral");
+    case GENOME_virion :
+      return StringSave("eVirion");
+    case GENOME_nucleomorph :
+      return StringSave("eNucleomorph");
+    case GENOME_apicoplast :
+      return StringSave("eApicoplast");
+    case GENOME_leucoplast :
+      return StringSave("eLeucoplast");
+    case GENOME_proplastid :
+      return StringSave("eProplastid");
+    case GENOME_endogenous_virus :
+      return StringSave("eEndogenous-virus");
+    case GENOME_hydrogenosome :
+      return StringSave("eHydrogenosome");
+    case GENOME_chromatophore :
+      return StringSave("eChromatophore");
+  }
+
+  return NULL;
+}
+
+
+/* for finding qualifiers in definition lines (may be unused) */
+/* Collects name/value string pairs describing bsp for a definition line.
+ * Every gene feature with a non-blank locus contributes "gene" and the
+ * locus.  Only when no such gene exists, every RNA feature with a
+ * non-blank product string contributes "product" and that string.
+ * The list owns its strings.  A product string that is not added to the
+ * list is freed here, on the assumption (shared with the code that stores
+ * it in the list) that GetRNAProductString returns an allocated string.
+ */
+static ValNodePtr GetFeatureDeflineQuals (BioseqPtr bsp)
+{
+  SeqFeatPtr        sfp;
+  SeqMgrFeatContext fcontext;
+  GeneRefPtr        grp;
+  Boolean           geneFound = FALSE;
+  CharPtr           str;
+  ValNodePtr        vals = NULL;
+
+  for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
+       sfp != NULL;
+       sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, 0, &fcontext)) {
+    if ((grp = (GeneRefPtr) sfp->data.value.ptrvalue) != NULL
+        && !StringHasNoText (grp->locus)) {
+      ValNodeAddPointer (&vals, 0, StringSave ("gene"));
+      ValNodeAddPointer (&vals, 0, StringSave (grp->locus));
+      geneFound = TRUE;
+    }
+  }
+  if (!geneFound)
+  {
+    for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, 0, &fcontext);
+         sfp != NULL;
+         sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_RNA, 0, &fcontext)) {
+      str = GetRNAProductString (sfp, NULL);
+      if (str != NULL && !StringHasNoText (str)) {
+        ValNodeAddPointer (&vals, 0, StringSave ("product"));
+        ValNodeAddPointer (&vals, 0, str);
+      } else {
+        /* blank product: not stored, so release it */
+        str = MemFree (str);
+      }
+    }
+  }
+  return vals;
+}
+
+
+/* Builds "[name=value][name=value]..." from a list that stores names and
+ * values as alternating nodes.  A trailing node without a partner is
+ * ignored.  Returns a newly allocated string (empty for an empty list),
+ * or NULL if the allocation fails.
+ *
+ * The sizing pass walks the list in pairs, exactly like the build pass,
+ * so the buffer is sized for what is actually written.
+ */
+static CharPtr GetDefLineFromQualList (ValNodePtr vals)
+{
+  Int4       len = 1;   /* terminating NUL */
+  ValNodePtr vnp;
+  CharPtr    summ;
+
+  for (vnp = vals; vnp != NULL && vnp->next != NULL; vnp = vnp->next->next) {
+    /* '[' + name + '=' + value + ']' */
+    len += StringLen (vnp->data.ptrvalue) + StringLen (vnp->next->data.ptrvalue) + 3;
+  }
+
+  summ = (CharPtr) MemNew (sizeof (Char) * (len));
+  if (summ == NULL) {
+    return NULL;
+  }
+
+  for (vnp = vals; vnp != NULL && vnp->next != NULL; vnp = vnp->next->next) {
+    StringCat (summ, "[");
+    StringCat (summ, (CharPtr) vnp->data.ptrvalue);
+    StringCat (summ, "=");
+    StringCat (summ, (CharPtr) vnp->next->data.ptrvalue);
+    StringCat (summ, "]");
+  }
+  return summ;
+}
+
+
+/* Gathers name/value string pairs from the first BioSource descriptor found
+ * for bsp: "org"/taxname when the taxname has text, then (only when
+ * include_subsource is set) one pair per SubSource, then one pair per OrgMod.
+ * Qualifier names come from GetSourceQualName.  All strings are copies owned
+ * by the returned list.  Returns NULL for a NULL bsp or a missing source.
+ */
+static ValNodePtr GetAllDeflineSourceModifiers (BioseqPtr bsp, Boolean include_subsource)
+{
+  SeqMgrDescContext dcontext;
+  SeqDescPtr        sdp;
+  BioSourcePtr      biop;
+  SubSourcePtr      ssp;
+  OrgModPtr         mod;
+  CharPtr           qual_name;
+  ValNodePtr        pairs = NULL;
+
+  if (bsp == NULL) {
+    return NULL;
+  }
+
+  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+  if (sdp == NULL) {
+    return NULL;
+  }
+  biop = (BioSourcePtr) sdp->data.ptrvalue;
+  if (biop == NULL) {
+    return NULL;
+  }
+
+  if (biop->org != NULL && !StringHasNoText (biop->org->taxname)) {
+    ValNodeAddPointer (&pairs, 0, StringSave ("org"));
+    ValNodeAddPointer (&pairs, 0, StringSave (biop->org->taxname));
+  }
+
+  if (include_subsource) {
+    ssp = biop->subtype;
+    while (ssp != NULL) {
+      qual_name = GetSourceQualName (GetSrcQualFromSubSrcOrOrgMod (ssp->subtype, FALSE));
+      ValNodeAddPointer (&pairs, 0, StringSave (qual_name));
+      ValNodeAddPointer (&pairs, 0, StringSave (ssp->name));
+      ssp = ssp->next;
+    }
+  }
+
+  if (biop->org != NULL && biop->org->orgname != NULL) {
+    mod = biop->org->orgname->mod;
+    while (mod != NULL) {
+      qual_name = GetSourceQualName (GetSrcQualFromSubSrcOrOrgMod (mod->subtype, TRUE));
+      ValNodeAddPointer (&pairs, 0, StringSave (qual_name));
+      ValNodeAddPointer (&pairs, 0, StringSave (mod->subname));
+      mod = mod->next;
+    }
+  }
+
+  return pairs;
+}
+
+
+/* Gathers name/value string pairs from the first BioSource descriptor found
+ * for bsp, restricted to the qualifiers named in list.
+ *
+ * list is a ValNode chain whose data.ptrvalue entries are qualifier names
+ * understood by GetSourceQualTypeByName.  "org"/taxname is always emitted
+ * first when the taxname has text.  A qualifier is emitted only when
+ * GetSourceQualFromBioSource yields a value with text.  The returned list
+ * owns all of its strings.  Returns NULL for a NULL bsp or a missing source.
+ */
+static ValNodePtr GetDeflineSourceModifiersByList (BioseqPtr bsp, ValNodePtr list)
+{
+  SeqMgrDescContext dcontext;
+  SeqDescPtr sdp;
+  BioSourcePtr biop = NULL;
+  CharPtr val;
+  ValNodePtr vals = NULL, vnp;
+  SourceQualChoice sq;
+
+  if (bsp == NULL) {
+    return NULL;
+  }
+
+  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+  if (sdp != NULL && (biop = (BioSourcePtr) sdp->data.ptrvalue) != NULL) {
+    if (biop->org != NULL && !StringHasNoText (biop->org->taxname)) {
+      ValNodeAddPointer (&vals, 0, StringSave("org"));
+      ValNodeAddPointer (&vals, 0, StringSave(biop->org->taxname));
+    }
+    MemSet (&sq, 0, sizeof (SourceQualChoice));
+    sq.choice = SourceQualChoice_textqual;
+    for (vnp = list; vnp != NULL; vnp = vnp->next) {
+      sq.data.intvalue = GetSourceQualTypeByName(vnp->data.ptrvalue);
+      val = GetSourceQualFromBioSource (biop, &sq, NULL);
+      if (!StringHasNoText (val)) {
+        ValNodeAddPointer (&vals, 0, StringSave (GetSourceQualName (sq.data.intvalue)));
+        /* val is handed to the list as-is, so the list now owns it */
+        ValNodeAddPointer (&vals, 0, val);
+      } else if (val != NULL) {
+        /* a blank value is not stored; free it rather than leak it */
+        val = MemFree (val);
+      }
+    }
+  }
+  return vals;
+}
+
+
+/* Returns a newly allocated "[name=value]..." modifier string for bsp built
+ * from its source modifiers (subsource qualifiers only when include_subsource
+ * is set) followed by its gene/product feature qualifiers.  Returns NULL when
+ * bsp is NULL.  The caller frees the result.
+ */
+NLM_EXTERN CharPtr GetDefinitionLineFASTAModifiers (BioseqPtr bsp, Boolean include_subsource)
+{
+  ValNodePtr quals;
+  CharPtr    defline = NULL;
+
+  if (bsp != NULL) {
+    quals = GetAllDeflineSourceModifiers (bsp, include_subsource);
+    ValNodeLink (&quals, GetFeatureDeflineQuals (bsp));
+    defline = GetDefLineFromQualList (quals);
+    /* the formatted string holds copies, so the pair list can go */
+    quals = ValNodeFreeData (quals);
+  }
+  return defline;
+}
+
+
+/* Returns a newly allocated "[name=value]..." modifier string for bsp built
+ * from the source qualifiers named in list followed by the gene/product
+ * feature qualifiers.  Returns NULL when bsp is NULL.  The caller frees the
+ * result.
+ */
+NLM_EXTERN CharPtr GetDefinitionLineFASTAModifiersByList (BioseqPtr bsp, ValNodePtr list)
+{
+  ValNodePtr quals;
+  CharPtr    defline = NULL;
+
+  if (bsp != NULL) {
+    quals = GetDeflineSourceModifiersByList (bsp, list);
+    ValNodeLink (&quals, GetFeatureDeflineQuals (bsp));
+    defline = GetDefLineFromQualList (quals);
+    /* the formatted string holds copies, so the pair list can go */
+    quals = ValNodeFreeData (quals);
+  }
+  return defline;
+}
+
+
+/* code for finding frameshifts in alignments */
+
+typedef struct exoninterval { /* one coordinate interval; IsPointInExon treats both ends as inclusive */
+ Int4 start; /* lower end; ExonIntervalNew stores the smaller of its two arguments here */
+ Int4 stop; /* upper end; ExonIntervalNew stores the larger of its two arguments here */
+} ExonIntervalData, PNTR ExonIntervalPtr;
+
+
+/* Allocates an interval covering the two given coordinates.  The arguments
+ * may arrive in either order; the smaller one is stored as start and the
+ * larger as stop.  The caller owns the result.
+ */
+static ExonIntervalPtr ExonIntervalNew (Int4 start, Int4 stop)
+{
+  ExonIntervalPtr interval;
+  Boolean         in_order = (Boolean) (start < stop);
+
+  interval = (ExonIntervalPtr) MemNew (sizeof (ExonIntervalData));
+  interval->start = in_order ? start : stop;
+  interval->stop = in_order ? stop : start;
+  return interval;
+}
+
+
+/* ValNodeSort/ValNodeUnique comparison callback for lists whose nodes carry
+ * ExonIntervalPtr payloads.  Orders by start, then by stop.  Any NULL along
+ * the way (argument, node or payload) makes the pair compare equal.
+ */
+static int LIBCALLBACK SortExonIntervals (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr      node_a, node_b;
+  ExonIntervalPtr a, b;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  node_a = *((ValNodePtr PNTR) ptr1);
+  node_b = *((ValNodePtr PNTR) ptr2);
+  if (node_a == NULL || node_b == NULL) {
+    return 0;
+  }
+  a = (ExonIntervalPtr) node_a->data.ptrvalue;
+  b = (ExonIntervalPtr) node_b->data.ptrvalue;
+  if (a == NULL || b == NULL) {
+    return 0;
+  }
+
+  if (a->start != b->start) {
+    return (a->start < b->start) ? -1 : 1;
+  }
+  if (a->stop != b->stop) {
+    return (a->stop < b->stop) ? -1 : 1;
+  }
+  return 0;
+}
+
+
+typedef struct exonintervallist { /* flat array form of an interval list, built by ExonIntervalListNew */
+ ExonIntervalPtr intervals; /* array of num_intervals entries; NULL when num_intervals is 0 */
+ Int4 num_intervals; /* number of entries in intervals */
+} ExonIntervalListData, PNTR ExonIntervalListPtr;
+
+
+/* Releases an interval list together with its interval array.  Accepts NULL.
+ * Always returns NULL so the caller can clear its pointer in one statement.
+ */
+static ExonIntervalListPtr ExonIntervalListFree (ExonIntervalListPtr list)
+{
+  if (list == NULL) {
+    return NULL;
+  }
+  list->intervals = MemFree (list->intervals);
+  list = MemFree (list);
+  return list;
+}
+
+
+/* Copies a ValNode chain of ExonIntervalPtr payloads into a freshly allocated
+ * array-backed list, preserving the chain's order.  The chain itself is left
+ * untouched and stays owned by the caller.  An empty chain yields a list with
+ * num_intervals == 0 and a NULL array.
+ */
+static ExonIntervalListPtr ExonIntervalListNew (ValNodePtr interval_list)
+{
+  ExonIntervalListPtr result;
+  ExonIntervalPtr     src;
+  ValNodePtr          node;
+  Int4                idx = 0;
+
+  result = (ExonIntervalListPtr) MemNew (sizeof (ExonIntervalListData));
+  result->num_intervals = ValNodeLen (interval_list);
+  if (result->num_intervals == 0) {
+    result->intervals = NULL;
+    return result;
+  }
+
+  result->intervals = (ExonIntervalPtr) MemNew (sizeof (ExonIntervalData) * result->num_intervals);
+  node = interval_list;
+  while (node != NULL) {
+    src = (ExonIntervalPtr) node->data.ptrvalue;
+    result->intervals[idx].start = src->start;
+    result->intervals[idx].stop = src->stop;
+    idx++;
+    node = node->next;
+  }
+  return result;
+}
+
+
+/* Builds the sorted, de-duplicated list of intervals covered by the CDS and
+ * exon features indexed on a non-protein bsp, one interval per location piece
+ * returned by SeqLocFindNext.  Returns NULL for a NULL or amino-acid bsp, or
+ * when no interval was collected.  The caller frees the result with
+ * ExonIntervalListFree.
+ */
+static ExonIntervalListPtr GetExonIntervalsForBioseq (BioseqPtr bsp)
+{
+  SeqMgrFeatContext   fcontext;
+  SeqFeatPtr          sfp;
+  SeqLocPtr           slp;
+  ValNodePtr          collected = NULL;
+  ExonIntervalListPtr list = NULL;
+  Int4                num_intervals = 0;
+  Int4                pass;
+  Uint1               featdefs[2];
+
+  if (bsp == NULL || ISA_aa (bsp->mol)) {
+    return NULL;
+  }
+
+  /* CDS features are visited first, then exon features */
+  featdefs[0] = FEATDEF_CDS;
+  featdefs[1] = FEATDEF_exon;
+
+  for (pass = 0; pass < 2; pass++) {
+    sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdefs[pass], &fcontext);
+    while (sfp != NULL) {
+      slp = SeqLocFindNext (sfp->location, NULL);
+      while (slp != NULL) {
+        ValNodeAddPointer (&collected, 0, ExonIntervalNew (SeqLocStart (slp), SeqLocStop (slp)));
+        num_intervals++;
+        slp = SeqLocFindNext (sfp->location, slp);
+      }
+      sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdefs[pass], &fcontext);
+    }
+  }
+
+  if (num_intervals > 0) {
+    collected = ValNodeSort (collected, SortExonIntervals);
+    ValNodeUnique (&collected, SortExonIntervals, ValNodeFreeData);
+    list = ExonIntervalListNew (collected);
+    /* the array list holds copies, so the working chain is released */
+    collected = ValNodeFreeData (collected);
+  }
+  return list;
+}
+
+
+/* Reports whether pos lies inside any interval of list (both ends inclusive).
+ * The scan stops at the first interval whose start is beyond pos, so it
+ * depends on the intervals being stored in ascending start order.  A NULL
+ * list contains nothing.
+ */
+static Boolean IsPointInExon (Int4 pos, ExonIntervalListPtr list)
+{
+  Int4 idx;
+
+  if (list == NULL) {
+    return FALSE;
+  }
+
+  for (idx = 0; idx < list->num_intervals && list->intervals[idx].start <= pos; idx++) {
+    if (list->intervals[idx].stop >= pos) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/* Allocates an array of examine_dim interval lists, one per leading sequence
+ * id of the dense-seg (slot i belongs to the i-th id).  A slot stays NULL
+ * when GetExonIntervalsForBioseq returns NULL for that sequence or when the
+ * id chain is shorter than examine_dim.  Returns NULL when dsp is NULL or
+ * examine_dim < 1.  Release with FreeExonIntervalLists.
+ */
+static ExonIntervalListPtr PNTR GetExonIntervalLists (DenseSegPtr dsp, Int4 examine_dim)
+{
+  ExonIntervalListPtr PNTR per_row;
+  SeqIdPtr                 sip;
+  BioseqPtr                bsp;
+  Int4                     row = 0;
+
+  if (dsp == NULL || examine_dim < 1) {
+    return NULL;
+  }
+
+  per_row = (ExonIntervalListPtr PNTR) MemNew (sizeof (ExonIntervalListPtr) * examine_dim);
+  sip = dsp->ids;
+  while (sip != NULL && row < examine_dim) {
+    /* the Bioseq is only held locked while its features are read */
+    bsp = BioseqLockById (sip);
+    per_row[row] = GetExonIntervalsForBioseq (bsp);
+    BioseqUnlock (bsp);
+    sip = sip->next;
+    row++;
+  }
+  return per_row;
+}
+
+
+/* Releases an array produced by GetExonIntervalLists: each of the first
+ * examine_dim slots is freed with ExonIntervalListFree, then the array
+ * itself.  A NULL array (which GetExonIntervalLists returns for a NULL
+ * dense-seg or examine_dim < 1) is accepted and ignored.  Always returns
+ * NULL.
+ */
+static ExonIntervalListPtr PNTR FreeExonIntervalLists (ExonIntervalListPtr PNTR exon_lists, Int4 examine_dim)
+{
+  Int4 i;
+
+  if (exon_lists == NULL) {
+    return NULL;
+  }
+  for (i = 0; i < examine_dim; i++) {
+    exon_lists[i] = ExonIntervalListFree(exon_lists[i]);
+  }
+  exon_lists = MemFree (exon_lists);
+  return exon_lists;
+}
+
+
+/* note - we have already determined that this alignment position is in at least one
+ * exon. The question here is, if some sequences are in gaps at this point and others
+ * are not, do all of the sequences in one group have an exon at this position and all
+ * of the others do not?
+ *
+ * Each of the first examine_dim rows is sorted into one of four tallies:
+ * in a gap / not in a gap at alignment position pos, crossed with
+ * exon / no exon.  A row that is in a gap counts as "with exon" only when the
+ * nearest mapped positions on either side of the gap are consecutive sequence
+ * positions and both fall in that row's exon list.
+ *
+ * Returns TRUE when the minority side (gapped rows for a deletion, ungapped
+ * rows for an insertion, either side on a tie) has at least one row with an
+ * exon.  Returns FALSE for a NULL alignment, a negative pos, or an alignment
+ * that is not a dense-seg.
+ */
+static Boolean IsShiftInExon (SeqAlignPtr salp, Int4 pos, Int4 examine_dim)
+{
+ Int4 i;
+ Int4 num_in_gap_with_exon = 0;
+ Int4 num_in_gap_no_exon = 0;
+ Int4 num_not_gap_with_exon = 0;
+ Int4 num_not_gap_no_exon = 0;
+ Int4 num_gap, num_no_gap;
+ Int4 seq_pos = 0, j, before_pos, after_pos, aln_len;
+ DenseSegPtr dsp;
+ ExonIntervalListPtr PNTR exon_lists;
+ Boolean in_exon;
+ Boolean rval = FALSE;
+
+ if (salp == NULL || pos < 0 || salp->segtype != SAS_DENSEG || (dsp = (DenseSegPtr) salp->segs) == NULL) {
+ return FALSE;
+ }
+
+ exon_lists = GetExonIntervalLists(dsp, examine_dim); /* per-row exon intervals, rebuilt on every call */
+
+ AlnMgr2IndexSeqAlign (salp);
+ aln_len = SeqAlignLength (salp);
+
+ for (i = 0;
+ i < examine_dim
+ && (num_in_gap_with_exon == 0
+ || num_in_gap_no_exon == 0
+ || num_not_gap_with_exon == 0
+ || num_not_gap_no_exon == 0); /* stop early once every tally is non-zero */
+ i++) {
+ seq_pos = AlnMgr2MapSeqAlignToBioseq (salp, pos, i + 1); /* rows are passed as i + 1 */
+ if (seq_pos < 0) { /* negative result is treated as "row is in a gap here" */
+ j = pos - 1;
+ before_pos = -1;
+ while (j > -1 && before_pos < 0) { /* walk left to the nearest mapped position */
+ before_pos = AlnMgr2MapSeqAlignToBioseq (salp, j, i + 1);
+ j--;
+ }
+ j = pos + 1;
+ after_pos = -1;
+ while (j < aln_len && after_pos < 0) { /* walk right to the nearest mapped position */
+ after_pos = AlnMgr2MapSeqAlignToBioseq (salp, j, i + 1);
+ j++;
+ }
+ in_exon = FALSE;
+ if (before_pos == after_pos - 1) { /* flanking positions are adjacent on the sequence */
+ if (IsPointInExon(before_pos, exon_lists[i]) && IsPointInExon(after_pos, exon_lists[i])) {
+ in_exon = TRUE;
+ }
+ }
+
+ if (in_exon) {
+ num_in_gap_with_exon++;
+ } else {
+ num_in_gap_no_exon++;
+ }
+ } else {
+ in_exon = IsPointInExon(seq_pos, exon_lists[i]);
+ if (in_exon) {
+ num_not_gap_with_exon++;
+ } else {
+ num_not_gap_no_exon++;
+ }
+ }
+ }
+ exon_lists = FreeExonIntervalLists(exon_lists, examine_dim);
+
+ /* are we looking at an insertion or a deletion? */
+ num_gap = num_in_gap_with_exon + num_in_gap_no_exon;
+ num_no_gap = num_not_gap_with_exon + num_not_gap_no_exon;
+ if (num_gap > num_no_gap) {
+ /* this is an insertion */
+ if (num_not_gap_with_exon > 0) {
+ rval = TRUE;
+ }
+ } else if (num_gap < num_no_gap) {
+ /* this is a deletion */
+ if (num_in_gap_with_exon > 0) {
+ rval = TRUE;
+ }
+ } else {
+ /* evenly divided - no way to tell */
+ if (num_in_gap_with_exon > 0 || num_not_gap_with_exon > 0) {
+ rval = TRUE;
+ }
+ }
+ return rval;
+}
+
+
+/* Projects the exon intervals of the first examine_dim rows onto alignment
+ * coordinates.  Alignment positions are numbered from 1.  A position counts
+ * as exonic when at least one non-gapped row has an exon at the matching
+ * sequence coordinate (segment start plus offset into the segment).
+ * Consecutive exonic positions are merged into one interval.  Returns NULL
+ * when no position is exonic.  The caller frees the result with
+ * ExonIntervalListFree.
+ */
+static ExonIntervalListPtr GetAlignedExons
+(DenseSegPtr dsp,
+ Int4 examine_dim)
+{
+  ExonIntervalListPtr PNTR per_row;
+  ExonIntervalListPtr      aligned = NULL;
+  ValNodePtr               runs = NULL;
+  Int4                     seg, offset, row;
+  Int4                     seq_start;
+  Int4                     aln_pos = 1;
+  Int4                     run_start = -1;   /* -1 while not inside an exonic run */
+  Boolean                  covered = FALSE;
+
+  /* create lists of exons for individual sequences */
+  per_row = GetExonIntervalLists (dsp, examine_dim);
+
+  for (seg = 0; seg < dsp->numseg; seg++) {
+    for (offset = 0; offset < dsp->lens[seg]; offset++, aln_pos++) {
+      covered = FALSE;
+      for (row = 0; row < examine_dim && !covered; row++) {
+        seq_start = dsp->starts[seg * dsp->dim + row];
+        if (seq_start != -1 && IsPointInExon (seq_start + offset, per_row[row])) {
+          covered = TRUE;
+        }
+      }
+
+      if (covered) {
+        if (run_start < 0) {
+          /* found the beginning of an interval */
+          run_start = aln_pos;
+        }
+      } else if (run_start > -1) {
+        /* found the end of an interval */
+        ValNodeAddPointer (&runs, 0, ExonIntervalNew (run_start, aln_pos - 1));
+        run_start = -1;
+      }
+    }
+  }
+
+  if (run_start > -1) {
+    /* end of interval is same as end of alignment */
+    ValNodeAddPointer (&runs, 0, ExonIntervalNew (run_start, aln_pos - 1));
+    run_start = -1;
+  }
+
+  /* free individual sequence exon lists */
+  per_row = FreeExonIntervalLists (per_row, examine_dim);
+
+  if (runs != NULL) {
+    aligned = ExonIntervalListNew (runs);
+    runs = ValNodeFreeData (runs);
+  }
+
+  return aligned;
+}
+
+
+/* Builds the text of one frameshift report line.
+ *
+ *   flag            report category to look for in report[]
+ *   aln_pos         alignment position substituted into fmt (one %d)
+ *   gap, non_gap    counts printed on the "Gap:/Non-gap:" line
+ *   report, ignore  per-sequence arrays of length len; ignored rows are
+ *                   skipped entirely
+ *   fmt             header format containing a single %d
+ *   ids             packed id strings, one every 200 bytes
+ *   possible_error  append the "may not actually be in exon" caveat
+ *
+ * Whichever group is smaller (flagged or not flagged; flagged on a tie) is
+ * listed by id, ten ids per line.  Returns NULL when either group is empty or
+ * the allocation fails; otherwise a newly allocated string owned by the
+ * caller.
+ */
+static CharPtr FrameShiftReportString (EFrameShiftReport flag, Int4 aln_pos, Int4 gap, Int4 non_gap, Int4Ptr report, BoolPtr ignore, Int4 len, CharPtr fmt, CharPtr ids, Boolean possible_error)
+{
+  CharPtr msg = NULL;
+  Int4    num_items = 0, i, msg_len, num_flag = 0, num_normal = 0;
+  Boolean first = TRUE, show_flag;
+  CharPtr gap_fmt = "Gap: %d Non-gap: %d\n";
+  CharPtr possible_error_msg = "(Shift occurs at alignment position where exons exist on other sequences, but may not actually be in exon for this sequence)";
+
+  for (i = 0; i < len; i++) {
+    if (!ignore[i]) {
+      if (report[i] == flag) {
+        num_flag++;
+      } else {
+        num_normal++;
+      }
+    }
+  }
+
+  if (num_flag == 0 || num_normal == 0) {
+    return NULL;
+  }
+
+  if (num_flag <= num_normal) {
+    num_items = num_flag;
+    show_flag = TRUE;
+  } else {
+    num_items = num_normal;
+    show_flag = FALSE;
+  }
+
+  /* 30 covers the growth of the three %d fields plus the terminator;
+   * 204 per item covers a 199-character id, ", " and an occasional newline */
+  msg_len = StringLen (fmt) + StringLen (gap_fmt) + 30 + (num_items * 204);
+  if (possible_error) {
+    msg_len += StringLen (possible_error_msg) + 1;
+  }
+  /* the buffer holds characters, so size it in Char units, not pointers */
+  msg = (CharPtr) MemNew (sizeof (Char) * msg_len);
+  if (msg == NULL) {
+    return NULL;
+  }
+  /* cast so the argument matches %d whatever Int4 is defined as */
+  sprintf (msg, fmt, (int) aln_pos);
+  sprintf (msg + StringLen (msg), gap_fmt, (int) gap, (int) non_gap);
+  num_items = 0;
+  for (i = 0; i < len; i++) {
+    if (!ignore[i] && ((show_flag && report[i] == flag) || (!show_flag && report[i] != flag))) {
+      if (!first) {
+        StringCat (msg, ", ");
+        if (num_items % 10 == 0) {
+          StringCat (msg, "\n");
+        }
+      }
+      StringCat (msg, ids + (200 * i));
+      first = FALSE;
+      num_items++;
+    }
+  }
+  if (possible_error) {
+    StringCat (msg, possible_error_msg);
+  }
+  return msg;
+}
+
+
+/* Builds the text of one "ignored multiple of 3" report line.
+ *
+ *   aln_pos         alignment position substituted into fmt (one %d)
+ *   report, ignore  per-sequence arrays of length len; ignored rows are
+ *                   skipped entirely
+ *   fmt             header format containing a single %d
+ *   ids             packed id strings, one every 200 bytes
+ *
+ * Whichever group is smaller (rows flagged eFrameShiftReport_ExonMult3 or the
+ * rest; flagged on a tie) is listed by id, ten ids per line.  Returns NULL
+ * when either group is empty or the allocation fails; otherwise a newly
+ * allocated string owned by the caller.
+ */
+static CharPtr FrameShiftReportMult (Int4 aln_pos, Int4Ptr report, BoolPtr ignore, Int4 len, CharPtr fmt, CharPtr ids)
+{
+  CharPtr msg = NULL;
+  Int4    num_items = 0, i, msg_len, num_flag = 0, num_normal = 0;
+  Boolean first = TRUE, show_flag;
+
+  for (i = 0; i < len; i++) {
+    if (!ignore[i]) {
+      if (report[i] == eFrameShiftReport_ExonMult3) {
+        num_flag++;
+      } else {
+        num_normal++;
+      }
+    }
+  }
+
+  if (num_flag == 0 || num_normal == 0) {
+    return NULL;
+  }
+
+  if (num_flag <= num_normal) {
+    num_items = num_flag;
+    show_flag = TRUE;
+  } else {
+    num_items = num_normal;
+    show_flag = FALSE;
+  }
+
+  /* 15 covers the growth of the %d field plus the terminator, which the
+   * old formula only survived because it was multiplied by the pointer size;
+   * 204 per item covers a 199-character id, ", " and an occasional newline */
+  msg_len = StringLen (fmt) + 15 + (num_items * 204);
+  /* the buffer holds characters, so size it in Char units, not pointers */
+  msg = (CharPtr) MemNew (sizeof (Char) * msg_len);
+  if (msg == NULL) {
+    return NULL;
+  }
+  /* cast so the argument matches %d whatever Int4 is defined as */
+  sprintf (msg, fmt, (int) aln_pos);
+  num_items = 0;
+  for (i = 0; i < len; i++) {
+    if (!ignore[i]
+        && ((show_flag && report[i] == eFrameShiftReport_ExonMult3)
+            || (!show_flag && report[i] != eFrameShiftReport_ExonMult3))) {
+      if (!first) {
+        StringCat (msg, ", ");
+        if (num_items % 10 == 0) {
+          StringCat (msg, "\n");
+        }
+      }
+      StringCat (msg, ids + (200 * i));
+      first = FALSE;
+      num_items++;
+    }
+  }
+  return msg;
+}
+
+
+/* Allocates a report record.  The record takes over the msg pointer as given
+ * (no copy is made); FrameShiftReportFree later frees it.
+ */
+static FrameShiftReportPtr FrameShiftReportNew (CharPtr msg, Int4 aln_pos, Int4 first_related_seq)
+{
+  FrameShiftReportPtr rec;
+
+  rec = (FrameShiftReportPtr) MemNew (sizeof (FrameShiftReportData));
+  rec->first_related_seq = first_related_seq;
+  rec->aln_pos = aln_pos;
+  rec->msg = msg;
+  return rec;
+}
+
+
+/* Releases a report record and its message string.  Accepts NULL.  Always
+ * returns NULL.
+ */
+static FrameShiftReportPtr FrameShiftReportFree (FrameShiftReportPtr r)
+{
+  if (r == NULL) {
+    return NULL;
+  }
+  r->msg = MemFree (r->msg);
+  r = MemFree (r);
+  return r;
+}
+
+
+/* Frees a whole chain of report nodes: the FrameShiftReport payload of each
+ * node and then the node itself.  Accepts NULL.  Always returns NULL.
+ */
+NLM_EXTERN ValNodePtr FrameShiftReportListFree (ValNodePtr vnp)
+{
+  ValNodePtr following;
+
+  for (; vnp != NULL; vnp = following) {
+    following = vnp->next;
+    /* detach first so ValNodeFree releases only this node */
+    vnp->next = NULL;
+    vnp->data.ptrvalue = FrameShiftReportFree (vnp->data.ptrvalue);
+    vnp = ValNodeFree (vnp);
+  }
+  return vnp;
+}
+
+
+/* Three-way comparison of two report records: a NULL record sorts before a
+ * non-NULL one, then by alignment position, then by message text.
+ */
+static int FrameShiftReportCompare (FrameShiftReportPtr r1, FrameShiftReportPtr r2)
+{
+  if (r1 == NULL || r2 == NULL) {
+    if (r1 == NULL && r2 == NULL) {
+      return 0;
+    }
+    return (r1 == NULL) ? -1 : 1;
+  }
+
+  if (r1->aln_pos != r2->aln_pos) {
+    return (r1->aln_pos < r2->aln_pos) ? -1 : 1;
+  }
+  return StringCmp (r1->msg, r2->msg);
+}
+
+
+/* ValNodeSort callback for report lists.  Nodes are grouped by their choice
+ * value in the order exon, intron, multiple-of-3; nodes with the same choice
+ * are ordered by FrameShiftReportCompare.  NULL arguments or nodes compare
+ * equal.
+ */
+static int LIBCALLBACK SortFrameShiftReports (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr node_a, node_b;
+  Int4       section_order[3];
+  Int4       k;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  node_a = *((ValNodePtr PNTR) ptr1);
+  node_b = *((ValNodePtr PNTR) ptr2);
+  if (node_a == NULL || node_b == NULL) {
+    return 0;
+  }
+
+  if (node_a->choice == node_b->choice) {
+    return FrameShiftReportCompare (node_a->data.ptrvalue, node_b->data.ptrvalue);
+  }
+
+  /* the choices differ: whichever matches the earlier section wins */
+  section_order[0] = eFrameShiftReport_Exon;
+  section_order[1] = eFrameShiftReport_Intron;
+  section_order[2] = eFrameShiftReport_ExonMult3;
+  for (k = 0; k < 3; k++) {
+    if (node_a->choice == section_order[k]) {
+      return -1;
+    }
+    if (node_b->choice == section_order[k]) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+
+/* Writes a frameshift report list to lip->fp, printing a section header each
+ * time the node choice changes (so the list is expected to be grouped by
+ * choice, as FindFrameShiftsInAlignment returns it).
+ *
+ *   has_exons         selects the header used for the intron section
+ *   print_exons_only  suppresses the intron and multiple-of-3 sections
+ *
+ * lip->data_in_log is set once any report text has been written.  Nothing is
+ * done when lip or lip->fp is NULL.  Records that carry no message text are
+ * skipped instead of being passed to "%s" as a NULL pointer.
+ */
+NLM_EXTERN void
+PrintFrameShiftReportList
+(ValNodePtr list,
+ Boolean has_exons,
+ Boolean print_exons_only,
+ LogInfoPtr lip)
+{
+  ValNodePtr vnp;
+  FrameShiftReportPtr r;
+  EFrameShiftReport section = eFrameShiftReport_NoReport;
+  Boolean do_print = FALSE;
+
+  if (lip == NULL || lip->fp == NULL) {
+    return;
+  }
+
+  for (vnp = list; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice != section) {
+      if (vnp->choice == eFrameShiftReport_Exon) {
+        fprintf (lip->fp, "FRAMESHIFTS IN EXONS\n\n");
+        do_print = TRUE;
+      } else if (vnp->choice == eFrameShiftReport_Intron) {
+        if (print_exons_only) {
+          do_print = FALSE;
+        } else if (has_exons) {
+          fprintf (lip->fp, "FRAMESHIFTS IN INTRONS\n\n");
+          do_print = TRUE;
+        } else {
+          fprintf (lip->fp, "FRAMESHIFTS\n\n");
+          do_print = TRUE;
+        }
+      } else if (vnp->choice == eFrameShiftReport_ExonMult3) {
+        if (print_exons_only) {
+          do_print = FALSE;
+        } else {
+          fprintf (lip->fp, "MULTIPLES OF THREE ARE IGNORED\n\n");
+          do_print = TRUE;
+        }
+      }
+      section = vnp->choice;
+    }
+    if (do_print
+        && (r = (FrameShiftReportPtr) vnp->data.ptrvalue) != NULL
+        && r->msg != NULL) {
+      fprintf (lip->fp, "%s\n\n", r->msg);
+      lip->data_in_log = TRUE;
+    }
+  }
+}
+
+
+/* Measures how many consecutive alignment positions, starting at the given
+ * one (segment seg, offset into it, alignment coordinate aln_pos), share its
+ * state for row i.  Counting stops when the exon/non-exon status of the
+ * alignment position changes, when a new segment flips the row between gap
+ * and non-gap, or at the end of the alignment.  The starting position itself
+ * is included, so the result is at least 1.  Returns 1 when dsp is NULL.
+ */
+static Int4 LenBeforeBoundary (Int4 i, Int4 seg, Int4 offset, Int4 aln_pos,
+ DenseSegPtr dsp, ExonIntervalListPtr exon_intervals)
+{
+  Int4    run_len = 1;
+  Boolean row_in_gap;
+  Boolean started_in_exon;
+
+  if (dsp == NULL) {
+    return 1;
+  }
+
+  row_in_gap = (Boolean) (dsp->starts[dsp->dim * seg + i] == -1);
+  started_in_exon = IsPointInExon (aln_pos, exon_intervals);
+
+  /* the starting position is already counted; resume with its successor */
+  offset++;
+  aln_pos++;
+  while (seg < dsp->numseg) {
+    for (; offset < dsp->lens[seg]; offset++, aln_pos++, run_len++) {
+      if (IsPointInExon (aln_pos, exon_intervals) != started_in_exon) {
+        return run_len;
+      }
+    }
+
+    /* segment exhausted: the run continues only if the next segment keeps
+     * the row in the same gap state */
+    seg++;
+    offset = 0;
+    if (seg < dsp->numseg
+        && (Boolean) (dsp->starts[dsp->dim * seg + i] == -1) != row_in_gap) {
+      return run_len;
+    }
+  }
+
+  return run_len;
+}
+
+
+/* Picks the row index to associate with a report.
+ *
+ * When non-gapped rows are at least as numerous as gapped ones, this is the
+ * first non-ignored row whose report entry equals report_type.  Otherwise it
+ * is the first row of that type that has a neighbour (next row checked before
+ * previous row) of a different type.  Returns -1 when no row qualifies.
+ */
+static Int4
+FindFirstSeqWithProblem
+(Int4Ptr report, BoolPtr current_gap_ignore, Int4 num,
+ EFrameShiftReport report_type, Int4 num_gap, Int4 num_non_gap)
+{
+  Int4    row;
+  Boolean differs_from_next, differs_from_prev;
+
+  if (num_non_gap >= num_gap) {
+    for (row = 0; row < num; row++) {
+      /* ignored rows are never reported */
+      if (!current_gap_ignore[row] && report[row] == report_type) {
+        return row;
+      }
+    }
+    return -1;
+  }
+
+  /* look for transition between problem/not-problem */
+  for (row = 0; row < num; row++) {
+    if (report[row] != report_type) {
+      continue;
+    }
+    differs_from_next = (Boolean) (row < num - 1 && report[row + 1] != report_type);
+    differs_from_prev = (Boolean) (row > 0 && report[row - 1] != report_type);
+    if (differs_from_next || differs_from_prev) {
+      return row;
+    }
+  }
+
+  return -1;
+}
+
+
+/* Scans a dense-seg alignment for gaps that are not a multiple of three and
+ * returns them as a list of report nodes.
+ *
+ * Each node's choice is an EFrameShiftReport category (exon, intron, or
+ * ignored multiple of 3) and its payload a FrameShiftReport record.  The list
+ * is sorted with SortFrameShiftReports and is released with
+ * FrameShiftReportListFree.
+ *
+ *   salp       alignment to examine; anything but a dense-seg yields NULL
+ *   has_exons  optional out-parameter, set to whether any examined row has an
+ *              exon/CDS interval that projects onto the alignment
+ *
+ * A last row whose id prints as "Consensus" is left out of the analysis.
+ * Gaps that open the alignment or run to its end are ignored.
+ */
+NLM_EXTERN ValNodePtr FindFrameShiftsInAlignment (SeqAlignPtr salp, BoolPtr has_exons)
+{
+  DenseSegPtr dsp;
+  Int4        seg, i, j, aln_pos = 1, len_gap, extend;
+  Int4        num_gap, num_non_gap;
+  BoolPtr     current_gap_ignore = NULL;
+  Int4Ptr     current_gap_examined = NULL, gap_mult3 = NULL;
+  Int4Ptr     report = NULL;
+  Boolean     any_report;
+  Int4        num_mult;
+  CharPtr     ids = NULL;
+  ValNodePtr  report_list = NULL;
+  SeqIdPtr    sip;
+  Int4        examine_dim;
+  CharPtr     msg;
+  CharPtr     exon_insert_fmt = "Insertion in exon at alignment position %d:\n";
+  CharPtr     exon_delete_fmt = "Deletion in exon at alignment position %d:\n";
+  CharPtr     intron_insert_fmt = "Insertion at alignment position %d:\n";
+  CharPtr     intron_delete_fmt = "Deletion at alignment position %d:\n";
+  CharPtr     mult_fmt = "Ignored multiple of 3 at %d:\n";
+  ExonIntervalListPtr exon_intervals;
+  Int4        first_related_seq;
+  Boolean     possible_error;
+
+  if (salp == NULL) {
+    return NULL;
+  }
+
+  if (salp->segtype != SAS_DENSEG || (dsp = (DenseSegPtr) salp->segs) == NULL) {
+    return NULL;
+  }
+  /* with no rows the "last id" lookup below would index before the buffer */
+  if (dsp->dim < 1) {
+    return NULL;
+  }
+
+  /* one 200-byte slot per row; never write past the dim slots allocated,
+   * even if the id chain is longer than dim */
+  ids = (CharPtr) MemNew (sizeof (Char) * dsp->dim * 200);
+  for (sip = dsp->ids, i = 0; sip != NULL && i < dsp->dim; sip = sip->next, i++) {
+    SeqIdWrite (sip, ids + (200 * i), PRINTID_REPORT, 199);
+  }
+  if (StringCmp (ids + (200 * (dsp->dim - 1)), "Consensus") == 0) {
+    examine_dim = dsp->dim - 1;
+  } else {
+    examine_dim = dsp->dim;
+  }
+
+  current_gap_examined = (Int4Ptr) MemNew (sizeof (Int4) * examine_dim);
+  /* elements are Int4, not pointers */
+  gap_mult3 = (Int4Ptr) MemNew (sizeof (Int4) * examine_dim);
+  current_gap_ignore = (BoolPtr) MemNew (sizeof (Boolean) * examine_dim);
+  report = (Int4Ptr) MemNew (sizeof (Int4) * examine_dim);
+  for (i = 0; i < examine_dim; i++) {
+    current_gap_examined[i] = 0;
+    gap_mult3[i] = 0;
+    current_gap_ignore[i] = FALSE;
+  }
+
+  exon_intervals = GetAlignedExons (dsp, examine_dim);
+  if (has_exons != NULL) {
+    if (exon_intervals == NULL) {
+      *has_exons = FALSE;
+    } else {
+      *has_exons = TRUE;
+    }
+  }
+
+  for (seg = 0; seg < dsp->numseg; seg++) {
+    num_gap = 0;
+    num_non_gap = 0;
+    for (i = 0; i < examine_dim; i++) {
+      if (dsp->starts[seg * dsp->dim + i] == -1) {
+        if (!current_gap_ignore[i] && gap_mult3[i] == 0) {
+          if (seg == 0) {
+            /* ignore - beginning gap */
+            current_gap_ignore[i] = TRUE;
+          } else {
+            /* check to see if gap goes to end */
+            extend = seg + 1;
+            while (extend < dsp->numseg && dsp->starts[extend * dsp->dim + i] == -1) {
+              extend++;
+            }
+            if (extend == dsp->numseg) {
+              /* ignore - gap extends to end of alignment */
+              current_gap_ignore[i] = TRUE;
+            }
+          }
+          if (!current_gap_ignore[i]) {
+            num_gap ++;
+          }
+        }
+      } else {
+        current_gap_ignore[i] = FALSE;
+        num_non_gap++;
+      }
+    }
+
+    if (num_gap > 0) {
+      /* report for each position in seg */
+      for (j = 0; j < dsp->lens[seg]; j++) {
+        MemSet (report, eFrameShiftReport_NoReport, sizeof (Int4) * examine_dim);
+        num_mult = 0;
+        any_report = FALSE;
+        if (IsPointInExon (aln_pos + j, exon_intervals)) {
+          possible_error = FALSE;
+          for (i = 0; i < examine_dim; i++) {
+            if (gap_mult3[i] > 0) {
+              /* still inside a gap already classified as a multiple of 3 */
+              gap_mult3[i]--;
+              current_gap_examined[i] --;
+            } else if (!current_gap_ignore[i] && dsp->starts[dsp->dim * seg + i] == -1) {
+              len_gap = 1;
+              if (current_gap_examined[i] > 0) {
+                current_gap_examined[i] --;
+              } else {
+                /* check for multiple of 3 */
+                len_gap = LenBeforeBoundary (i, seg, j, aln_pos + j, dsp, exon_intervals);
+                current_gap_examined[i] = len_gap - 1;
+              }
+              if (len_gap % 3 == 0) {
+                report[i] = eFrameShiftReport_ExonMult3;
+                gap_mult3[i] = len_gap - 1;
+                num_mult++;
+                num_gap--;
+              } else {
+                report[i] = eFrameShiftReport_Exon;
+                possible_error = ! IsShiftInExon (salp, aln_pos + j, examine_dim);
+                any_report = TRUE;
+              }
+            }
+          }
+          if (any_report) {
+            msg = FrameShiftReportString(eFrameShiftReport_Exon, aln_pos + j, num_gap, num_non_gap, report, current_gap_ignore, examine_dim,
+                                         num_gap > num_non_gap ? exon_insert_fmt : exon_delete_fmt, ids, possible_error);
+            first_related_seq = FindFirstSeqWithProblem(report, current_gap_ignore, examine_dim, eFrameShiftReport_Exon, num_gap, num_non_gap);
+            ValNodeAddPointer (&report_list, eFrameShiftReport_Exon, FrameShiftReportNew (msg, aln_pos + j, first_related_seq));
+          }
+        } else {
+          /* point is not in exon */
+          for (i = 0; i < examine_dim; i++) {
+            if (gap_mult3[i] > 0) {
+              gap_mult3[i]--;
+              current_gap_examined[i] --;
+            } else if (!current_gap_ignore[i] && dsp->starts[dsp->dim * seg + i] == -1) {
+              len_gap = 1;
+              if (current_gap_examined[i] > 0) {
+                current_gap_examined[i] --;
+              } else {
+                /* check for multiple of 3 */
+                len_gap = LenBeforeBoundary (i, seg, j, aln_pos + j, dsp, exon_intervals);
+                current_gap_examined[i] = len_gap - 1;
+              }
+              if (len_gap % 3 == 0) {
+                report[i] = eFrameShiftReport_ExonMult3;
+                gap_mult3[i] = len_gap - 1;
+                num_mult++;
+                num_gap--;
+              } else {
+                report[i] = eFrameShiftReport_Intron;
+              }
+            }
+          }
+          /* report introns later */
+          msg = FrameShiftReportString(eFrameShiftReport_Intron, aln_pos + j, num_gap, num_non_gap, report, current_gap_ignore, examine_dim,
+                                       num_gap > num_non_gap ? intron_insert_fmt : intron_delete_fmt, ids, FALSE);
+          if (msg != NULL) {
+            first_related_seq = FindFirstSeqWithProblem(report, current_gap_ignore, examine_dim, eFrameShiftReport_Intron, num_gap, num_non_gap);
+            ValNodeAddPointer (&report_list, eFrameShiftReport_Intron, FrameShiftReportNew(msg, aln_pos + j, first_related_seq));
+          }
+        }
+        /* report multiples of 3 later */
+        if (num_mult > 0) {
+          msg = FrameShiftReportMult (aln_pos + j, report, current_gap_ignore, examine_dim, mult_fmt, ids);
+          first_related_seq = FindFirstSeqWithProblem(report, current_gap_ignore, examine_dim, eFrameShiftReport_ExonMult3, num_gap, num_non_gap);
+          ValNodeAddPointer (&report_list, eFrameShiftReport_ExonMult3, FrameShiftReportNew(msg, aln_pos + j, first_related_seq));
+        }
+      }
+      /* finished reporting for each position in seg */
+    }
+    aln_pos += dsp->lens[seg];
+  }
+
+  exon_intervals = ExonIntervalListFree (exon_intervals);
+
+  report_list = ValNodeSort (report_list, SortFrameShiftReports);
+
+  ids = MemFree (ids);
+  current_gap_examined = MemFree (current_gap_examined);
+  /* this work array used to be left allocated on return */
+  gap_mult3 = MemFree (gap_mult3);
+  current_gap_ignore = MemFree (current_gap_ignore);
+  report = MemFree (report);
+  return report_list;
+}
+
+/* Three-way comparison of two user fields.  A NULL field sorts before a
+ * non-NULL one.  Fields are compared with StringCmp on data.ptrvalue only
+ * when both have choice == 1; any other combination compares equal.
+ */
+NLM_EXTERN int CompareUserFields (UserFieldPtr ufp1, UserFieldPtr ufp2)
+{
+  if (ufp1 == NULL || ufp2 == NULL) {
+    if (ufp1 == NULL && ufp2 == NULL) {
+      return 0;
+    }
+    return (ufp1 == NULL) ? -1 : 1;
+  }
+
+  if (ufp1->choice == 1 && ufp2->choice == 1) {
+    return StringCmp (ufp1->data.ptrvalue, ufp2->data.ptrvalue);
+  }
+  return 0;
+}
+
+/* for duplicate structured comments */
+
+/* TRUE when sdp is a user-object descriptor whose type string is exactly
+ * "StructuredComment"; FALSE for NULL or anything else.
+ */
+static Boolean IsStructuredComment (SeqDescPtr sdp)
+{
+  UserObjectPtr uop;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_user) {
+    return FALSE;
+  }
+  uop = (UserObjectPtr) sdp->data.ptrvalue;
+  if (uop == NULL || uop->type == NULL) {
+    return FALSE;
+  }
+  return (Boolean) (StringCmp (uop->type->str, "StructuredComment") == 0);
+}
+
+
+/* Three-way comparison of two descriptors as structured comments.
+ *
+ * A descriptor that is not a structured comment sorts before one that is;
+ * two non-structured-comment descriptors compare equal.  Two structured
+ * comments are compared field by field with CompareUserFields; if one field
+ * chain is a prefix of the other, the shorter chain sorts first.
+ * (An ordering by parent pointer exists in the history of this function but
+ * is disabled.)
+ */
+static int CompareStructuredComment (SeqDescPtr sdp1, SeqDescPtr sdp2)
+{
+  UserObjectPtr uop1, uop2;
+  UserFieldPtr  ufp1, ufp2;
+  Boolean       first_is_sc, second_is_sc;
+  int           rval = 0;
+
+  first_is_sc = IsStructuredComment (sdp1);
+  second_is_sc = IsStructuredComment (sdp2);
+  if (!first_is_sc) {
+    return second_is_sc ? -1 : 0;
+  }
+  if (!second_is_sc) {
+    return 1;
+  }
+
+  uop1 = sdp1->data.ptrvalue;
+  uop2 = sdp2->data.ptrvalue;
+  ufp1 = uop1->data;
+  ufp2 = uop2->data;
+  while (ufp1 != NULL && ufp2 != NULL && rval == 0) {
+    rval = CompareUserFields (ufp1, ufp2);
+    ufp1 = ufp1->next;
+    ufp2 = ufp2->next;
+  }
+
+  if (rval == 0) {
+    /* all shared fields matched: the chain that ran out first sorts first */
+    if (ufp1 == NULL && ufp2 != NULL) {
+      rval = -1;
+    } else if (ufp1 != NULL && ufp2 == NULL) {
+      rval = 1;
+    }
+  }
+  return rval;
+}
+
+
+/* ValNodeSort callback for lists whose nodes carry SeqDescPtr payloads.  Two
+ * structured-comment descriptors are ordered by CompareStructuredComment;
+ * every other pairing (including NULLs) compares equal.
+ */
+static int LIBCALLBACK SortStructuredCommentDescriptor (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr node_a, node_b;
+  SeqDescPtr desc_a, desc_b;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  node_a = *((ValNodePtr PNTR) ptr1);
+  node_b = *((ValNodePtr PNTR) ptr2);
+  if (node_a == NULL || node_b == NULL) {
+    return 0;
+  }
+
+  desc_a = (SeqDescPtr) node_a->data.ptrvalue;
+  desc_b = (SeqDescPtr) node_b->data.ptrvalue;
+  if (desc_a == NULL || desc_b == NULL) {
+    return 0;
+  }
+  if (!IsStructuredComment (desc_a) || !IsStructuredComment (desc_b)) {
+    return 0;
+  }
+  return CompareStructuredComment (desc_a, desc_b);
+}
+
+
+/* VisitBioseqsInSep callback: marks duplicate structured-comment descriptors
+ * of bsp for deletion.
+ *
+ * The extended structured-comment descriptors found for bsp are sorted so
+ * that equal comments become neighbours; within each run of equal comments
+ * the first is kept and the rest get idx.deleteme set.  data, when not NULL,
+ * points to a Boolean that is set to TRUE if anything was marked.  The
+ * descriptors themselves are not freed here.
+ */
+static void RemoveDuplicateStructuredCommentsCallback (BioseqPtr bsp, Pointer data)
+{
+  SeqDescPtr        sdp, sdp_cmp;
+  SeqMgrDescContext context;
+  ValNodePtr        comment_list = NULL, vnp;
+  ObjValNodePtr     ovp;
+  BoolPtr           any_marked = (BoolPtr) data;
+
+  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
+       sdp != NULL;
+       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
+    /* only extended descriptors carry the idx block used for marking */
+    if (sdp->extended && IsStructuredComment(sdp)) {
+      ValNodeAddPointer (&comment_list, OBJ_SEQDESC, sdp);
+    }
+  }
+  if (comment_list == NULL || comment_list->next == NULL) {
+    comment_list = ValNodeFree (comment_list);
+    return;
+  }
+
+  comment_list = ValNodeSort (comment_list, SortStructuredCommentDescriptor);
+  sdp = comment_list->data.ptrvalue;
+  for (vnp = comment_list->next; vnp != NULL; vnp = vnp->next) {
+    sdp_cmp = vnp->data.ptrvalue;
+    if (CompareStructuredComment(sdp, sdp_cmp) == 0) {
+      ovp = (ObjValNodePtr)sdp_cmp;
+      ovp->idx.deleteme = TRUE;
+      if (any_marked != NULL) {
+        *any_marked = TRUE;
+      }
+    } else {
+      sdp = sdp_cmp;
+    }
+  }
+
+  /* the list only borrows the descriptors: free the nodes, not the data
+   * (previously the nodes were leaked on this path) */
+  comment_list = ValNodeFree (comment_list);
+}
+
+
+/* Marks duplicate structured comments on every Bioseq under sep and, if any
+ * were marked, deletes the marked objects.  Returns TRUE when something was
+ * removed.
+ */
+NLM_EXTERN Boolean RemoveDuplicateStructuredCommentsInSeqEntry (SeqEntryPtr sep)
+{
+  Boolean removed_any = FALSE;
+
+  VisitBioseqsInSep (sep, &removed_any, RemoveDuplicateStructuredCommentsCallback);
+  if (!removed_any) {
+    return FALSE;
+  }
+  DeleteMarkedObjects (0, OBJ_SEQENTRY, sep);
+  return removed_any;
+}
+
+
+/* SUC code */
+
+/* Decides whether a flat-file line begins a qualifier or a feature and, if
+ * so, returns a newly allocated copy of its name (caller frees).
+ *
+ * Qualifier: after leading blanks/tabs the text starts with '/' and contains
+ * an '='; the returned name runs from the '/' up to (not including) the '='.
+ * Feature: exactly 5 leading blanks/tabs, then a word whose length plus the
+ * following blanks/tabs totals 16, followed by something that looks like a
+ * location (a digit, '<', or "complement"); the returned name is that word.
+ * NOTE(review): 5 and 16 presumably mirror the feature-table column layout of
+ * the flat file - confirm against the format specification.
+ * Returns NULL for blank input or when neither pattern matches.
+ */
+static CharPtr StartsWithQualOrFeat (CharPtr str)
+{
+  Int4    space_before_qual, qual_len, space_after_feat;
+  CharPtr token;
+  CharPtr after;
+  CharPtr qual_name = NULL;
+
+  if (StringHasNoText (str)) return NULL;
+
+  space_before_qual = StringSpn (str, " \t");
+  token = str + space_before_qual;
+
+  /* qual is name between slash and equals sign */
+  if (*token == '/') {
+    qual_len = StringCSpn (token, "=");
+    if (qual_len != 0 && qual_len != StringLen (token)) {
+      qual_name = (CharPtr) MemNew ((qual_len + 1) * sizeof(Char));
+      StringNCpy (qual_name, token, qual_len);
+      qual_name[qual_len] = 0;
+    }
+  } else {
+    qual_len = StringCSpn (token, " \t");
+    space_after_feat = StringSpn (token + qual_len, " \t");
+    after = token + qual_len + space_after_feat;
+    /* look for location after feature name */
+    if (space_before_qual == 5
+        && qual_len + space_after_feat == 16
+        /* isdigit needs an unsigned char value; a negative char is undefined */
+        && (isdigit ((unsigned char) *after)
+            || *after == '<'
+            || StringNCmp (after, "complement", 10) == 0)) {
+      qual_name = (CharPtr) MemNew ((qual_len + 1) * sizeof(Char));
+      StringNCpy (qual_name, token, qual_len);
+      qual_name[qual_len] = 0;
+    }
+  }
+
+  return qual_name;
+}
+
+/* Splits a block of flat-file text into lines and records each non-blank
+ * line as a ClickableItem pushed onto the front of the list at *userdata
+ * (so the list ends up in reverse line order).
+ *
+ *   str               text to split on '\n'; text after the last newline is
+ *                     not processed
+ *   userdata          ValNodePtr PNTR list head to prepend to
+ *   blocktype         stored as the node choice and clickable_item_type
+ *   entityID/itemtype/itemID
+ *                     identify the Bioseq, feature or descriptor attached to
+ *                     each item (gap features are never attached)
+ *   include_sequence  when FALSE, SEQUENCE_BLOCK text is dropped entirely
+ *   byqual            when TRUE, a line recognised by StartsWithQualOrFeat
+ *                     becomes a parent item named after the qualifier/feature,
+ *                     with that line and its continuation lines as
+ *                     subcategories
+ *
+ * The function works on a private copy of str, temporarily overwriting each
+ * newline with NUL and restoring it afterwards.
+ */
+static void CaptureFFLineEx (
+ CharPtr str,
+ Pointer userdata,
+ BlockType blocktype,
+ Uint2 entityID,
+ Uint2 itemtype,
+ Uint4 itemID,
+ Boolean include_sequence,
+ Boolean byqual
+)
+
+{
+ Char ch; /* character saved from the position currently overwritten with NUL */
+ CharPtr copy;
+ ValNodePtr PNTR head;
+ CharPtr ptr; /* start of the current line inside copy */
+ CharPtr tmp; /* position of the newline ending the current line, or NULL */
+ ValNodePtr vnp;
+ ClickableItemPtr cip, subcip;
+ ValNodePtr item_list = NULL;
+ BioseqPtr bsp;
+ SeqFeatPtr sfp;
+ SeqDescrPtr sdp;
+ SeqMgrFeatContext fcontext;
+ SeqMgrDescContext dcontext;
+ CharPtr qual_name;
+
+ if (!include_sequence && blocktype == SEQUENCE_BLOCK) return;
+
+ head = (ValNodePtr PNTR) userdata;
+ copy = StringSaveNoNull (str);
+ if (copy == NULL) return;
+
+ ptr = copy;
+ tmp = StringChr (ptr, '\n');
+ while (tmp != NULL) {
+ ch = *tmp;
+ *tmp = '\0'; /* terminate the current line in place */
+ if (!StringHasNoText (ptr)) {
+ item_list = NULL;
+ if (itemtype == OBJ_BIOSEQ) {
+ bsp = GetBioseqGivenIDs (entityID, itemID, itemtype);
+ if (bsp != NULL) {
+ item_list = ValNodeNew (NULL);
+ item_list->choice = OBJ_BIOSEQ;
+ item_list->data.ptrvalue = bsp;
+ }
+ } else if (itemtype == OBJ_SEQFEAT) {
+ sfp = SeqMgrGetDesiredFeature (entityID, NULL, itemID, 0, NULL, &fcontext);
+ if (sfp != NULL) {
+ if (sfp->idx.subtype == FEATDEF_gap) {
+ /* can't add gap features, they are temporary */
+ } else {
+ item_list = ValNodeNew (NULL);
+ item_list->choice = OBJ_SEQFEAT;
+ item_list->data.ptrvalue = sfp;
+ }
+ }
+ } else if (itemtype == OBJ_SEQDESC) {
+ sdp = SeqMgrGetDesiredDescriptor (entityID, NULL, itemID, 0, NULL, &dcontext);
+ item_list = ValNodeNew (NULL); /* unlike the branches above, a node is created even when sdp is NULL */
+ item_list->choice = OBJ_SEQDESC;
+ item_list->data.ptrvalue = sdp;
+ }
+
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (cip, 0, sizeof (ClickableItemData));
+ cip->clickable_item_type = blocktype;
+
+ if (byqual && (qual_name = StartsWithQualOrFeat (ptr)) != NULL) {
+ cip->description = qual_name; /* cip takes over this allocation; qual_name is reassigned below */
+ subcip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (subcip, 0, sizeof (ClickableItemData));
+ subcip->clickable_item_type = blocktype;
+ subcip->description = StringSave (ptr);
+ if (item_list != NULL) {
+ ValNodeAddPointer (&(subcip->item_list), item_list->choice, item_list->data.ptrvalue);
+ }
+ ValNodeAddPointer (&(cip->subcategories), 0, subcip);
+ /* iterate to add the rest of the lines of the qual */
+ while (ch != 0 && tmp != NULL && (qual_name = StartsWithQualOrFeat (tmp + 1)) == NULL) { /* next line is a continuation while it does not itself start a qual/feature */
+ *tmp = ch; /* restore the newline before moving on */
+ tmp++;
+ ptr = tmp;
+ tmp = StringChr (ptr, '\n');
+ if (tmp != NULL) {
+ ch = *tmp;
+ *tmp = '\0';
+ subcip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (subcip, 0, sizeof (ClickableItemData));
+ subcip->clickable_item_type = blocktype;
+ subcip->description = StringSave (ptr);
+ if (item_list != NULL) {
+ ValNodeAddPointer (&(subcip->item_list), item_list->choice, item_list->data.ptrvalue);
+ }
+ ValNodeAddPointer (&(cip->subcategories), 0, subcip);
+ }
+ }
+ qual_name = MemFree (qual_name); /* frees the look-ahead name that ended the loop, not cip->description */
+ } else {
+ cip->description = StringSave (ptr);
+ }
+ cip->item_list = item_list;
+ vnp = ValNodeNew(NULL);
+ vnp->choice = blocktype;
+ vnp->data.ptrvalue = cip;
+ if (*head == NULL) {
+ *head = vnp;
+ } else {
+ vnp->next = *head; /* prepend */
+ *head = vnp;
+ }
+ }
+ /* tmp may have become NULL while processing quals */
+ if (tmp != NULL) {
+ *tmp = ch;
+ tmp++;
+ ptr = tmp;
+ tmp = StringChr (ptr, '\n');
+ }
+ }
+
+ MemFree (copy);
+}
+
+ /* Flat-file line callback that records every block, sequence included,
+  * without grouping lines by qualifier.  left and right are part of the
+  * callback signature and are not used. */
+ static void CaptureFFLine (CharPtr str, Pointer userdata, BlockType blocktype,
+                            Uint2 entityID, Uint2 itemtype, Uint4 itemID,
+                            Int4 left, Int4 right)
+
+ {
+   const Boolean  include_sequence = TRUE;
+   const Boolean  byqual = FALSE;
+
+   CaptureFFLineEx (str, userdata, blocktype, entityID, itemtype, itemID,
+                    include_sequence, byqual);
+ }
+
+ /* Flat-file line callback that skips sequence blocks and does not group
+  * lines by qualifier.  left and right are part of the callback signature
+  * and are not used. */
+ static void CaptureFFLineNoSequence (CharPtr str, Pointer userdata, BlockType blocktype,
+                                      Uint2 entityID, Uint2 itemtype, Uint4 itemID,
+                                      Int4 left, Int4 right)
+
+ {
+   const Boolean  include_sequence = FALSE;
+   const Boolean  byqual = FALSE;
+
+   CaptureFFLineEx (str, userdata, blocktype, entityID, itemtype, itemID,
+                    include_sequence, byqual);
+ }
+
+ /* Flat-file line callback that skips sequence blocks and groups lines
+  * under the qualifier or feature that starts them.  left and right are
+  * part of the callback signature and are not used. */
+ static void CaptureFFLineNoSequenceByQual (CharPtr str, Pointer userdata, BlockType blocktype,
+                                            Uint2 entityID, Uint2 itemtype, Uint4 itemID,
+                                            Int4 left, Int4 right)
+
+ {
+   const Boolean  include_sequence = FALSE;
+   const Boolean  byqual = TRUE;
+
+   CaptureFFLineEx (str, userdata, blocktype, entityID, itemtype, itemID,
+                    include_sequence, byqual);
+ }
+
+ /* Flat-file line callback that records every block, sequence included,
+  * and groups lines under the qualifier or feature that starts them.
+  * left and right are part of the callback signature and are not used. */
+ static void CaptureFFLineByQual (CharPtr str, Pointer userdata, BlockType blocktype,
+                                  Uint2 entityID, Uint2 itemtype, Uint4 itemID,
+                                  Int4 left, Int4 right)
+
+ {
+   const Boolean  include_sequence = TRUE;
+   const Boolean  byqual = TRUE;
+
+   CaptureFFLineEx (str, userdata, blocktype, entityID, itemtype, itemID,
+                    include_sequence, byqual);
+ }
+
+ /* ValNodeSort comparator: orders nodes by choice first and then by the
+  * description of the ClickableItem each node carries.  Any missing node,
+  * item or description makes the pair compare equal. */
+ static int LIBCALLBACK SortVnpByChoiceAndClickableItemDesc (VoidPtr ptr1, VoidPtr ptr2)
+
+ {
+   ClickableItemPtr  left_cip, right_cip;
+   ValNodePtr        left_vnp, right_vnp;
+
+   if (ptr1 == NULL || ptr2 == NULL) return 0;
+
+   left_vnp = *((ValNodePtr PNTR) ptr1);
+   right_vnp = *((ValNodePtr PNTR) ptr2);
+   if (left_vnp == NULL || right_vnp == NULL) return 0;
+
+   left_cip = (ClickableItemPtr) left_vnp->data.ptrvalue;
+   right_cip = (ClickableItemPtr) right_vnp->data.ptrvalue;
+   if (left_cip == NULL || right_cip == NULL) return 0;
+   if (left_cip->description == NULL || right_cip->description == NULL) return 0;
+
+   if (left_vnp->choice != right_vnp->choice) {
+     return (left_vnp->choice > right_vnp->choice) ? 1 : -1;
+   }
+   return StringCmp (left_cip->description, right_cip->description);
+ }
+
+ /* ValNodeSort comparator: orders nodes by the description of the
+  * ClickableItem each node carries.  Any missing node, item or description
+  * makes the pair compare equal. */
+ static int LIBCALLBACK SortVnpByClickableItemDesc (VoidPtr ptr1, VoidPtr ptr2)
+
+ {
+   ClickableItemPtr  left_cip, right_cip;
+   ValNodePtr        left_vnp, right_vnp;
+
+   if (ptr1 == NULL || ptr2 == NULL) return 0;
+
+   left_vnp = *((ValNodePtr PNTR) ptr1);
+   right_vnp = *((ValNodePtr PNTR) ptr2);
+   if (left_vnp == NULL || right_vnp == NULL) return 0;
+
+   left_cip = (ClickableItemPtr) left_vnp->data.ptrvalue;
+   right_cip = (ClickableItemPtr) right_vnp->data.ptrvalue;
+   if (left_cip == NULL || right_cip == NULL) return 0;
+   if (left_cip->description == NULL || right_cip->description == NULL) return 0;
+
+   return StringCmp (left_cip->description, right_cip->description);
+ }
+
+ /* Collapses runs of adjacent nodes whose ClickableItem descriptions are
+  * equal (case-sensitive).  The first item of a run absorbs the item lists
+  * and subcategories of the others, which are freed.  When a run ends, its
+  * description is rewritten as "<count> <description>" and its subcategories
+  * are sorted and collapsed the same way.  A list with a single node is
+  * returned untouched.  The list is expected to be sorted already. */
+ static ValNodePtr UniqueAndCountValNodeCS (ValNodePtr list)
+
+ {
+   ClickableItemPtr  cip = NULL;
+   Int4              count = 1;
+   CharPtr           counted;
+   ValNodePtr PNTR   link;
+   ValNodePtr        next = NULL;
+   ClickableItemPtr  run_cip;
+   ValNodePtr        vnp;
+
+   if (list == NULL) return NULL;
+   if (list->next == NULL) return list;
+
+   run_cip = (ClickableItemPtr) list->data.ptrvalue;
+   link = &(list->next);
+   vnp = list->next;
+
+   while (TRUE) {
+     if (vnp != NULL) {
+       next = vnp->next;
+       cip = (ClickableItemPtr) vnp->data.ptrvalue;
+       if (StringCmp (run_cip->description, cip->description) == 0) {
+         /* same text as the current run: unlink the node and fold it in */
+         vnp->next = NULL;
+         *link = next;
+         ValNodeLink (&(run_cip->item_list), cip->item_list);
+         cip->item_list = NULL;
+         ValNodeLink (&(run_cip->subcategories), cip->subcategories);
+         cip->subcategories = NULL;
+         FreeClickableList (vnp);
+         count++;
+         vnp = next;
+         continue;
+       }
+     }
+
+     /* the run is complete: prefix the count and recurse into subcategories */
+     counted = (CharPtr) MemNew (StringLen (run_cip->description) + 20);
+     if (counted != NULL) {
+       sprintf (counted, "%6ld %s", (long) count, run_cip->description);
+       MemFree (run_cip->description);
+       run_cip->description = counted;
+       run_cip->subcategories = ValNodeSort (run_cip->subcategories, SortVnpByClickableItemDesc);
+       run_cip->subcategories = UniqueAndCountValNodeCS (run_cip->subcategories);
+     }
+
+     if (vnp == NULL) break;
+
+     /* vnp starts the next run */
+     run_cip = cip;
+     link = &(vnp->next);
+     count = 1;
+     vnp = next;
+   }
+
+   return list;
+ }
+
+ /* Sorts the captured flat-file items, by block type and then text when
+  * byblock is TRUE and by text alone otherwise.  With reverse FALSE the
+  * sorted list is then collapsed and counted; with reverse TRUE the sorted
+  * list is returned in reverse order instead.  Returns the new list head. */
+ static ValNodePtr SortFlatFile (ValNodePtr head, Boolean reverse, Boolean byblock)
+
+ {
+   ValNodePtr  following;
+   ValNodePtr  reversed = NULL;
+
+   if (head == NULL) return NULL;
+
+   if (!byblock) {
+     head = ValNodeSort (head, SortVnpByClickableItemDesc);
+   } else {
+     head = ValNodeSort (head, SortVnpByChoiceAndClickableItemDesc);
+   }
+
+   if (!reverse) {
+     return UniqueAndCountValNodeCS (head);
+   }
+
+   /* flip the list in place by pushing each node onto a new head */
+   while (head != NULL) {
+     following = head->next;
+     head->next = reversed;
+     reversed = head;
+     head = following;
+   }
+   return reversed;
+ }
+
+ /* Generates the GenBank flat file for sep, captures its lines as
+  * ClickableItems, and returns them sorted, collapsed and counted.
+  *
+  *   reverse       re-sort the counted list by text and reverse it
+  *   byblock       sort by block type before text
+  *   showsequence  include sequence blocks
+  *   byqual        group lines under the qualifier or feature starting them
+  *
+  * Returns NULL when sep is NULL.  When flat-file generation reports failure
+  * the lines captured so far are returned unsorted. */
+ NLM_EXTERN ValNodePtr GetSUCCommonList (SeqEntryPtr sep, Boolean reverse, Boolean byblock, Boolean showsequence, Boolean byqual)
+ {
+   Uint2        entityID;
+   ValNodePtr   head = NULL;
+   ErrSev       level;
+   Boolean      okay;
+   SeqEntryPtr  oldscope;
+   XtraBlock    xtra;
+
+   if (sep == NULL) return NULL;
+
+   MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
+   if (showsequence) {
+     xtra.ffwrite = byqual ? CaptureFFLineByQual : CaptureFFLine;
+   } else {
+     xtra.ffwrite = byqual ? CaptureFFLineNoSequenceByQual : CaptureFFLineNoSequence;
+   }
+   xtra.userdata = (Pointer) &head;
+
+   /* message level and scope are changed for the flat-file run and restored after */
+   level = ErrSetMessageLevel (SEV_MAX);
+   oldscope = SeqEntrySetScope (sep);
+   okay = SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE,
+                          SHOW_CONTIG_FEATURES, 0, 0, &xtra, NULL);
+   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
+   SeqMgrIndexFeatures (entityID, NULL);
+   SeqEntrySetScope (oldscope);
+   ErrSetMessageLevel (level);
+
+   if (!okay) return head;
+
+   head = SortFlatFile (head, FALSE, byblock);
+   if (reverse) {
+     head = SortFlatFile (head, TRUE, FALSE);
+   }
+   return head;
+ }
+
+
+/* Pub Lookup */
+ /* Author visitor: appends a copy of the author's names[0] entry to the
+  * ValNodeBlock passed in userdata.  Authors whose entry has no text are
+  * skipped. */
+ static void AddAuthorProc (NameStdPtr nsp, Pointer userdata)
+
+ {
+   ValNodeBlockPtr  block = (ValNodeBlockPtr) userdata;
+   CharPtr          name;
+
+   if (nsp != NULL && block != NULL) {
+     name = nsp->names[0];
+     if (!StringHasNoText (name)) {
+       ValNodeCopyStrEx (&(block->head), &(block->tail), 0, name);
+     }
+   }
+ }
+
+
+ /* Builds a "+"-joined query string from the last article found in the pub
+  * chain oldpep.  Each use* flag adds one group of terms, in this order:
+  * author names, the first non-blank article title, the first non-blank
+  * journal abbreviation (jta or iso-jta), and the imprint (year, volume,
+  * issue, pages).  Returns an allocated string, or NULL when there is no
+  * article or no term was collected. */
+ static CharPtr ConstructArticleQuery (ValNodePtr oldpep, Boolean useAuthors, Boolean useTitle,
+                                       Boolean useJournal, Boolean useImprint)
+
+ {
+   CitArtPtr     article = NULL;
+   DatePtr       date;
+   ImprintPtr    imprint = NULL;
+   CitJourPtr    journal = NULL;
+   Pubdesc       pd;
+   CharPtr       query;
+   ValNodeBlock  terms;
+   CharPtr       text;
+   ValNodePtr    vnp;
+   Char          year [8];
+
+   if (oldpep == NULL) return NULL;
+
+   /* no break: the last article in the chain wins */
+   for (vnp = oldpep; vnp != NULL; vnp = vnp->next) {
+     if (vnp->choice == PUB_Article) {
+       article = (CitArtPtr) vnp->data.ptrvalue;
+     }
+   }
+   if (article == NULL) return NULL;
+
+   if (article->from == 1) {
+     journal = (CitJourPtr) article->fromptr;
+     imprint = (journal != NULL) ? journal->imp : NULL;
+   }
+
+   terms.head = NULL;
+   terms.tail = NULL;
+
+   if (useAuthors) {
+     /* wrap the chain in a scratch Pubdesc so the author visitor can walk it */
+     MemSet ((Pointer) &pd, 0, sizeof (Pubdesc));
+     pd.pub = oldpep;
+     VisitAuthorsInPub (&pd, (Pointer) &terms, AddAuthorProc);
+   }
+
+   if (useTitle) {
+     for (vnp = article->title; vnp != NULL; vnp = vnp->next) {
+       if (vnp->choice == Cit_title_name) {
+         text = (CharPtr) vnp->data.ptrvalue;
+         if (!StringHasNoText (text)) {
+           ValNodeCopyStrEx (&terms.head, &terms.tail, 0, text);
+           break;
+         }
+       }
+     }
+   }
+
+   if (useJournal && journal != NULL) {
+     for (vnp = journal->title; vnp != NULL; vnp = vnp->next) {
+       if (vnp->choice == Cit_title_jta || vnp->choice == Cit_title_iso_jta) {
+         text = (CharPtr) vnp->data.ptrvalue;
+         if (!StringHasNoText (text)) {
+           ValNodeCopyStrEx (&terms.head, &terms.tail, 0, text);
+           break;
+         }
+       }
+     }
+   }
+
+   if (useImprint && imprint != NULL) {
+     date = imprint->date;
+     if (date != NULL && date->data [0] == 1 && date->data [1] != 0) {
+       /* the stored year is an offset from 1900 */
+       sprintf (year, "%ld", (long) (1900 + date->data [1]));
+       ValNodeCopyStrEx (&terms.head, &terms.tail, 0, year);
+     }
+     if (StringDoesHaveText (imprint->volume)) {
+       ValNodeCopyStrEx (&terms.head, &terms.tail, 0, imprint->volume);
+     }
+     if (StringDoesHaveText (imprint->issue)) {
+       ValNodeCopyStrEx (&terms.head, &terms.tail, 0, imprint->issue);
+     }
+     if (StringDoesHaveText (imprint->pages)) {
+       ValNodeCopyStrEx (&terms.head, &terms.tail, 0, imprint->pages);
+     }
+   }
+
+   if (terms.head == NULL) return NULL;
+
+   query = ValNodeMergeStrsEx (terms.head, "+");
+   ValNodeFreeData (terms.head);
+
+   return query;
+ }
+
+ /* Sends query to the hydra citation search and returns the PubMed ID of
+  * the first well-scored hit whose journal abbreviation matches
+  * journalcheck (case-insensitive), or of the first well-scored hit with
+  * any journal abbreviation when journalcheck is NULL.  Returns 0 when
+  * nothing qualifies.
+  *
+  *   numhits  optional; receives the number of well-scored hits, 0 if none
+  *
+  * A hit is well-scored when its score attribute has no '-' and starts
+  * with "1", "0.9" or "0.8". */
+ static Int4 PerformArticleQuery (CharPtr query, CharPtr journalcheck, Int4Ptr numhits)
+
+ {
+   XmlObjPtr        attr, node, xml;
+   CitArtPtr        cap;
+   CitJourPtr       cjp;
+   Boolean          debug_mode = FALSE;
+   ValNodePtr       hits = NULL;
+   ValNodePtr       hits_tail = NULL;
+   CharPtr          idstr;
+   CharPtr          jour;
+   MedlineEntryPtr  mep;
+   PubmedEntryPtr   pmep;
+   Int4             pmid = 0;
+   Int4             pmval;
+   CharPtr          reply;
+   CharPtr          score;
+   ValNodePtr       title;
+   long int         val;
+   ValNodePtr       vnp;
+
+   if (getenv ("DEBUG_LOOKUP_JOURNAL_EUTILS") != NULL) {
+     debug_mode = TRUE;
+   }
+
+   if (numhits != NULL) {
+     *numhits = 0;
+   }
+   if (StringHasNoText (query)) return 0;
+
+   /*
+   curl -s "http://intranet.ncbi.nlm.nih.gov/projects/hydra/hydra_search.cgi?search=pubmed_search_citation_top_20.1&query=..." | xlint
+   */
+
+   reply = QUERY_UrlSynchronousQuery ("www.ncbi.nlm.nih.gov", 0,
+                                      "/projects/hydra/hydra_search.cgi",
+                                      "search=pubmed_search_citation_top_20.1&query=",
+                                      /* "search=pmc_citation.1&query=", */
+                                      query, NULL, NULL);
+   if (reply == NULL) return 0;
+
+   xml = ParseXmlString (reply);
+   if (xml == NULL) {
+     MemFree (reply);
+     return 0;
+   }
+
+   /* pass 1: collect the IDs of all well-scored hits */
+   for (node = xml; node != NULL; node = node->successor) {
+     if (!XmlPathSuffixIs (node, "/IdList/Id")) continue;
+     if (StringHasNoText (node->contents)) continue;
+     for (attr = node->attributes; attr != NULL; attr = attr->next) {
+       if (StringICmp (attr->name, "score") == 0) {
+         score = attr->contents;
+         if (!StringHasNoText (score) && StringChr (score, '-') == NULL &&
+             (StringNCmp (score, "1", 1) == 0 ||
+              StringNCmp (score, "0.9", 3) == 0 ||
+              StringNCmp (score, "0.8", 3) == 0)) {
+           ValNodeCopyStrEx (&hits, &hits_tail, 0, node->contents);
+         }
+       }
+     }
+   }
+
+   if (hits != NULL) {
+     if (numhits != NULL) {
+       *numhits = ValNodeLen (hits);
+     }
+
+     /* pass 2: fetch candidates in order until one passes the journal check */
+     for (vnp = hits; vnp != NULL && pmid == 0; vnp = vnp->next) {
+       idstr = (CharPtr) vnp->data.ptrvalue;
+       if (!StringDoesHaveText (idstr)) continue;
+       if (sscanf (idstr, "%ld", &val) != 1) continue;
+       pmval = (Int4) val;
+       if (pmval == 0) continue;
+
+       pmep = PubMedSynchronousQuery (pmval);
+       if (pmep == NULL) continue;
+
+       mep = (MedlineEntryPtr) pmep->medent;
+       cap = (mep != NULL) ? mep->cit : NULL;
+       cjp = (cap != NULL && cap->from == 1) ? (CitJourPtr) cap->fromptr : NULL;
+       if (cjp != NULL) {
+         for (title = cjp->title; title != NULL; title = title->next) {
+           if (title->choice != Cit_title_jta && title->choice != Cit_title_iso_jta) continue;
+           jour = (CharPtr) title->data.ptrvalue;
+           if (StringHasNoText (jour)) continue;
+           if (journalcheck == NULL || StringICmp (jour, journalcheck) == 0) {
+             pmid = pmval;
+           }
+         }
+       }
+       pmep = PubmedEntryFree (pmep);
+     }
+     ValNodeFreeData (hits);
+   }
+
+   FreeXmlObject (xml);
+   MemFree (reply);
+
+   return pmid;
+ }
+
+ /* Builds an article query from oldpep with the requested term groups,
+  * replaces every '&' in it with a space, runs it, and returns the PubMed
+  * ID found (0 if none).  The journal abbreviation of oldpep, when there is
+  * one, is used to validate candidates.  numhits is passed through to
+  * PerformArticleQuery. */
+ static Int4 ConstructAndPerformQuery (ValNodePtr oldpep, Boolean useAuthors, Boolean useTitle,
+                                       Boolean useJournal, Boolean useImprint, Int4Ptr numhits)
+
+ {
+   CharPtr  cursor;
+   CharPtr  journalcheck;
+   Int4     pmid;
+   CharPtr  query;
+
+   if (oldpep == NULL) return 0;
+
+   query = ConstructArticleQuery (oldpep, useAuthors, useTitle, useJournal, useImprint);
+   if (query == NULL) return 0;
+
+   /* remove ampersands in query string */
+   for (cursor = query; *cursor != '\0'; cursor++) {
+     if (*cursor == '&') {
+       *cursor = ' ';
+     }
+   }
+
+   /* journal-only query string doubles as the expected journal abbreviation */
+   journalcheck = ConstructArticleQuery (oldpep, FALSE, FALSE, TRUE, FALSE);
+
+   pmid = PerformArticleQuery (query, journalcheck, numhits);
+
+   MemFree (query);
+   MemFree (journalcheck);
+
+   return pmid;
+ }
+
+
+ /* Looks up the article described by the pub chain orig_pub and returns a
+  * freshly fetched replacement chain (article followed by PMID), or NULL
+  * when no PubMed ID can be determined or the fetch yields nothing.
+  *
+  *   orig_pub  pub chain; the last PUB_PMid found is used when it is
+  *             non-zero, otherwise a citation search is run on the
+  *             chain's article
+  *   lip       optional log; a line is written when the fetched article
+  *             carries a PubMed article ID different from the PMID used
+  *
+  * orig_pub is not modified; the caller owns the returned chain. */
+ NLM_EXTERN ValNodePtr LookupArticlesWithEutils (ValNodePtr orig_pub, LogInfoPtr lip)
+ {
+   CitArtPtr      cap;
+   ArticleIdPtr   ids;
+   MlaBackPtr     mbp;
+   MlaRequestPtr  mrp;
+   ValNodePtr     new_pub = NULL;
+   Int4           pmid = 0;
+   ValNodePtr     vnp;
+
+   if (orig_pub == NULL) return NULL;
+
+   for (vnp = orig_pub; vnp != NULL; vnp = vnp->next) {
+     if (vnp->choice == PUB_PMid) {
+       pmid = (Int4) vnp->data.intvalue;
+     }
+   }
+
+   if (pmid == 0) {
+     /* the hit count is not needed here, so no counter is passed */
+     pmid = ConstructAndPerformQuery (orig_pub, TRUE, TRUE, TRUE, TRUE, NULL);
+   }
+   if (pmid <= 0) return NULL;
+
+   mrp = Mla2CreatePubFetchRequest (pmid);
+   if (mrp == NULL) return NULL;
+   mbp = Mla2SynchronousQuery (mrp);
+   mrp = Mla2RequestFree (mrp);
+   if (mbp == NULL) return NULL;
+
+   cap = Mla2ExtractPubFetchReply (mbp);
+   if (cap != NULL) {
+     ChangeCitArtMLAuthorsToSTD (cap);
+     for (ids = cap->ids; ids != NULL; ids = ids->next) {
+       if (ids->choice != ARTICLEID_PUBMED) continue;
+       if (ids->data.intvalue == pmid) continue;
+       /* only write when there really is an open log file */
+       if (lip != NULL && lip->fp != NULL) {
+         fprintf (lip->fp, "PubLookup error: CitArt ID %ld does not match PMID %ld\n",
+                  (long) ids->data.intvalue, (long) pmid);
+         lip->data_in_log = TRUE;
+       }
+     }
+     ValNodeAddPointer (&new_pub, PUB_Article, (Pointer) cap);
+     ValNodeAddInt (&new_pub, PUB_PMid, pmid);
+   }
+   mbp = MlaBackFree (mbp);
+
+   return new_pub;
+ }
+
+ typedef struct pubreplace { /* state handed to LookupPubsCallback through userdata */
+ LogInfoPtr lip; /* log passed on to LookupArticlesWithEutils; may be NULL */
+ Int4 num_replaced; /* incremented each time a Pubdesc's pub chain is replaced */
+ } PubReplaceData, PNTR PubReplacePtr;
+
+
+ /* Returns TRUE when both pub chains have the same length and each pair of
+  * corresponding pubs compares equal under AsnIoMemComp with PubAsnWrite. */
+ static Boolean DoPubListsMatch (ValNodePtr old_pub, ValNodePtr new_pub)
+ {
+   for (; old_pub != NULL && new_pub != NULL;
+        old_pub = old_pub->next, new_pub = new_pub->next) {
+     if (!AsnIoMemComp (old_pub, new_pub, (AsnWriteFunc) PubAsnWrite)) {
+       return FALSE;
+     }
+   }
+   /* a leftover node on either side means the lengths differ */
+   return (Boolean) (old_pub == NULL && new_pub == NULL);
+ }
+
+
+ /* Pubdesc visitor: looks the publication up and, when the fetched chain
+  * differs from the current one, frees the old chain, installs the new one
+  * and bumps the replacement counter.  An identical fetched chain is
+  * discarded.  userdata is an optional PubReplacePtr. */
+ static void LookupPubsCallback (PubdescPtr pdp, Pointer userdata)
+ {
+   ValNodePtr     fetched;
+   LogInfoPtr     lip;
+   PubReplacePtr  prp;
+
+   if (pdp == NULL || pdp->pub == NULL) return;
+
+   prp = (PubReplacePtr) userdata;
+   lip = (prp != NULL) ? prp->lip : NULL;
+
+   fetched = LookupArticlesWithEutils (pdp->pub, lip);
+   if (fetched == NULL) return;
+
+   if (DoPubListsMatch (pdp->pub, fetched)) {
+     AsnGenericChoiceSeqOfFree(fetched, (AsnOptFreeFunc) PubFree);
+     return;
+   }
+
+   AsnGenericChoiceSeqOfFree(pdp->pub, (AsnOptFreeFunc) PubFree);
+   pdp->pub = fetched;
+   if (prp != NULL) {
+     prp->num_replaced ++;
+   }
+ }
+
+
+ /* Runs the publication lookup on every Pubdesc under sep and returns how
+  * many pub chains were replaced.  lip is an optional log for mismatches. */
+ NLM_EXTERN Int4 LookupPubsInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
+ {
+   PubReplaceData  state;
+
+   state.num_replaced = 0;
+   state.lip = lip;
+
+   VisitPubdescsInSep (sep, &state, LookupPubsCallback);
+
+   return state.num_replaced;
+ }
+
+ typedef struct trimnandlog /* state handed to BioseqTrimNAndLog through userdata */
+ {
+ SeqEntryPtr top_sep; /* entry explored to remove the trimmed ranges from alignments */
+ LogInfoPtr lip; /* log that receives each trimmed location; may be NULL */
+ Int4 num_bioseqs_trimmed; /* incremented once per Bioseq that had Ns removed */
+ } TrimNAndLogData, PNTR TrimNAndLogPtr;
+
+ /* Writes the printable form of slp as one line to the log and marks the
+  * log as containing data.  Does nothing when the log, its file or the
+  * location is missing. */
+ NLM_EXTERN void LogTrimmedLocation (LogInfoPtr lip, SeqLocPtr slp)
+ {
+   CharPtr  text;
+
+   if (lip == NULL || lip->fp == NULL || slp == NULL) return;
+
+   text = SeqLocPrintUseBestID(slp);
+   fprintf (lip->fp, "%s\n", text);
+   MemFree(text);
+   lip->data_in_log = TRUE;
+ }
+
+
+ /* Bioseq visitor: removes runs of 'n'/'N' from the end and the start of a
+  * nucleotide Bioseq, trims its quality scores to match, logs every trimmed
+  * interval and deletes the same intervals from the alignments found under
+  * the top entry.  userdata is a TrimNAndLogPtr; its counter is incremented
+  * once when anything was trimmed.
+  *
+  * The first base is never counted as part of the trailing run, so for a
+  * sequence made only of Ns the trailing interval starts at position 1 and
+  * the leading interval covers position 0. */
+ static void BioseqTrimNAndLog (BioseqPtr bsp, Pointer userdata)
+ {
+   Int4            first_trailing;
+   SeqLocPtr       leading = NULL;
+   Int4            lens;
+   Int4            num_leading;
+   SeqIdPtr        sip;
+   CharPtr         str;
+   TrimNAndLogPtr  tnalp;
+   SeqLocPtr       trailing = NULL;
+
+   tnalp = (TrimNAndLogPtr) userdata;
+   if (bsp == NULL || ! ISA_na (bsp->mol) || tnalp == NULL) return;
+
+   str = GetSequenceByBsp (bsp);
+   if (str == NULL) return;
+   lens = StringLen (str);
+   sip = SeqIdFindBest (bsp->id, 0);
+
+   /* first_trailing is the offset of the first base of the trailing N run */
+   first_trailing = lens;
+   while (first_trailing > 1 &&
+          (str[first_trailing - 1] == 'n' || str[first_trailing - 1] == 'N')) {
+     first_trailing--;
+   }
+   if (first_trailing < lens) {
+     trailing = SeqLocIntNew (first_trailing, lens - 1, Seq_strand_plus, sip);
+     SeqDeleteByLoc (trailing, TRUE, FALSE);
+     TrimQualityScores (bsp, lens - first_trailing, FALSE);
+   }
+
+   /* str still holds the untrimmed sequence, so stop the leading scan where
+      the trailing trim began; otherwise the interval would reach past the
+      end of the already shortened sequence */
+   num_leading = 0;
+   while (num_leading < first_trailing &&
+          (str[num_leading] == 'n' || str[num_leading] == 'N')) {
+     num_leading++;
+   }
+   if (num_leading > 0) {
+     leading = SeqLocIntNew (0, num_leading - 1, Seq_strand_plus, sip);
+     SeqDeleteByLoc (leading, TRUE, FALSE);
+     TrimQualityScores (bsp, num_leading, TRUE);
+   }
+
+   if (trailing != NULL) {
+     LogTrimmedLocation (tnalp->lip, trailing);
+     if (tnalp->top_sep != NULL) {
+       SeqEntryExplore (tnalp->top_sep, (Pointer) trailing, SeqAlignDeleteByLocCallback);
+     }
+   }
+   if (leading != NULL) {
+     LogTrimmedLocation (tnalp->lip, leading);
+     if (tnalp->top_sep != NULL) {
+       SeqEntryExplore (tnalp->top_sep, (Pointer) leading, SeqAlignDeleteByLocCallback);
+     }
+   }
+   if (trailing != NULL || leading != NULL) {
+     tnalp->num_bioseqs_trimmed++;
+   }
+
+   /* SeqLocFree also releases the interval and its copied Seq-id, which a
+      plain ValNodeFree would leak */
+   SeqLocFree (trailing);
+   SeqLocFree (leading);
+   MemFree (str);
+ }
+
+
+ /* Trims leading and trailing Ns from every nucleotide Bioseq under sep,
+  * logging the removed intervals to lip when given.  Returns the number of
+  * Bioseqs that were trimmed, or 0 when sep is NULL. */
+ NLM_EXTERN Int4 TrimNsFromNucsInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
+
+ {
+   TrimNAndLogData  state;
+
+   if (sep == NULL) return 0;
+
+   MemSet (&state, 0, sizeof (TrimNAndLogData));
+   state.top_sep = sep;
+   state.lip = lip;
+
+   VisitBioseqsInSep (state.top_sep, &state, BioseqTrimNAndLog);
+
+   return state.num_bioseqs_trimmed;
+ }
+
+ /* Gather callback: when the gathered item is a Bioseq, stores it through
+  * the BioseqPtr PNTR passed as userdata.  Always returns TRUE. */
+ static Boolean FindBspItem (GatherContextPtr gcp)
+
+ {
+   BioseqPtr PNTR  result = (BioseqPtr PNTR) gcp->userdata;
+
+   if (result == NULL) return TRUE;
+   if (gcp->thistype != OBJ_BIOSEQ) return TRUE;
+
+   *result = (BioseqPtr) gcp->thisitem;
+   return TRUE;
+ }
+
+ /* Returns the Bioseq identified by entityID and itemID, or NULL when
+  * either ID is zero, itemtype is not OBJ_BIOSEQ, or nothing is gathered. */
+ NLM_EXTERN BioseqPtr GetBioseqGivenIDs (Uint2 entityID, Uint4 itemID, Uint2 itemtype)
+
+ {
+   BioseqPtr  found = NULL;
+
+   if (entityID == 0 || itemID == 0 || itemtype != OBJ_BIOSEQ) return NULL;
+
+   GatherItem (entityID, itemID, itemtype, (Pointer) (&found), FindBspItem);
+   return found;
+ }
+
+ /* Finds the Bioseq a location refers to.  When the location has a single
+  * Seq-id, the Bioseq with that id is returned.  Otherwise, if entityID is
+  * non-zero, the Bioseq of the first sub-location is looked up and replaced
+  * by the nucleotide Bioseq of its best top parent when one is found.
+  * Returns NULL when nothing can be resolved. */
+ NLM_EXTERN BioseqPtr GetBioseqGivenSeqLoc (SeqLocPtr slp, Uint2 entityID)
+
+ {
+   BioseqPtr    bsp;
+   SeqLocPtr    first_part;
+   SeqEntryPtr  sep;
+   SeqIdPtr     sip;
+
+   if (slp == NULL) return NULL;
+
+   sip = SeqLocId (slp);
+   if (sip != NULL) return BioseqFind (sip);
+   if (entityID == 0) return NULL;
+
+   first_part = SeqLocFindNext (slp, NULL);
+   if (first_part == NULL) return NULL;
+   sip = SeqLocId (first_part);
+   if (sip == NULL) return NULL;
+
+   bsp = BioseqFind (sip);
+   if (bsp == NULL) return NULL;
+
+   sep = GetBestTopParentForData (entityID, bsp);
+   if (sep != NULL) {
+     sep = FindNucSeqEntry (sep);
+   }
+   if (sep != NULL && sep->choice == 1) {
+     bsp = (BioseqPtr) sep->data.ptrvalue;
+   }
+   return bsp;
+ }
+
+ typedef struct tripletdata { /* search state for FindIDsFromPointer */
+ Uint2 entityID; /* filled from the gather context of the matching item */
+ Uint4 itemID; /* filled from the gather context of the matching item */
+ Uint2 itemtype; /* filled from the gather context of the matching item */
+ Pointer lookfor; /* object pointer being searched for */
+ } TripletData, PNTR TripletDataPtr;
+
+ /* Gather callback: when the gathered item is the pointer being looked
+  * for, records its entity ID, item ID and type in the TripletData passed
+  * as userdata.  Always returns TRUE. */
+ static Boolean FindIDsFromPointer (GatherContextPtr gcp)
+
+ {
+   TripletDataPtr  search = (TripletDataPtr) gcp->userdata;
+
+   if (search == NULL) return TRUE;
+   if (gcp->thisitem != search->lookfor) return TRUE;
+
+   search->entityID = gcp->entityID;
+   search->itemID = gcp->itemID;
+   search->itemtype = gcp->thistype;
+   return TRUE;
+ }
+
+ /* Gathers through the entity looking for the object lookfor and returns
+  * its item ID.  Returns 0 when an argument is out of range, when the
+  * object is not found, or when it is found with a different item type. */
+ NLM_EXTERN Uint4 GetItemIDGivenPointer (Uint2 entityID, Uint2 itemtype, Pointer lookfor)
+
+ {
+   GatherScope  scope;
+   TripletData  search;
+
+   if (entityID == 0 || itemtype == 0 || itemtype >= OBJ_MAX || lookfor == NULL) {
+     return 0;
+   }
+
+   search.lookfor = lookfor;
+   search.entityID = 0;
+   search.itemID = 0;
+   search.itemtype = 0;
+
+   MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
+   scope.seglevels = 1;
+   scope.get_feats_location = FALSE;
+   /* nothing is ignored: every object type is visited */
+   MemSet ((Pointer)(scope.ignore), (int)(FALSE), (size_t)(OBJ_MAX * sizeof(Boolean)));
+   /* scope.ignore[itemtype] = FALSE; */
+   GatherEntity (entityID, (Pointer) (&search), FindIDsFromPointer, &scope);
+
+   if (search.entityID != entityID) return 0;
+   if (search.itemID == 0 || search.itemtype != itemtype) return 0;
+   return search.itemID;
+ }
+
+ /* Adds the Bioseq entry addme as a new part of the segmented Bioseq
+  * segseq.  A location is appended to the segment list of segseq: a
+  * whole-sequence location that also grows the segmented length when the
+  * part's length is not negative, a NULL location otherwise.  The entry is
+  * then appended to the parts set.  addme is freed when its length is
+  * negative or when parts is NULL. */
+ static void AddNucPart (BioseqPtr segseq, BioseqSetPtr parts, SeqEntryPtr addme)
+
+ {
+   BioseqPtr         part;
+   SeqLocPtr         seg_loc;
+   SeqEntryPtr PNTR  tailp;
+
+   if (segseq == NULL || addme == NULL) return;
+   if (addme->choice != 1 || addme->data.ptrvalue == NULL) return;
+   part = (BioseqPtr) addme->data.ptrvalue;
+
+   /* ValNodeNew appends to the existing chain, or starts one when it is empty */
+   seg_loc = ValNodeNew ((ValNodePtr) segseq->seq_ext);
+   if (seg_loc == NULL) return;
+   if (segseq->seq_ext == NULL) {
+     segseq->seq_ext = (Pointer) seg_loc;
+   }
+
+   if (part->length < 0) {
+     seg_loc->choice = SEQLOC_NULL;
+     addme = SeqEntryFree (addme);
+     return;
+   }
+   segseq->length += part->length;
+   seg_loc->choice = SEQLOC_WHOLE;
+   seg_loc->data.ptrvalue = (Pointer) SeqIdStripLocus (SeqIdDup (SeqIdFindBest (part->id, 0)));
+
+   if (parts == NULL) {
+     addme = SeqEntryFree (addme);
+     return;
+   }
+
+   /* walk to the slot after the last entry of the parts set */
+   tailp = &(parts->seq_set);
+   while (*tailp != NULL) {
+     tailp = &((*tailp)->next);
+   }
+   *tailp = addme;
+ }
+
+ /* Reports the parent recorded by the object manager for the data held by
+  * target.  Both outputs are required; they are set to NULL and 0 when the
+  * target is empty or unknown to the object manager. */
+ NLM_EXTERN void GetSeqEntryParent (SeqEntryPtr target, Pointer PNTR parentptr, Uint2Ptr parenttype)
+
+ {
+   ObjMgrDataPtr  record;
+   ObjMgrPtr      mgr;
+
+   if (parentptr == NULL || parenttype == NULL) return;
+   *parentptr = NULL;
+   *parenttype = 0;
+   if (target == NULL || target->data.ptrvalue == NULL) return;
+
+   mgr = ObjMgrGet ();
+   if (mgr != NULL) {
+     record = ObjMgrFindByData (mgr, target->data.ptrvalue);
+     if (record != NULL) {
+       *parenttype = record->parenttype;
+       *parentptr = record->parentptr;
+     }
+   }
+ }
+
+ /* Takes a snapshot of the top-level object manager record of target
+  * before the entry is restructured.  On success *omdptopptr points at the
+  * live record, *omdataptr holds a copy of it, and the live record's
+  * userdata pointer is cleared.  When no record with a non-zero entity ID
+  * is found, *omdptopptr is NULL and *omdataptr is zeroed. */
+ NLM_EXTERN void SaveSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr PNTR omdptopptr, ObjMgrData PNTR omdataptr)
+
+ {
+   ObjMgrPtr      mgr;
+   ObjMgrDataPtr  record;
+   ObjMgrDataPtr  top = NULL;
+
+   if (target == NULL || omdptopptr == NULL || omdataptr == NULL) return;
+   *omdptopptr = NULL;
+   MemSet ((Pointer) omdataptr, 0, sizeof (ObjMgrData));
+
+   mgr = ObjMgrGet ();
+   if (mgr == NULL) return;
+
+   record = ObjMgrFindByData (mgr, target->data.ptrvalue);
+   if (record != NULL) {
+     top = ObjMgrFindTop (mgr, record);
+   }
+   if (top != NULL && top->EntityID != 0) {
+     *omdptopptr = top;
+     MemCopy ((Pointer) omdataptr, top, sizeof (ObjMgrData));
+     top->userdata = NULL;
+   }
+ }
+
+ extern void ObjMgrRemoveEntityIDFromRecycle (Uint2 entityID, ObjMgrPtr omp);
+ extern void ObjMgrRecordOmdpByEntityID (Uint2 entityID, ObjMgrDataPtr omdp);
+
+ /* Puts back what SaveSeqEntryObjMgrData captured once target has been
+  * relinked.  When the top-level record of target is no longer omdptop,
+  * the saved entity ID and state flags are moved to the new record, the
+  * old record's entity ID is cleared, and the entity ID is re-registered.
+  * The saved userdata pointer is restored in either case.  Nothing happens
+  * when the snapshot has no entity ID. */
+ NLM_EXTERN void RestoreSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr omdptop, ObjMgrData PNTR omdataptr)
+
+ {
+   ObjMgrPtr      mgr;
+   ObjMgrDataPtr  record;
+   ObjMgrDataPtr  top = NULL;
+
+   if (target == NULL || omdptop == NULL || omdataptr == NULL) return;
+   if (omdataptr->EntityID == 0) return;
+
+   mgr = ObjMgrGet ();
+   if (mgr == NULL) return;
+   record = ObjMgrFindByData (mgr, target->data.ptrvalue);
+   if (record != NULL) {
+     top = ObjMgrFindTop (mgr, record);
+   }
+   if (top == NULL) return;
+
+   if (top != omdptop) {
+     top->EntityID = omdataptr->EntityID;
+     omdptop->EntityID = 0;
+     top->lockcnt = omdataptr->lockcnt;
+     top->tempload = omdataptr->tempload;
+     top->clipboard = omdataptr->clipboard;
+     top->dirty = omdataptr->dirty;
+     top->being_freed = omdataptr->being_freed;
+     top->free = omdataptr->free;
+     top->options = omdataptr->options;
+     ObjMgrRemoveEntityIDFromRecycle (top->EntityID, mgr);
+     ObjMgrRecordOmdpByEntityID (top->EntityID, top);
+   }
+   top->userdata = omdataptr->userdata;
+ }
+
+ NLM_EXTERN void AddSeqEntryToSeqEntry (SeqEntryPtr target, SeqEntryPtr insert, Boolean relink)
+ /* Attaches the entry insert to target, restructuring target in place when
+  * needed.  The outcome depends on what the two entries hold:
+  *   nuc Bioseq + nuc Bioseq   target becomes a class 2 set holding a new
+  *                             segmented Bioseq and a class 4 parts set
+  *   nuc Bioseq + prot Bioseq  target becomes a class 1 set: nuc, then prot
+  *   prot Bioseq + nuc Bioseq  target becomes a class 1 set: nuc, then prot
+  *   class 1 set + prot Bioseq insert is appended to the set
+  *   class 2 set + nuc Bioseq  insert is added as another part
+  *   class 2 set + prot Bioseq target becomes a class 1 set wrapping both
+  *   class 7, mut..eco, wgs, small-genome, gen-prod sets
+  *                             insert is appended to the set
+  * Other combinations leave both entries untouched.  When relink is TRUE
+  * the object manager state of target is saved first and restored after
+  * relinking.
+  * NOTE(review): each allocation-failure return below leaves without
+  * freeing what was already allocated and, with relink TRUE, without the
+  * restore step even though the save step has run - confirm that is
+  * acceptable. */
+ {
+ SeqEntryPtr first;
+ BioseqPtr insertbsp;
+ BioseqSetPtr nuc_prot;
+ Uint2 parenttype;
+ Pointer parentptr;
+ BioseqSetPtr parts;
+ BioseqPtr seg;
+ BioseqSetPtr segs;
+ BioseqPtr targetbsp;
+ BioseqSetPtr targetbssp;
+ SeqEntryPtr the_nuc;
+ SeqEntryPtr the_prt;
+ SeqEntryPtr tmp;
+ ObjMgrDataPtr omdptop;
+ ObjMgrData omdata;
+
+ if (target == NULL || insert == NULL) return;
+ if (target->data.ptrvalue == NULL || insert->data.ptrvalue == NULL) return;
+ /* omdptop, omdata, parentptr and parenttype are only set and read when relink is TRUE */
+ if (relink) {
+ SaveSeqEntryObjMgrData (target, &omdptop, &omdata);
+ GetSeqEntryParent (target, &parentptr, &parenttype);
+ }
+ /* case 1: both entries hold a single Bioseq */
+ if (IS_Bioseq (target) && IS_Bioseq (insert)) {
+ targetbsp = (BioseqPtr) target->data.ptrvalue;
+ insertbsp = (BioseqPtr) insert->data.ptrvalue;
+ if (ISA_na (targetbsp->mol)) {
+ if (ISA_na (insertbsp->mol)) {
+ /* nuc + nuc: build a segmented Bioseq whose parts are the two nucleotides */
+ seg = BioseqNew ();
+ if (seg == NULL) return;
+ seg->mol = targetbsp->mol;
+ seg->repr = Seq_repr_seg;
+ seg->seq_ext_type = 1;
+ seg->length = 0;
+ /* seg->id = MakeSeqID ("SEG_dna"); */
+ /* seg->id = MakeNewProteinSeqId (NULL, NULL); */
+ seg->id = MakeUniqueSeqID ("segseq_");
+ SeqMgrAddToBioseqIndex (seg);
+
+ the_nuc = SeqEntryNew ();
+ if (the_nuc == NULL) return;
+ the_nuc->choice = 1;
+ the_nuc->data.ptrvalue = (Pointer) seg;
+
+ segs = BioseqSetNew ();
+ if (segs == NULL) return;
+ segs->_class = 2;
+ segs->seq_set = the_nuc;
+
+ parts = BioseqSetNew ();
+ if (parts == NULL) return;
+ parts->_class = 4;
+
+ tmp = SeqEntryNew ();
+ if (tmp == NULL) return;
+ tmp->choice = 2;
+ tmp->data.ptrvalue = (Pointer) parts;
+ the_nuc->next = tmp;
+ /* target itself becomes the new set, so the old Bioseq gets a fresh entry */
+ first = SeqEntryNew ();
+ if (first == NULL) return;
+ first->choice = 1;
+ first->data.ptrvalue = (Pointer) targetbsp;
+ target->choice = 2;
+ target->data.ptrvalue = (Pointer) segs;
+
+ AddNucPart (seg, parts, first);
+ AddNucPart (seg, parts, insert);
+
+ } else if (ISA_aa (insertbsp->mol)) {
+ /* nuc + prot: wrap both in a new class 1 set, nucleotide first */
+ nuc_prot = BioseqSetNew ();
+ if (nuc_prot == NULL) return;
+ nuc_prot->_class = 1;
+
+ the_nuc = SeqEntryNew ();
+ if (the_nuc == NULL) return;
+ the_nuc->choice = 1;
+ the_nuc->data.ptrvalue = (Pointer) targetbsp;
+ target->choice = 2;
+ target->data.ptrvalue = (Pointer) nuc_prot;
+ nuc_prot->seq_set = the_nuc;
+
+ the_nuc->next = insert;
+
+ }
+ } else if (ISA_aa (targetbsp->mol)) {
+ if (ISA_na (insertbsp->mol)) {
+ /* prot + nuc: wrap both in a new class 1 set */
+ nuc_prot = BioseqSetNew ();
+ if (nuc_prot == NULL) return;
+ nuc_prot->_class = 1;
+
+ the_prt = SeqEntryNew ();
+ if (the_prt == NULL) return;
+ the_prt->choice = 1;
+ the_prt->data.ptrvalue = (Pointer) targetbsp;
+ target->choice = 2;
+ target->data.ptrvalue = (Pointer) nuc_prot;
+ nuc_prot->seq_set = insert;
+ /* the inserted nucleotide goes first; the protein is spliced in right after it */
+ the_prt->next = insert->next;
+ insert->next = the_prt;
+
+ }
+ }
+ } else if (IS_Bioseq_set (target)) {
+ targetbssp = (BioseqSetPtr) target->data.ptrvalue;
+ if (targetbssp->_class == 1 && IS_Bioseq (insert)) {
+ insertbsp = (BioseqPtr) insert->data.ptrvalue;
+ if (ISA_aa (insertbsp->mol)) {
+ /* class 1 set + prot: append the protein at the end of the set */
+ nuc_prot = targetbssp;
+ if (nuc_prot->seq_set != NULL) {
+ tmp = nuc_prot->seq_set;
+ while (tmp->next != NULL) {
+ tmp = tmp->next;
+ }
+ tmp->next = insert;
+ } else {
+ nuc_prot->seq_set = insert;
+ }
+
+ }
+ } else if (targetbssp->_class == 2 && IS_Bioseq (insert)) {
+ insertbsp = (BioseqPtr) insert->data.ptrvalue;
+ if (ISA_na (insertbsp->mol)) {
+ /* class 2 set + nuc: add as a part, if the set has the expected layout */
+ the_nuc = FindNucSeqEntry (target);
+ if (the_nuc != NULL && the_nuc->next != NULL) {
+ tmp = the_nuc->next;
+ if (tmp->choice == 2 && tmp->data.ptrvalue != NULL) {
+ parts = (BioseqSetPtr) tmp->data.ptrvalue;
+ if (parts->_class == 4 && the_nuc->choice == 1) {
+ seg = (BioseqPtr) the_nuc->data.ptrvalue;
+ AddNucPart (seg, parts, insert);
+ }
+ }
+ }
+
+ } else if (ISA_aa (insertbsp->mol)) {
+ /* class 2 set + prot: wrap the existing set and the protein in a new class 1 set */
+ nuc_prot = BioseqSetNew ();
+ if (nuc_prot == NULL) return;
+ nuc_prot->_class = 1;
+
+ first = SeqEntryNew ();
+ if (first == NULL) return;
+ first->choice = 2;
+ first->data.ptrvalue = (Pointer) targetbssp;
+ target->choice = 2;
+ target->data.ptrvalue = (Pointer) nuc_prot;
+ nuc_prot->seq_set = first;
+
+ first->next = insert;
+
+ }
+ } else if (targetbssp->_class == 7) {
+ /* class 7 set: append whatever is being inserted */
+ if (targetbssp->seq_set != NULL) {
+ tmp = targetbssp->seq_set;
+ while (tmp->next != NULL) {
+ tmp = tmp->next;
+ }
+ tmp->next = insert;
+ } else {
+ targetbssp->seq_set = insert;
+ }
+ } else if ((targetbssp->_class >= BioseqseqSet_class_mut_set &&
+ targetbssp->_class <= BioseqseqSet_class_eco_set) ||
+ targetbssp->_class == BioseqseqSet_class_wgs_set ||
+ targetbssp->_class == BioseqseqSet_class_small_genome_set) {
+ /* same append as for class 7 */
+ if (targetbssp->seq_set != NULL) {
+ tmp = targetbssp->seq_set;
+ while (tmp->next != NULL) {
+ tmp = tmp->next;
+ }
+ tmp->next = insert;
+ } else {
+ targetbssp->seq_set = insert;
+ }
+
+ } else if (targetbssp->_class == BioseqseqSet_class_gen_prod_set) {
+
+ if (targetbssp->seq_set != NULL) {
+ tmp = targetbssp->seq_set;
+ while (tmp->next != NULL) {
+ tmp = tmp->next;
+ }
+ tmp->next = insert;
+ } else {
+ targetbssp->seq_set = insert;
+ }
+
+ }
+ }
+
+ if (relink) {
+ SeqMgrLinkSeqEntry (target, parenttype, parentptr);
+ RestoreSeqEntryObjMgrData (target, omdptop, &omdata);
+ }
+ }
+
+ /* Frees the Bioseq or Bioseq-set held by target and moves the contents of
+  * replaceWith into it.  The replaceWith node itself is freed, so the
+  * caller must not use it afterwards.  When relink is TRUE the object
+  * manager state of target is saved first and restored after relinking. */
+ NLM_EXTERN void ReplaceSeqEntryWithSeqEntry (SeqEntryPtr target, SeqEntryPtr replaceWith, Boolean relink)
+
+ {
+   ObjMgrData     saved;
+   ObjMgrDataPtr  saved_top;
+   Pointer        parentptr;
+   Uint2          parenttype;
+
+   if (target == NULL || replaceWith == NULL) return;
+
+   if (relink) {
+     SaveSeqEntryObjMgrData (target, &saved_top, &saved);
+     GetSeqEntryParent (target, &parentptr, &parenttype);
+   }
+
+   switch (target->choice) {
+     case 1 :
+       BioseqFree ((BioseqPtr) target->data.ptrvalue);
+       break;
+     case 2 :
+       BioseqSetFree ((BioseqSetPtr) target->data.ptrvalue);
+       break;
+     default :
+       break;
+   }
+
+   /* take over the payload; only the emptied wrapper node is released */
+   target->choice = replaceWith->choice;
+   target->data.ptrvalue = replaceWith->data.ptrvalue;
+   MemFree (replaceWith);
+
+   if (relink) {
+     SeqMgrLinkSeqEntry (target, parenttype, parentptr);
+     RestoreSeqEntryObjMgrData (target, saved_top, &saved);
+   }
+ }
+
+ /* Walks the chain starting at sep, descending into Bioseq-sets, and
+  * unlinks and frees the entry del wherever it is found.  prev is the
+  * address of the link that points at sep. */
+ static void SeqEntryRemoveLoop (SeqEntryPtr sep, SeqEntryPtr del, SeqEntryPtr PNTR prev)
+
+ {
+   SeqEntryPtr   following;
+   BioseqSetPtr  set;
+
+   for (; sep != NULL; sep = following) {
+     following = sep->next;
+
+     if (sep == del) {
+       /* splice the entry out before freeing it so its siblings survive */
+       *prev = following;
+       sep->next = NULL;
+       SeqEntryFree (sep);
+       continue;
+     }
+
+     prev = (SeqEntryPtr PNTR) &(sep->next);
+     if (!IS_Bioseq_set (sep)) continue;
+
+     set = (BioseqSetPtr) sep->data.ptrvalue;
+     if (set != NULL) {
+       SeqEntryRemoveLoop (set->seq_set, del, &(set->seq_set));
+     }
+   }
+ }
+
+ /* Removes and frees the entry del from the tree rooted at top.  When
+  * relink is TRUE the object manager state of top is saved first and
+  * restored after relinking.  Nothing happens when either entry is NULL
+  * or empty. */
+ NLM_EXTERN void RemoveSeqEntryFromSeqEntry (SeqEntryPtr top, SeqEntryPtr del, Boolean relink)
+
+ {
+   SeqEntryPtr    link_to_top = NULL;
+   Pointer        parentptr;
+   Uint2          parenttype;
+   ObjMgrData     saved;
+   ObjMgrDataPtr  saved_top;
+
+   if (top == NULL || del == NULL) return;
+   if (top->data.ptrvalue == NULL || del->data.ptrvalue == NULL) return;
+
+   if (relink) {
+     SaveSeqEntryObjMgrData (top, &saved_top, &saved);
+     GetSeqEntryParent (top, &parentptr, &parenttype);
+   }
+
+   /* top has no incoming link here, so a scratch pointer stands in for it */
+   SeqEntryRemoveLoop (top, del, &link_to_top);
+
+   if (relink) {
+     SeqMgrLinkSeqEntry (top, parenttype, parentptr);
+     RestoreSeqEntryObjMgrData (top, saved_top, &saved);
+   }
+ }
+
+/* for discouraged and unused modifiers */
+
+ /* Tests whether str starts with pref (case-insensitive; dashes in pref
+  * also match underscores in str) and returns a pointer to the value that
+  * follows.
+  *
+  *   novalneeded  when TRUE and nothing follows, " " is returned instead
+  *                of NULL
+  *   skippref     when TRUE the result points past the prefix and any
+  *                '=', ' ' or ':' separators; when FALSE it points into
+  *                str from its start, past any leading separators
+  *
+  * Returns NULL when either string is blank, the prefix does not match,
+  * the prefix is followed by something other than a separator, or no
+  * value remains. */
+ NLM_EXTERN CharPtr StringHasPrefix (CharPtr str, CharPtr pref, Boolean novalneeded, Boolean skippref)
+
+ {
+   Char     after_prefix;
+   CharPtr  cursor;
+   Char     prefix [64];
+   size_t   preflen;
+   CharPtr  value;
+
+   if (StringHasNoText (str) || StringHasNoText (pref)) return NULL;
+
+   preflen = StringLen (pref);
+   StringNCpy_0 (prefix, pref, sizeof (prefix));
+   if (StringNICmp (str, prefix, preflen) != 0) {
+     /* try after replacing dash with underscore */
+     for (cursor = prefix; *cursor != '\0'; cursor++) {
+       if (*cursor == '-') {
+         *cursor = '_';
+       }
+     }
+     if (StringNICmp (str, prefix, preflen) != 0) return NULL;
+   }
+
+   value = skippref ? str + preflen : str;
+   if (StringHasNoText (value)) {
+     return novalneeded ? " " : NULL;
+   }
+
+   /* the prefix must end at a separator, not in the middle of a longer word */
+   after_prefix = *(str + preflen);
+   if (after_prefix != '=' && after_prefix != ' ' &&
+       after_prefix != ':' && after_prefix != '\0') return NULL;
+
+   while (*value == '=' || *value == ' ' || *value == ':') {
+     value++;
+   }
+   if (StringHasNoText (value)) return NULL;
+   return value;
+ }
+
+
+Nlm_QualNameAssoc current_orgmod_subtype_alist[] = { /* {name, ORGMOD_ value} pairs, ended by a NULL name; first list searched by GetOrgModQualName */
+ {" ", 0},
+ {"Acronym", ORGMOD_acronym},
+ {"Anamorph", ORGMOD_anamorph},
+ {"Authority", ORGMOD_authority},
+ {"Bio-material", ORGMOD_bio_material},
+ {"Biotype", ORGMOD_biotype},
+ {"Biovar", ORGMOD_biovar},
+ {"Breed", ORGMOD_breed},
+ {"Chemovar", ORGMOD_chemovar},
+ {"Common", ORGMOD_common},
+ {"Cultivar", ORGMOD_cultivar},
+ {"Culture-collection", ORGMOD_culture_collection},
+ {"Ecotype", ORGMOD_ecotype},
+ {"Forma", ORGMOD_forma},
+ {"Forma-specialis", ORGMOD_forma_specialis},
+ {"Group", ORGMOD_group},
+ {"Host", ORGMOD_nat_host},
+ {"Isolate", ORGMOD_isolate},
+ {"Metagenome-source", ORGMOD_metagenome_source},
+ {"Pathovar", ORGMOD_pathovar},
+ {"Serogroup", ORGMOD_serogroup},
+ {"Serotype", ORGMOD_serotype},
+ {"Serovar", ORGMOD_serovar},
+ {"Specimen-voucher", ORGMOD_specimen_voucher},
+ {"Strain", ORGMOD_strain},
+ {"Subgroup", ORGMOD_subgroup},
+ {"Sub-species", ORGMOD_sub_species},
+ {"Substrain", ORGMOD_substrain},
+ {"Subtype", ORGMOD_subtype},
+ {"Synonym", ORGMOD_synonym},
+ {"Teleomorph", ORGMOD_teleomorph},
+ {"Type", ORGMOD_type},
+ {"Variety", ORGMOD_variety},
+ { NULL, 0 } };
+
+Nlm_QualNameAssoc discouraged_orgmod_subtype_alist[] = { /* subtypes reported through has_discouraged by BioSourceHasOldOrgModQualifiers */
+ {"Old Lineage", ORGMOD_old_lineage},
+ {"Old Name", ORGMOD_old_name},
+ { NULL, 0 } };
+
+Nlm_QualNameAssoc discontinued_orgmod_subtype_alist[] = { /* subtypes reported through has_discontinued by BioSourceHasOldOrgModQualifiers */
+ {"Dosage", ORGMOD_dosage},
+ { NULL, 0 } };
+
+
+Nlm_NameNameAssoc orgmod_aliases[] = { /* {name, alias, ORGMOD_ value}; StringHasOrgModPrefix falls back to the alias spellings */
+ {"Sub-species", "subspecies", ORGMOD_sub_species},
+ {"Host", "nat-host", ORGMOD_nat_host},
+ {"Host", "specific-host", ORGMOD_nat_host},
+ {"Substrain", "Sub_strain", ORGMOD_substrain},
+ { NULL, NULL, 0 } };
+
+/* Returns the display name for an OrgMod subtype: "Note" for ORGMOD_other,
+   otherwise the first matching name from the current, discouraged and
+   discontinued lists (searched in that order), or NULL if none matches. */
+extern CharPtr GetOrgModQualName (Uint1 subtype)
+{
+  Nlm_QualNameAssoc PNTR  entry;
+  Nlm_QualNameAssoc PNTR  lists [3];
+  Int4                    which;
+
+  if (subtype == ORGMOD_other) return "Note";
+
+  lists [0] = current_orgmod_subtype_alist;
+  lists [1] = discouraged_orgmod_subtype_alist;
+  lists [2] = discontinued_orgmod_subtype_alist;
+
+  for (which = 0; which < 3; which++) {
+    /* each list ends with an entry whose name is NULL */
+    for (entry = lists [which]; entry->name != NULL; entry++) {
+      if (entry->value == subtype) return entry->name;
+    }
+  }
+
+  return NULL;
+}
+
+
+/* Reports whether any OrgMod on biop has a subtype from the discouraged list
+   and/or from the discontinued list.  Either output pointer may be NULL, in
+   which case that result is not stored. */
+extern void BioSourceHasOldOrgModQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued)
+{
+  Int4       idx;
+  OrgModPtr  omp = NULL;
+  Boolean    sawDiscontinued = FALSE;
+  Boolean    sawDiscouraged = FALSE;
+
+  if (biop != NULL && biop->org != NULL && biop->org->orgname != NULL) {
+    omp = biop->org->orgname->mod;
+  }
+
+  /* stop as soon as both kinds have been seen */
+  for (; omp != NULL && ! (sawDiscouraged && sawDiscontinued); omp = omp->next) {
+    if (! sawDiscouraged) {
+      for (idx = 0; discouraged_orgmod_subtype_alist [idx].name != NULL; idx++) {
+        if (omp->subtype == discouraged_orgmod_subtype_alist [idx].value) {
+          sawDiscouraged = TRUE;
+          break;
+        }
+      }
+    }
+    if (! sawDiscontinued) {
+      for (idx = 0; discontinued_orgmod_subtype_alist [idx].name != NULL; idx++) {
+        if (omp->subtype == discontinued_orgmod_subtype_alist [idx].value) {
+          sawDiscontinued = TRUE;
+          break;
+        }
+      }
+    }
+  }
+
+  if (has_discouraged != NULL) *has_discouraged = sawDiscouraged;
+  if (has_discontinued != NULL) *has_discontinued = sawDiscontinued;
+}
+
+
+/* Tries every current OrgMod name, then every alias, as a prefix of str via
+   StringHasPrefix; entries whose value is ORGMOD_nat_host are skipped.  On
+   return *pval holds the result of the last StringHasPrefix call and
+   *p_subtypeval the matched subtype (0 if none).  Both outputs may be NULL. */
+NLM_EXTERN void StringHasOrgModPrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref)
+{
+  Uint1    found = 0;
+  Int2     idx;
+  CharPtr  rest = NULL;
+
+  idx = 0;
+  while (found == 0 && current_orgmod_subtype_alist [idx].name != NULL) {
+    if (current_orgmod_subtype_alist [idx].value != ORGMOD_nat_host) {
+      rest = StringHasPrefix (str, current_orgmod_subtype_alist [idx].name, FALSE, skippref);
+      if (rest != NULL) {
+        found = current_orgmod_subtype_alist [idx].value;
+      }
+    }
+    idx++;
+  }
+
+  /* aliases are only consulted when no current name matched */
+  idx = 0;
+  while (found == 0 && orgmod_aliases [idx].name != NULL) {
+    if (orgmod_aliases [idx].value != ORGMOD_nat_host) {
+      rest = StringHasPrefix (str, orgmod_aliases [idx].alias, FALSE, skippref);
+      if (rest != NULL) {
+        found = orgmod_aliases [idx].value;
+      }
+    }
+    idx++;
+  }
+
+  if (pval != NULL) *pval = rest;
+  if (p_subtypeval != NULL) *p_subtypeval = found;
+}
+
+
+Nlm_QualNameAssoc current_subsource_subtype_alist[] = { /* {name, SUBSRC_ value} pairs, ended by a NULL name; first list searched by GetSubsourceQualName */
+ {" ", 0},
+ {"Altitude", SUBSRC_altitude},
+ {"Cell-line", SUBSRC_cell_line},
+ {"Cell-type", SUBSRC_cell_type},
+ {"Chromosome", SUBSRC_chromosome},
+ {"Clone", SUBSRC_clone},
+ {"Clone-lib", SUBSRC_clone_lib},
+ {"Collected-by", SUBSRC_collected_by},
+ {"Collection-date", SUBSRC_collection_date},
+ {"Country", SUBSRC_country},
+ {"Dev-stage", SUBSRC_dev_stage},
+ {"Endogenous-virus-name", SUBSRC_endogenous_virus_name},
+ {"Environmental-sample", SUBSRC_environmental_sample},
+ {"Genotype", SUBSRC_genotype},
+ {"Germline", SUBSRC_germline},
+ {"Haplogroup", SUBSRC_haplogroup},
+ {"Haplotype", SUBSRC_haplotype},
+ {"Identified-by", SUBSRC_identified_by},
+ {"Isolation-source", SUBSRC_isolation_source},
+ {"Lab-host", SUBSRC_lab_host},
+ {"Lat-Lon", SUBSRC_lat_lon},
+ {"Linkage-group", SUBSRC_linkage_group},
+ {"Map", SUBSRC_map},
+ {"Mating-type", SUBSRC_mating_type},
+ {"Metagenomic", SUBSRC_metagenomic},
+ {"Plasmid-name", SUBSRC_plasmid_name},
+ {"Pop-variant", SUBSRC_pop_variant},
+ {"Rearranged", SUBSRC_rearranged},
+ {"Segment", SUBSRC_segment},
+ {"Sex", SUBSRC_sex},
+ {"Subclone", SUBSRC_subclone},
+ {"Tissue-lib", SUBSRC_tissue_lib},
+ {"Tissue-type", SUBSRC_tissue_type},
+ {"Transgenic", SUBSRC_transgenic},
+ { NULL, 0 } };
+
+Nlm_QualNameAssoc discouraged_subsource_subtype_alist[] = { /* subtypes reported through has_discouraged by BioSourceHasOldSubSourceQualifiers */
+ {"Plastid-name", SUBSRC_plastid_name},
+ { NULL, 0 } };
+
+Nlm_QualNameAssoc discontinued_subsource_subtype_alist[] = { /* subtypes reported through has_discontinued by BioSourceHasOldSubSourceQualifiers */
+ {"Frequency", SUBSRC_frequency},
+ {"Ins-seq-name", SUBSRC_insertion_seq_name},
+ {"Transposon-name", SUBSRC_transposon_name},
+ {"Fwd-PCR-primer-name", SUBSRC_fwd_primer_name},
+ {"Fwd-PCR-primer-seq", SUBSRC_fwd_primer_seq},
+ {"Rev-PCR-primer-name", SUBSRC_rev_primer_name},
+ {"Rev-PCR-primer-seq", SUBSRC_rev_primer_seq},
+ { NULL, 0 } };
+
+Nlm_NameNameAssoc subsource_aliases[] = { /* {name, alias, SUBSRC_ value} triples, ended by a NULL name */
+ {"Fwd-PCR-primer-name", "fwd-primer-name", SUBSRC_fwd_primer_name},
+ {"Fwd-PCR-primer-seq", "fwd-primer-seq", SUBSRC_fwd_primer_seq},
+ {"Rev-PCR-primer-name", "rev-primer-name", SUBSRC_rev_primer_name},
+ {"Rev-PCR-primer-seq", "rev-primer-seq", SUBSRC_rev_primer_seq},
+ {"Subclone", "sub-clone", SUBSRC_subclone},
+ {"Lat-Lon", "Lat-long", SUBSRC_lat_lon},
+ {"Lat-Lon", "Latitude-Longitude", SUBSRC_lat_lon },
+ { NULL, NULL, 0 } };
+
+/* Returns the display name for a SubSource subtype: "Note" for SUBSRC_other,
+   otherwise the first matching name from the current, discouraged and
+   discontinued lists (searched in that order), or NULL if none matches. */
+extern CharPtr GetSubsourceQualName (Uint1 subtype)
+{
+  Nlm_QualNameAssoc PNTR  entry;
+  Nlm_QualNameAssoc PNTR  lists [3];
+  Int4                    which;
+
+  if (subtype == SUBSRC_other) return "Note";
+
+  lists [0] = current_subsource_subtype_alist;
+  lists [1] = discouraged_subsource_subtype_alist;
+  lists [2] = discontinued_subsource_subtype_alist;
+
+  for (which = 0; which < 3; which++) {
+    /* each list ends with an entry whose name is NULL */
+    for (entry = lists [which]; entry->name != NULL; entry++) {
+      if (entry->value == subtype) return entry->name;
+    }
+  }
+
+  return NULL;
+}
+
+
+/* Reports whether any SubSource on biop has a subtype from the discouraged
+   list and/or from the discontinued list.  Either output pointer may be NULL,
+   in which case that result is not stored. */
+extern void BioSourceHasOldSubSourceQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued)
+{
+  Int4          idx;
+  Boolean       sawDiscontinued = FALSE;
+  Boolean       sawDiscouraged = FALSE;
+  SubSourcePtr  sub = NULL;
+
+  if (biop != NULL) {
+    sub = biop->subtype;
+  }
+
+  /* stop as soon as both kinds have been seen */
+  for (; sub != NULL && ! (sawDiscouraged && sawDiscontinued); sub = sub->next) {
+    if (! sawDiscouraged) {
+      for (idx = 0; discouraged_subsource_subtype_alist [idx].name != NULL; idx++) {
+        if (sub->subtype == discouraged_subsource_subtype_alist [idx].value) {
+          sawDiscouraged = TRUE;
+          break;
+        }
+      }
+    }
+    if (! sawDiscontinued) {
+      for (idx = 0; discontinued_subsource_subtype_alist [idx].name != NULL; idx++) {
+        if (sub->subtype == discontinued_subsource_subtype_alist [idx].value) {
+          sawDiscontinued = TRUE;
+          break;
+        }
+      }
+    }
+  }
+
+  if (has_discouraged != NULL) *has_discouraged = sawDiscouraged;
+  if (has_discontinued != NULL) *has_discontinued = sawDiscontinued;
+}
+
+
+/* Gather callback: sets the Boolean that gcp->userdata points to when the
+   current item is a Seq-align or a Seq-hist alignment.  Always returns TRUE. */
+static Boolean CheckForAlignments (GatherContextPtr gcp)
+
+{
+  BoolPtr  found;
+
+  if (gcp != NULL) {
+    found = (BoolPtr) gcp->userdata;
+    if (found != NULL) {
+      if (gcp->thistype == OBJ_SEQALIGN || gcp->thistype == OBJ_SEQHIST_ALIGN) {
+        *found = TRUE;
+      }
+    }
+  }
+  return TRUE;
+}
+
+
+/* Gathers entityID with sep as scope and returns TRUE if CheckForAlignments
+   saw a Seq-align or Seq-hist alignment.  Returns FALSE when entityID is 0
+   or sep is NULL. */
+NLM_EXTERN Boolean LIBCALL SeqEntryHasAligns (Uint2 entityID, SeqEntryPtr sep)
+
+{
+  Boolean      found = FALSE;
+  GatherScope  scope;
+
+  if (entityID == 0 || sep == NULL) return FALSE;
+
+  MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
+  scope.seglevels = 1;
+  scope.scope = sep;
+
+  /* ignore everything, then switch the wanted object types back on */
+  MemSet((Pointer) (scope.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean)));
+  scope.ignore [OBJ_BIOSEQ] = FALSE;
+  scope.ignore [OBJ_BIOSEQ_SEG] = FALSE;
+  scope.ignore [OBJ_SEQALIGN] = FALSE;
+  scope.ignore [OBJ_SEQANNOT] = FALSE;
+  scope.ignore [OBJ_SEQHIST] = FALSE;
+  scope.ignore [OBJ_SEQHIST_ALIGN] = FALSE;
+
+  GatherEntity (entityID, (Pointer) (&found), CheckForAlignments, &scope);
+  return found;
+}
+
+/*
+ *  ScanBioseqSetReleaseInt streams a Bioseq-set file, reading one
+ *  Bioseq-set.seq-set.E component at a time and passing it to callback.
+ *
+ *  inputFile   path of the ASN.1 file to read
+ *  binary      TRUE for binary ASN.1, FALSE for text ASN.1
+ *  compressed  TRUE to read through an external decompressor (OS_UNIX only):
+ *              the program named by NCBI_UNCOMPRESS_BINARY if that is set,
+ *              otherwise gzcat, otherwise zcat
+ *  userdata    passed unchanged to callback
+ *  callback    called once for every component Seq-entry
+ *  freesep     TRUE to SeqEntryFree each component after callback returns
+ *  mutex       if not NULL, locked around each SeqEntryAsnRead
+ *
+ *  Returns the number of component Seq-entries passed to callback.
+ */
+static Int4 ScanBioseqSetReleaseInt (
+  CharPtr inputFile,
+  Boolean binary,
+  Boolean compressed,
+  Pointer userdata,
+  ScanBioseqSetFunc callback,
+  Boolean freesep,
+  TNlmMutexPtr mutex
+)
+
+{
+  AsnIoPtr      aip;
+  AsnModulePtr  amp;
+  AsnTypePtr    atp, atp_bss, atp_se;
+  FILE          *fp;
+  Int4          index = 0;
+  SeqEntryPtr   sep;
+#ifdef OS_UNIX
+  Char          cmmd [256];
+  CharPtr       gzcatprog;
+  int           len;
+  int           ret;
+  Boolean       usedPopen = FALSE;
+#endif
+  if (StringHasNoText (inputFile) || callback == NULL) return index;
+
+#ifndef OS_UNIX
+  if (compressed) {
+    Message (MSG_ERROR, "Can only decompress on-the-fly on UNIX machines");
+    return index;
+  }
+#endif
+
+  amp = AsnAllModPtr ();
+  if (amp == NULL) {
+    Message (MSG_ERROR, "Unable to load AsnAllModPtr");
+    return index;
+  }
+
+  atp_bss = AsnFind ("Bioseq-set");
+  if (atp_bss == NULL) {
+    Message (MSG_ERROR, "Unable to find ASN.1 type Bioseq-set");
+    return index;
+  }
+
+  atp_se = AsnFind ("Bioseq-set.seq-set.E");
+  if (atp_se == NULL) {
+    Message (MSG_ERROR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
+    return index;
+  }
+
+#ifdef OS_UNIX
+  if (compressed) {
+    gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
+    if (gzcatprog == NULL) {
+      /* no override given, probe for gzcat and then zcat */
+      ret = system ("gzcat -h >/dev/null 2>&1");
+      if (ret == 0) {
+        gzcatprog = "gzcat";
+      } else if (ret == -1) {
+        Message (MSG_FATAL, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
+        return index;
+      } else {
+        ret = system ("zcat -h >/dev/null 2>&1");
+        if (ret == 0) {
+          gzcatprog = "zcat";
+        } else if (ret == -1) {
+          Message (MSG_FATAL, "Unable to fork or exec zcat in ScanBioseqSetRelease");
+          return index;
+        } else {
+          Message (MSG_FATAL, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
+          return index;
+        }
+      }
+    }
+    /* cmmd is fixed-size and both parts come from outside, so format with a
+       bound and refuse to run a truncated command */
+    len = snprintf (cmmd, sizeof (cmmd), "%s %s", gzcatprog, inputFile);
+    if (len < 0 || (size_t) len >= sizeof (cmmd)) {
+      Message (MSG_ERROR, "Decompression command is too long for input file '%s'", inputFile);
+      return index;
+    }
+    fp = popen (cmmd, /* binary? "rb" : */ "r");
+    usedPopen = TRUE;
+  } else {
+    fp = FileOpen (inputFile, binary? "rb" : "r");
+  }
+#else
+  fp = FileOpen (inputFile, binary? "rb" : "r");
+#endif
+  if (fp == NULL) {
+    Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
+    return index;
+  }
+
+  aip = AsnIoNew (binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
+  if (aip == NULL) {
+    Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", inputFile);
+    /* the stream is already open, do not leak it */
+#ifdef OS_UNIX
+    if (usedPopen) {
+      pclose (fp);
+    } else {
+      FileClose (fp);
+    }
+#else
+    FileClose (fp);
+#endif
+    return index;
+  }
+
+  atp = atp_bss;
+
+  while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
+    if (atp == atp_se) {
+      if (mutex != NULL) {
+        NlmMutexLockEx (mutex);
+      }
+      SeqMgrHoldIndexing (TRUE);
+      sep = SeqEntryAsnRead (aip, atp);
+      SeqMgrHoldIndexing (FALSE);
+      if (mutex != NULL) {
+        NlmMutexUnlock (*mutex);
+      }
+      callback (sep, userdata);
+      if (freesep) {
+        SeqEntryFree (sep);
+      }
+      index++;
+    } else {
+      AsnReadVal (aip, atp, NULL);
+    }
+  }
+
+  /* FALSE: the stream is closed separately below */
+  AsnIoFree (aip, FALSE);
+
+#ifdef OS_UNIX
+  if (usedPopen) {
+    pclose (fp);
+  } else {
+    FileClose (fp);
+  }
+#else
+  FileClose (fp);
+#endif
+  return index;
+}
+
+/* Variant of ScanBioseqSetReleaseInt that uses no mutex and frees each
+   component Seq-entry after the callback.  Returns the component count. */
+NLM_EXTERN Int4 ScanBioseqSetRelease (
+  CharPtr inputFile,
+  Boolean binary,
+  Boolean compressed,
+  Pointer userdata,
+  ScanBioseqSetFunc callback
+)
+
+{
+  Int4  processed;
+
+  processed = ScanBioseqSetReleaseInt (inputFile, binary, compressed, userdata, callback, TRUE, NULL);
+  return processed;
+}
+
+/* mutex used by ScanBioseqSetReleaseMT and FreeScanSeqEntryMT */
+static TNlmMutex scan_bioseq_set_release_mutex = NULL;
+
+/* Variant of ScanBioseqSetReleaseInt that reads each component under
+   scan_bioseq_set_release_mutex and does not free it after the callback.
+   Returns the component count. */
+NLM_EXTERN Int4 ScanBioseqSetReleaseMT (
+  CharPtr inputFile,
+  Boolean binary,
+  Boolean compressed,
+  Pointer userdata,
+  ScanBioseqSetFunc callback
+)
+
+{
+  Int4  processed;
+
+  processed = ScanBioseqSetReleaseInt (inputFile, binary, compressed, userdata, callback, FALSE, &scan_bioseq_set_release_mutex);
+  return processed;
+}
+
+/* Frees sep while holding scan_bioseq_set_release_mutex, with indexing held
+   for the duration of the free.  Always returns NULL. */
+NLM_EXTERN SeqEntryPtr LIBCALL FreeScanSeqEntryMT (
+  SeqEntryPtr sep
+)
+
+{
+  if (sep != NULL) {
+    NlmMutexLockEx (&scan_bioseq_set_release_mutex);
+
+    SeqMgrHoldIndexing (TRUE);
+    SeqEntryFree (sep);
+    SeqMgrHoldIndexing (FALSE);
+
+    NlmMutexUnlock (scan_bioseq_set_release_mutex);
+  }
+
+  return NULL;
+}
+
+/*
+ *  ScanEntrezgeneSetRelease streams an Entrezgene-Set file, reading one
+ *  Entrezgene-Set.E record at a time, passing it to callback and then
+ *  freeing it with EntrezgeneFree.
+ *
+ *  inputFile   path of the ASN.1 file to read
+ *  binary      TRUE for binary ASN.1, FALSE for text ASN.1
+ *  compressed  TRUE to read through an external decompressor (OS_UNIX only):
+ *              the program named by NCBI_UNCOMPRESS_BINARY if that is set,
+ *              otherwise gzcat, otherwise zcat
+ *  userdata    passed unchanged to callback
+ *  callback    called once for every record
+ *
+ *  Returns the number of records passed to callback.
+ */
+NLM_EXTERN Int4 ScanEntrezgeneSetRelease (
+  CharPtr inputFile,
+  Boolean binary,
+  Boolean compressed,
+  Pointer userdata,
+  ScanEntrezgeneSetFunc callback
+)
+
+{
+  AsnIoPtr       aip;
+  AsnModulePtr   amp;
+  AsnTypePtr     atp, atp_egs, atp_egse;
+  EntrezgenePtr  egp;
+  FILE           *fp;
+  Int4           index = 0;
+#ifdef OS_UNIX
+  Char           cmmd [256];
+  CharPtr        gzcatprog;
+  int            len;
+  int            ret;
+  Boolean        usedPopen = FALSE;
+#endif
+  if (StringHasNoText (inputFile) || callback == NULL) return index;
+
+#ifndef OS_UNIX
+  if (compressed) {
+    Message (MSG_ERROR, "Can only decompress on-the-fly on UNIX machines");
+    return index;
+  }
+#endif
+
+  amp = AsnAllModPtr ();
+  if (amp == NULL) {
+    Message (MSG_ERROR, "Unable to load AsnAllModPtr");
+    return index;
+  }
+
+  atp_egs = AsnFind ("Entrezgene-Set");
+  if (atp_egs == NULL) {
+    Message (MSG_ERROR, "Unable to find ASN.1 type Entrezgene-Set");
+    return index;
+  }
+
+  atp_egse = AsnFind ("Entrezgene-Set.E");
+  if (atp_egse == NULL) {
+    Message (MSG_ERROR, "Unable to find ASN.1 type Entrezgene-Set.E");
+    return index;
+  }
+
+#ifdef OS_UNIX
+  if (compressed) {
+    gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
+    if (gzcatprog == NULL) {
+      /* no override given, probe for gzcat and then zcat */
+      ret = system ("gzcat -h >/dev/null 2>&1");
+      if (ret == 0) {
+        gzcatprog = "gzcat";
+      } else if (ret == -1) {
+        Message (MSG_FATAL, "Unable to fork or exec gzcat in ScanEntrezgeneSetRelease");
+        return index;
+      } else {
+        ret = system ("zcat -h >/dev/null 2>&1");
+        if (ret == 0) {
+          gzcatprog = "zcat";
+        } else if (ret == -1) {
+          Message (MSG_FATAL, "Unable to fork or exec zcat in ScanEntrezgeneSetRelease");
+          return index;
+        } else {
+          Message (MSG_FATAL, "Unable to find zcat or gzcat in ScanEntrezgeneSetRelease - please edit your PATH environment variable");
+          return index;
+        }
+      }
+    }
+    /* cmmd is fixed-size and both parts come from outside, so format with a
+       bound and refuse to run a truncated command */
+    len = snprintf (cmmd, sizeof (cmmd), "%s %s", gzcatprog, inputFile);
+    if (len < 0 || (size_t) len >= sizeof (cmmd)) {
+      Message (MSG_ERROR, "Decompression command is too long for input file '%s'", inputFile);
+      return index;
+    }
+    fp = popen (cmmd, /* binary? "rb" : */ "r");
+    usedPopen = TRUE;
+  } else {
+    fp = FileOpen (inputFile, binary? "rb" : "r");
+  }
+#else
+  fp = FileOpen (inputFile, binary? "rb" : "r");
+#endif
+  if (fp == NULL) {
+    Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
+    return index;
+  }
+
+  aip = AsnIoNew (binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
+  if (aip == NULL) {
+    Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", inputFile);
+    /* the stream is already open, do not leak it */
+#ifdef OS_UNIX
+    if (usedPopen) {
+      pclose (fp);
+    } else {
+      FileClose (fp);
+    }
+#else
+    FileClose (fp);
+#endif
+    return index;
+  }
+
+  atp = atp_egs;
+
+  while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
+    if (atp == atp_egse) {
+      egp = EntrezgeneAsnRead (aip, atp);
+      callback (egp, userdata);
+      EntrezgeneFree (egp);
+      index++;
+    } else {
+      AsnReadVal (aip, atp, NULL);
+    }
+  }
+
+  /* FALSE: the stream is closed separately below */
+  AsnIoFree (aip, FALSE);
+
+#ifdef OS_UNIX
+  if (usedPopen) {
+    pclose (fp);
+  } else {
+    FileClose (fp);
+  }
+#else
+  FileClose (fp);
+#endif
+  return index;
+}
+
+
+typedef struct miscdata { /* state passed as mydata to the Find... explore callbacks below */
+ SeqEntryPtr sep; /* result: the matching entry, NULL until one is found */
+ Int2 count; /* entries seen so far, incremented by FindNthSeqEntryCallback */
+ Int2 desired; /* value of count at which the entry is captured (first entry is 1) */
+ Uint1 _class; /* Bioseq-set class compared by FindBioseqSetByClassCallback */
+} MiscData, PNTR MiscDataPtr;
+
+/* Explore callback: counts every entry passed in and stores the one whose
+   running count equals the desired position. */
+static void FindNthSeqEntryCallback (SeqEntryPtr sep, Pointer mydata,
+                                     Int4 index, Int2 indent)
+
+{
+  MiscDataPtr  state;
+
+  if (sep == NULL || mydata == NULL) return;
+
+  state = (MiscDataPtr) mydata;
+  (state->count)++;
+  if (state->count == state->desired) {
+    state->sep = sep;
+  }
+}
+
+/* Returns the entry at position seq (counting from 1) in the order
+   SeqEntryExplore visits sep, or NULL if sep is NULL or has fewer entries. */
+NLM_EXTERN SeqEntryPtr LIBCALL FindNthSeqEntry (SeqEntryPtr sep, Int2 seq)
+
+{
+  MiscData  search;
+
+  search.sep = NULL;
+  search.count = 0;
+  search.desired = seq;
+
+  if (sep == NULL) return NULL;
+
+  SeqEntryExplore (sep, (Pointer) (&search), FindNthSeqEntryCallback);
+  return search.sep;
+}
+
+/* Returns the entry at position seq (counting from 1) in the order
+   BioseqExplore visits sep, or NULL if sep is NULL or has fewer entries. */
+NLM_EXTERN SeqEntryPtr LIBCALL FindNthBioseq (SeqEntryPtr sep, Int2 seq)
+
+{
+  MiscData  search;
+
+  search.sep = NULL;
+  search.count = 0;
+  search.desired = seq;
+
+  if (sep == NULL) return NULL;
+
+  BioseqExplore (sep, (Pointer) (&search), FindNthSeqEntryCallback);
+  return search.sep;
+}
+
+/* Returns the entry at position seq (counting from 1) in the order
+   SequinEntryExplore visits sep, or NULL if sep is NULL or has fewer
+   entries. */
+NLM_EXTERN SeqEntryPtr LIBCALL FindNthSequinEntry (SeqEntryPtr sep, Int2 seq)
+
+{
+  MiscData  search;
+
+  search.sep = NULL;
+  search.count = 0;
+  search.desired = seq;
+
+  if (sep == NULL) return NULL;
+
+  SequinEntryExplore (sep, (Pointer) (&search), FindNthSeqEntryCallback);
+  return search.sep;
+}
+
+/* Explore callback: stores the first entry that is a Bioseq (choice 1)
+   whose mol satisfies ISA_na; later matches leave the result untouched. */
+static void FindNucSeqEntryCallback (SeqEntryPtr sep, Pointer mydata,
+                                     Int4 index, Int2 indent)
+
+{
+  BioseqPtr    candidate;
+  MiscDataPtr  state;
+
+  if (sep == NULL || sep->choice != 1 || mydata == NULL) return;
+
+  state = (MiscDataPtr) mydata;
+  candidate = (BioseqPtr) sep->data.ptrvalue;
+  if (candidate == NULL) return;
+  if (! ISA_na (candidate->mol)) return;
+
+  if (state->sep == NULL) {
+    state->sep = sep;
+  }
+}
+
+/* Returns the first entry found by BioseqExplore under sep whose Bioseq
+   satisfies ISA_na, or NULL if sep is NULL or no such entry exists. */
+NLM_EXTERN SeqEntryPtr LIBCALL FindNucSeqEntry (SeqEntryPtr sep)
+
+{
+  MiscData  search;
+
+  search.sep = NULL;
+  search.count = 0;
+  search.desired = 0;
+
+  if (sep == NULL) return NULL;
+
+  BioseqExplore (sep, (Pointer) (&search), FindNucSeqEntryCallback);
+  return search.sep;
+}
+
+/* Returns the Bioseq held by the entry FindNucSeqEntry finds under sep, or
+   NULL if no entry is found or the entry is not a Bioseq. */
+NLM_EXTERN BioseqPtr LIBCALL FindNucBioseq (SeqEntryPtr sep)
+
+{
+  SeqEntryPtr  found;
+
+  found = FindNucSeqEntry (sep);
+  if (found != NULL && IS_Bioseq (found)) {
+    return (BioseqPtr) found->data.ptrvalue;
+  }
+  return NULL;
+}
+
+/* Explore callback: stores the first entry that is a Bioseq-set (choice 2)
+   whose _class equals the class being searched for. */
+static void FindBioseqSetByClassCallback (SeqEntryPtr sep, Pointer mydata,
+                                          Int4 index, Int2 indent)
+
+{
+  BioseqSetPtr  candidate;
+  MiscDataPtr   state;
+
+  if (sep == NULL || sep->choice != 2 || mydata == NULL) return;
+
+  state = (MiscDataPtr) mydata;
+  candidate = (BioseqSetPtr) sep->data.ptrvalue;
+  if (candidate == NULL) return;
+  if (candidate->_class != state->_class) return;
+
+  if (state->sep == NULL) {
+    state->sep = sep;
+  }
+}
+
+/* Returns the first entry found by SeqEntryExplore under sep that is a
+   Bioseq-set of the given _class, or NULL if sep is NULL or none exists. */
+NLM_EXTERN SeqEntryPtr LIBCALL FindBioseqSetByClass (SeqEntryPtr sep, Uint1 _class)
+
+{
+  MiscData  search;
+
+  search.sep = NULL;
+  search.count = 0;
+  search.desired = 0;
+  search._class = _class;
+
+  if (sep == NULL) return NULL;
+
+  SeqEntryExplore (sep, (Pointer) (&search), FindBioseqSetByClassCallback);
+  return search.sep;
+}
+
+
+typedef struct kinddata { /* flags filled in by HasNucOrProtCallback */
+ Boolean hasNuc; /* set when a Bioseq whose mol satisfies ISA_na was seen */
+ Boolean hasProt; /* set when a Bioseq whose mol satisfies ISA_aa was seen */
+} KindData, PNTR KindPtr;
+
+/* Explore callback: for a Bioseq entry (choice 1) with data, records in the
+   KindData passed as mydata whether its mol satisfies ISA_na or ISA_aa. */
+static void HasNucOrProtCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
+
+{
+  KindPtr    kinds;
+  BioseqPtr  seq;
+
+  if (sep == NULL || sep->choice != 1 || sep->data.ptrvalue == NULL || mydata == NULL) return;
+
+  kinds = (KindPtr) mydata;
+  seq = (BioseqPtr) sep->data.ptrvalue;
+  if (ISA_na (seq->mol)) {
+    kinds->hasNuc = TRUE;
+    return;
+  }
+  if (ISA_aa (seq->mol)) {
+    kinds->hasProt = TRUE;
+  }
+}
+
+/* Returns TRUE if BioseqExplore finds a Bioseq under sep whose mol satisfies
+   ISA_na; FALSE otherwise, including when sep is NULL. */
+NLM_EXTERN Boolean LIBCALL SeqEntryHasNucs (SeqEntryPtr sep)
+
+{
+  KindData  kinds;
+
+  kinds.hasNuc = FALSE;
+  kinds.hasProt = FALSE;
+
+  if (sep == NULL) return FALSE;
+
+  BioseqExplore (sep, (Pointer) (&kinds), HasNucOrProtCallback);
+  return kinds.hasNuc;
+}
+
+/* Returns TRUE if BioseqExplore finds a Bioseq under sep whose mol satisfies
+   ISA_aa (and not ISA_na); FALSE otherwise, including when sep is NULL. */
+NLM_EXTERN Boolean LIBCALL SeqEntryHasProts (SeqEntryPtr sep)
+
+{
+  KindData  kinds;
+
+  kinds.hasNuc = FALSE;
+  kinds.hasProt = FALSE;
+
+  if (sep == NULL) return FALSE;
+
+  BioseqExplore (sep, (Pointer) (&kinds), HasNucOrProtCallback);
+  return kinds.hasProt;
+}
+
+
+/* Explore callback: sets the Boolean passed as mydata when an annotation of
+   type 2 on this Bioseq or Bioseq-set carries a user descriptor whose
+   object type string is "Hist Seqalign". */
+static void FindPowerBLASTAsnCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
+
+{
+  SeqAnnotPtr    annot;
+  AnnotDescrPtr  descr;
+  BoolPtr        detected;
+  ObjectIdPtr    type;
+
+  if (sep == NULL || sep->data.ptrvalue == NULL || mydata == NULL) return;
+  detected = (BoolPtr) mydata;
+
+  if (IS_Bioseq (sep)) {
+    annot = ((BioseqPtr) sep->data.ptrvalue)->annot;
+  } else {
+    annot = ((BioseqSetPtr) sep->data.ptrvalue)->annot;
+  }
+
+  for (; annot != NULL; annot = annot->next) {
+    if (annot->type != 2) continue;
+    for (descr = ValNodeFindNext (annot->desc, NULL, Annot_descr_user);
+         descr != NULL;
+         descr = ValNodeFindNext (annot->desc, descr, Annot_descr_user)) {
+      if (descr->data.ptrvalue == NULL) continue;
+      type = ((UserObjectPtr) descr->data.ptrvalue)->type;
+      if (type != NULL && StringCmp (type->str, "Hist Seqalign") == 0) {
+        *detected = TRUE;
+      }
+    }
+  }
+}
+
+/* Returns TRUE if FindPowerBLASTAsnCallback flags any entry visited by
+   SeqEntryExplore under sep. */
+NLM_EXTERN Boolean LIBCALL PowerBLASTASN1Detected (SeqEntryPtr sep)
+
+{
+  Boolean  detected = FALSE;
+
+  SeqEntryExplore (sep, (Pointer) &detected, FindPowerBLASTAsnCallback);
+  return detected;
+}
+
+
diff --git a/api/sqnutil3.c b/api/sqnutil3.c
index e2ff9d96..8a859381 100644
--- a/api/sqnutil3.c
+++ b/api/sqnutil3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/7/00
*
-* $Revision: 6.930 $
+* $Revision: 6.1153 $
*
* File Description:
*
@@ -73,6 +73,8 @@
#define NLM_GENERATED_CODE_PROTO
#include <objmacro.h>
#include <macroapi.h>
+#include <objvalid.h>
+#include <valapi.h>
/* functions for associating CDS and parent mRNA using featureIDs */
@@ -1779,6 +1781,10 @@ static void ConvertImpFeatToProt (SeqFeatPtr feat, Pointer userdata)
processed = 4;
choice = SEQFEAT_PROT;
subtype = FEATDEF_transit_peptide_aa;
+ } else if (StringCmp (ifp->key, "propeptide") == 0 || StringCmp (ifp->key, "pro_peptide") == 0) {
+ processed = 5;
+ choice = SEQFEAT_PROT;
+ subtype = FEATDEF_propeptide;
} else if (StringCmp (ifp->key, "misc_feature") == 0 && feat->comment != NULL) {
site = FindStr (feat_site, num_site, feat->comment);
if (site != -1) {
@@ -1921,21 +1927,6 @@ static void MergeAdjacentAnnotsCallback (SeqEntryPtr sep, Pointer mydata, Int4 i
MergeAdjacentAnnotsInList (sap);
}
-NLM_EXTERN Boolean PubIsEffectivelyEmpty (PubdescPtr pdp)
-
-{
- ValNodePtr vnp;
-
- if (pdp == NULL) return FALSE;
- vnp = pdp->pub;
- if (vnp != NULL && vnp->next == NULL && vnp->choice == PUB_Gen) {
- if (empty_citgen ((CitGenPtr) vnp->data.ptrvalue)) {
- return TRUE;
- }
- }
- return FALSE;
-}
-
static void MarkEmptyDescsForCleanup (SeqDescrPtr sdp, Pointer userdata)
{
@@ -2084,84 +2075,6 @@ static void ConvertPubFeatDescProc (SeqFeatPtr sfp, Pointer userdata)
}
}
-extern void ConvertSourceFeatDescProc (SeqFeatPtr sfp, Pointer userdata)
-
-{
- BioSourcePtr biop;
- BioseqPtr bsp;
- SubSourcePtr lastssp;
- ObjValNodePtr ovp;
- SeqDescPtr sdp;
- SeqEntryPtr sep;
- SeqIdPtr sip;
- SubSourcePtr ssp;
- ValNode vn;
- ValNodePtr last_dbxref;
-
- /* look for biosource features */
- if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC) return;
- /* get bioseq by feature location */
- sip = SeqLocId (sfp->location);
- bsp = BioseqFind (sip);
- if (bsp == NULL) return;
- sip = SeqIdFindBest(bsp->id, 0);
- if (sip == NULL) return;
- vn.choice = SEQLOC_WHOLE;
- vn.extended = 0;
- vn.data.ptrvalue = (Pointer) sip;
- vn.next = NULL;
- /* is feature full length? */
- if (SeqLocCompare (sfp->location, &vn) != SLC_A_EQ_B) return;
- sep = SeqMgrGetSeqEntryForData (bsp);
- if (sep == NULL) return;
- sdp = CreateNewDescriptor (sep, Seq_descr_source);
- if (sdp == NULL) return;
- /* move biosource from feature to descriptor */
- sdp->data.ptrvalue = sfp->data.value.ptrvalue;
- if (sdp->extended != 0) {
- ovp = (ObjValNodePtr) sdp;
- ovp->idx.subtype = Seq_descr_source;
- }
- sfp->data.value.ptrvalue = NULL;
- /* flag old feature for removal */
- sfp->idx.deleteme = TRUE;
- /* move comment to subsource note */
- if (sfp->comment == NULL) return;
- biop = (BioSourcePtr) sdp->data.ptrvalue;
- if (biop == NULL) return;
- ssp = SubSourceNew ();
- if (ssp == NULL) return;
- ssp->subtype = SUBSRC_other;
- ssp->name = sfp->comment;
- sfp->comment = NULL;
- /* link in at end, since BasicSeqEntry will have sorted this list */
- if (biop->subtype == NULL) {
- biop->subtype = ssp;
- } else {
- lastssp = biop->subtype;
- while (lastssp->next != NULL) {
- lastssp = lastssp->next;
- }
- lastssp->next = ssp;
- }
-
- /* move dbxrefs on feature to source */
- if (sfp->dbxref != NULL) {
- if (biop->org == NULL) {
- biop->org = OrgRefNew();
- }
- last_dbxref = biop->org->db;
- while (last_dbxref != NULL && last_dbxref->next != NULL) {
- last_dbxref = last_dbxref->next;
- }
- if (last_dbxref == NULL) {
- biop->org->db = sfp->dbxref;
- } else {
- last_dbxref->next = sfp->dbxref;
- }
- sfp->dbxref = NULL;
- }
-}
static void PromoteOrgRefDescToBioSource (SeqDescrPtr sdp, Pointer userdata)
@@ -2419,10 +2332,12 @@ static FeatdefNameData featdefWithName [] = {
{ FEATDEF_primer_bind , "primer_bind" },
{ FEATDEF_prim_transcript , "prim_transcript" },
{ FEATDEF_promoter , "promoter" },
+ { FEATDEF_propeptide , "propeptide" },
{ FEATDEF_PROT , "Protein" },
{ FEATDEF_protein_bind , "protein_bind" },
{ FEATDEF_RBS , "RBS" },
{ FEATDEF_REGION , "Region" },
+ { FEATDEF_regulatory , "regulatory" },
{ FEATDEF_repeat_region , "repeat_region" },
{ FEATDEF_repeat_unit , "repeat_unit" },
{ FEATDEF_rep_origin , "rep_origin" },
@@ -2588,7 +2503,9 @@ static CharPtr featurekeys [] = {
"mobile_element",
"centromere",
"telomere",
- "assembly_gap"
+ "assembly_gap",
+ "regulatory",
+ "propeptide"
};
NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF)
@@ -3226,9 +3143,11 @@ static Int4 AddGapSeqLit (ValNodePtr PNTR seq_ext)
ifp->choice = 4;
slip->fuzz = ifp;
+ /*
slip->seq_data = (SeqDataPtr) BSNew (slip->length);
slip->seq_data_type = Seq_code_iupacna;
AddBasesToByteStore ((ByteStorePtr) slip->seq_data, gap_chars);
+ */
return 100;
}
return 0;
@@ -3364,25 +3283,141 @@ static BioseqPtr GetDeltaSeqFromMasterSeg (BioseqPtr bsp)
return new_bsp;
}
-NLM_EXTERN void ConvertSegSetsToDeltaSequences (SeqEntryPtr sep)
+/* Descriptor callback: userdata is a GBBlockPtr PNTR.  If it still holds
+   NULL, it receives an AsnIoMemCopy duplicate of this GenBank descriptor's
+   GBBlock; once filled, further descriptors are ignored. */
+static void CopyFirstGBBlock(
+  SeqDescrPtr sdp,
+  Pointer userdata
+)
+
+{
+  GBBlockPtr PNTR  dest;
+  GBBlockPtr       src;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_genbank) return;
+
+  src = (GBBlockPtr) sdp->data.ptrvalue;
+  dest = (GBBlockPtr PNTR) userdata;
+  if (src == NULL || dest == NULL || *dest != NULL) return;
+
+  *dest = (GBBlockPtr) AsnIoMemCopy (src, (AsnReadFunc) GBBlockAsnRead, (AsnWriteFunc) GBBlockAsnWrite);
+}
+
+/* Bioseq callback: userdata is a GBBlockPtr.  For every Bioseq that is not
+   virtual, the accession text of its best accession Seq-id is appended to
+   the block's extra_accessions list. */
+static void AddPartAccns (
+  BioseqPtr bsp,
+  Pointer userdata
+)
+
+{
+  Char        accn [64];
+  SeqIdPtr    best;
+  GBBlockPtr  target;
+
+  if (bsp == NULL) return;
+  target = (GBBlockPtr) userdata;
+  if (target == NULL || bsp->repr == Seq_repr_virtual) return;
+
+  best = SeqIdFindBestAccession (bsp->id);
+  if (best == NULL) return;
+
+  SeqIdWrite (best, accn, PRINTID_TEXTID_ACCESSION, sizeof (accn));
+  if (! StringHasNoText (accn)) {
+    ValNodeCopyStr (&(target->extra_accessions), 0, accn);
+  }
+}
+
+/* Bioseq callback: userdata is the delta BioseqPtr.  For every Bioseq that
+   is not virtual, the accession text of its best accession Seq-id is passed
+   to ParseStringIntoSeqHist together with the delta Bioseq's hist; the
+   returned Seq-hist is attached if the delta Bioseq had none. */
+static void AddPartHist (
+  BioseqPtr bsp,
+  Pointer userdata
+)
+
+{
+  Char        accn [64];
+  SeqIdPtr    best;
+  SeqHistPtr  hist;
+  BioseqPtr   target;
+
+  if (bsp == NULL) return;
+  target = (BioseqPtr) userdata;
+  if (target == NULL || bsp->repr == Seq_repr_virtual) return;
+
+  best = SeqIdFindBestAccession (bsp->id);
+  if (best == NULL) return;
+
+  SeqIdWrite (best, accn, PRINTID_TEXTID_ACCESSION, sizeof (accn));
+  if (StringHasNoText (accn)) return;
+
+  hist = ParseStringIntoSeqHist (target->hist, accn);
+  if (target->hist == NULL) {
+    target->hist = hist;
+  }
+}
+
+/* Descriptor callback: sets idx.deleteme on every GenBank descriptor whose
+   extended field is nonzero. */
+static void MarkGBBlock(
+  SeqDescrPtr sdp,
+  Pointer userdata
+)
+
+{
+  if (sdp != NULL && sdp->choice == Seq_descr_genbank && sdp->extended != 0) {
+    ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
+  }
+}
+
+static void ConvertSegSetsToDeltaSequencesInt (SeqEntryPtr sep)
{
BioseqSetPtr bssp;
SeqEntryPtr sub_sep, prev_sep, next_sep;
+ GBBlockPtr gbp = NULL;
ObjMgrDataPtr omdptop;
ObjMgrData omdata;
Uint2 parenttype;
Pointer parentptr;
+ SeqEntryPtr partssep = NULL;
+ BioseqPtr segbsp;
+ SeqEntryPtr segseq = NULL;
+ SeqEntryPtr segsep = NULL;
+ BioseqSetPtr segset = NULL;
SeqEntryPtr new_sep;
BioseqPtr bsp, new_bsp = NULL;
BioseqSetPtr parent_set;
if (sep == NULL || !IS_Bioseq_set (sep)) return;
bssp = (BioseqSetPtr) sep->data.ptrvalue;
- if (bssp->_class == 2)
+ if (bssp->_class == BioseqseqSet_class_segset)
{
SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
GetSeqEntryParent (sep, &parentptr, &parenttype);
-
+
+ segsep = sep;
+ segset = (BioseqSetPtr) segsep->data.ptrvalue;
+ if (segset == NULL) return;
+ segseq = segset->seq_set;
+ if (segseq == NULL) return;
+ if (! IS_Bioseq (segseq)) return;
+ segbsp = (BioseqPtr) segseq->data.ptrvalue;
+ if (segbsp == NULL) return;
+ if (segbsp->repr != Seq_repr_seg) return;
+ partssep = segseq->next;
+ if (partssep == NULL) return;
+
+ VisitDescriptorsInSep (segsep, NULL, MarkGBBlock);
+
+ VisitDescriptorsInSep (segsep, (Pointer) &gbp, CopyFirstGBBlock);
+ if (gbp != NULL) {
+ VisitBioseqsInSep (partssep, (Pointer) gbp, AddPartAccns);
+ }
+
parent_set = (BioseqSetPtr)(bssp->idx.parentptr);
prev_sep = NULL;
for (sub_sep = bssp->seq_set; sub_sep != NULL && !IS_Bioseq (sub_sep); sub_sep = sub_sep->next)
@@ -3396,13 +3431,30 @@ NLM_EXTERN void ConvertSegSetsToDeltaSequences (SeqEntryPtr sep)
new_sep = SeqEntryNew();
new_sep->choice = 1;
new_sep->data.ptrvalue = new_bsp;
-
+
+ /* swap Bioseqs */
+ sub_sep->data.ptrvalue = new_bsp;
+ new_sep->data.ptrvalue = bsp;
+
+ /* populate Seq-hist.replaces */
+
+ VisitBioseqsInSep (partssep, (Pointer) new_bsp, AddPartHist);
+
+ if (gbp != NULL) {
+ SeqDescrAddPointer (&(new_bsp->descr), Seq_descr_genbank, (Pointer) gbp);
+ }
+
/* add new seq entry to parent set */
+ /*
AddSeqEntryToSeqEntry (parent_set->seqentry, new_sep, TRUE);
+ */
- /* remove segset */
+ /* remove segset */
+ /*
bssp->idx.deleteme = TRUE;
+ */
}
+
SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
DeleteMarkedObjects (0, OBJ_BIOSEQSET, parent_set);
@@ -3418,6 +3470,28 @@ NLM_EXTERN void ConvertSegSetsToDeltaSequences (SeqEntryPtr sep)
}
}
+/* Entry point for the segset conversion.  If sep is a Bioseq-set whose class
+   lies between mut_set and eco_set, or is wgs_set or small_genome_set, each
+   direct member is passed to ConvertSegSetsToDeltaSequencesInt; any other
+   Bioseq-set is passed itself.  Nothing is done for NULL, a non-set entry
+   or a set without data. */
+NLM_EXTERN void ConvertSegSetsToDeltaSequences (SeqEntryPtr sep)
+{
+  BioseqSetPtr  bssp;
+  SeqEntryPtr   member;
+  Boolean       perMember;
+
+  if (sep == NULL || ! IS_Bioseq_set (sep)) return;
+  bssp = (BioseqSetPtr) sep->data.ptrvalue;
+  if (bssp == NULL) return;
+
+  perMember = (Boolean) ((bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) ||
+                         bssp->_class == BioseqseqSet_class_wgs_set ||
+                         bssp->_class == BioseqseqSet_class_small_genome_set);
+
+  if (! perMember) {
+    ConvertSegSetsToDeltaSequencesInt (sep);
+    return;
+  }
+
+  for (member = bssp->seq_set; member != NULL; member = member->next) {
+    ConvertSegSetsToDeltaSequencesInt (member);
+  }
+}
+
static PubMedFetchFunc pmf_pubfetch = NULL;
NLM_EXTERN void LIBCALL PubMedSetFetchFunc (PubMedFetchFunc func)
@@ -3654,162 +3728,6 @@ extern CharPtr MyFGetLine (FILE *fp, ValNodePtr PNTR current_data)
return GetLineFromBuffer (current_data, data_len);
}
-/* PCR_primer manipulation functions */
-
-static ValNodePtr ParsePCRComponent (
- CharPtr strs
-)
-
-{
- ValNodePtr head = NULL;
- size_t len;
- CharPtr ptr, str, tmp;
-
- if (StringHasNoText (strs)) return NULL;
-
- tmp = StringSave (strs);
- if (tmp == NULL) return NULL;
-
- str = tmp;
- len = StringLen (str);
- if (len > 1 && *str == '(' && str [len - 1] == ')' && StringChr (str + 1, '(') == NULL) {
- str [len - 1] = '\0';
- str++;
- }
-
- while (StringDoesHaveText (str)) {
- ptr = StringChr (str, ',');
- if (ptr != NULL) {
- *ptr = '\0';
- ptr++;
- }
-
- TrimSpacesAroundString (str);
- ValNodeCopyStr (&head, 0, str);
-
- str = ptr;
- }
-
- MemFree (tmp);
- return head;
-}
-
-NLM_EXTERN ValNodePtr ParsePCRStrings (
- CharPtr fwd_primer_seq,
- CharPtr rev_primer_seq,
- CharPtr fwd_primer_name,
- CharPtr rev_primer_name
-)
-
-{
- ValNodePtr curr_fwd_name;
- ValNodePtr curr_fwd_seq;
- ValNodePtr curr_rev_name;
- ValNodePtr curr_rev_seq;
- CharPtr fwd_name;
- CharPtr fwd_seq;
- CharPtr rev_name;
- CharPtr rev_seq;
- ValNodePtr fwd_name_list = NULL;
- ValNodePtr fwd_seq_list = NULL;
- ValNodePtr rev_name_list = NULL;
- ValNodePtr rev_seq_list = NULL;
- ValNodePtr head = NULL;
- Boolean okay;
- Int2 orig_order = 0;
- PcrSetPtr psp;
-
- fwd_seq_list = ParsePCRComponent (fwd_primer_seq);
- rev_seq_list = ParsePCRComponent (rev_primer_seq);
- fwd_name_list = ParsePCRComponent (fwd_primer_name);
- rev_name_list = ParsePCRComponent (rev_primer_name);
-
- curr_fwd_seq = fwd_seq_list;
- curr_rev_seq = rev_seq_list;
- curr_fwd_name = fwd_name_list;
- curr_rev_name = rev_name_list;
-
- while (curr_fwd_seq != NULL || curr_rev_seq != NULL || curr_fwd_name != NULL || curr_rev_name != NULL) {
- fwd_seq = NULL;
- rev_seq = NULL;
- fwd_name = NULL;
- rev_name = NULL;
- okay = FALSE;
-
- if (curr_fwd_seq != NULL) {
- fwd_seq = (CharPtr) curr_fwd_seq->data.ptrvalue;
- curr_fwd_seq = curr_fwd_seq->next;
- okay = TRUE;
- }
-
- if (curr_rev_seq != NULL) {
- rev_seq = (CharPtr) curr_rev_seq->data.ptrvalue;
- curr_rev_seq = curr_rev_seq->next;
- okay = TRUE;
- }
-
- if (curr_fwd_name != NULL) {
- fwd_name = (CharPtr) curr_fwd_name->data.ptrvalue;
- curr_fwd_name = curr_fwd_name->next;
- okay = TRUE;
- }
-
- if (curr_rev_name != NULL) {
- rev_name = (CharPtr) curr_rev_name->data.ptrvalue;
- curr_rev_name = curr_rev_name->next;
- okay = TRUE;
- }
-
- if (okay) {
- psp = (PcrSetPtr) MemNew (sizeof (PcrSet));
- if (psp != NULL) {
- psp->fwd_seq = StringSaveNoNull (fwd_seq);
- psp->rev_seq = StringSaveNoNull (rev_seq);
- psp->fwd_name = StringSaveNoNull (fwd_name);
- psp->rev_name = StringSaveNoNull (rev_name);
- orig_order++;
- psp->orig_order = orig_order;
- ValNodeAddPointer (&head, 0, (Pointer) psp);
- }
- }
- }
-
- ValNodeFreeData (fwd_seq_list);
- ValNodeFreeData (rev_seq_list);
- ValNodeFreeData (fwd_name_list);
- ValNodeFreeData (rev_name_list);
-
- return head;
-}
-
-NLM_EXTERN ValNodePtr ParsePCRSet (
- BioSourcePtr biop
-)
-
-{
- CharPtr fwd_primer_seq = NULL;
- CharPtr rev_primer_seq = NULL;
- CharPtr fwd_primer_name = NULL;
- CharPtr rev_primer_name = NULL;
- SubSourcePtr ssp;
-
- if (biop == NULL) return NULL;
-
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_fwd_primer_seq) {
- fwd_primer_seq = ssp->name;
- } else if (ssp->subtype == SUBSRC_rev_primer_seq) {
- rev_primer_seq = ssp->name;
- } else if (ssp->subtype == SUBSRC_fwd_primer_name) {
- fwd_primer_name = ssp->name;
- } else if (ssp->subtype == SUBSRC_rev_primer_name) {
- rev_primer_name = ssp->name;
- }
- }
-
- return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
-}
-
NLM_EXTERN int LIBCALLBACK SortVnpByPCRSetSeq (VoidPtr ptr1, VoidPtr ptr2)
{
@@ -4064,216 +3982,6 @@ NLM_EXTERN ValNodePtr FreePCRSet (
return ValNodeFreeData (pset);
}
-static ValNodePtr ParsePCRColonString (
- CharPtr strs
-)
-
-{
- ValNodePtr head = NULL;
- size_t len;
- CharPtr ptr, str, tmp;
-
- if (StringHasNoText (strs)) return NULL;
-
- tmp = StringSave (strs);
- str = tmp;
- len = StringLen (str);
- if (len > 1 && StringChr (str, ':') != NULL) {
- while (StringDoesHaveText (str)) {
- ptr = StringChr (str, ':');
- if (ptr != NULL) {
- *ptr = '\0';
- ptr++;
- }
- TrimSpacesAroundString (str);
- ValNodeCopyStr (&head, 0, str);
- str = ptr;
- }
- } else {
- ValNodeCopyStr (&head, 0, str);
- }
-
- MemFree (tmp);
- return head;
-}
-
-static CharPtr FusePrimerNames (
- CharPtr first,
- CharPtr second
-)
-
-{
- size_t len;
- CharPtr str;
-
- if (first == NULL) return second;
- if (second == NULL) return first;
-
- len = StringLen (first) + StringLen (second) + 5;
- str = MemNew (len);
- if (str == NULL) return NULL;
-
- StringCpy (str, first);
- StringCat (str, ":");
- StringCat (str, second);
-
- return str;
-}
-
-static PCRPrimerPtr ModernizePCRPrimerHalf (
- CharPtr seq,
- CharPtr name
-)
-
-{
- CharPtr curr_name = NULL, curr_seq = NULL, fused_name;
- PCRPrimerPtr curr_primer = NULL, last_primer = NULL, primer_set = NULL;
- ValNodePtr name_list, seq_list, name_vnp, seq_vnp;
-
- seq_list = ParsePCRColonString (seq);
- name_list = ParsePCRColonString (name);
-
- seq_vnp = seq_list;
- name_vnp = name_list;
-
- while (seq_vnp != NULL /* || name_vnp != NULL */) {
- if (seq_vnp != NULL) {
- curr_seq = (CharPtr) seq_vnp->data.ptrvalue;
- seq_vnp = seq_vnp->next;
- }
- if (name_vnp != NULL) {
- curr_name = (CharPtr) name_vnp->data.ptrvalue;
- name_vnp = name_vnp->next;
- } else {
- curr_name = NULL;
- }
-
- curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
- if (curr_primer != NULL) {
- curr_primer->seq = StringSaveNoNull (curr_seq);
- curr_primer->name = StringSaveNoNull (curr_name);
-
- if (primer_set == NULL) {
- primer_set = curr_primer;
- }
- if (last_primer != NULL) {
- last_primer->next = curr_primer;
- }
- last_primer = curr_primer;
- }
- }
-
- while (name_vnp != NULL && last_primer != NULL) {
- curr_name = (CharPtr) name_vnp->data.ptrvalue;
- fused_name = FusePrimerNames (last_primer->name, curr_name);
- MemFree (last_primer->name);
- last_primer->name = StringSaveNoNull (fused_name);
- name_vnp = name_vnp->next;
- }
-
- while (name_vnp != NULL && last_primer == NULL) {
- curr_name = (CharPtr) name_vnp->data.ptrvalue;
- curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
- if (curr_primer != NULL) {
- curr_primer->name = StringSaveNoNull (curr_name);
-
- if (primer_set == NULL) {
- primer_set = curr_primer;
- }
- if (last_primer != NULL) {
- last_primer->next = curr_primer;
- }
- last_primer = curr_primer;
- }
- name_vnp = name_vnp->next;
- }
-
- ValNodeFreeData (seq_list);
- ValNodeFreeData (name_list);
-
- return primer_set;
-}
-
-NLM_EXTERN void ModernizePCRPrimers (
- BioSourcePtr biop
-)
-
-{
- PCRReactionSetPtr curr_reaction, last_reaction = NULL, reaction_set = NULL;
- PCRPrimerPtr forward, reverse;
- PcrSetPtr psp;
- ValNodePtr pset, vnp;
- SubSourcePtr nextssp;
- SubSourcePtr PNTR prevssp;
- SubSourcePtr ssp;
- Boolean unlink;
-
- if (biop == NULL) return;
- /* if (biop->pcr_primers != NULL) return; */
-
- pset = ParsePCRSet (biop);
- if (pset == NULL) return;
-
- for (vnp = pset; vnp != NULL; vnp = vnp->next) {
- psp = (PcrSetPtr) vnp->data.ptrvalue;
- if (psp == NULL) continue;
-
- forward = ModernizePCRPrimerHalf (psp->fwd_seq, psp->fwd_name);
- reverse = ModernizePCRPrimerHalf (psp->rev_seq, psp->rev_name);
-
- if (forward != NULL || reverse != NULL) {
-
- curr_reaction = (PCRReactionSetPtr) MemNew (sizeof (PCRReactionSet));
- if (curr_reaction != NULL) {
- curr_reaction->forward = forward;
- curr_reaction->reverse = reverse;
-
- if (reaction_set == NULL) {
- reaction_set = curr_reaction;
- }
- if (last_reaction != NULL) {
- last_reaction->next = curr_reaction;
- }
- last_reaction = curr_reaction;
- }
- }
- }
-
- FreePCRSet (pset);
-
- if (reaction_set != NULL) {
- if (last_reaction != NULL) {
- /* merge with existing structured pcr_primers */
- last_reaction->next = biop->pcr_primers;
- }
- biop->pcr_primers = reaction_set;
-
- ssp = biop->subtype;
- prevssp = (SubSourcePtr PNTR) &(biop->subtype);
- while (ssp != NULL) {
- nextssp = ssp->next;
- unlink= FALSE;
-
- if (ssp->subtype == SUBSRC_fwd_primer_seq ||
- ssp->subtype == SUBSRC_rev_primer_seq ||
- ssp->subtype == SUBSRC_fwd_primer_name ||
- ssp->subtype == SUBSRC_rev_primer_name) {
- unlink = TRUE;
- }
-
- if (unlink) {
- *prevssp = ssp->next;
- ssp->next = NULL;
- SubSourceFree (ssp);
- } else {
- prevssp = (SubSourcePtr PNTR) &(ssp->next);
- }
- ssp = nextssp;
- }
- }
-}
-
-
NLM_EXTERN void ModernizeRNAFields (
SeqFeatPtr sfp
)
@@ -4351,172 +4059,6 @@ NLM_EXTERN void ModernizeRNAFields (
}
}
-static DbtagPtr DbtagParse (
- CharPtr str
-)
-
-{
- Boolean all_digits = TRUE;
- Char ch;
- DbtagPtr dbt;
- long num;
- Int2 num_digits = 0;
- ObjectIdPtr oip;
- CharPtr ptr;
- CharPtr tmp;
-
- if (StringHasNoText (str)) return NULL;
- ptr = StringChr (str, ':');
- if (ptr == NULL) return NULL;
-
- dbt = DbtagNew ();
- oip = ObjectIdNew ();
- if (dbt == NULL || oip == NULL) return NULL;
-
- if (ptr != NULL) {
- *ptr = '\0';
- ptr++;
- }
-
- dbt->db = StringSave (str);
- dbt->tag = oip;
-
- tmp = ptr;
- ch = *tmp;
- while (ch != '\0') {
- if (IS_DIGIT (ch)) {
- num_digits++;
- } else {
- all_digits = FALSE;
- }
- tmp++;
- ch = *tmp;
- }
-
- if (all_digits) {
- if (num_digits < 10 || (num_digits == 10 && StringCmp (ptr, "2147483647") <= 0)) {
- sscanf (ptr, "%ld", &num);
- oip->id = (Int4) num;
- return dbt;
- }
- }
-
- oip->str = StringSave (ptr);
-
- return dbt;
-}
-
-static void GetNomenclatureUOP (
- UserObjectPtr uop,
- Pointer userdata
-)
-
-{
- ObjectIdPtr oip;
- UserObjectPtr PNTR uopp;
-
- if (uop == NULL || userdata == NULL) return;
- oip = uop->type;
- if (oip == NULL) return;
- if (StringCmp (oip->str, "OfficialNomenclature") != 0) return;
- uopp = (UserObjectPtr PNTR) userdata;
- *uopp = uop;
-}
-
-NLM_EXTERN void ModernizeGeneFields (
- SeqFeatPtr sfp
-)
-
-{
- GeneNomenclaturePtr gnp;
- GeneRefPtr grp;
- ObjectIdPtr oip;
- CharPtr str;
- CharPtr symbol = NULL, name = NULL, source = NULL;
- Uint2 status = 0;
- UserFieldPtr ufp;
- UserObjectPtr uop = NULL;
- UserObjectPtr curr, next;
- UserObjectPtr PNTR prev;
-
- if (sfp == NULL) return;
- if (sfp->data.choice != SEQFEAT_GENE) return;
-
- grp = (GeneRefPtr) sfp->data.value.ptrvalue;
- if (grp == NULL) return;
-
- if (grp->formal_name != NULL) return;
-
- if (sfp->ext == NULL) return;
- VisitUserObjectsInUop (sfp->ext, (Pointer) &uop, GetNomenclatureUOP);
- if (uop == NULL) return;
-
- for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
- oip = ufp->label;
- if (oip == NULL || oip->str == NULL) continue;
- if (StringICmp (oip->str, "Symbol") == 0) {
- if (ufp->choice == 1) {
- str = (CharPtr) ufp->data.ptrvalue;
- if (str != NULL) {
- symbol = str;
- }
- }
- } else if (StringICmp (oip->str, "Name") == 0) {
- if (ufp->choice == 1) {
- str = (CharPtr) ufp->data.ptrvalue;
- if (str != NULL) {
- name = str;
- }
- }
- } else if (StringICmp (oip->str, "DataSource") == 0) {
- if (ufp->choice == 1) {
- str = (CharPtr) ufp->data.ptrvalue;
- if (str != NULL) {
- source = str;
- }
- }
- } else if (StringICmp (oip->str, "Status") == 0) {
- if (ufp->choice == 1) {
- str = (CharPtr) ufp->data.ptrvalue;
- if (str != NULL) {
- if (StringICmp (str, "Official") == 0) {
- status = 1;
- } else if (StringICmp (str, "Interim") == 0) {
- status = 2;
- }
- }
- }
- }
- }
- if (symbol == NULL && name == NULL && source == NULL && status == 0) return;
-
- gnp = GeneNomenclatureNew ();
- if (gnp == NULL) return;
-
- gnp->status = status;
- gnp->symbol = StringSaveNoNull (symbol);
- gnp->name = StringSaveNoNull (name);
- gnp->source = DbtagParse (source);
-
- grp->formal_name = gnp;
-
- prev = (UserObjectPtr PNTR) &(sfp->ext);
- curr = sfp->ext;
- while (curr != NULL) {
- next = curr->next;
- if (uop == curr) {
- *(prev) = curr->next;
- curr->next = NULL;
- UserObjectFree (curr);
- } else {
- prev = (UserObjectPtr PNTR) &(curr->next);
- }
- curr = next;
- }
-}
-
-
-
static void AddDefLinesToAlignmentSequences
(TAlignmentFilePtr afp,
SeqEntryPtr sep_head)
@@ -4593,7 +4135,7 @@ static void AddDefLinesToAlignmentSequences
{ /* otherwise one defline per sequence */
curr_seg = index;
}
- if (curr_seg < afp->num_deflines)
+ if (curr_seg < afp->num_deflines && afp->deflines != NULL)
{
new_title_len += StringLen (afp->deflines [curr_seg]) + 1;
}
@@ -4629,7 +4171,7 @@ static void AddDefLinesToAlignmentSequences
{
curr_seg = index;
}
- if (curr_seg < afp->num_deflines)
+ if (curr_seg < afp->num_deflines && afp->deflines != NULL)
{
StringCat (new_title, afp->deflines [curr_seg]);
}
@@ -5110,6 +4652,135 @@ static void ReplacePipesWithUnderscores (CharPtr seqid_str)
}
}
+
+/* Builds a newly allocated "gnl|<db>|<tag>" string for a general Seq-id.
+ * The tag is the numeric id when it is greater than zero, otherwise the
+ * string form (an empty string when that is NULL).
+ * Returns NULL when dbtag is NULL, has no database name or no tag, or when
+ * the result buffer cannot be allocated.  The caller owns the returned string.
+ */
+static CharPtr s_IDStringFromGeneral (DbtagPtr dbtag)
+{
+  CharPtr  id_str;
+  CharPtr  tag_str;
+  size_t   len;
+  Char     num_buf [20];
+
+  if (dbtag == NULL || StringHasNoText (dbtag->db) || dbtag->tag == NULL) {
+    return NULL;
+  }
+
+  if (dbtag->tag->id > 0) {
+    /* cast so the conversion specifier is right whatever Int4 maps to */
+    sprintf (num_buf, "%ld", (long) dbtag->tag->id);
+    tag_str = num_buf;
+  } else {
+    tag_str = (dbtag->tag->str == NULL) ? "" : dbtag->tag->str;
+  }
+
+  /* exact size: "gnl|" (4) + db + "|" (1) + tag + terminating NUL (1) */
+  len = StringLen (dbtag->db) + StringLen (tag_str) + 6;
+  id_str = (CharPtr) MemNew (sizeof (Char) * len);
+  if (id_str == NULL) {
+    return NULL;
+  }
+  sprintf (id_str, "gnl|%s|%s", dbtag->db, tag_str);
+  return id_str;
+}
+
+
+/* Scans the Seq-ids of bsp and returns the Dbtag of the first general id
+ * whose database name equals "BankIt" (case-insensitive comparison).
+ * Returns NULL when bsp is NULL or no such id is present.  The returned
+ * pointer refers to data inside the Seq-id; nothing is copied here.
+ */
+static DbtagPtr FindBankitDbtag (BioseqPtr bsp)
+{
+  SeqIdPtr  curr_id;
+  DbtagPtr  candidate;
+
+  if (bsp == NULL) {
+    return NULL;
+  }
+
+  curr_id = bsp->id;
+  while (curr_id != NULL) {
+    if (curr_id->choice == SEQID_GENERAL) {
+      candidate = (DbtagPtr) curr_id->data.ptrvalue;
+      if (candidate != NULL && StringICmp (candidate->db, "BankIt") == 0) {
+        return candidate;
+      }
+    }
+    curr_id = curr_id->next;
+  }
+
+  return NULL;
+}
+
+
+/* Looks up the Bioseq named by an alignment sequence ID string.
+ *
+ * p_id_str points at the caller's ID string.  Lookups are tried in this order
+ * until BioseqFind succeeds:
+ *   1. the string as given, via MakeSeqID;
+ *   2. "gb|<id>", only when the string contains no '|';
+ *   3. for strings starting with "BankIt" (case-insensitive): a general id
+ *      with db "BankIt" and the remainder of the string as tag, then the
+ *      text after the last '/' as an ordinary id;
+ *   4. for other strings containing '/': a general id with db "NCBIFILE"
+ *      and the whole string as tag.
+ * When a lookup other than the first succeeds, *p_id_str is freed and
+ * replaced by a newly allocated string describing the id that matched.
+ *
+ * Returns the Bioseq found, or NULL.  NULL is returned immediately when
+ * p_id_str is NULL, the string has no text, or it begins with "acc".
+ */
+NLM_EXTERN BioseqPtr BioseqFromAlignmentID (CharPtr PNTR p_id_str)
+{
+ SeqIdPtr sip;
+ BioseqPtr bsp = NULL;
+ CharPtr id_str;
+ CharPtr tmp_id_str;
+ DbtagPtr dbtag;
+ CharPtr slash;
+
+ if (p_id_str == NULL || StringHasNoText (*p_id_str) || StringNCmp (*p_id_str, "acc", 3) == 0) {
+ return NULL;
+ }
+ /* NOTE(review): id_str aliases the caller's buffer; every branch below that
+  * frees *p_id_str has already found a Bioseq, so id_str does not appear to
+  * be read again afterwards - keep it that way when editing. */
+ id_str = *p_id_str;
+
+ /* attempt 1: the id exactly as written; anything chained after the first
+  * Seq-id is released before the lookup */
+ sip = MakeSeqID (id_str);
+ if (sip != NULL) {
+ sip->next = SeqIdFree (sip->next);
+ bsp = BioseqFind (sip);
+ }
+
+ /* attempt 2: no '|' in the string, so retry with a "gb|" prefix */
+ if (bsp == NULL && StringChr (id_str, '|') == NULL)
+ {
+ sip = SeqIdFree (sip);
+ /* + 4 = three characters of "gb|" plus the terminating NUL */
+ tmp_id_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (id_str) + 4));
+ sprintf (tmp_id_str, "gb|%s", id_str);
+ sip = MakeSeqID (tmp_id_str);
+ bsp = BioseqFind (sip);
+ if (bsp != NULL) {
+ /* hand the prefixed string to the caller in place of the original */
+ *p_id_str = MemFree (*p_id_str);
+ *p_id_str = tmp_id_str;
+ } else {
+ MemFree (tmp_id_str);
+ }
+ }
+ if (bsp == NULL) {
+ if (StringNICmp (id_str, "BankIt", 6) == 0) {
+ /* attempt 3a: general id, db "BankIt", tag = text after the 6-char prefix */
+ sip = SeqIdFree (sip);
+ sip = ValNodeNew (NULL);
+ sip->choice = SEQID_GENERAL;
+ dbtag = DbtagNew ();
+ dbtag->db = StringSave ("BankIt");
+ dbtag->tag = ObjectIdNew ();
+ dbtag->tag->str = StringSave (id_str + 6);
+ sip->data.ptrvalue = dbtag;
+ bsp = BioseqFind (sip);
+ if (bsp != NULL) {
+ *p_id_str = MemFree (*p_id_str);
+ *p_id_str = s_IDStringFromGeneral (dbtag);
+ } else if ((slash = StringRChr (id_str, '/')) != NULL) {
+ /* attempt 3b: text after the last '/' treated as an ordinary id */
+ sip = SeqIdFree (sip);
+ sip = MakeSeqID (slash + 1);
+ bsp = BioseqFind (sip);
+ if (bsp != NULL) {
+ /* report the Bioseq's own BankIt general id when it has one,
+  * otherwise the text after the slash */
+ dbtag = FindBankitDbtag (bsp);
+ if (dbtag == NULL) {
+ /* copy first: slash points into the buffer that is freed next */
+ tmp_id_str = StringSave (slash + 1);
+ *p_id_str = MemFree (*p_id_str);
+ *p_id_str = tmp_id_str;
+ } else {
+ *p_id_str = MemFree (*p_id_str);
+ *p_id_str = s_IDStringFromGeneral (dbtag);
+ }
+ }
+ }
+ } else if ((slash = StringRChr (id_str, '/')) != NULL) {
+ /* attempt 4: general id, db "NCBIFILE", tag = the whole original string */
+ sip = SeqIdFree (sip);
+ sip = ValNodeNew (NULL);
+ sip->choice = SEQID_GENERAL;
+ dbtag = DbtagNew ();
+ dbtag->db = StringSave ("NCBIFILE");
+ dbtag->tag = ObjectIdNew ();
+ dbtag->tag->str = StringSave (id_str);
+ sip->data.ptrvalue = dbtag;
+ bsp = BioseqFind (sip);
+ if (bsp != NULL) {
+ *p_id_str = MemFree (*p_id_str);
+ *p_id_str = s_IDStringFromGeneral (dbtag);
+ }
+ }
+ }
+ /* release whichever Seq-id was built last; the Bioseq itself is not touched */
+ sip = SeqIdFree (sip);
+ return bsp;
+}
+
+
extern SeqEntryPtr MakeSequinDataFromAlignmentEx (TAlignmentFilePtr afp, Uint1 moltype, Boolean check_ids)
{
SeqIdPtr PNTR sip_list;
@@ -5123,7 +4794,6 @@ extern SeqEntryPtr MakeSequinDataFromAlignmentEx (TAlignmentFilePtr afp, Uint1 m
ValNodePtr vnp;
Int4 index, curr_seg, num_sets;
BioseqPtr bsp;
- CharPtr tmp_id_str;
MsgAnswer ans;
Int4Ptr segs_per_set = NULL;
Int4Ptr segs_per_aln = NULL;
@@ -5183,16 +4853,7 @@ extern SeqEntryPtr MakeSequinDataFromAlignmentEx (TAlignmentFilePtr afp, Uint1 m
}
if (check_ids && StringNCmp (afp->ids[index], "acc", 3) != 0)
{
- bsp = BioseqFind (sip);
- if (bsp == NULL)
- {
- sip = SeqIdFree (sip);
- tmp_id_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (afp->ids [index]) + 4));
- sprintf (tmp_id_str, "gb|%s", afp->ids [index]);
- sip = MakeSeqID (tmp_id_str);
- MemFree (tmp_id_str);
- bsp = BioseqFind (sip);
- }
+ bsp = BioseqFromAlignmentID (&(afp->ids[index]));
if (bsp == NULL)
{
ans = Message (MSG_YN, "Can't find sequence %s in set - is this a far pointer?", afp->ids[index]);
@@ -5363,45 +5024,6 @@ extern SeqEntryPtr make_seqentry_for_seqentry (SeqEntryPtr sep)
return sep1;
}
-/* These two functions are used for removing mRNAs that overlap pseudo misc_feats
- * and marking genes that overlap pseudo misc_feats as pseudo.
- */
-static void PseudoMiscFeatProcessingCallback (SeqFeatPtr sfp, Pointer userdata)
-{
- SeqFeatPtr gene, mRNA;
- SeqMgrFeatContext gcontext, mcontext;
-
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_misc_feature) return;
- /* we only want to process misc_feats if the pseudo flag is set or the
- * comment contains the word "pseudogene".
- */
-#if 0
- if (!sfp->pseudo && StringISearch (sfp->comment, "pseudogene") == NULL) return;
-#endif
-
- gene = SeqMgrGetOverlappingGene (sfp->location, &gcontext);
- mRNA = SeqMgrGetOverlappingFeature (sfp->location, FEATDEF_mRNA, NULL, 0, NULL,
- RANGE_MATCH, &mcontext);
- if (gene != NULL)
- {
- gene->pseudo = TRUE;
- }
- if (mRNA != NULL && mRNA->product == NULL) /* only delete mRNAs without products */
- {
- mRNA->idx.deleteme = TRUE;
- }
-}
-
-extern void ProcessPseudoMiscFeatsForEntityID (Uint2 entityID)
-{
- SeqEntryPtr sep;
-
- sep = GetTopSeqEntryForEntityID (entityID);
- if (sep == NULL) return;
-
- VisitFeaturesInSep (sep, (Pointer) NULL, PseudoMiscFeatProcessingCallback);
- DeleteMarkedObjects (entityID, 0, NULL);
-}
/* These three functions are used for converting pseudo CDSs to misc_features. */
NLM_EXTERN Boolean ConvertOnePseudoCDSToMiscFeatEx (SeqFeatPtr sfp, Boolean remove_product)
@@ -5903,6 +5525,53 @@ static CharPtr inferencePrefix [] = {
NULL
};
+/* Returns TRUE when str is at least three characters long and begins with a
+ * three-letter prefix of the form [SED] R [APXRSZ]; FALSE otherwise.
+ * Only the first three characters are examined.
+ */
+static Boolean IsSraPrefix (CharPtr str)
+
+{
+  Char  first, second, third;
+
+  if (StringLen (str) < 3) return FALSE;
+
+  first = str [0];
+  second = str [1];
+  third = str [2];
+
+  if (StringChr ("SED", first) != NULL &&
+      second == 'R' &&
+      StringChr ("APXRSZ", third) != NULL) {
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+/* Returns TRUE when str has text, consists solely of digits and periods,
+ * and neither starts nor ends with a period; FALSE otherwise.
+ */
+static Boolean IsAllDigitsOrPeriods (CharPtr str)
+
+{
+  CharPtr  ptr;
+
+  if (StringHasNoText (str)) return FALSE;
+  if (*str == '.') return FALSE;
+
+  for (ptr = str; *ptr != '\0'; ptr++) {
+    if (*ptr == '.') continue;
+    if (! IS_DIGIT (*ptr)) return FALSE;
+  }
+
+  /* ptr now rests on the terminator; look back at the final character */
+  if (ptr > str && *(ptr - 1) == '.') return FALSE;
+
+  return TRUE;
+}
+
static Int2 ValidateInferenceAccession (
CharPtr str,
Char chr,
@@ -5952,23 +5621,39 @@ static Int2 ValidateInferenceAccession (
}
}
}
- accnv = ValidateAccnDotVer (tmp);
- if (accnv == -5 || accnv == -6) {
- rsult = BAD_INFERENCE_ACC_VERSION;
- } else if (accnv != 0) {
- rsult = BAD_INFERENCE_ACCESSION;
- } else if (fetchAccn) {
- sip = SeqIdFromAccessionDotVersion (tmp);
- sev = ErrGetMessageLevel ();
- ErrSetMessageLevel (SEV_ERROR);
- if (has_fetch_function && GetGIForSeqId (sip) == 0) {
- rsult = ACC_VERSION_NOT_PUBLIC;
+ if (IsSraPrefix (tmp) && IsAllDigitsOrPeriods (tmp + 3)) {
+ } else if (StringNCmp (tmp, "MAP_", 4) == 0 && StringIsAllDigits (tmp + 4)) {
+ } else {
+ accnv = ValidateAccnDotVer (tmp);
+ if (accnv == -5 || accnv == -6) {
+ rsult = BAD_INFERENCE_ACC_VERSION;
+ } else if (accnv != 0) {
+ rsult = BAD_INFERENCE_ACCESSION;
+ } else if (fetchAccn) {
+ sip = SeqIdFromAccessionDotVersion (tmp);
+ sev = ErrGetMessageLevel ();
+ ErrSetMessageLevel (SEV_ERROR);
+ if (has_fetch_function && GetGIForSeqId (sip) == 0) {
+ rsult = ACC_VERSION_NOT_PUBLIC;
+ }
+ ErrSetMessageLevel (sev);
+ SeqIdFree (sip);
}
- ErrSetMessageLevel (sev);
- SeqIdFree (sip);
}
} else if (is_similar_to && is_blast) {
rsult = BAD_ACCESSION_TYPE;
+ } else if (is_similar_to) {
+ if (StringICmp (str, "GenBank") != 0 &&
+ StringICmp (str, "EMBL") != 0 &&
+ StringICmp (str, "DDBJ") != 0 &&
+ StringICmp (str, "INSD") != 0 &&
+ StringICmp (str, "RefSeq") != 0 &&
+ StringICmp (str, "UniProt") != 0 &&
+ StringICmp (str, "UniProtKB") != 0 &&
+ StringICmp (str, "SwissProt") != 0 &&
+ StringICmp (str, "KEGG") != 0) {
+ rsult = UNRECOGNIZED_DATABASE;
+ }
}
}
if (StringChr (tmp, ' ') != NULL) rsult = SPACES_IN_INFERENCE;
@@ -5999,11 +5684,12 @@ static Char NextColonOrVerticalBar (CharPtr ptr)
NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn)
{
- Int2 best, j, rsult, tmprsult;
+ Int2 best = -1, j, rsult, tmprsult;
Char ch;
Boolean has_fetch_function, same_species;
size_t len;
- CharPtr nxt, ptr, rest, skip, str;
+ Int4 num_spaces = 0;
+ CharPtr nxt, ptr, rest = NULL, skip, str;
ObjMgrProcPtr ompp = NULL;
if (StringHasNoText (val)) return EMPTY_INFERENCE_STRING;
@@ -6114,6 +5800,23 @@ NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn)
}
}
+ if (rsult == VALID_INFERENCE) {
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (ch == ' ') {
+ num_spaces++;
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ if (num_spaces > 3) {
+ rsult = INFERENCE_HAS_COMMENT;
+ } else if (num_spaces > 0) {
+ rsult = SPACES_IN_INFERENCE;
+ }
+ }
+
MemFree (str);
return rsult;
@@ -6178,107 +5881,6 @@ extern void MergeFeatureIntervalsToParts (SeqFeatPtr sfp, Boolean ordered)
}
}
-extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata)
-
-{
- MolInfoPtr mip;
- SeqDescrPtr sdp;
- Boolean is_mrna = FALSE, is_master_seq = FALSE, has_nulls = FALSE;
- SeqFeatPtr gene = NULL;
- SeqFeatPtr sfp;
- SeqMgrFeatContext context;
- Int4 num_cds = 0;
- Int4 num_mrna = 0;
- SeqIdPtr sip;
- SeqLocPtr slp;
- Boolean partial5, partial3;
- BioSourcePtr biop;
- OrgRefPtr orp;
- BioseqSetPtr bssp;
-
- if (bsp == NULL || bsp->length == 0
- || !ISA_na (bsp->mol)) {
- return;
- }
-
- sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
- if (sdp != NULL) {
- mip = (MolInfoPtr) sdp->data.ptrvalue;
- if (mip != NULL && mip->biomol == MOLECULE_TYPE_MRNA) {
- is_mrna = TRUE;
- }
- }
- if (!is_mrna) {
- return;
- }
-
- sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
- if (sdp != NULL) {
- biop = (BioSourcePtr) sdp->data.ptrvalue;
- if (biop != NULL) {
- if (biop->origin == ORG_ARTIFICIAL) {
- orp = biop->org;
- if (orp != NULL) {
- if (StringICmp (orp->taxname, "synthetic construct") == 0) return;
- }
- }
- }
- }
-
- if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
- bssp = (BioseqSetPtr) bsp->idx.parentptr;
- if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset) {
- is_master_seq = TRUE;
- }
- }
-
- for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
- sfp != NULL;
- sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) {
- if (sfp->data.choice == SEQFEAT_GENE) {
- /* skip this sequence if it has more than one gene */
- if (gene == NULL) {
- gene = sfp;
- } else {
- return;
- }
- } else if (sfp->data.choice == SEQFEAT_CDREGION) {
- num_cds++;
- /* skip this sequence if it has more than one coding region */
- if (num_cds > 1 && !is_master_seq) {
- return;
- }
- } else if (sfp->idx.subtype == FEATDEF_mRNA) {
- num_mrna++;
- /* skip this sequence if it has more than one mRNA */
- if (num_mrna > 1) return;
- }
- }
-
- if (gene != NULL && gene->location != NULL) {
- slp = gene->location;
- if (slp->choice != SEQLOC_INT) {
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
- /* skip this sequence if it is multi-interval and EMBL or DDBJ */
- if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) return;
- }
- }
- }
-
- if (gene != NULL && BioseqFindFromSeqLoc (gene->location) == bsp) {
- CheckSeqLocForPartial (gene->location, &partial5, &partial3);
- has_nulls = LocationHasNullsBetween (gene->location);
- /* gene should cover entire length of sequence */
- slp = SeqLocIntNew (0, bsp->length - 1, SeqLocStrand (gene->location), SeqIdFindBest (bsp->id, 0));
- SetSeqLocPartial (slp, partial5, partial3);
- gene->location = SeqLocFree (gene->location);
- gene->location = slp;
- if (is_master_seq) {
- MergeFeatureIntervalsToParts (gene, has_nulls);
- }
- }
-}
-
/* Functions for the Discrepancy Report */
@@ -6368,13 +5970,30 @@ NewClickableItem
ValNodePtr item_list)
{
ClickableItemPtr dip;
+ CharPtr item_cnt;
+ CharPtr tmp;
dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
if (dip != NULL)
{
dip->clickable_item_type = clickable_item_type;
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (description_fmt) + 15));
- sprintf (dip->description, description_fmt, ValNodeLen (item_list));
+
+ item_cnt = StringStr(description_fmt, "%d");
+ if (item_cnt != NULL && item_cnt != description_fmt) {
+ StringNCpy(dip->description, description_fmt,
+ StringLen(description_fmt) - StringLen(item_cnt));
+ tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (description_fmt)));
+ sprintf (tmp, "%d", ValNodeLen (item_list));
+ SetStringValue (&(dip->description), tmp, ExistingTextOption_append_none);
+ SetStringValue (&(dip->description), item_cnt + 2, ExistingTextOption_append_none);
+ tmp = MemFree(tmp);
+ }
+ else if (item_cnt == NULL) sprintf (dip->description, "%s", description_fmt);
+ else {
+ sprintf (dip->description, "%d", ValNodeLen (item_list));
+ SetStringValue (&(dip->description), description_fmt+2, ExistingTextOption_append_none);
+ }
dip->callback_func = NULL;
dip->datafree_func = NULL;
dip->callback_data = NULL;
@@ -6642,10 +6261,12 @@ extern int LIBCALLBACK SortVnpByClickableItemDescription (VoidPtr ptr1, VoidPtr
/* utility functions for the discrepancy report tests */
static void ValNodeLinkCopy (ValNodePtr PNTR list1, ValNodePtr list2)
{
+ /* Appends to *list1 one new node per node of list2.  Each new node shares
+  * the source node's choice and data pointer (the data is not duplicated)
+  * and carries over its fatal flag.  Does nothing when list1 is NULL.
+  */
+ ValNodePtr newnode;
if (list1 == NULL) return;
while (list2 != NULL)
{
- ValNodeAddPointer (list1, list2->choice, list2->data.ptrvalue);
+ newnode = ValNodeAddPointer (list1, list2->choice, list2->data.ptrvalue);
+ /* guard: no node to update if ValNodeAddPointer could not allocate one */
+ if (newnode != NULL) newnode->fatal = list2->fatal;
list2 = list2->next;
}
}
@@ -6828,6 +6449,27 @@ NLM_EXTERN int LIBCALLBACK SortVnpByGlobalDiscrepancyString (VoidPtr ptr1, VoidP
return 0;
}
+/* Sort callback: ptr1 and ptr2 each point at a ValNodePtr whose data is a
+ * GlobalDiscrepancy.  Orders the two items by a case-sensitive StringCmp of
+ * their str fields.  Returns 0 when any pointer along the way is NULL.
+ */
+NLM_EXTERN int LIBCALLBACK SortVnpByGlobalDiscrepancyStringCaseSensitive (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr            left_vnp, right_vnp;
+  GlobalDiscrepancyPtr  left, right;
+
+  if (ptr1 == NULL || ptr2 == NULL) return 0;
+
+  left_vnp = *((ValNodePtr PNTR) ptr1);
+  right_vnp = *((ValNodePtr PNTR) ptr2);
+  if (left_vnp == NULL || right_vnp == NULL) return 0;
+
+  left = (GlobalDiscrepancyPtr) left_vnp->data.ptrvalue;
+  right = (GlobalDiscrepancyPtr) right_vnp->data.ptrvalue;
+  if (left == NULL || right == NULL) return 0;
+  if (left->str == NULL || right->str == NULL) return 0;
+
+  return StringCmp (left->str, right->str);
+}
+
static Int4 CountDupGlobalDiscrepancy (ValNodePtr vnp)
{
@@ -7242,6 +6884,9 @@ static void PercentNDiscrepanciesForSeqEntry (ValNodePtr PNTR discrepancy_list,
static void FindAdjacentPseudoGenes (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
/* J. Chen */
+static void ProductsWithNoProductString(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
+static void FindSeqIdHavingPhrases(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
+static void FindUnculturedNotes(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
static void ShowTranslExcept(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
static void ShowCDsHavingGene(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
static void TestDeflineExistence(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
@@ -7270,9 +6915,134 @@ static void CheckCountryColons(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_
static void FixCountryColons(ValNodePtr item_list, Pointer data, LogInfoPtr lip);
static void FindBioProjectIdSequences(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
static void StrainTaxnameConflict(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
+static void FindLongBioseqsWithoutAnnotation(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
+static void FindMoreNamesInCollectedBy(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
+static void FindEndColon(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
/* J. Chen */
+typedef Boolean (*CollectBioSourceTest) PROTO ((BioSourcePtr));
+static ValNodePtr CollectBioSources (ValNodePtr sep_list, CollectBioSourceTest test_func, Boolean want_pass);
+
+
+/* Autofix companion of the "country ends with a colon" report: for every
+ * BioSource reached through item_list (source descriptors or source
+ * features), removes one trailing ':' from each country subsource name.
+ * Country names that do not end in a colon are left untouched.
+ * Every entity referenced by a processed item is then flagged dirty and sent
+ * an update message.  data and lip are not used.
+ */
+static void RemoveEndColon (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr     vnp, entityIDList = NULL;
+  BioSourcePtr   biop;
+  SeqDescrPtr    sdp;
+  SeqFeatPtr     sfp;
+  SubSourcePtr   ssp;
+  ObjValNodePtr  ovp;
+  size_t         len;
+
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    biop = NULL;
+    if (vnp->choice == OBJ_SEQDESC) {
+      sdp = (SeqDescrPtr) vnp->data.ptrvalue;
+      if (sdp == NULL) continue;
+      ovp = (ObjValNodePtr) sdp;
+      ValNodeAddInt (&entityIDList, 0, ovp->idx.entityID);
+      biop = (BioSourcePtr) sdp->data.ptrvalue;
+    } else if (vnp->choice == OBJ_SEQFEAT) {
+      sfp = (SeqFeatPtr) vnp->data.ptrvalue;
+      if (sfp == NULL) continue;
+      ValNodeAddInt (&entityIDList, 0, sfp->idx.entityID);
+      biop = (BioSourcePtr) sfp->data.value.ptrvalue;
+    }
+    if (biop == NULL) continue;
+
+    for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+      if (ssp->subtype != SUBSRC_country) continue;
+      /* Look only at the last character.  Walking colon by colon, as before,
+       * ran off a NULL pointer for any name that did not end in ':'. */
+      len = StringLen (ssp->name);
+      if (len > 0 && ssp->name [len - 1] == ':') {
+        ssp->name [len - 1] = '\0';
+      }
+    }
+  }
+
+  for (vnp = entityIDList; vnp != NULL; vnp = vnp->next) {
+    ObjMgrSetDirtyFlag (vnp->data.intvalue, TRUE);
+    ObjMgrSendMsg (OM_MSG_UPDATE, vnp->data.intvalue, 0, 0);
+  }
+  ValNodeFree (entityIDList);
+}
+
+/* BioSource test for CollectBioSources: returns TRUE when any country
+ * subsource of biop has a name whose last character is ':'.
+ * Every country subsource is examined; one without a trailing colon no
+ * longer ends the search early.  Returns FALSE for a NULL biop.
+ */
+static Boolean CountryEndWithColon(BioSourcePtr biop)
+{
+  SubSourcePtr  ssp;
+  size_t        len;
+
+  if (biop == NULL) return FALSE;
+
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+    if (ssp->subtype != SUBSRC_country) continue;
+    len = StringLen (ssp->name);
+    if (len > 0 && ssp->name [len - 1] == ':') return TRUE;
+  }
+
+  return FALSE;
+}
+
+/* Discrepancy test: collects the BioSources accepted by CountryEndWithColon
+ * and, when there are any, appends one END_COLON_IN_COUNTRY clickable item
+ * listing them to discrepancy_list.
+ * Items are tagged OBJ_SEQDESC when the collected node is a descriptor and
+ * OBJ_SEQFEAT otherwise.
+ */
+static void FindEndColon(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr  vnp, src_list = NULL, item_list = NULL;
+
+  src_list = CollectBioSources (sep_list, CountryEndWithColon, TRUE);
+
+  for (vnp = src_list; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice == OBJ_SEQDESC) {
+      ValNodeAddPointer (&item_list, OBJ_SEQDESC, vnp->data.ptrvalue);
+    } else {
+      ValNodeAddPointer (&item_list, OBJ_SEQFEAT, vnp->data.ptrvalue);
+    }
+  }
+
+  /* item_list holds its own nodes, so the collected list is released here;
+   * only the nodes are freed, not the descriptors/features they point to */
+  src_list = ValNodeFree (src_list);
+
+  if (item_list != NULL) {
+    ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (END_COLON_IN_COUNTRY,
+                       "%d country sources end with a colon.", item_list));
+  }
+}
+
+
+
+/* Bioseq visitor: appends bsp (as OBJ_BIOSEQ) to the ValNode list addressed
+ * by userdata when bsp is a nucleotide sequence of length 5000 or more for
+ * which SeqMgrGetNextFeature returns no feature.
+ */
+static void FindLongBioseqsWithoutAnnotationCallback (BioseqPtr bsp, Pointer userdata)
+{
+  SeqMgrFeatContext  context;
+  ValNodePtr PNTR    found_list;
+
+  if (bsp == NULL || userdata == NULL) return;
+  if (! ISA_na (bsp->mol)) return;
+  if (bsp->length < 5000) return;
+
+  /* any feature at all disqualifies the sequence */
+  if (SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context) != NULL) return;
+
+  found_list = (ValNodePtr PNTR) userdata;
+  ValNodeAddPointer (found_list, OBJ_BIOSEQ, bsp);
+}
+
+
+
+/* Discrepancy test: visits every Bioseq in each entry of sep_list with
+ * FindLongBioseqsWithoutAnnotationCallback and, when anything was collected,
+ * appends one DISC_LONG_NO_ANNOTATION clickable item to discrepancy_list.
+ */
+void FindLongBioseqsWithoutAnnotation(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr        unannotated = NULL;
+  ValNodePtr        entry;
+  ClickableItemPtr  cip;
+
+  entry = sep_list;
+  while (entry != NULL) {
+    VisitBioseqsInSep ((SeqEntryPtr) entry->data.ptrvalue, &unannotated,
+                       FindLongBioseqsWithoutAnnotationCallback);
+    entry = entry->next;
+  }
+
+  if (unannotated == NULL) return;
+
+  cip = NewClickableItem (DISC_LONG_NO_ANNOTATION, "%d bioseqs are longer than 5000nt and have no features", unannotated);
+  ValNodeAddPointer (discrepancy_list, 0, cip);
+}
+
+
+
static Boolean StrainConflictsTaxname(OrgRefPtr org)
{
@@ -7432,8 +7202,452 @@ void FindBioProjectIdSequences(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_
-typedef Boolean (*CollectBioSourceTest) PROTO ((BioSourcePtr));
-static ValNodePtr CollectBioSources (ValNodePtr sep_list, CollectBioSourceTest test_func, Boolean want_pass);
+/*
+ * CollectedSuspOrgName
+ * Returns TRUE when biop has a collected-by subsource and its taxname is
+ * exactly "Homo sapiens".  Only the first collected-by subsource is
+ * considered.
+ *
+ * biop->org is now checked before it is dereferenced; a biosource without
+ * an org-ref used to crash here.
+ */
+static Boolean CollectedSuspOrgName(BioSourcePtr biop)
+{
+  SubSourcePtr ssp;
+
+  if (biop == NULL || biop->org == NULL || biop->org->taxname == NULL) return FALSE;
+
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+    if (ssp->subtype == SUBSRC_collected_by) {
+      if (StringCmp(biop->org->taxname, "Homo sapiens") == 0) return TRUE;
+      return FALSE;
+    }
+  }
+  return FALSE;
+}
+
+
+/*
+ * FindSuspOrgNameInCollected
+ * Reports (ONCALLER_SUSPECTED_ORG_COLLECTED) the biosources accepted by
+ * CollectedSuspOrgName.  The collected list is handed to the clickable item.
+ */
+static void FindSuspOrgNameInCollected(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr suspects = CollectBioSources (sep_list, CollectedSuspOrgName, TRUE);
+
+  if (suspects == NULL) return;
+
+  ValNodeAddPointer (discrepancy_list, 0,
+                     NewClickableItem (ONCALLER_SUSPECTED_ORG_COLLECTED,
+                                       "%d biosources have collected-by and suspect organism",
+                                       suspects));
+}
+
+
+/*
+ * IdentifiedSuspOrgName
+ * Returns TRUE when biop has an identified-by subsource and its taxname is
+ * either exactly "Homo sapiens" or contains "uncultured".
+ *
+ * biop->org is now checked before it is dereferenced; a biosource without
+ * an org-ref used to crash here.
+ */
+static Boolean IdentifiedSuspOrgName(BioSourcePtr biop)
+{
+  SubSourcePtr ssp;
+
+  if (biop == NULL || biop->org == NULL || biop->org->taxname == NULL) return FALSE;
+
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+    if (ssp->subtype == SUBSRC_identified_by) {
+      if (StringCmp(biop->org->taxname, "Homo sapiens") == 0) return TRUE;
+      if (StringSearch(biop->org->taxname, "uncultured") != NULL) return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+
+/*
+ * FindSuspOrgNameInIdentified
+ * Reports (ONCALLER_SUSPECTED_ORG_IDENTIFIED) the biosources accepted by
+ * IdentifiedSuspOrgName.  The collected list is handed to the clickable item.
+ */
+static void FindSuspOrgNameInIdentified(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr suspects = CollectBioSources (sep_list, IdentifiedSuspOrgName, TRUE);
+
+  if (suspects == NULL) return;
+
+  ValNodeAddPointer (discrepancy_list, 0,
+                     NewClickableItem (ONCALLER_SUSPECTED_ORG_IDENTIFIED,
+                                       "%d biosources have identified-by and suspect organism",
+                                       suspects));
+}
+
+
+/*
+ * HasMoreNames
+ * Scans ssp->name for ',' and ';' separators and returns TRUE when the
+ * running count exceeds 2.  The count goes up by one for each separator
+ * found at the start of a scan step, and by one more if non-empty text
+ * remains after the last separator.
+ * ssp itself is dereferenced without a NULL check.
+ */
+static Boolean HasMoreNames(SubSourcePtr ssp)
+{
+ CharPtr name, cp1, cp2, cp;
+ Uint2 cnt = 0; // punctuation
+ Boolean need_skip;
+
+ name = ssp->name;
+ while (name) {
+ /* locate the next comma and the next semicolon in the remaining text */
+ cp1 = StringChr(name, ',');
+ cp2 = StringChr(name, ';');
+ if (!cp1 && !cp2) break;
+ else {
+ /* cp = whichever separator comes first */
+ if (cp1 && cp2) {
+ if (cp1 < cp2) cp = cp1;
+ else cp = cp2;
+ }
+ else if (cp1) cp = cp1;
+ else cp = cp2;
+ /* a third separator settles the answer */
+ if (++cnt > 2) return TRUE;
+
+ // adjust string
+ /* separator was the last character: nothing left to scan */
+ if (*(cp+1) == '\0') name = NULL;
+ else {
+ name = cp+1;
+ while (isspace(*name)) name ++;
+ /* skip any run of whitespace, ',', ';' and the prefix "and", so a */
+ /* run of adjacent separators is counted once */
+ /* NOTE(review): "and" is matched as a bare 3-character prefix, so it */
+ /* presumably also eats the start of a name like "andrew" - confirm */
+ do {
+ need_skip = FALSE;
+ if (isspace(*name)) { name++; need_skip = TRUE; }
+ else if (*name == ',') {
+ name ++; need_skip = TRUE;
+ }
+ else if (*name == ';') {
+ name ++; need_skip = TRUE;
+ }
+ else if (!StringNCmp(name, "and", 3)) {
+ name += 3; need_skip = TRUE;
+ }
+
+ } while (need_skip && *name != '\0') ;
+ if (*name == '\0') name = NULL;
+ }
+ }
+ }
+ /* text left after the last separator counts once more */
+ if (name != NULL && *name != '\0') cnt++;
+ if (cnt > 2) return TRUE;
+ else return FALSE;
+};
+
+
+
+CharPtr spec_words[] = {"institute", "institution", "University", "College"};
+Uint4 spec_wd_cnt = sizeof(spec_words)/sizeof(CharPtr);
+
+/*
+ * IdentifiedByHasSpecWords
+ * Returns TRUE when any identified-by subsource of biop contains one of
+ * the entries of spec_words (searched with StringISearch).
+ */
+Boolean IdentifiedByHasSpecWords(BioSourcePtr biop)
+{
+  SubSourcePtr qual;
+  Uint4        w;
+
+  if (biop == NULL) return FALSE;
+
+  for (qual = biop->subtype; qual != NULL; qual = qual->next) {
+    if (qual->subtype != SUBSRC_identified_by) continue;
+
+    for (w = 0; w < spec_wd_cnt; w++) {
+      if (StringISearch(qual->name, spec_words[w]) != NULL) return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+
+/*
+ * IdentifiedByHasMoreNames
+ * Applies HasMoreNames to the first identified-by subsource of biop and
+ * returns its verdict; FALSE when there is no such subsource.
+ */
+static Boolean IdentifiedByHasMoreNames (BioSourcePtr biop)
+{
+  SubSourcePtr qual;
+
+  if (biop == NULL) return FALSE;
+
+  qual = biop->subtype;
+  while (qual != NULL && qual->subtype != SUBSRC_identified_by) {
+    qual = qual->next;
+  }
+  if (qual == NULL) return FALSE;
+
+  /* only the first identified-by qualifier is examined */
+  if (HasMoreNames(qual)) return TRUE;
+  return FALSE;
+}
+
+
+/*
+ * CollectIdentifiedByDesc
+ * Descriptor visitor.  data is a ValNodePtr PNTR; a source descriptor that
+ * carries at least one identified-by subsource is appended to it as
+ * OBJ_SEQDESC.
+ *
+ * Changes from the previous version: the BioSource payload is checked for
+ * NULL before use, and a descriptor is added once rather than once per
+ * identified-by qualifier.
+ */
+static void CollectIdentifiedByDesc (SeqDescrPtr sdp, Pointer data)
+{
+  SubSourcePtr ssp;
+  BioSourcePtr biop;
+
+  if (sdp == NULL || data == NULL || sdp->choice != Seq_descr_source) return;
+
+  biop = (BioSourcePtr)sdp->data.ptrvalue;
+  if (biop == NULL) return;
+
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+    if (ssp->subtype == SUBSRC_identified_by) {
+      ValNodeAddPointer((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+      break;   /* one entry per descriptor is enough */
+    }
+  }
+}
+
+
+/*
+ * CollectIdentifiedByFeat
+ * Feature visitor.  data is a ValNodePtr PNTR; a biosource feature that
+ * carries at least one identified-by subsource is appended to it as
+ * OBJ_SEQFEAT.
+ *
+ * Changes from the previous version: the BioSource payload is checked for
+ * NULL before use, and a feature is added once rather than once per
+ * identified-by qualifier.
+ */
+static void CollectIdentifiedByFeat(SeqFeatPtr sfp, Pointer data)
+{
+  BioSourcePtr biop;
+  SubSourcePtr ssp;
+
+  if (sfp == NULL || data == NULL || sfp->data.choice != SEQFEAT_BIOSRC) return;
+
+  biop = (BioSourcePtr)sfp->data.value.ptrvalue;
+  if (biop == NULL) return;
+
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+    if (ssp->subtype == SUBSRC_identified_by) {
+      ValNodeAddPointer((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+      break;   /* one entry per feature is enough */
+    }
+  }
+}
+
+
+
+static SubmitBlockPtr FindSubmitBlockForSeqEntry (SeqEntryPtr sep);
+
+/*
+ * CheckForSubmitText
+ * Looks through the source descriptors and biosource features of sep for
+ * identified-by subsources whose text equals inst or dept exactly
+ * (StringCmp).  Each match appends the owning descriptor (OBJ_SEQDESC) or
+ * feature (OBJ_SEQFEAT) to userdata, which is a ValNodePtr PNTR.
+ * A NULL inst or dept is never matched.
+ *
+ * Changes from the previous version: the two temporary lists built by the
+ * visitors are freed before returning (they were leaked), and a NULL
+ * BioSource payload is skipped instead of dereferenced.
+ */
+static void CheckForSubmitText(SeqEntryPtr sep, CharPtr inst, CharPtr dept, Pointer userdata)
+{
+  SubSourcePtr ssp;
+  ValNodePtr src_list = NULL, feat_list = NULL, vnp;
+  SeqDescrPtr sdp;
+  BioSourcePtr biosrc;
+  SeqFeatPtr sfp;
+
+  if (userdata == NULL) return;
+
+  VisitDescriptorsInSep (sep, &src_list, CollectIdentifiedByDesc);
+  for (vnp = src_list; vnp != NULL; vnp = vnp->next) {
+    sdp = (SeqDescrPtr)vnp->data.ptrvalue;
+    biosrc = (sdp == NULL) ? NULL : (BioSourcePtr)sdp->data.ptrvalue;
+    if (biosrc == NULL) continue;
+    for (ssp = biosrc->subtype; ssp != NULL; ssp = ssp->next) {
+      if (ssp->subtype == SUBSRC_identified_by) {
+        if ((inst && !StringCmp(ssp->name, inst)) || (dept && !StringCmp(ssp->name, dept)))
+          ValNodeAddPointer((ValNodePtr PNTR) userdata, OBJ_SEQDESC, sdp);
+      }
+    }
+  }
+  /* ValNodeFree releases only the list nodes, not the descriptors */
+  src_list = ValNodeFree (src_list);
+
+  VisitFeaturesInSep (sep, &feat_list, CollectIdentifiedByFeat);
+  for (vnp = feat_list; vnp != NULL; vnp = vnp->next) {
+    sfp = (SeqFeatPtr)vnp->data.ptrvalue;
+    biosrc = (sfp == NULL) ? NULL : (BioSourcePtr)sfp->data.value.ptrvalue;
+    if (biosrc == NULL) continue;
+    for (ssp = biosrc->subtype; ssp != NULL; ssp = ssp->next) {
+      if (ssp->subtype == SUBSRC_identified_by) {
+        if ((inst && !StringCmp(ssp->name, inst)) || (dept && !StringCmp(ssp->name, dept)))
+          ValNodeAddPointer((ValNodePtr PNTR) userdata, OBJ_SEQFEAT, sfp);
+      }
+    }
+  }
+  feat_list = ValNodeFree (feat_list);
+}
+
+
+
+static void CollectPubsForUSAStateFeatCallback (SeqFeatPtr sfp, Pointer data);
+static void CollectPubsForUSAStateDescCallback (SeqDescrPtr sdp, Pointer data);
+
+/*
+ * FindSubmitTextInBioseq
+ * For every publication found on bsp (pub descriptors and pub features),
+ * reads its affiliation and affiliation-division fields and compares them
+ * with the identified-by subsources of bsp's source descriptors.  A source
+ * descriptor whose identified-by text equals either field exactly is
+ * appended to userdata (a ValNodePtr PNTR) as OBJ_SEQDESC.
+ *
+ * Changes from the previous version: pub_list is freed before returning
+ * (it was leaked), and a NULL BioSource payload is skipped instead of
+ * dereferenced.
+ */
+static void FindSubmitTextInBioseq(BioseqPtr bsp, Pointer userdata)
+{
+  SeqDescrPtr sdp;
+  SeqMgrDescContext dcontext;
+  SeqFeatPtr sfp;
+  SeqMgrFeatContext fcontext;
+  ValNodePtr vnp, pub_list = NULL;
+  ValNode field_inst, field_dept;
+  CharPtr inst, dept;
+  BioSourcePtr biosrc;
+  SubSourcePtr ssp;
+
+  if (bsp == NULL || userdata == NULL) return;
+
+  /* gather the publications visible on this bioseq */
+  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
+       sdp != NULL;
+       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext) )
+    CollectPubsForUSAStateDescCallback(sdp, &pub_list);
+
+  for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
+       sfp != NULL;
+       sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext))
+    CollectPubsForUSAStateFeatCallback(sfp, &pub_list);
+
+  /* field selectors handed to GetFieldValueForObject */
+  field_inst.choice = FieldType_pub;
+  field_inst.data.intvalue = Publication_field_affiliation;
+  field_inst.next = NULL;
+
+  field_dept.choice = FieldType_pub;
+  field_dept.data.intvalue = Publication_field_affil_div;
+  field_dept.next = NULL;
+
+  for (vnp = pub_list; vnp != NULL; vnp = vnp->next) {
+    inst = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, &field_inst, NULL);
+    dept = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, &field_dept, NULL);
+    for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+         sdp != NULL;
+         sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &dcontext) ) {
+
+      biosrc = (BioSourcePtr)sdp->data.ptrvalue;
+      if (biosrc == NULL) continue;
+      for (ssp = biosrc->subtype; ssp != NULL; ssp = ssp->next) {
+        if (ssp->subtype == SUBSRC_identified_by) {
+          if ((inst && !StringCmp(ssp->name, inst)) || (dept && !StringCmp(ssp->name, dept)))
+            ValNodeAddPointer((ValNodePtr PNTR) userdata, OBJ_SEQDESC, sdp);
+        }
+      }
+    }
+
+    /* the field values are freed here once the comparison is done */
+    if (inst) inst = MemFree(inst);
+    if (dept) dept = MemFree(dept);
+  }
+
+  /* ValNodeFree releases only the list nodes, not the publications */
+  pub_list = ValNodeFree (pub_list);
+}
+
+
+static void FindSubmitTextInBioseqSet(BioseqSetPtr bssp, Pointer userdata);
+
+/*
+ * FindSubmitTextFromPub
+ * Dispatches on the kind of sep: a Bioseq goes to FindSubmitTextInBioseq,
+ * a Bioseq-set to FindSubmitTextInBioseqSet.  userdata is passed through
+ * untouched.
+ */
+static void FindSubmitTextFromPub(SeqEntryPtr sep, Pointer userdata)
+{
+  if (sep == NULL) return;
+
+  if (IS_Bioseq (sep)) {
+    FindSubmitTextInBioseq((BioseqPtr) sep->data.ptrvalue, userdata);
+    return;
+  }
+  if (IS_Bioseq_set (sep)) {
+    FindSubmitTextInBioseqSet((BioseqSetPtr) sep->data.ptrvalue, userdata);
+  }
+}
+
+
+
+/*
+ * FindSubmitTextInBioseqSet
+ * Calls FindSubmitTextFromPub on every member of bssp->seq_set.
+ */
+static void FindSubmitTextInBioseqSet(BioseqSetPtr bssp, Pointer userdata)
+{
+  SeqEntryPtr member;
+
+  if (bssp == NULL) return;
+
+  member = bssp->seq_set;
+  while (member != NULL) {
+    FindSubmitTextFromPub (member, userdata);
+    member = member->next;
+  }
+}
+
+
+
+/*
+ * FindSubmitTextFromSubmitBlock
+ * Takes the affiliation and division strings from the submit block's
+ * contact authors (when present) and passes copies of them to
+ * CheckForSubmitText together with sep and userdata.
+ *
+ * sbp->cit is now checked for NULL before it is dereferenced.  As before,
+ * CheckForSubmitText is called whenever a submit block is found, even if
+ * both strings are NULL.
+ */
+static void FindSubmitTextFromSubmitBlock(SeqEntryPtr sep, Pointer userdata)
+{
+  SubmitBlockPtr sbp;
+  CharPtr inst = NULL, dept = NULL;
+
+  sbp = FindSubmitBlockForSeqEntry(sep);
+  if (sbp == NULL) return;
+
+  if (sbp->cit != NULL && sbp->cit->authors != NULL && sbp->cit->authors->affil != NULL) {
+    if (sbp->cit->authors->affil->affil != NULL)
+      inst = StringSave (sbp->cit->authors->affil->affil);
+    if (sbp->cit->authors->affil->div != NULL)
+      dept = StringSave (sbp->cit->authors->affil->div);
+  }
+
+  CheckForSubmitText(sep, inst, dept, userdata);
+
+  if (inst) inst = MemFree(inst);
+  if (dept) dept = MemFree(dept);
+}
+
+
+
+/*
+ * MarkAndRemoveIdentifiedItems
+ * For every item in item_list, unlinks the first identified-by subsource
+ * from the item's BioSource and, when a log file is available, writes a
+ * line naming the item.  data is unused.
+ *
+ * Changes from the previous version:
+ *  - the BioSource is looked up according to vnp->choice.  Every item used
+ *    to be cast to a descriptor, which misreads OBJ_SEQFEAT items.
+ *  - items that are neither a source descriptor nor a biosource feature,
+ *    or that carry no BioSource, are skipped.
+ *  - the unlinked subsource is freed instead of being leaked.
+ */
+static void MarkAndRemoveIdentifiedItems(ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr vnp;
+  BioSourcePtr biosrc;
+  SeqDescrPtr sdp;
+  SeqFeatPtr sfp;
+  SubSourcePtr ssp, pre_ssp;
+  CharPtr feat_txt;
+
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    biosrc = NULL;
+    if (vnp->choice == OBJ_SEQDESC) {
+      sdp = (SeqDescrPtr)vnp->data.ptrvalue;
+      if (sdp != NULL && sdp->choice == Seq_descr_source)
+        biosrc = (BioSourcePtr) sdp->data.ptrvalue;
+    } else if (vnp->choice == OBJ_SEQFEAT) {
+      sfp = (SeqFeatPtr)vnp->data.ptrvalue;
+      if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC)
+        biosrc = (BioSourcePtr) sfp->data.value.ptrvalue;
+    }
+    if (biosrc == NULL) continue;
+
+    pre_ssp = NULL;
+    for (ssp = biosrc->subtype; ssp != NULL; ssp = ssp->next) {
+      if (ssp->subtype == SUBSRC_identified_by) {
+        /* unlink the node from the chain, then release it */
+        if (pre_ssp == NULL) biosrc->subtype = ssp->next;
+        else pre_ssp->next = ssp->next;
+        ssp->next = NULL;
+        ssp = SubSourceFree (ssp);
+        if (lip != NULL && lip->fp != NULL) {
+          feat_txt = GetDiscrepancyItemText (vnp);
+          fprintf (lip->fp, "Removed identified-by from %s", feat_txt);
+          feat_txt = MemFree (feat_txt);
+          lip->data_in_log = TRUE;
+        }
+        break;   /* only the first identified-by is removed */
+      }
+      else pre_ssp = ssp;
+    }
+  }
+}
+
+
+/*
+ * FindMoreNamesInIdentifiedBy
+ * Builds one list from four sources and reports it as
+ * ONCALLER_MORE_OR_SPEC_NAMES_IDENTIFIED_BY when it is not empty:
+ *   - biosources accepted by IdentifiedByHasMoreNames,
+ *   - biosources accepted by IdentifiedByHasSpecWords,
+ *   - matches added by FindSubmitTextFromSubmitBlock,
+ *   - matches added by FindSubmitTextFromPub.
+ */
+static void FindMoreNamesInIdentifiedBy(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr entry;
+  ValNodePtr flagged;
+  ValNodePtr with_spec_words;
+
+  flagged = CollectBioSources (sep_list, IdentifiedByHasMoreNames, TRUE);
+  with_spec_words = CollectBioSources(sep_list, IdentifiedByHasSpecWords, TRUE);
+  ValNodeLink (&flagged, with_spec_words);
+
+  for (entry = sep_list; entry != NULL; entry = entry->next) {
+    FindSubmitTextFromSubmitBlock(entry->data.ptrvalue, &flagged);
+    FindSubmitTextFromPub(entry->data.ptrvalue, &flagged);
+  }
+
+  if (flagged == NULL) return;
+
+  ValNodeAddPointer (discrepancy_list, 0,
+                     NewClickableItem (ONCALLER_MORE_OR_SPEC_NAMES_IDENTIFIED_BY,
+                                       "%d biosources have 3 or more names or suspect text in identified-by",
+                                       flagged));
+}
+
+
+/*
+ * MarkAndRemoveCollectedItems
+ * For every item in item_list, unlinks the first collected-by subsource
+ * from the item's BioSource and, when a log file is available, writes a
+ * line naming the item.  data is unused.
+ *
+ * Changes from the previous version:
+ *  - the BioSource is looked up according to vnp->choice.  Every item used
+ *    to be cast to a descriptor, which misreads OBJ_SEQFEAT items.
+ *  - items that are neither a source descriptor nor a biosource feature,
+ *    or that carry no BioSource, are skipped.
+ *  - the unlinked subsource is freed instead of being leaked.
+ */
+static void MarkAndRemoveCollectedItems(ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr vnp;
+  BioSourcePtr biosrc;
+  SeqDescrPtr sdp;
+  SeqFeatPtr sfp;
+  SubSourcePtr ssp, pre_ssp;
+  CharPtr feat_txt;
+
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    biosrc = NULL;
+    if (vnp->choice == OBJ_SEQDESC) {
+      sdp = (SeqDescrPtr)vnp->data.ptrvalue;
+      if (sdp != NULL && sdp->choice == Seq_descr_source)
+        biosrc = (BioSourcePtr) sdp->data.ptrvalue;
+    } else if (vnp->choice == OBJ_SEQFEAT) {
+      sfp = (SeqFeatPtr)vnp->data.ptrvalue;
+      if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC)
+        biosrc = (BioSourcePtr) sfp->data.value.ptrvalue;
+    }
+    if (biosrc == NULL) continue;
+
+    pre_ssp = NULL;
+    for (ssp = biosrc->subtype; ssp != NULL; ssp = ssp->next) {
+      if (ssp->subtype == SUBSRC_collected_by) {
+        /* unlink the node from the chain, then release it */
+        if (pre_ssp == NULL) biosrc->subtype = ssp->next;
+        else pre_ssp->next = ssp->next;
+        ssp->next = NULL;
+        ssp = SubSourceFree (ssp);
+        if (lip != NULL && lip->fp != NULL) {
+          feat_txt = GetDiscrepancyItemText (vnp);
+          fprintf (lip->fp, "Removed collected-by from %s", feat_txt);
+          feat_txt = MemFree (feat_txt);
+          lip->data_in_log = TRUE;
+        }
+        break;   /* only the first collected-by is removed */
+      }
+      else pre_ssp = ssp;
+    }
+  }
+}
+
+
+
+/*
+ * CollectedByHasMoreNames
+ * Applies HasMoreNames to the first collected-by subsource of biop and
+ * returns its verdict; FALSE when there is no such subsource.
+ */
+static Boolean CollectedByHasMoreNames(BioSourcePtr biop)
+{
+  SubSourcePtr qual;
+
+  if (biop == NULL) return FALSE;
+
+  qual = biop->subtype;
+  while (qual != NULL && qual->subtype != SUBSRC_collected_by) {
+    qual = qual->next;
+  }
+  if (qual == NULL) return FALSE;
+
+  /* only the first collected-by qualifier is examined */
+  if (HasMoreNames(qual)) return TRUE;
+  return FALSE;
+}
+
+
+
+/*
+ * FindMoreNamesInCollectedBy
+ * Reports (ONCALLER_MORE_NAMES_COLLECTED_BY) the biosources accepted by
+ * CollectedByHasMoreNames.  The collected list is handed to the clickable
+ * item.
+ */
+static void FindMoreNamesInCollectedBy(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr flagged = CollectBioSources (sep_list, CollectedByHasMoreNames, TRUE);
+
+  if (flagged == NULL) return;
+
+  ValNodeAddPointer (discrepancy_list, 0,
+                     NewClickableItem (ONCALLER_MORE_NAMES_COLLECTED_BY,
+                                       "%d biosources have 3 or more names in collected-by",
+                                       flagged));
+}
static Boolean HasMoreColons(SubSourcePtr ssp)
@@ -7456,7 +7670,7 @@ static Boolean CountryHasColons(BioSourcePtr biop)
if (biop == NULL || biop->subtype == NULL) return FALSE;
for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == 23) { // country
+ if (ssp->subtype == SUBSRC_country) { // country
if (HasMoreColons(ssp)) return TRUE;
else return FALSE;
}
@@ -7487,7 +7701,7 @@ static void CheckCountryColons(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_
}; // CheckCountryColons
-void ChangeColons(SubSourcePtr ssp, int replc)
+static void ChangeColons(SubSourcePtr ssp, int replc)
{
CharPtr colon_idx;
colon_idx = StringChr(ssp->name, ':');
@@ -7504,7 +7718,7 @@ static void FixCountryColons(ValNodePtr item_list, Pointer data, LogInfoPtr lip)
SeqFeatPtr sfp;
SeqDescPtr sdp;
SubSourcePtr ssp;
- BioSourcePtr biop;
+ BioSourcePtr biop = NULL;
ObjValNodePtr ovp;
for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
@@ -7520,8 +7734,10 @@ static void FixCountryColons(ValNodePtr item_list, Pointer data, LogInfoPtr lip)
biop = (BioSourcePtr)(sdp->data.ptrvalue);
}
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+ if (biop != NULL) {
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
if (HasMoreColons(ssp)) ChangeColons(ssp, ',');
+ }
}
}
@@ -7686,9 +7902,9 @@ void CheckForEukaryoteWithoutmRNA(ValNodePtr PNTR discrepancy_list, ValNodePtr s
ValNodePtr vnp;
ValNodePtr item_list = NULL;
- has_Eukaryote = TRUE;
- has_CD = FALSE;
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ has_Eukaryote = TRUE;
+ has_CD = FALSE;
if (item_list == NULL)
VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, ReportEukaryoticCDSHasmRNA);
}
@@ -7701,17 +7917,19 @@ void CheckForEukaryoteWithoutmRNA(ValNodePtr PNTR discrepancy_list, ValNodePtr s
static Boolean NameNotStandard(CharPtr nm)
{
- CharPtr stand_nm[] = {"5S ribosomal RNA",
- "5.8S ribosomal RNA",
- "12S ribosomal RNA",
- "16S ribosomal RNA",
- "18S ribosomal RNA",
- "23S ribosomal RNA",
- "26S ribosomal RNA",
- "28S ribosomal RNA",
- "large subunit ribosomal RNA",
- "small subunit ribosomal RNA"
- };
+ CharPtr stand_nm[] = {"4.5S ribosomal RNA",
+ "5S ribosomal RNA",
+ "5.8S ribosomal RNA",
+ "12S ribosomal RNA",
+ "16S ribosomal RNA",
+ "18S ribosomal RNA",
+ "21S ribosomal RNA",
+ "23S ribosomal RNA",
+ "26S ribosomal RNA",
+ "28S ribosomal RNA",
+ "large subunit ribosomal RNA",
+ "small subunit ribosomal RNA"
+ };
CharPtr cp, cp_next;
Uint4 cnt = sizeof(stand_nm)/sizeof(CharPtr);
@@ -7752,13 +7970,15 @@ static Boolean NameNotStandard(CharPtr nm)
static void CheckRRnaName(SeqFeatPtr sfp, Pointer userdata)
{
RnaRefPtr rna_p;
+ ValNodePtr newnode;
if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA) {
rna_p = (RnaRefPtr) sfp->data.value.ptrvalue;
if (rna_p != NULL && rna_p->type == 4) {
if (rna_p->ext.choice== 1
&& NameNotStandard(rna_p->ext.value.ptrvalue)){
- ValNodeAddPointer(userdata, OBJ_SEQFEAT, sfp);
+ newnode = ValNodeAddPointer(userdata, OBJ_SEQFEAT, sfp);
+ newnode->fatal = 1;
}
}
}
@@ -7767,13 +7987,15 @@ static void CheckRRnaName(SeqFeatPtr sfp, Pointer userdata)
void CheckforRRnaNameConflicts(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
- ValNodePtr vnp, rrna_ls = NULL;
+ ValNodePtr vnp, newnode, rrna_ls = NULL;
+
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
VisitFeaturesInSep(vnp->data.ptrvalue, &rrna_ls, CheckRRnaName);
}
if (rrna_ls != NULL) {
- ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (RRNA_NAME_CONFLICTS, "%d rRNA product names are not standard", rrna_ls));
+ newnode = ValNodeAddPointer(discrepancy_list, 0, NewClickableItem(RRNA_NAME_CONFLICTS, "%d rRNA product names are not standard. Correct the names to the standard format, eg \"16S ribosomal RNA\"", rrna_ls));
+ newnode->fatal = 1;
}
};
@@ -7842,9 +8064,13 @@ static Boolean HasDivCode(BioSourcePtr biop)
{
CharPtr divcode;
- if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) return FALSE;
+ if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) {
+ return FALSE;
+ }
divcode = biop->org->orgname->div;
- if (divcode == NULL || divcode[0] == '\0') return FALSE;
+ if (divcode == NULL || divcode[0] == '\0') {
+ return FALSE;
+ }
return TRUE;
@@ -7855,7 +8081,7 @@ static Boolean HasDivCode(BioSourcePtr biop)
static void CheckForDivConflicts(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
ValNodePtr src_list = NULL, vnp, subcat=NULL;
- ValNodePtr PNTR grp_sdp;
+ ValNodePtr PNTR grp_sdp_sfp;
SeqDescrPtr sdp;
SeqFeatPtr sfp;
CharPtr div;
@@ -7863,16 +8089,17 @@ static void CheckForDivConflicts(ValNodePtr PNTR discrepancy_list, ValNodePtr se
BioSourcePtr biosrcp;
OrgRefPtr orp;
OrgNamePtr onp;
- Uint4 i, num_grp = 0;
+ Uint4 i, num_grp = 0, sz;
ClickableItemPtr cip;
CharPtr fmt = "%d bioseqs have divsion code", tmp;
- grp_divcode = (CharPtr PNTR) MemNew (10* sizeof(CharPtr));
- for (i=0; i< 10; i++) grp_divcode[i] = (CharPtr) MemNew (4 * sizeof(Char));
- grp_sdp = (ValNodePtr PNTR) MemNew (10 * sizeof(ValNodePtr));
- for (i=0; i< 10; i++) grp_sdp[i] = NULL;
-
src_list = CollectBioSources (sep_list, HasDivCode, TRUE);
+ sz = ValNodeLen(src_list);
+
+ grp_divcode = (CharPtr PNTR) MemNew (sz * sizeof(CharPtr));
+ grp_sdp_sfp = (ValNodePtr PNTR) MemNew (sz * sizeof(ValNodePtr));
+ for (i=0; i< sz; i++) grp_sdp_sfp[i] = NULL;
+
for (vnp = src_list; vnp != NULL; vnp = vnp->next) {
biosrcp = NULL;
@@ -7891,32 +8118,47 @@ static void CheckForDivConflicts(ValNodePtr PNTR discrepancy_list, ValNodePtr se
onp = orp->orgname;
if (onp == NULL) continue;
div = onp->div;
- for (i=0; i< 10; i++)
+ for (i=0; i< num_grp; i++) {
if (!StringCmp(grp_divcode[i], div)) break;
- if (i==10) {
+ }
+ if (i == num_grp) {
+ grp_divcode[num_grp]
+ = (CharPtr) MemNew ((StringLen(div) + 1) * sizeof(Char));
sprintf(grp_divcode[num_grp], "%s", div);
- ValNodeAddPointer(&(grp_sdp[num_grp]), OBJ_SEQDESC, sdp);
+ if (vnp->choice == OBJ_SEQDESC)
+ ValNodeAddPointer(&(grp_sdp_sfp[num_grp]), OBJ_SEQDESC, sdp);
+ else ValNodeAddPointer(&(grp_sdp_sfp[num_grp]), OBJ_SEQFEAT, sfp);
num_grp ++;
}
- else ValNodeAddPointer(&(grp_sdp[i]), OBJ_SEQDESC, sdp);
+ else {
+ if (vnp->choice == OBJ_SEQDESC)
+ ValNodeAddPointer(&(grp_sdp_sfp[i]), OBJ_SEQDESC, sdp);
+ else ValNodeAddPointer(&(grp_sdp_sfp[i]), OBJ_SEQFEAT, sfp);
+ }
}
if (num_grp > 1) {
- for (i=0; i< num_grp; i++) {
- tmp = (CharPtr) MemNew (sizeof(Char) * (StringLen(fmt) + 5));
- sprintf(tmp, "%s %s", fmt, grp_divcode[i]);
- ValNodeAddPointer(&subcat, 0,
- NewClickableItem (DIVISION_CODE_CONFLICTS, tmp, grp_sdp[i]));
- }
- cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
- cip->subcategories = subcat;
- cip->clickable_item_type = DIVISION_CODE_CONFLICTS;
- cip->description = (CharPtr) MemNew (100 *sizeof(Char));
- sprintf(cip->description, "Division code conflicts found");
+ for (i=0; i< num_grp; i++) {
+ tmp = (CharPtr) MemNew (
+ sizeof(Char) * (StringLen(fmt) + StringLen(grp_divcode[i]) + 10));
+ sprintf(tmp, "%s %s", fmt, grp_divcode[i]);
+ ValNodeAddPointer(&subcat, 0,
+ NewClickableItem (DIVISION_CODE_CONFLICTS, tmp, grp_sdp_sfp[i]));
+ tmp = MemFree(tmp);
+ }
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ cip->subcategories = subcat;
+ cip->clickable_item_type = DIVISION_CODE_CONFLICTS;
+ cip->description = (CharPtr) MemNew (100 *sizeof(Char));
+ sprintf(cip->description, "Division code conflicts found");
- ValNodeAddPointer(discrepancy_list, 0, cip);
+ ValNodeAddPointer(discrepancy_list, 0, cip);
}
+ for (i=0; i< sz; i++) grp_divcode[i] = MemFree(grp_divcode[i]);
+ grp_divcode = MemFree(grp_divcode);
+ grp_sdp_sfp = MemFree(grp_sdp_sfp);
+
} // CheckForDivConflicts
@@ -7926,7 +8168,7 @@ static void AddCBSStrainToCultureColl (ValNodePtr item_list, Pointer data, LogIn
AECRParseActionPtr parse;
SourceQualPairPtr pair;
ValNodePtr field_from, field_to, vnp;
- CharPtr str1, str2, cp, new_str;
+ CharPtr str1, str2, cp;
parse = AECRParseActionNew ();
@@ -7957,16 +8199,13 @@ static void AddCBSStrainToCultureColl (ValNodePtr item_list, Pointer data, LogIn
for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
str1 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, NULL);
- str2 = GetTextPortionFromString (str1, parse->portion);
- if (str2 != NULL) {
- cp = StringChr (str2, ';');
+ str2 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL);
+ if (str1 == NULL) {
+ cp = StringChr (str2, ':');
if (cp != NULL) {
- *cp = 0;
+ *cp = ' ';
}
- new_str = (CharPtr) MemNew (sizeof (Char) * (4 + StringLen (str2) + 1));
- sprintf (new_str, "CBS:%s", str2);
- SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, new_str, parse->existing_text);
- new_str = MemFree (new_str);
+ SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, NULL, str2, parse->existing_text);
}
str1 = MemFree (str1);
str2 = MemFree (str2);
@@ -8087,6 +8326,9 @@ static void FindMrnaHavingMultiCDS(BioseqPtr bsp, Pointer userdata)
Uint4 num = 0;
Boolean isMRNA = FALSE;
CharPtr supp_cmt = "coding region disrupted by sequencing gap";
+ Boolean all_pseudo = TRUE;
+ Boolean all_comment = TRUE;
+
if (bsp == NULL || userdata == NULL) return;
@@ -8105,11 +8347,18 @@ static void FindMrnaHavingMultiCDS(BioseqPtr bsp, Pointer userdata)
for (cds = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &fcontext);
cds != NULL;
cds = SeqMgrGetNextFeature (bsp, cds, 0, FEATDEF_CDS, &fcontext)) {
- if (!IsPseudo (cds) && !StringStr(cds->comment, supp_cmt)) num++;
+ num++;
+ if (!IsPseudo(cds)) {
+ all_pseudo = FALSE;
+ }
+ if (StringStr(cds->comment, supp_cmt) == NULL) {
+ all_comment = FALSE;
+ }
}
- if (num > 1) ValNodeAddPointer(userdata, OBJ_BIOSEQ, bsp);
-
+ if (num > 1 && !all_pseudo && !all_comment) {
+ ValNodeAddPointer(userdata, OBJ_BIOSEQ, bsp);
+ }
} // FindMrnaHavingMultiCDS
@@ -8586,15 +8835,15 @@ static void CheckAuthMissingAuthCallback (NameStdPtr nsp, Pointer userdata)
if ((pIsBad = (BoolPtr)userdata) == NULL || *pIsBad) return;
- if (nsp == NULL) *pIsBad = TRUE;
-
- if ( nsp->names[0] == NULL || !strlen(nsp->names[0])) {
+ if (nsp == NULL) {
+ *pIsBad = TRUE;
+ } else if ( nsp->names[0] == NULL || !StringLen(nsp->names[0])) {
/* last name missing */
*pIsBad = TRUE;
- } else if( nsp->names[1] == NULL || !strlen(nsp->names[1])) {
+ } else if( nsp->names[1] == NULL || !StringLen(nsp->names[1])) {
/* first name missing */
*pIsBad = TRUE;
- } else if(nsp->names[4] == NULL || !strlen (nsp->names[4])) {
+ } else if(nsp->names[4] == NULL || !StringLen (nsp->names[4])) {
/* initials missing */
*pIsBad = TRUE;
}
@@ -8609,6 +8858,9 @@ static Boolean AreAuthMissingInPubdesc (PubdescPtr pubdesc)
if (pubdesc == NULL || (auth_ls = GetAuthListForPub(pubdesc->pub)) == NULL )
return FALSE;
+ if (*auth_ls == NULL) {
+ return TRUE;
+ }
for (this_pub = pubdesc->pub; this_pub != NULL; this_pub = this_pub->next) {
if (this_pub->choice == PUB_PMid) {return FALSE;}
@@ -8769,82 +9021,63 @@ static void CollectBiomaterialTaxnameDiscrepancies(ValNodePtr PNTR discrepancy_l
} // CollectBiomaterialTaxnameDiscrepancies()
-
-
-
-typedef struct sfp_cds {
- Boolean beg_cds;
- ValNodePtr item_list;
-} SfpCDs, PNTR SfpCDsPtr;
-
-
-typedef struct loc {
- SeqLocPtr seqloc;
- SeqFeatPtr gene;
-} GeneLoc, PNTR GeneLocPtr;
-
-
-static void GetOverlappedGenes(SeqFeatPtr sfp, Pointer userdata)
+static void GetOverlappedGenes (BioseqPtr bsp, Pointer userdata)
{
- SeqFeatPtr gene, tmp;
- GeneLocPtr gene_loc = NULL;
- int i=0, ii, jj, diff, cnt=0;
-
- if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || userdata == NULL
- || ((SfpCDsPtr)userdata)->beg_cds == FALSE) return;
+ SeqFeatPtr sfp, sfp_compare;
+ SeqMgrFeatContext context;
+ ValNodePtr gene_list = NULL, vnp, vnp_next;
+ ValNodePtr non_overlapped;
- gene = GetGeneForFeature (sfp);
- if (gene == NULL) { /* no gene means no gene name */
+ if (bsp == NULL || userdata == NULL)
+ {
return;
}
- ((SfpCDsPtr)userdata)->beg_cds = FALSE;
- for (tmp = gene; tmp != NULL; tmp = tmp->next)
- if (tmp->data.choice == SEQFEAT_GENE) cnt ++;
- gene_loc = (GeneLocPtr) MemNew (sizeof (GeneLoc) * cnt);
-
- for (tmp = gene; tmp != NULL; tmp = tmp->next) {
- if (tmp->data.choice != SEQFEAT_GENE) continue;
- gene_loc[i].gene = tmp;
- gene_loc[i].seqloc = tmp->location;
- i++;
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, FEATDEF_GENE, &context);
+ sfp != NULL;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, FEATDEF_GENE, &context))
+ {
+ ValNodeAddPointer (&gene_list, 0, sfp);
}
- for (ii=0; ii < i-1; ii++) {
- for (jj=ii+1; jj< i; jj++) {
- diff = SeqLocAinB(gene_loc[ii].seqloc, gene_loc[jj].seqloc);
- if (diff >0)
- ValNodeAddPointer((&((SfpCDsPtr)userdata)->item_list), OBJ_SEQFEAT, gene_loc[ii].gene);
- diff = SeqLocAinB(gene_loc[jj].seqloc, gene_loc[ii].seqloc);
- if (diff > 0)
- ValNodeAddPointer((&((SfpCDsPtr)userdata)->item_list), OBJ_SEQFEAT, gene_loc[jj].gene);
+ for (vnp = gene_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next)
+ {
+ sfp = (SeqFeatPtr) vnp->data.ptrvalue;
+ for (vnp_next = vnp->next; vnp_next != NULL; vnp_next = vnp_next->next)
+ {
+ sfp_compare = (SeqFeatPtr) vnp_next->data.ptrvalue;
+ if (SeqLocStrand (sfp->location) != SeqLocStrand (sfp_compare->location))
+ {
+ continue;
+ }
+
+ if ( SeqLocAinB(sfp->location, sfp_compare->location) > 0) vnp->choice = OBJ_SEQFEAT;
+ else if (SeqLocAinB(sfp_compare->location, sfp->location) > 0) vnp_next->choice = OBJ_SEQFEAT;
}
}
-} /* GetOverlappedGenes */
+ non_overlapped = ValNodeExtractList (&gene_list, 0);
+ non_overlapped = ValNodeFree (non_overlapped);
+ ValNodeLink ((ValNodePtr PNTR)userdata, gene_list);
+}; // GetOverlappedGenes
static void FindOverlappedGenes(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
- SfpCDsPtr genes=NULL;
- ValNodePtr vnp;
+ ValNodePtr vnp, genes = NULL;
- genes = (SfpCDsPtr) MemNew (sizeof(SfpCDs));
- genes->item_list = NULL;
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
- genes->beg_cds = TRUE;
- VisitFeaturesInSep (vnp->data.ptrvalue, genes, GetOverlappedGenes);
+ VisitBioseqsInSep (vnp->data.ptrvalue, &genes, GetOverlappedGenes);
}
- if (genes->item_list != NULL) {
- ValNodeAddPointer(discrepancy_list, 0,
- NewClickableItem (FIND_OVERLAPPED_GENES,
- "%d genes completely overlapped by other genes", genes->item_list));
+ if (genes!= NULL) {
+ ValNodeAddPointer(discrepancy_list, 0,
+ NewClickableItem (FIND_OVERLAPPED_GENES,
+ "%d genes completely overlapped by other genes", genes));
}
-} /* FindOverlappedFGenes */
-
+}
@@ -8979,7 +9212,7 @@ static void TestDeflineExistence(ValNodePtr PNTR discrepancy_list, ValNodePtr se
FindOneDefline(sep, &item_list);
if (item_list != NULL) {
ValNodeAddPointer (discrepancy_list, 0,
- NewClickableItem (TEST_DEFLINE_PRESENT, "Bioseq has definition line", item_list));
+ NewClickableItem (TEST_DEFLINE_PRESENT, "%d Bioseqs have definition line", item_list));
break;
}
}
@@ -9044,6 +9277,37 @@ static void ShowCDsHavingGene(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_l
} /* ShowCDsHavingGene() */
+/*
+ * Autofix function for the SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME test.
+ * For each coding-region item whose gene has a non-blank locus, the locus
+ * text is passed to SetStringValue on the gene comment (with
+ * ExistingTextOption_append_semi) and the locus is then cleared.  Every
+ * affected entity is afterwards marked dirty and sent an OM_MSG_UPDATE.
+ * data and lip are unused.
+ */
+static void RemoveGeneNamesFromHypotheticalCodingRegions(ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr item;
+  ValNodePtr touched = NULL;
+  SeqFeatPtr cds, gene;
+  GeneRefPtr gene_ref;
+
+  for (item = item_list; item != NULL; item = item->next) {
+    if (item->choice != OBJ_SEQFEAT) continue;
+
+    cds = (SeqFeatPtr) item->data.ptrvalue;
+    if (cds->data.choice != SEQFEAT_CDREGION) continue;
+
+    gene = GetGeneForFeature (cds);
+    if (gene == NULL) continue;
+
+    gene_ref = (GeneRefPtr) gene->data.value.ptrvalue;
+    if (gene_ref == NULL) continue;
+    if (StringHasNoText (gene_ref->locus)) continue;
+
+    /* hand the locus text to the gene comment before dropping it */
+    SetStringValue (&(gene->comment), gene_ref->locus, ExistingTextOption_append_semi);
+    gene_ref->locus = MemFree (gene_ref->locus);
+    ValNodeAddInt (&touched, 0, cds->idx.entityID);
+  }
+
+  /* notify the object manager once per recorded entity ID */
+  for (item = touched; item != NULL; item = item->next) {
+    ObjMgrSetDirtyFlag (item->data.intvalue, TRUE);
+    ObjMgrSendMsg (OM_MSG_UPDATE, item->data.intvalue, 0, 0);
+  }
+
+  touched = ValNodeFree (touched);
+}
+
+
/* Find code breaks in a coding region: J. Chen */
static Boolean CodingRegionHasCodeBreak(SeqFeatPtr sfp)
{
@@ -9103,26 +9367,34 @@ static Boolean GeneRefMatchForSuperfluousCheck (GeneRefPtr grp1, GeneRefPtr grp2
{
return FALSE;
}
- else if (StringCmp (grp1->locus, grp2->locus) != 0
- || StringCmp (grp1->locus_tag, grp2->locus_tag) != 0
- || (grp1->pseudo && !grp2->pseudo)
- || (!grp1->pseudo && grp2->pseudo))
+ if ((grp1->pseudo && !grp2->pseudo)|| (!grp1->pseudo && grp2->pseudo)) {
+ return FALSE;
+ }
+ else if (!StringHasNoText(grp1->locus)
+ && !StringHasNoText(grp2->locus)
+ && StringCmp (grp1->locus, grp2->locus) != 0)
+ {
+ return FALSE;
+ }
+ else if (!StringHasNoText(grp1->locus_tag)
+ && !StringHasNoText(grp2->locus_tag)
+ && StringCmp (grp1->locus_tag, grp2->locus_tag) != 0)
{
return FALSE;
}
- else if (StringHasNoText (grp1->allele)
+ else if (!StringHasNoText (grp1->allele)
&& !StringHasNoText (grp2->allele)
&& StringCmp (grp1->allele, grp2->allele) != 0)
{
return FALSE;
}
- else if (StringHasNoText (grp1->desc)
+ else if (!StringHasNoText (grp1->desc)
&& !StringHasNoText (grp2->desc)
&& StringCmp (grp1->desc, grp2->desc) != 0)
{
return FALSE;
}
- else if (StringHasNoText (grp1->maploc)
+ else if (!StringHasNoText (grp1->maploc)
&& !StringHasNoText (grp2->maploc)
&& StringCmp (grp1->maploc, grp2->maploc) != 0)
{
@@ -9312,6 +9584,22 @@ static void FindMissingGenes (BioseqPtr bsp, Pointer userdata)
}
+/*
+ * HasPseudogeneQualifier
+ * Returns TRUE when sfp carries a GenBank qualifier whose name is
+ * "pseudogene" (case-insensitive), FALSE otherwise or when sfp is NULL.
+ *
+ * The comparison result is now tested against 0.  The previous test was
+ * inverted: it returned TRUE for the first qualifier whose name was NOT
+ * "pseudogene", so almost any feature with qualifiers counted as pseudo.
+ */
+static Boolean HasPseudogeneQualifier (SeqFeatPtr sfp)
+{
+  GBQualPtr qual;
+
+  if (sfp == NULL) {
+    return FALSE;
+  }
+  for (qual = sfp->qual; qual != NULL; qual = qual->next) {
+    if (StringICmp (qual->qual, "pseudogene") == 0) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
static void
GetPseudoAndNonPseudoGeneList
(ValNodePtr super_list,
@@ -9337,7 +9625,7 @@ GetPseudoAndNonPseudoGeneList
if (gene != NULL && gene->data.choice == SEQFEAT_GENE)
{
grp = (GeneRefPtr) gene->data.value.ptrvalue;
- if (gene->pseudo || (grp != NULL && grp->pseudo))
+ if (gene->pseudo || (grp != NULL && grp->pseudo) || HasPseudogeneQualifier(gene))
{
ValNodeAddPointer (pseudo_list, OBJ_SEQFEAT, gene);
}
@@ -9387,6 +9675,28 @@ GetFrameshiftAndNonFrameshiftGeneList
}
+/*
+ * RemoveGenesWithNoteOrDescription
+ * Drops from *list every OBJ_SEQFEAT entry whose feature has a non-blank
+ * comment, or is a gene whose GeneRef has a non-blank desc.  Entries are
+ * marked by setting their choice to 0 and are then extracted and freed in
+ * one pass, so entries that already had choice 0 are dropped as well.
+ */
+static void RemoveGenesWithNoteOrDescription(ValNodePtr PNTR list)
+{
+  ValNodePtr node, discarded;
+  SeqFeatPtr feat;
+  GeneRefPtr gene_ref;
+  Boolean    drop;
+
+  for (node = *list; node != NULL; node = node->next) {
+    if (node->choice != OBJ_SEQFEAT) continue;
+    feat = (SeqFeatPtr) node->data.ptrvalue;
+    if (feat == NULL) continue;
+
+    drop = FALSE;
+    if (!StringHasNoText (feat->comment)) {
+      drop = TRUE;
+    } else if (feat->data.choice == SEQFEAT_GENE) {
+      gene_ref = (GeneRefPtr) feat->data.value.ptrvalue;
+      if (gene_ref != NULL && !StringHasNoText (gene_ref->desc)) {
+        drop = TRUE;
+      }
+    }
+    if (drop) {
+      node->choice = 0;   /* mark for extraction below */
+    }
+  }
+
+  discarded = ValNodeExtractList (list, 0);
+  ValNodeFree(discarded);
+}
+
+
extern void AddMissingAndSuperfluousGeneDiscrepancies (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
ClickableItemPtr dip, pseudo_dip, non_pseudo_dip;
@@ -9398,6 +9708,8 @@ extern void AddMissingAndSuperfluousGeneDiscrepancies (ValNodePtr PNTR discrepan
MissSuperGenesData msgd;
ValNodePtr non_pseudo_list = NULL, pseudo_list = NULL, vnp;
ValNodePtr non_frameshift_list = NULL, frameshift_list = NULL;
+ SeqEntryPtr orig_scope;
+ ValNodePtr subcat = NULL, item_list;
if (discrepancy_list == NULL)
{
@@ -9407,9 +9719,12 @@ extern void AddMissingAndSuperfluousGeneDiscrepancies (ValNodePtr PNTR discrepan
msgd.missing_list = NULL;
msgd.super_list = NULL;
+ orig_scope = SeqEntrySetScope (NULL);
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ SeqEntrySetScope (vnp->data.ptrvalue);
VisitBioseqsInSep (vnp->data.ptrvalue, &msgd, FindMissingGenes);
}
+ SeqEntrySetScope (orig_scope);
if (msgd.missing_list != NULL)
{
@@ -9423,31 +9738,34 @@ extern void AddMissingAndSuperfluousGeneDiscrepancies (ValNodePtr PNTR discrepan
if (msgd.super_list != NULL)
{
GetPseudoAndNonPseudoGeneList (msgd.super_list, &pseudo_list, &non_pseudo_list);
+ RemoveGenesWithNoteOrDescription(&non_pseudo_list);
GetFrameshiftAndNonFrameshiftGeneList (non_pseudo_list, &frameshift_list, &non_frameshift_list);
non_pseudo_list = ValNodeFree (non_pseudo_list);
- dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, extra_genes_fmt, msgd.super_list);
- if (dip != NULL)
+ msgd.super_list = ValNodeFree (msgd.super_list);
+
+ if (frameshift_list != NULL)
+ {
+ non_pseudo_dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, non_pseudo_frameshift_extra_genes_fmt, frameshift_list);
+ non_pseudo_dip->level = 1;
+ ValNodeAddPointer (&subcat, 0, non_pseudo_dip);
+ }
+ if (non_frameshift_list != NULL)
+ {
+ non_pseudo_dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, non_pseudo_non_frameshift_extra_genes_fmt, non_frameshift_list);
+ non_pseudo_dip->level = 1;
+ ValNodeAddPointer (&subcat, 0, non_pseudo_dip);
+ }
+ if (pseudo_list != NULL)
{
+ pseudo_dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, pseudo_extra_genes_fmt, pseudo_list);
+ pseudo_dip->level = 1;
+ ValNodeAddPointer (&subcat, 0, pseudo_dip);
+ }
+ if (subcat != NULL) {
+ item_list = ItemListFromSubcategories (subcat);
+ dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, extra_genes_fmt, item_list);
+ dip->subcategories = subcat;
ValNodeAddPointer (discrepancy_list, 0, dip);
-
- if (frameshift_list != NULL)
- {
- non_pseudo_dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, non_pseudo_frameshift_extra_genes_fmt, frameshift_list);
- non_pseudo_dip->level = 1;
- ValNodeAddPointer (&(dip->subcategories), 0, non_pseudo_dip);
- }
- if (non_frameshift_list != NULL)
- {
- non_pseudo_dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, non_pseudo_non_frameshift_extra_genes_fmt, non_frameshift_list);
- non_pseudo_dip->level = 1;
- ValNodeAddPointer (&(dip->subcategories), 0, non_pseudo_dip);
- }
- if (pseudo_list != NULL)
- {
- pseudo_dip = NewClickableItem (DISC_SUPERFLUOUS_GENE, pseudo_extra_genes_fmt, pseudo_list);
- pseudo_dip->level = 1;
- ValNodeAddPointer (&(dip->subcategories), 0, pseudo_dip);
- }
}
}
}
@@ -9611,70 +9929,11 @@ static void OnCallerMissingAndSuperfluousGenes (ValNodePtr PNTR discrepancy_list
/* test for missing or inconsistent protein IDs */
-typedef struct prefixcheck
-{
- CharPtr prefix;
- ValNodePtr feature_list;
-} PrefixCheckData, PNTR PrefixCheckPtr;
-
-
-static ValNodePtr FreePrefixCheckList (ValNodePtr prefix_list)
-{
- PrefixCheckPtr pcp;
-
- if (prefix_list == NULL)
- {
- return NULL;
- }
-
- prefix_list->next = FreePrefixCheckList (prefix_list->next);
-
- pcp = (PrefixCheckPtr) prefix_list->data.ptrvalue;
- if (pcp != NULL)
- {
- pcp->prefix = MemFree (pcp->prefix);
- pcp->feature_list = ValNodeFree (pcp->feature_list);
- pcp = MemFree (pcp);
- }
- prefix_list = ValNodeFree (prefix_list);
- return NULL;
-}
-
-
-static ClickableItemPtr InconsistentPrefix (PrefixCheckPtr pcp, CharPtr bad_fmt, DiscrepancyType disc_type)
-{
- ClickableItemPtr dip = NULL;
-
- if (pcp == NULL || StringHasNoText (pcp->prefix) || pcp->feature_list == NULL)
- {
- return NULL;
- }
- dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
- if (dip != NULL)
- {
- dip->clickable_item_type = disc_type;
- dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + StringLen (pcp->prefix)+ 15));
- sprintf (dip->description, bad_fmt, ValNodeLen (pcp->feature_list), pcp->prefix);
- dip->callback_func = NULL;
- dip->datafree_func = NULL;
- dip->callback_data = NULL;
- dip->item_list = pcp->feature_list;
- pcp->feature_list = NULL;
- }
- return dip;
-}
-
CharPtr discReportInconsistentLocusTagPrefixFmt = "%d features have locus tag prefix %s.";
CharPtr discReportInconsistentProteinIDPrefixFmt = "%d sequences have protein ID prefix %s.";
CharPtr discReportBadProteinIdFmt = "%d proteins have invalid IDs.";
-static ClickableItemPtr InconsistentLocusTagPrefix (PrefixCheckPtr pcp)
-{
- return InconsistentPrefix (pcp, discReportInconsistentLocusTagPrefixFmt, DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX);
-}
-
-
extern void FindProteinIDCallback (BioseqPtr bsp, Pointer userdata)
{
ProtIdListsPtr pip;
@@ -9752,12 +10011,8 @@ typedef struct locustagcheck
Boolean exclude_dirsub;
} LocusTagCheckData, PNTR LocusTagCheckPtr;
-static void GeneLocusTagDiscrepancyCallback (ValNodePtr item_list, Pointer userdata)
-{
- Message (MSG_OK, "I could launch the editor for the individual gene...");
-}
-
static Boolean IsBacterialBioSource (BioSourcePtr biop);
+static Boolean IsArchaealBioSource (BioSourcePtr biop);
/* Not WGS, genome, or RefSeq */
static Boolean IsLocationDirSub (SeqLocPtr slp)
@@ -9786,7 +10041,7 @@ static Boolean IsLocationDirSub (SeqLocPtr slp)
rval = TRUE;
} else {
rval = TRUE;
- for (sip = bsp->id; sip != NULL && !rval; sip = sip->next) {
+ for (sip = bsp->id; sip != NULL && rval; sip = sip->next) {
if (sip->choice == SEQID_OTHER) {
rval = FALSE;
}
@@ -10070,7 +10325,7 @@ extern void AddDiscrepanciesForMissingOrNonUniqueGeneLocusTagsEx (ValNodePtr PNT
}
if (ltcd.locus_tags_list != NULL) {
- ltcd.locus_tags_list = ValNodeSort (ltcd.locus_tags_list, SortVnpByGlobalDiscrepancyString);
+ ltcd.locus_tags_list = ValNodeSort (ltcd.locus_tags_list, SortVnpByGlobalDiscrepancyStringCaseSensitive);
ltcd.missing_list = ValNodeSort (ltcd.missing_list, SortVnpByGlobalDiscrepancyString);
if (ltcd.missing_list != NULL) {
@@ -10249,28 +10504,6 @@ static CharPtr FindVnpStringMatches (ValNodePtr list1, ValNodePtr list2, Boolean
}
-static void ExtractVnpByStringSearch (ValNodePtr PNTR list, CharPtr search)
-{
- ValNodePtr vnp, prev = NULL, vnp_next;
- if (list == NULL) return;
-
- for (vnp = *list; vnp != NULL; vnp = vnp_next) {
- vnp_next = vnp->next;
- if (StringSearch (vnp->data.ptrvalue, search) != NULL) {
- if (prev == NULL) {
- *list = vnp_next;
- } else {
- prev->next = vnp_next;
- }
- vnp->next = NULL;
- vnp = ValNodeFreeData (vnp);
- } else {
- prev = vnp;
- }
- }
-}
-
-
static CharPtr GetGeneStringMatch (CharPtr str1, CharPtr str2)
{
ValNodePtr list1, list2;
@@ -10396,11 +10629,14 @@ static void FindAdjacentPseudoGenes (ValNodePtr PNTR discrepancy_list, ValNodePt
}
if (pair_list != NULL) {
subcategories = SubcategoriesForIdenticalClickableItemDescriptions (pair_list);
-
- item_list = ItemListFromSubcategories (subcategories);
- cip = DiscrepancyForPairs (DISC_ADJACENT_PSEUDOGENE, "%d pseudogenes match an adjacent pseudogene's text", item_list);
- cip->subcategories = subcategories;
- ValNodeAddPointer (discrepancy_list, 0, cip);
+ if (subcategories == NULL) {
+ ValNodeLink(discrepancy_list, pair_list);
+ } else {
+ item_list = ItemListFromSubcategories (subcategories);
+ cip = DiscrepancyForPairs (DISC_ADJACENT_PSEUDOGENE, "%d pseudogenes match an adjacent pseudogene's text", item_list);
+ cip->subcategories = subcategories;
+ ValNodeAddPointer (discrepancy_list, 0, cip);
+ }
}
}
@@ -10556,10 +10792,12 @@ static Boolean PosIsAt3End (Int4 pos, SeqLocPtr slp)
return TRUE;
}
}
+/* unnecessary: J. Chen
bsp = BioseqFind(SeqLocId (slp));
if (pos == bsp->length -1) {
return TRUE;
}
+*/
}
return FALSE;
}
@@ -10640,19 +10878,48 @@ static void FindShortIntronsCallback (SeqFeatPtr sfp, Pointer data)
}
}
+static Boolean IsMitochondrionBioseq(BioseqPtr bsp)
+{
+  /* TRUE when the first source descriptor found for bsp has a BioSource
+   * whose genome field is GENOME_mitochondrion; FALSE otherwise, including
+   * when bsp is NULL or no source descriptor / BioSource is available. */
+  SeqMgrDescContext context;
+  SeqDescrPtr       sdp;
+  BioSourcePtr      biop;
+
+  if (bsp == NULL) {
+    return FALSE;
+  }
+  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+  if (sdp == NULL) return FALSE;
+  biop = (BioSourcePtr) sdp->data.ptrvalue;
+  return (Boolean) (biop != NULL && biop->genome == GENOME_mitochondrion);
+}
+
+static void FindShortIntronsOnBsp(BioseqPtr bsp, Pointer item_list)
+{
+  /* Calls FindShortIntronsCallback (sfp, item_list) for each CDS feature on bsp. */
+  SeqFeatPtr        sfp;
+  SeqMgrFeatContext fcontext;
+  if (bsp == NULL || !ISA_na (bsp->mol)) return;   /* only bioseqs passing ISA_na */
+  for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
+       sfp != NULL;
+       sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext)) {
+    FindShortIntronsCallback (sfp, item_list);
+  }
+}
-extern void FindShortIntrons (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+extern void FindShortIntronsEx (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list, Boolean check_organelles)
{
ValNodePtr item_list = NULL, vnp, with_exception = NULL;
SeqFeatPtr sfp;
- SeqEntryPtr sep;
Boolean any_no_exception = FALSE;
ClickableItemPtr cip;
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
- sep = vnp->data.ptrvalue;
- VisitFeaturesInSep (sep, &item_list, FindShortIntronsCallback);
+ VisitBioseqsInSep(vnp->data.ptrvalue, &item_list, FindShortIntronsOnBsp);
}
+
if (item_list != NULL) {
for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
sfp = vnp->data.ptrvalue;
@@ -10675,24 +10942,144 @@ extern void FindShortIntrons (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_l
}
+extern void FindShortIntrons (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{ /* Convenience wrapper: forwards to FindShortIntronsEx with check_organelles = FALSE. */
+ FindShortIntronsEx(discrepancy_list, sep_list, FALSE);
+}
+
+
+static const CharPtr kPutativeFrameShift = "putative frameshift";
+
NLM_EXTERN void AddExceptionsToShortIntrons (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
{
- ValNodePtr vnp;
- SeqFeatPtr sfp;
- CharPtr txt;
+ BioseqPtr bsp, pbsp;
+ SeqFeatPtr gene, sfp;
+ size_t len;
+ SeqLocPtr slp;
+ CharPtr str, txt;
+ ValNodePtr entityIDList = NULL, vnp;
+ SeqDescrPtr sdp;
+ SeqMgrDescContext context;
+ BioSourcePtr biop = NULL;
+ Boolean is_bac_src;
+ ValNodeBlock to_convert;
+ /* Per feature item: mitochondrial source -> untouched; bacterial/archaeal CDS -> gene marked pseudo (CDS queued for conversion or marked for deletion); anything else -> low-quality exception. data is unused. */
+ InitValNodeBlock(&to_convert, NULL);
for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
- if (vnp->choice == OBJ_SEQFEAT && (sfp = (SeqFeatPtr) vnp->data.ptrvalue) != NULL
- && StringStr (sfp->except_text, "low-quality sequence region") == NULL) {
- SetStringValue (&(sfp->except_text), "low-quality sequence region", ExistingTextOption_append_semi);
- sfp->excpt = TRUE;
- if (lip != NULL && lip->fp != NULL) {
- txt = GetDiscrepancyItemText (vnp);
- fprintf (lip->fp, "Added low-quality sequence region exception to %s\n", txt);
- txt = MemFree (txt);
+ if (vnp->choice != OBJ_SEQFEAT) continue;
+ sfp = (SeqFeatPtr) vnp->data.ptrvalue;
+ if (sfp == NULL) continue;
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp == NULL) continue;
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+ biop = (sdp != NULL) ? (BioSourcePtr) sdp->data.ptrvalue : NULL; /* reset each pass so a bioseq without a source never inherits the previous item's BioSource */
+ if (biop != NULL && IsBacterialBioSource(biop)) {
+ is_bac_src = TRUE;
+ }
+ else is_bac_src = FALSE;
+ if (biop != NULL && biop->genome == GENOME_mitochondrion) {
+ /* mitochondrion: no change */
+ }
+ else if ( biop != NULL && (is_bac_src || IsArchaealBioSource(biop)) ) {
+ if (sfp->idx.subtype == FEATDEF_CDS) {
+ if (is_bac_src) {
+ ValNodeAddPointerToEnd (&to_convert, OBJ_SEQFEAT, sfp);
+ }
+
+ gene = GetGeneForFeature (sfp);
+ if (gene != NULL) {
+ gene->pseudo = TRUE;
+ if (StringDoesHaveText (sfp->comment)) {
+ if (StringDoesHaveText (gene->comment)) {
+ len = StringLen (sfp->comment) + StringLen (gene->comment) + 10;
+ str = (CharPtr) MemNew (sizeof (Char) * len);
+ if (str != NULL) {
+ StringCpy (str, sfp->comment);
+ StringCat (str, "; ");
+ StringCat (str, gene->comment);
+ gene->comment = MemFree (gene->comment);
+ gene->comment = str;
+ }
+ } else {
+ gene->comment = sfp->comment;
+ sfp->comment = NULL;
+ if (is_bac_src) {
+ sfp->comment = StringSave("contains short intron that may represent a frameshift");
+ }
+ }
+ }
+ if (StringSearch (gene->comment, kPutativeFrameShift) == NULL) {
+ if (StringDoesHaveText (gene->comment)) {
+ len = StringLen (kPutativeFrameShift) + StringLen (gene->comment) + 10;
+ str = (CharPtr) MemNew (sizeof (Char) * len);
+ if (str != NULL) {
+ StringCpy (str, kPutativeFrameShift);
+ StringCat (str, "; ");
+ StringCat (str, gene->comment);
+ gene->comment = MemFree (gene->comment);
+ gene->comment = str;
+ }
+ } else { /* NOTE(review): same statements as the else-branch above; kPutativeFrameShift is never stored on this path and the old gene->comment pointer is overwritten without MemFree - confirm intent */
+ gene->comment = sfp->comment;
+ sfp->comment = NULL;
+ if (is_bac_src) {
+ sfp->comment = StringSave("contains short intron that may represent a frameshift");
+ }
+ }
+ }
+ slp = SeqLocMerge (bsp, gene->location, NULL, TRUE, FALSE, FALSE);
+ if (slp != NULL) {
+ gene->location = SeqLocFree (gene->location);
+ gene->location = slp;
+ }
+ pbsp = BioseqFindFromSeqLoc (sfp->product);
+ if (pbsp != NULL) {
+ pbsp->idx.deleteme = TRUE;
+ }
+ if (!is_bac_src) {
+ sfp->idx.deleteme = TRUE;
+ ValNodeAddInt (&entityIDList, 0, bsp->idx.entityID);
+ }
+ }
+ }
+ }
+ else if (StringStr (sfp->except_text, "low-quality sequence region") == NULL) {
+ SetStringValue (&(sfp->except_text), "low-quality sequence region", ExistingTextOption_append_semi);
+ sfp->excpt = TRUE;
+ if (lip != NULL && lip->fp != NULL) {
+ txt = GetDiscrepancyItemText (vnp);
+ fprintf (lip->fp, "Added low-quality sequence region exception to %s\n", txt);
+ txt = MemFree (txt);
+ }
+ }
+ }
+
+ entityIDList = ValNodeSort (entityIDList, SortByIntvalue);
+ ValNodeUnique (&entityIDList, SortByIntvalue, ValNodeFree);
+
+ if (to_convert.head != NULL) {
+ to_convert.head = ValNodeSort(to_convert.head, SortVnpByChoiceAndPtrvalue);
+ ValNodeUnique (&(to_convert.head), SortVnpByChoiceAndPtrvalue, ValNodeFree);
+
+ ConvertListToMiscFeat (to_convert.head, FALSE, lip);
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ fprintf (lip->fp, "Converted %d contained coding regions to misc_features\n", ValNodeLen (to_convert.head));
}
+ lip->data_in_log = TRUE;
}
+
+ to_convert.head = ValNodeFree (to_convert.head);
+ }
+
+ for (vnp = entityIDList; vnp != NULL; vnp = vnp->next) {
+ DeleteMarkedObjects (vnp->data.intvalue, 0, NULL);
+ ObjMgrSetDirtyFlag (vnp->data.intvalue, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, vnp->data.intvalue, 0, 0);
}
+ ValNodeFree (entityIDList);
}
@@ -10887,6 +11274,46 @@ static ClickableItemPtr MissingGeneXrefDiscrepancy (Uint1 feature_type, SeqFeatP
return cip;
}
+Boolean Does5primerAbutGap(SeqFeatPtr sfp)
+{
+  /* TRUE when the delta segment covering position start-1 is a gap that
+   * ends exactly where the feature location starts. */
+  Int4        start, dsp_start;
+  BioseqPtr   bsp;
+  DeltaSeqPtr dsp;
+
+  if (sfp == NULL) return FALSE;
+  start = SeqLocStart (sfp->location);
+
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  if (bsp == NULL) return FALSE;
+
+  dsp = GetDeltaSeqForPosition (start - 1, bsp, &dsp_start);
+  if (dsp == NULL) return FALSE;
+  return (Boolean) (IsDeltaSeqGap (dsp)
+                    && dsp_start + GetDeltaSeqLen (dsp) == start);
+}
+
+
+Boolean Does3primerAbutGap(SeqFeatPtr sfp)
+{
+  /* TRUE when the delta segment covering position stop+1 is a gap that
+   * begins immediately after the feature location stops. */
+  Int4        stop, dsp_start;
+  BioseqPtr   bsp;
+  DeltaSeqPtr dsp;
+
+  if (sfp == NULL) return FALSE;   /* guard added: sfp->location is read next */
+  stop = SeqLocStop (sfp->location);
+
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  if (bsp == NULL) return FALSE;
+
+  dsp = GetDeltaSeqForPosition (stop + 1, bsp, &dsp_start);
+  if (dsp == NULL) return FALSE;
+  return (Boolean) (IsDeltaSeqGap (dsp) && dsp_start == stop + 1);
+}
+
static void
CheckFeatureTypeForLocationDiscrepancies
@@ -10899,7 +11326,8 @@ CheckFeatureTypeForLocationDiscrepancies
GeneRefPtr grp;
SeqFeatPtr sfp, gene_sfp;
Boolean found_match;
-
+ Boolean tmp_part, partial5, partial3;
+
if (bsp == NULL || ISA_aa (bsp->mol) || discrepancy_list == NULL || IsmRNASequenceInGenProdSet(bsp))
{
return;
@@ -10915,7 +11343,23 @@ CheckFeatureTypeForLocationDiscrepancies
gene_sfp = SeqMgrGetOverlappingGene (sfp->location, &gene_context);
if (gene_sfp != NULL && !IsGeneLocationOk (sfp, &context, gene_sfp, &gene_context, bsp) && sfp->idx.subtype != exclude_featdef)
{
- ValNodeAddPointer (discrepancy_list, 0, GeneLocationDiscrepancy(feature_type, gene_sfp, sfp));
+ if (sfp->data.choice != SEQFEAT_CDREGION || !sfp->partial) {
+ ValNodeAddPointer (discrepancy_list, 0,
+ GeneLocationDiscrepancy(feature_type, gene_sfp, sfp));
+ }
+ else {
+ CheckSeqLocForPartial(sfp->location, &partial5, &partial3);
+ if (SeqLocStrand (sfp->location) == Seq_strand_minus) {
+ tmp_part = partial5;
+ partial5 = partial3;
+ partial3 = tmp_part;
+ }
+ if ( (!partial5 || !Does5primerAbutGap(sfp))
+ && (!partial3 || !Does3primerAbutGap(sfp)) ) {
+ ValNodeAddPointer (discrepancy_list, 0,
+ GeneLocationDiscrepancy(feature_type, gene_sfp, sfp));
+ }
+ }
}
}
else if (!SeqMgrGeneIsSuppressed (grp))
@@ -10933,7 +11377,23 @@ CheckFeatureTypeForLocationDiscrepancies
}
else if (sfp->idx.subtype != exclude_featdef)
{
- ValNodeAddPointer (discrepancy_list, 0, GeneLocationDiscrepancy(feature_type, gene_sfp, sfp));
+ if (sfp->data.choice != SEQFEAT_CDREGION || !sfp->partial) {
+ ValNodeAddPointer (discrepancy_list, 0,
+ GeneLocationDiscrepancy(feature_type, gene_sfp, sfp));
+ }
+ else {
+ CheckSeqLocForPartial(sfp->location, &partial5, &partial3);
+ if (SeqLocStrand (sfp->location) == Seq_strand_minus) {
+ tmp_part = partial5;
+ partial5 = partial3;
+ partial3 = tmp_part;
+ }
+ if ( (!partial5 || !Does5primerAbutGap(sfp))
+ && (!partial3 || !Does3primerAbutGap(sfp)) ) {
+ ValNodeAddPointer (discrepancy_list, 0,
+ GeneLocationDiscrepancy(feature_type, gene_sfp, sfp));
+ }
+ }
}
}
}
@@ -11016,6 +11476,10 @@ static Boolean IsBacterialBioSource (BioSourcePtr biop)
return HasLineage(biop, "Bacteria");
}
+static Boolean IsArchaealBioSource (BioSourcePtr biop)
+{ /* Returns whatever HasLineage reports for the lineage text "Archaea". */
+  return HasLineage (biop, "Archaea");
+}
static Boolean IsEukaryotic (BioseqPtr bsp)
{
@@ -11101,7 +11565,7 @@ typedef struct cdsgeneproduct
{
ValNodePtr cds_list;
CharPtr gene_locus;
- CharPtr product_name;
+ ValNodePtr product_names;
} CDSGeneProductData, PNTR CDSGeneProductPtr;
@@ -11118,6 +11582,7 @@ static ValNodePtr CDSGeneProductListFree (ValNodePtr cds_list)
cgpp = (CDSGeneProductPtr) cds_list->data.ptrvalue;
if (cgpp != NULL) {
cgpp->cds_list = ValNodeFree (cgpp->cds_list);
+ cgpp->product_names = ValNodeFree (cgpp->product_names);
}
ValNodeFreeData (cds_list);
return NULL;
@@ -11179,7 +11644,7 @@ static void FindCDSGeneProductConflictsCallback (SeqFeatPtr sfp, Pointer userdat
{
ValNodeAddPointer (&(cgpp->cds_list), OBJ_SEQFEAT, sfp);
cgpp->gene_locus = gene_label;
- cgpp->product_name = StringSave (context.label);
+ ValNodeAddPointer( &(cgpp->product_names), 0, context.label);
ValNodeAddPointer (cds_list, 0, cgpp);
}
} else {
@@ -11188,12 +11653,11 @@ static void FindCDSGeneProductConflictsCallback (SeqFeatPtr sfp, Pointer userdat
{
cgpp_compare = (CDSGeneProductPtr) vnp->data.ptrvalue;
if (cgpp_compare != NULL
- && StringCmp (cgpp_compare->gene_locus, gene_label) == 0
- && StringCmp (cgpp_compare->product_name, context.label) != 0)
- {
- found_match = TRUE;
- vnp->choice = 1;
- ValNodeAddPointer (&(cgpp_compare->cds_list), OBJ_SEQFEAT, sfp);
+ && StringCmp (cgpp_compare->gene_locus, gene_label) == 0) {
+ found_match = TRUE;
+ vnp->choice = 1;
+ ValNodeAddPointer (&(cgpp_compare->cds_list), OBJ_SEQFEAT, sfp);
+ ValNodeAddPointer (&(cgpp_compare->product_names), 0, context.label);
}
vnp = vnp->next;
}
@@ -11203,7 +11667,7 @@ static void FindCDSGeneProductConflictsCallback (SeqFeatPtr sfp, Pointer userdat
{
ValNodeAddPointer (&(cgpp->cds_list), OBJ_SEQFEAT, sfp);
cgpp->gene_locus = gene_label;
- cgpp->product_name = StringSave (context.label);
+ ValNodeAddPointer (&(cgpp->product_names), 0, context.label);
ValNodeAddPointer (cds_list, 0, cgpp);
}
}
@@ -11218,6 +11682,10 @@ extern void FindCDSGeneProductConflicts (ValNodePtr PNTR discrepancy_list, ValNo
CharPtr bad_cat_fmt = "%d coding regions have the same gene name(%s) as another coding region but a different product.";
ClickableItemPtr dip;
ValNodePtr item_list = NULL, cds_vnp;
+ CharPtr prod_name;
+ ValNodePtr prod_next;
+ Boolean prod_diff;
+ ValNodePtr sub = NULL;
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
VisitGenProdSetFeatures (vnp->data.ptrvalue, &cds_list, FindCDSGeneProductConflictsCallback);
@@ -11230,14 +11698,33 @@ extern void FindCDSGeneProductConflicts (ValNodePtr PNTR discrepancy_list, ValNo
/* for each item, replace structure used for search with just the feature */
for (vnp = cds_list; vnp != NULL; vnp = vnp->next)
{
+ prod_diff = FALSE;
cgpp = (CDSGeneProductPtr) vnp->data.ptrvalue;
if (cgpp != NULL)
{
+ prod_name = StringSave(cgpp->product_names->data.ptrvalue);
+ prod_next = cgpp->product_names->next;
+ while (prod_next != NULL) {
+ if (StringCmp(prod_name, prod_next->data.ptrvalue)) {
+ prod_diff = TRUE;
+ break;
+ }
+ prod_next = prod_next->next;
+ }
+ if (!prod_diff) {
+ cgpp->cds_list = ValNodeFree (cgpp->cds_list);
+ cgpp->product_names = ValNodeFree (cgpp->product_names);
+ vnp->choice = 0;
+ vnp->data.ptrvalue = NULL;
+ cgpp = MemFree (cgpp);
+ continue;
+ }
+
for (cds_vnp = cgpp->cds_list; cds_vnp != NULL; cds_vnp = cds_vnp->next) {
ValNodeAddPointer (&item_list, OBJ_SEQFEAT, cds_vnp->data.ptrvalue);
}
-
- cgpp->product_name = MemFree (cgpp->product_name);
+
+ cgpp->product_names = ValNodeFree (cgpp->product_names);
dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
if (dip != NULL)
@@ -11248,8 +11735,7 @@ extern void FindCDSGeneProductConflicts (ValNodePtr PNTR discrepancy_list, ValNo
dip->item_list = cgpp->cds_list;
cgpp->cds_list = NULL;
- vnp->choice = 0;
- vnp->data.ptrvalue = dip;
+ ValNodeAddPointer(&sub, 0, dip);
} else {
cgpp->cds_list = ValNodeFree (cgpp->cds_list);
vnp->choice = 0;
@@ -11260,7 +11746,7 @@ extern void FindCDSGeneProductConflicts (ValNodePtr PNTR discrepancy_list, ValNo
}
}
- if (cds_list != NULL)
+ if (sub != NULL)
{
dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
if (dip != NULL)
@@ -11272,7 +11758,7 @@ extern void FindCDSGeneProductConflicts (ValNodePtr PNTR discrepancy_list, ValNo
dip->datafree_func = NULL;
dip->callback_data = NULL;
dip->item_list = item_list;
- dip->subcategories = cds_list;
+ dip->subcategories = sub;
ValNodeAddPointer (discrepancy_list, 0, dip);
}
}
@@ -11363,7 +11849,7 @@ static void FindECNumberNotes (SeqFeatPtr sfp, Pointer userdata)
ProtRefPtr prp;
ValNodePtr vnp;
- if (sfp == NULL || userdata == NULL || StringHasNoText (sfp->comment))
+ if (sfp == NULL || userdata == NULL)
{
return;
}
@@ -12184,9 +12670,33 @@ NLM_EXTERN void MarkOverlappingCDSs (ValNodePtr item_list, Pointer data, LogInfo
}
+
+static Boolean IgnoreContainedCDS (SeqFeatPtr sfp)
+{
+  /* Decides whether a CDS is left out of the contained-CDS comparison. */
+  ProtRefPtr prot;
+  CharPtr    first_name;
+
+  /* a missing feature, or one whose whole comment is "alternative", is ignored */
+  if (sfp == NULL || StringICmp (sfp->comment, "alternative") == 0) {
+    return TRUE;
+  }
+  prot = GetProtRefForFeature (sfp);
+  if (prot == NULL || prot->name == NULL) {
+    return FALSE;
+  }
+  /* only the first entry of the protein name list is examined */
+  first_name = (CharPtr) prot->name->data.ptrvalue;
+  return (Boolean) (StringISearch (first_name, "mobilization") != NULL
+                    || StringCmp (first_name, "dnaK") == 0
+                    || StringCmp (first_name, "mob") == 0);
+}
+
+
typedef struct twolists {
ValNodePtr first_list;
ValNodePtr second_list;
+ ValNodePtr third_list;
} TwoListsData, PNTR TwoListsPtr;
static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
@@ -12197,10 +12707,12 @@ static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
ValNodePtr cds_list = NULL;
ValNodePtr contained_list_this_strand = NULL, contained_list_other_strand = NULL, vnp, vnp_next, last;
ValNodePtr last_this_strand = NULL, last_other_strand = NULL;
+ ValNodePtr contained_list_with_note = NULL, last_note = NULL;
Int2 loc_compare;
Uint1 strand, strand_compare;
+ CharPtr note = "completely contained in another CDS";
- if (bsp == NULL || userdata == NULL)
+ if (bsp == NULL || userdata == NULL || IsEukaryotic(bsp))
{
return;
}
@@ -12211,10 +12723,13 @@ static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
sfp != NULL;
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, FEATDEF_CDS, &context))
{
- ValNodeAddPointer (&last, OBJ_SEQFEAT, sfp);
- if (cds_list == NULL)
+ if (!IgnoreContainedCDS(sfp))
{
- cds_list = last;
+ ValNodeAddPointer (&last, OBJ_SEQFEAT, sfp);
+ if (cds_list == NULL)
+ {
+ cds_list = last;
+ }
}
}
@@ -12230,8 +12745,27 @@ static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
loc_compare = SeqLocCompare (sfp->location, sfp_compare->location);
if (loc_compare == SLC_A_IN_B || loc_compare == SLC_B_IN_A || loc_compare == SLC_A_EQ_B)
{
+ if (StringICmp(sfp->comment, note) == 0) {
+ if (!AlreadyInList (contained_list_with_note, sfp)) {
+ ValNodeAddPointer (&last_note, OBJ_SEQFEAT, sfp);
+ if (contained_list_with_note == NULL)
+ {
+ contained_list_with_note = last_note;
+ }
+ }
+ }
+ if (StringICmp(sfp_compare->comment, note) == 0) {
+ if (!AlreadyInList (contained_list_with_note, sfp_compare)) {
+ ValNodeAddPointer (&last_note, OBJ_SEQFEAT, sfp_compare);
+ if (contained_list_with_note == NULL)
+ {
+ contained_list_with_note = last_note;
+ }
+ }
+ }
if (StrandOk (strand, strand_compare)) {
- if (!AlreadyInList (contained_list_this_strand, sfp))
+ if (StringICmp(sfp->comment, note) != 0
+ && !AlreadyInList (contained_list_this_strand, sfp))
{
ValNodeAddPointer (&last_this_strand, OBJ_SEQFEAT, sfp);
if (contained_list_this_strand == NULL)
@@ -12239,7 +12773,9 @@ static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
contained_list_this_strand = last_this_strand;
}
}
- if (!AlreadyInList (contained_list_this_strand, sfp_compare))
+
+ if (StringICmp(sfp_compare->comment, note) != 0
+ && !AlreadyInList (contained_list_this_strand, sfp_compare))
{
ValNodeAddPointer (&last_this_strand, OBJ_SEQFEAT, sfp_compare);
if (contained_list_this_strand == NULL)
@@ -12247,8 +12783,9 @@ static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
contained_list_this_strand = last_this_strand;
}
}
- } else {
- if (!AlreadyInList (contained_list_other_strand, sfp))
+ } else { // other_strand
+ if (StringICmp(sfp->comment, note) != 0
+ && !AlreadyInList (contained_list_other_strand, sfp))
{
ValNodeAddPointer (&last_other_strand, OBJ_SEQFEAT, sfp);
if (contained_list_other_strand == NULL)
@@ -12256,7 +12793,9 @@ static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
contained_list_other_strand = last_other_strand;
}
}
- if (!AlreadyInList (contained_list_other_strand, sfp_compare))
+
+ if (StringICmp(sfp_compare->comment, note) != 0
+ && !AlreadyInList (contained_list_other_strand, sfp_compare))
{
ValNodeAddPointer (&last_other_strand, OBJ_SEQFEAT, sfp_compare);
if (contained_list_other_strand == NULL)
@@ -12272,6 +12811,48 @@ static void FindContainedCDSs (BioseqPtr bsp, Pointer userdata)
ValNodeLink (&(two_lists->first_list), contained_list_this_strand);
ValNodeLink (&(two_lists->second_list), contained_list_other_strand);
+ ValNodeLink (&(two_lists->third_list), contained_list_with_note);
+}
+
+
+static void ConvertContainedCDSToMiscFeat (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  /* For every pair of features in item_list whose locations nest, queue the
+   * inner one, then hand the de-duplicated queue to ConvertListToMiscFeat.
+   * data is not used; lip, when non-NULL, receives a summary line. */
+  ValNodePtr   vnp, vnp_next;
+  SeqFeatPtr   sfp, sfp_compare;
+  Int2         loc_compare;
+  ValNodeBlock to_convert;
+
+  InitValNodeBlock (&to_convert, NULL);
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    /* only non-NULL feature items can be compared; anything else is skipped */
+    if (vnp->choice != OBJ_SEQFEAT || (sfp = (SeqFeatPtr) vnp->data.ptrvalue) == NULL) continue;
+    for (vnp_next = vnp->next; vnp_next != NULL; vnp_next = vnp_next->next) {
+      if (vnp_next->choice != OBJ_SEQFEAT || (sfp_compare = (SeqFeatPtr) vnp_next->data.ptrvalue) == NULL) continue;
+      loc_compare = SeqLocCompare (sfp->location, sfp_compare->location);
+      if (loc_compare == SLC_A_IN_B) {
+        ValNodeAddPointerToEnd (&to_convert, OBJ_SEQFEAT, sfp);
+      } else if (loc_compare == SLC_B_IN_A) {
+        ValNodeAddPointerToEnd (&to_convert, OBJ_SEQFEAT, sfp_compare);
+      }
+    }
+  }
+  if (to_convert.head != NULL) {
+    /* a feature nested in several others is queued repeatedly: sort, then unique */
+    to_convert.head = ValNodeSort (to_convert.head, SortVnpByChoiceAndPtrvalue);
+    ValNodeUnique (&(to_convert.head), SortVnpByChoiceAndPtrvalue, ValNodeFree);
+
+    ConvertListToMiscFeat (to_convert.head, TRUE, lip);
+    if (lip != NULL) {
+      if (lip->fp != NULL) {
+        fprintf (lip->fp, "Converted %d contained coding regions to misc_features\n", ValNodeLen (to_convert.head));
+      }
+      lip->data_in_log = TRUE;
+    }
+    to_convert.head = ValNodeFree (to_convert.head);
+  }
+}
@@ -12281,8 +12862,10 @@ extern void AddContainedCodingRegionDiscrepancies (ValNodePtr PNTR discrepancy_l
CharPtr bad_fmt = "%d coding regions are completely contained in another coding region.";
CharPtr same_strand_fmt = "%d coding regions are completely contained in another coding region on the same strand.";
CharPtr other_strand_fmt = "%d coding regions are completely contained in another coding region, but on the opposite strand.";
+ CharPtr note_fmt = "%d coding regions are completely contained in another coding region but have note.";
TwoListsData two_lists;
ValNodePtr vnp, subcategories = NULL, item_list;
+ Int4 list_cnt;
if (discrepancy_list == NULL)
@@ -12296,9 +12879,26 @@ extern void AddContainedCodingRegionDiscrepancies (ValNodePtr PNTR discrepancy_l
VisitBioseqsInSep (vnp->data.ptrvalue, &two_lists, FindContainedCDSs);
}
- if (two_lists.first_list != NULL && two_lists.second_list != NULL) {
- ValNodeAddPointer (&subcategories, 0, NewClickableItem (DISC_CONTAINED_CDS, same_strand_fmt, two_lists.first_list));
- ValNodeAddPointer (&subcategories, 0, NewClickableItem (DISC_CONTAINED_CDS, other_strand_fmt, two_lists.second_list));
+
+ list_cnt = 0;
+ if (two_lists.first_list != NULL) list_cnt ++;
+ if (two_lists.second_list != NULL) list_cnt ++;
+ if (two_lists.third_list != NULL) list_cnt ++;
+ if (list_cnt > 1) {
+ if (two_lists.first_list != NULL) {
+ ValNodeAddPointer (&subcategories, 0,
+ NewClickableItem (DISC_CONTAINED_CDS, same_strand_fmt,
+ two_lists.first_list));
+ }
+ if (two_lists.second_list != NULL) {
+ ValNodeAddPointer (&subcategories, 0,
+ NewClickableItem (DISC_CONTAINED_CDS, other_strand_fmt,
+ two_lists.second_list));
+ }
+ if (two_lists.third_list != NULL) {
+ ValNodeAddPointer (&subcategories, 0,
+ NewClickableItem (DISC_CONTAINED_CDS, note_fmt, two_lists.third_list));
+ }
item_list = ItemListFromSubcategories (subcategories);
dip = NewClickableItem (DISC_CONTAINED_CDS, bad_fmt, item_list);
dip->subcategories = subcategories;
@@ -12308,12 +12908,17 @@ extern void AddContainedCodingRegionDiscrepancies (ValNodePtr PNTR discrepancy_l
} else if (two_lists.second_list != NULL) {
ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (DISC_CONTAINED_CDS, other_strand_fmt, two_lists.second_list));
}
+ else if (two_lists.third_list != NULL) {
+ ValNodeAddPointer (discrepancy_list, 0,
+ NewClickableItem (DISC_CONTAINED_CDS, note_fmt, two_lists.third_list));
+ }
}
typedef struct cdsrnaoverlap {
ValNodePtr cds_in_rna;
ValNodePtr rna_in_cds;
+ ValNodePtr trna_in_cds;
ValNodePtr exact_match;
ValNodePtr overlap_same_strand;
ValNodePtr overlap_opp_strand;
@@ -12329,16 +12934,21 @@ static void FindCDSRNAOverlaps (BioseqPtr bsp, Pointer data)
SeqMgrFeatContext fcontext;
Int2 cmp;
Uint1 strand1, strand2;
+ Boolean ignore_trna = FALSE;
if (bsp == NULL || data == NULL) return;
+ ignore_trna = IsEukaryotic(bsp);
+
p = (CDSRNAOverlapPtr) data;
for (rna = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_RNA, 0, &fcontext);
rna != NULL;
rna = SeqMgrGetNextFeature (bsp, rna, SEQFEAT_RNA, 0, &fcontext))
{
- if (rna->idx.subtype == FEATDEF_mRNA) continue;
+ if (rna->idx.subtype == FEATDEF_mRNA || rna->idx.subtype == FEATDEF_ncRNA
+ || (rna->idx.subtype == FEATDEF_tRNA && ignore_trna)) continue;
+ if (IsShortrRNA(rna)) continue;
ValNodeAddPointer (&rna_list, OBJ_SEQFEAT, rna);
}
@@ -12368,8 +12978,13 @@ static void FindCDSRNAOverlaps (BioseqPtr bsp, Pointer data)
}
else if (cmp == SLC_B_IN_A)
{
- ValNodeAddPointer (&(p->rna_in_cds), OBJ_SEQFEAT, sfp);
- ValNodeAddPointer (&(p->rna_in_cds), OBJ_SEQFEAT, rna);
+ if (rna->idx.subtype == FEATDEF_tRNA) {
+ ValNodeAddPointer (&(p->trna_in_cds), OBJ_SEQFEAT, sfp);
+ ValNodeAddPointer (&(p->trna_in_cds), OBJ_SEQFEAT, rna);
+ } else {
+ ValNodeAddPointer (&(p->rna_in_cds), OBJ_SEQFEAT, sfp);
+ ValNodeAddPointer (&(p->rna_in_cds), OBJ_SEQFEAT, rna);
+ }
ValNodeAddPointer (&(p->all), OBJ_SEQFEAT, sfp);
ValNodeAddPointer (&(p->all), OBJ_SEQFEAT, rna);
}
@@ -12455,6 +13070,13 @@ extern void AddRNACDSOverlapDiscrepancies (ValNodePtr PNTR discrepancy_list, Val
"%d coding regions completely contain RNAs",
d.rna_in_cds));
}
+ if (d.trna_in_cds != NULL)
+ {
+ ValNodeAddPointer (&(dip->subcategories), 0,
+ DiscrepancyForPairs (DISC_RNA_CDS_OVERLAP,
+ "%d coding regions completely contain tRNAs",
+ d.trna_in_cds));
+ }
if (d.overlap != NULL)
{
overlap_dip = DiscrepancyForPairs (DISC_RNA_CDS_OVERLAP,
@@ -12612,7 +13234,7 @@ extern void FindShortSequences (ValNodePtr PNTR discrepancy_list, ValNodePtr sep
dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
if (dip != NULL)
{
- dip->clickable_item_type = DISC_SHORT_CONTIG;
+ dip->clickable_item_type = DISC_SHORT_SEQUENCE;
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
sprintf (dip->description, bad_fmt, ValNodeLen (bioseq_list));
dip->callback_func = NULL;
@@ -12647,7 +13269,7 @@ static void FindShortProtSequencesCallback (BioseqPtr bsp, Pointer userdata)
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
if (sdp != NULL && (mip = (MolInfoPtr) sdp->data.ptrvalue) != NULL
- && mip->completeness != 1) {
+ && mip->completeness != 1 && mip->completeness != 0) {
return;
}
@@ -13395,13 +14017,6 @@ static Boolean MayContainPlural (CharPtr pattern, CharPtr search)
}
-
-static Boolean ContainsBracketsOrParentheses (CharPtr pattern, CharPtr search)
-{
- return ContainsNorMoreSetsOfBracketsOrParentheses (search, 1);
-}
-
-
static Boolean ContainsTwoSetsOfBracketsOrParentheses (CharPtr pattern, CharPtr search)
{
return ContainsNorMoreSetsOfBracketsOrParentheses (search, 2);
@@ -14476,7 +15091,7 @@ static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata)
SeqFeatPtr cds;
BioSourcePtr biop = NULL;
- if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || sfp->data.value.ptrvalue == NULL
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_PROT || sfp->data.value.ptrvalue == NULL
|| userdata == NULL)
{
return;
@@ -14532,6 +15147,7 @@ static ClickableItemPtr SuspectPhraseEx (Uint4 clickable_item_type, CharPtr phra
{
return NULL;
}
+
if (quote_phrase) {
bad_fmt = bad_fmt_quote;
} else {
@@ -14635,8 +15251,9 @@ static void FindSuspectProductNamesWithRulesCallback (SeqFeatPtr sfp, Pointer us
ProtRefPtr prp;
BioseqPtr bsp;
SeqFeatPtr cds;
-
- if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || sfp->data.value.ptrvalue == NULL
+ ValNodePtr newnode;
+
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_PROT || sfp->data.value.ptrvalue == NULL
|| (srlist = (SuspectRuleFeatsPtr)userdata) == NULL)
{
return;
@@ -14663,7 +15280,8 @@ static void FindSuspectProductNamesWithRulesCallback (SeqFeatPtr sfp, Pointer us
{
if (DoesStringMatchSuspectRule (prp->name->data.ptrvalue, sfp, rule))
{
- ValNodeAddPointer (&(srlist->feature_list[k]), OBJ_SEQFEAT, sfp);
+ newnode = ValNodeAddPointer (&(srlist->feature_list[k]), OBJ_SEQFEAT, sfp);
+ if (newnode != NULL) newnode->fatal = rule->fatal;
}
}
}
@@ -14706,6 +15324,7 @@ FindSuspectProductNamesWithRules
ClickableItemPtr dip, tdip = NULL;
ValNodePtr subcategories = NULL;
Int4 num_cat = Fix_type_gene + 1;
+ SeqEntryPtr orig_sep;
if (discrepancy_list == NULL) return;
@@ -14731,10 +15350,13 @@ FindSuspectProductNamesWithRules
name_cat[k] = NULL;
}
+ orig_sep = SeqEntrySetScope (NULL);
for (vnp = sep_list; vnp != NULL; vnp = vnp->next)
{
+ SeqEntrySetScope (vnp->data.ptrvalue);
VisitGenProdSetFeatures (vnp->data.ptrvalue, &srdata, FindSuspectProductNamesWithRulesCallback);
}
+ SeqEntrySetScope (orig_sep);
for (k = 0, rule = srdata.rule_list; k < srdata.num_rules && rule != NULL; k++, rule = rule->next)
{
@@ -14744,13 +15366,13 @@ FindSuspectProductNamesWithRules
dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
if (rule->rule_type == Fix_type_typo) {
dip->clickable_item_type = DISC_PRODUCT_NAME_TYPO;
- } else if (rule->rule_type == Fix_type_quickfix) {
+ } else if (rule->rule_type == Fix_type_quickfix || rule->replace != NULL){
dip->clickable_item_type = DISC_PRODUCT_NAME_QUICKFIX;
} else {
dip->clickable_item_type = DISC_SUSPECT_PRODUCT_NAME;
}
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (summ) + 15));
- sprintf (dip->description, fmt, ValNodeLen (srdata.feature_list[k]), summ);
+ sprintf(dip->description, fmt, ValNodeLen (srdata.feature_list[k]), summ);
summ = MemFree (summ);
dip->callback_func = NULL;
dip->datafree_func = NULL;
@@ -14764,6 +15386,7 @@ FindSuspectProductNamesWithRules
ValNodeLinkCopy (&master_list, srdata.feature_list[k]);
}
}
+
if (master_list != NULL)
{
for (k = 0; k < num_cat; k++) {
@@ -15248,8 +15871,9 @@ static void FindUnknownProteinsWithECNumbersCallback (SeqFeatPtr sfp, Pointer us
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
if (prp->name == NULL || prp->ec == NULL) return;
- if (StringISearch (prp->name->data.ptrvalue, "hypothetical protein") != NULL
- || StringISearch (prp->name->data.ptrvalue, "unknown protein") != NULL)
+ //if (StringISearch (prp->name->data.ptrvalue, "hypothetical protein") != NULL
+ // || StringISearch (prp->name->data.ptrvalue, "unknown protein") != NULL)
+ if (!StrICmp(prp->name->data.ptrvalue, "hypothetical protein") || !StrICmp(prp->name->data.ptrvalue, "unknown protein"))
{
feature_list = (ValNodePtr PNTR) userdata;
ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
@@ -15257,6 +15881,47 @@ static void FindUnknownProteinsWithECNumbersCallback (SeqFeatPtr sfp, Pointer us
}
+/*
+ * Strip the EC numbers from the protein behind each feature in item_list
+ * and report how many proteins were cleared.
+ *
+ * item_list: ValNode list; only OBJ_SEQFEAT nodes with non-NULL data are
+ *            examined.  A coding region uses the ProtRef returned by
+ *            GetProtRefForFeature; a protein feature uses its own ProtRef
+ *            and is paired with the CDS found for the Bioseq at its
+ *            location.  Other feature types are left untouched.
+ * data:      not used.
+ * lip:       optional log; when non-NULL a summary line is written to
+ *            lip->fp (if set) and lip->data_in_log is raised.
+ *
+ * Despite the name, the EC numbers are deleted and NOT copied to the
+ * coding region comment (JIRA: SQD-3470).
+ */
+static void MoveEcNumberToNote(ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr node;
+  SeqFeatPtr feat, coding_region;
+  ProtRefPtr prot;
+  Int4       num_cleared = 0;
+
+  for (node = item_list; node != NULL; node = node->next) {
+    if (node->choice != OBJ_SEQFEAT) {
+      continue;
+    }
+    feat = (SeqFeatPtr) node->data.ptrvalue;
+    if (feat == NULL) {
+      continue;
+    }
+
+    coding_region = NULL;
+    prot = NULL;
+    if (feat->data.choice == SEQFEAT_CDREGION) {
+      coding_region = feat;
+      prot = GetProtRefForFeature (coding_region);
+    } else if (feat->data.choice == SEQFEAT_PROT) {
+      prot = feat->data.value.ptrvalue;
+      coding_region = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (feat->location), NULL);
+    }
+
+    /* only proteins that have a coding region and actually carry EC numbers count */
+    if (coding_region == NULL || prot == NULL || prot->ec == NULL) {
+      continue;
+    }
+    prot->ec = ValNodeFreeData (prot->ec);
+    num_cleared++;
+  }
+
+  if (lip == NULL) {
+    return;
+  }
+  if (lip->fp != NULL) {
+    fprintf (lip->fp, "Removed EC numbers from %d coding regions with hypothetical proteins\n", num_cleared);
+  }
+  lip->data_in_log = TRUE;
+}
+
+
extern void FindUnknownProteinsWithECNumbers (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
ValNodePtr feature_list = NULL, vnp;
@@ -15811,6 +16476,8 @@ static void AddRNANumList (ValNodePtr PNTR discrepancy_list, ValNodePtr list_sta
if (cp != NULL) {
cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
MemSet (cip, 0, sizeof (ClickableItemData));
+ cip->clickable_item_type =
+ ((ClickableItemPtr)list_start->data.ptrvalue)->clickable_item_type;
cip->subcategories = list_start;
copy_len = cp - desc_str;
cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (desc_fmt) + 15 + copy_len));
@@ -15896,19 +16563,14 @@ static void CountShorttRNA (SeqFeatPtr sfp, Pointer data)
static void CountLongtRNA (SeqFeatPtr sfp, Pointer data)
{
- /* variables for commented out section
SeqMgrFeatContext fcontext;
CharPtr label;
- */
Int4 len;
if (sfp == NULL || sfp->idx.subtype != FEATDEF_tRNA || data == NULL) return;
len = SeqLocLen (sfp->location);
if (len <= 90) return;
- ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
-
-/* remove exceptions:
if (len <= 100) {
if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &fcontext) == sfp) {
label = fcontext.label;
@@ -15917,21 +16579,7 @@ static void CountLongtRNA (SeqFeatPtr sfp, Pointer data)
if (StringCmp (label, "Sec") == 0) return;
}
}
-
- ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
-*/
-
- /*
- if ((len = SeqLocLen (sfp->location)) > 90) {
- if (len > 100
- || (sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &fcontext)) == NULL
- || (StringCmp (fcontext.label, "Ser") != 0
- && StringCmp (fcontext.label, "Leu") != 0
- && StringCmp (fcontext.label, "Sec") != 0) ) {
- if (sfp) ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
- }
- }
- */
+ ValNodeAddPointer((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
}
@@ -15965,6 +16613,8 @@ static void FindRNAsWithoutProductsCallback (SeqFeatPtr sfp, Pointer data)
ValNode field;
FeatureFieldPtr ff;
CharPtr str;
+ RnaRefPtr rrp;
+ RNAGenPtr rgp;
if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) {
return;
@@ -15978,8 +16628,19 @@ static void FindRNAsWithoutProductsCallback (SeqFeatPtr sfp, Pointer data)
} else if (sfp->idx.subtype == FEATDEF_tmRNA) {
/* don't require products for tmRNA */
return;
+ } else if (sfp->idx.subtype == FEATDEF_ncRNA) {
+ /* if ncRNA has a class other than "other", don't need a product */
+ if ((rrp = (RnaRefPtr)(sfp->data.value.ptrvalue)) != NULL &&
+ rrp->ext.choice == 3 &&
+ (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL &&
+ !StringHasNoText(rgp->_class) &&
+ StringICmp(rgp->_class, "other") != 0) {
+ return;
+ }
}
+
+
ff = FeatureFieldNew ();
ff->type = Macro_feature_type_any;
ValNodeAddInt (&ff->field, FeatQualChoice_legal_qual, Feat_qual_legal_product);
@@ -15998,9 +16659,9 @@ static void FindRNAsWithoutProductsCallback (SeqFeatPtr sfp, Pointer data)
static ClickableItemPtr PseudoAndNonPseudoClickableItem (Uint4 clickable_item_type, CharPtr format, ValNodePtr item_list)
{
- ValNodePtr pseudo_list = NULL, non_pseudo_list = NULL, vnp;
- CharPtr pseudo_fmt = " and are pseudo", non_pseudo_fmt = " and are not pseudo";
- ClickableItemPtr cip, pseudo_cip = NULL, non_pseudo_cip = NULL;
+ ValNodePtr non_pseudo_list = NULL, vnp;
+ CharPtr non_pseudo_fmt = " and are not pseudo";
+ ClickableItemPtr non_pseudo_cip = NULL;
if (item_list == NULL) {
return NULL;
@@ -16008,22 +16669,11 @@ static ClickableItemPtr PseudoAndNonPseudoClickableItem (Uint4 clickable_item_ty
for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
if (vnp->choice == OBJ_SEQFEAT) {
- if (IsPseudo(vnp->data.ptrvalue)) {
- ValNodeAddPointer (&pseudo_list, OBJ_SEQFEAT, vnp->data.ptrvalue);
- } else {
+ if (!IsPseudo(vnp->data.ptrvalue)) {
ValNodeAddPointer (&non_pseudo_list, OBJ_SEQFEAT, vnp->data.ptrvalue);
}
}
}
- if (pseudo_list != NULL) {
- pseudo_cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
- MemSet (pseudo_cip, 0, sizeof (ClickableItemData));
- pseudo_cip->clickable_item_type = clickable_item_type;
- pseudo_cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (format) + StringLen (pseudo_fmt) + 15));
- sprintf (pseudo_cip->description, format, ValNodeLen (pseudo_list));
- StringCat (pseudo_cip->description, pseudo_fmt);
- pseudo_cip->item_list = pseudo_list;
- }
if (non_pseudo_list != NULL) {
non_pseudo_cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
@@ -16035,16 +16685,7 @@ static ClickableItemPtr PseudoAndNonPseudoClickableItem (Uint4 clickable_item_ty
non_pseudo_cip->item_list = non_pseudo_list;
}
- if (pseudo_cip == NULL) {
- cip = non_pseudo_cip;
- } else if (non_pseudo_cip == NULL) {
- cip = pseudo_cip;
- } else {
- cip = NewClickableItem (clickable_item_type, format, item_list);
- ValNodeAddPointer (&(cip->subcategories), 0, non_pseudo_cip);
- ValNodeAddPointer (&(cip->subcategories), 0, pseudo_cip);
- }
- return cip;
+ return non_pseudo_cip;
}
@@ -16553,7 +17194,7 @@ static void PercentNDiscrepanciesForSeqEntry (ValNodePtr PNTR discrepancy_list,
{
SeqEntryPtr sep;
ValNodePtr vnp, list = NULL;
- CharPtr top_fmt = "%d sequences have > 5%% Ns";
+  /* printf-style format (note the %d): a literal percent sign must be written "%%" */
+  CharPtr top_fmt = "%d sequences have > 5%% Ns";
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
sep = (SeqEntryPtr) vnp->data.ptrvalue;
@@ -17031,16 +17672,26 @@ typedef struct duplicatequal {
ValNodePtr qual;
} DuplicateQualData, PNTR DuplicateQualPtr;
+
static DuplicateQualPtr DuplicateQualNew (Uint1 choice, Pointer data, ValNodePtr qual)
{
DuplicateQualPtr dq;
SourceQualChoicePtr s;
+ ValNodePtr list;
dq = (DuplicateQualPtr) MemNew (sizeof (DuplicateQualData));
dq->choice = choice;
dq->data = data;
dq->qual = AsnIoMemCopy (qual, (AsnReadFunc) FieldTypeAsnRead, (AsnWriteFunc) FieldTypeAsnWrite);
- dq->val = GetFieldValueForObject (choice, data, dq->qual, NULL);
+ if (dq->qual->choice == FieldType_dblink) {
+ /* compare as sorted list of semicolon-delimited strings */
+ list = GetMultipleFieldValuesForObject (dq->choice, dq->data, dq->qual, NULL, NULL);
+ list = ValNodeSort (list, SortVnpByString);
+ dq->val = ValNodeMergeStrsEx (list, "; ");
+ list = ValNodeFreeData (list);
+ } else {
+ dq->val = GetFieldValueForObject (choice, data, dq->qual, NULL);
+ }
if (StringHasNoText (dq->val) && dq->qual != NULL && dq->qual->choice == FieldType_source_qual
&& (s = (SourceQualChoicePtr) dq->qual->data.ptrvalue) != NULL
&& s->choice == SourceQualChoice_location) {
@@ -17311,15 +17962,47 @@ static ValNodePtr SourceQualListForOnCallerTest (SeqEntryPtr sep, ValNodePtr obj
}
+/* Results of GetMultiType */
+#define SAME_MULTI 1      /* every value in the list is identical */
+#define SOME_DUP_MULTI 2  /* values differ, but at least two are identical */
+#define ALL_DIF_MULTI 3   /* no two values are identical */
+
+/*
+ * Classify the string values stored in qual_list (data.ptrvalue of each
+ * node) as SAME_MULTI, SOME_DUP_MULTI or ALL_DIF_MULTI.
+ *
+ * An empty or single-element list yields SAME_MULTI.
+ *
+ * The list is deliberately NOT sorted here: sorting relinks the nodes and
+ * can change which node is the head, while the caller only holds its own
+ * copy of the head pointer and would later free just a suffix of the list.
+ * Comparing every pair leaves the list untouched and gives the same answer.
+ */
+static int GetMultiType(ValNodePtr qual_list)
+{
+  ValNodePtr vnp, vnp2;
+  Boolean    all_same = TRUE, some_dup = FALSE;
+
+  /* stop as soon as both flags have reached their final value */
+  for (vnp = qual_list; vnp != NULL && (all_same || !some_dup); vnp = vnp->next) {
+    for (vnp2 = vnp->next; vnp2 != NULL; vnp2 = vnp2->next) {
+      if (StrCmp (vnp->data.ptrvalue, vnp2->data.ptrvalue) == 0) {
+        some_dup = TRUE;
+      } else {
+        all_same = FALSE;
+      }
+    }
+  }
+
+  if (all_same) {
+    return SAME_MULTI;
+  }
+  return some_dup ? SOME_DUP_MULTI : ALL_DIF_MULTI;
+}
+
+
+
static ClickableItemPtr FindMultipleSourceQuals (ValNodePtr qual, ValNodePtr item_list)
{
ClickableItemPtr cip = NULL;
- ValNodePtr vnp;
- StringConstraintPtr scp;
- CharPtr str1, str2, qualname, fmt;
+ ValNodePtr vnp, qual_list = NULL;
+ CharPtr qualname, fmt;
CharPtr has_multi_fmt = "%%d sources have multiple %s qualifiers";
ValNodePtr has_multi = NULL;
+ ValNodePtr has_same_multi = NULL, has_some_dup_multi = NULL;
+ ValNodePtr has_all_dif_multi = NULL;
ValNodePtr src_choice;
+ int multi_type, multi_type_cnt=0;
+ ValNodePtr subcat = NULL;
if (qual == NULL || item_list == NULL) {
return NULL;
@@ -17330,31 +18013,63 @@ static ClickableItemPtr FindMultipleSourceQuals (ValNodePtr qual, ValNodePtr ite
return NULL;
}
- scp = StringConstraintNew ();
- scp->not_present = TRUE;
- scp->match_location = String_location_equals;
-
for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
- str1 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, qual, NULL);
- if (str1 != NULL) {
- scp->match_text = str1;
- str2 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, qual, scp);
- if (str2 != NULL) {
- ValNodeAddPointer (&has_multi, vnp->choice, vnp->data.ptrvalue);
- str2 = MemFree (str2);
- }
- str1 = MemFree (str1);
- }
+ qual_list = GetMultipleSourceQualsFromBioSource (
+ GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue),
+ (SourceQualChoicePtr) qual->data.ptrvalue, NULL);
+ if (ValNodeLen(qual_list) > 1) {
+ multi_type = GetMultiType(qual_list);
+ switch (multi_type) {
+ case SAME_MULTI:
+ ValNodeAddPointer (&has_same_multi, vnp->choice, vnp->data.ptrvalue);
+ ValNodeAddPointer (&has_multi, vnp->choice, vnp->data.ptrvalue);
+ break;
+ case SOME_DUP_MULTI:
+ ValNodeAddPointer (&has_some_dup_multi, vnp->choice, vnp->data.ptrvalue);
+ ValNodeAddPointer (&has_multi, vnp->choice, vnp->data.ptrvalue);
+ break;
+ case ALL_DIF_MULTI:
+ ValNodeAddPointer (&has_all_dif_multi, vnp->choice, vnp->data.ptrvalue);
+ ValNodeAddPointer (&has_multi, vnp->choice, vnp->data.ptrvalue);
+ break;
+ }
+ //ValNodeAddPointer (&has_multi, vnp->choice, vnp->data.ptrvalue);
+ }
+ qual_list = ValNodeFree(qual_list);
}
- if (has_multi != NULL) {
- qualname = SummarizeFieldType (qual);
- fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (has_multi_fmt) + StringLen (qualname)));
+ qualname = SummarizeFieldType (qual);
+ fmt= (CharPtr) MemNew (sizeof (Char) * (StringLen (has_multi_fmt) + StringLen (qualname)));
+ if (has_same_multi) {
sprintf (fmt, has_multi_fmt, qualname);
- cip = NewClickableItem (DISC_DUP_SRC_QUAL, fmt, has_multi);
- fmt = MemFree (fmt);
- qualname = MemFree (qualname);
+ SetStringValue (&fmt, ", same value", ExistingTextOption_append_none);
+ cip = NewClickableItem (DISC_DUP_SRC_QUAL, fmt, has_same_multi);
+ ValNodeAddPointer(&subcat, 0, cip);
+ multi_type_cnt ++;
}
+ if (has_some_dup_multi) {
+ sprintf (fmt, has_multi_fmt, qualname);
+ SetStringValue (&fmt, ", some duplicates", ExistingTextOption_append_none);
+ cip = NewClickableItem (DISC_DUP_SRC_QUAL, fmt, has_some_dup_multi);
+ ValNodeAddPointer(&subcat, 0, cip);
+ multi_type_cnt ++;
+ }
+ if (has_all_dif_multi) {
+ sprintf (fmt, has_multi_fmt, qualname);
+ cip = NewClickableItem (DISC_DUP_SRC_QUAL, fmt, has_all_dif_multi);
+ ValNodeAddPointer(&subcat, 0, cip);
+ multi_type_cnt ++;
+ }
+
+ if (multi_type_cnt > 1) {
+ sprintf (fmt, has_multi_fmt, qualname);
+ cip = NewClickableItem (DISC_DUP_SRC_QUAL, fmt, has_multi);
+ cip->subcategories = subcat;
+ }
+
+ qualname = MemFree (qualname);
+ fmt = MemFree (fmt);
+
return cip;
}
@@ -17484,7 +18199,7 @@ static void FindRepeatedFieldValues (ValNodePtr PNTR discrepancy_list, ValNodePt
if (val_dup_list != NULL) {
item_list = ItemListFromSubcategories (val_dup_list);
RemoveDuplicateItems (&item_list);
- cip = NewClickableItem (item_type, "%d sources have two qualifiers with the same value", item_list);
+ cip = NewClickableItem (item_type, "%d sources have two or more qualifiers with the same value", item_list);
cip->subcategories = val_dup_list;
ValNodeAddPointer (discrepancy_list, 0, cip);
}
@@ -17502,7 +18217,6 @@ static void AddDiscrepanciesForSourceQualComboList (ValNodePtr PNTR discrepancy_
CharPtr dup_fmt = "%%d sources have '%s' for %s";
CharPtr fmt, qual_name;
Char tmp[30];
- ErrSev msev, lsev;
if (combo_list == NULL || *combo_list == NULL || src_list == NULL) {
return;
@@ -17586,14 +18300,10 @@ static void AddDiscrepanciesForSourceQualComboList (ValNodePtr PNTR discrepancy_
cip->clickable_item_type = item_type;
cip->description = StringSave ("Source Qualifier Report");
- msev = ErrSetMessageLevel (SEV_MAX);
- lsev = ErrSetLogLevel (SEV_MAX);
if (GetAppParam ("SEQUINCUSTOM", "ONCALLERTOOL", "EXPAND_SRCQUAL_REPORT", NULL, tmp, sizeof (tmp) - 1)
&& StringICmp (tmp, "TRUE") == 0) {
- cip->expanded = TRUE; /* initially source qualifier report should be open */
+ cip->expanded = TRUE; /** initially source qualifier report should be open **/
}
- ErrSetMessageLevel (msev);
- ErrSetLogLevel (lsev);
ValNodeAddPointer (discrepancy_list, 0, cip);
}
@@ -17602,7 +18312,7 @@ static void AddDiscrepanciesForSourceQualComboList (ValNodePtr PNTR discrepancy_
static void CheckBioSourceQualsEx (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list, Boolean combine_seqentry_reports, Uint4 item_type)
{
- ValNodePtr src_list = NULL, qual_list = NULL;
+ ValNodePtr src_list = NULL, qual_list = NULL, feat_list;
ValNodePtr vnp, vnp_q, vnp_s;
DuplicateQualPtr dq1;
SeqEntryPtr sep;
@@ -17613,6 +18323,10 @@ static void CheckBioSourceQualsEx (ValNodePtr PNTR discrepancy_list, ValNodePtr
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
ValNodeLink (&src_list, GetObjectListForFieldType (FieldType_source_qual, vnp->data.ptrvalue));
}
+ /* remove source features from list */
+ feat_list = ValNodeExtractList (&src_list, OBJ_SEQFEAT);
+ feat_list = ValNodeFree (feat_list);
+
qual_list = GetSourceQualSampleFieldListForSeqEntryList (sep_list);
AdjustSourceQualSampleFieldListForOnCallerTest (&qual_list, src_list);
@@ -18321,6 +19035,18 @@ static CitSubPtr CitSubFromPubdesc (PubdescPtr pdp)
}
+/*
+ * Return the CitSub for a list item.
+ * An OBJ_SEQSUB_CIT node carries the CitSub directly in data.ptrvalue;
+ * any other node is resolved through its Pubdesc.  NULL input gives NULL.
+ */
+static CitSubPtr CitSubFromObject (ValNodePtr vnp)
+{
+  if (vnp == NULL) {
+    return NULL;
+  }
+  if (vnp->choice == OBJ_SEQSUB_CIT) {
+    return vnp->data.ptrvalue;
+  }
+  return CitSubFromPubdesc (PubdescFromItem (vnp));
+}
+
+
static AffilPtr AffilFromCitSub (CitSubPtr csp)
{
AffilPtr affil = NULL;
@@ -18343,8 +19069,8 @@ static int ComparePubAffilForItem (ValNodePtr vnp1, ValNodePtr vnp2)
} else if (vnp2 == NULL) {
rval = 1;
} else {
- afp1 = AffilFromCitSub (CitSubFromPubdesc (PubdescFromItem (vnp1)));
- afp2 = AffilFromCitSub (CitSubFromPubdesc (PubdescFromItem (vnp2)));
+ afp1 = AffilFromCitSub (CitSubFromObject(vnp1));
+ afp2 = AffilFromCitSub (CitSubFromObject(vnp2));
str1 = GetFlatFileAffilString (afp1);
str2 = GetFlatFileAffilString (afp2);
rval = StringCmp (str1, str2);
@@ -18373,33 +19099,175 @@ static int LIBCALLBACK SortVnpByPubAffil (VoidPtr ptr1, VoidPtr ptr2)
static void CollectCitSubPubsFeatCallback (SeqFeatPtr sfp, Pointer data)
{
- ValNode vn;
+ if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB && CitSubFromPubdesc (sfp->data.value.ptrvalue) != NULL && data != NULL) {
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+ }
+}
- if (data != NULL) {
- MemSet (&vn, 0, sizeof (ValNode));
- vn.choice = OBJ_SEQFEAT;
- vn.data.ptrvalue = sfp;
- vn.next = NULL;
- if (CitSubFromPubdesc (PubdescFromItem(&vn)) != NULL) {
- ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+
+/*
+ * Descriptor visitor: append sdp (as OBJ_SEQDESC) to the ValNode list that
+ * data points at when sdp is a pub descriptor containing a CitSub.
+ */
+static void CollectCitSubPubsDescCallback (SeqDescrPtr sdp, Pointer data)
+{
+  if (sdp == NULL || sdp->choice != Seq_descr_pub) {
+    return;
+  }
+  if (CitSubFromPubdesc (sdp->data.ptrvalue) == NULL || data == NULL) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+}
+
+
+/* One affiliation field value together with the list item it was read from. */
+typedef struct affilconflict {
+  ValNodePtr obj;      /* list node describing the object that carries the affiliation */
+  CharPtr    qual_val; /* value of the field; stored as given, not copied */
+} AffilConflictData, PNTR AffilConflictPtr;
+
+
+/*
+ * Allocate an AffilConflictData that records obj and qual_val.
+ * Both pointers are stored as-is; neither is duplicated.
+ */
+static AffilConflictPtr AffilConflictNew (ValNodePtr obj, CharPtr qual_val)
+{
+  AffilConflictPtr conflict = (AffilConflictPtr) MemNew (sizeof (AffilConflictData));
+
+  conflict->obj = obj;
+  conflict->qual_val = qual_val;
+  return conflict;
+}
+
+
+/*
+ * Sort callback: orders ValNodes holding AffilConflictPtr data by their
+ * qual_val string (StringCmp).  Returns 0 when either argument, or either
+ * node's data, is NULL.
+ */
+static int LIBCALLBACK SortVnpByAffilConflictValue (VoidPtr ptr1, VoidPtr ptr2)
+{
+  AffilConflictPtr left, right;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+
+  /* each argument is the address of a ValNodePtr */
+  left = (*((ValNodePtr PNTR) ptr1))->data.ptrvalue;
+  right = (*((ValNodePtr PNTR) ptr2))->data.ptrvalue;
+  if (left == NULL || right == NULL) {
+    return 0;
+  }
+
+  return StringCmp (left->qual_val, right->qual_val);
+}
-static void CollectCitSubPubsDescCallback (SeqDescrPtr sdp, Pointer data)
+static ClickableItemPtr ReportAffilConflictField (CharPtr qual_name, ValNodePtr PNTR list)
{
- ValNode vn;
+ ValNodePtr vnp, item_list = NULL, subcat = NULL;
+ CharPtr this_val = NULL;
+ AffilConflictPtr a;
+ ClickableItemPtr cip;
+ CharPtr fmt;
+ CharPtr fmt_fmt = "%%d affiliations have %s value '%s'";
+ CharPtr top_fmt = "Affiliations have different values for %s";
- if (data != NULL) {
- MemSet (&vn, 0, sizeof (ValNode));
- vn.choice = OBJ_SEQDESC;
- vn.data.ptrvalue = sdp;
- vn.next = NULL;
- if (CitSubFromPubdesc (PubdescFromItem(&vn)) != NULL) {
- ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ if (qual_name == NULL || list == NULL || *list == NULL || (*list)->next == NULL) {
+ return NULL;
+ }
+ *list = ValNodeSort (*list, SortVnpByAffilConflictValue);
+ a = (AffilConflictPtr) (*list)->data.ptrvalue;
+ this_val = a->qual_val;
+ ValNodeAddPointer (&item_list, a->obj->choice, a->obj->data.ptrvalue);
+ for (vnp = (*list)->next; vnp != NULL; vnp = vnp->next) {
+ a = (AffilConflictPtr) vnp->data.ptrvalue;
+ if (StringCmp (a->qual_val, this_val) == 0) {
+ ValNodeAddPointer (&item_list, a->obj->choice, a->obj->data.ptrvalue);
+ } else {
+ fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt_fmt) + StringLen (qual_name) + StringLen (this_val)));
+ sprintf (fmt, fmt_fmt, qual_name, this_val == NULL ? "" : this_val);
+ cip = NewClickableItem (DISC_CITSUBAFFIL_CONFLICT, fmt, item_list);
+ ValNodeAddPointer (&subcat, 0, cip);
+ fmt = MemFree (fmt);
+ item_list = NULL;
+ this_val = a->qual_val;
+ ValNodeAddPointer (&item_list, a->obj->choice, a->obj->data.ptrvalue);
}
}
+ /* if we haven't created any subcategories yet, then there were no conflicts */
+ if (subcat == NULL) {
+ item_list = ValNodeFree (item_list);
+ return NULL;
+ } else {
+ /* add in last subcategory */
+ fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt_fmt) + StringLen (qual_name) + StringLen (this_val)));
+ sprintf (fmt, fmt_fmt, qual_name, this_val == NULL ? "" : this_val);
+ cip = NewClickableItem (DISC_CITSUBAFFIL_CONFLICT, fmt, item_list);
+ ValNodeAddPointer (&subcat, 0, cip);
+ fmt = MemFree (fmt);
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (cip, 0, sizeof (ClickableItemData));
+ cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (top_fmt) + StringLen (qual_name)));
+ sprintf (cip->description, top_fmt, qual_name);
+ cip->item_list = ItemListFromSubcategories (subcat);
+ cip->subcategories = subcat;
+ cip->clickable_item_type = DISC_CITSUBAFFIL_CONFLICT;
+ return cip;
+ }
+}
+
+
+/*
+ * Build the conflict report for one affiliation field and, if there is one,
+ * attach it to top->subcategories.  The list of AffilConflict entries is
+ * released afterwards; the value returned is the result of that release.
+ *
+ * top:       item that receives the new subcategory.
+ * qual_name: display name of the affiliation field.
+ * list:      AffilConflict entries collected for this field.
+ */
+static ValNodePtr AddOneCitSubConflictCategory (ClickableItemPtr top, CharPtr qual_name, ValNodePtr list)
+{
+  ClickableItemPtr field_report = ReportAffilConflictField (qual_name, &list);
+
+  if (field_report != NULL) {
+    ValNodeAddPointer (&(top->subcategories), 0, field_report);
+  }
+  return ValNodeFreeData (list);
+}
+
+
+/*
+ * Extend cip with one subcategory per affiliation field (institution,
+ * department, city, ...) whose value differs between the Cit-subs listed
+ * under cip's existing subcategories.
+ *
+ * Subcategories whose description contains "Cit-subs have no affiliation"
+ * are skipped, as are items without a CitSub or without an affiliation.
+ * Does nothing when cip is NULL.
+ */
+static void AddCitSubConflictSubcategories (ClickableItemPtr cip)
+{
+  ValNodePtr       cat, item;
+  ClickableItemPtr cat_item;
+  CitSubPtr        citsub;
+  AffilPtr         affil;
+  /* one list of AffilConflict entries per affiliation field */
+  ValNodePtr       inst = NULL, div = NULL, city = NULL, sub = NULL,
+                   country = NULL, street = NULL, postal_code = NULL,
+                   email = NULL, fax = NULL, phone = NULL;
+
+  if (cip == NULL) {
+    return;
+  }
+
+  /* collect every field value of every Cit-sub that has an affiliation */
+  for (cat = cip->subcategories; cat != NULL; cat = cat->next) {
+    cat_item = (ClickableItemPtr) cat->data.ptrvalue;
+    if (StringSearch (cat_item->description, "Cit-subs have no affiliation") != NULL) {
+      continue;
+    }
+    for (item = cat_item->item_list; item != NULL; item = item->next) {
+      if ((citsub = CitSubFromObject (item)) == NULL
+          || (affil = AffilFromCitSub (citsub)) == NULL) {
+        continue;
+      }
+      ValNodeAddPointer (&inst, 0, AffilConflictNew (item, affil->affil));
+      ValNodeAddPointer (&div, 0, AffilConflictNew (item, affil->div));
+      ValNodeAddPointer (&city, 0, AffilConflictNew (item, affil->city));
+      ValNodeAddPointer (&sub, 0, AffilConflictNew (item, affil->sub));
+      ValNodeAddPointer (&country, 0, AffilConflictNew (item, affil->country));
+      ValNodeAddPointer (&street, 0, AffilConflictNew (item, affil->street));
+      ValNodeAddPointer (&postal_code, 0, AffilConflictNew (item, affil->postal_code));
+      ValNodeAddPointer (&email, 0, AffilConflictNew (item, affil->email));
+      ValNodeAddPointer (&fax, 0, AffilConflictNew (item, affil->fax));
+      ValNodeAddPointer (&phone, 0, AffilConflictNew (item, affil->phone));
+    }
+  }
+
+  /* report each field in turn; every call also releases that field's list */
+  inst = AddOneCitSubConflictCategory (cip, "institution", inst);
+  div = AddOneCitSubConflictCategory (cip, "department", div);
+  city = AddOneCitSubConflictCategory (cip, "city", city);
+  sub = AddOneCitSubConflictCategory (cip, "state/province", sub);
+  country = AddOneCitSubConflictCategory (cip, "country", country);
+  street = AddOneCitSubConflictCategory (cip, "street", street);
+  postal_code = AddOneCitSubConflictCategory (cip, "postal code", postal_code);
+  email = AddOneCitSubConflictCategory (cip, "email", email);
+  fax = AddOneCitSubConflictCategory (cip, "fax", fax);
+  phone = AddOneCitSubConflictCategory (cip, "phone", phone);
+}
@@ -18408,18 +19276,27 @@ static void FindMismatchedCitSubAffiliations (ValNodePtr PNTR discrepancy_list,
ValNodePtr vnp, cit_sub_list = NULL, repeated = NULL, subcat = NULL;
CharPtr summ1 = NULL, summ2, fmt, affil_fmt = "%%d CitSubs have affiliation %s";
ClickableItemPtr cip;
+ Boolean has_seq_submit = TRUE;
+ SeqEntryPtr sep;
+ SubmitBlockPtr sbp;
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ sep = (SeqEntryPtr) vnp->data.ptrvalue;
+ sbp = FindSubmitBlockForSeqEntry (sep);
+ if (sbp != NULL) {
+ has_seq_submit = FALSE;
+ ValNodeAddPointer(&cit_sub_list, OBJ_SEQSUB_CIT, sbp->cit);
+ }
VisitDescriptorsInSep (vnp->data.ptrvalue, &cit_sub_list, CollectCitSubPubsDescCallback);
VisitFeaturesInSep (vnp->data.ptrvalue, &cit_sub_list, CollectCitSubPubsFeatCallback);
}
cit_sub_list = ValNodeSort (cit_sub_list, SortVnpByPubAffil);
if (cit_sub_list != NULL && cit_sub_list->next != NULL) {
- summ1 = GetFlatFileAffilString (AffilFromCitSub (CitSubFromPubdesc (PubdescFromItem (cit_sub_list))));
+ summ1 = GetFlatFileAffilString (AffilFromCitSub (CitSubFromObject(cit_sub_list)));
ValNodeAddPointer (&repeated, cit_sub_list->choice, cit_sub_list->data.ptrvalue);
for (vnp = cit_sub_list->next; vnp != NULL; vnp = vnp->next) {
- summ2 = GetFlatFileAffilString (AffilFromCitSub (CitSubFromPubdesc (PubdescFromItem (vnp))));
+ summ2 = GetFlatFileAffilString (AffilFromCitSub (CitSubFromObject(vnp)));
if (StringCmp (summ1, summ2) != 0) {
repeated = ValNodeSort (repeated, SortVnpByDiscrepancyItemText);
if (StringHasNoText (summ1)) {
@@ -18470,6 +19347,7 @@ static void FindMismatchedCitSubAffiliations (ValNodePtr PNTR discrepancy_list,
cip->clickable_item_type = DISC_CITSUBAFFIL_CONFLICT;
cip->description = StringSave ("Citsub affiliation conflicts found");
cip->subcategories = subcat;
+ AddCitSubConflictSubcategories (cip);
ValNodeAddPointer (discrepancy_list, 0, cip);
}
summ1 = MemFree (summ1);
@@ -18477,6 +19355,224 @@ static void FindMismatchedCitSubAffiliations (ValNodePtr PNTR discrepancy_list,
}
+/* IsAffilDivider
+ * Returns TRUE when ch is a whitespace or punctuation character, i.e. a
+ * character that can separate two components of an affiliation string;
+ * returns FALSE otherwise.
+ *
+ * ch is converted to unsigned char before it is handed to the <ctype.h>
+ * classifiers: passing a negative value other than EOF to isspace/ispunct
+ * is undefined behaviour, and Char may be signed, so any non-ASCII byte in
+ * an address would otherwise be a negative argument.
+ */
+static Boolean IsAffilDivider (Char ch)
+{
+  unsigned char uch = (unsigned char) ch;
+
+  if (isspace (uch) || ispunct (uch)) {
+    return TRUE;
+  } else {
+    return FALSE;
+  }
+}
+
+
+/* A place name that directly follows this phrase is treated as part of an
+ * institution name rather than as a duplicated address field. */
+static const CharPtr kUniversityOf = "University of";
+
+/* AffilStreetEndsWith
+ * Returns TRUE when all of the following hold:
+ *   - str and end are both non-empty and end is no longer than str;
+ *   - the tail of str equals end (compared with StringICmp);
+ *   - the match is the whole of str, or the character just before the
+ *     match is a divider (see IsAffilDivider);
+ *   - the u_len characters that end one position before the match are not
+ *     "University of" (compared with StringNICmp).
+ * Returns FALSE in every other case.  str is not modified.
+ */
+static Boolean AffilStreetEndsWith (CharPtr str, CharPtr end)
+{
+  Int4 len, end_len, u_len;
+
+  len = StringLen (str);
+  end_len = StringLen (end);
+  if (len == 0 || end_len == 0 || end_len > len) {
+    return FALSE;
+  }
+
+  if (StringICmp (str + len - end_len, end) != 0) {
+    return FALSE;
+  }
+  if (len != end_len && !IsAffilDivider (*(str + len - end_len - 1))) {
+    return FALSE;
+  }
+
+  u_len = StringLen (kUniversityOf);
+  /* The comparison below starts at offset len - end_len - u_len - 1, which
+   * is only inside the buffer when len is strictly greater than
+   * end_len + u_len.  The previous ">=" test allowed an offset of -1 and
+   * read one byte before the start of str. */
+  if (len > end_len + u_len
+      && StringNICmp (str + len - end_len - u_len - 1, kUniversityOf, u_len) == 0) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* AffilStreetContainsDuplicateText
+ * Returns TRUE when the street field of affil has text and ends with the
+ * value of one of the other address fields, as decided by
+ * AffilStreetEndsWith.  The fields are tried in the order country,
+ * postal_code, sub, city, stopping at the first match.
+ * Returns FALSE for a NULL affil or a street without text.
+ */
+static Boolean AffilStreetContainsDuplicateText (AffilPtr affil)
+{
+  CharPtr street;
+
+  if (affil == NULL) {
+    return FALSE;
+  }
+  street = affil->street;
+  if (StringHasNoText (street)) {
+    return FALSE;
+  }
+
+  if (AffilStreetEndsWith (street, affil->country)
+      || AffilStreetEndsWith (street, affil->postal_code)
+      || AffilStreetEndsWith (street, affil->sub)
+      || AffilStreetEndsWith (street, affil->city)) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+static const CharPtr kPRChina = "P.R. China";
+static const CharPtr kChina = "China";
+
+/* RemoveAffilEndString
+ * Truncates str in place when it ends with end, using the same matching
+ * rules as AffilStreetEndsWith: case-insensitive tail match, preceded by a
+ * divider unless the match is the whole string, and not directly preceded
+ * by "University of".
+ *
+ * When end is "China" and str ends with "P.R. China", the longer phrase is
+ * removed instead.  After truncation, trailing whitespace and commas are
+ * removed as well, except that the first character of str is never cleared.
+ *
+ * Returns TRUE when str was shortened, FALSE when it was left untouched.
+ */
+static Boolean RemoveAffilEndString (CharPtr str, CharPtr end)
+{
+  Int4 len, end_len, u_len, pos;
+
+  len = StringLen (str);
+  end_len = StringLen (end);
+  if (len == 0 || end_len == 0 || end_len > len) {
+    return FALSE;
+  }
+
+  if (StringICmp (str + len - end_len, end) != 0) {
+    return FALSE;
+  }
+  if (len != end_len && !IsAffilDivider (*(str + len - end_len - 1))) {
+    return FALSE;
+  }
+
+  u_len = StringLen (kUniversityOf);
+  /* Strictly greater: the comparison starts at len - end_len - u_len - 1,
+   * and the previous ">=" test let that offset become -1, reading one byte
+   * before the start of str. */
+  if (len > end_len + u_len
+      && StringNICmp (str + len - end_len - u_len - 1, kUniversityOf, u_len) == 0) {
+    /* don't truncate: the place name belongs to "University of <place>" */
+    return FALSE;
+  }
+
+  if (StringICmp (end, kChina) == 0) {
+    u_len = StringLen (kPRChina);
+    if (len >= u_len && StringICmp (str + len - u_len, kPRChina) == 0) {
+      end_len = u_len;
+    }
+  }
+
+  /* Work with an index rather than a pointer so that removing the whole
+   * string never forms a pointer before str. */
+  pos = len - end_len;
+  str[pos] = 0;
+  /* unsigned char cast: isspace on a negative Char is undefined behaviour */
+  while (pos > 1 && (isspace ((unsigned char) str[pos - 1]) || str[pos - 1] == ',')) {
+    pos--;
+    str[pos] = 0;
+  }
+  return TRUE;
+}
+
+
+/* RemoveAffilStreetDuplicateText
+ * Repeatedly strips the country, postal_code, sub and city values (in that
+ * order, all four on every pass) from the end of affil->street using
+ * RemoveAffilEndString, until a complete pass removes nothing.
+ * Returns TRUE when at least one removal happened; FALSE otherwise,
+ * including for a NULL affil or a street without text.
+ */
+static Boolean RemoveAffilStreetDuplicateText (AffilPtr affil)
+{
+  Boolean changed_this_pass, changed_ever = FALSE;
+
+  if (affil == NULL || StringHasNoText (affil->street)) {
+    return FALSE;
+  }
+
+  do {
+    changed_this_pass = FALSE;
+    if (RemoveAffilEndString (affil->street, affil->country)) {
+      changed_this_pass = TRUE;
+    }
+    if (RemoveAffilEndString (affil->street, affil->postal_code)) {
+      changed_this_pass = TRUE;
+    }
+    if (RemoveAffilEndString (affil->street, affil->sub)) {
+      changed_this_pass = TRUE;
+    }
+    if (RemoveAffilEndString (affil->street, affil->city)) {
+      changed_this_pass = TRUE;
+    }
+    if (changed_this_pass) {
+      changed_ever = TRUE;
+    }
+  } while (changed_this_pass);
+
+  return changed_ever;
+}
+
+
+/* ReportCitSubAffilDuplicateTextDescCallback
+ * Descriptor callback.  data is the address of a ValNode list; the
+ * descriptor is appended to it (choice OBJ_SEQDESC) when it is a pub
+ * descriptor whose Cit-sub has an affiliation for which
+ * AffilStreetContainsDuplicateText is TRUE.
+ */
+static void ReportCitSubAffilDuplicateTextDescCallback (SeqDescPtr sdp, Pointer data)
+{
+  CitSubPtr sub;
+  AffilPtr affil;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_pub) {
+    return;
+  }
+  sub = CitSubFromPubdesc (sdp->data.ptrvalue);
+  if (sub == NULL) {
+    return;
+  }
+  affil = AffilFromCitSub (sub);
+  if (affil == NULL) {
+    return;
+  }
+  if (AffilStreetContainsDuplicateText (affil)) {
+    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+  }
+}
+
+
+/* ReportCitSubAffilDuplicateTextFeatCallback
+ * Feature callback.  data is the address of a ValNode list; the feature is
+ * appended to it (choice OBJ_SEQFEAT) when it is a pub feature whose
+ * Cit-sub has an affiliation for which AffilStreetContainsDuplicateText is
+ * TRUE.
+ */
+static void ReportCitSubAffilDuplicateTextFeatCallback (SeqFeatPtr sfp, Pointer data)
+{
+  CitSubPtr sub;
+  AffilPtr affil;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB) {
+    return;
+  }
+  sub = CitSubFromPubdesc (sfp->data.value.ptrvalue);
+  if (sub == NULL) {
+    return;
+  }
+  affil = AffilFromCitSub (sub);
+  if (affil == NULL) {
+    return;
+  }
+  if (AffilStreetContainsDuplicateText (affil)) {
+    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+  }
+}
+
+
+/* ReportCitSubAffilDuplicateText
+ * For every entry in sep_list, collects the pub descriptors, the pub
+ * features and finally the enclosing Seq-submit (when it has a Cit-sub)
+ * whose Cit-sub affiliation satisfies AffilStreetContainsDuplicateText.
+ * When anything was collected, one ONCALLER_CITSUB_AFFIL_DUP_TEXT clickable
+ * item holding the collected objects is appended to discrepancy_list.
+ * Does nothing when either argument is NULL.
+ */
+static void ReportCitSubAffilDuplicateText (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr vnp, item_list = NULL;
+  SeqSubmitPtr ssp;
+  AffilPtr affil;
+
+  if (discrepancy_list == NULL || sep_list == NULL) {
+    return;
+  }
+
+  vnp = sep_list;
+  while (vnp != NULL) {
+    VisitDescriptorsInSep (vnp->data.ptrvalue, &item_list, ReportCitSubAffilDuplicateTextDescCallback);
+    VisitFeaturesInSep (vnp->data.ptrvalue, &item_list, ReportCitSubAffilDuplicateTextFeatCallback);
+
+    /* the submit block's own Cit-sub is checked after the entry contents */
+    ssp = FindSeqSubmitForSeqEntry (vnp->data.ptrvalue);
+    if (ssp != NULL && ssp->sub != NULL && ssp->sub->cit != NULL) {
+      affil = AffilFromCitSub (ssp->sub->cit);
+      if (affil != NULL && AffilStreetContainsDuplicateText (affil)) {
+        ValNodeAddPointer (&item_list, OBJ_SEQSUB, ssp);
+      }
+    }
+    vnp = vnp->next;
+  }
+
+  if (item_list == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (ONCALLER_CITSUB_AFFIL_DUP_TEXT, "%d Cit-sub pubs have duplicate affil text", item_list));
+}
+
+
+/* RemoveCitSubAffilDuplicateText
+ * For each object in item_list (pub descriptor, pub feature or Seq-submit),
+ * locates the Cit-sub affiliation and runs RemoveAffilStreetDuplicateText
+ * on it.  When the street was changed and lip is not NULL, the change is
+ * written to lip->fp (if that is open) and lip->data_in_log is set.
+ * The data argument is not used.
+ */
+static void RemoveCitSubAffilDuplicateText (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr vnp;
+  SeqDescPtr sdp;
+  SeqFeatPtr sfp;
+  CharPtr orig;
+  CitSubPtr sub;
+  AffilPtr affil;
+  SeqSubmitPtr ssp;
+
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    affil = NULL;
+
+    if (vnp->choice == OBJ_SEQDESC) {
+      sdp = (SeqDescPtr) vnp->data.ptrvalue;
+      if (sdp != NULL && sdp->choice == Seq_descr_pub) {
+        sub = CitSubFromPubdesc (sdp->data.ptrvalue);
+        if (sub != NULL) {
+          affil = AffilFromCitSub (sub);
+        }
+      }
+    } else if (vnp->choice == OBJ_SEQFEAT) {
+      sfp = (SeqFeatPtr) vnp->data.ptrvalue;
+      if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) {
+        sub = CitSubFromPubdesc (sfp->data.value.ptrvalue);
+        if (sub != NULL) {
+          affil = AffilFromCitSub (sub);
+        }
+      }
+    } else if (vnp->choice == OBJ_SEQSUB) {
+      ssp = (SeqSubmitPtr)vnp->data.ptrvalue;
+      if (ssp != NULL && ssp->sub != NULL) {
+        affil = AffilFromCitSub (ssp->sub->cit);
+      }
+    }
+
+    if (affil == NULL) {
+      continue;
+    }
+
+    /* keep a copy of the street as it was, for the log line */
+    orig = StringSave (affil->street);
+    if (RemoveAffilStreetDuplicateText (affil) && lip != NULL) {
+      if (lip->fp != NULL) {
+        fprintf (lip->fp, "Changed %s to %s\n", orig, affil->street);
+      }
+      lip->data_in_log = TRUE;
+    }
+    orig = MemFree (orig);
+  }
+}
+
+
typedef struct haplotypesequence {
CharPtr haplotype;
CharPtr taxname;
@@ -18840,7 +19936,7 @@ static ValNodePtr ReportHaplotypeSequenceMismatchForList (ValNodePtr PNTR haplot
}
/* first, look for same taxname, same haplotype, different sequence */
- *haplotype_sequence_list = ValNodeSort (*haplotype_sequence_list, allow_NDiff ? SortVnpByHaplotypeThenSequence : SortVnpByHaplotypeThenSequence);
+ *haplotype_sequence_list = ValNodeSort (*haplotype_sequence_list, allow_NDiff ? SortVnpByHaplotypeThenSequenceAllowNDiff : SortVnpByHaplotypeThenSequence);
have_mismatch = FALSE;
same_list = NULL;
h1 = (*haplotype_sequence_list)->data.ptrvalue;
@@ -19104,7 +20200,11 @@ NLM_EXTERN SeqFeatPtr GetmRNAforCDS (SeqFeatPtr cds)
SeqFeatPtr mrna = NULL;
SeqFeatXrefPtr xref;
SeqMgrFeatContext mcontext;
+ BioseqPtr mbsp;
+ if (cds == NULL) {
+ return NULL;
+ }
/* first, check for mRNA identified by feature xref */
for (xref = cds->xref; xref != NULL && mrna == NULL; xref = xref->next) {
if (xref->id.choice != 0) {
@@ -19122,6 +20222,14 @@ NLM_EXTERN SeqFeatPtr GetmRNAforCDS (SeqFeatPtr cds)
mrna = SeqMgrGetOverlappingmRNA (cds->location, &mcontext);
}
}
+
+ if (mrna == NULL) {
+ mbsp = BioseqFindFromSeqLoc (cds->location);
+ if (IsmRNASequenceInGenProdSet(mbsp)) {
+ mrna = SeqMgrGetRNAgivenProduct(mbsp, &mcontext);
+ }
+ }
+
return mrna;
}
@@ -19252,10 +20360,14 @@ static void ReportCDSWithoutmRNA (ValNodePtr PNTR discrepancy_list, ValNodePtr s
{
ValNodePtr vnp;
ValNodePtr item_list = NULL;
+ SeqEntryPtr orig_scope;
+ orig_scope = SeqEntrySetScope (NULL);
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ SeqEntrySetScope (vnp->data.ptrvalue);
VisitBioseqsInSep (vnp->data.ptrvalue, &item_list, ReportCDSWithoutmRNACallback);
}
+ SeqEntrySetScope (orig_scope);
if (item_list != NULL) {
ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (DISC_CDS_WITHOUT_MRNA, "%d coding regions do not have an mRNA", item_list));
}
@@ -19512,7 +20624,7 @@ NLM_EXTERN SeqFeatPtr AddmRNAForCDS (SeqFeatPtr sfp)
ProtRefPtr prp;
ValNodePtr name;
CharPtr mRNAname = NULL;
- SeqFeatPtr rna, gene;
+ SeqFeatPtr rna = NULL, gene;
SeqEntryPtr sep;
Boolean partial5, partial3;
BioseqPtr bsp;
@@ -19968,25 +21080,6 @@ static Boolean FeatureCountHasFeatdef (ValNodePtr vnp, Pointer data)
}
-static Boolean FeatureCountHasNumFeats (ValNodePtr vnp, Pointer data)
-{
- Int4 num_feats;
- FeatureCountPtr f;
-
- if (vnp == NULL || data == NULL) {
- return FALSE;
- }
-
- num_feats = *((Int4Ptr)data);
- f = (FeatureCountPtr) vnp->data.ptrvalue;
- if (f != NULL && f->num_feats == num_feats) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
static void InsertMissingFeatureCountsWithSeqIdTxt (ValNodePtr PNTR feat_count_list)
{
ValNodePtr seq_list, feat_list, feat_seq_list, tmp_list, vnp, new_list = NULL;
@@ -20536,6 +21629,38 @@ typedef struct partialconflictdata {
} PartialConflictData, PNTR PartialConflictPtr;
+/* Is5EndInUTRList
+ * Returns TRUE when end equals SeqLocStart of the location of at least one
+ * feature in list; FALSE otherwise (including for an empty list).
+ */
+static Boolean Is5EndInUTRList (ValNodePtr list, Int4 end)
+{
+  ValNodePtr node;
+  SeqFeatPtr utr;
+
+  node = list;
+  while (node != NULL) {
+    utr = (SeqFeatPtr) node->data.ptrvalue;
+    if (SeqLocStart (utr->location) == end) {
+      return TRUE;
+    }
+    node = node->next;
+  }
+  return FALSE;
+}
+
+
+/* Is3EndInUTRList
+ * Returns TRUE when end equals SeqLocStop of the location of at least one
+ * feature in list; FALSE otherwise (including for an empty list).
+ */
+static Boolean Is3EndInUTRList (ValNodePtr list, Int4 end)
+{
+  ValNodePtr node;
+  SeqFeatPtr utr;
+
+  node = list;
+  while (node != NULL) {
+    utr = (SeqFeatPtr) node->data.ptrvalue;
+    if (SeqLocStop (utr->location) == end) {
+      return TRUE;
+    }
+    node = node->next;
+  }
+  return FALSE;
+}
+
+
static ValNodePtr ReportPartialConflictsForFeatureType (BioseqPtr bsp, Int4 seqfeat, Int4 featdef, CharPtr label)
{
SeqFeatPtr sfp, gene;
@@ -20551,11 +21676,28 @@ static ValNodePtr ReportPartialConflictsForFeatureType (BioseqPtr bsp, Int4 seqf
CharPtr fmt;
ClickableItemPtr cip;
ValNodePtr disc_list = NULL;
+ ValNodePtr utr5 = NULL, utr3 = NULL;
+ Boolean check_for_utrs = FALSE;
if (bsp == NULL || ISA_aa (bsp->mol) || label == NULL) {
return NULL;
}
+  /* When coding regions on an mRNA sequence are being checked, gather the
+   * sequence's 5' and 3' UTR features so that the end comparisons further
+   * down can consult them through Is5EndInUTRList / Is3EndInUTRList.
+   *
+   * Each loop must keep asking for the same UTR type on every step.
+   * Continuing with the caller's seqfeat/featdef, as the loops did before,
+   * switches the filter to coding regions after the first UTR, so the lists
+   * ended up holding CDS features instead of UTRs. */
+  if ((featdef == FEATDEF_CDS || seqfeat == SEQFEAT_CDREGION) && IsMrnaSequence (bsp)) {
+    for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_5UTR, &context);
+         sfp != NULL;
+         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_5UTR, &context)) {
+      ValNodeAddPointer (&utr5, OBJ_SEQFEAT, sfp);
+    }
+    for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_3UTR, &context);
+         sfp != NULL;
+         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_3UTR, &context)) {
+      ValNodeAddPointer (&utr3, OBJ_SEQFEAT, sfp);
+    }
+
+    check_for_utrs = TRUE;
+  }
+
for (sfp = SeqMgrGetNextFeature (bsp, NULL, seqfeat, featdef, &context);
sfp != NULL;
sfp = SeqMgrGetNextFeature (bsp, sfp, seqfeat, featdef, &context)) {
@@ -20585,16 +21727,22 @@ static ValNodePtr ReportPartialConflictsForFeatureType (BioseqPtr bsp, Int4 seqf
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
CheckSeqLocForPartial (gene->location, &gene_partial5, &gene_partial3);
- if (((partial5 && !gene_partial5) || (!partial5 && gene_partial5)) && feat_start == gene_start) {
- conflict5 = TRUE;
- } else {
- conflict5 = FALSE;
+ conflict5 = FALSE;
+ if (((partial5 && !gene_partial5) || (!partial5 && gene_partial5))) {
+ if (feat_start == gene_start) {
+ conflict5 = TRUE;
+ } else if (check_for_utrs && !Is5EndInUTRList(utr5, gene_start)) {
+ conflict5 = TRUE;
+ }
}
- if (((partial3 && !gene_partial3) || (!partial3 && gene_partial3)) && feat_stop == gene_stop) {
- conflict3 = TRUE;
- } else {
- conflict3 = FALSE;
+ conflict3 = FALSE;
+ if (((partial3 && !gene_partial3) || (!partial3 && gene_partial3))) {
+ if (feat_stop == gene_stop) {
+ conflict3 = TRUE;
+ } else if (check_for_utrs && !Is3EndInUTRList(utr3, gene_stop)) {
+ conflict3 = TRUE;
+ }
}
if (conflict5 || conflict3) {
@@ -20625,6 +21773,8 @@ static ValNodePtr ReportPartialConflictsForFeatureType (BioseqPtr bsp, Int4 seqf
gene_loc = SeqLocFree (gene_loc);
}
}
+ utr5 = ValNodeFree (utr5);
+ utr3 = ValNodeFree (utr3);
return disc_list;
}
@@ -20665,7 +21815,7 @@ static void ReportPartialConflictsBioseqCallback (BioseqPtr bsp, Pointer data)
ValNodeLink (&(p->RNA_list), ReportPartialConflictsForFeatureType (bsp, SEQFEAT_RNA, 0, "RNA"));
ValNodeLink (&(p->utr3_list), ReportPartialConflictsForFeatureType (bsp, 0, FEATDEF_3UTR, "3' UTR"));
ValNodeLink (&(p->utr5_list), ReportPartialConflictsForFeatureType (bsp, 0, FEATDEF_5UTR, "5' UTR"));
- if (!IsEukaryotic(bsp)) {
+ if (!IsEukaryotic(bsp) || IsMrnaSequence (bsp)) {
ValNodeLink (&(p->cds_list), ReportPartialConflictsForFeatureType (bsp, 0, FEATDEF_CDS, "coding region"));
}
ValNodeLink (&(p->misc_feature_list), ReportPartialConflictsForFeatureType (bsp, 0, FEATDEF_misc_feature, "misc_feature"));
@@ -20734,10 +21884,15 @@ typedef struct objfindbytext {
} ObjFindByTextData, PNTR ObjFindByTextPtr;
+typedef struct spellfix { /* one entry of a misspelling search/replace table */
+ CharPtr find; /* text to look for; a NULL find marks the end of a table */
+ CharPtr replace; /* replacement text; NULL means the term is only reported, never replaced */
+ Boolean whole_word; /* passed as the whole-word flag when searching for or replacing find */
+} SpellFixData, PNTR SpellFixPtr;
+
typedef struct objfindlistoftext {
- CharPtr PNTR search_items;
+ SpellFixPtr search_items;
ValNodePtr PNTR item_lists;
- Boolean whole_word;
} ObjFindListOfTextData, PNTR ObjFindListOfTextPtr;
static void RemoveTranslation (CharPtr str)
@@ -20842,14 +21997,14 @@ static void FlatfileTextFind (
RemoveTranslation (cpy);
}
- for (i = 0; obj->search_items[i] != NULL; i++) {
+ for (i = 0; obj->search_items[i].find != NULL; i++) {
do_add = FALSE;
- if (DoesStringContainPhrase (cpy, obj->search_items[i], FALSE, obj->whole_word)) {
+ if (DoesStringContainPhrase (cpy, obj->search_items[i].find, FALSE, obj->search_items[i].whole_word)) {
if (taxname == NULL) {
/* remove taxname */
taxname = GetTaxnameForObject (entityID, itemtype, itemID);
FindReplaceString (&cpy, taxname, "", FALSE, TRUE);
- if (DoesStringContainPhrase (cpy, obj->search_items[i], FALSE, obj->whole_word)) {
+ if (DoesStringContainPhrase (cpy, obj->search_items[i].find, FALSE, obj->search_items[i].whole_word)) {
do_add = TRUE;
}
} else {
@@ -20883,7 +22038,99 @@ static void FlatfileTextFind (
}
-static void FindTextInFlatfileEx (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list, Uint4 item_type, CharPtr PNTR find_list, Boolean whole_word)
+typedef struct replacepair { /* one find/replace string pair; tables of these end with a NULL find */
+ CharPtr find; /* first string of the pair */
+ CharPtr replace; /* second string of the pair */
+} ReplacePairData, PNTR ReplacePairPtr;
+
+/* Misspellings looked for by the on-caller flat file text search and
+ * corrected by OncallerToolSpellFix.
+ * Each entry is { find, replace, whole_word }.  Entries whose replace is
+ * NULL are only reported (as unfixable) and are skipped by the fixer.
+ * The table ends with the entry whose find is NULL.
+ *
+ * Every entry gets its own result list and its own report line, so a term
+ * must appear only once; the repeated "Instutute" entry has been removed
+ * because it produced the same report line twice.
+ */
+static SpellFixData oncaller_tool_spell_fixes[] = {
+{"Agricultutral","agricultural", FALSE},
+{"Bacilllus","Bacillus", FALSE},
+{"Enviromental","Environmental", FALSE},
+{"Insitiute","institute", FALSE},
+{"Instutite","institute", FALSE},
+{"Instutute", "Institute", FALSE},
+{"P.R.Chian","P.R. China", FALSE},
+{"PRChian","PR China", FALSE},
+{"Scieces","Sciences", FALSE},
+{"agricultral", "agricultural", FALSE},
+{"agriculturral","agricultural", FALSE},
+{"biotechnlogy","biotechnology", FALSE},
+{"Biotechnlogy","Biotechnology", FALSE},
+{"biotechnolgy","biotechnology", FALSE},
+{"biotechology","biotechnology", FALSE},
+{"caputre","capture", TRUE},
+{"casette","cassette", TRUE},
+{"catalize","catalyze", FALSE},
+{"charaterization","characterization", FALSE},
+{"clonging","cloning", FALSE},
+{"consevered","conserved", FALSE},
+{"cotaining","containing", FALSE},
+{"cytochome","cytochrome", TRUE},
+{"diveristy","diversity", TRUE},
+{"enivronment","environment", FALSE},
+{"enviroment","environment", FALSE},
+{"genone","genome", TRUE},
+{"homologue", "homolog" , TRUE},
+{"hypotethical","hypothetical", FALSE},
+{"hypotetical","hypothetical", FALSE},
+{"hypothetcial","hypothetical", FALSE},
+{"hypothteical","hypothetical", FALSE},
+{"indepedent","independent", FALSE},
+{"insititute","institute", FALSE},
+{"insitute","institute", FALSE},
+{"institue","institute", FALSE},
+{"instute","institute", FALSE},
+{"muesum","museum", TRUE},
+{"musuem","museum", TRUE},
+{"nuclear shutting","nuclear shuttling", TRUE},
+{"phylogentic","phylogenetic", FALSE},
+{"protien","protein", FALSE},
+{"puatative","putative", FALSE},
+{"putaitve","putative", FALSE},
+{"putaive","putative", FALSE},
+{"putataive","putative", FALSE},
+{"putatitve","putative", FALSE},
+{"putatuve","putative", FALSE},
+{"putatvie","putative", FALSE},
+{"pylogeny","phylogeny", FALSE},
+{"resaerch","research", FALSE},
+{"reseach","research", FALSE},
+{"reserach","research", TRUE},
+{"reserch","research", FALSE},
+{"ribosoml","ribosomal", FALSE},
+{"ribossomal","ribosomal", FALSE},
+{"scencies","sciences", FALSE},
+{"scinece","science", FALSE},
+{"simmilar","similar", FALSE},
+{"structual","structural", FALSE},
+{"subitilus","subtilis", FALSE},
+{"sulfer","sulfur", FALSE},
+{"technlogy","technology", FALSE},
+{"technolgy","technology", FALSE},
+{"Technlogy","Technology", FALSE},
+{"Veterinry","Veterinary", FALSE},
+{"Argricultural","Agricultural", FALSE},
+{"transcirbed","transcribed", FALSE},
+{"transcirption","transcription", TRUE},
+{"uiniversity","university", FALSE},
+{"uinversity","university", FALSE},
+{"univercity","university", FALSE},
+{"univerisity","university", FALSE},
+{"univeristy","university", FALSE},
+{"univesity","university", FALSE},
+{"unversity","university", TRUE},
+{"uviversity","university", FALSE},
+{"anaemia", NULL, FALSE },
+{"haem", NULL, FALSE },
+{"haemagglutination", NULL, FALSE },
+{"heam", NULL, FALSE },
+{"mithocon", NULL, FALSE },
+{NULL, NULL, FALSE}};
+
+
+static void FindTextInFlatfileOncaller (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
XtraBlock xtra;
ObjFindListOfTextData od;
@@ -20894,6 +22141,7 @@ static void FindTextInFlatfileEx (ValNodePtr PNTR discrepancy_list, ValNodePtr s
CharPtr find_fmt = "%%d objects contain %s", fmt;
Int4 i, num = 0;
ValNodePtr vnp;
+ ValNodePtr fixable = NULL, nonfixable = NULL;
if (discrepancy_list == NULL || sep_list == NULL) return;
@@ -20903,14 +22151,13 @@ static void FindTextInFlatfileEx (ValNodePtr PNTR discrepancy_list, ValNodePtr s
xtra.reindex = TRUE;
level = ErrSetMessageLevel (SEV_MAX);
- od.whole_word = whole_word;
- od.search_items = find_list;
- for (i = 0; find_list[i] != NULL; i++) {
+ od.search_items = oncaller_tool_spell_fixes;
+ for (i = 0; od.search_items[i].find != NULL; i++) {
num++;
}
od.item_lists = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num);
- for (i = 0; find_list[i] != NULL; i++) {
+ for (i = 0; od.search_items[i].find != NULL; i++) {
od.item_lists[i] = NULL;
}
@@ -20921,11 +22168,15 @@ static void FindTextInFlatfileEx (ValNodePtr PNTR discrepancy_list, ValNodePtr s
SHOW_CONTIG_FEATURES, 0, 0, &xtra, NULL);
SeqEntrySetScope (oldscope);
}
- for (i = 0; find_list[i] != NULL; i++) {
+ for (i = 0; od.search_items[i].find != NULL; i++) {
if (od.item_lists[i] != NULL) {
- fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (find_fmt) + StringLen (find_list[i])));
- sprintf (fmt, find_fmt, find_list[i]);
- ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (item_type, fmt, od.item_lists[i]));
+ fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (find_fmt) + StringLen (od.search_items[i].find)));
+ sprintf (fmt, find_fmt, od.search_items[i].find);
+ if (od.search_items[i].replace == NULL) {
+ ValNodeAddPointer (&nonfixable, 0, NewClickableItem (DISC_FLATFILE_FIND_ONCALLER_UNFIXABLE, fmt, od.item_lists[i]));
+ } else {
+ ValNodeAddPointer (&fixable, 0, NewClickableItem (DISC_FLATFILE_FIND_ONCALLER_FIXABLE, fmt, od.item_lists[i]));
+ }
od.item_lists[i] = NULL;
fmt = MemFree (fmt);
}
@@ -20933,114 +22184,13 @@ static void FindTextInFlatfileEx (ValNodePtr PNTR discrepancy_list, ValNodePtr s
od.item_lists = MemFree (od.item_lists);
- ErrSetMessageLevel (level);
-}
-
-
-static CharPtr flatfile_find_list_oncaller[] = {
- "univeristy",
- "univerisity",
- "univercity",
- "uiniversity",
- "uinversity",
- "univesity",
- "uviversity",
- "putatvie",
- "putaitve",
- "protien",
- "simmilar",
- "Insitiute",
- "Instutite",
- "instute",
- "institue",
- "insitute",
- "insititute",
- "ribosoml",
- "transcirbed",
- "Agricultutral",
- "agriculturral",
- "resaerch",
- "charaterization",
- "clonging",
- "anaemia",
- "heam",
- "haem",
- "technlogy",
- "technolgy",
- "biotechnlogy",
- "biotechnolgy",
- "biotechology",
- "enviroment",
- "hypotetical",
- "puatative",
- "putaive",
- "putatitve",
- "putataive",
- "putatuve",
- "cotaining",
- "hypothteical",
- "hypotethical",
- "hypothetcial",
- "consevered",
- "haemagglutination",
- "indepedent",
- "reserch",
- "agricultral",
- "Bacilllus",
- "catalize",
- "subitilus",
- "P.R.Chian",
- "PRChian",
- "phylogentic",
- "pylogeny",
- "reseach",
- "ribossomal",
- "mithocon",
- "scencies",
- "scinece",
- "enivronment",
- "structual",
- "sulfer",
- NULL
-};
-
+ ValNodeLink (discrepancy_list, nonfixable);
+ ValNodeLink (discrepancy_list, fixable);
-static CharPtr flatfile_find_list_oncaller_wholeword[] = {
- "caputre",
- "casette",
- "chian",
- "cytochome",
- "diveristy",
- "genone",
- "muesum",
- "musuem",
- "nuclear shutting",
- "reserach",
- "transcirption",
- "unversity",
- "varent",
- NULL
-};
-
-
-static void FindTextInFlatfileOncaller (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
-{
- FindTextInFlatfileEx (discrepancy_list, sep_list, DISC_FLATFILE_FIND_ONCALLER, flatfile_find_list_oncaller, FALSE);
- FindTextInFlatfileEx (discrepancy_list, sep_list, DISC_FLATFILE_FIND_ONCALLER, flatfile_find_list_oncaller_wholeword, TRUE);
+ ErrSetMessageLevel (level);
}
-typedef struct replacepair {
- CharPtr find;
- CharPtr replace;
-} ReplacePairData, PNTR ReplacePairPtr;
-
-
-static ReplacePairData oncaller_tool_spell_fixes[] = {
- {"homologue", "homolog" },
- {"charaterization", "characterization"},
- {NULL, NULL}};
-
static void OncallerToolSpellFix (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
{
ValNodePtr entityID_list = NULL, vnp;
@@ -21058,9 +22208,11 @@ static void OncallerToolSpellFix (ValNodePtr item_list, Pointer data, LogInfoPtr
for (vnp = entityID_list; vnp != NULL; vnp = vnp->next) {
entityID = vnp->data.intvalue;
for (i = 0; oncaller_tool_spell_fixes[i].find != NULL; i++) {
- FindReplaceInEntity (entityID, oncaller_tool_spell_fixes[i].find, oncaller_tool_spell_fixes[i].replace,
- FALSE, FALSE, TRUE,
- FALSE, UPDATE_NEVER, NULL, NULL, NULL, FALSE, NULL, NULL);
+ if (oncaller_tool_spell_fixes[i].replace != NULL) {
+ FindReplaceInEntity (entityID, oncaller_tool_spell_fixes[i].find, oncaller_tool_spell_fixes[i].replace,
+ FALSE, oncaller_tool_spell_fixes[i].whole_word, TRUE,
+ FALSE, UPDATE_NEVER, NULL, NULL, NULL, FALSE, NULL, NULL);
+ }
}
}
}
@@ -21500,7 +22652,7 @@ static void AddATCCStrainToCultureColl (ValNodePtr item_list, Pointer data, LogI
AECRParseActionPtr parse;
SourceQualPairPtr pair;
ValNodePtr field_from, field_to, vnp;
- CharPtr str1, str2, cp, new_str;
+ CharPtr str1, str2, cp;
parse = AECRParseActionNew ();
@@ -21516,6 +22668,7 @@ static void AddATCCStrainToCultureColl (ValNodePtr item_list, Pointer data, LogI
parse->portion->left_marker = MakeTextTextMarker ("ATCC ");
parse->portion->include_left = FALSE;
parse->portion->right_marker = NULL;
+
parse->portion->include_right = FALSE;
parse->portion->inside = TRUE;
parse->portion->case_sensitive = FALSE;
@@ -21531,16 +22684,13 @@ static void AddATCCStrainToCultureColl (ValNodePtr item_list, Pointer data, LogI
for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
str1 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, NULL);
- str2 = GetTextPortionFromString (str1, parse->portion);
- if (str2 != NULL) {
- cp = StringChr (str2, ';');
+ str2 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL);
+ if (str1 == NULL) {
+ cp = StringChr (str2, ':');
if (cp != NULL) {
- *cp = 0;
+ *cp = ' ';
}
- new_str = (CharPtr) MemNew (sizeof (Char) * (5 + StringLen (str2) + 1));
- sprintf (new_str, "ATCC:%s", str2);
- SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, new_str, parse->existing_text);
- new_str = MemFree (new_str);
+ SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, NULL, str2, parse->existing_text);
}
str1 = MemFree (str1);
str2 = MemFree (str2);
@@ -21553,56 +22703,99 @@ static void AddATCCStrainToCultureColl (ValNodePtr item_list, Pointer data, LogI
static ReplacePairData us_state_abbrev_fixes[] = {
{"AL", "Alabama"},
+ {"AL", "Ala"},
{"AK", "Alaska"},
+ {"AK", "Alas"},
{"AZ", "Arizona"},
+ {"AZ", "Ariz"},
{"AR", "Arkansas"},
+ {"AR", "Ark"},
{"CA", "California"},
+ {"CA", "Calif"},
+ {"CA", "Cali"},
+ {"CA", "Cal"},
{"CO", "Colorado"},
+ {"CO", "Colo"},
+ {"CO", "Col"},
{"CT", "Connecticut"},
+ {"CT", "Conn"},
{"DE", "Delaware"},
+ {"DE", "Del"},
{"FL", "Florida"},
+ {"FL", "Fla"},
{"GA", "Georgia"},
{"HI", "Hawaii"},
{"ID", "Idaho"},
+ {"ID", "Ida"},
{"IL", "Illinois"},
+ {"IL", "Ill"},
{"IN", "Indiana"},
+ {"IN", "Ind"},
{"IA", "Iowa"},
{"KS", "Kansas"},
+ {"KS", "Kans"},
+ {"KS", "Kan"},
{"KY", "Kentucky"},
+ {"KY", "Kent"},
+ {"KY", "Ken"},
{"LA", "Louisiana"},
{"ME", "Maine"},
{"MD", "Maryland"},
{"MA", "Massachusetts"},
+ {"MA", "Mass"},
{"MI", "Michigan"},
+ {"MI", "Mich"},
{"MN", "Minnesota"},
+ {"MN", "Minn"},
{"MS", "Mississippi"},
+ {"MS", "Miss"},
{"MO", "Missouri"},
{"MT", "Montana"},
+ {"MT", "Mont"},
{"NE", "Nebraska"},
+ {"NE", "Nebr"},
+ {"NE", "Neb"},
{"NV", "Nevada"},
+ {"NV", "Nev"},
{"NH", "New Hampshire"},
{"NJ", "New Jersey"},
{"NM", "New Mexico"},
{"NY", "New York"},
{"NC", "North Carolina"},
+ {"NC", "N Car"},
{"ND", "North Dakota"},
+ {"ND", "N Dak"},
{"OH", "Ohio"},
{"OK", "Oklahoma"},
+ {"OK", "Okla"},
{"OR", "Oregon"},
+ {"OR", "Oreg"},
+ {"OR", "Ore"},
{"PA", "Pennsylvania"},
+ {"PA", "Penna"},
+ {"PA", "Penn"},
{"PR", "Puerto Rico"},
{"RI", "Rhode Island"},
{"SC", "South Carolina"},
+ {"SC", "S Car"},
{"SD", "South Dakota"},
+ {"SD", "S Dak"},
{"TN", "Tennessee"},
+ {"TN", "Tenn"},
{"TX", "Texas"},
+ {"TX", "Tex"},
{"UT", "Utah"},
{"VT", "Vermont"},
{"VA", "Virginia"},
+ {"VA", "Virg"},
{"WA", "Washington"},
+ {"WA", "Wash"},
{"WV", "West Virginia"},
{"WI", "Wisconsin"},
+ {"WI", "Wisc"},
+ {"WI", "Wis"},
{"WY", "Wyoming"},
+ {"WY", "Wyo"},
{NULL, NULL}
};
@@ -21678,7 +22871,7 @@ static void CheckUSAStates (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_lis
} else {
found = FALSE;
for (i = 0; us_state_abbrev_fixes[i].find != NULL && !found; i++) {
- if (StringCmp (us_state_abbrev_fixes[i].find, state) == 0) {
+ if (StringICmp (us_state_abbrev_fixes[i].find, state) == 0) {
found = TRUE;
}
}
@@ -21721,7 +22914,8 @@ static void FixUSAStates (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
if (StringCmp (country, "USA") == 0) {
state = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, &field_s, NULL);
for (i = 0; us_state_abbrev_fixes[i].find != NULL; i++) {
- if (StringICmp (us_state_abbrev_fixes[i].replace, state) == 0) {
+ if (StringICmp (us_state_abbrev_fixes[i].replace, state) == 0
+ || StringICmp (us_state_abbrev_fixes[i].find, state) == 0) {
SetFieldValueForObject (vnp->choice,
vnp->data.ptrvalue,
&field_s, NULL,
@@ -21803,7 +22997,7 @@ static void CheckForLinkerSequence (ValNodePtr PNTR discrepancy_list, ValNodePtr
}
-static Boolean IsMrnaSequence (BioseqPtr bsp)
+NLM_EXTERN Boolean IsMrnaSequence (BioseqPtr bsp)
{
SeqDescrPtr sdp;
MolInfoPtr mip;
@@ -21989,6 +23183,7 @@ static void CheckForTitleAuthorConflicts (ValNodePtr PNTR discrepancy_list, ValN
auth_field.data.intvalue = Publication_field_authors_initials;
auth_field.next = NULL;
last_title = GetFieldValueForObject (pub_list->choice, pub_list->data.ptrvalue, &title_field, NULL);
+ TrimSpacesAroundString (last_title);
last_authors = GetFieldValueForObject (pub_list->choice, pub_list->data.ptrvalue, &auth_field, NULL);
author_cluster = NULL;
author_cluster_list = NULL;
@@ -21996,6 +23191,7 @@ static void CheckForTitleAuthorConflicts (ValNodePtr PNTR discrepancy_list, ValN
ValNodeAddPointer (&repeated, pub_list->choice, pub_list->data.ptrvalue);
for (vnp = pub_list->next; vnp != NULL; vnp = vnp->next) {
this_title = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, &title_field, NULL);
+ TrimSpacesAroundString (this_title);
this_authors = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, &auth_field, NULL);
if (StringCmp (last_title, this_title) == 0) {
ValNodeAddPointer (&repeated, vnp->choice, vnp->data.ptrvalue);
@@ -23495,8 +24691,8 @@ static void FindBacterialNonExtendablePartialsCallback (BioseqPtr bsp, Pointer u
sfp != NULL;
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext)) {
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
- /* skip feature if it already has the exception */
- if (StringISearch (sfp->except_text, kNonExtendableException) != NULL) {
+ /* skip feature if it already has the exception or is not partial*/
+ if (StringISearch (sfp->except_text, kNonExtendableException) != NULL || (!partial5 && !partial3)) {
continue;
}
if (fcontext.strand == Seq_strand_minus) {
@@ -23722,25 +24918,6 @@ static CharPtr suspect_rrna_product_names[] =
const int num_suspect_rrna_product_names = sizeof (suspect_rrna_product_names) / sizeof (CharPtr);
-static void FindSuspectrRNAProductsCallback (SeqFeatPtr sfp, Pointer data)
-{
- Int4 k;
- CharPtr product;
- ValNodePtr PNTR feature_list;
-
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_rRNA || (feature_list = (ValNodePtr PNTR)data) == NULL) {
- return;
- }
-
- product = GetRNAProductString (sfp, NULL);
- for (k = 0; k < num_suspect_rrna_product_names; k++) {
- if (DoesStringContainPhrase (product, suspect_rrna_product_names[k], FALSE, FALSE)) {
- ValNodeAddPointer (&(feature_list[k]), OBJ_SEQFEAT, sfp);
- }
- }
- product = MemFree (product);
-}
-
static StringConstraintPtr MakeSimpleSearchConstraint (CharPtr search, Boolean whole_word)
{
@@ -24270,41 +25447,58 @@ static void FindTrinomialWithoutQualifier (ValNodePtr PNTR discrepancy_list, Val
}
-static CharPtr rRNATerms[] = {
-"16S",
-"18S",
-"23S",
-"26S",
-"28S",
-"small",
-"large",
-NULL };
+typedef struct rnaterm { /* length threshold applied to rRNA features whose product name contains a given term */
+ CharPtr name; /* term searched for in the rRNA product name (via StringISearch); NULL ends the table */
+ Int4 min_length; /* a matching feature whose location is shorter than this is reported as short */
+ Boolean ignore_partial; /* NOTE(review): IsShortrRNA returns FALSE for partial features before this flag is consulted, so it currently has no effect - confirm the intended behaviour */
+} RNATermData, PNTR RNATermPtr;
-static void FindShortrRNAsCallback (SeqFeatPtr sfp, Pointer data)
+static RNATermData rRNATerms[] = { /* { name, min_length, ignore_partial }; scanned in order by IsShortrRNA until the first hit */
+ { "16S", 1000, FALSE },
+ { "18S", 1000, FALSE },
+ { "23S", 2000, FALSE },
+ { "25S", 1000, FALSE },
+ { "26S", 1000, FALSE },
+ { "28S", 1000, FALSE },
+ { "28S", 3300, FALSE }, /* NOTE(review): second "28S" entry; any 28S shorter than 3300 is flagged by this one, which makes the 1000 entry above redundant - confirm which threshold is intended */
+ { "small", 1000, FALSE },
+ { "large", 1000, FALSE },
+ { "5.8S", 130, TRUE },
+ { "5S", 90, TRUE },
+ { NULL, 0, FALSE} }; /* terminator: name == NULL */
+
+NLM_EXTERN Boolean IsShortrRNA (SeqFeatPtr sfp)
{
- ValNodePtr PNTR item_list;
- Int4 i;
+ Int4 i, len;
CharPtr rrna_name;
Boolean is_bad = FALSE;
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_rRNA || sfp->partial || (item_list = (ValNodePtr PNTR) data) == NULL) {
- return;
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_rRNA || sfp->partial) {
+ return FALSE;
}
- if (SeqLocLen (sfp->location) > 1000) {
- return;
- }
+ len = SeqLocLen (sfp->location);
rrna_name = GetRNAProductString(sfp, NULL);
- for (i = 0; rRNATerms[i] != NULL && !is_bad; i++) {
- if (StringISearch (rrna_name, rRNATerms[i]) != NULL) {
+ for (i = 0; rRNATerms[i].name != NULL && !is_bad; i++) {
+ if (StringISearch (rrna_name, rRNATerms[i].name) != NULL
+ && len < rRNATerms[i].min_length
+ && (!rRNATerms[i].ignore_partial || !sfp->partial)) {
is_bad = TRUE;
}
}
rrna_name = MemFree (rrna_name);
- if (is_bad) {
+ return is_bad;
+}
+
+
+static void FindShortrRNAsCallback (SeqFeatPtr sfp, Pointer data)
+{
+ ValNodePtr PNTR item_list;
+
+ if (IsShortrRNA(sfp) && (item_list = (ValNodePtr PNTR) data) != NULL) {
ValNodeAddPointer (item_list, OBJ_SEQFEAT, sfp);
}
}
@@ -24980,13 +26174,15 @@ static void GetFeatureList (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_li
Uint1 key;
CharPtr label = NULL;
+ // for sorting
+ CharPtr sorted_label = NULL;
+ ValNodePtr label_ls = NULL, vnp_label, subcat = NULL;
+
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
VisitFeaturesInSep (vnp->data.ptrvalue, &feature_list, ListAllFeatures);
}
for (vnp = feature_list; vnp != NULL; vnp = vnp->next) {
- item_list = vnp->data.ptrvalue;
- label = NULL;
curr = FeatDefFindNext (NULL, &key, &label, FEATDEF_ANY, TRUE);
while (curr != NULL && curr->featdef_key != vnp->choice) {
curr = FeatDefFindNext (curr, &key, &label, FEATDEF_ANY, TRUE);
@@ -24996,21 +26192,44 @@ static void GetFeatureList (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_li
} else {
label = curr->typelabel;
}
- fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt_fmt) + StringLen (label)));
- sprintf (fmt, fmt_fmt, label);
- cip = NewClickableItem (DISC_FEATURE_LIST, fmt, item_list);
- fmt = MemFree (fmt);
- vnp->choice = 0;
- vnp->data.ptrvalue = cip;
+ ValNodeAddPointer(&label_ls, 0, label);
+ };
+
+ label_ls = ValNodeSort(label_ls, SortVnpByString);
+
+ for (vnp_label = label_ls; vnp_label != NULL; vnp_label = vnp_label->next) {
+ sorted_label = vnp_label->data.ptrvalue;
+ for (vnp = feature_list; vnp != NULL; vnp = vnp->next) {
+ item_list = vnp->data.ptrvalue;
+ label = NULL;
+ curr = FeatDefFindNext (NULL, &key, &label, FEATDEF_ANY, TRUE);
+ while (curr != NULL && curr->featdef_key != vnp->choice) {
+ curr = FeatDefFindNext (curr, &key, &label, FEATDEF_ANY, TRUE);
+ }
+ if (curr == NULL) {
+ label = "unknown";
+ } else {
+ label = curr->typelabel;
+ }
+ if (StringCmp(sorted_label, label) != 0) {
+ continue;
+ }
+ fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt_fmt) + StringLen (label)));
+ sprintf (fmt, fmt_fmt, label);
+ cip = NewClickableItem (DISC_FEATURE_LIST, fmt, item_list);
+ fmt = MemFree (fmt);
+ ValNodeAddPointer(&subcat, 0, cip);
+ break;
+ }
}
- if (feature_list != NULL) {
+ if (subcat != NULL) {
cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
MemSet (cip, 0, sizeof (ClickableItemData));
cip->clickable_item_type = DISC_FEATURE_LIST;
cip->description = StringSave ("Feature List");
cip->item_list = NULL;
- cip->subcategories = feature_list;
+ cip->subcategories = subcat;
ValNodeAddPointer (discrepancy_list, 0, cip);
}
@@ -25379,6 +26598,7 @@ NLM_EXTERN void FindMismatchedComments (ValNodePtr PNTR discrepancy_list, ValNod
fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (sub_fmt) + StringLen ((CharPtr) sdp->data.ptrvalue)));
sprintf (fmt, sub_fmt, (CharPtr) sdp->data.ptrvalue);
cip = NewClickableItem (DISC_MISMATCHED_COMMENTS, fmt, comment_list);
+ fmt = MemFree (fmt);
ValNodeAddPointer (&cat_list, 0, cip);
comment_list = vnp;
}
@@ -25389,6 +26609,7 @@ NLM_EXTERN void FindMismatchedComments (ValNodePtr PNTR discrepancy_list, ValNod
fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (sub_fmt) + StringLen ((CharPtr) sdp->data.ptrvalue)));
sprintf (fmt, sub_fmt, (CharPtr) sdp->data.ptrvalue);
cip = NewClickableItem (DISC_MISMATCHED_COMMENTS, fmt, comment_list);
+ fmt = MemFree (fmt);
ValNodeAddPointer (&cat_list, 0, cip);
comment_list = NULL;
}
@@ -25874,7 +27095,7 @@ static void FindProjectIdSequences (ValNodePtr PNTR discrepancy_list, ValNodePtr
}
}
id_list = BspProjectIdListFree(id_list);
- prot_list = BspProjectIdListFree(id_list);
+ prot_list = BspProjectIdListFree(prot_list);
}
@@ -25946,7 +27167,7 @@ static void FindMissingStructuredComments (ValNodePtr PNTR discrepancy_list, Val
tmp_list = ValNodeExtractList (&count_list, count_list->choice);
}
}
- if (subcat->next == NULL) {
+ if (subcat != NULL && subcat->next == NULL) {
subcat = FreeClickableList (subcat);
} else {
cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
@@ -26006,6 +27227,655 @@ static void FindMissingGenomeAssemblyStructuredComments (ValNodePtr PNTR discrep
}
+/* Field inventory for one group of descriptors that share a label
+ * (a structured comment prefix, or a fixed label such as "DBLink" or
+ * "Molinfo"), plus the values collected for each field.
+ */
+typedef struct struccomfieldlist {
+ CharPtr prefix; /* group label; a private copy made with StringSave */
+ ValNodePtr field_list; /* field descriptors for this group; released with FieldTypeListFree */
+ ValNodePtr PNTR values_lists; /* array of ValNodeLists of DuplicateQuals; one slot per field_list entry */
+ ValNodePtr missing; /* OBJ_BIOSEQ entries for Bioseqs on which the group was not found */
+} StrucComFieldListData, PNTR StrucComFieldListPtr;
+
+
+/* Allocate a StrucComFieldList labelled with a private copy of prefix.
+ * Every list member starts out empty.  Release the result with
+ * StrucComFieldListFree.
+ */
+static StrucComFieldListPtr StrucComFieldListNew (CharPtr prefix)
+{
+  StrucComFieldListPtr new_list = (StrucComFieldListPtr) MemNew (sizeof (StrucComFieldListData));
+
+  new_list->missing = NULL;
+  new_list->values_lists = NULL;
+  new_list->field_list = NULL;
+  new_list->prefix = StringSave (prefix);
+  return new_list;
+}
+
+
+/* Release a StrucComFieldList and everything it owns.  Accepts NULL.
+ * Returns the cleared pointer so callers can write
+ * s = StrucComFieldListFree (s).
+ */
+static StrucComFieldListPtr StrucComFieldListFree (StrucComFieldListPtr s)
+{
+  Int4 slot, num_slots;
+
+  if (s == NULL) {
+    return s;
+  }
+
+  s->prefix = MemFree (s->prefix);
+  if (s->values_lists != NULL) {
+    /* the array has one slot per field, so measure field_list before it is freed */
+    num_slots = ValNodeLen (s->field_list);
+    for (slot = 0; slot < num_slots; slot++) {
+      s->values_lists[slot] = DuplicateQualListFree (s->values_lists[slot]);
+    }
+    s->values_lists = MemFree (s->values_lists);
+  }
+  s->field_list = FieldTypeListFree (s->field_list);
+  s->missing = ValNodeFree (s->missing);
+  s = MemFree (s);
+  return s;
+}
+
+
+/* Free a ValNode chain whose data pointers are StrucComFieldList objects,
+ * releasing each payload and then its node.  Returns NULL.
+ */
+static ValNodePtr StrucComFieldListValNodeListFree (ValNodePtr vnp)
+{
+  ValNodePtr node, following;
+
+  for (node = vnp; node != NULL; node = following) {
+    following = node->next;
+    /* detach first so ValNodeFree releases only this node */
+    node->next = NULL;
+    node->data.ptrvalue = StrucComFieldListFree (node->data.ptrvalue);
+    ValNodeFree (node);
+  }
+  return node;
+}
+
+
+/* ValNodeSort comparator: orders ValNodes holding StrucComFieldList
+ * objects by prefix (StringCmp).
+ * ptr1, ptr2: pointers to the ValNodePtr elements being compared.
+ * Returns 0 whenever either side cannot be examined (NULL argument,
+ * NULL node or NULL payload), matching the other comparators in this file.
+ */
+static int LIBCALLBACK StrucComFieldListValNode (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr vnp1;
+  ValNodePtr vnp2;
+  StrucComFieldListPtr s1, s2;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  vnp1 = *((ValNodePtr PNTR) ptr1);
+  vnp2 = *((ValNodePtr PNTR) ptr2);
+  /* guard the nodes themselves before reaching into them */
+  if (vnp1 == NULL || vnp2 == NULL) {
+    return 0;
+  }
+  s1 = vnp1->data.ptrvalue;
+  s2 = vnp2->data.ptrvalue;
+  if (s1 == NULL || s2 == NULL) {
+    return 0;
+  }
+  return StringCmp (s1->prefix, s2->prefix);
+}
+
+
+/* VisitDescriptorsInSep callback: for each structured comment user object,
+ * build a StrucComFieldList naming every field other than the prefix and
+ * suffix markers, and append it to the list passed through data.
+ * sdp:  descriptor being visited; anything but a structured comment is ignored.
+ * data: ValNodePtr PNTR, head of the list receiving the new entry.
+ */
+static void CollectStrucComFieldListCallback (SeqDescPtr sdp, Pointer data)
+{
+  UserObjectPtr uop;
+  UserFieldPtr ufp;
+  StrucComFieldListPtr s;
+  ValNodePtr vnp;
+
+  /* with nowhere to store the result, building it would only leak */
+  if (data == NULL) {
+    return;
+  }
+  if (sdp == NULL || sdp->choice != Seq_descr_user
+      || (uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
+      || !IsUserObjectStructuredComment (uop)) {
+    return;
+  }
+
+  s = StrucComFieldListNew (GetStructuredCommentPrefix (uop));
+  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+    /* a field without a label has no name to record */
+    if (ufp->label == NULL) {
+      continue;
+    }
+    if (!IsStructuredCommentPrefix (ufp) && !IsStructuredCommentSuffix (ufp)) {
+      vnp = ValNodeNew (NULL);
+      vnp->choice = StructuredCommentField_named;
+      vnp->data.ptrvalue = StringSave (ufp->label->str);
+      ValNodeAddPointer (&(s->field_list), FieldType_struc_comment_field, vnp);
+    }
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, 0, s);
+}
+
+
+/* Merge adjacent entries that carry the same prefix, then prepare every
+ * surviving entry for value collection.
+ * list: ValNode chain of StrucComFieldList objects.  Only neighbouring
+ *       entries are compared, so equal prefixes must already be adjacent.
+ * The head node is never removed, so the caller's pointer stays valid.
+ */
+static void ConsolidateStrucComFieldLists (ValNodePtr list)
+{
+ StrucComFieldListPtr s1, s2;
+ ValNodePtr prev, vnp, next;
+ if (list == NULL) {
+ return;
+ }
+ if (list->next != NULL) {
+ /* s1 is the entry currently absorbing duplicates; prev is its node */
+ s1 = list->data.ptrvalue;
+ prev = list;
+ for (vnp = list->next; vnp != NULL; vnp = next) {
+ next = vnp->next;
+ s2 = vnp->data.ptrvalue;
+ if (StringCmp (s1->prefix, s2->prefix) == 0) {
+ /* same prefix: move s2's fields onto s1, then unlink and free s2's node */
+ ValNodeLink (&(s1->field_list), s2->field_list);
+ s2->field_list = NULL;
+ prev->next = next;
+ vnp->next = NULL;
+ vnp = StrucComFieldListValNodeListFree(vnp);
+ } else {
+ /* new prefix: this entry becomes the merge target */
+ prev = vnp;
+ s1 = vnp->data.ptrvalue;
+ }
+ }
+ }
+
+ /* sort and de-duplicate each field list, then allocate one values slot per field */
+ for (vnp = list; vnp != NULL; vnp = vnp->next) {
+ s1 = vnp->data.ptrvalue;
+ s1->field_list = ValNodeSort (s1->field_list, SortVnpByFieldType);
+ ValNodeUnique (&(s1->field_list), SortVnpByFieldType, FieldTypeListFree);
+ s1->values_lists = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * ValNodeLen(s1->field_list));
+ }
+}
+
+
+/* VisitBioseqsInSep callback: record, for one Bioseq, an entry for every
+ * field of every known structured comment prefix.
+ * bsp:  Bioseq being visited; NULL and ISA_aa Bioseqs are skipped.
+ * data: ValNode chain of StrucComFieldList objects whose values_lists
+ *       arrays have already been allocated.
+ */
+static void FindInconsistentStructuredCommentsCallback (BioseqPtr bsp, Pointer data)
+{
+ ValNodePtr field_list;
+ StrucComFieldListPtr sl;
+ ValNodePtr vnp, vnp2;
+ SeqDescPtr sdp;
+ SeqMgrDescContext context;
+ Boolean found;
+ DuplicateQualPtr dq;
+ Int4 i;
+
+ if (bsp == NULL || ISA_aa (bsp->mol) || (field_list = (ValNodePtr) data) == NULL) {
+ return;
+ }
+
+ for (vnp = field_list; vnp != NULL; vnp = vnp->next) {
+ sl = vnp->data.ptrvalue;
+ found = FALSE;
+ /* look at each user descriptor on the Bioseq for a comment with this prefix */
+ for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
+ sdp != NULL;
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
+ if (IsUserObjectStructuredComment(sdp->data.ptrvalue)
+ && StringCmp (sl->prefix, GetStructuredCommentPrefix(sdp->data.ptrvalue)) == 0) {
+ /* i walks values_lists in step with field_list */
+ for (vnp2 = sl->field_list, i = 0; vnp2 != NULL; vnp2 = vnp2->next, i++) {
+ dq = DuplicateQualNew (OBJ_SEQDESC, sdp, vnp2);
+ ValNodeAddPointer (&(sl->values_lists[i]), 0, dq);
+ }
+ found = TRUE;
+ }
+ }
+ if (!found) {
+ /* no such comment: note the Bioseq as missing and add a Bioseq-level entry per field */
+ ValNodeAddPointer (&(sl->missing), OBJ_BIOSEQ, bsp);
+ for (vnp2 = sl->field_list, i = 0; vnp2 != NULL; vnp2 = vnp2->next, i++) {
+ dq = DuplicateQualNew (OBJ_BIOSEQ, bsp, vnp2);
+ ValNodeAddPointer (&(sl->values_lists[i]), 0, dq);
+ }
+ }
+ }
+
+}
+
+
+/* Build a ClickableItem describing a set of objects that share one value
+ * (or share the absence of a value) for a field.
+ * object:    noun used in the message; the formats append "s" to it.
+ * qual_name: field name shown in the message.
+ * value:     shared value; blank text selects the "missing" wording.
+ * item_type: stored as clickable_item_type.
+ * num_items: count compared against the list length to choose between the
+ *            "All ..." and "<n> ..." wording.
+ * item_list: objects for the item; sorted here and then owned by the result.
+ */
+static ClickableItemPtr
+MakeItemForListOfObjects
+(CharPtr object,
+ CharPtr qual_name,
+ CharPtr value,
+ Uint4 item_type,
+ Int4 num_items,
+ ValNodePtr item_list)
+{
+ ClickableItemPtr cip;
+ CharPtr missing_fmt = "%d %ss are missing field %s";
+ CharPtr all_fmt = "All %ss have field %s value '%s'";
+ CharPtr some_fmt = "%d %ss have field %s value '%s'";
+
+ item_list = ValNodeSort (item_list, SortVnpByDiscrepancyItemText);
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ cip->clickable_item_type = item_type;
+ cip->item_list = item_list;
+
+ /* buffers are sized from the format plus its string arguments; the extra 15 leaves room for the printed count */
+ if (StringHasNoText (value)) {
+ cip->description = (CharPtr) MemNew (sizeof (Char) *
+ (StringLen (missing_fmt) + StringLen (object) + StringLen (qual_name) + 15));
+ sprintf (cip->description, missing_fmt, ValNodeLen (cip->item_list), object, qual_name);
+ } else if (ValNodeLen (item_list) == num_items) {
+ cip->description = (CharPtr) MemNew (sizeof (Char) *
+ (StringLen (all_fmt) + StringLen (object) + StringLen (qual_name) + StringLen (value)));
+ sprintf (cip->description, all_fmt, object, qual_name, value);
+ } else {
+ cip->description = (CharPtr) MemNew (sizeof (Char) *
+ (StringLen (some_fmt) + 15 + StringLen (object) + StringLen (qual_name) + StringLen (value)));
+ sprintf (cip->description, some_fmt, ValNodeLen (item_list), object, qual_name, value);
+ }
+ return cip;
+}
+
+
+/* Build the one-line summary for a field, e.g. "name (some missing, all same)".
+ * qual_name:   field name placed at the start of the text.
+ * any_missing: TRUE when at least one object has no value for the field.
+ * num_cat:     number of value groups found, counting the "missing" group.
+ * Returns a newly allocated string the caller must free (NULL if the
+ * allocation fails).
+ * The buffer is always sized from the exact strings that get printed.
+ */
+static CharPtr GetFieldSummary (CharPtr qual_name, Boolean any_missing, Int4 num_cat)
+{
+  CharPtr desc;
+  CharPtr presence;
+  CharPtr consistency;
+  CharPtr all_missing = "all missing";
+  Int4 num_values;
+  Int4 len;
+
+  if (any_missing && num_cat == 1) {
+    /* the only group is the missing one: name + " (" + text + ")" + terminator */
+    len = StringLen (qual_name) + StringLen (all_missing) + 4;
+    desc = (CharPtr) MemNew (sizeof (Char) * len);
+    if (desc != NULL) {
+      sprintf (desc, "%s (%s)", qual_name, all_missing);
+    }
+    return desc;
+  }
+
+  presence = any_missing ? "some missing" : "all present";
+  /* groups that hold a real value: everything except the missing group */
+  num_values = any_missing ? num_cat - 1 : num_cat;
+  consistency = (num_values == 1) ? "all same" : "inconsistent";
+
+  /* name + " (" + presence + ", " + consistency + ")" + terminator */
+  len = StringLen (qual_name) + StringLen (presence) + StringLen (consistency) + 6;
+  desc = (CharPtr) MemNew (sizeof (Char) * len);
+  if (desc != NULL) {
+    sprintf (desc, "%s (%s, %s)", qual_name, presence, consistency);
+  }
+  return desc;
+}
+
+
+/* Flags accumulated by AnalyzeFieldReports while scanning report descriptions. */
+typedef struct fieldsummary {
+ Boolean any_missing; /* set when a description's trailing parenthetical mentions "missing" */
+ Boolean any_inconsistent; /* set when a description's trailing parenthetical mentions "inconsistent" */
+} FieldSummaryData, PNTR FieldSummaryPtr;
+
+
+/* Walk a tree of ClickableItems and record whether any description's
+ * trailing "(...)" summary mentions "missing" or "inconsistent".
+ * cip_list: ValNode chain of ClickableItemPtr; subcategories are visited
+ *           recursively.  Entries with no item are skipped.
+ * f:        flags to update; existing TRUE values are kept.
+ */
+static void AnalyzeFieldReports (ValNodePtr cip_list, FieldSummaryPtr f)
+{
+  ValNodePtr vnp;
+  ClickableItemPtr cip;
+  CharPtr cp;
+
+  /* stop early only once BOTH flags are set; testing one flag alone would
+   * end the scan before "missing" reports had been seen */
+  if (f == NULL || (f->any_missing && f->any_inconsistent)) {
+    return;
+  }
+  for (vnp = cip_list; vnp != NULL; vnp = vnp->next) {
+    cip = vnp->data.ptrvalue;
+    if (cip == NULL) {
+      continue;
+    }
+    /* only the last parenthetical carries the summary keywords */
+    cp = StringRChr (cip->description, '(');
+    if (cp != NULL) {
+      if (StringISearch (cp, "missing") != NULL) {
+        f->any_missing = TRUE;
+      }
+      if (StringISearch (cp, "inconsistent") != NULL) {
+        f->any_inconsistent = TRUE;
+      }
+    }
+    AnalyzeFieldReports (cip->subcategories, f);
+  }
+}
+
+
+/* Produce the "(presence, consistency)" suffix for a master report by
+ * scanning every item under cip_list with AnalyzeFieldReports.
+ * Returns a newly allocated string the caller must free.
+ */
+static CharPtr SummarizeFieldSummaries (ValNodePtr cip_list)
+{
+  FieldSummaryData totals;
+  CharPtr presence_text;
+  CharPtr consistency_text;
+  CharPtr result;
+  CharPtr fmt = "(%s, %s)";
+
+  MemSet (&totals, 0, sizeof (FieldSummaryData));
+  AnalyzeFieldReports (cip_list, &totals);
+
+  presence_text = totals.any_missing ? "some missing" : "all present";
+  consistency_text = totals.any_inconsistent ? "inconsistent" : "all same";
+
+  result = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (presence_text) + StringLen (consistency_text)));
+  sprintf (result, fmt, presence_text, consistency_text);
+  return result;
+}
+
+
+/* Build the report item for one field: a parent ClickableItem whose
+ * subcategories each hold the objects sharing one value of the field.
+ * values_list: address of a ValNode list of DuplicateQual entries for the
+ *              field; sorted in place.  NULL or an empty list yields NULL.
+ * item_type:   stored as clickable_item_type on the parent item.
+ * object:      noun passed through to MakeItemForListOfObjects.
+ */
+static ClickableItemPtr MakeItemForValuesList (ValNodePtr PNTR values_list, Uint4 item_type, CharPtr object)
+{
+ DuplicateQualPtr dq1, dq2;
+ ValNodePtr repeated = NULL, subcat = NULL, vnp_c;
+ ClickableItemPtr cip;
+ CharPtr qual_name;
+ Int4 num_items;
+ Boolean some_missing = FALSE;
+
+ if (values_list == NULL || (*values_list) == NULL) {
+ return NULL;
+ }
+
+ /* after sorting, equal values are adjacent, so one pass can group them */
+ *values_list = ValNodeSort (*values_list, SortVnpByDuplicateQualFieldTypeThenValue);
+ dq1 = (*values_list)->data.ptrvalue;
+ if (StringHasNoText (dq1->val)) {
+ some_missing = TRUE;
+ }
+ /* repeated collects the objects of the group currently being built; dq1 is its first entry */
+ ValNodeAddPointer (&repeated, dq1->choice, dq1->data);
+ num_items = ValNodeLen (*values_list);
+ qual_name = SummarizeFieldType (dq1->qual);
+ TrimSpacesAroundString(qual_name);
+ for (vnp_c = (*values_list)->next; vnp_c != NULL; vnp_c = vnp_c->next) {
+ dq2 = vnp_c->data.ptrvalue;
+ if (StringCmp (dq1->val, dq2->val) != 0) {
+ /* value changed: close the current group and start a new one at dq2 */
+ cip = MakeItemForListOfObjects (object, qual_name, dq1->val, item_type, num_items, repeated);
+ ValNodeAddPointer (&subcat, 0, cip);
+ repeated = NULL;
+ dq1 = dq2;
+ if (StringHasNoText (dq1->val)) {
+ some_missing = TRUE;
+ }
+ }
+ ValNodeAddPointer (&repeated, dq2->choice, dq2->data);
+ }
+ /* close the final group */
+ cip = MakeItemForListOfObjects (object, qual_name, dq1->val, item_type, num_items, repeated);
+ repeated = NULL;
+ ValNodeAddPointer (&subcat, 0, cip);
+
+ /* parent item: its description is derived from the number of groups found */
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ cip->clickable_item_type = item_type;
+ cip->subcategories = subcat;
+ cip->description = GetFieldSummary(qual_name, some_missing, ValNodeLen (subcat));
+ qual_name = MemFree (qual_name);
+ return cip;
+}
+
+
+/* Build one report item per field of sl from the values collected so far.
+ * sl:        field list whose values_lists array has been filled in.
+ * item_type: clickable item type given to every report item.
+ * object:    noun used in the generated messages.
+ * Returns a ValNode chain of ClickableItemPtr, or NULL when sl is NULL,
+ * has no values array, or no field produced a report.
+ */
+static ValNodePtr GetDiscrepanciesForFieldedObjects (StrucComFieldListPtr sl, Uint4 item_type, CharPtr object)
+{
+  ValNodePtr subcat = NULL;
+  ValNodePtr vnp;
+  ClickableItemPtr cip;
+  Int4 i;
+
+  if (sl == NULL || sl->values_lists == NULL) {
+    return NULL;
+  }
+
+  for (vnp = sl->field_list, i = 0; vnp != NULL; i++, vnp = vnp->next) {
+    cip = MakeItemForValuesList (&(sl->values_lists[i]), item_type, object);
+    /* a field with no collected values yields no item; storing the NULL
+     * would hand later walkers a node with nothing to dereference */
+    if (cip != NULL) {
+      ValNodeAddPointer (&subcat, 0, cip);
+    }
+  }
+
+  return subcat;
+}
+
+
+/* Wrap the "missing" and "mismatch" report lists in a single top-level
+ * ClickableItem titled "<title> (<presence>, <consistency>)".
+ * Returns NULL when both lists are empty.  The lists become the item's
+ * subcategories (missing first, mismatch linked after).
+ */
+static ClickableItemPtr MakeMasterFieldedDiscrepancy (Uint4 item_type, CharPtr title, ValNodePtr missing_cat, ValNodePtr mismatch_cat)
+{
+  ClickableItemPtr master;
+  CharPtr summary;
+  CharPtr fmt = "%s %s";
+
+  if (missing_cat == NULL && mismatch_cat == NULL) {
+    return NULL;
+  }
+
+  master = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+  master->clickable_item_type = item_type;
+  master->subcategories = missing_cat;
+  ValNodeLink (&(master->subcategories), mismatch_cat);
+
+  summary = SummarizeFieldSummaries (master->subcategories);
+  master->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (title) + StringLen (summary)));
+  sprintf (master->description, fmt, title, summary);
+  summary = MemFree (summary);
+
+  return master;
+}
+
+
+/* Discrepancy test: report, per structured comment prefix, which Bioseqs
+ * lack the comment and how each field's values are distributed.
+ * discrepancy_list: receives one "Structured Comment Report" item when
+ *                   there is anything to report.
+ * sep_list:         ValNode chain whose data pointers are passed to the
+ *                   Visit...InSep functions.
+ */
+static void FindInconsistentStructuredComments (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr vnp, missing_cat = NULL, mismatch_cat = NULL;
+ ValNodePtr field_list = NULL;
+ StrucComFieldListPtr sl;
+ CharPtr missing_fmt = "%d Bioseqs are missing %s structured comment";
+ ClickableItemPtr cip;
+ CharPtr prefix, object;
+ CharPtr object_fmt = "%s structured comment";
+
+ /* pass 1: one field list per structured comment descriptor */
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitDescriptorsInSep (vnp->data.ptrvalue, &field_list, CollectStrucComFieldListCallback);
+ }
+ /* sort so prefixes appear next to each other */
+ field_list = ValNodeSort (field_list, StrucComFieldListValNode);
+ /* consolidate lists for prefixes */
+ ConsolidateStrucComFieldLists(field_list);
+
+ /* pass 2: collect per-Bioseq values for every field of every prefix */
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitBioseqsInSep (vnp->data.ptrvalue, field_list, FindInconsistentStructuredCommentsCallback);
+ }
+
+ for (vnp = field_list; vnp != NULL; vnp = vnp->next) {
+ sl = (StrucComFieldListPtr) vnp->data.ptrvalue;
+ prefix = sl->prefix;
+ if (StringHasNoText (prefix)) {
+ prefix = "unnamed";
+ }
+ if (sl->missing != NULL) {
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ cip->clickable_item_type = DISC_INCONSISTENT_STRUCTURED_COMMENTS;
+ /* hand the missing list over to the report item so it is not freed with sl */
+ cip->item_list = sl->missing;
+ sl->missing = NULL;
+ cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_fmt) + StringLen (prefix) + 15));
+ sprintf (cip->description, missing_fmt, ValNodeLen (cip->item_list), prefix);
+ ValNodeAddPointer (&missing_cat, 0, cip);
+ }
+ /* Add mismatch reports */
+ object = (CharPtr) MemNew (sizeof (Char) * (StringLen (object_fmt) + StringLen(prefix)));
+ sprintf (object, object_fmt, prefix);
+ ValNodeLink (&mismatch_cat, GetDiscrepanciesForFieldedObjects (sl, DISC_INCONSISTENT_STRUCTURED_COMMENTS, object));
+ object = MemFree (object);
+ }
+
+ cip = MakeMasterFieldedDiscrepancy (DISC_INCONSISTENT_STRUCTURED_COMMENTS,
+ "Structured Comment Report",
+ missing_cat, mismatch_cat);
+ if (cip != NULL) {
+ ValNodeAddPointer (discrepancy_list, 0, cip);
+ }
+
+ field_list = StrucComFieldListValNodeListFree (field_list);
+}
+
+
+/* Return TRUE when uop is a user object whose type string equals "DBLink"
+ * (case-insensitive); FALSE for NULL or any other type.
+ */
+NLM_EXTERN Boolean IsDBLinkObject (UserObjectPtr uop)
+{
+  if (uop != NULL && uop->type != NULL
+      && StringICmp (uop->type->str, "DBLink") == 0) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* VisitDescriptorsInSep callback: for each DBLink user object, build a
+ * StrucComFieldList labelled "DBLink" listing the recognised DBLink field
+ * types it contains, and append it to the list passed through data.
+ * sdp:  descriptor being visited; anything but a DBLink object is ignored.
+ * data: ValNodePtr PNTR, head of the list receiving the new entry.
+ */
+static void CollectDBLinkFieldListCallback (SeqDescPtr sdp, Pointer data)
+{
+  UserObjectPtr uop;
+  UserFieldPtr ufp;
+  StrucComFieldListPtr s;
+  Int4 field_type;
+
+  /* with nowhere to store the result, building it would only leak */
+  if (data == NULL) {
+    return;
+  }
+  if (sdp == NULL || sdp->choice != Seq_descr_user
+      || (uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
+      || !IsDBLinkObject (uop)) {
+    return;
+  }
+
+  s = StrucComFieldListNew ("DBLink");
+  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+    /* a field without a label cannot be mapped to a DBLink field type */
+    if (ufp->label == NULL) {
+      continue;
+    }
+    field_type = GetDBLinkFieldTypeFromDBLinkName (ufp->label->str);
+    if (field_type > -1) {
+      ValNodeAddInt (&(s->field_list), FieldType_dblink, field_type);
+    }
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, 0, s);
+}
+
+
+/* VisitBioseqsInSep callback: record, for one Bioseq, an entry for every
+ * DBLink field in the supplied field lists.
+ * bsp:  Bioseq being visited; NULL and ISA_aa Bioseqs are skipped.
+ * data: ValNode chain of StrucComFieldList objects whose values_lists
+ *       arrays have already been allocated.
+ */
+static void FindInconsistentDBLinkFieldsCallback (BioseqPtr bsp, Pointer data)
+{
+ ValNodePtr field_list;
+ StrucComFieldListPtr sl;
+ ValNodePtr vnp, vnp2;
+ SeqDescPtr sdp;
+ SeqMgrDescContext context;
+ Boolean found;
+ DuplicateQualPtr dq;
+ Int4 i;
+
+ if (bsp == NULL || ISA_aa (bsp->mol) || (field_list = (ValNodePtr) data) == NULL) {
+ return;
+ }
+
+ for (vnp = field_list; vnp != NULL; vnp = vnp->next) {
+ sl = vnp->data.ptrvalue;
+ found = FALSE;
+ /* every DBLink user descriptor on the Bioseq contributes one entry per field */
+ for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
+ sdp != NULL;
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
+ if (IsDBLinkObject(sdp->data.ptrvalue)) {
+ /* i walks values_lists in step with field_list */
+ for (vnp2 = sl->field_list, i = 0; vnp2 != NULL; vnp2 = vnp2->next, i++) {
+ dq = DuplicateQualNew (OBJ_SEQDESC, sdp, vnp2);
+ ValNodeAddPointer (&(sl->values_lists[i]), 0, dq);
+ }
+ found = TRUE;
+ }
+ }
+ if (!found) {
+ /* no DBLink object: note the Bioseq as missing and add a Bioseq-level entry per field */
+ ValNodeAddPointer (&(sl->missing), OBJ_BIOSEQ, bsp);
+ for (vnp2 = sl->field_list, i = 0; vnp2 != NULL; vnp2 = vnp2->next, i++) {
+ dq = DuplicateQualNew (OBJ_BIOSEQ, bsp, vnp2);
+ ValNodeAddPointer (&(sl->values_lists[i]), 0, dq);
+ }
+ }
+ }
+}
+
+
+/* Discrepancy test: report which Bioseqs lack a DBLink object and how each
+ * DBLink field's values are distributed.
+ * discrepancy_list: receives one "DBLink Report" item when there is
+ *                   anything to report.
+ * sep_list:         ValNode chain whose data pointers are passed to the
+ *                   Visit...InSep functions.
+ */
+static void FindInconsistentDBLinkFields (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr vnp, missing_cat = NULL, mismatch_cat = NULL;
+ ValNodePtr field_list = NULL;
+ StrucComFieldListPtr sl;
+ CharPtr missing_fmt = "%d Bioseqs are missing DBLink object";
+ ClickableItemPtr cip;
+
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitDescriptorsInSep (vnp->data.ptrvalue, &field_list, CollectDBLinkFieldListCallback);
+ }
+
+ /* every collected entry is labelled "DBLink", so no sort is needed before merging */
+ /* consolidate lists for prefixes */
+ ConsolidateStrucComFieldLists(field_list);
+
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitBioseqsInSep (vnp->data.ptrvalue, field_list, FindInconsistentDBLinkFieldsCallback);
+ }
+
+ for (vnp = field_list; vnp != NULL; vnp = vnp->next) {
+ sl = (StrucComFieldListPtr) vnp->data.ptrvalue;
+ if (sl->missing != NULL) {
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ cip->clickable_item_type = DISC_INCONSISTENT_DBLINK;
+ /* hand the missing list over to the report item so it is not freed with sl */
+ cip->item_list = sl->missing;
+ sl->missing = NULL;
+ cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_fmt) + 15));
+ sprintf (cip->description, missing_fmt, ValNodeLen (cip->item_list));
+ ValNodeAddPointer (&missing_cat, 0, cip);
+ }
+ /* Add mismatch reports */
+ ValNodeLink (&mismatch_cat, GetDiscrepanciesForFieldedObjects (sl, DISC_INCONSISTENT_DBLINK, "DBLink object"));
+ }
+
+ cip = MakeMasterFieldedDiscrepancy (DISC_INCONSISTENT_DBLINK,
+ "DBLink Report",
+ missing_cat, mismatch_cat);
+ if (cip != NULL) {
+ ValNodeAddPointer (discrepancy_list, 0, cip);
+ }
+
+ field_list = StrucComFieldListValNodeListFree (field_list);
+
+}
+
+
+/* VisitBioseqsInSep callback: add one Bioseq-level DuplicateQual entry per
+ * field, for every field list passed through data.
+ * NULL and ISA_aa Bioseqs are skipped, as is a NULL data pointer.
+ */
+static void FindInconsistentMolinfoTechCallback (BioseqPtr bsp, Pointer data)
+{
+  ValNodePtr all_lists;
+  ValNodePtr list_node, field_node;
+  StrucComFieldListPtr fields;
+  DuplicateQualPtr entry;
+  Int4 slot;
+
+  if (bsp == NULL || ISA_aa (bsp->mol)) {
+    return;
+  }
+  all_lists = (ValNodePtr) data;
+  if (all_lists == NULL) {
+    return;
+  }
+
+  for (list_node = all_lists; list_node != NULL; list_node = list_node->next) {
+    fields = list_node->data.ptrvalue;
+    slot = 0;
+    for (field_node = fields->field_list; field_node != NULL; field_node = field_node->next) {
+      entry = DuplicateQualNew (OBJ_BIOSEQ, bsp, field_node);
+      ValNodeAddPointer (&(fields->values_lists[slot]), 0, entry);
+      slot++;
+    }
+  }
+}
+
+
+/* Discrepancy test: report how the Molinfo technique value is distributed
+ * across Bioseqs.
+ * discrepancy_list: receives one "Molinfo Technique Report" item when
+ *                   there is anything to report.
+ * sep_list:         ValNode chain whose data pointers are passed to
+ *                   VisitBioseqsInSep.
+ */
+static void FindInconsistentMolinfoTech (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr vnp, missing_cat = NULL, mismatch_cat = NULL;
+ ValNodePtr field_list = NULL;
+ StrucComFieldListPtr sl;
+ CharPtr missing_fmt = "%d Bioseqs are missing Molinfo technique";
+ ClickableItemPtr cip;
+
+ /* build a single field list holding only the Molinfo technique field */
+ sl = StrucComFieldListNew ("Molinfo");
+ vnp = ValNodeNew (NULL);
+ vnp->choice = MolinfoField_technique;
+ ValNodeAddPointer (&(sl->field_list), FieldType_molinfo_field, vnp);
+ ValNodeAddPointer (&field_list, 0, sl);
+
+ /* yes, there's only one item in field list, but we want to set up the values array */
+ ConsolidateStrucComFieldLists(field_list);
+
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitBioseqsInSep (vnp->data.ptrvalue, field_list, FindInconsistentMolinfoTechCallback);
+ }
+
+ for (vnp = field_list; vnp != NULL; vnp = vnp->next) {
+ sl = (StrucComFieldListPtr) vnp->data.ptrvalue;
+ /* NOTE(review): the callback used above never adds to sl->missing, so this branch looks unreachable here - confirm */
+ if (sl->missing != NULL) {
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ cip->clickable_item_type = DISC_INCONSISTENT_MOLINFO_TECH;
+ cip->item_list = sl->missing;
+ sl->missing = NULL;
+ cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_fmt) + 15));
+ sprintf (cip->description, missing_fmt, ValNodeLen (cip->item_list));
+ ValNodeAddPointer (&missing_cat, 0, cip);
+ }
+ /* Add mismatch reports */
+ ValNodeLink (&mismatch_cat, GetDiscrepanciesForFieldedObjects (sl, DISC_INCONSISTENT_MOLINFO_TECH, "Molinfo"));
+ }
+
+ cip = MakeMasterFieldedDiscrepancy (DISC_INCONSISTENT_MOLINFO_TECH,
+ "Molinfo Technique Report",
+ missing_cat, mismatch_cat);
+ if (cip != NULL) {
+ ValNodeAddPointer (discrepancy_list, 0, cip);
+ }
+
+ field_list = StrucComFieldListValNodeListFree (field_list);
+}
+
+
static void FindCDSWithCDDXrefCallback (SeqFeatPtr sfp, Pointer data)
{
ValNodePtr vnp;
@@ -26202,6 +28072,20 @@ NLM_EXTERN Boolean IsLocationOrganelle (Uint1 genome)
}
}
+/* Return TRUE when the first source descriptor found for bsp has a genome
+ * value that IsLocationOrganelle accepts; FALSE when there is no source
+ * descriptor, no BioSource, or the genome is not an organelle.
+ */
+NLM_EXTERN Boolean IsBioseqOrganelle (BioseqPtr bsp)
+{
+  SeqMgrDescContext dcontext;
+  SeqDescrPtr source_desc;
+  BioSourcePtr biop;
+
+  source_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+  if (source_desc == NULL) {
+    return FALSE;
+  }
+  biop = source_desc->data.ptrvalue;
+  if (biop == NULL) {
+    return FALSE;
+  }
+  if (IsLocationOrganelle (biop->genome)) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
static void FindOrganelleNotGenomicCallback(BioseqPtr bsp, Pointer data)
{
SeqDescPtr sdp;
@@ -27107,7 +28991,7 @@ static void FindMrnaSequencesWithMinusStrandFeaturesCallback (BioseqPtr bsp, Poi
for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
sfp != NULL && !found;
sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) {
- if (context.strand == Seq_strand_minus) {
+ if (context.strand == Seq_strand_minus && sfp->idx.subtype != FEATDEF_primer_bind) {
found = TRUE;
}
}
@@ -27143,9 +29027,10 @@ static void FindTaxnameMissingFromDeflineCallback (BioseqPtr bsp, Pointer data)
SeqMgrDescContext context;
SeqDescPtr sdp;
BioSourcePtr biop;
- CharPtr cp;
+ CharPtr title, cp;
Int4 len;
CharPtr lookfor;
+ Boolean add = FALSE;
if (bsp == NULL || ISA_aa(bsp->mol) || data == NULL) {
return;
@@ -27167,17 +29052,24 @@ static void FindTaxnameMissingFromDeflineCallback (BioseqPtr bsp, Pointer data)
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);
if (sdp != NULL) {
- cp = StringISearch (sdp->data.ptrvalue, lookfor);
+ title = sdp->data.ptrvalue;
+ cp = StringISearch (title, lookfor);
if (cp == NULL) {
/* taxname not in defline at all */
- ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ add = TRUE;
} else {
/* capitalization must match for all but the first letter */
len = StringLen (lookfor);
if (StringNCmp (cp + 1, lookfor + 1, len - 1) != 0) {
- ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ add = TRUE;
+ }
+ if (cp != title && !isspace (*(cp - 1)) && !ispunct (*(cp - 1))) {
+ add = TRUE;
}
}
+ if (add) {
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ }
}
}
@@ -27186,15 +29078,19 @@ static void FindTaxnameMissingFromDefline (ValNodePtr PNTR discrepancy_list, Val
{
CharPtr bad_fmt = "%d deflines do not contain the complete taxname.";
ValNodePtr seqs = NULL, vnp;
+ SeqEntryPtr orig_scope;
if (discrepancy_list == NULL)
{
return;
}
+ orig_scope = SeqEntrySetScope (NULL);
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ SeqEntrySetScope(vnp->data.ptrvalue);
VisitBioseqsInSep (vnp->data.ptrvalue, &seqs, FindTaxnameMissingFromDeflineCallback);
}
+ SeqEntrySetScope(orig_scope);
if (seqs != NULL)
{
@@ -27234,6 +29130,872 @@ static void CountUnverifiedSequences (ValNodePtr PNTR discrepancy_list, ValNode
}
+/* VisitDescriptorsInSep callback: collect user descriptors for which
+ * NewRuleForStructuredComment finds a rule.
+ * data: ValNodePtr PNTR receiving OBJ_SEQDESC entries.
+ */
+static void FindSuspiciousStructuredCommentCallback (SeqDescPtr sdp, Pointer data)
+{
+  if (sdp == NULL || data == NULL) {
+    return;
+  }
+  if (sdp->choice != Seq_descr_user) {
+    return;
+  }
+  if (NewRuleForStructuredComment (sdp->data.ptrvalue) == NULL) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+}
+
+
+/* Discrepancy test: list structured comments that are invalid as written
+ * but would be valid with a different prefix.
+ * Adds a single ONCALLER_SWITCH_STRUCTURED_COMMENT_PREFIX item to
+ * discrepancy_list when any such comment is found.
+ */
+static void FindSuspiciousStructuredCommentPrefix (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  CharPtr bad_fmt = "%d structured comments are invalid but would be valid with a different prefix.";
+  ValNodePtr flagged = NULL;
+  ValNodePtr entry;
+
+  if (discrepancy_list == NULL || sep_list == NULL) {
+    return;
+  }
+
+  for (entry = sep_list; entry != NULL; entry = entry->next) {
+    VisitDescriptorsInSep (entry->data.ptrvalue, &flagged, FindSuspiciousStructuredCommentCallback);
+  }
+
+  if (flagged == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (ONCALLER_SWITCH_STRUCTURED_COMMENT_PREFIX, bad_fmt, flagged));
+}
+
+
+/* Autofix: give each listed structured comment the prefix (and suffix) of
+ * the rule returned by NewRuleForStructuredComment, reordering its fields
+ * when that rule requires an order.
+ * item_list: OBJ_SEQDESC entries to fix; other entries are ignored.
+ * data:      unused.
+ * lip:       optional log; when it has a file, one line per distinct new
+ *            prefix is written with the number of comments changed.
+ */
+static void SwitchSuspiciousStructuredCommentPrefix (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr vnp;
+  SeqDescPtr sdp;
+  UserObjectPtr uop;
+  CharPtr last_prefix;
+  ValNodePtr changed = NULL;
+  Int4 count;
+  CharPtr change_fmt = "Changed %d structured comment%s to %s prefix\n";
+  CommentRulePtr new_cr;
+
+  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice == OBJ_SEQDESC
+        && (sdp = (SeqDescPtr) vnp->data.ptrvalue) != NULL
+        && sdp->choice == Seq_descr_user
+        && (uop = (UserObjectPtr) sdp->data.ptrvalue) != NULL
+        && (new_cr = NewRuleForStructuredComment (uop)) != NULL) {
+      SetStructuredCommentPrefixAndSuffix (uop, new_cr->prefix);
+      if (new_cr->require_order) {
+        ReorderStructuredCommentFields (uop);
+      }
+      if (lip != NULL) {
+        lip->data_in_log = TRUE;
+        if (lip->fp != NULL) {
+          /* remember the prefix (borrowed from the rule) for the log tally */
+          ValNodeAddPointer (&changed, 0, new_cr->prefix);
+        }
+      }
+    }
+  }
+
+  /* changed is only populated when lip and lip->fp are non-NULL */
+  if (changed != NULL) {
+    changed = ValNodeSort (changed, SortVnpByString);
+    last_prefix = changed->data.ptrvalue;
+    count = 1;
+    for (vnp = changed->next; vnp != NULL; vnp = vnp->next) {
+      if (StringCmp (last_prefix, vnp->data.ptrvalue) != 0) {
+        fprintf (lip->fp, change_fmt, count, count == 1 ? "" : "s", last_prefix);
+        count = 0;
+        last_prefix = vnp->data.ptrvalue;
+      }
+      count++;
+    }
+    fprintf (lip->fp, change_fmt, count, count == 1 ? "" : "s", last_prefix);
+    /* release the tally nodes only; the prefix strings belong to the rules */
+    changed = ValNodeFree (changed);
+  }
+}
+
+
+/* Three-way comparison of two PCR primers: by name, then by sequence, both
+ * case-insensitive.  A NULL primer sorts before a non-NULL one; two NULLs
+ * compare equal.
+ */
+static int CmpPCRPrimer (PCRPrimerPtr p1, PCRPrimerPtr p2)
+{
+  int by_name;
+
+  if (p1 == NULL || p2 == NULL) {
+    if (p1 == NULL && p2 == NULL) {
+      return 0;
+    }
+    return (p1 == NULL) ? -1 : 1;
+  }
+
+  by_name = StringICmp (p1->name, p2->name);
+  if (by_name != 0) {
+    return by_name;
+  }
+  return StringICmp (p1->seq, p2->seq);
+}
+
+
+/* ValNodeSort comparator for nodes whose data pointers are PCR primers.
+ * Returns 0 when either argument or either node is NULL.
+ */
+static int LIBCALLBACK SortVnpByPCRPrimer (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left;
+  ValNodePtr right;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+  if (left == NULL || right == NULL) {
+    return 0;
+  }
+  return CmpPCRPrimer (left->data.ptrvalue, right->data.ptrvalue);
+}
+
+
+/* Three-way, order-insensitive comparison of two PCR primer chains.
+ * A NULL chain sorts before a non-NULL one.  Chains of different length
+ * order by length; chains of equal length are compared primer by primer
+ * after sorting each with SortVnpByPCRPrimer.
+ * The temporary sort lists are released on every path.
+ */
+static int CmpPCRPrimerList (PCRPrimerPtr list1, PCRPrimerPtr list2)
+{
+  ValNodePtr sort1 = NULL, sort2 = NULL, vnp1, vnp2;
+  PCRPrimerPtr p;
+  Int4 len1 = 0, len2 = 0;
+  int rval = 0;
+
+  if (list1 == NULL && list2 == NULL) {
+    return 0;
+  } else if (list1 == NULL) {
+    return -1;
+  } else if (list2 == NULL) {
+    return 1;
+  } else if (list1->next == NULL && list2->next == NULL) {
+    /* single primer on each side: nothing to sort */
+    return CmpPCRPrimer (list1, list2);
+  }
+
+  /* the ValNodes only borrow the primers; they are never freed here */
+  for (p = list1; p != NULL; p = p->next) {
+    ValNodeAddPointer (&sort1, 0, p);
+    len1++;
+  }
+  for (p = list2; p != NULL; p = p->next) {
+    ValNodeAddPointer (&sort2, 0, p);
+    len2++;
+  }
+
+  if (len1 < len2) {
+    rval = -1;
+  } else if (len1 > len2) {
+    rval = 1;
+  } else {
+    sort1 = ValNodeSort (sort1, SortVnpByPCRPrimer);
+    sort2 = ValNodeSort (sort2, SortVnpByPCRPrimer);
+    for (vnp1 = sort1, vnp2 = sort2;
+         vnp1 != NULL && vnp2 != NULL && rval == 0;
+         vnp1 = vnp1->next, vnp2 = vnp2->next) {
+      rval = CmpPCRPrimer (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
+    }
+    if (rval == 0) {
+      if (vnp1 != NULL) {
+        rval = 1;
+      } else if (vnp2 != NULL) {
+        rval = -1;
+      }
+    }
+  }
+
+  /* free the scratch lists whether or not the lengths matched */
+  sort1 = ValNodeFree (sort1);
+  sort2 = ValNodeFree (sort2);
+  return rval;
+}
+
+
+/* Three-way comparison of two PCR reactions: forward primer lists first,
+ * then reverse primer lists.  A NULL reaction sorts before a non-NULL one;
+ * two NULLs compare equal.
+ */
+static int CmpPCRReaction (PCRReactionPtr set1, PCRReactionPtr set2)
+{
+  int by_forward;
+
+  if (set1 == NULL || set2 == NULL) {
+    if (set1 == NULL && set2 == NULL) {
+      return 0;
+    }
+    return (set1 == NULL) ? -1 : 1;
+  }
+
+  by_forward = CmpPCRPrimerList (set1->forward, set2->forward);
+  if (by_forward != 0) {
+    return by_forward;
+  }
+  return CmpPCRPrimerList (set1->reverse, set2->reverse);
+}
+
+
+/* ValNodeSort comparator for nodes whose data pointers are PCR reactions.
+ * Returns 0 when either argument or either node is NULL.
+ */
+static int LIBCALLBACK SortVnpByPCRReaction (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left;
+  ValNodePtr right;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+  if (left == NULL || right == NULL) {
+    return 0;
+  }
+  return CmpPCRReaction (left->data.ptrvalue, right->data.ptrvalue);
+}
+
+
+
+/* Return TRUE when the BioSource carries at least two PCR reactions that
+ * compare equal under CmpPCRReaction.  A NULL BioSource, or one with
+ * fewer than two reactions, yields FALSE.
+ */
+static Boolean HasDuplicatePrimerPair (BioSourcePtr biop)
+{
+ PCRReactionPtr set;
+ ValNodePtr list = NULL, vnp;
+ Boolean rval = FALSE;
+
+ if (biop == NULL || biop->pcr_primers == NULL || biop->pcr_primers->next == NULL) {
+ return FALSE;
+ }
+
+ /* sort a scratch list of borrowed pointers so that equal reactions become neighbours */
+ for (set = biop->pcr_primers; set != NULL; set = set->next) {
+ ValNodeAddPointer (&list, 0, set);
+ }
+ list = ValNodeSort (list, SortVnpByPCRReaction);
+ set = list->data.ptrvalue;
+ /* compare each entry with its predecessor; stop at the first match */
+ for (vnp = list->next; vnp != NULL && !rval; vnp = vnp->next) {
+ if (CmpPCRReaction(set, vnp->data.ptrvalue) == 0) {
+ rval = TRUE;
+ } else {
+ set = vnp->data.ptrvalue;
+ }
+ }
+
+ /* frees the scratch nodes only, not the reactions they point to */
+ list = ValNodeFree (list);
+ return rval;
+}
+
+
+/* VisitDescriptorsInSep callback: collect source descriptors whose
+ * BioSource has duplicate primer pairs.
+ * data: ValNodePtr PNTR receiving OBJ_SEQDESC entries.
+ */
+static void FindDuplicatePrimerPairDescCallback (SeqDescPtr sdp, Pointer data)
+{
+  if (sdp == NULL || sdp->choice != Seq_descr_source) {
+    return;
+  }
+  if (!HasDuplicatePrimerPair (sdp->data.ptrvalue)) {
+    return;
+  }
+  if (data == NULL) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+}
+
+
+/* VisitFeaturesInSep callback: collect BioSource features whose BioSource
+ * has duplicate primer pairs.
+ * data: ValNodePtr PNTR receiving OBJ_SEQFEAT entries.
+ */
+static void FindDuplicatePrimerPairFeatCallback (SeqFeatPtr sfp, Pointer data)
+{
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC) {
+    return;
+  }
+  if (!HasDuplicatePrimerPair (sfp->data.value.ptrvalue)) {
+    return;
+  }
+  if (data == NULL) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+}
+
+
+ /* Discrepancy test ONCALLER_DUPLICATE_PRIMER_SET: visits the descriptors and
+  * features of every entry in sep_list and, if any BioSource with duplicate
+  * primer pairs was collected, appends one clickable item for them to
+  * discrepancy_list. */
+ static void FindDuplicatePCRPrimerPairs (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr entry;
+   ValNodePtr hits = NULL;
+
+   if (discrepancy_list == NULL || sep_list == NULL) {
+     return;
+   }
+
+   entry = sep_list;
+   while (entry != NULL) {
+     VisitDescriptorsInSep (entry->data.ptrvalue, &hits, FindDuplicatePrimerPairDescCallback);
+     VisitFeaturesInSep (entry->data.ptrvalue, &hits, FindDuplicatePrimerPairFeatCallback);
+     entry = entry->next;
+   }
+
+   if (hits == NULL) {
+     return;
+   }
+   ValNodeAddPointer (discrepancy_list, 0,
+                      NewClickableItem (ONCALLER_DUPLICATE_PRIMER_SET,
+                                        "%d BioSources have duplicate primer pairs.",
+                                        hits));
+ }
+
+
+ /* Removes PCR reactions from biop->pcr_primers that compare equal (under
+  * CmpPCRReaction) to another reaction in the same chain.
+  * One reaction of each group of equal reactions is kept; the others are
+  * unlinked from the chain and released with PCRReactionFree.
+  * Returns the number of reactions removed (0 when biop is NULL or holds
+  * fewer than two reactions). */
+ static Int4 RemoveDuplicatePCRPrimerPairsFromBioSource (BioSourcePtr biop)
+ {
+   PCRReactionPtr set, prev_set = NULL, next_set;
+   ValNodePtr list = NULL, dup = NULL, vnp;
+   Int4 rval = 0;
+   Boolean found;
+
+   if (biop == NULL || biop->pcr_primers == NULL || biop->pcr_primers->next == NULL) {
+     return 0;
+   }
+
+   /* make list of primer sets to sort */
+   for (set = biop->pcr_primers; set != NULL; set = set->next) {
+     ValNodeAddPointer (&list, 0, set);
+   }
+   /* sort primer set list */
+   list = ValNodeSort (list, SortVnpByPCRReaction);
+
+   /* find duplicates: every entry equal to the last distinct entry is
+    * recorded in dup with choice 1.  The scan must cover the whole list, so
+    * it is not conditioned on rval (which only counts removals below). */
+   set = list->data.ptrvalue;
+   for (vnp = list->next; vnp != NULL; vnp = vnp->next) {
+     if (CmpPCRReaction (set, vnp->data.ptrvalue) == 0) {
+       ValNodeAddPointer (&dup, 1, vnp->data.ptrvalue);
+     } else {
+       set = vnp->data.ptrvalue;
+     }
+   }
+   /* remove sorted list (no longer needed) */
+   list = ValNodeFree (list);
+
+   /* now remove sets identified as duplicates, walking the original chain
+    * so that prev_set always refers to the last reaction that was kept */
+   for (set = biop->pcr_primers; set != NULL; set = next_set) {
+     next_set = set->next;
+     found = FALSE;
+     for (vnp = dup; vnp != NULL && !found; vnp = vnp->next) {
+       if (vnp->choice == 1 && vnp->data.ptrvalue == set) {
+         found = TRUE;
+       }
+     }
+     if (found) {
+       if (prev_set == NULL) {
+         biop->pcr_primers = next_set;
+       } else {
+         prev_set->next = next_set;
+       }
+       /* detach before freeing so only this reaction is released */
+       set->next = NULL;
+       set = PCRReactionFree (set);
+       rval++;
+     } else {
+       prev_set = set;
+     }
+   }
+   dup = ValNodeFree (dup);
+
+   return rval;
+ }
+
+
+ /* Autofix for the duplicate primer pair test.  For every source descriptor
+  * or BioSource feature in item_list, duplicate PCR reactions are removed
+  * from its BioSource.  When anything was removed and lip is non-NULL, the
+  * log is flagged as containing data and the total is written to lip->fp
+  * (if open).  The data argument is not used. */
+ static void RemoveDuplicatePCRPrimerPairs (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+ {
+   ValNodePtr item;
+   SeqDescPtr desc;
+   SeqFeatPtr feat;
+   Int4 removed = 0;
+
+   for (item = item_list; item != NULL; item = item->next) {
+     if (item->choice == OBJ_SEQDESC) {
+       desc = (SeqDescPtr) item->data.ptrvalue;
+       if (desc != NULL && desc->choice == Seq_descr_source) {
+         removed += RemoveDuplicatePCRPrimerPairsFromBioSource (desc->data.ptrvalue);
+       }
+     } else if (item->choice == OBJ_SEQFEAT) {
+       feat = (SeqFeatPtr) item->data.ptrvalue;
+       if (feat != NULL && feat->data.choice == SEQFEAT_BIOSRC) {
+         removed += RemoveDuplicatePCRPrimerPairsFromBioSource (feat->data.value.ptrvalue);
+       }
+     }
+   }
+
+   if (removed <= 0 || lip == NULL) {
+     return;
+   }
+   lip->data_in_log = TRUE;
+   if (lip->fp != NULL) {
+     fprintf (lip->fp, "Removed %d duplicate PCR primer sets\n", removed);
+   }
+ }
+
+
+ /* Feature visitor: appends every FEATDEF_PROT feature (tagged OBJ_SEQFEAT)
+  * to the ValNode list that data points at. */
+ static void FindProteinNamesCallback (SeqFeatPtr sfp, Pointer data)
+ {
+   if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT && data != NULL) {
+     ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+   }
+ }
+
+
+ /* Returns the data.ptrvalue of the first node in the name list of a
+  * FEATDEF_PROT feature, or NULL when sfp is NULL, is not a protein feature,
+  * has no ProtRef, or has an empty name list.  The returned pointer is not a
+  * copy. */
+ static CharPtr FirstProtNameFromFeat (SeqFeatPtr sfp)
+ {
+   ProtRefPtr prot;
+
+   if (sfp == NULL || sfp->idx.subtype != FEATDEF_PROT) {
+     return NULL;
+   }
+   prot = (ProtRefPtr) sfp->data.value.ptrvalue;
+   if (prot == NULL || prot->name == NULL) {
+     return NULL;
+   }
+   return prot->name->data.ptrvalue;
+ }
+
+
+ /* Sort callback: ptr1 and ptr2 each point at a ValNodePtr holding a
+  * SeqFeatPtr; the features are ordered by StringCmp on their first protein
+  * name.  A NULL at either level makes the pair compare equal (0). */
+ static int LIBCALLBACK SortProtFeatByFirstProtName (VoidPtr ptr1, VoidPtr ptr2)
+ {
+   ValNodePtr lhs;
+   ValNodePtr rhs;
+   CharPtr lhs_name;
+   CharPtr rhs_name;
+
+   if (ptr1 == NULL || ptr2 == NULL) {
+     return 0;
+   }
+   lhs = *((ValNodePtr PNTR) ptr1);
+   rhs = *((ValNodePtr PNTR) ptr2);
+   if (lhs == NULL || rhs == NULL) {
+     return 0;
+   }
+   lhs_name = FirstProtNameFromFeat ((SeqFeatPtr) lhs->data.ptrvalue);
+   rhs_name = FirstProtNameFromFeat ((SeqFeatPtr) rhs->data.ptrvalue);
+   return StringCmp (lhs_name, rhs_name);
+ }
+
+
+ /* Splits an already-sorted ValNode list into runs of items that compare
+  * equal under compar and returns a new list with one ClickableItem per run.
+  *
+  *   list      - sorted input list; its nodes are not modified or consumed
+  *   item_type - stored in clickable_item_type of every ClickableItem created
+  *   compar    - comparison callback; it is called with the address of the
+  *               current group's item_list head and the address of the
+  *               candidate node, and must return 0 when they belong together
+  *
+  * Each group's item_list receives the choice and data.ptrvalue of its
+  * members (the pointers are shared with the input list).
+  * Returns NULL when list or compar is NULL.  If an allocation fails, the
+  * groups built so far are returned. */
+ static ValNodePtr LIBCALL ClickableItemCategorize PROTO ((ValNodePtr list, int item_type, int (LIBCALLBACK *compar )PROTO ((Nlm_VoidPtr, Nlm_VoidPtr ))))
+ {
+   ValNodePtr vnp;
+   ClickableItemPtr cip_current = NULL;
+   ValNodeBlock rval;
+
+   if (list == NULL || compar == NULL) {
+     return NULL;
+   }
+   InitValNodeBlock (&rval, NULL);
+
+   for (vnp = list; vnp != NULL; vnp = vnp->next) {
+     /* start a new group for the first item and whenever the item differs
+      * from the first member of the current group */
+     if (cip_current == NULL || compar (&(cip_current->item_list), &vnp) != 0) {
+       cip_current = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+       if (cip_current == NULL) {
+         break;
+       }
+       /* clear first, then set the type: the reverse order would wipe it */
+       MemSet (cip_current, 0, sizeof (ClickableItemData));
+       cip_current->clickable_item_type = item_type;
+       ValNodeAddPointerToEnd (&rval, 0, cip_current);
+     }
+     ValNodeAddPointer (&(cip_current->item_list), vnp->choice, vnp->data.ptrvalue);
+   }
+   return rval.head;
+ }
+
+
+ /* Unlinks and frees (via FreeClickableList) every node of *list whose
+  * ClickableItem is NULL or whose item_list has fewer than min entries.
+  * *list is updated when leading nodes are removed. */
+ static void RemoveLowItemCountClickableItems (ValNodePtr PNTR list, Int4 min)
+ {
+   ValNodePtr PNTR link;
+   ValNodePtr node;
+   ClickableItemPtr cip;
+
+   if (list == NULL) {
+     return;
+   }
+
+   /* link always addresses the pointer that currently leads to node */
+   link = list;
+   while ((node = *link) != NULL) {
+     cip = (ClickableItemPtr) node->data.ptrvalue;
+     if (cip != NULL && ValNodeLen (cip->item_list) >= min) {
+       link = &(node->next);
+     } else {
+       *link = node->next;
+       node->next = NULL;
+       node = FreeClickableList (node);
+     }
+   }
+ }
+
+
+ /* Discrepancy test DISC_PROTEIN_NAMES.
+  * Collects every protein feature in sep_list, groups the features by their
+  * first protein name, and keeps only groups with at least
+  * max(number of protein features, 100) members.  A single surviving group
+  * is linked directly onto discrepancy_list; several surviving groups are
+  * placed as subcategories under one "Many proteins have the same name"
+  * item.  Nothing is added when no group survives. */
+ static void FindFrequentlyAppearingProteinNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr vnp, cip_list = NULL;
+   ClickableItemPtr cip;
+   CharPtr bad_fmt = "%d proteins have name '%s'";
+   ValNodePtr list = NULL;
+   Int4 num_prots, min;
+   CharPtr prot_name;
+
+   if (discrepancy_list == NULL || sep_list == NULL)
+   {
+     return;
+   }
+
+   for (vnp = sep_list; vnp != NULL; vnp = vnp->next)
+   {
+     VisitFeaturesInSep (vnp->data.ptrvalue, &list, FindProteinNamesCallback);
+   }
+
+   /* sort so that equal names are adjacent, then split into groups */
+   num_prots = ValNodeLen (list);
+   list = ValNodeSort (list, SortProtFeatByFirstProtName);
+   cip_list = ClickableItemCategorize (list, DISC_PROTEIN_NAMES, SortProtFeatByFirstProtName);
+   list = ValNodeFree (list);
+
+   min = num_prots;
+   if (min < 100) {
+     min = 100;
+   }
+   RemoveLowItemCountClickableItems (&cip_list, min);
+
+   for (vnp = cip_list; vnp != NULL; vnp = vnp->next) {
+     cip = (ClickableItemPtr) vnp->data.ptrvalue;
+     prot_name = FirstProtNameFromFeat (cip->item_list->data.ptrvalue);
+     if (prot_name == NULL) {
+       /* features without a name form a group too; never hand NULL to %s */
+       prot_name = "";
+     }
+     /* 15 extra bytes leave room for the expanded %d and the terminator */
+     cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + StringLen (prot_name) + 15));
+     if (cip->description != NULL) {
+       sprintf (cip->description, bad_fmt, (int) ValNodeLen (cip->item_list), prot_name);
+     }
+     cip->clickable_item_type = DISC_PROTEIN_NAMES;
+   }
+
+   if (cip_list == NULL) {
+     /* do nothing, nothing found */
+   } else if (cip_list->next == NULL) {
+     ValNodeLink (discrepancy_list, cip_list);
+   } else {
+     cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+     MemSet (cip, 0, sizeof (ClickableItemData));
+     cip->description = StringSave ("Many proteins have the same name");
+     cip->subcategories = cip_list;
+     cip->clickable_item_type = DISC_PROTEIN_NAMES;
+     ValNodeAddPointer (discrepancy_list, 0, cip);
+   }
+ }
+
+
+ /* Returns TRUE only for the upper-case letters 'A', 'T', 'G' and 'C'. */
+ static Boolean IsATGC (Char ch)
+ {
+   switch (ch) {
+     case 'A':
+     case 'T':
+     case 'G':
+     case 'C':
+       return TRUE;
+     default:
+       return FALSE;
+   }
+ }
+
+
+ /* Returns TRUE when defline ends with a run of at least 19 characters that
+  * are all upper-case A, T, G or C.  Returns FALSE for NULL or blank text.
+  * The scan is index based so that the first character of the string is
+  * examined as well: a title consisting of exactly 19 such characters is
+  * reported just like a longer title that ends with them. */
+ static Boolean EndsWithSequence (CharPtr defline)
+ {
+   const Int4 min_run = 19;
+   size_t pos;
+   Int4 count = 0;
+
+   if (StringHasNoText (defline)) {
+     return FALSE;
+   }
+
+   /* walk backwards from the last character; stop once the run is long
+    * enough, the string is exhausted, or a non-ATGC character is met */
+   pos = StringLen (defline);
+   while (pos > 0 && count < min_run && IsATGC (defline[pos - 1])) {
+     pos--;
+     count++;
+   }
+
+   return (count >= min_run) ? TRUE : FALSE;
+ }
+
+
+ /* Descriptor visitor: appends title descriptors whose text passes
+  * EndsWithSequence (tagged OBJ_SEQDESC) to the ValNode list that data
+  * points at. */
+ static void FindSequenceCharAtEndOfDeflineCallback (SeqDescPtr sdp, Pointer data)
+ {
+   if (sdp != NULL
+       && sdp->choice == Seq_descr_title
+       && EndsWithSequence (sdp->data.ptrvalue)) {
+     ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+   }
+ }
+
+
+ /* Discrepancy test DISC_TITLE_ENDS_WITH_SEQUENCE: visits the descriptors of
+  * every entry in sep_list and, if any matching title was collected, appends
+  * one clickable item for them to discrepancy_list. */
+ static void FindSequenceCharAtEndOfDefline (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr entry;
+   ValNodePtr hits = NULL;
+
+   if (discrepancy_list == NULL || sep_list == NULL) {
+     return;
+   }
+
+   entry = sep_list;
+   while (entry != NULL) {
+     VisitDescriptorsInSep (entry->data.ptrvalue, &hits, FindSequenceCharAtEndOfDeflineCallback);
+     entry = entry->next;
+   }
+
+   if (hits == NULL) {
+     return;
+   }
+   ValNodeAddPointer (discrepancy_list, 0,
+                      NewClickableItem (DISC_TITLE_ENDS_WITH_SEQUENCE,
+                                        "%d deflines appear to end with sequence characters",
+                                        hits));
+ }
+
+
+ /* Bioseq visitor: appends bsp (tagged OBJ_BIOSEQ) to the ValNode list that
+  * data points at when bsp is a delta sequence that either has a delta
+  * segment for which IsDeltaSeqGap is true or, failing that, carries at
+  * least one FEATDEF_gap feature. */
+ static void FindSequencesWithGapsCallback(BioseqPtr bsp, Pointer data)
+ {
+   DeltaSeqPtr segment;
+   SeqMgrFeatContext fcontext;
+   Boolean gap_found = FALSE;
+
+   if (bsp == NULL || data == NULL || bsp->repr != Seq_repr_delta) {
+     return;
+   }
+
+   segment = (DeltaSeqPtr) (bsp->seq_ext);
+   while (segment != NULL && !gap_found) {
+     if (IsDeltaSeqGap (segment)) {
+       gap_found = TRUE;
+     }
+     segment = segment->next;
+   }
+
+   /* the feature lookup is only needed when no delta gap was seen */
+   if (!gap_found
+       && SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_gap, &fcontext) != NULL) {
+     gap_found = TRUE;
+   }
+
+   if (gap_found) {
+     ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
+   }
+ }
+
+
+ /* Discrepancy test DISC_GAPS: visits the Bioseqs of every entry in sep_list
+  * and, if any sequence with gaps was collected, appends one clickable item
+  * for them to discrepancy_list. */
+ static void FindSequencesWithGaps(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr entry;
+   ValNodePtr hits = NULL;
+
+   if (discrepancy_list == NULL || sep_list == NULL) {
+     return;
+   }
+
+   entry = sep_list;
+   while (entry != NULL) {
+     VisitBioseqsInSep (entry->data.ptrvalue, &hits, FindSequencesWithGapsCallback);
+     entry = entry->next;
+   }
+
+   if (hits == NULL) {
+     return;
+   }
+   ValNodeAddPointer (discrepancy_list, 0,
+                      NewClickableItem (DISC_GAPS, "%d sequences contain gaps", hits));
+ }
+
+
+ /* Returns TRUE when sdp is a user descriptor of type "StructuredComment"
+  * whose prefix is "##Genome-Annotation-Data-START##" and which has a field
+  * labelled "Annotation Pipeline" with choice 1 and the value
+  * "NCBI Prokaryotic Genome Annotation Pipeline".  All string comparisons
+  * are case-insensitive. */
+ static Boolean IsBGPipe (SeqDescPtr sdp)
+ {
+   UserObjectPtr comment;
+   UserFieldPtr field;
+
+   if (sdp == NULL || sdp->choice != Seq_descr_user) {
+     return FALSE;
+   }
+   comment = (UserObjectPtr) sdp->data.ptrvalue;
+   if (comment == NULL
+       || comment->type == NULL
+       || StringICmp (comment->type->str, "StructuredComment") != 0) {
+     return FALSE;
+   }
+
+   if (StringICmp (GetStructuredCommentPrefix (comment), "##Genome-Annotation-Data-START##") != 0) {
+     return FALSE;
+   }
+
+   for (field = comment->data; field != NULL; field = field->next) {
+     if (field->label == NULL) {
+       continue;
+     }
+     if (StringICmp (field->label->str, "Annotation Pipeline") != 0) {
+       continue;
+     }
+     if (field->choice != 1) {
+       continue;
+     }
+     if (StringICmp (field->data.ptrvalue, "NCBI Prokaryotic Genome Annotation Pipeline") == 0) {
+       return TRUE;
+     }
+   }
+   return FALSE;
+ }
+
+ /* Returns TRUE when any code break in the chain starting at crp has
+  * aa.choice == 1 and aa.value.intvalue == 42 (42 is '*' in ASCII);
+  * returns FALSE for an empty chain.
+  * NOTE(review): choice 1 is presumably the ncbieaa encoding, where '*'
+  * denotes a stop - confirm against the CodeBreak definition. */
+ static Boolean CodeBreakIsStopCodon(CodeBreakPtr crp)
+ {
+   CodeBreakPtr tmp;
+
+   for (tmp = crp; tmp != NULL; tmp = tmp->next) {
+     if (tmp->aa.choice == 1 && tmp->aa.value.intvalue == 42) {
+       return TRUE;
+     }
+   }
+   return FALSE;
+ }
+
+
+ /* Bioseq visitor for the bad-BGPIPE-qualifier test.  Sequences that carry a
+  * SEQID_OTHER id, or that have no user descriptor accepted by IsBGPipe, are
+  * ignored.  On the remaining sequences a feature is appended (tagged
+  * OBJ_SEQFEAT) to the ValNode list that data points at when
+  *   - its except_text is not blank, or
+  *   - it is a coding region with code breaks, unless its comment is exactly
+  *     "ambiguity in stop codon" and CodeBreakIsStopCodon accepts the code
+  *     breaks. */
+ static void FindFeaturesWithBadBGPipeQualifiersCallback(BioseqPtr bsp, Pointer data)
+ {
+   ValNodePtr PNTR hits;
+   SeqMgrFeatContext fcontext;
+   SeqMgrDescContext dcontext;
+   SeqFeatPtr feat;
+   SeqDescPtr desc;
+   SeqIdPtr id;
+   CdRegionPtr cds;
+   Boolean from_bgpipe = FALSE;
+   Boolean flag_it;
+
+   hits = (ValNodePtr PNTR) data;
+   if (bsp == NULL || hits == NULL) {
+     return;
+   }
+
+   /* must not be refseq */
+   for (id = bsp->id; id != NULL; id = id->next) {
+     if (id->choice == SEQID_OTHER) {
+       return;
+     }
+   }
+
+   /* must be BGPIPE */
+   desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+   while (desc != NULL && !from_bgpipe) {
+     from_bgpipe = IsBGPipe (desc);
+     desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_user, &dcontext);
+   }
+   if (!from_bgpipe) {
+     return;
+   }
+
+   feat = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+   while (feat != NULL) {
+     flag_it = FALSE;
+     if (!StringHasNoText (feat->except_text)) {
+       flag_it = TRUE;
+     } else if (feat->data.choice == SEQFEAT_CDREGION) {
+       cds = (CdRegionPtr) (feat->data.value.ptrvalue);
+       if (cds != NULL && cds->code_break != NULL) {
+         if (feat->comment == NULL
+             || StringCmp (feat->comment, "ambiguity in stop codon") != 0
+             || !CodeBreakIsStopCodon (cds->code_break)) {
+           flag_it = TRUE;
+         }
+       }
+     }
+     if (flag_it) {
+       ValNodeAddPointer (hits, OBJ_SEQFEAT, feat);
+     }
+     feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &fcontext);
+   }
+ }
+
+
+ /* Discrepancy test DISC_BAD_BGPIPE_QUALS: visits the Bioseqs of every entry
+  * in sep_list and, if any offending feature was collected, appends one
+  * clickable item for them to discrepancy_list. */
+ static void FindBadBGPipeQuals(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr entry;
+   ValNodePtr hits = NULL;
+
+   if (discrepancy_list == NULL || sep_list == NULL) {
+     return;
+   }
+
+   entry = sep_list;
+   while (entry != NULL) {
+     VisitBioseqsInSep (entry->data.ptrvalue, &hits, FindFeaturesWithBadBGPipeQualifiersCallback);
+     entry = entry->next;
+   }
+
+   if (hits == NULL) {
+     return;
+   }
+   ValNodeAddPointer (discrepancy_list, 0,
+                      NewClickableItem (DISC_BAD_BGPIPE_QUALS,
+                                        "%d features contain invalid BGPIPE qualifiers",
+                                        hits));
+ }
+
+
+ /* Feature visitor: appends sfp (tagged OBJ_SEQFEAT) to the ValNode list
+  * that userdata points at when sfp is an ncRNA feature whose RNA-gen class
+  * is "lncrna" (case-insensitive), whose location is not partial at either
+  * end, and whose location length is below 200. */
+ static void FindShortlncRNACallback(SeqFeatPtr sfp, Pointer userdata)
+ {
+   ValNodePtr PNTR hits;
+   RnaRefPtr rna;
+   RNAGenPtr gen;
+   Boolean partial5, partial3;
+
+   hits = (ValNodePtr PNTR) userdata;
+   if (hits == NULL || sfp == NULL || sfp->idx.subtype != FEATDEF_ncRNA) {
+     return;
+   }
+   rna = (RnaRefPtr) sfp->data.value.ptrvalue;
+   if (rna == NULL || rna->ext.choice != 3) {
+     return;
+   }
+   gen = (RNAGenPtr) rna->ext.value.ptrvalue;
+   if (gen == NULL || StringICmp (gen->_class, "lncrna") != 0) {
+     return;
+   }
+
+   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+   if (!partial5 && !partial3 && SeqLocLen (sfp->location) < 200) {
+     ValNodeAddPointer (hits, OBJ_SEQFEAT, sfp);
+   }
+ }
+
+
+ /* Discrepancy test TEST_SHORT_LNCRNA: visits the features of every entry in
+  * sep_list and, if any short lncRNA feature was collected, appends one
+  * clickable item for them to discrepancy_list. */
+ static void FindShortlncRNA(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr entry;
+   ValNodePtr hits = NULL;
+
+   if (discrepancy_list == NULL || sep_list == NULL) {
+     return;
+   }
+
+   entry = sep_list;
+   while (entry != NULL) {
+     VisitFeaturesInSep (entry->data.ptrvalue, &hits, FindShortlncRNACallback);
+     entry = entry->next;
+   }
+
+   if (hits == NULL) {
+     return;
+   }
+   ValNodeAddPointer (discrepancy_list, 0,
+                      NewClickableItem (TEST_SHORT_LNCRNA,
+                                        "%d lncRNA features are suspiciously short",
+                                        hits));
+ }
+
+
+ /* Bioseq visitor: appends bsp (tagged OBJ_BIOSEQ) to the ValNode list that
+  * data points at when CheckBioseqEndsForNAndGap reports anything other
+  * than eEndIsChar_No for the N status of either end.  The gap results are
+  * fetched but not used. */
+ static void FindTerminalNsCallback (BioseqPtr bsp, Pointer data)
+ {
+   Uint1 begin_n, begin_gap, end_n, end_gap;
+
+   if (bsp == NULL) {
+     return;
+   }
+
+   CheckBioseqEndsForNAndGap (bsp, &begin_n, &begin_gap, &end_n, &end_gap);
+   if (begin_n == eEndIsChar_No && end_n == eEndIsChar_No) {
+     return;
+   }
+   ValNodeAddPointer ((ValNodePtr PNTR)data, OBJ_BIOSEQ, bsp);
+ }
+
+
+ /* Discrepancy test TEST_TERMINAL_NS: visits the Bioseqs of every entry in
+  * sep_list and, if any sequence with terminal Ns was collected, appends one
+  * clickable item for them to discrepancy_list. */
+ static void FindTerminalNs(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr entry;
+   ValNodePtr hits = NULL;
+
+   if (discrepancy_list == NULL || sep_list == NULL) {
+     return;
+   }
+
+   entry = sep_list;
+   while (entry != NULL) {
+     VisitBioseqsInSep (entry->data.ptrvalue, &hits, FindTerminalNsCallback);
+     entry = entry->next;
+   }
+
+   if (hits == NULL) {
+     return;
+   }
+   ValNodeAddPointer (discrepancy_list, 0,
+                      NewClickableItem (TEST_TERMINAL_NS, "%d sequences have terminal Ns", hits));
+ }
+
+
+ /* Appends the given parent object to list when it is a Bioseq or a
+  * Bioseq-set.  When the parent is a Seq-annot, the annot's own parent (from
+  * its idx block) is examined instead, repeatedly if necessary.  Any other
+  * parent type, or a NULL Seq-annot, adds nothing. */
+ static void AddParentToObjectList (Uint2 parenttype, Pointer parentptr, ValNodePtr PNTR list)
+ {
+   SeqAnnotPtr annot;
+
+   /* climb through Seq-annots until a different kind of parent is reached */
+   while (parenttype == OBJ_SEQANNOT && parentptr != NULL) {
+     annot = (SeqAnnotPtr) parentptr;
+     parenttype = annot->idx.parenttype;
+     parentptr = annot->idx.parentptr;
+   }
+
+   if (parenttype == OBJ_BIOSEQ || parenttype == OBJ_BIOSEQSET) {
+     ValNodeAddPointer (list, parenttype, parentptr);
+   }
+ }
+
+
+ /* Alignment visitor: for an alignment with a non-NULL score, hands the
+  * alignment's parent (from its idx block) to AddParentToObjectList with the
+  * ValNode list that data points at. */
+ static void FindAlignmentsWithScoresCallback(SeqAlignPtr salp, Pointer data)
+ {
+   if (salp != NULL && salp->score != NULL && data != NULL) {
+     AddParentToObjectList (salp->idx.parenttype, salp->idx.parentptr, (ValNodePtr PNTR) data);
+   }
+ }
+
+
+ /* Discrepancy test TEST_ALIGNMENT_HAS_SCORE: visits the alignments of every
+  * entry in sep_list and, if any parent object was collected, appends one
+  * clickable item for them to discrepancy_list. */
+ static void FindAlignmentsWithScores(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+ {
+   ValNodePtr entry;
+   ValNodePtr hits = NULL;
+
+   if (discrepancy_list == NULL || sep_list == NULL) {
+     return;
+   }
+
+   entry = sep_list;
+   while (entry != NULL) {
+     VisitAlignmentsInSep (entry->data.ptrvalue, &hits, FindAlignmentsWithScoresCallback);
+     entry = entry->next;
+   }
+
+   if (hits == NULL) {
+     return;
+   }
+   ValNodeAddPointer (discrepancy_list, 0,
+                      NewClickableItem (TEST_ALIGNMENT_HAS_SCORE,
+                                        "%d alignments have score attributes",
+                                        hits));
+ }
+
+
static Boolean WantThisResult (ClickableItemPtr cip, DiscrepancyConfigPtr dcp)
{
ValNodePtr item;
@@ -27343,7 +30105,7 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Joined Features: on when non-eukaryote", "JOINED_FEATURES", AddJoinedFeatureDiscrepancies, NULL },
{ "Overlapping Genes", "OVERLAPPING_GENES", AddOverlappingGeneDiscrepancies, NULL },
{ "Overlapping CDS", "OVERLAPPING_CDS", AddOverlappingCodingRegionDiscrepancies, MarkOverlappingCDSs },
- { "Contained CDS", "CONTAINED_CDS", AddContainedCodingRegionDiscrepancies, NULL },
+ { "Contained CDS", "CONTAINED_CDS", AddContainedCodingRegionDiscrepancies, ConvertContainedCDSToMiscFeat },
{ "CDS RNA Overlap", "RNA_CDS_OVERLAP", AddRNACDSOverlapDiscrepancies, NULL },
{ "Short Contig", "SHORT_CONTIG", FindShortContigs, RemoveShortContigsWithoutAnnotation },
{ "Inconsistent BioSource", "INCONSISTENT_BIOSOURCE", FindNonmatchingContigSources, NULL },
@@ -27352,7 +30114,7 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Suspect Product Name QuickFix", "DISC_PRODUCT_NAME_QUICKFIX", FindSuspectProductNames, FixSuspectProductNameQuickFixes },
{ "Inconsistent Source And Definition Line", "INCONSISTENT_SOURCE_DEFLINE", FindInconsistentSourceAndDefline, NULL },
{ "Partial CDSs in Complete Sequences", "PARTIAL_CDS_COMPLETE_SEQUENCE", FindParticalCDSsInCompleteSequences, NULL },
- { "Hypothetical or Unknown Protein with EC Number", "EC_NUMBER_ON_UNKNOWN_PROTEIN", FindUnknownProteinsWithECNumbers, NULL },
+ { "Hypothetical or Unknown Protein with EC Number", "EC_NUMBER_ON_UNKNOWN_PROTEIN", FindUnknownProteinsWithECNumbers, MoveEcNumberToNote },
{ "Find Missing Tax Lookups", "TAX_LOOKUP_MISSING", NULL, NULL } ,
{ "Find Tax Lookup Mismatches", "TAX_LOOKUP_MISMATCH", NULL, NULL },
{ "Find Short Sequences", "SHORT_SEQUENCES", FindShortSequences, NULL },
@@ -27379,6 +30141,7 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Runs of 10 or more Ns", "N_RUNS", BaseCountAndNRunDiscrepancies, NULL},
{ "Zero Base Counts", "ZERO_BASECOUNT", BaseCountAndNRunDiscrepancies, NULL},
{ "Adjacent PseudoGenes with Identical Text", "ADJACENT_PSEUDOGENES", FindAdjacentPseudoGenes, NULL},
+ { "Bioseqs longer than 5000nt without Annotations", "DISC_LONG_NO_ANNOTATION", FindLongBioseqsWithoutAnnotation, NULL},
{ "Bioseqs without Annotations", "NO_ANNOTATION", FindBioseqsWithoutAnnotation, NULL},
{ "Influenza Strain/Collection Date Mismatch", "DISC_INFLUENZA_DATE_MISMATCH", FindInfluenzaStrainCollectionDateMismatches, NULL},
{ "Introns shorter than 10 nt", "DISC_SHORT_INTRON", FindShortIntrons, AddExceptionsToShortIntrons},
@@ -27394,7 +30157,9 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Count features present or missing from sequences", "DISC_FEATURE_COUNT", CountFeaturesOnSequences, NULL},
{ "BioSources with the same specimen voucher should have the same taxname", "DISC_SPECVOUCHER_TAXNAME_MISMATCH", CollectSpecVoucherTaxnameDiscrepancies, NULL},
{ "Feature partialness should agree with gene partialness if endpoints match", "DISC_GENE_PARTIAL_CONFLICT", ReportPartialConflicts, NULL},
- { "Flatfile representation of object contains suspect text", "DISC_FLATFILE_FIND_ONCALLER", FindTextInFlatfileOncaller, OncallerToolSpellFix},
+ { "Flatfile representation of object contains suspect text", "DISC_FLATFILE_FIND_ONCALLER", FindTextInFlatfileOncaller, NULL},
+ { "Flatfile representation of object contains fixable suspect text", "DISC_FLATFILE_FIND_ONCALLER", FindTextInFlatfileOncaller, OncallerToolSpellFix},
+ { "Flatfile representation of object contains unfixable suspect text", "DISC_FLATFILE_FIND_ONCALLER", FindTextInFlatfileOncaller, NULL},
{ "Coding region product contains suspect text", "DISC_CDS_PRODUCT_FIND", FindTextInCDSProduct, NULL},
{ "Definition lines should be unique", "DISC_DUP_DEFLINE", FindDupDeflines, NULL},
{ "ATCC strain should also appear in culture collection", "DUP_DISC_ATCC_CULTURE_CONFLICT", CheckATCCStrainCultureCollConflict, AddATCCStrainToCultureColl},
@@ -27482,7 +30247,7 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Complete taxname should be present in definition line", "TEST_TAXNAME_NOT_IN_DEFLINE", FindTaxnameMissingFromDefline, NULL},
{ "Count number of unverified sequences", "TEST_COUNT_UNVERIFIED", CountUnverifiedSequences, NULL},
{ "Show translation exception", "SHOW_TRANSL_EXCEPT", ShowTranslExcept, NULL},
- { "Show hypothetic protein having a gene name", "SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME", ShowCDsHavingGene, NULL},
+ { "Show hypothetic protein having a gene name", "SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME", ShowCDsHavingGene, RemoveGeneNamesFromHypotheticalCodingRegions},
{ "Test defline existence", "TEST_DEFLINE_PRESENT", TestDeflineExistence, NULL},
{ "Remove mRNA overlapping a pseudogene", "TEST_MRNA_OVERLAPPING_PSEUDO_GENE", TestMrnaOverlappingPseudoGene, RmvMrnaOverlappingPseudoGene},
{ "Find completely overlapped genes", "FIND_OVERLAPPED_GENES", FindOverlappedGenes, NULL},
@@ -27506,7 +30271,28 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{"mRNA should have both protein_id and transcript_id", "MRNA_SHOULD_HAVE_PROTEIN_TRANSCRIPT_IDS", CheckFormRNAWithoutProTransIDs, NULL},
{"Country discription should only have 1 colon.", "ONCALLER_COUNTRY_COLON", CheckCountryColons, FixCountryColons},
{"Sequences with BioProject IDs","ONCALLER_BIOPROJECT_ID", FindBioProjectIdSequences, NULL },
- {"Type strain comment in OrgMod does not agree with organism name", "ONCALLER_STRAIN_TAXNAME_CONFLICT", StrainTaxnameConflict, NULL}
+ {"Type strain comment in OrgMod does not agree with organism name", "ONCALLER_STRAIN_TAXNAME_CONFLICT", StrainTaxnameConflict, NULL},
+ {"SubSource collected-by contains more than 3 names", "ONCALLER_MORE_NAMES_COLLECTED_BY", FindMoreNamesInCollectedBy, MarkAndRemoveCollectedItems},
+ {"SubSource identified-by contains more than 3 names", "ONCALLER_MORE_OR_SPEC_NAMES_IDENTIFIED_BY", FindMoreNamesInIdentifiedBy, MarkAndRemoveIdentifiedItems},
+ {"Suspected organism in identified-by SubSource", "ONCALLER_SUSPECTED_ORG_IDENTIFIED", FindSuspOrgNameInIdentified, MarkAndRemoveIdentifiedItems},
+ {"Suspected organism in collected-by SubSource", "ONCALLER_SUSPECTED_ORG_COLLECTED", FindSuspOrgNameInCollected, MarkAndRemoveCollectedItems},
+ {"Suspicious structured comment prefix", "ONCALLER_SWITCH_STRUCTURED_COMMENT_PREFIX", FindSuspiciousStructuredCommentPrefix, SwitchSuspiciousStructuredCommentPrefix},
+ {"Cit-sub affiliation street contains text from other affiliation fields", "DISC_CITSUB_AFFIL_DUP_TEXT", ReportCitSubAffilDuplicateText, RemoveCitSubAffilDuplicateText},
+ {"Duplicate PCR primer pair", "ONCALLER_DUPLICATE_PRIMER_SET", FindDuplicatePCRPrimerPairs, RemoveDuplicatePCRPrimerPairs},
+ {"Country name end with colon", "END_COLON_IN_COUNTRY", FindEndColon, RemoveEndColon},
+ {"Frequently appearing proteins", "DISC_PROTEIN_NAMES", FindFrequentlyAppearingProteinNames, NULL},
+ {"Sequence characters at end of defline", "DISC_TITLE_ENDS_WITH_SEQUENCE", FindSequenceCharAtEndOfDefline, NULL},
+ {"Inconsistent structured comments", "DISC_INCONSISTENT_STRUCTURED_COMMENTS", FindInconsistentStructuredComments, NULL},
+ {"Inconsistent DBLink fields", "DISC_INCONSISTENT_DBLINK", FindInconsistentDBLinkFields, NULL},
+ {"Inconsistent Molinfo Techniqueq", "DISC_INCONSISTENT_MOLINFO_TECH", FindInconsistentMolinfoTech, NULL},
+ {"Sequences with gaps", "DISC_GAPS", FindSequencesWithGaps, NULL},
+ {"Bad BGPIPE qualifiers", "DISC_BAD_BGPIPE_QUALS", FindBadBGPipeQuals, NULL},
+ {"Short lncRNA sequences", "TEST_SHORT_LNCRNA", FindShortlncRNA, NULL},
+ {"Ns at end of sequences", "TEST_TERMINAL_NS", FindTerminalNs, NULL},
+ {"Alignment has score attribute", "TEST_ALIGNMENT_HAS_SCORE", FindAlignmentsWithScores, NULL},
+ {"Uncultured Notes", "UNCULTURED_NOTES_ONCALLER", FindUnculturedNotes, NULL},
+ {"Special phrases of seq ids", "SEQ_ID_PHRASES", FindSeqIdHavingPhrases, NULL},
+ {"Product has string 'no product string in file'", "NO_PRODUCT_STRING", ProductsWithNoProductString, NULL}
};
@@ -27522,12 +30308,22 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
|| test_type == MOLTYPE_NOT_MRNA
|| test_type == TECHNIQUE_NOT_TSA
|| test_type == MISSING_STRUCTURED_COMMENT
- || test_type == MISSING_PROJECT)
+ || test_type == MISSING_PROJECT
+ || test_type == DISC_SUSPECT_PRODUCT_NAME
+ || test_type == DISC_PRODUCT_NAME_TYPO
+ || test_type == DISC_PRODUCT_NAME_QUICKFIX)
rval = TRUE;
else rval = FALSE;
break;
case eReportTypeDiscrepancy:
if (test_type == DISC_SOURCE_QUALS_ASNDISC
+ || test_type == UNCULTURED_NOTES_ONCALLER
+ || test_type == ONCALLER_COUNTRY_COLON
+ || test_type == END_COLON_IN_COUNTRY
+ || test_type == ONCALLER_MORE_NAMES_COLLECTED_BY
+ || test_type == ONCALLER_MORE_OR_SPEC_NAMES_IDENTIFIED_BY
+ || test_type == ONCALLER_SUSPECTED_ORG_IDENTIFIED
+ || test_type == ONCALLER_SUSPECTED_ORG_COLLECTED
|| test_type == ONCALLER_STRAIN_TAXNAME_CONFLICT
|| test_type == ONCALLER_BIOPROJECT_ID
|| test_type == DIVISION_CODE_CONFLICTS
@@ -27551,6 +30347,8 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
|| test_type == DISC_SPECVOUCHER_TAXNAME_MISMATCH
|| test_type == DISC_GENE_PARTIAL_CONFLICT
|| test_type == DISC_FLATFILE_FIND_ONCALLER
+ || test_type == DISC_FLATFILE_FIND_ONCALLER_FIXABLE
+ || test_type == DISC_FLATFILE_FIND_ONCALLER_UNFIXABLE
|| test_type == DISC_CDS_PRODUCT_FIND
|| test_type == DISC_DUP_DEFLINE
|| test_type == DISC_COUNT_NUCLEOTIDES
@@ -27612,7 +30410,10 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
|| test_type == TEST_SMALL_GENOME_SET_PROBLEM
|| test_type == TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES
|| test_type == TEST_TAXNAME_NOT_IN_DEFLINE
- || test_type == TEST_COUNT_UNVERIFIED) {
+ || test_type == TEST_COUNT_UNVERIFIED
+ || test_type == ONCALLER_SWITCH_STRUCTURED_COMMENT_PREFIX
+ || test_type == ONCALLER_CITSUB_AFFIL_DUP_TEXT
+ || test_type == ONCALLER_DUPLICATE_PRIMER_SET) {
rval = FALSE;
} else {
rval = TRUE;
@@ -27620,6 +30421,12 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
break;
case eReportTypeOnCaller:
if (test_type == DISC_RNA_NO_PRODUCT
+ || test_type == UNCULTURED_NOTES_ONCALLER
+ || test_type == END_COLON_IN_COUNTRY
+ || test_type == ONCALLER_MORE_NAMES_COLLECTED_BY
+ || test_type == ONCALLER_MORE_OR_SPEC_NAMES_IDENTIFIED_BY
+ || test_type == ONCALLER_SUSPECTED_ORG_IDENTIFIED
+ || test_type == ONCALLER_SUSPECTED_ORG_COLLECTED
|| test_type == ONCALLER_STRAIN_TAXNAME_CONFLICT
|| test_type == ONCALLER_BIOPROJECT_ID
|| test_type == DISC_SUSPECT_PRODUCT_NAME
@@ -27650,6 +30457,8 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
|| test_type == DISC_SPECVOUCHER_TAXNAME_MISMATCH
|| test_type == DISC_GENE_PARTIAL_CONFLICT
|| test_type == DISC_FLATFILE_FIND_ONCALLER
+ || test_type == DISC_FLATFILE_FIND_ONCALLER_FIXABLE
+ || test_type == DISC_FLATFILE_FIND_ONCALLER_UNFIXABLE
|| test_type == DISC_CDS_PRODUCT_FIND
|| test_type == DISC_DUP_DEFLINE
|| test_type == DISC_COUNT_NUCLEOTIDES
@@ -27714,13 +30523,19 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
|| test_type == TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES
|| test_type == TEST_TAXNAME_NOT_IN_DEFLINE
|| test_type == TEST_COUNT_UNVERIFIED
- || test_type == DISC_SHORT_RRNA) {
+ || test_type == DISC_SHORT_RRNA
+ || test_type == ONCALLER_SWITCH_STRUCTURED_COMMENT_PREFIX
+ || test_type == ONCALLER_CITSUB_AFFIL_DUP_TEXT
+ || test_type == ONCALLER_DUPLICATE_PRIMER_SET
+ || test_type == DISC_NO_ANNOTATION
+ || test_type == TEST_SHORT_LNCRNA) {
rval = TRUE;
}
break;
case eReportTypeMegaReport:
rval = TRUE;
break;
+ default: break;
}
return rval;
}
@@ -27800,8 +30615,8 @@ extern void PrintDiscrepancyTestList (FILE *fp)
fprintf (fp, "\n");
fprintf (fp, "Terms searched for by DISC_FLATFILE_FIND_ONCALLER:\n");
- for (i = 0; flatfile_find_list_oncaller[i] != NULL; i++) {
- fprintf (fp, "%s\n", flatfile_find_list_oncaller[i]);
+ for (i = 0; oncaller_tool_spell_fixes[i].find != NULL; i++) {
+ fprintf (fp, "%s\n", oncaller_tool_spell_fixes[i].find);
}
fprintf (fp, "\n");
@@ -27866,9 +30681,11 @@ extern void ConfigureForBigSequence (DiscrepancyConfigPtr dcp)
dcp->conf_list[DISC_PERCENTN] = TRUE;
dcp->conf_list[DISC_N_RUNS] = TRUE;
dcp->conf_list[DISC_ZERO_BASECOUNT] = TRUE;
+ dcp->conf_list[DISC_LONG_NO_ANNOTATION] = TRUE;
dcp->conf_list[DISC_NO_ANNOTATION] = TRUE;
dcp->conf_list[DISC_COUNT_NUCLEOTIDES] = TRUE;
dcp->conf_list[MISSING_GENOMEASSEMBLY_COMMENTS] = TRUE;
+ dcp->conf_list[DISC_GAPS] = TRUE;
if (dcp->use_big_test_set) {
/*
@@ -27943,12 +30760,12 @@ extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp)
}
dcp->conf_list[DISC_STRAIN_TAXNAME_MISMATCH] = FALSE;
dcp->conf_list[DISC_CITSUBAFFIL_CONFLICT] = FALSE;
- dcp->conf_list[DISC_OVERLAPPING_GENES] = FALSE;
+// dcp->conf_list[DISC_OVERLAPPING_GENES] = FALSE;
dcp->conf_list[DISC_INCONSISTENT_BIOSRC_DEFLINE] = FALSE;
dcp->conf_list[DISC_NO_TAXLOOKUP] = FALSE;
dcp->conf_list[DISC_BAD_TAXLOOKUP] = FALSE;
dcp->conf_list[DISC_COUNT_TRNA] = FALSE;
- dcp->conf_list[DISC_BADLEN_TRNA] = FALSE;
+ //dcp->conf_list[DISC_BADLEN_TRNA] = FALSE; // JIRA: SQD-3909
dcp->conf_list[DISC_STRAND_TRNA] = FALSE;
dcp->conf_list[DISC_COUNT_RRNA] = FALSE;
dcp->conf_list[DISC_CDS_OVERLAP_TRNA] = FALSE;
@@ -27974,7 +30791,7 @@ extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp)
dcp->conf_list[DISC_RETROVIRIDAE_DNA] = FALSE;
dcp->conf_list[DISC_MISSING_DEFLINES] = FALSE;
dcp->conf_list[ONCALLER_GENE_MISSING] = FALSE;
- dcp->conf_list[ONCALLER_SUPERFLUOUS_GENE] = FALSE;
+ //dcp->conf_list[ONCALLER_SUPERFLUOUS_GENE] = FALSE;
dcp->conf_list[ONCALLER_CONSORTIUM] = FALSE;
dcp->conf_list[DISC_FEATURE_LIST] = FALSE;
dcp->conf_list[TEST_ORGANELLE_PRODUCTS] = FALSE;
@@ -27991,6 +30808,7 @@ extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp)
dcp->conf_list[DISC_SRC_QUAL_PROBLEM] = FALSE;
dcp->conf_list[DISC_CATEGORY_HEADER] = FALSE;
dcp->conf_list[TEST_TAXNAME_NOT_IN_DEFLINE] = FALSE;
+ dcp->conf_list[TEST_SP_NOT_UNCULTURED] = FALSE;
}
@@ -28014,20 +30832,23 @@ extern void ConfigureForReportType (DiscrepancyConfigPtr dcp, EDiscrepancyReport
extern ValNodePtr CollectDiscrepancies (DiscrepancyConfigPtr dcp, ValNodePtr sep_list, PerformDiscrepancyTest taxlookup)
{
ValNodePtr discrepancy_list = NULL;
+ ValNodePtr vnp;
+ SeqEntryPtr sep;
+ Uint2 entityID;
Int4 i;
PerformDiscrepancyTest last_test_func = NULL;
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ sep = (SeqEntryPtr) vnp->data.ptrvalue;
+ entityID = ObjMgrGetEntityIDForChoice (sep);
+ if (SeqMgrFeaturesAreIndexed(entityID) == 0) {
+ SeqMgrIndexFeatures (entityID, NULL);
+ }
+ }
+
discrepancy_info_list[DISC_NO_TAXLOOKUP].test_func = taxlookup;
discrepancy_info_list[DISC_BAD_TAXLOOKUP].test_func = taxlookup;
- /* if a TSA report */
- for (i=0; i< MAX_DISC_TYPE; i++)
- dcp->conf_list[i] =
- !(dcp->conf_list[i]) ? dcp->conf_list[i]:
- (dcp->run_tsa_checks ?
- IsTestTypeAppropriateForReportType(i, eReportTypeTSA)
- : !IsTestTypeAppropriateForReportType(i, eReportTypeTSA));
-
for (i = 0; i < MAX_DISC_TYPE; i++)
{
if ((dcp == NULL || dcp->conf_list[i])
@@ -28166,20 +30987,26 @@ extern CharPtr GetBioseqSetLabel (BioseqSetPtr bssp)
}
+typedef struct num_bad { /* tallies filled in by CountNonATGCNTProc */
+ Int4 num_gap; /* number of '-' characters seen */
+ Int4 num_other; /* number of characters other than '-', 'A', 'T', 'G', 'C' */
+} NumBad, PNTR NumBadPtr;
+
+
+
static void LIBCALLBACK CountNonATGCNTProc (CharPtr sequence, Pointer userdata)
{
- Int4Ptr p_i;
CharPtr cp;
+ NumBadPtr p;
if (sequence == NULL || userdata == NULL) return;
- p_i = (Int4Ptr) userdata;
+ p = (NumBadPtr) userdata;
for (cp = sequence; *cp != 0; cp++)
{
- if (*cp != 'A' && *cp != 'T' && *cp != 'G' && *cp != 'C')
- {
- (*p_i) ++;
- }
+ if (*cp == '-') (p->num_gap) ++;
+ else if (*cp != 'A' && *cp != 'T' && *cp != 'G' && *cp != 'C')
+ (p->num_other) ++;
}
}
@@ -28195,14 +31022,17 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename)
SeqDescrPtr sdp;
CharPtr locus_tag = "";
CharPtr bsp_fmt = "%s (length %d)\n";
- CharPtr bsp_unusual_fmt = "%s (length %d, %d other)\n";
+ CharPtr bsp_unusual_other = "%s (length %d, %d other)\n";
+ CharPtr bsp_unusual_gap = "%s (length %d, %d gap)\n";
+ CharPtr bsp_unusual_other_gap = "%s (length %d, %d other, %d gap)\n";
ObjValNodePtr ovn;
SeqEntryPtr sep;
SeqSubmitPtr ssp;
Boolean special_flag = FALSE;
Uint1 data_choice;
ValNodePtr extra_fields = NULL, field, field_strings = NULL, field_values, val_vnp;
- Int4 field_len = 0, label_len, num_bad;
+ Int4 field_len = 0, label_len;
+ NumBad num_bad;
if (vnp == NULL)
{
@@ -28275,14 +31105,44 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename)
if (bsp != NULL)
{
tmp = GetBioseqLabel (vnp->data.ptrvalue);
- num_bad = 0;
- if ( !ISA_aa(bsp->mol)) SeqPortStream (bsp, 0, (Pointer) &num_bad, CountNonATGCNTProc);
- if (num_bad > 0) {
- row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_unusual_fmt) + StringLen (tmp) + 47));
- sprintf (row_text, bsp_unusual_fmt, tmp, bsp->length, num_bad);
+ num_bad.num_gap = 0;
+ num_bad.num_other = 0;
+ if ( !ISA_aa(bsp->mol)) {
+ SeqPortStream(bsp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL | EXPAND_GAPS_TO_DASHES,
+ (Pointer)&num_bad, CountNonATGCNTProc);
+ if (num_bad.num_other && num_bad.num_gap) {
+ row_text = (CharPtr) MemNew (
+ sizeof(Char) * (StringLen (bsp_unusual_other_gap) + StringLen (tmp) + 47));
+ sprintf (row_text, bsp_unusual_other_gap, tmp,bsp->length, num_bad.num_other,
+ num_bad.num_gap);
+ }
+ else if (num_bad.num_gap) {
+ row_text = (CharPtr) MemNew (
+ sizeof(Char) * (StringLen (bsp_unusual_gap) + StringLen (tmp) + 47));
+ sprintf (row_text, bsp_unusual_gap, tmp, bsp->length, num_bad.num_gap);
+ }
+ else if (num_bad.num_other) {
+ row_text = (CharPtr) MemNew (
+ sizeof(Char) * (StringLen (bsp_unusual_other) + StringLen (tmp) + 47));
+ sprintf (row_text, bsp_unusual_other, tmp, bsp->length, num_bad.num_other);
+ }
+ else {
+ row_text =(CharPtr) MemNew (
+ sizeof(Char) * (StringLen (bsp_fmt) + StringLen (tmp) + 32));
+ sprintf (row_text, bsp_fmt, tmp, bsp->length);
+ }
} else {
- row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_fmt) + StringLen (tmp) + 32));
- sprintf (row_text, bsp_fmt, tmp, bsp->length);
+ if (num_bad.num_gap) {
+ row_text = (CharPtr) MemNew (
+ sizeof(Char) * (StringLen (bsp_unusual_gap) + StringLen (tmp) + 47));
+ sprintf (row_text, bsp_unusual_gap, tmp, bsp->length, num_bad.num_gap);
+ }
+ else {
+ row_text =(CharPtr) MemNew (
+ sizeof(Char) * (StringLen (bsp_fmt) + StringLen (tmp) + 32));
+ sprintf (row_text, bsp_fmt, tmp, bsp->length);
+ }
+
}
tmp = MemFree (tmp);
}
@@ -28793,7 +31653,9 @@ extern void WriteDiscrepancyEx (FILE *fp, ClickableItemPtr dip, Boolean use_feat
fprintf (fp, "%s:", descr_prefix);
}
fprintf (fp, "%s\n", dip->description);
- if (DISC_SOURCE_QUALS_ASNDISC != dip->clickable_item_type) { // removed the duplicated output
+ if (DISC_SOURCE_QUALS_ASNDISC == dip->clickable_item_type) {
+ /* suppress duplicate information */
+ } else {
for (vnp = dip->subcategories; vnp != NULL; vnp = vnp->next) {
dip = vnp->data.ptrvalue;
if (dip != NULL) {
@@ -28834,7 +31696,7 @@ extern void DisableTRNATests (DiscrepancyConfigPtr dcp)
if (dcp != NULL) {
dcp->conf_list[DISC_COUNT_TRNA] = FALSE;
dcp->conf_list[DISC_DUP_TRNA] = FALSE;
- dcp->conf_list[DISC_BADLEN_TRNA] = FALSE;
+ //dcp->conf_list[DISC_BADLEN_TRNA] = FALSE; // JIRA: SQD-3909
dcp->conf_list[DISC_COUNT_RRNA] = FALSE;
dcp->conf_list[DISC_DUP_RRNA] = FALSE;
dcp->conf_list[DISC_TRANSL_NO_NOTE] = FALSE;
@@ -29017,54 +31879,58 @@ static Boolean OkToExpand (ClickableItemPtr cip, DiscReportOutputConfigPtr oc)
typedef struct discreportoutputflag {
CharPtr clickable_item_type;
CharPtr description;
+ CharPtr nofix_description;
} DiscReportOutputFlagData, PNTR DiscReportOutputFlagDataPtr;
DiscReportOutputFlagData extra_disc_fatal [] = {
- {"MISSING_GENOMEASSEMBLY_COMMENTS", NULL}
+ {"MISSING_GENOMEASSEMBLY_COMMENTS", NULL, NULL}
};
DiscReportOutputFlagData disc_fatal[] = {
- {"BAD_LOCUS_TAG_FORMAT", NULL},
- {"DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS", NULL},
- {"DISC_BACTERIA_SHOULD_NOT_HAVE_MRNA", NULL},
- {"DISC_CITSUBAFFIL_CONFLICT", NULL},
- {"DISC_INCONSISTENT_MOLTYPES", NULL},
- {"DISC_MAP_CHROMOSOME_CONFLICT", NULL},
- {"DISC_MICROSATELLITE_REPEAT_TYPE", NULL},
- {"DISC_MISSING_AFFIL", NULL},
- {"DISC_NONWGS_SETS_PRESENT", NULL},
- {"DISC_QUALITY_SCORES", "Quality scores are missing on some sequences."},
- {"DISC_RBS_WITHOUT_GENE", NULL},
- {"DISC_SHORT_RRNA", NULL},
- {"DISC_SEGSETS_PRESENT", NULL},
- {"DISC_SOURCE_QUALS_ASNDISC", "collection-date"},
- {"DISC_SOURCE_QUALS_ASNDISC", "country"},
- {"DISC_SOURCE_QUALS_ASNDISC", "isolation-source"},
- {"DISC_SOURCE_QUALS_ASNDISC", "host"},
- {"DISC_SOURCE_QUALS_ASNDISC", "strain"},
- {"DISC_SOURCE_QUALS_ASNDISC", "taxname"},
- {"DISC_SOURCE_QUALS_ASNDISC", "taxname (all present, all unique)"},
- {"DISC_SUBMITBLOCK_CONFLICT", NULL},
- {"DISC_SUSPECT_RRNA_PRODUCTS", NULL},
- {"DISC_TITLE_AUTHOR_CONFLICT", NULL},
- {"DISC_UNPUB_PUB_WITHOUT_TITLE", NULL},
- {"DISC_USA_STATE", NULL},
- {"EC_NUMBER_ON_UNKNOWN_PROTEIN", NULL},
- {"EUKARYOTE_SHOULD_HAVE_MRNA", "no mRNA present"},
- {"INCONSISTENT_LOCUS_TAG_PREFIX", NULL},
- {"INCONSISTENT_PROTEIN_ID", NULL},
- {"MISSING_GENES", NULL},
- {"MISSING_LOCUS_TAGS", NULL},
- {"MISSING_PROTEIN_ID", NULL},
- {"ONCALLER_ORDERED_LOCATION", NULL},
- {"PARTIAL_CDS_COMPLETE_SEQUENCE", NULL},
- {"PSEUDO_MISMATCH", NULL},
- {"RNA_CDS_OVERLAP", "coding regions are completely contained in RNAs"},
- {"RNA_NO_PRODUCT", NULL},
- {"SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME", NULL},
- {"SUSPECT_PRODUCT_NAMES", "Remove organism from product name"},
- {"SUSPECT_PRODUCT_NAMES", "Possible parsing error or incorrect formatting; remove inappropriate symbols"},
- {"TEST_OVERLAPPING_RRNAS", NULL}
+ {"BAD_LOCUS_TAG_FORMAT", NULL, NULL},
+ {"CONTAINED_CDS", NULL, "coding regions are completely contained in another coding region but have note"},
+ {"DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS", NULL, NULL},
+ {"DISC_BACTERIA_SHOULD_NOT_HAVE_MRNA", NULL, NULL},
+ {"DISC_BAD_BGPIPE_QUALS", NULL, NULL},
+ {"DISC_CITSUBAFFIL_CONFLICT", NULL, "No citsubs were found!"},
+ {"DISC_INCONSISTENT_MOLTYPES", NULL, "Moltypes are consistent"},
+ {"DISC_MAP_CHROMOSOME_CONFLICT", NULL, NULL},
+ {"DISC_MICROSATELLITE_REPEAT_TYPE", NULL, NULL},
+ {"DISC_MISSING_AFFIL", NULL, NULL},
+ {"DISC_NONWGS_SETS_PRESENT", NULL, NULL},
+ {"DISC_QUALITY_SCORES", "Quality scores are missing on some sequences.", NULL },
+ {"DISC_RBS_WITHOUT_GENE", NULL, NULL},
+ {"DISC_SHORT_RRNA", NULL, NULL},
+ {"DISC_SEGSETS_PRESENT", NULL, NULL},
+ {"DISC_SOURCE_QUALS_ASNDISC", "collection-date", NULL},
+ {"DISC_SOURCE_QUALS_ASNDISC", "country", NULL},
+ {"DISC_SOURCE_QUALS_ASNDISC", "isolation-source", NULL},
+ {"DISC_SOURCE_QUALS_ASNDISC", "strain", NULL},
+ {"DISC_SOURCE_QUALS_ASNDISC", "taxname", NULL},
+ {"DISC_SOURCE_QUALS_ASNDISC", "taxname (all present, all unique)", NULL},
+ {"DISC_SUBMITBLOCK_CONFLICT", NULL, NULL},
+ {"DISC_SUSPECT_RRNA_PRODUCTS", NULL, NULL},
+ {"DISC_TITLE_AUTHOR_CONFLICT", NULL, NULL},
+ {"DISC_UNPUB_PUB_WITHOUT_TITLE", NULL, NULL},
+ {"EC_NUMBER_ON_UNKNOWN_PROTEIN", NULL, NULL},
+ {"EUKARYOTE_SHOULD_HAVE_MRNA", "no mRNA present", NULL},
+ {"INCONSISTENT_LOCUS_TAG_PREFIX", NULL, NULL},
+ {"INCONSISTENT_PROTEIN_ID", NULL, NULL},
+ {"MISSING_GENES", NULL, NULL},
+ {"MISSING_LOCUS_TAGS", NULL, NULL},
+ {"MISSING_PROTEIN_ID", NULL, NULL},
+ {"N_RUNS", NULL, NULL},
+ {"ONCALLER_ORDERED_LOCATION", NULL, NULL},
+ {"PARTIAL_CDS_COMPLETE_SEQUENCE", NULL, NULL},
+ {"PSEUDO_MISMATCH", NULL, NULL},
+ {"RNA_CDS_OVERLAP", "coding regions are completely contained in RNAs", NULL},
+ {"RNA_CDS_OVERLAP", "coding regions completely contain RNAs", NULL},
+ {"RNA_NO_PRODUCT", NULL, NULL},
+ {"SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME", NULL, NULL},
+ {"SUSPECT_PRODUCT_NAMES", "Remove organism from product name", NULL},
+ {"SUSPECT_PRODUCT_NAMES", "Possible parsing error or incorrect formatting; remove inappropriate symbols", NULL},
+ {"TEST_OVERLAPPING_RRNAS", NULL, NULL},
+ {"TEST_TERMINAL_NS", NULL, NULL}
};
Uint4 disc_cnt = sizeof(disc_fatal)/sizeof(DiscReportOutputFlagData);
Uint4 extra_disc_cnt = sizeof(extra_disc_fatal)/sizeof(DiscReportOutputFlagData);
@@ -29074,7 +31940,11 @@ static Boolean NeedsOutputTag(CharPtr setting_name, CharPtr descp, DiscReportOut
Uint4 i;
for (i=0; i< cnt; i++) {
if (!StringICmp(setting_name, flagdt[i].clickable_item_type)
- && (flagdt[i].description == NULL || StringISearch(descp, flagdt[i].description) != NULL)) {
+ && (flagdt[i].nofix_description == NULL
+ || StringISearch(descp, flagdt[i].nofix_description) == NULL)
+ && (flagdt[i].description == NULL
+ || StringISearch(descp, flagdt[i].description) != NULL)) {
+
return TRUE;
}
}
@@ -29082,65 +31952,84 @@ static Boolean NeedsOutputTag(CharPtr setting_name, CharPtr descp, DiscReportOut
}
-static Boolean IsDiscCntGrt1(CharPtr descp)
+static Boolean ItemIsTrnaInCDS(ClickableItemPtr cip)
{
- CharPtr ptr, tmp, tmp2;
- Uint4 i;
- Uint8 cnt=0;
-
- ptr = StringChr(descp, ' ');
- if (ptr != NULL) {
- i = ptr - descp;
- tmp = (CharPtr) MemNew (i * sizeof(char));
- StringNCpy(tmp, descp, i);
- cnt = StringToUint8(tmp, (const char **)&tmp2);
- if (cnt > 1) return TRUE;
- else return FALSE;
+ if (cip != NULL && cip->clickable_item_type == DISC_RNA_CDS_OVERLAP
+ && cip->description != NULL
+ && StringSearch(cip->description, "completely contain tRNAs") != NULL) {
+ return TRUE;
+ } else {
+ return FALSE;
}
- return FALSE;
-
-} // IsDiscCntGrt1
+}
static void AddOutputTag(ClickableItemPtr cip, Boolean disc_count_nucleotides_grt_1, Boolean extratags)
{
CharPtr setting_name;
ValNodePtr sub_cate;
- ClickableItemPtr sub_cip;
+ Boolean has_sub_trna_in_cds = FALSE;
+ Boolean needs_tag = FALSE;
+ ClickableItemPtr subcip;
setting_name = GetDiscrepancyTestSettingName ((DiscrepancyType) cip->clickable_item_type);
- if (StringDoesHaveText(setting_name)) {
- if (cip->subcategories != NULL) {
- for (sub_cate = cip->subcategories; sub_cate != NULL; sub_cate = sub_cate->next) {
- sub_cip = (ClickableItemPtr)(sub_cate->data.ptrvalue);
- if (NeedsOutputTag(setting_name, sub_cip->description, disc_fatal, disc_cnt)
- || (extratags && NeedsOutputTag(setting_name, sub_cip->description,
- extra_disc_fatal, extra_disc_cnt)) ) {
- if (!StringCmp("DISC_SOURCE_QUALS_ASNDISC", setting_name)) {
- if (StringSearch(sub_cip->description, "some missing")
- || StringSearch(sub_cip->description, "some duplicate"))
- SetStringValue(&(sub_cip->description), "FATAL",
- ExistingTextOption_prefix_colon);
- }
- else SetStringValue (&(sub_cip->description), "FATAL",
- ExistingTextOption_prefix_colon);
- }
- }
+ // check subcategories first;
+ if (StringDoesHaveText(setting_name))
+ {
+ if (cip->subcategories != NULL)
+ {
+ // check subcategories
+ for (sub_cate = cip->subcategories;
+ sub_cate != NULL;
+ sub_cate = sub_cate->next)
+ {
+ subcip = (ClickableItemPtr)(sub_cate->data.ptrvalue);
+ if (ItemIsTrnaInCDS(subcip))
+ {
+ has_sub_trna_in_cds = TRUE;
+ }
+ AddOutputTag(subcip, disc_count_nucleotides_grt_1, extratags);
}
- if (NeedsOutputTag(setting_name, cip->description, disc_fatal, disc_cnt)
- || (extratags && NeedsOutputTag(setting_name, cip->description,
- extra_disc_fatal, extra_disc_cnt))) {
- if (!StringCmp("DISC_SOURCE_QUALS_ASNDISC", setting_name)) {
- if ( StringISearch(cip->description, "taxname (all present, all unique)")) {
- if (disc_count_nucleotides_grt_1)
- SetStringValue(&(cip->description), "FATAL", ExistingTextOption_prefix_colon);
- }
- else if (StringSearch(cip->description, "some missing")
- || StringSearch(cip->description, "some duplicate"))
- SetStringValue(&(cip->description), "FATAL", ExistingTextOption_prefix_colon);
- }
- else SetStringValue (&(cip->description), "FATAL", ExistingTextOption_prefix_colon);
+ }
+
+ // check self
+ needs_tag = FALSE;
+ if (NeedsOutputTag(setting_name, cip->description, disc_fatal, disc_cnt)
+ || (extratags && NeedsOutputTag(setting_name, cip->description,
+ extra_disc_fatal, extra_disc_cnt)))
+ {
+ if (StringCmp("DISC_SOURCE_QUALS_ASNDISC", setting_name) == 0)
+ {
+ if ( StringISearch(cip->description,
+ "taxname (all present, all unique)") != NULL && disc_count_nucleotides_grt_1)
+ {
+ needs_tag = TRUE;
+ }
+ else if (StringSearch(cip->description, "some missing") != NULL
+ || StringSearch(cip->description, "some duplicate") != NULL)
+ {
+ needs_tag = TRUE;
+ }
+ }
+ else
+ {
+ needs_tag = TRUE;
}
+ }
+ else if (has_sub_trna_in_cds || ItemIsTrnaInCDS(cip))
+ {
+ needs_tag = TRUE;
+ }
+ else if (cip->item_list && cip->item_list->fatal)
+ {
+ needs_tag = TRUE;
+ }
+
+ if (needs_tag)
+ {
+ SetStringValue(&(cip->description),
+ "FATAL", ExistingTextOption_prefix_colon);
+ }
}
} // AddOutputTag
@@ -29200,7 +32089,6 @@ static void WriteAsnDiscReportEx (ValNodePtr discrepancy_list, FILE *ofp, DiscRe
StringCpy(cip->description, ptr + StringLen("FATAL: "));
SetStringValue (&prefix, "FATAL", ExistingTextOption_prefix_colon);
}
-
if (oc->summary_report) {
fprintf (ofp, "%s%s\n", prefix == NULL ? "" : prefix, cip->description);
if ((oc->add_output_tag || oc->add_extra_output_tag) && SubsHaveTags(cip, oc))
@@ -29486,6 +32374,34 @@ NLM_EXTERN DiscReportOutputConfigPtr DiscReportOutputConfigNew ()
}
+/* Bioseq visitor callback: data points at an Int4 counter, which is
+ * incremented once for every Bioseq whose mol is not an amino-acid type.
+ * A NULL bsp or NULL data is ignored. */
+static void CountNucBioseqsCallback (BioseqPtr bsp, Pointer data)
+{
+  Int4Ptr counter;
+
+  if (bsp == NULL || data == NULL) {
+    return;
+  }
+  if (ISA_aa (bsp->mol)) {
+    return;
+  }
+
+  counter = (Int4Ptr) data;
+  *counter += 1;
+}
+
+
+/* Visits every Bioseq in sep with CountNucBioseqsCallback, accumulating the
+ * count into c->num_nucs.  Does nothing when c is NULL. */
+NLM_EXTERN void AddToOutputConfig(SeqEntryPtr sep, DiscReportOutputConfigPtr c)
+{
+  if (c != NULL) {
+    VisitBioseqsInSep (sep, &(c->num_nucs), CountNucBioseqsCallback);
+  }
+}
+
+
+/* Applies AddToOutputConfig to each node of list, passing the node's
+ * data.ptrvalue as the SeqEntryPtr and c as the configuration. */
+NLM_EXTERN void AddListToOutputConfig(ValNodePtr list, DiscReportOutputConfigPtr c)
+{
+  ValNodePtr node = list;
+
+  while (node != NULL) {
+    AddToOutputConfig ((SeqEntryPtr) node->data.ptrvalue, c);
+    node = node->next;
+  }
+}
+
+
NLM_EXTERN DiscReportOutputConfigPtr DiscReportOutputConfigFree (DiscReportOutputConfigPtr c)
{
if (c != NULL) {
@@ -29939,25 +32855,37 @@ NLM_EXTERN GlobalDiscrepReportPtr GlobalDiscrepReportNew ()
}
+/* Releases the list held by block: head receives the value returned by
+ * FreeGlobalDiscrepancyList and tail is cleared.  A NULL block is ignored. */
+static void FreeGlobalDiscrepancyListBlock(ValNodeBlockPtr block)
+{
+  if (block == NULL) {
+    return;
+  }
+
+  block->head = FreeGlobalDiscrepancyList (block->head);
+  block->tail = NULL;
+}
+
+
NLM_EXTERN GlobalDiscrepReportPtr GlobalDiscrepReportFree (GlobalDiscrepReportPtr g)
{
if (g != NULL) {
- g->locus_tag_list = FreeGlobalDiscrepancyList (g->locus_tag_list);
- g->missing_locus_tag = FreeGlobalDiscrepancyList (g->missing_locus_tag);
- g->cds_product_list = FreeGlobalDiscrepancyList (g->cds_product_list);
- g->missing_cds_product = FreeGlobalDiscrepancyList (g->missing_cds_product);
- g->mrna_product_list = FreeGlobalDiscrepancyList (g->mrna_product_list);
- g->missing_mrna_product = FreeGlobalDiscrepancyList (g->missing_mrna_product);
- g->missing_gnl_list = FreeGlobalDiscrepancyList (g->missing_gnl_list);
- g->gnl_list = FreeGlobalDiscrepancyList (g->gnl_list);
+ FreeGlobalDiscrepancyListBlock(&(g->locus_tag_list));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_locus_tag));
+ FreeGlobalDiscrepancyListBlock(&(g->cds_product_list));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_cds_product));
+ FreeGlobalDiscrepancyListBlock(&(g->mrna_product_list));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_mrna_product));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_gnl_list));
+ FreeGlobalDiscrepancyListBlock(&(g->gnl_list));
+ FreeGlobalDiscrepancyListBlock(&(g->global_prot_name_list));
g->global_srcs = ValNodeFreeData (g->global_srcs);
g->global_src_qual_vals = GlobalSrcValListFree (g->global_src_qual_vals);
- g->feature_count_list = FeatureCountListFree (g->feature_count_list);
+ g->feature_count_list.head = FeatureCountListFree (g->feature_count_list.head);
+ g->feature_count_list.tail = NULL;
g->src_qual_repeated_list = FreeClickableList (g->src_qual_repeated_list);
g->src_qual_multi_list = FreeClickableList (g->src_qual_multi_list);
- g->discrepancy_list = FreeClickableList (g->discrepancy_list);
+ g->discrepancy_list.head = FreeClickableList (g->discrepancy_list.head);
+ g->discrepancy_list.tail = NULL;
g->output_config = DiscReportOutputConfigFree (g->output_config);
g->test_config = DiscrepancyConfigFree (g->test_config);
g = MemFree (g);
@@ -30005,9 +32933,13 @@ static void GetLocalSourceQualReportItems (ValNodePtr src_list, ValNodePtr qual_
static void AddSourceQualReportInfoToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrepReportPtr g, CharPtr filename)
{
- ValNodePtr src_list, qual_list, vnp;
+ ValNodePtr src_list, qual_list, vnp, feat_list;
src_list = GetObjectListForFieldType (FieldType_source_qual, sep);
+ // remove source features from list */
+ feat_list = ValNodeExtractList (&src_list, OBJ_SEQFEAT);
+ feat_list = ValNodeFree (feat_list);
+
qual_list = GetSourceQualSampleFieldList (sep);
AdjustSourceQualSampleFieldListForOnCallerTest (&qual_list, src_list);
@@ -30023,6 +32955,15 @@ static void AddSourceQualReportInfoToGlobalDiscrepReport (SeqEntryPtr sep, Globa
src_list = ValNodeFree (src_list);
}
+/* Feature visitor callback used to collect protein names.
+ *
+ * sfp  - feature being visited; ignored unless its idx.subtype is FEATDEF_PROT.
+ * data - address of a ValNodePtr list head (ValNodePtr PNTR); ignored if NULL.
+ *
+ * For each protein feature, a GlobalDiscrepancy created from the string
+ * returned by FirstProtNameFromFeat is added to the list with
+ * ValNodeAddPointer.
+ */
+void FindGlobalFrequentlyAppearingProteinNames(SeqFeatPtr sfp, Pointer data)
+{
+  if (sfp == NULL || sfp->idx.subtype != FEATDEF_PROT || data == NULL) {
+    return;
+  }
+
+  ValNodeAddPointer ((ValNodePtr PNTR)data, 0,
+                     GlobalDiscrepancyNew( FirstProtNameFromFeat(sfp), OBJ_SEQFEAT, sfp));
+}
NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrepReportPtr g, CharPtr filename)
{
@@ -30034,6 +32975,7 @@ NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrep
GlobalDiscrepancyListsData lists;
GenProdSetDiscrepancyListsData gps_lists;
ProtIdListsData prot_lists;
+ ValNodePtr global_prot_name_list = NULL;
if (g == NULL || sep == NULL) return;
@@ -30051,6 +32993,12 @@ NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrep
MemSet (&prot_lists, 0, sizeof (ProtIdListsData));
VisitBioseqsInSep (sep, &prot_lists, FindProteinIDCallback);
+ // DISC_PROTEIN_NAMES
+ if (!(g->test_config->is_big_sequence)) {
+ VisitFeaturesInSep(sep, &global_prot_name_list,
+ FindGlobalFrequentlyAppearingProteinNames);
+ }
+
if (lists.locus_tag_list != NULL) {
/* collect adjacent genes */
lists.locus_tag_list = ValNodeSort (lists.locus_tag_list, SortVnpByGlobalDiscrepancyString);
@@ -30063,21 +33011,23 @@ NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrep
/* convert lists to strings and add to global lists */
ConvertGlobalDiscrepancyListToText (lists.locus_tag_list, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&(g->locus_tag_list), lists.locus_tag_list);
+ ValNodeLinkToEnd (&(g->locus_tag_list), lists.locus_tag_list);
ConvertGlobalDiscrepancyListToText (lists.missing_locus_tag, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&(g->missing_locus_tag), lists.missing_locus_tag);
+ ValNodeLinkToEnd (&(g->missing_locus_tag), lists.missing_locus_tag);
ConvertGlobalDiscrepancyListToText (gps_lists.cds_product_list, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&(g->cds_product_list), gps_lists.cds_product_list);
+ ValNodeLinkToEnd (&(g->cds_product_list), gps_lists.cds_product_list);
ConvertGlobalDiscrepancyListToText (gps_lists.missing_protein_id, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&(g->missing_cds_product), gps_lists.missing_protein_id);
+ ValNodeLinkToEnd (&(g->missing_cds_product), gps_lists.missing_protein_id);
ConvertGlobalDiscrepancyListToText (gps_lists.mrna_product_list, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&(g->mrna_product_list), gps_lists.mrna_product_list);
+ ValNodeLinkToEnd (&(g->mrna_product_list), gps_lists.mrna_product_list);
ConvertGlobalDiscrepancyListToText (gps_lists.missing_mrna_product, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&(g->missing_mrna_product), gps_lists.missing_mrna_product);
+ ValNodeLinkToEnd (&(g->missing_mrna_product), gps_lists.missing_mrna_product);
ConvertGlobalDiscrepancyListToText (prot_lists.gnl_list, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&g->gnl_list, prot_lists.gnl_list);
+ ValNodeLinkToEnd (&g->gnl_list, prot_lists.gnl_list);
ConvertGlobalDiscrepancyListToText (prot_lists.missing_gnl_list, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&g->missing_gnl_list, prot_lists.missing_gnl_list);
+ ValNodeLinkToEnd (&g->missing_gnl_list, prot_lists.missing_gnl_list);
+ ConvertGlobalDiscrepancyListToText (global_prot_name_list, g->output_config->use_feature_table_format, filename);
+ ValNodeLinkToEnd (&g->global_prot_name_list, global_prot_name_list);
if (g->test_config->conf_list[DISC_SOURCE_QUALS_ASNDISC]) {
AddSourceQualReportInfoToGlobalDiscrepReport (sep, g, filename);
@@ -30086,7 +33036,7 @@ NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrep
if (g->test_config->conf_list[DISC_FEATURE_COUNT]) {
VisitBioseqsInSep (sep, &local_counts, CountFeaturesOnSequenceCallback);
SaveFeatureCountSequenceIds (local_counts, filename);
- ValNodeLink (&(g->feature_count_list), local_counts);
+ ValNodeLinkToEnd (&(g->feature_count_list), local_counts);
local_counts = NULL;
}
@@ -30105,6 +33055,7 @@ NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrep
dcp->conf_list[DISC_INCONSISTENT_PROTEIN_ID_PREFIX] = FALSE;
dcp->conf_list[DISC_SOURCE_QUALS_ASNDISC] = FALSE;
dcp->conf_list[DISC_FEATURE_COUNT] = FALSE;
+ dcp->conf_list[DISC_PROTEIN_NAMES] = FALSE;
sep_list.data.ptrvalue = sep;
sep_list.next = NULL;
@@ -30113,7 +33064,7 @@ NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrep
dcp = DiscrepancyConfigFree (dcp);
SaveStringsForDiscrepancyItemList (local_discrepancy_list, g->output_config->use_feature_table_format, filename);
- ValNodeLink (&(g->discrepancy_list), local_discrepancy_list);
+ ValNodeLinkToEnd (&(g->discrepancy_list), local_discrepancy_list);
}
@@ -30202,189 +33153,216 @@ static ValNodePtr CreateGlobalFeatureCountReports (ValNodePtr PNTR feature_count
}
-extern void AddListOutputTags(ValNodePtr discrepancy_list, DiscReportOutputConfigPtr oc)
+NLM_EXTERN void AddListOutputTags(ValNodePtr discrepancy_list, DiscReportOutputConfigPtr oc)
{
- ValNodePtr vnp, vnp2;
- ClickableItemPtr cip, cip2;
- CharPtr setting_name;
- Boolean disc_count_nucleotides_grt_1 = FALSE;
+ ValNodePtr vnp;
+ ClickableItemPtr cip;
+ if (!oc->add_output_tag && !oc->add_extra_output_tag) {
+ return;
+ }
for (vnp = discrepancy_list; vnp != NULL; vnp = vnp->next) {
cip = (ClickableItemPtr) vnp->data.ptrvalue;
if (cip != NULL) {
- setting_name = GetDiscrepancyTestSettingName ((DiscrepancyType) cip->clickable_item_type);
- if (!StringCmp(setting_name, "DISC_COUNT_NUCLEOTIDES"))
- disc_count_nucleotides_grt_1 = IsDiscCntGrt1(cip->description);
-
- if (oc->add_output_tag || oc->add_extra_output_tag) {
- if (disc_count_nucleotides_grt_1 == FALSE
- && !StringCmp("DISC_SOURCE_QUALS_ASNDISC", setting_name)
- && StringSearch(cip->description, "taxname (all present, all unique)")) { // 2nd check
- for (vnp2 = vnp->next; vnp2 != NULL; vnp2 = vnp2->next) {
- cip2 = (ClickableItemPtr) vnp2->data.ptrvalue;
- if (!StringCmp("DISC_COUNT_NUCLEOTIDES",
- discrepancy_info_list[cip2->clickable_item_type].setting_name)) {
- disc_count_nucleotides_grt_1 = IsDiscCntGrt1(cip2->description);
- if (disc_count_nucleotides_grt_1 == TRUE) break;
- }
- }
- if (vnp2 == NULL) disc_count_nucleotides_grt_1 = FALSE;
- }
-
- AddOutputTag(cip, disc_count_nucleotides_grt_1, oc->add_extra_output_tag);
- }
+ AddOutputTag(cip, oc->num_nucs > 1, oc->add_extra_output_tag);
}
}
} // AddListOutputTags
+/* Builds a summary item when every entry of a global name list is
+ * "hypothetical protein".
+ *
+ * list      - ValNode list; each node's data.ptrvalue is handed to
+ *             GetGlobalDiscrepancyStr to obtain the name to test.
+ * item_type - value stored in the new item's clickable_item_type.
+ *
+ * Returns a newly allocated ClickableItem whose description is the fixed
+ * "All proteins have same name" message when all names compare equal
+ * (case-insensitively) to "hypothetical protein".  Returns NULL when list
+ * is NULL, when any other name is present, or when allocation fails.
+ */
+ClickableItemPtr LIBCALL ClickableGlobalItemCategorize (ValNodePtr list, int item_type)
+{
+  ValNodePtr       vnp;
+  ClickableItemPtr cip;
+  CharPtr          msg = "All proteins have same name \"hypothetical protein\"";
+
+  if (list == NULL) {
+    return NULL;
+  }
+
+  for (vnp = list; vnp != NULL; vnp = vnp->next) {
+    /* compare the name in place: a read-only comparison needs no copy, and
+     * a StringSave copy would have to be freed on every iteration */
+    if (StringICmp (GetGlobalDiscrepancyStr (vnp->data.ptrvalue), "hypothetical protein") != 0) {
+      return NULL;
+    }
+  }
+
+  cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+  if (cip == NULL) {
+    return NULL;
+  }
+  MemSet (cip, 0, sizeof (ClickableItemData));
+  cip->clickable_item_type = item_type;
+  /* the message has no conversions, so duplicate it directly rather than
+   * passing a non-literal string as a printf format */
+  cip->description = StringSave (msg);
+  return cip;
+}
+
+NLM_EXTERN Boolean WriteGlobalDiscrepancyReportEx (GlobalDiscrepReportPtr g, FILE *fp, CharPtr extra_comment)
{
- ValNodePtr local_list = NULL, vnp;
+ ValNodeBlock local_list;
ClickableItemPtr cip;
-unsigned i=0;
-
- if (g == NULL || fp == NULL) return;
+ Boolean any_errors = FALSE;
- g->locus_tag_list = ValNodeSort (g->locus_tag_list, SortVnpByGlobalDiscrepancyString);
- g->missing_locus_tag = ValNodeSort (g->missing_locus_tag, SortVnpByGlobalDiscrepancyString);
- g->cds_product_list = ValNodeSort (g->cds_product_list, SortVnpByGlobalDiscrepancyString);
- g->missing_cds_product = ValNodeSort (g->missing_cds_product, SortVnpByGlobalDiscrepancyString);
- g->mrna_product_list = ValNodeSort (g->mrna_product_list, SortVnpByGlobalDiscrepancyString);
- g->missing_mrna_product = ValNodeSort (g->missing_mrna_product, SortVnpByGlobalDiscrepancyString);
+ if (g == NULL || fp == NULL) return FALSE;
+ InitValNodeBlock(&local_list, NULL);
+
+ ValNodeSortBlock (&(g->locus_tag_list), SortVnpByGlobalDiscrepancyStringCaseSensitive);
+ ValNodeSortBlock (&(g->missing_locus_tag), SortVnpByGlobalDiscrepancyString);
+ ValNodeSortBlock (&(g->cds_product_list), SortVnpByGlobalDiscrepancyString);
+ ValNodeSortBlock (&(g->missing_cds_product), SortVnpByGlobalDiscrepancyString);
+ ValNodeSortBlock (&(g->mrna_product_list), SortVnpByGlobalDiscrepancyString);
+ ValNodeSortBlock (&(g->missing_mrna_product), SortVnpByGlobalDiscrepancyString);
+ ValNodeSortBlock (&(g->global_prot_name_list), SortVnpByGlobalDiscrepancyString);
+
+ // DISC_PROTEIN_NAMES
+ if (g->global_prot_name_list.head != NULL) {
+ cip
+ = ClickableGlobalItemCategorize(g->global_prot_name_list.head, DISC_PROTEIN_NAMES);
+ if (cip != NULL) {
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
+ }
+ }
- if (g->locus_tag_list != NULL) {
- if (g->missing_locus_tag != NULL) {
- cip = ReportMissingFields (g->missing_locus_tag, discReportMissingLocusTags, DISC_GENE_MISSING_LOCUS_TAG);
+ if (g->locus_tag_list.head != NULL) {
+ if (g->missing_locus_tag.head != NULL) {
+ cip = ReportMissingFields (g->missing_locus_tag.head, discReportMissingLocusTags, DISC_GENE_MISSING_LOCUS_TAG);
if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
}
}
CollateDiscrepancyReports (&(g->adjacent_locus_tag_disc_list));
- cip = ReportNonUniqueGlobalDiscrepancy (g->locus_tag_list,
+ cip = ReportNonUniqueGlobalDiscrepancy (g->locus_tag_list.head,
discReportDuplicateLocusTagFmt,
discReportOneDuplicateLocusTagFmt,
DISC_GENE_DUPLICATE_LOCUS_TAG,
TRUE);
if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
if (g->adjacent_locus_tag_disc_list != NULL) {
ValNodeLink (&(cip->subcategories), g->adjacent_locus_tag_disc_list);
}
} else if (g->adjacent_locus_tag_disc_list != NULL) {
- ValNodeLink (&local_list, g->adjacent_locus_tag_disc_list);
+ ValNodeLinkToEnd (&local_list, g->adjacent_locus_tag_disc_list);
}
g->adjacent_locus_tag_disc_list = NULL;
/* inconsistent locus tags */
- ValNodeLink (&local_list,
- ReportInconsistentGlobalDiscrepancyPrefixes (g->locus_tag_list,
- discReportInconsistentLocusTagPrefixFmt,
- DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX));
+ ValNodeLinkToEnd (&local_list,
+ ReportInconsistentGlobalDiscrepancyPrefixes (g->locus_tag_list.head,
+ discReportInconsistentLocusTagPrefixFmt,
+ DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX));
/* bad formats */
- cip = ReportBadLocusTagFormat (g->locus_tag_list);
+ cip = ReportBadLocusTagFormat (g->locus_tag_list.head);
if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
}
}
- if (g->cds_product_list != NULL) {
+ if (g->cds_product_list.head != NULL) {
/* report duplicates */
- cip = ReportNonUniqueGlobalDiscrepancy (g->cds_product_list,
+ cip = ReportNonUniqueGlobalDiscrepancy (g->cds_product_list.head,
discReportDuplicateProteinIDFmt,
discReportOneDuplicateProteinIDFmt,
DISC_DUP_GENPRODSET_PROTEIN,
TRUE);
if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
}
/* report inconsistent IDs */
- ValNodeLink (&local_list,
- ReportInconsistentGlobalDiscrepancyPrefixes (g->cds_product_list,
- discReportInconsistentProteinIDPrefixFmt,
- DISC_INCONSISTENT_PROTEIN_ID_PREFIX));
+ ValNodeLinkToEnd (&local_list,
+ ReportInconsistentGlobalDiscrepancyPrefixes (g->cds_product_list.head,
+ discReportInconsistentProteinIDPrefixFmt,
+ DISC_INCONSISTENT_PROTEIN_ID_PREFIX));
}
- if (g->mrna_product_list != NULL) {
- if (g->missing_locus_tag != NULL) {
- cip = ReportMissingFields (g->mrna_product_list, discReportMissingTranscriptIDFmt, DISC_MISSING_GENPRODSET_TRANSCRIPT_ID);
+ if (g->mrna_product_list.head != NULL) {
+ if (g->missing_locus_tag.head != NULL) {
+ cip = ReportMissingFields (g->mrna_product_list.head, discReportMissingTranscriptIDFmt, DISC_MISSING_GENPRODSET_TRANSCRIPT_ID);
if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
}
}
- cip = ReportNonUniqueGlobalDiscrepancy (g->mrna_product_list,
+ cip = ReportNonUniqueGlobalDiscrepancy (g->mrna_product_list.head,
discReportDuplicateTranscriptIdFmt,
discReportOneDuplicateTranscriptIdFmt,
DISC_DUP_GENPRODSET_TRANSCRIPT_ID,
TRUE);
if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
}
}
/* missing gnl protein IDs */
- cip = ReportMissingFields (g->missing_gnl_list, discReportBadProteinIdFmt, DISC_MISSING_PROTEIN_ID);
+ cip = ReportMissingFields (g->missing_gnl_list.head, discReportBadProteinIdFmt, DISC_MISSING_PROTEIN_ID);
if (cip != NULL) {
- ValNodeAddPointer (&local_list, 0, cip);
- }
- g->gnl_list = ValNodeSort (g->gnl_list, SortVnpByGlobalDiscrepancyString);
- ValNodeLink (&local_list,
- ReportInconsistentGlobalDiscrepancyStrings (g->gnl_list,
- discReportInconsistentProteinIDPrefixFmt,
- DISC_INCONSISTENT_PROTEIN_ID_PREFIX));
-
-
- g->locus_tag_list = FreeGlobalDiscrepancyList (g->locus_tag_list);
- g->missing_locus_tag = FreeGlobalDiscrepancyList (g->missing_locus_tag);
- g->cds_product_list = FreeGlobalDiscrepancyList (g->cds_product_list);
- g->missing_cds_product = FreeGlobalDiscrepancyList (g->missing_cds_product);
- g->mrna_product_list = FreeGlobalDiscrepancyList (g->mrna_product_list);
- g->missing_mrna_product = FreeGlobalDiscrepancyList (g->missing_mrna_product);
- g->missing_gnl_list = FreeGlobalDiscrepancyList (g->missing_gnl_list);
- g->gnl_list = FreeGlobalDiscrepancyList (g->gnl_list);
+ ValNodeAddPointerToEnd (&local_list, 0, cip);
+ }
+ ValNodeSortBlock (&(g->gnl_list), SortVnpByGlobalDiscrepancyString);
+ ValNodeLinkToEnd (&local_list,
+ ReportInconsistentGlobalDiscrepancyStrings (g->gnl_list.head,
+ discReportInconsistentProteinIDPrefixFmt,
+ DISC_INCONSISTENT_PROTEIN_ID_PREFIX));
+
+ FreeGlobalDiscrepancyListBlock(&(g->locus_tag_list));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_locus_tag));
+ FreeGlobalDiscrepancyListBlock(&(g->cds_product_list));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_cds_product));
+ FreeGlobalDiscrepancyListBlock(&(g->mrna_product_list));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_mrna_product));
+ FreeGlobalDiscrepancyListBlock(&(g->missing_gnl_list));
+ FreeGlobalDiscrepancyListBlock(&(g->gnl_list));
+ FreeGlobalDiscrepancyListBlock(&(g->global_prot_name_list));
/* create discrepancies for inconsistent and missing values from global lists */
- ValNodeLink (&local_list, GetMissingAndInconsistentDiscrepanciesFromGlobalSrcValList (&(g->global_src_qual_vals), &(g->global_srcs), &(g->src_qual_multi_list)));
+ ValNodeLinkToEnd (&local_list, GetMissingAndInconsistentDiscrepanciesFromGlobalSrcValList (&(g->global_src_qual_vals), &(g->global_srcs), &(g->src_qual_multi_list)));
/* note - be sure to include local discrepancy reports */
CollateDiscrepancyReports (&(g->src_qual_repeated_list));
- ValNodeLink (&local_list, g->src_qual_repeated_list);
+ ValNodeLinkToEnd (&local_list, g->src_qual_repeated_list);
g->src_qual_repeated_list = NULL;
/* create report for feature counts */
- ValNodeLink (&local_list, CreateGlobalFeatureCountReports (&(g->feature_count_list)));
+ ValNodeLinkToEnd (&local_list, CreateGlobalFeatureCountReports (&(g->feature_count_list.head)));
/* data collected for some tests with global components should not be displayed */
- RemoveUnwantedDiscrepancyItems (&local_list, g->test_config);
+ RemoveUnwantedDiscrepancyItems (&(local_list.head), g->test_config);
+ InitValNodeBlock (&local_list, local_list.head);
/* group discrepany reports from separate files */
- CollateDiscrepancyReports (&(g->discrepancy_list));
+ CollateDiscrepancyReports (&(g->discrepancy_list.head));
+ InitValNodeBlock(&(g->discrepancy_list), g->discrepancy_list.head);
// add output tag (fatal now)
- if (local_list == NULL) local_list = g->discrepancy_list;
- else
- for (vnp = local_list, i=0; vnp != NULL; vnp = vnp->next, i++) {
- if (vnp->next == NULL) {vnp->next = g->discrepancy_list; break;};
- }
+ ValNodeLinkToEnd(&local_list, g->discrepancy_list.head);
- AddListOutputTags(local_list, g->output_config);
-// AddListOutputTags(g->discrepancy_list, g->output_config);
+ AddListOutputTags(local_list.head, g->output_config);
+ if (local_list.head != NULL) {
+ any_errors = TRUE;
+ }
- fprintf (fp, "Discrepancy Report Results\n\n");
+ fprintf (fp, "Discrepancy Report Results%s\n\n", extra_comment == NULL ? "" : extra_comment);
fprintf (fp, "Summary\n");
- WriteDiscrepancyReportSummary (local_list, fp);
-
- // WriteDiscrepancyReportSummary (g->discrepancy_list, fp);
+ WriteDiscrepancyReportSummary (local_list.head, fp);
fprintf (fp, "\n\nDetailed Report\n\n");
- WriteAsnDiscReport (local_list, fp, g->output_config, TRUE);
- local_list = FreeClickableList (local_list);
+ WriteAsnDiscReport (local_list.head, fp, g->output_config, TRUE);
+ local_list.head = FreeClickableList (local_list.head);
- g->discrepancy_list = NULL;
+ InitValNodeBlock(&(g->discrepancy_list), NULL);
+ return any_errors;
+}
+
+
+NLM_EXTERN void WriteGlobalDiscrepancyReport (GlobalDiscrepReportPtr g, FILE *fp)
+{
+ WriteGlobalDiscrepancyReportEx (g, fp, NULL);
}
@@ -30999,6 +33977,37 @@ static CharPtr GetDash (CharPtr str)
return NULL;
}
+/* Returns a pointer to the first '/' in str, or NULL when str is NULL or contains no '/'. */
+static CharPtr GetSlash (CharPtr str)
+
+{
+  CharPtr  cursor;
+
+  if (str == NULL) return NULL;
+
+  for (cursor = str; *cursor != '\0'; cursor++) {
+    if (*cursor == '/') return cursor;
+  }
+
+  return NULL;
+}
+
+/* Returns a pointer to the first ':' in str, or NULL when str is NULL or contains no ':'. */
+static CharPtr GetColon (CharPtr str)
+
+{
+  CharPtr  cursor;
+
+  if (str == NULL) return NULL;
+
+  for (cursor = str; *cursor != '\0'; cursor++) {
+    if (*cursor == ':') return cursor;
+  }
+
+  return NULL;
+}
static CharPtr legalMonths [] = {
"Jan",
"Feb",
@@ -31030,76 +34039,343 @@ static Int2 daysPerMonth [] = {
31
};
-NLM_EXTERN Boolean CollectionDateIsValid (CharPtr name)
+/*
+ * Splits a collection-date string at each '/' into its component dates.
+ *
+ * name is first copied into a fixed-size local buffer, so the caller's
+ * string is never modified.  Each component is then copied into a new
+ * ValNode (choice 0) appended to the result list.
+ *
+ * Returns the head of the list, or NULL when name has no text.  The caller
+ * owns the list; the callers in this file release it with ValNodeFreeData.
+ */
+static ValNodePtr SplitCollectionDates (CharPtr name)
+
+{
+  ValNodePtr  head = NULL;
+  CharPtr     ptr, tmp;
+  Char        str [512];
+
+  /* this function returns a list pointer, so "no result" is NULL, not FALSE */
+  if (StringHasNoText (name)) return NULL;
+
+  StringNCpy_0 (str, name, sizeof (str));
+  tmp = str;
+
+  while (StringDoesHaveText (tmp)) {
+    /* terminate the current component at the slash, if there is one */
+    ptr = GetSlash (tmp);
+    if (ptr != NULL) {
+      *ptr = '\0';
+      ptr++;
+    }
+    ValNodeCopyStr (&head, 0, tmp);
+    /* ptr is NULL after the last component, which ends the loop */
+    tmp = ptr;
+  }
+
+  return head;
+}
+
+/*
+ * Looks for a date/time separator: the first 'T' in name, provided it is
+ * not the first character and has a digit on both sides.
+ *
+ * When found, the 'T' is overwritten with '\0' (so name now holds only the
+ * date part) and a pointer to the text after it is returned.  In every
+ * other case name is left unchanged and NULL is returned.
+ */
+static CharPtr SplitAtTime (CharPtr name)
+
+{
+  Char     before, after;
+  CharPtr  sep;
+
+  if (StringHasNoText (name)) return NULL;
+
+  /* a leading 'T' has no character in front of it to inspect */
+  if (*name == 'T') return NULL;
+
+  sep = StringChr (name, 'T');
+  if (sep == NULL) return NULL;
+
+  before = *(sep - 1);
+  after = *(sep + 1);
+  if (! (IS_DIGIT (before) && IS_DIGIT (after))) return NULL;
+
+  *sep = '\0';
+  return sep + 1;
+}
+
+/*
+ * Checks the time portion of a collection date.
+ *
+ * The string must have text and its last character must be 'Z'.  It is then
+ * parsed as hours:minutes:seconds, falling back to hours:minutes and then
+ * to hours alone; fields that are not present keep their initial value 0.
+ * Returns TRUE when 0 <= h < 24, 0 <= m < 60 and 0 <= s < 60.
+ *
+ * NOTE(review): sscanf only reports how many leading fields converted, so
+ * text between the last converted number and the final 'Z' is not rejected
+ * here - confirm that this leniency is intended.
+ */
+static Boolean OneCollectionTimeIsValid (CharPtr time)
+
+{
+ long int h = 0, m = 0, s = 0;
+
+ if (StringHasNoText (time) || time[strlen(time) - 1] != 'Z') return FALSE;
+
+ /* try the most specific layout first; a failed attempt may still have */
+ /* stored its leading fields, which the next attempt stores again */
+ if (sscanf(time, "%ld:%ld:%ld", &h, &m, &s) == 3 ||
+ sscanf(time, "%ld:%ld", &h, &m) == 2 ||
+ sscanf(time, "%ld", &h) == 1) {
+ if (h < 24 && m < 60 && s < 60 &&
+ h > -1 && m > -1 && s > -1) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+/*
+ * Splits buf in place at each separator located by GetDash and classifies
+ * the resulting tokens by count, length and whether all are digits.
+ *
+ * returns 0 for bad, 1 for old style, 2 for ISO style
+ *
+ *   ISO style (every token all digits): token lengths 4, 4-2 or 4-2-2,
+ *                                       read as year, month, day
+ *   old style (otherwise):              token lengths 4, 3-4 or 2-3-4,
+ *                                       read as day, month, year
+ *
+ * On success *yearP, plus *monthP and *dayP when those parts are present,
+ * point into buf.  Outputs for absent parts are not written, so callers
+ * should preset them.  Only the shape of the tokens is checked here; the
+ * numeric ranges are left to the caller.
+ */
+static Int2 TokenizeCollectionDate (CharPtr buf, CharPtr PNTR monthP, CharPtr PNTR dayP, CharPtr PNTR yearP)
+
+{
+  Int2     i, num_tokens = 0;
+  Boolean  is_all_digits = TRUE;
+  CharPtr  nxt, ptr;
+  CharPtr  token [4];
+
+  if (StringHasNoText (buf)) return 0;
+
+  for (i = 0; i < 4; i++) {
+    token [i] = NULL;
+  }
+
+  /* collect up to four tokens so that a fourth one can be seen and rejected */
+  ptr = buf;
+  while (ptr != NULL && num_tokens < 4) {
+    nxt = GetDash (ptr);
+    if (nxt != NULL) {
+      *nxt = '\0';
+      nxt++;
+    }
+    token [num_tokens] = ptr;
+    if (! StringIsAllDigits (ptr)) {
+      is_all_digits = FALSE;
+    }
+    num_tokens++;
+    ptr = nxt;
+  }
+
+  if (num_tokens == 0) return 0;
+  if (num_tokens > 3) return 0;
+
+  if (is_all_digits) {
+
+    /* alternative (ISO) form, year first */
+
+    switch (num_tokens) {
+      case 1 :
+        if (StringLen (token [0]) == 4) {
+          *yearP = token [0];
+          return 2;
+        }
+        break;
+      case 2 :
+        if (StringLen (token [0]) == 4 &&
+            StringLen (token [1]) == 2) {
+          *yearP = token [0];
+          *monthP = token [1];
+          return 2;
+        }
+        break;
+      case 3 :
+        if (StringLen (token [0]) == 4 &&
+            StringLen (token [1]) == 2 &&
+            StringLen (token [2]) == 2) {
+          *yearP = token [0];
+          *monthP = token [1];
+          *dayP = token [2];
+          return 2;
+        }
+        break;
+      default :
+        break;
+    }
+
+  } else {
+
+    /* old form, year last */
+
+    switch (num_tokens) {
+      case 1 :
+        if (StringLen (token [0]) == 4) {
+          *yearP = token [0];
+          return 1;
+        }
+        break;
+      case 2 :
+        if (StringLen (token [1]) == 4 &&
+            StringLen (token [0]) == 3) {
+          *yearP = token [1];
+          *monthP = token [0];
+          return 1;
+        }
+        break;
+      case 3 :
+        if (StringLen (token [2]) == 4 &&
+            StringLen (token [1]) == 3 &&
+            StringLen (token [0]) == 2) {
+          *yearP = token [2];
+          *monthP = token [1];
+          *dayP = token [0];
+          return 1;
+        }
+        break;
+      default :
+        break;
+    }
+  }
+
+  return 0;
+}
+
+static Int2 OneCollectionDateIsValid (CharPtr name, Int4Ptr yrp, Int2Ptr mnp, Int2Ptr dyp)
{
Char ch;
Int2 dy = 0, dpm = 0, mn = 0;
- Int2 i;
- CharPtr ptr1, ptr2, month = NULL, day = NULL, year = NULL;
+ Int2 date_type, i;
+ CharPtr ptr, month = NULL, day = NULL, year = NULL;
Char str [256];
long int val;
Int4 yr = 0;
- if (StringHasNoText (name)) return FALSE;
+ if (StringHasNoText (name)) return 0;
StringNCpy_0 (str, name, sizeof (str));
- ptr1 = GetDash (str);
- if (ptr1 != NULL) {
- *ptr1 = '\0';
- ptr1++;
- ptr2 = GetDash (ptr1);
- if (ptr2 != NULL) {
- *ptr2 = '\0';
- ptr2++;
- day = str;
- month = ptr1;
- year = ptr2;
- } else {
- month = str;
- year = ptr1;
- }
- } else {
- year = str;
- }
+
+ date_type = TokenizeCollectionDate (str, &month, &day, &year);
if (day != NULL) {
- if (sscanf (day, "%ld", &val) != 1 || val < 1 || val > 31) return FALSE;
- if (StringLen (day) != 2 || !isdigit(day[0]) || !isdigit(day[1])) return FALSE;
- dy = (Int4) val;
+ if (sscanf (day, "%ld", &val) != 1 || val < 1 || val > 31) return 0;
+ if (StringLen (day) != 2 || !isdigit(day[0]) || !isdigit(day[1])) return 0;
+ dy = (Int2) val;
}
if (month != NULL) {
- for (i = 0; legalMonths [i] != NULL; i++) {
- if (StringCmp (month, legalMonths [i]) == 0) {
- mn = i + 1;
- break;
+ if (StringIsAllDigits (month)) {
+ if (sscanf (month, "%ld", &val) != 1 || val < 1 || val > 12) return 0;
+ mn = (Int2) val;
+ i = mn - 1;
+ dpm = daysPerMonth [i];
+ } else {
+ for (i = 0; legalMonths [i] != NULL; i++) {
+ if (StringCmp (month, legalMonths [i]) == 0) {
+ mn = i + 1;
+ break;
+ }
}
+ if (legalMonths [i] == NULL) return 0;
+ dpm = daysPerMonth [i];
}
- if (legalMonths [i] == NULL) return FALSE;
- dpm = daysPerMonth [i];
}
if (year != NULL) {
- ptr1 = year;
- ch = *ptr1;
+ ptr = year;
+ ch = *ptr;
while (ch != '\0') {
- if (! (IS_DIGIT (ch))) return FALSE;
- ptr1++;
- ch = *ptr1;
+ if (! (IS_DIGIT (ch))) return 0;
+ ptr++;
+ ch = *ptr;
}
if (sscanf (year, "%ld", &val) == 1) {
yr = (Int4) val;
if (val >= 1700 && val < 2100) {
if (dy > 0 && dpm > 0 && dy > dpm) {
- if (mn != 2 || dy != 29 || (yr % 4) != 0) return FALSE;
+ if (mn != 2 || dy != 29 || (yr % 4) != 0) return 0;
}
- return TRUE;
+ if (yrp != NULL) {
+ *yrp = yr;
+ }
+ if (mnp != NULL) {
+ *mnp = mn;
+ }
+ if (dyp != NULL) {
+ *dyp = dy;
+ }
+ return date_type;
}
}
}
- return FALSE;
+ return 0;
+}
+
+/*
+ * Validates a collection-date string that may hold several '/'-separated
+ * dates, each optionally followed by a time after a 'T'.
+ *
+ * Returns TRUE only when name has text, every date passes
+ * OneCollectionDateIsValid, and every time that is present belongs to an
+ * ISO style date (type 2) and passes OneCollectionTimeIsValid.
+ */
+NLM_EXTERN Boolean CollectionDateIsValid (CharPtr name)
+
+{
+  Boolean     all_ok = TRUE;
+  CharPtr     date_str, time_str;
+  ValNodePtr  dates, item;
+  Int2        day = 0, month = 0;
+  Int2        style;
+  Int4        year = 0;
+
+  if (StringHasNoText (name)) return FALSE;
+
+  dates = SplitCollectionDates (name);
+
+  for (item = dates; item != NULL; item = item->next) {
+    date_str = (CharPtr) item->data.ptrvalue;
+    /* cuts date_str at the 'T', leaving the date part behind */
+    time_str = SplitAtTime (date_str);
+    style = OneCollectionDateIsValid (date_str, &year, &month, &day);
+
+    if (style == 0) {
+      all_ok = FALSE;
+      continue;
+    }
+    if (! StringDoesHaveText (time_str)) continue;
+
+    /* a time is only accepted on an ISO style date */
+    if (style != 2 || ! OneCollectionTimeIsValid (time_str)) {
+      all_ok = FALSE;
+    }
+  }
+
+  ValNodeFreeData (dates);
+
+  return all_ok;
+}
+
+/*
+ * Checks that the '/'-separated dates in name are all valid and that none
+ * is earlier than the date before it.
+ *
+ * Each date is validated as in CollectionDateIsValid.  While the result is
+ * still TRUE, the date is compared with the previous one by year, then
+ * month, then day; a level is compared only when the previous date has a
+ * non-zero value for it.  Returns FALSE when name has no text.
+ */
+NLM_EXTERN Boolean CollectionDatesInOrder (CharPtr name)
+
+{
+  Boolean     in_order = TRUE;
+  CharPtr     date_str, time_str;
+  ValNodePtr  dates, item;
+  Int2        style;
+  Int2        day = 0, month = 0, prev_day = 0, prev_month = 0;
+  Int4        year = 0, prev_year = 0;
+
+  if (StringHasNoText (name)) return FALSE;
+
+  dates = SplitCollectionDates (name);
+
+  for (item = dates; item != NULL; item = item->next) {
+    date_str = (CharPtr) item->data.ptrvalue;
+    time_str = SplitAtTime (date_str);
+    style = OneCollectionDateIsValid (date_str, &year, &month, &day);
+
+    if (style == 0) {
+      in_order = FALSE;
+    } else if (StringDoesHaveText (time_str) &&
+               (style != 2 || ! OneCollectionTimeIsValid (time_str))) {
+      in_order = FALSE;
+    }
+
+    if (in_order && prev_year != 0) {
+      if (prev_year > year) {
+        in_order = FALSE;
+      } else if (prev_year == year && prev_month != 0) {
+        if (prev_month > month) {
+          in_order = FALSE;
+        } else if (prev_month == month && prev_day != 0 && prev_day > day) {
+          in_order = FALSE;
+        }
+      }
+    }
+
+    /* remembered unconditionally, exactly as parsed for this entry */
+    prev_year = year;
+    prev_month = month;
+    prev_day = day;
+  }
+
+  ValNodeFreeData (dates);
+
+  return in_order;
+}
@@ -31208,144 +34484,140 @@ static BetterDatePtr CollectionDateFromString (CharPtr name)
}
-NLM_EXTERN Boolean CollectionDateIsInTheFuture (CharPtr name)
+static Boolean OneCollectionDateIsInTheFuture (CharPtr str)
{
- DatePtr dp_now;
- BetterDatePtr dp_coll_date;
- Boolean rval = FALSE;
+ Int2 datetype, dy = 0, mn = 0;
+ DatePtr dp_now;
+ Boolean rsult = FALSE;
+ Int4 yr = 0;
- dp_coll_date = CollectionDateFromString (name);
- if (dp_coll_date == NULL) return FALSE;
+ if (StringHasNoText (str)) return FALSE;
- if (dp_coll_date->data[1] < 0)
- {
- /* year before 1900 */
- dp_coll_date = BetterDateFree (dp_coll_date);
- return FALSE;
- }
+ datetype = OneCollectionDateIsValid (str, &yr, &mn, &dy);
+ if (datetype == 0) return FALSE;
dp_now = DateCurr();
+ if (dp_now == NULL) return FALSE;
/* compare years */
- if (dp_now->data[1] < dp_coll_date->data[1])
+ if (dp_now->data[1] + 1900 < yr)
{
- rval = TRUE;
+ rsult = TRUE;
}
- else if (dp_now->data[1] > dp_coll_date->data[1])
+ else if (dp_now->data[1] + 1900 > yr)
{
- rval = FALSE;
+ rsult = FALSE;
}
/* years are equal - compare months */
- else if (dp_now->data[2] < dp_coll_date->data[2])
+ else if (dp_now->data[2] < mn)
{
- rval = TRUE;
+ rsult = TRUE;
}
- else if (dp_now->data[2] > dp_coll_date->data[2])
+ else if (dp_now->data[2] > mn)
{
- rval = FALSE;
+ rsult = FALSE;
}
/* years and months are equal - compare days */
- else if (dp_now->data[3] < dp_coll_date->data[3])
+ else if (dp_now->data[3] < dy)
{
- rval = TRUE;
+ rsult = TRUE;
}
else
{
- rval = FALSE;
+ rsult = FALSE;
}
dp_now = DateFree (dp_now);
- dp_coll_date = BetterDateFree (dp_coll_date);
- return rval;
+
+ return rsult;
}
-/* collection date is not required, but if present must be valid and in the past */
-static Boolean HasCollectionDate (BioSourcePtr biop)
+NLM_EXTERN CharPtr AssemblyDateFromCollectionDate (CharPtr collection_date, Boolean ambiguous)
{
- SubSourcePtr ssp;
- Boolean rval = TRUE;
+ BetterDatePtr bdate;
+ CharPtr assembly_date = NULL;
- if (biop == NULL) {
- return FALSE;
+ if (StringHasNoText(collection_date)) {
+ return NULL;
}
- ssp = biop->subtype;
- while (ssp != NULL && rval) {
- if (ssp->subtype == SUBSRC_collection_date) {
- if (!CollectionDateIsValid(ssp->name) || CollectionDateIsInTheFuture(ssp->name)) {
- rval = FALSE;
- }
- }
- ssp = ssp->next;
+
+ bdate = CollectionDateFromString(collection_date);
+ if (!bdate) {
+ return NULL;
}
- return rval;
+ if (ambiguous) {
+ bdate->data[3] = 0;
+ bdate->data[2] = 0;
+ }
+
+ if (bdate->data[3] > 0) {
+ assembly_date = (CharPtr) MemNew (sizeof (Char) * 12);
+ sprintf(assembly_date, "%02d-%c%c%c-%d",
+ bdate->data[3], /* day */
+ toupper(legalMonths[bdate->data[2] - 1][0]), /* month */
+ toupper(legalMonths[bdate->data[2] - 1][1]),
+ toupper(legalMonths[bdate->data[2] - 1][2]),
+ bdate->data[1] + 1900 /* year */);
+ } else if (bdate->data[2] > 0) {
+ assembly_date = (CharPtr) MemNew (sizeof (Char) * 9);
+ sprintf(assembly_date, "%c%c%c-%d",
+ toupper(legalMonths[bdate->data[2] - 1][0]), /* month */
+ toupper(legalMonths[bdate->data[2] - 1][1]),
+ toupper(legalMonths[bdate->data[2] - 1][2]),
+ bdate->data[1] + 1900 /* year */);
+ } else {
+ assembly_date = (CharPtr) MemNew (sizeof (Char) * 5);
+ sprintf (assembly_date, "%d", bdate->data[1] + 1900);
+ }
+
+ return assembly_date;
}
-static Boolean BarcodeGPSOkay (BioSourcePtr biop)
+/*
+ * Returns TRUE when at least one of the '/'-separated dates in name is
+ * reported as being in the future by OneCollectionDateIsInTheFuture.
+ * Returns FALSE when name has no text.
+ */
+NLM_EXTERN Boolean CollectionDateIsInTheFuture (CharPtr name)
+
+{
+  Boolean     any_future = FALSE;
+  CharPtr     date_str;
+  ValNodePtr  dates, item;
+
+  if (StringHasNoText (name)) return FALSE;
+
+  dates = SplitCollectionDates (name);
+
+  for (item = dates; item != NULL; item = item->next) {
+    date_str = (CharPtr) item->data.ptrvalue;
+    /* called only to cut off a trailing time; the time is not examined */
+    (void) SplitAtTime (date_str);
+    if (OneCollectionDateIsInTheFuture (date_str)) {
+      any_future = TRUE;
+    }
+  }
+
+  ValNodeFreeData (dates);
+
+  return any_future;
+}
+
+/* collection date is not required, but if present must be valid and in the past */
+static Boolean HasCollectionDate (BioSourcePtr biop)
{
SubSourcePtr ssp;
Boolean rval = TRUE;
- CharPtr country = NULL;
- Boolean format_ok, lat_in_range, lon_in_range, precision_ok;
- FloatHi lat, lon;
- Char buf [256];
- CharPtr ptr;
- CharPtr guess;
if (biop == NULL) {
return FALSE;
}
ssp = biop->subtype;
- /* first find country */
- for (ssp = biop->subtype; ssp != NULL && country == NULL; ssp = ssp->next)
- {
- if (ssp->subtype == SUBSRC_country)
- {
- country = ssp->name;
- }
- }
- if (StringContainsBodyOfWater (country))
- {
- return TRUE;
- }
- if (country != NULL)
- {
- StringNCpy_0 (buf, country, sizeof (buf));
- ptr = StringChr (buf, ':');
- if (ptr != NULL) {
- *ptr = '\0';
- }
- country = buf;
- }
-
- for (ssp = biop->subtype; ssp != NULL && rval; ssp = ssp->next)
- {
- if (ssp->subtype == SUBSRC_lat_lon)
- {
- IsCorrectLatLonFormat (ssp->name, &format_ok, &precision_ok, &lat_in_range, &lon_in_range);
- if (!format_ok || !precision_ok || !lat_in_range || !lon_in_range)
- {
- rval = FALSE;
- }
- else if (!ParseLatLon (ssp->name, &lat, &lon))
- {
+ while (ssp != NULL && rval) {
+ if (ssp->subtype == SUBSRC_collection_date) {
+ if (!CollectionDateIsValid(ssp->name) || CollectionDateIsInTheFuture(ssp->name)) {
rval = FALSE;
}
- else if (country != NULL && IsCountryInLatLonList (country))
- {
- if (!(TestLatLonForCountry (country, lat, lon)))
- {
- guess = GuessCountryForLatLon (lat, lon);
- if (StringHasNoText (guess) || !CountryBoxesOverlap (country, guess)) {
- rval = FALSE;
- }
- }
- }
}
+ ssp = ssp->next;
}
-
return rval;
}
@@ -31374,61 +34646,6 @@ static Boolean BarcodeBioSourceTest (BioseqPtr bsp, BarcodeBioSourceTestFunc tes
}
-static void BarcodeBioSourceTestCallback (BioseqPtr bsp, Pointer userdata, BarcodeBioSourceTestFunc test_func)
-{
- BarcodeSearchPtr bsd;
-
- if (bsp == NULL || ISA_aa (bsp->mol)
- || (bsd = (BarcodeSearchPtr) userdata) == NULL
- || bsd->cfg == NULL
- || test_func == NULL)
- {
- return;
- }
-
- if (BarcodeBioSourceTest (bsp, test_func, bsd->cfg->require_keyword))
- {
- ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp);
- }
-}
-
-
-static void FindBadGPS (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeBioSourceTestCallback (bsp, userdata, BarcodeGPSOkay);
-}
-
-
-
-static void
-BarcodeTestForSeqEntry
-(SeqEntryPtr sep,
- ValNodePtr PNTR discrepancy_list,
- VisitBioseqsFunc callback,
- CharPtr fmt,
- BarcodeTestConfigPtr cfg)
-{
- BarcodeSearchData bsd;
-
- bsd.bioseq_list = NULL;
- bsd.cfg = cfg;
- if (bsd.cfg == NULL)
- {
- bsd.cfg = BarcodeTestConfigNew ();
- }
- VisitBioseqsInSep (sep, &bsd, callback);
-
- if (bsd.bioseq_list != NULL)
- {
- ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (0, fmt, bsd.bioseq_list));
- }
- if (cfg != bsd.cfg)
- {
- bsd.cfg = BarcodeTestConfigFree (bsd.cfg);
- }
-}
-
-
static void BarcodePercentNDiscrepanciesForSeqEntry (ValNodePtr results, ValNodePtr PNTR discrepancy_list, FloatLo min_n_percent)
{
BarcodeTestResultsPtr res;
@@ -31936,7 +35153,7 @@ static void FillInMissingTraces (ValNodePtr trace_check_list)
Char id_txt[255];
Char cmmd [256];
ValNodePtr vnp;
- BarcodeTestResultsPtr res;
+ BarcodeTestResultsPtr res = NULL;
ReadBufferData rbd;
CharPtr line, cp;
@@ -31947,6 +35164,8 @@ static void FillInMissingTraces (ValNodePtr trace_check_list)
}
if (tracefetchcmd == NULL) return;
+ id_txt [0] = '\0';
+
TmpNam (path_in);
fp = FileOpen (path_in, "w");
if (fp == NULL) {
@@ -31981,8 +35200,12 @@ static void FillInMissingTraces (ValNodePtr trace_check_list)
line = AbstractReadFunction (&rbd);
vnp = trace_check_list;
- res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
- SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
+ if (vnp != NULL) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ if (res != NULL && res->bsp != NULL) {
+ SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
+ }
+ }
while (line != NULL && line[0] != EOF && vnp != NULL) {
if (!StringHasNoText (line)) {
@@ -31990,15 +35213,21 @@ static void FillInMissingTraces (ValNodePtr trace_check_list)
if (cp != NULL) {
*cp = 0;
while (StringCmp (id_txt, line) != 0 && vnp != NULL) {
- if (res->num_trace < 2) {
+ if (res != NULL && res->num_trace < 2) {
res->failed_tests[eBarcodeTest_LowTrace] = TRUE;
}
vnp = vnp->next;
- res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
- SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
+ if (vnp != NULL) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ if (res != NULL && res->bsp != NULL) {
+ SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
+ }
+ }
}
if (vnp != NULL) {
- res->num_trace++;
+ if (res != NULL) {
+ res->num_trace++;
+ }
}
}
}
@@ -32945,6 +36174,7 @@ static Boolean ParseFromDToken (CharPtr dtoken, FloatHiPtr val, CharPtr d, Int4P
{
dir = dtoken[0];
rval = ParseNumericFromDToken (dtoken + 1, &f, prec);
+ f = ABS(f);
}
else if (IsDirectionChar (dtoken[token_len - 1]))
{
@@ -32957,6 +36187,7 @@ static Boolean ParseFromDToken (CharPtr dtoken, FloatHiPtr val, CharPtr d, Int4P
token_len --;
}
rval = ParseNumericFromDToken (dtoken, &f, prec);
+ f = ABS(f);
}
else
{
@@ -33892,6 +37123,7 @@ static ReplacePairData country_name_fixes[] = {
{"South Georgia & South Sandwich Islands", "South Georgia and the South Sandwich Islands"},
{"La Reunion Island", "Reunion"},
{"Brasil", "Brazil"},
+ {"Democratic Republic of Congo", "Democratic Republic of the Congo"},
{"UK", "United Kingdom"},
{"ABW", "Aruba"},
{"AFG", "Afghanistan"},
@@ -34221,6 +37453,28 @@ static Boolean ContainsMultipleCountryNames (CharPtr PNTR list, CharPtr search_s
}
+/* phrases whose presence in a country value turns off automatic fixing; NULL-terminated */
+static CharPtr suppress_country_fix_keywords[] = {
+  "Sea",
+  "USSR",
+  NULL};
+
+
+/*
+ * Returns TRUE when country has no text, or when DoesStringContainPhrase
+ * (called with flags FALSE, TRUE) finds any of the
+ * suppress_country_fix_keywords in it; otherwise FALSE.
+ */
+static Boolean SuppressCountryFix (CharPtr country)
+{
+  CharPtr PNTR  keyword;
+
+  if (StringHasNoText (country)) {
+    return TRUE;
+  }
+
+  for (keyword = suppress_country_fix_keywords; *keyword != NULL; keyword++) {
+    if (DoesStringContainPhrase(country, *keyword, FALSE, TRUE)) {
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+
static CharPtr NewFixCountry (CharPtr country, CharPtr PNTR country_list)
{
CharPtr cp, next_sep, start_after;
@@ -34236,6 +37490,11 @@ static CharPtr NewFixCountry (CharPtr country, CharPtr PNTR country_list)
if (new_country != NULL) {
return new_country;
}
+
+ if (SuppressCountryFix(country)) {
+ return new_country;
+ }
+
country = StringSave (country);
cp = country;
while (*cp != 0 && !too_many_countries) {
@@ -34631,7 +37890,7 @@ NLM_EXTERN Boolean FixupMouseStrains (SeqEntryPtr sep, FILE *log_fp)
typedef struct srcqualfixlist {
Int4 src_qual;
- CharPtr PNTR fix_list;
+ CharPtr PNTR PNTR fix_lists;
} SrcQualFixListData, PNTR SrcQualFixListPtr;
@@ -34652,22 +37911,28 @@ static CharPtr src_qual_host_words[] = {
"alfalfa",
"almond",
"apple",
+ "asparagus",
"badger",
"bean",
"bitter melon",
"blackberry",
+ "blossoms",
"blueberry",
"bovine",
"brinjal",
"broad bean",
"cabbage",
- "caprine",
- "cat",
- "cattle",
"canine",
"cantaloupe",
+ "caprine",
+ "carrot",
"cassava",
+ "cat",
+ "catfish",
+ "cattle",
"cauliflower",
+ "Channel catfish",
+ "chestnut",
"chicken",
"chimpanzee",
"clover",
@@ -34675,7 +37940,9 @@ static CharPtr src_qual_host_words[] = {
"cotton",
"cow",
"cowpea",
+ "crab",
"cucumber",
+ "curd",
"dairy cow",
"dog",
"duck",
@@ -34686,19 +37953,35 @@ static CharPtr src_qual_host_words[] = {
"goat",
"goldfish",
"goose",
+ "guanabana",
"honeydew",
"horse",
+ "ice cream",
"juniper",
+ "larva",
+ "laurel",
+ "leek",
+ "lentil",
+ "lilac",
"lily",
"maize",
+ "mamey",
+ "mamey sapote",
"mango",
"mangrove",
+ "mangroves",
+ "marigold",
"marine sponge",
+ "melon",
"mosquito",
"mulberry",
"mungbean",
"nematode",
+ "oat",
+ "ornamental pear",
"ovine",
+ "papaya",
+ "pea",
"peach",
"peacock",
"pear",
@@ -34709,133 +37992,145 @@ static CharPtr src_qual_host_words[] = {
"potato",
"raccoon dog",
"red fox",
+ "rhizospheric soil",
"rice",
"salmon",
+ "seagrass",
"sesame",
"sheep",
+ "shrimp",
"sorghum",
+ "sour cherry",
+ "sourdough",
"soybean",
"sponge",
"squash",
+ "strawberry",
+ "sugar beet",
"sunflower",
+ "sweet cherry",
"swine",
"tobacco",
"tomato",
+ "turf",
+ "turfgrass",
"turkey",
"turtle",
"watermelon",
"wheat",
"white clover",
+ "willow",
"wolf",
"yak",
NULL };
- static CharPtr src_qual_lab_host_words[] = {
- "porcine",
- "caprine",
- "ovine",
- "cattle",
- "canine",
- "cat",
- "feline",
- "bovine",
- "tomato",
- "pepper",
- "yak",
- "horse",
- "pig",
- "cow",
- "rice",
- "turkey",
- "chicken",
- "sheep",
- "yak",
- "salmon",
- "wolf",
- "nematode",
- "fox",
- "swine",
- "fish",
- "maize",
- "soybean",
- "wheat",
+static CharPtr src_qual_isolation_source_words[] = {
+ "adductor muscle",
+ "aquaculture water",
+ "bile",
+ "bitumen",
+ "bone marrow",
+ "brain biopsy",
+ "buffy coat",
+ "cabbage leaves",
+ "catfish",
+ "Channel catfish",
+ "compost soil",
+ "crown",
+ "curd sample",
+ "dairy farm soil",
+ "farm soil",
+ "field soil",
+ "fish intestine",
+ "freshwater",
+ "fruit body",
+ "groundwater",
+ "hepatic bile duct",
+ "hepatic biliary duct",
+ "hot marine salterns",
+ "human skin",
+ "lake isolate",
+ "lake mud",
+ "mangrove soil",
+ "midgut",
+ "pond sediment",
+ "pond water",
+ "poultry farm soil",
+ "river sand",
+ "saline lake",
+ "sewage sludge",
+ "soda lake",
+ "soil rhizosphere",
+ "soil sample",
+ "solar saltern",
+ "solar salterns",
+ "sulphur spring",
+ "surface water",
+ "tannery waste",
+ "tannery waste effluent",
+ "tissue biopsy",
+ "twig",
+ "underground water",
+ "vegetable",
+ "vegetables",
NULL };
-static CharPtr src_qual_isolation_source_words[] = {
+static CharPtr src_qual_isolation_source_and_tissue_type_words[] = {
"abdomen",
"abdominal fluid",
"acne",
"activated sludge",
+ "adductor muscle",
"agricultural soil",
"air",
- "alfalfa",
- "almond",
"amniotic fluid",
- "apple",
+ "antenna",
"aspirate",
- "badger",
- "bean",
"bile",
"biofilm",
- "bitter melon",
- "blackberry",
"blood",
- "blueberry",
+ "blood cells",
+ "blood sample",
"body fluid",
"bone",
- "bovine",
+ "bovine feces",
"bovine milk",
"brain",
"brain abscess",
"brain tissue",
"branch",
- "brinjal",
"bronchial mucosa",
"bronchoalveolar lavage",
"buccal epithelial cells",
"buccal mucosa",
"buccal swab",
"bursa",
- "cabbage",
"callus",
- "canine",
- "cantaloupe",
- "caprine",
- "cassava",
- "cattle",
- "cauliflower",
"cave sediment",
"cave sediments",
"cerebellum",
"cerebrospinal fluid",
+ "cervix",
"cheese",
- "chicken",
- "chimpanzee",
"clinical",
"clinical isolate",
"clinical isolates",
"clinical sample",
"clinical samples",
+ "cloaca",
+ "cloacal swab",
"compost",
- "corn",
+ "coral reef",
"corn rhizosphere",
"cornea",
- "cotton",
"cotton rhizosphere",
- "cow",
- "cowpea",
- "cucumber",
- "dairy cow",
"dairy cow rumen",
"distillery",
- "dog",
"drinking water",
- "duck",
"ear",
"egg",
"embryogenic callus",
"epithelium",
- "equine",
"esophageal mucosa",
"estuarine water",
"estuarine waters",
@@ -34844,11 +38139,12 @@ static CharPtr src_qual_isolation_source_words[] = {
"fecal sample",
"fecal samples",
"feces",
- "feline",
"fermented food",
+ "fermented soybeans",
+ "fetal brain",
"fin",
"fin wound",
- "fish",
+ "fish eggs",
"flooded rice soil",
"flower",
"food",
@@ -34857,64 +38153,66 @@ static CharPtr src_qual_isolation_source_words[] = {
"food samples",
"forest",
"forest soil",
- "fox",
"freshwater stream",
"fruit",
+ "fruitbody",
+ "fruiting body",
"gastric mucosa",
"gastrointestinal tract",
+ "genital cells",
+ "genitals",
"gill",
"gills",
- "goat",
"goat milk",
- "goldfish",
"head",
"head kidney",
"heart",
+ "heart blood",
"hemocyte",
"hepatocyte",
"hepatopancreas",
- "honeydew",
+ "horse",
"horse",
"hot spring",
"hot springs",
"human plasma",
+ "human skin",
+ "infected leaf",
"inflorescence",
"intestinal mucosa",
"intestine",
"intestines",
- "juniper",
"kidney",
+ "kimchi",
+ "lake",
"lake sediment",
"lake soil",
"lake water",
"leaf",
"leaves",
- "lily",
+ "lentil",
"liver",
"liver abscess",
"lung",
"lymph node",
"lymphocyte",
+ "maize",
"mammary gland",
- "mango",
"mangrove sediment",
"mangrove sediments",
"manure",
"marine environment",
"marine sediment",
"marine sediments",
- "marine sponge",
"marine water",
"mature leaf",
"meat",
+ "midgut",
"milk",
"mitral valve",
- "mosquito",
"mouth wound",
"mucosa",
"mucus",
- "mulberry",
- "mungbean",
"muscle",
"muscle tissue",
"mycelium",
@@ -34925,7 +38223,7 @@ static CharPtr src_qual_isolation_source_words[] = {
"nasopharyngeal aspirate",
"nasopharyngeal swab",
"nasopharynx",
- "nematode",
+ "nest",
"neuroblast",
"nodule",
"nodules",
@@ -34935,81 +38233,65 @@ static CharPtr src_qual_isolation_source_words[] = {
"oral lexion",
"oral mucosa",
"ovary",
- "ovary",
"oviduct",
- "ovine",
"paddy soil",
"parietal cortex",
"patient",
- "peach",
- "pear",
- "pepper",
"pericardial",
"pharnyx",
- "pig",
"placenta",
"plasma",
+ "pleopod",
+ "pleopods",
"pleura",
"pod",
- "pomegranate",
- "porcine",
- "potato",
- "raccoon dog",
- "red fox",
+ "purulent fluid",
"respiratory tract",
"rhizosphere",
"rhizosphere soil",
- "rice",
"rice rhizosphere",
"rice soil",
"river sediment",
"river sediments",
"river water",
"root",
- "roots",
"root nodule",
"root nodules",
"root tip",
"root tips",
+ "roots",
"rumen",
"saliva",
"salivary gland",
- "salmon",
"saltern soil",
"seafood",
- "seagrass",
"seawater",
"sediment",
"sediments",
"seedling",
+ "seedling roots",
"sera",
"serum",
- "sesame",
- "sheep",
+ "sesame seeds",
"shrimp pond",
"skeletal muscle",
"skin",
"skin lesion",
"sludge",
"soil",
- "sorghum",
- "soybean",
"spindle leaf",
"spleen",
- "sponge",
"sputum",
- "squash",
"stem",
- "stems",
"stem base",
+ "stems",
"stomach",
"stool",
"stool sample",
"stool samples",
- "sunflower",
+ "strawberry",
"swab",
"swamp soil",
- "swine",
"tail",
"tentacle",
"testes",
@@ -35019,106 +38301,26 @@ static CharPtr src_qual_isolation_source_words[] = {
"throat swab",
"throat wash",
"thymus",
- "tobacco",
- "tomato",
"trachea",
+ "tracheal aspirate",
"tracheal swab",
- "turkey",
- "turtle",
+ "turfgrass",
"urine",
"uterine mucosa",
"wastewater",
"water",
- "watermelon",
- "wheat",
+ "white clover",
"whole blood",
"whole cell/tissue lysate",
- "wolf",
- "wound",
"wound",
- "yak",
"yogurt",
NULL };
-static CharPtr src_qual_tissue_type_words[] = {
- "blood",
- "bone",
- "brain tissue",
- "brain",
- "buccal epithelial cells",
- "bursa",
- "bursa of fabricus",
- "callus",
- "cerebellum",
- "cornea",
- "dairy cow rumen",
- "ear",
- "embryogenic callus",
- "epithelium",
- "eye",
- "fin",
- "flower",
- "fruit",
- "gastrointestinal tract",
- "gill",
- "gills",
- "head kidney",
- "heart",
- "hepatopancreas",
- "inflorescence",
- "intestine",
- "intestines",
- "kidney",
- "leaf",
- "leaves",
- "liver",
- "lung",
- "lymph node",
- "mammary gland",
- "mammary gland",
- "mature leaf",
- "muscle tissue",
- "muscle",
- "mycelium",
- "nodule",
- "nodules",
- "ovary",
- "oviduct",
- "paddy soil",
- "parietal cortex",
- "pharynx",
- "placenta",
- "plasma",
- "pleopod",
- "pleopods",
- "respiratory tract",
- "root nodule",
- "root nodules",
- "root tip",
- "root tips",
- "root",
- "roots",
- "rumen",
- "salivary gland",
- "skeletal muscle",
- "skin",
- "skin",
- "spleen",
- "spindle leaf",
- "stem",
- "stomach",
- "tentacle",
- "testes",
- "testis",
- "thymus",
- "trachea",
- "whole blood",
- NULL };
-
static CharPtr src_qual_dev_stage_words[] = {
"adult",
"egg",
"juvenile",
+ "larva",
NULL };
static CharPtr src_qual_cell_type_words[] = {
@@ -35128,14 +38330,51 @@ static CharPtr src_qual_cell_type_words[] = {
"neuroblast",
NULL };
+/*
+ * Per-qualifier tables of word lists, referenced from src_qual_fixes.
+ * Each table is a NULL-terminated array of NULL-terminated word arrays;
+ * FixSrcQualCaps runs one pass over the entry for every word array in the
+ * table, in the order listed here.
+ */
+
+/* sex: sex words only */
+static CharPtr PNTR src_qual_sex_lists[] = {
+ src_qual_sex_words,
+ NULL };
+
+/* nat_host: host words only */
+static CharPtr PNTR src_qual_host_lists[] = {
+ src_qual_host_words,
+ NULL };
+
+/* isolation_source: every word list, including the isolation-source-only one */
+static CharPtr PNTR src_qual_isolation_source_lists[] = {
+ src_qual_sex_words,
+ src_qual_host_words,
+ src_qual_isolation_source_and_tissue_type_words,
+ src_qual_isolation_source_words,
+ src_qual_dev_stage_words,
+ src_qual_cell_type_words,
+ NULL };
+
+/* lab_host: shares the host word list with nat_host */
+static CharPtr PNTR src_qual_lab_host_lists[] = {
+ src_qual_host_words,
+ NULL };
+
+/* tissue_type: same as isolation_source minus the isolation-source-only list */
+static CharPtr PNTR src_qual_tissue_type_lists[] = {
+ src_qual_sex_words,
+ src_qual_host_words,
+ src_qual_isolation_source_and_tissue_type_words,
+ src_qual_dev_stage_words,
+ src_qual_cell_type_words,
+ NULL };
+
+/* dev_stage: development-stage words only */
+static CharPtr PNTR src_qual_dev_stage_lists[] = {
+ src_qual_dev_stage_words,
+ NULL };
+
+/* cell_type: cell-type words only */
+static CharPtr PNTR src_qual_cell_type_lists[] = {
+ src_qual_cell_type_words,
+ NULL };
+
static SrcQualFixListData src_qual_fixes[] = {
- {Source_qual_sex, src_qual_sex_words} ,
- {Source_qual_nat_host, src_qual_host_words},
- {Source_qual_isolation_source, src_qual_isolation_source_words},
- {Source_qual_lab_host, src_qual_lab_host_words},
- {Source_qual_tissue_type, src_qual_tissue_type_words},
- {Source_qual_dev_stage, src_qual_dev_stage_words},
- {Source_qual_cell_type, src_qual_cell_type_words},
+ {Source_qual_sex, src_qual_sex_lists} ,
+ {Source_qual_nat_host, src_qual_host_lists},
+ {Source_qual_isolation_source, src_qual_isolation_source_lists},
+ {Source_qual_lab_host, src_qual_lab_host_lists},
+ {Source_qual_tissue_type, src_qual_tissue_type_lists},
+ {Source_qual_dev_stage, src_qual_dev_stage_lists},
+ {Source_qual_cell_type, src_qual_cell_type_lists},
{0, NULL}
};
@@ -35186,7 +38425,7 @@ static void FixSourceQualCaps (BioSourcePtr biop, Pointer data)
NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp)
{
- Int4 i;
+ Int4 i, j;
SrcQualFixData sd;
MemSet (&sd, 0, sizeof (SrcQualFixData));
@@ -35196,11 +38435,13 @@ NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp)
sd.vn.choice = SourceQualChoice_textqual;
/* find fix function */
- for (i = 0; src_qual_fixes[i].fix_list != NULL; i++) {
+ for (i = 0; src_qual_fixes[i].fix_lists != NULL; i++) {
if (src_qual_fixes[i].src_qual == src_qual) {
- sd.fix_list = src_qual_fixes[i].fix_list;
- sd.vn.data.intvalue = src_qual;
- VisitBioSourcesInSep (sep, &sd, FixSourceQualCaps);
+ for (j = 0; src_qual_fixes[i].fix_lists[j] != NULL; j++) {
+ sd.fix_list = src_qual_fixes[i].fix_lists[j];
+ sd.vn.data.intvalue = src_qual;
+ VisitBioSourcesInSep (sep, &sd, FixSourceQualCaps);
+ }
}
}
@@ -35302,16 +38543,30 @@ extern ValNodePtr ListCodingRegionsContainedInSourceFeatures (SeqEntryPtr sep)
}
-extern void CountNsInSequence (BioseqPtr bsp, Int4Ptr p_total, Int4Ptr p_max_stretch, Boolean expand_gaps)
+extern void CountNsInSequence (
+ BioseqPtr bsp,
+ Int4Ptr p_totalN,
+ Int4Ptr p_totalDash,
+ Int4Ptr p_totalTilde,
+ Int4Ptr p_max_stretch,
+ Boolean expand_gaps,
+ Boolean no_stretch_in_assembly_gap
+)
+
{
-/*
StreamFlgType flags = STREAM_CORRECT_INVAL;
- Int4 pos, total = 0, max_stretch = 0, this_stretch = 0;
+ Int4 pos, totalN = 0, totalDash = 0, totalTilde = 0, max_stretch = 0, this_stretch = 0;
Int2 residue;
StreamCache sc;
- if (p_total != NULL) {
- *p_total = 0;
+ if (p_totalN != NULL) {
+ *p_totalN = 0;
+ }
+ if (p_totalDash != NULL) {
+ *p_totalDash = 0;
+ }
+ if (p_totalTilde != NULL) {
+ *p_totalTilde = 0;
}
if (p_max_stretch != NULL) {
*p_max_stretch = 0;
@@ -35319,17 +38574,30 @@ extern void CountNsInSequence (BioseqPtr bsp, Int4Ptr p_total, Int4Ptr p_max_str
if (bsp == NULL) return;
if (expand_gaps) {
- flags |= STREAM_EXPAND_GAPS;
+ flags |= STREAM_EXPAND_GAPS | SEQ_GAP_AS_TILDE;
}
- if (! StreamCacheSetup (bsp, NULL, 0, &sc)) return;
+ if (! StreamCacheSetup (bsp, NULL, flags, &sc)) return;
pos = 0;
residue = StreamCacheGetResidue (&sc);
while (residue != '\0' && pos < bsp->length) {
if (residue == 'N') {
- total++;
+ totalN++;
+ this_stretch++;
+ } else if (residue == '-') {
+ totalDash++;
this_stretch++;
+ } else if (residue == '~') {
+ totalTilde++;
+ if (no_stretch_in_assembly_gap) {
+ if (this_stretch > max_stretch) {
+ max_stretch = this_stretch;
+ }
+ this_stretch = 0;
+ } else {
+ this_stretch++;
+ }
} else {
if (this_stretch > max_stretch) {
max_stretch = this_stretch;
@@ -35341,13 +38609,18 @@ extern void CountNsInSequence (BioseqPtr bsp, Int4Ptr p_total, Int4Ptr p_max_str
pos++;
}
- if (p_total != NULL) {
- *p_total = total;
+ if (p_totalN != NULL) {
+ *p_totalN = totalN;
+ }
+ if (p_totalDash != NULL) {
+ *p_totalDash = totalDash;
+ }
+ if (p_totalTilde != NULL) {
+ *p_totalTilde = totalTilde;
}
if (p_max_stretch != NULL) {
*p_max_stretch = max_stretch;
}
-*/
}
@@ -35491,6 +38764,10 @@ NLM_EXTERN ValNodePtr GetLocusTagPrefixList (SeqEntryPtr sep)
static CharPtr RemovableCultureNotes[] = {
+ "[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]",
+ "[BankIt_uncultured16S_wizard]; [universal primers]; [dgge]",
+ "[BankIt_uncultured16S_wizard]; [universal primers]",
+ "[BankIt_cultured16S_wizard]",
"[uncultured (using universal primers)]",
"[uncultured (using universal primers) bacterial source]",
"[cultured bacterial source]",
@@ -35511,6 +38788,7 @@ static CharPtr RemovableCultureNotes[] = {
"[intergenic wizard]",
"[intergenic wizard; spans unknown]",
"[Microsatellite wizard]",
+ "[Microsatellite wizard; multiple repeats]",
"[D-loop wizard]",
"[D-loop wizard; spans unknown]",
"[D-loop wizard; spans known]",
@@ -35518,6 +38796,9 @@ static CharPtr RemovableCultureNotes[] = {
};
static CharPtr ReplaceableCultureNotes[] = {
+ "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",
+ "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",
+ "[BankIt_uncultured16S_wizard]; [species_specific primers]",
"[uncultured (with species-specific primers)]",
"[uncultured]; [amplified with species-specific primers]",
"[uncultured (using species-specific primers) bacterial source]",
@@ -35525,6 +38806,143 @@ static CharPtr ReplaceableCultureNotes[] = {
NULL
};
+static Boolean HasNotes(CharPtr name, CharPtr * notes)
+{
+ Int4 i;
+ for (i=0; notes[i] != NULL; i++) {
+ if (StringStr(name, notes[i]) != NULL) return TRUE;
+ }
+
+ return FALSE;
+};
+
+static Boolean HasUnculturedNotes(BioSourcePtr biop)
+{
+ SubSourcePtr ssp;
+
+ if (biop == NULL || biop->subtype == NULL) return FALSE;
+
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+ if (ssp->subtype == SUBSRC_other) {
+ if ( HasNotes(ssp->name, RemovableCultureNotes)
+ || HasNotes(ssp->name, ReplaceableCultureNotes))
+ return TRUE;
+ }
+ }
+ return FALSE;
+};
+
+static void FindUnculturedNotes(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr vnp, src_list=NULL, item_list = NULL;
+ src_list = CollectBioSources(sep_list, HasUnculturedNotes, TRUE);
+
+ for (vnp = src_list; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == OBJ_SEQDESC)
+ ValNodeAddPointer (&item_list, OBJ_SEQDESC, vnp->data.ptrvalue);
+ else ValNodeAddPointer (&item_list, OBJ_SEQFEAT, vnp->data.ptrvalue);
+ }
+
+ if (item_list != NULL) {
+ ValNodeAddPointer (discrepancy_list, 0,
+ NewClickableItem (UNCULTURED_NOTES_ONCALLER, "%d bio-sources have uncultured notes",
+ item_list));
+ }
+};
+
+static CharPtr SeqIdPhrases[] = {
+ "paired",
+ "trimmed",
+ "length",
+ "node",
+ "cov_",
+ NULL
+};
+
+static Boolean HasSpecialPhrase(CharPtr id_str)
+{
+ Int2 i;
+
+ for (i=0; SeqIdPhrases[i] != NULL; i++) {
+ if (StringISearch(id_str, SeqIdPhrases[i]) != NULL) return TRUE;
+ }
+ return FALSE;
+};
+
+static void DoesBioseqHasSeqIdsWithPhrases(BioseqPtr bsp, Pointer data)
+{
+ SeqIdPtr sip;
+ Char id_str[255];
+
+ if (bsp == NULL || data == NULL) return;
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ SeqIdPrint(sip, id_str, PRINTID_FASTA_SHORT);
+ if (HasSpecialPhrase(id_str)) {
+ ValNodeAddPointer((ValNodePtr PNTR)data, OBJ_BIOSEQ, bsp);
+ return;
+ }
+ }
+ return;
+};
+
+static void FindSeqIdHavingPhrases(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr vnp, item_list = NULL;
+ ClickableItemPtr cip;
+
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitBioseqsInSep(vnp->data.ptrvalue, &item_list, DoesBioseqHasSeqIdsWithPhrases);
+ if (item_list != NULL) break;
+ }
+
+ if (item_list != NULL) {
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (cip, 0, sizeof (ClickableItemData));
+ cip->clickable_item_type = SEQ_ID_PHRASES;
+ cip->description = StringSave ("Sequence Ids contain unacceptable phrases (cov_, length, node, paired, or trimmed)");
+ ValNodeAddPointer (discrepancy_list, 0, cip);
+ }
+};
+
+static void CDSProductHasNoProductString (SeqFeatPtr sfp, Pointer data)
+{
+ ProtRefPtr prp;
+ BioseqPtr bsp;
+ SeqFeatPtr cds;
+ ValNodePtr vnp;
+
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_PROT || sfp->data.value.ptrvalue == NULL || data == NULL) {
+ return;
+ }
+
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp != NULL) {
+ cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
+ if (cds != NULL) {
+ for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
+ {
+ if (StringISearch(vnp->data.ptrvalue, "no product string in file") != NULL) {
+ ValNodeAddPointer ((ValNodePtr PNTR)data, OBJ_SEQFEAT, sfp);
+ }
+ }
+ }
+ }
+};
+
+static void ProductsWithNoProductString(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr vnp, item_list = NULL;
+
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitFeaturesInSep (vnp->data.ptrvalue, &item_list, CDSProductHasNoProductString);
+ }
+
+ if (item_list != NULL) {
+ ValNodeAddPointer(discrepancy_list, 0,
+ NewClickableItem (NO_PRODUCT_STRING, "%d products have \"no product string in file\"", item_list));
+ }
+};
static Boolean RemoveCultureNotesFromText (CharPtr PNTR p_txt)
{
@@ -35741,3 +39159,360 @@ NLM_EXTERN Boolean IsNCBIFileID (SeqIdPtr sip)
return FALSE;
}
}
+
+
+NLM_EXTERN CharPtr DescribeStructuredCommentDifferences (UserObjectPtr uop1, UserObjectPtr uop2)
+{
+ UserFieldPtr ufp1, ufp2;
+ ValNodeBlock diff_list;
+ CharPtr diff_fmt = "%s fields differ";
+ CharPtr field_diff_fmt = "%s field found instead of %s";
+ CharPtr only_one_fmt = "only one StructuredComment has %s field";
+ CharPtr diff;
+ CharPtr label1;
+ Char id_buf1[20];
+ CharPtr label2;
+ Char id_buf2[20];
+
+ if (uop1 == NULL && uop2 == NULL) {
+ return NULL;
+ } else if (uop1 == NULL || uop2 == NULL) {
+ return StringSave ("One StructuredComment is empty");
+ }
+
+ InitValNodeBlock (&diff_list, NULL);
+
+ for (ufp1 = uop1->data, ufp2 = uop2->data;
+ ufp1 != NULL && ufp2 != NULL;
+ ufp1 = ufp1->next, ufp2 = ufp2->next) {
+ if (CompareUserFields(ufp1, ufp2) != 0) {
+ if (ufp1->label == NULL) {
+ label1 = "Unlabeled field";
+ } else if (ufp1->label->str != NULL) {
+ label1 = ufp1->label->str;
+ } else {
+ sprintf (id_buf1, "%d", ufp1->label->id);
+ label1 = id_buf1;
+ }
+ if (ufp2->label == NULL) {
+ label2 = "Unlabeled field";
+ } else if (ufp2->label->str != NULL) {
+ label2 = ufp2->label->str;
+ } else {
+ sprintf (id_buf2, "%d", ufp2->label->id);
+ label2 = id_buf2;
+ }
+ if (StringCmp (label1, label2) != 0) {
+ diff = (CharPtr) MemNew (sizeof (Char) * (StringLen (field_diff_fmt) + StringLen (label1) + StringLen (label2)));
+ sprintf (diff, field_diff_fmt, label1, label2);
+ ValNodeAddPointerToEnd (&diff_list, 0, diff);
+ } else {
+ diff = (CharPtr) MemNew (sizeof (Char) * (StringLen (diff_fmt) + StringLen (label1)));
+ sprintf (diff, diff_fmt, label1);
+ ValNodeAddPointerToEnd (&diff_list, 0, diff);
+ }
+ }
+ }
+ while (ufp1 != NULL) {
+ if (ufp1->label == NULL) {
+ label1 = "Unlabeled field";
+ } else if (ufp1->label->str != NULL) {
+ label1 = ufp1->label->str;
+ } else {
+ sprintf (id_buf1, "%d", ufp1->label->id);
+ label1 = id_buf1;
+ }
+ diff = (CharPtr) MemNew (sizeof (Char) * (StringLen (only_one_fmt) + StringLen (label1)));
+ sprintf (diff, only_one_fmt, label1);
+ ValNodeAddPointerToEnd (&diff_list, 0, diff);
+ ufp1 = ufp1->next;
+ }
+ while (ufp2 != NULL) {
+ if (ufp2->label == NULL) {
+ label2 = "Unlabeled field";
+ } else if (ufp2->label->str != NULL) {
+ label2 = ufp2->label->str;
+ } else {
+ sprintf (id_buf2, "%d", ufp2->label->id);
+ label2 = id_buf2;
+ }
+ diff = (CharPtr) MemNew (sizeof (Char) * (StringLen (only_one_fmt) + StringLen (label2)));
+ sprintf (diff, only_one_fmt, label2);
+ ValNodeAddPointerToEnd (&diff_list, 0, diff);
+ ufp2 = ufp2->next;
+ }
+
+
+ diff = ValNodeMergeStrsEx (diff_list.head, ";");
+ diff_list.head = ValNodeFreeData (diff_list.head);
+ return diff;
+}
+
+
+static Boolean NoStructuredCommentFieldsExceptPrefix (UserObjectPtr uop)
+{
+ UserFieldPtr ufp;
+ Boolean any_other = FALSE;
+
+ if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) {
+ return FALSE;
+ }
+
+ for (ufp = uop->data; ufp != NULL && !any_other; ufp = ufp->next) {
+ if (!IsUserFieldStructuredCommentPrefixOrSuffix(ufp)) {
+ any_other = TRUE;
+ }
+ }
+ return !any_other;
+}
+
+
+static void RemoveEmptyStructuredCommentsCallback (SeqDescPtr sdp, Pointer data)
+{
+ ObjValNodePtr ovp;
+
+ if (sdp == NULL || sdp->choice != Seq_descr_user || !sdp->extended) {
+ return;
+ }
+ if (NoStructuredCommentFieldsExceptPrefix(sdp->data.ptrvalue)) {
+ ovp = (ObjValNodePtr) sdp;
+ ovp->idx.deleteme = TRUE;
+ }
+}
+
+
+NLM_EXTERN void RemoveEmptyStructuredComments (Uint2 entityID)
+{
+ SeqEntryPtr sep;
+
+ sep = GetTopSeqEntryForEntityID (entityID);
+ VisitDescriptorsInSep (sep, NULL, RemoveEmptyStructuredCommentsCallback);
+ DeleteMarkedObjects (entityID, 0, NULL);
+}
+
+
+NLM_EXTERN Boolean IsStructuredCommentPrefix (UserFieldPtr ufp)
+{
+ if (ufp == NULL) {
+ return FALSE;
+ }
+ if (ufp->label != NULL
+ && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0
+ && ufp->choice == 1) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+
+NLM_EXTERN Boolean IsStructuredCommentSuffix (UserFieldPtr ufp)
+{
+ if (ufp == NULL) {
+ return FALSE;
+ }
+ if (ufp->label != NULL
+ && StringICmp (ufp->label->str, "StructuredCommentSuffix") == 0
+ && ufp->choice == 1) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+
+NLM_EXTERN CharPtr GetStructuredCommentPrefix (UserObjectPtr uop)
+{
+ UserFieldPtr ufp;
+ CharPtr prefix = NULL;
+
+ if (uop == NULL) {
+ return NULL;
+ }
+
+ for (ufp = uop->data; ufp != NULL && prefix == NULL; ufp = ufp->next) {
+ if (IsStructuredCommentPrefix(ufp)) {
+ prefix = ufp->data.ptrvalue;
+ }
+ }
+ return prefix;
+}
+
+
+NLM_EXTERN FieldDiffPtr FieldDiffNew (FieldTypePtr field, CharPtr seq_id, CharPtr biosample_id, CharPtr val1, CharPtr val2, Uint1 src_type, Pointer src_data)
+{
+ FieldDiffPtr diff = (FieldDiffPtr) MemNew (sizeof (FieldDiffData));
+ diff->field = AsnIoMemCopy (field, (AsnReadFunc) FieldTypeAsnRead, (AsnWriteFunc) FieldTypeAsnWrite);
+ diff->seq_id = StringSave (seq_id);
+ diff->biosample_id = StringSave (biosample_id);
+ diff->val1 = StringSave (val1);
+ diff->val2 = StringSave (val2);
+ if (src_data != NULL) {
+ diff->src = ValNodeNew (NULL);
+ diff->src->choice = src_type;
+ diff->src->data.ptrvalue = src_data;
+ }
+ return diff;
+}
+
+
+NLM_EXTERN FieldDiffPtr FieldDiffFree (FieldDiffPtr diff)
+{
+ if (diff != NULL) {
+ diff->field = FieldTypeFree (diff->field);
+ diff->seq_id = MemFree (diff->seq_id);
+ diff->biosample_id = MemFree (diff->biosample_id);
+ diff->val1 = MemFree (diff->val1);
+ diff->val2 = MemFree (diff->val2);
+ diff->src = ClickableItemObjectListFree (diff->src);
+ diff = MemFree (diff);
+ }
+ return diff;
+}
+
+
+NLM_EXTERN ValNodePtr LIBCALL FieldDiffListFree (ValNodePtr list)
+{
+ ValNodePtr vnp_next;
+
+ while (list != NULL) {
+ vnp_next = list->next;
+ list->next = NULL;
+ list->data.ptrvalue = FieldDiffFree (list->data.ptrvalue);
+ list = ValNodeFree (list);
+ list = vnp_next;
+ }
+ return list;
+}
+
+
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffField (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+ FieldDiffPtr g1, g2;
+ int cmp = 0;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 != NULL && vnp2 != NULL) {
+ g1 = (FieldDiffPtr) vnp1->data.ptrvalue;
+ g2 = (FieldDiffPtr) vnp2->data.ptrvalue;
+ if (g1 != NULL && g2 != NULL) {
+ cmp = CompareFieldTypesEx(g1->field, g2->field, TRUE);
+ if (cmp == 0) {
+ cmp = StringCmp (g1->biosample_id, g2->biosample_id);
+ if (cmp == 0) {
+ cmp = StringCmp (g1->seq_id, g2->seq_id);
+ }
+ }
+ }
+ }
+ }
+ return cmp;
+}
+
+
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffIdThenField (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+ FieldDiffPtr g1, g2;
+ int cmp = 0;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 != NULL && vnp2 != NULL) {
+ g1 = (FieldDiffPtr) vnp1->data.ptrvalue;
+ g2 = (FieldDiffPtr) vnp2->data.ptrvalue;
+ if (g1 != NULL && g2 != NULL) {
+ cmp = StringCmp (g1->biosample_id, g2->biosample_id);
+ if (cmp == 0) {
+ cmp = StringCmp (g1->seq_id, g2->seq_id);
+ }
+ if (cmp == 0) {
+ cmp = CompareFieldTypesEx(g1->field, g2->field, TRUE);
+ }
+ }
+ }
+ }
+ return cmp;
+}
+
+
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffBiosampleIdThenFieldThenVal (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+ FieldDiffPtr g1, g2;
+ int cmp = 0;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 != NULL && vnp2 != NULL) {
+ g1 = (FieldDiffPtr) vnp1->data.ptrvalue;
+ g2 = (FieldDiffPtr) vnp2->data.ptrvalue;
+ if (g1 != NULL && g2 != NULL) {
+ cmp = StringCmp (g1->biosample_id, g2->biosample_id);
+ if (cmp == 0) {
+ cmp = CompareFieldTypesEx(g1->field, g2->field, TRUE);
+ }
+ if (cmp == 0) {
+ cmp = StringCmp (g1->val1, g2->val1);
+ }
+ }
+ }
+ }
+ return cmp;
+}
+
+
+NLM_EXTERN ValNodePtr GetBioSourceFieldDiffs (CharPtr seqid, CharPtr biosample_id, BioSourcePtr biop1, BioSourcePtr biop2, ValNodePtr field_list, Uint1 src_type, Pointer src_data)
+{
+ ValNodePtr list = NULL, field;
+ CharPtr val1, val2;
+
+ if (biop1 == NULL || biop2 == NULL) {
+ return NULL;
+ }
+
+ for (field = field_list; field != NULL; field = field->next) {
+ if (field->choice == FieldType_source_qual) {
+ val1 = GetSourceQualFromBioSource (biop1, field->data.ptrvalue, NULL);
+ val2 = GetSourceQualFromBioSource (biop2, field->data.ptrvalue, NULL);
+ ValNodeAddPointer (&list, 0, FieldDiffNew (field, seqid, biosample_id, val1, val2, src_type, src_data));
+ val1 = MemFree (val1);
+ val2 = MemFree (val2);
+ }
+ }
+ return list;
+}
+
+
+NLM_EXTERN ValNodePtr GetStructuredCommentFieldDiffs (CharPtr seq_id, CharPtr biosample_id, UserObjectPtr uop1, UserObjectPtr uop2, ValNodePtr field_list, Uint1 src_type, Pointer src_data)
+{
+ ValNodePtr list = NULL, field;
+ CharPtr val1, val2;
+
+ if (uop1 == NULL || uop2 == NULL) {
+ return NULL;
+ }
+
+ for (field = field_list; field != NULL; field = field->next) {
+ if (field->choice == FieldType_struc_comment_field) {
+ val1 = GetStructuredCommentFieldFromUserObject (uop1, field->data.ptrvalue, NULL);
+ val2 = GetStructuredCommentFieldFromUserObject (uop2, field->data.ptrvalue, NULL);
+ ValNodeAddPointer (&list, 0, FieldDiffNew (field, seq_id, biosample_id, val1, val2, src_type, src_data));
+ val1 = MemFree (val1);
+ val2 = MemFree (val2);
+ }
+ }
+ return list;
+}
+
diff --git a/api/sqnutil4.c b/api/sqnutil4.c
index 63820f94..d6bafad9 100755
--- a/api/sqnutil4.c
+++ b/api/sqnutil4.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/27/2007
*
-* $Revision: 1.187 $
+* $Revision: 1.257 $
*
* File Description:
* This file contains functions for automatically generating definition lines.
@@ -57,6 +57,7 @@
#include <salpedit.h>
#include <alignmgr.h>
#include <alignmgr2.h>
+#include <explore.h>
#define NLM_GENERATED_CODE_PROTO
#include <objmacro.h>
#include <macroapi.h>
@@ -67,73 +68,38 @@
/* you should make the corresponding change to the DefLinePos enum. */
ModifierItemGlobalData DefLineModifiers[] = {
- { "Acronym" , TRUE , ORGMOD_acronym },
- { "Anamorph" , TRUE , ORGMOD_anamorph },
- { "Authority" , TRUE , ORGMOD_authority },
{ "Bio-material" , TRUE, ORGMOD_bio_material },
{ "Biotype" , TRUE , ORGMOD_biotype },
{ "Biovar" , TRUE , ORGMOD_biovar },
{ "Breed" , TRUE , ORGMOD_breed },
{ "Cell-line" , FALSE, SUBSRC_cell_line },
- { "Cell-type" , FALSE, SUBSRC_cell_type },
{ "Chemovar" , TRUE , ORGMOD_chemovar },
{ "Chromosome" , FALSE, SUBSRC_chromosome },
{ "Clone" , FALSE, SUBSRC_clone },
- { "Clone-lib" , FALSE, SUBSRC_clone_lib },
- { "Collected-by" , FALSE, SUBSRC_collected_by },
- { "Collection-date" , FALSE, SUBSRC_collection_date },
- { "Common" , TRUE , ORGMOD_common },
{ "Country" , FALSE, SUBSRC_country },
{ "Cultivar" , TRUE , ORGMOD_cultivar },
{ "Culture-collection" , TRUE , ORGMOD_culture_collection },
{ "Dev-stage" , FALSE, SUBSRC_dev_stage },
{ "Ecotype" , TRUE , ORGMOD_ecotype },
{ "Endogenous-virus-name", FALSE, SUBSRC_endogenous_virus_name},
- { "Environmental-sample" , FALSE, SUBSRC_environmental_sample },
- { "Forma" , TRUE , ORGMOD_forma },
- { "Forma-specialis" , TRUE , ORGMOD_forma_specialis },
- { "Frequency" , FALSE, SUBSRC_frequency },
{ "Genotype" , FALSE, SUBSRC_genotype },
- { "Germline" , FALSE, SUBSRC_germline },
- { "Group" , TRUE , ORGMOD_group },
{ "Haplogroup" , FALSE, SUBSRC_haplogroup },
{ "Haplotype" , FALSE, SUBSRC_haplotype },
- { "Host" , TRUE , ORGMOD_nat_host },
- { "Identified-by" , FALSE, SUBSRC_identified_by },
{ "Isolate" , TRUE , ORGMOD_isolate },
- { "Isolation-source" , FALSE, SUBSRC_isolation_source },
- { "Lab-host" , FALSE, SUBSRC_lab_host },
- { "Lat-lon" , FALSE, SUBSRC_lat_lon },
{ "Linkage-group" , FALSE, SUBSRC_linkage_group },
{ "Map" , FALSE, SUBSRC_map },
- { "Mating-type" , FALSE, SUBSRC_mating_type },
- { "Metagenomic" , FALSE, SUBSRC_metagenomic },
- { "Note-OrgMod" , TRUE, ORGMOD_other },
- { "Note-SubSrc" , FALSE, SUBSRC_other },
{ "Pathovar" , TRUE , ORGMOD_pathovar },
{ "Plasmid-name" , FALSE, SUBSRC_plasmid_name },
- { "Plastid-name" , FALSE, SUBSRC_plastid_name },
{ "Pop-variant" , FALSE, SUBSRC_pop_variant },
- { "Rearranged" , FALSE, SUBSRC_rearranged },
{ "Segment" , FALSE, SUBSRC_segment },
{ "Serogroup" , TRUE , ORGMOD_serogroup },
{ "Serotype" , TRUE , ORGMOD_serotype },
{ "Serovar" , TRUE , ORGMOD_serovar },
- { "Sex" , FALSE, SUBSRC_sex },
{ "Specimen voucher" , TRUE , ORGMOD_specimen_voucher },
{ "Strain" , TRUE , ORGMOD_strain },
{ "Subclone" , FALSE, SUBSRC_subclone },
- { "Subgroup" , TRUE , ORGMOD_subgroup },
- { "Sub-species" , TRUE , ORGMOD_sub_species },
{ "Substrain" , TRUE , ORGMOD_substrain },
- { "Subtype" , TRUE , ORGMOD_subtype },
- { "Synonym" , TRUE , ORGMOD_synonym },
- { "Teleomorph" , TRUE , ORGMOD_teleomorph },
- { "Tissue-lib" , FALSE, SUBSRC_tissue_lib },
- { "Tissue-type" , FALSE, SUBSRC_tissue_type },
- { "Transgenic" , FALSE, SUBSRC_transgenic },
- { "Type" , TRUE , ORGMOD_type },
- { "Variety" , TRUE , ORGMOD_variety }
+ { "Transgenic" , FALSE, SUBSRC_transgenic }
};
#define numDefLineModifiers (sizeof (DefLineModifiers) / sizeof (ModifierItemGlobalData))
@@ -300,7 +266,8 @@ static ValNodePtr FindStringInStrings (
return NULL;
}
-
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
NLM_EXTERN ValNodePtr FindExactStringListMatch (
ValNodePtr list,
CharPtr value
@@ -318,6 +285,7 @@ NLM_EXTERN ValNodePtr FindExactStringListMatch (
return NULL;
}
+//Not part of Autodef or Cleanup
/* This function creates a new linked list of strings with copies of
* contents of orig.
*/
@@ -335,6 +303,7 @@ static ValNodePtr CopyStrings (
}
return new_string_start;
}
+//LCOV_EXCL_STOP
/*
* This section of the code contains functions and structures for obtaining a
@@ -421,7 +390,8 @@ static Boolean IsDeflineModifierRequiredByDefault (Boolean is_orgmod, Int2 index
}
}
-
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
static void AddOneSubtypeField (ValNodePtr PNTR sq_list, SourceQualDescPtr orig, CharPtr str, Uint1 subfield)
{
SourceQualDescPtr sqdp_cpy;
@@ -439,6 +409,7 @@ static void AddOneSubtypeField (ValNodePtr PNTR sq_list, SourceQualDescPtr orig,
}
+//Not part of Autodef or Cleanup
static void AddSubtypeFields (ValNodePtr PNTR sq_list, SourceQualDescPtr orig)
{
if (sq_list == NULL || orig == NULL) return;
@@ -465,6 +436,7 @@ static void AddSubtypeFields (ValNodePtr PNTR sq_list, SourceQualDescPtr orig)
}
+//Not part of Autodef or Cleanup
static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, Boolean is_orgmod, Boolean use_alternate_note_name, Boolean get_subfields)
{
Int4 k;
@@ -500,7 +472,7 @@ static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, B
}
}
-
+//Not part of Autodef or Cleanup
static void AddNoteQual (ValNodePtr PNTR list, Boolean is_orgmod, Boolean use_alternate_note_name)
{
SourceQualDescPtr sqdp;
@@ -532,6 +504,7 @@ static void AddNoteQual (ValNodePtr PNTR list, Boolean is_orgmod, Boolean use_al
}
+//Not part of Autodef or Cleanup
NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2)
{
@@ -556,6 +529,7 @@ NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2)
}
+//Not part of Autodef or Cleanup
extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields)
{
ValNodePtr source_qual_list = NULL;
@@ -585,11 +559,13 @@ extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmo
return source_qual_list;
}
+//Not part of Autodef or Cleanup
extern ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued)
{
return GetSourceQualDescListEx (get_subsrc, get_orgmod, get_discouraged, get_discontinued, TRUE);
}
+//Not part of Autodef or Cleanup
/*
* The CountModifiersProc is used as the callback function for
* VisitBioSourcesInSep when we are getting a list of all the modifiers
@@ -690,6 +666,7 @@ static void CountModifiersProc (
}
}
+//Not part of Autodef or Cleanup
/* The CountModifiers function visits all of the bio sources, determining
* which modifiers are present, which modifiers have only one value,
* which modifiers have all different values, and which modifiers are
@@ -745,6 +722,7 @@ NLM_EXTERN void CountModifiers (
}
}
+//Not part of Autodef or Cleanup
/* The BioSrcDescData structure is used to hold a BioSourcePtr, a list
* of strings used to describe the biosource, including the taxonomy name
* and the values of all of the modifiers selected so far for this bio
@@ -771,6 +749,7 @@ static BioSrcDescPtr CopyBioSrcDescPtr (
return new_bsdp_start;
}
+//Not part of Autodef or Cleanup
/* The FreeBioSrcDescPtr function frees the memory associated with a
* linked list of BioSrcDescData structures.
*/
@@ -785,6 +764,8 @@ static void FreeBioSrcDescPtr (
MemFree (bsdp);
}
+
+//Not part of Autodef or Cleanup
/* The AddQualToBioSrcDescPtr function finds the qualifier at the
* feature_index position in the DefLineModifiers array in the
* BioSourcePtr and adds the value for that modifier to the array
@@ -848,7 +829,8 @@ static void AddQualToBioSrcDescPtr (
}
}
}
-
+
+//Not part of Autodef or Cleanup
/* The CompareOrganismDescriptors function compares the contents of the
* lists of strings for each BioSrcDesc item.
* The function returns:
@@ -889,6 +871,7 @@ static int CompareOrganismDescriptors (
}
}
+//Not part of Autodef or Cleanup
/* The OrgGroupData structure contains a list of BioSrcDescData items
* for which the contents of the descriptive strings list are identical,
* i.e., all the organisms in the group would have the same description
@@ -927,6 +910,7 @@ static OrgGroupPtr CopyOrgGroupList (
return new_ogp_start;
}
+//Not part of Autodef or Cleanup
/* The FreeOrgGroupPtr function frees the memory associated with a
* list of OrgGroups */
static void FreeOrgGroupPtr (
@@ -944,6 +928,7 @@ static void FreeOrgGroupPtr (
return;
}
+//Not part of Autodef or Cleanup
/* The ReorderGroupOrgs function sorts the OrgGroup list based on the results
* of the CompareOrganismDescriptors function.
*/
@@ -991,6 +976,7 @@ static void ReorderGroupOrgs (
}
}
+//Not part of Autodef or Cleanup
/* The ReGroupOrgs function operates on a single OrgGroup item.
* If any of the BioSrcDesc items in the group now have different
* descriptions, the function breaks it up into smaller, homogenous OrgGroups.
@@ -1030,6 +1016,7 @@ static void ReGroupOrgs (
}
}
+//Not part of Autodef or Cleanup
/* The AddQualToGroup function operates on a single OrgGroup item.
* The function adds a qualifier to each BioSrcDesc item in the OrgGroup,
* breaks the group into multiple groups if the group is no longer
@@ -1058,6 +1045,7 @@ static void AddQualToGroup (
ReGroupOrgs (this_group);
}
+//Not part of Autodef or Cleanup
/* The AddQualToGroupList function operates on a list of OrgGroup items.
* It calls AddQualToGroup for each item in the list.
*/
@@ -1077,6 +1065,7 @@ static void AddQualToGroupList (
}
}
+//Not part of Autodef or Cleanup
/* The CopyModifierIndices function creates a new ValNode list with the
* same data.intvalue values for each node as the original modifier_indices
* ValNode list.
@@ -1095,7 +1084,8 @@ static ValNodePtr CopyModifierIndices (
new_indices->next = CopyModifierIndices (modifier_indices->next);
return new_indices;
}
-
+
+//Not part of Autodef or Cleanup
/* The CopyModifierCombo creates a copy of a ModificationCombination item.
* This includes creating a copy of the number and list of modifiers
* and a copy of the number and list of OrgGroups, as well as copying the
@@ -1141,6 +1131,7 @@ static ModifierCombinationPtr CopyModifierCombo (
return newm;
}
+//Not part of Autodef or Cleanup
/* This function creates a new ModifierCombination item using the supplied
* OrgGroup list. It calculates the number of groups, maximum number of
* organisms in any one group, and number of unique organisms.
@@ -1180,6 +1171,7 @@ static ModifierCombinationPtr NewModifierCombo (
return newm;
}
+//Not part of Autodef or Cleanup
/* This function frees the memory associated with a list of
* ModifierCombination items.
*/
@@ -1194,6 +1186,7 @@ static void FreeModifierCombo (
MemFree (m);
}
+//Not part of Autodef or Cleanup
/* This function adds the qualifier at the feature_index position in the
* DefLineModifiers array to each OrgGroup in the list and recalculates
* the maximum number of organisms in any one group and the number of
@@ -1237,6 +1230,8 @@ static void AddQualToModifierCombo (
}
}
+
+//Not part of Autodef or Cleanup
/* This function creates the initial OrgGroup list that is copied for every
* ModifierCombination item.
*/
@@ -1317,6 +1312,7 @@ typedef struct bestsortdata {
Boolean is_unique;
} BestSortData, PNTR BestSortPtr;
+//Not part of Autodef or Cleanup
static Boolean Index1FoundBeforeIndex2 (
Int4 index1,
Int4 index2,
@@ -1337,6 +1333,7 @@ static Boolean Index1FoundBeforeIndex2 (
return FALSE;
}
+//Not part of Autodef or Cleanup
/* This function determines whether or not we should try adding this modifier
* to our combination. If we've already tried it and not added it to the list,
* there's no reason to try adding it again.
@@ -1351,7 +1348,7 @@ static Boolean OkToTryAddingQual (
ValNodePtr vnp;
/* if feature_index indicates a value we don't use for best combos, skip */
- if (feature_index == DEFLINE_POS_Map || feature_index == DEFLINE_POS_Specific_host)
+ if (feature_index == DEFLINE_POS_Map)
{
return FALSE;
}
@@ -1377,6 +1374,8 @@ static Boolean OkToTryAddingQual (
return TRUE;
}
+
+//Not part of Autodef or Cleanup
static ValNodePtr GetListOfAvailableModifiers ( ModifierItemLocalPtr ItemList)
{
ValNodePtr vnp, head;
@@ -1414,12 +1413,11 @@ static Int4 DefLineQualSortOrder [] = {
DEFLINE_POS_Cultivar,
DEFLINE_POS_Specimen_voucher,
DEFLINE_POS_Ecotype,
- DEFLINE_POS_Type,
DEFLINE_POS_Serotype,
- DEFLINE_POS_Authority,
DEFLINE_POS_Breed
};
+//Not part of Autodef or Cleanup
static int LIBCALLBACK SortByImportanceAndPresence (
VoidPtr ptr1,
VoidPtr ptr2
@@ -1471,6 +1469,7 @@ static int LIBCALLBACK SortByImportanceAndPresence (
}
+//Not part of Autodef or Cleanup
/* The function FindBestCombo tries to find the best combination of modifiers
* to create unique organism descriptions. This is accomplished by
* creating a list of required modifiers, and then creating a list of
@@ -1645,6 +1644,7 @@ static ModifierCombinationPtr FindBestCombo(
}
+//Not part of Autodef or Cleanup
/* create combo with the specified modifiers */
NLM_EXTERN ValNodePtr GetModifierIndicesFromModList (
ModifierItemLocalPtr modList
@@ -1663,6 +1663,7 @@ NLM_EXTERN ValNodePtr GetModifierIndicesFromModList (
}
return modifier_indices;
}
+//LCOV_EXCL_STOP
/* This is the callback function for sorting the modifier list. It
@@ -1675,9 +1676,7 @@ static Int4 DefLineQualPresentationOrder [] = {
DEFLINE_POS_Cultivar,
DEFLINE_POS_Specimen_voucher,
DEFLINE_POS_Ecotype,
- DEFLINE_POS_Type,
DEFLINE_POS_Serotype,
- DEFLINE_POS_Authority,
DEFLINE_POS_Breed
};
@@ -1881,7 +1880,7 @@ static void AddHIVModifierIndices (
}
}
- if ( ! have_country_in_list && have_country_mod)
+ if ( ! have_country_in_list && have_country_mod && modifier_indices != NULL)
{
vnp = ValNodeNew (*modifier_indices);
vnp->data.intvalue = DEFLINE_POS_Country;
@@ -1899,7 +1898,7 @@ static void AddHIVModifierIndices (
&& have_isolate_mod
&& ( clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_prefer_isolate
|| clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_want_both
- || ! have_clone_mod))
+ || ! have_clone_mod) && modifier_indices != NULL)
{
vnp = ValNodeNew (*modifier_indices);
vnp->data.intvalue = DEFLINE_POS_Isolate;
@@ -1910,7 +1909,7 @@ static void AddHIVModifierIndices (
&& have_clone_mod
&& ( clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_prefer_clone
|| clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_want_both
- || ! have_isolate_mod))
+ || ! have_isolate_mod) && modifier_indices != NULL)
{
vnp = ValNodeNew (*modifier_indices);
vnp->data.intvalue = DEFLINE_POS_Clone;
@@ -2017,6 +2016,9 @@ AddWGSModifierIndices
{
return;
}
+ //LCOV_EXCL_START
+ //When creating definition lines, always remove existing ones, so
+ //SpecialHandlingForSpecialTechniques will never return true
for (vnp = *modifier_indices;
vnp != NULL && vnp->data.intvalue != DEFLINE_POS_Strain;
@@ -2042,6 +2044,7 @@ AddWGSModifierIndices
}
}
}
+ //LCOV_EXCL_STOP
}
/* This function provides a label to be used in the definition line for
@@ -2249,7 +2252,8 @@ NLM_EXTERN Boolean UseSubSrcModifier (
return rval;
}
-
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
/* The SetRequiredModifiers function copies the default required values from
* the global DefLineModifiers array into the local list of modifier
* information.
@@ -2279,6 +2283,7 @@ static const Int4 s_auto_def_id_preferred_quals[] = {
static const Int4 k_num_auto_def_id_preferred_quals = sizeof (s_auto_def_id_preferred_quals) / sizeof (Int4);
+//Not part of Autodef or Cleanup
/* This function generates the modifiers for "AutoDefID" */
NLM_EXTERN void SetAutoDefIDModifiers (ModifierItemLocalPtr modList)
{
@@ -2301,6 +2306,7 @@ NLM_EXTERN void SetAutoDefIDModifiers (ModifierItemLocalPtr modList)
}
}
}
+//LCOV_EXCL_STOP
/* This function fixes HIV abbreviations, removes items in parentheses,
@@ -2414,6 +2420,8 @@ NLM_EXTERN Boolean IsTSA (BioseqPtr bsp)
}
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
NLM_EXTERN Boolean IsGenomeProjectIDDescriptor (SeqDescrPtr sdp)
{
UserObjectPtr uop;
@@ -2429,8 +2437,11 @@ NLM_EXTERN Boolean IsGenomeProjectIDDescriptor (SeqDescrPtr sdp)
}
return FALSE;
}
+//LCOV_EXCL_STOP
+//LCOV_EXCL_START
+//Not used for Autodef and Cleanup
NLM_EXTERN SeqDescrPtr GetGenomeProjectIDDescriptor (BioseqPtr bsp)
{
SeqDescrPtr sdp;
@@ -2447,7 +2458,7 @@ NLM_EXTERN SeqDescrPtr GetGenomeProjectIDDescriptor (BioseqPtr bsp)
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Int4 GetGenomeProjectID (BioseqPtr bsp)
{
SeqMgrDescContext context;
@@ -2480,6 +2491,7 @@ NLM_EXTERN Int4 GetGenomeProjectID (BioseqPtr bsp)
}
+//Not part of Autodef or Cleanup
static void AddSpTaxnameToList (SeqDescrPtr sdp, Pointer userdata)
{
BioSourcePtr biop;
@@ -2492,7 +2504,7 @@ static void AddSpTaxnameToList (SeqDescrPtr sdp, Pointer userdata)
ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, biop->org->taxname);
}
-
+//Not part of Autodef or Cleanup
NLM_EXTERN Boolean ShouldExcludeSp (SeqEntryPtr sep)
{
ValNodePtr name_list = NULL, vnp1, vnp2;
@@ -2519,7 +2531,7 @@ NLM_EXTERN Boolean ShouldExcludeSp (SeqEntryPtr sep)
name_list = ValNodeFree (name_list);
return all_diff;
}
-
+//LCOV_EXCL_STOP
/* This function sets the default values for the organism description settings */
NLM_EXTERN void InitOrganismDescriptionModifiers(OrganismDescriptionModifiersPtr odmp, SeqEntryPtr sep)
@@ -2714,7 +2726,7 @@ static CharPtr GetOrganismDescription (
{
mod = mod->next;
}
- if ( UseOrgModifier (mod, taxName, odmp->allow_mod_at_end_of_taxname))
+ if (mod != NULL && UseOrgModifier (mod, taxName, odmp->allow_mod_at_end_of_taxname))
{
if (odmp->allow_semicolon_in_modifier) {
no_semicolon_len = StringLen (mod->subname);
@@ -3058,6 +3070,8 @@ static Boolean IsAEmptyIntervalOfB (SeqLocPtr a, SeqLocPtr b, BioseqPtr bsp)
}
+//LCOV_EXCL_START
+//Due to logic error, this function is never called
static Boolean LocAContainsIntervalOfB (SeqLocPtr a, SeqLocPtr b)
{
SeqLocPtr interval;
@@ -3075,6 +3089,7 @@ static Boolean LocAContainsIntervalOfB (SeqLocPtr a, SeqLocPtr b)
}
return rval;
}
+//LCOV_EXCL_STOP
/* This section of code deals with identifying and labeling features
@@ -3265,8 +3280,16 @@ static Boolean LIBCALLBACK IsLTR (
SeqFeatPtr sfp
)
{
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_LTR) return FALSE;
- return TRUE;
+ GBQualPtr gb;
+
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE;
+ for (gb = sfp->qual; gb != NULL; gb = gb->next) {
+ if (StringICmp(gb->qual, "rpt_type") == 0 && StringISearch(gb->val, "long_terminal_repeat") != NULL) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
}
static CharPtr GetLTRDescription (
@@ -3531,8 +3554,9 @@ static Boolean FeatureDoesNotGetPartialComplete (SeqFeatPtr sfp)
{
GBQualPtr gbqual;
Int4 keyword_idx;
+
if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE;
-
+
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
{
keyword_idx = IsMobileElementGBQual(gbqual);
@@ -3550,13 +3574,17 @@ NLM_EXTERN Boolean LIBCALLBACK IsMobileElement (SeqFeatPtr sfp)
GBQualPtr gbqual;
if (sfp == NULL || (sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element)) return FALSE;
- for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
- {
- if (IsMobileElementGBQual(gbqual) > -1) {
+ if (sfp->idx.subtype == FEATDEF_repeat_region) {
+ for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
+ {
+ if (IsMobileElementGBQual(gbqual) > -1) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+ } else {
return TRUE;
- }
}
- return FALSE;
}
static Boolean LIBCALLBACK IsRemovableMobileElement (SeqFeatPtr sfp)
@@ -3851,45 +3879,45 @@ static Boolean LIBCALLBACK IsIntergenicSpacer (
)
{
if (sfp == NULL
- || sfp->idx.subtype != FEATDEF_misc_feature
- || sfp->comment == NULL
- || StringStr (sfp->comment, "intergenic spacer") == NULL)
- {
+|| sfp->idx.subtype != FEATDEF_misc_feature
+|| sfp->comment == NULL
+|| StringStr(sfp->comment, "intergenic spacer") == NULL)
+{
return FALSE;
- }
- return TRUE;
+}
+return TRUE;
}
-static ValNodePtr GettRNAGenesAndSpacers (CharPtr str);
-static ValNodePtr FreetRNAGenesAndSpacersList (ValNodePtr list);
+static ValNodePtr GettRNAGenesAndSpacers(CharPtr str);
+static ValNodePtr FreetRNAGenesAndSpacersList(ValNodePtr list);
-static Boolean LIBCALLBACK IsParsableList (
- SeqFeatPtr sfp
-)
+static Boolean LIBCALLBACK IsParsableList(
+ SeqFeatPtr sfp
+ )
{
- ValNodePtr list;
+ ValNodePtr list;
- if (sfp == NULL
- || sfp->idx.subtype != FEATDEF_misc_feature
- || sfp->comment == NULL)
- {
- return FALSE;
- }
-
- list = GettRNAGenesAndSpacers (sfp->comment);
- if (list == NULL)
- {
- return FALSE;
- }
- else
- {
- FreetRNAGenesAndSpacersList (list);
- return TRUE;
- }
+ if (sfp == NULL
+ || sfp->idx.subtype != FEATDEF_misc_feature
+ || sfp->comment == NULL)
+ {
+ return FALSE;
+ }
+
+ list = GettRNAGenesAndSpacers(sfp->comment);
+ if (list == NULL)
+ {
+ return FALSE;
+ }
+ else
+ {
+ FreetRNAGenesAndSpacersList(list);
+ return TRUE;
+ }
}
-/* This function produces the default definition line label for a misc_feature
+/* This function produces the default definition line label for a misc_feature
* that has the word "intergenic spacer" in the comment. If the comment starts
* with the word "contains", "contains" is ignored. If "intergenic spacer"
* appears first in the comment (or first after the word "contains", the text
@@ -3899,58 +3927,62 @@ static Boolean LIBCALLBACK IsParsableList (
* "intergenic spacer", this text will appear in the definition line before the words
* "intergenic spacer".
*/
-static void LIBCALLBACK GetIntergenicSpacerFeatureLabel (
- ValNodePtr featlist,
- BioseqPtr bsp,
- Uint1 biomol,
- FeatureLabelPtr flp
-)
+static void LIBCALLBACK GetIntergenicSpacerFeatureLabel(
+ ValNodePtr featlist,
+ BioseqPtr bsp,
+ Uint1 biomol,
+ FeatureLabelPtr flp
+ )
{
- SeqFeatPtr main_feat;
- CharPtr cp, buffer;
- Int4 datalen, offset;
+ SeqFeatPtr main_feat;
+ CharPtr cp, buffer;
+ Int4 datalen, offset;
- if (featlist == NULL || featlist->data.ptrvalue == NULL) return;
- main_feat = featlist->data.ptrvalue;
- if (StringHasNoText (main_feat->comment)) return;
- if (StringNCmp (main_feat->comment, "contains ", 9) == 0)
- {
- buffer = main_feat->comment + 9;
- }
- else if (StringNCmp (main_feat->comment, "may contain ", 12) == 0)
- {
- buffer = main_feat->comment + 12;
- }
- else
- {
- buffer = main_feat->comment;
- }
- cp = StringStr (buffer, "intergenic spacer");
- if (cp == NULL) return;
- flp->typeword = StringSave ("intergenic spacer");
- flp->pluralizable = FALSE;
- if (cp == buffer)
- {
- flp->is_typeword_first = TRUE;
- offset = StringLen ("intergenic spacer") + 1;
- if (StringNCmp (cp + offset, "and ", 4) == 0
- || *(cp + StringLen("intergenic spacer")) == 0)
+ if (featlist == NULL || featlist->data.ptrvalue == NULL) return;
+ main_feat = featlist->data.ptrvalue;
+ if (StringHasNoText(main_feat->comment)) return;
+ if (StringNCmp(main_feat->comment, "contains ", 9) == 0)
{
- flp->description = NULL;
+ buffer = main_feat->comment + 9;
+ }
+ else if (StringNCmp(main_feat->comment, "may contain ", 12) == 0)
+ {
+ buffer = main_feat->comment + 12;
}
else
{
- flp->description = StringSave (cp + StringLen ("intergenic spacer") + 1);
- cp = StringChr (flp->description, ';');
- if (cp != NULL)
- {
- *cp = 0;
- }
+ buffer = main_feat->comment;
}
- }
- else
- {
- flp->is_typeword_first = FALSE;
+ cp = StringStr(buffer, "intergenic spacer");
+ if (cp == NULL) return;
+ flp->typeword = StringSave("intergenic spacer");
+ flp->pluralizable = FALSE;
+ if (cp == buffer)
+ {
+ flp->is_typeword_first = TRUE;
+ offset = StringLen("intergenic spacer") + 1;
+ if (StringNCmp(cp + offset, "and ", 4) == 0
+ || *(cp + StringLen("intergenic spacer")) == 0)
+ {
+ flp->description = NULL;
+ }
+ else
+ {
+ flp->description = StringSave(cp + StringLen("intergenic spacer") + 1);
+ cp = StringChr(flp->description, ';');
+ if (cp != NULL)
+ {
+ *cp = 0;
+ }
+ }
+ }
+ else
+ {
+ flp->is_typeword_first = FALSE;
+ if (StringCmp(cp + StringLen(flp->typeword), " region") == 0) {
+ flp->typeword = MemFree(flp->typeword);
+ flp->typeword = StringSave("intergenic spacer region");
+ }
datalen = cp - buffer;
flp->description = MemNew ( datalen + 1);
if (flp->description == NULL) return;
@@ -3981,6 +4013,21 @@ static CommentFeatPtr CommentFeatFree (CommentFeatPtr cfp)
}
+static ValNodePtr CommentFeatListFree (ValNodePtr vnp)
+{
+ ValNodePtr vnp_next;
+
+ while (vnp != NULL) {
+ vnp_next = vnp->next;
+ vnp->next = NULL;
+ vnp->data.ptrvalue = CommentFeatFree ((CommentFeatPtr)(vnp->data.ptrvalue));
+ vnp = ValNodeFree (vnp);
+ vnp = vnp_next;
+ }
+ return vnp;
+}
+
+
typedef struct intergenicspacerdef
{
CharPtr first_gene;
@@ -4306,17 +4353,22 @@ FeatureClauseFromParsedComment
if (fcp != NULL)
{
fcp->feature_label_data.is_typeword_first = FALSE;
- fcp->feature_label_data.typeword = StringSave ("gene");
- if (tdp->gene_name == NULL) {
- fcp->feature_label_data.description = StringSave (tdp->product_name);
+ if (StringCmp(tdp->product_name, "control region") == 0 || StringCmp(tdp->product_name, "D-loop") == 0) {
+ fcp->feature_label_data.typeword = StringSave(tdp->product_name);
+ fcp->feature_label_data.description = StringSave("");
} else {
- fcp->feature_label_data.description = (CharPtr) MemNew (sizeof (Char) * (StringLen (gene_fmt)
+ fcp->feature_label_data.typeword = StringSave ("gene");
+ if (tdp->gene_name == NULL) {
+ fcp->feature_label_data.description = StringSave (tdp->product_name);
+ } else {
+ fcp->feature_label_data.description = (CharPtr) MemNew (sizeof (Char) * (StringLen (gene_fmt)
+ StringLen (tdp->gene_name)
+ StringLen (tdp->product_name)));
- if (fcp->feature_label_data.description != NULL)
- {
- sprintf (fcp->feature_label_data.description, gene_fmt,
- tdp->product_name, tdp->gene_name);
+ if (fcp->feature_label_data.description != NULL)
+ {
+ sprintf (fcp->feature_label_data.description, gene_fmt,
+ tdp->product_name, tdp->gene_name);
+ }
}
}
if (is_partial)
@@ -4522,6 +4574,37 @@ DeflineFeatureRequestListPtr rp)
return head;
}
+static Boolean IsRegulatory(SeqFeatPtr sfp)
+{
+ ImpFeatPtr imp;
+
+ if (sfp == NULL ||
+ sfp->data.choice != SEQFEAT_IMP ||
+ (imp = (ImpFeatPtr)(sfp->data.value.ptrvalue)) == NULL ||
+ StringCmp(imp->key, "regulatory") != 0) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
+
+static CharPtr GetRegulatoryClass(SeqFeatPtr sfp)
+{
+ GBQualPtr gbqual;
+
+ if (sfp == NULL || !IsRegulatory(sfp)) {
+ return NULL;
+ }
+ for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
+ if (StringICmp(gbqual->qual, "regulatory_class") == 0) {
+ return gbqual->val;
+ }
+ }
+ return NULL;
+}
+
+
static Boolean LIBCALLBACK IsSatelliteSequence (
SeqFeatPtr sfp
)
@@ -4546,8 +4629,14 @@ static Boolean LIBCALLBACK IsPromoter (
SeqFeatPtr sfp
)
{
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_promoter) return FALSE;
- return TRUE;
+ if (sfp == NULL) {
+ return FALSE;
+ } else if (sfp->idx.subtype == FEATDEF_promoter) {
+ return TRUE;
+ } else if (StringCmp (GetRegulatoryClass(sfp), "promoter") == 0) {
+ return TRUE;
+ }
+ return FALSE;
}
static Boolean LIBCALLBACK IsEndogenousVirusSourceFeature (
@@ -4625,6 +4714,72 @@ static CharPtr find_noncoding_feature_keyword (
return NULL;
}
+
+// returns ValNode list of CommentFeatPtr
+static ValNodePtr ParsetRNAAndOtherElement (CharPtr str)
+{
+ CharPtr cp, other;
+ ValNodePtr list = NULL;
+ CommentFeatPtr cf;
+
+ cp = str;
+ if (StringNCmp(cp, "contains ", 9) == 0) {
+ cp += 9;
+ }
+
+ other = StringSearch (cp, " and ");
+ if (other == NULL) {
+ return list;
+ }
+
+ while (cp < other) {
+ cf = ParseGeneFromNoteForDefLine (&cp);
+ if (cf == NULL) {
+ list = CommentFeatListFree(list);
+ return list;
+ } else {
+ ValNodeAddPointer (&list, MISCFEAT_TRNA_GENE, cf);
+ while (*cp == ',' || isspace(*cp)) {
+ cp ++;
+ }
+ }
+ }
+
+ other += 5;
+
+ if (StringCmp(other, "control region") == 0 || StringCmp (other, "D-loop") == 0) {
+ cf = (CommentFeatPtr) MemNew (sizeof (CommentFeatData));
+ cf->product_name = StringSave(other);
+ ValNodeAddPointer (&list, MISCFEAT_TRNA_GENE, cf);
+ } else {
+ list = CommentFeatListFree(list);
+ }
+ return list;
+}
+
+
+static Boolean LIBCALLBACK IsTrnaPlusOther (
+ SeqFeatPtr sfp
+)
+{
+ ValNodePtr list;
+ Boolean rval = FALSE;
+
+ if (sfp == NULL ||
+ sfp->idx.subtype != FEATDEF_misc_feature ||
+ sfp->comment == NULL) {
+ rval = FALSE;
+ } else {
+ list = ParsetRNAAndOtherElement(sfp->comment);
+ if (list != NULL) {
+ rval = TRUE;
+ }
+ list = CommentFeatListFree (list);
+ }
+ return rval;
+}
+
+
static Boolean LIBCALLBACK IsNoncodingProductFeat (
SeqFeatPtr sfp
)
@@ -4634,6 +4789,7 @@ static Boolean LIBCALLBACK IsNoncodingProductFeat (
|| sfp->comment == NULL
|| StringStr (sfp->comment, "intergenic") != NULL
|| IsParsableList (sfp)
+ || IsTrnaPlusOther (sfp)
|| (find_noncoding_feature_keyword (sfp->comment) == NULL
&& (StringStr (sfp->comment, "nonfunctional ") == NULL
|| StringStr (sfp->comment, " due to ") == NULL)))
@@ -4641,6 +4797,7 @@ static Boolean LIBCALLBACK IsNoncodingProductFeat (
return FALSE;
}
+
return TRUE;
}
@@ -4713,6 +4870,114 @@ static Boolean LIBCALLBACK IsMiscFeat (
return TRUE;
}
+
+static Boolean IsSatellite (SeqFeatPtr sfp)
+{
+ GBQualPtr gbq;
+ Boolean rval = FALSE;
+
+ if ( sfp == NULL
+ || sfp->idx.subtype != FEATDEF_repeat_region) {
+ return FALSE;
+ }
+ for (gbq = sfp->qual; gbq != NULL && !rval; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "satellite") == 0) {
+ rval = TRUE;
+ }
+ }
+ return rval;
+}
+
+// use comment or rpt_family
+static Boolean DoesRepeatRegionHaveLabel(SeqFeatPtr sfp)
+{
+ GBQualPtr g;
+
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) {
+ return FALSE;
+ }
+ if (!StringHasNoText(sfp->comment)) {
+ return TRUE;
+ }
+
+ for (g = sfp->qual; g != NULL; g = g->next) {
+ if (StringICmp(g->qual, "rpt_family") == 0) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static Boolean LIBCALLBACK IsRepeatRegion (
+ SeqFeatPtr sfp
+)
+{
+ if ( sfp == NULL
+ || sfp->idx.subtype != FEATDEF_repeat_region
+ || !DoesRepeatRegionHaveLabel(sfp)
+ || IsSatellite(sfp))
+ {
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static CharPtr RepeatRegionLabelFromString(CharPtr str)
+{
+ CharPtr extra = "repeat region";
+ CharPtr rval = NULL;
+ Int4 len, extra_len;
+
+ if (str == NULL) {
+ return NULL;
+ }
+ len = StringLen(str);
+ extra_len = StringLen(extra);
+
+ if (len < extra_len || StringCmp(str + len - extra_len, extra) != 0) {
+ rval = StringSave(str);
+ } else {
+ rval = (CharPtr)MemNew(sizeof(Char) * (1 + len - extra_len));
+ StringNCpy(rval, str, len - extra_len);
+ rval[len - extra_len] = 0;
+ }
+ return rval;
+}
+
+static void LIBCALLBACK GetRepeatRegionLabel (
+ ValNodePtr featlist,
+ BioseqPtr bsp,
+ Uint1 biomol,
+ FeatureLabelPtr flp
+)
+{
+ SeqFeatPtr main_feat;
+ Boolean found = FALSE;
+ GBQualPtr g;
+
+ flp->description = NULL;
+ flp->typeword = StringSave("repeat region");
+ flp->pluralizable = FALSE;
+ flp->is_typeword_first = FALSE;
+
+ if (featlist == NULL) return;
+ main_feat = featlist->data.ptrvalue;
+ if (main_feat == NULL) return;
+
+ for (g = main_feat->qual; g != NULL; g = g->next) {
+ if (StringICmp(g->qual, "rpt_family") == 0) {
+ flp->description = RepeatRegionLabelFromString(g->val);
+ found = TRUE;
+ }
+ }
+
+ if (!found && !StringHasNoText(main_feat->comment)) {
+ flp->description = RepeatRegionLabelFromString(main_feat->comment);
+ }
+}
+
+
static Boolean LIBCALLBACK IsOperon (
SeqFeatPtr sfp
)
@@ -4754,6 +5019,7 @@ static Boolean IsRecognizedFeature (
|| IsNoncodingProductFeat (sfp)
|| IsPromoter (sfp)
|| IsMiscFeat (sfp)
+ || IsRepeatRegion (sfp)
|| IsOperon (sfp))
{
return TRUE;
@@ -5226,6 +5492,7 @@ static void FindBestMatchCandidate
&& SeqLocAinB (clause->slp, slp) > -1)
|| IsLocAInBonSameStrand (clause->slp, slp)
|| ( IsPromoter (clause_sfp)
+ && search_parent != NULL
&& IsAAdjacentToB (clause->slp, search_parent->slp, bsp,
ADJACENT_TYPE_UPSTREAM, TRUE))
|| (IsmRNASequence (bsp)
@@ -5571,7 +5838,9 @@ static void ExpandAltSplicedExons (
}
ValNodeFree (fcp->featlist->next);
fcp->featlist->next = NULL;
- new_clause->next = rest_of_list;
+ if (new_clause != NULL) {
+ new_clause->next = rest_of_list;
+ }
/* put back location for first exon - was reduced to union of
* all exon intervals in GroupAltSplicedExons
@@ -6372,7 +6641,7 @@ static Boolean ShowInterval (
)
{
if (IsSatelliteSequence (sfp) || IsExon (sfp) || IsIntron (sfp)
- || IsPromoter (sfp) || Is3UTR (sfp) || Is5UTR (sfp))
+ || IsPromoter (sfp) || Is3UTR (sfp) || Is5UTR (sfp) || IsRepeatRegion(sfp))
return FALSE;
return TRUE;
}
@@ -6662,7 +6931,7 @@ static CharPtr GetGenericInterval
{
if (utr3vnp != NULL)
{
- if (featlist->next != NULL)
+ if (featlist != NULL && featlist->next != NULL)
{
StringCat (interval, ",");
}
@@ -6807,7 +7076,8 @@ static MatchLabelFunctionData label_functions[] = {
{ IsMobileElement, GetMobileElementFeatureLabel },
{ IsPromoter, GetPromoterFeatureLabel },
{ IsIntergenicSpacer, GetIntergenicSpacerFeatureLabel },
- { IsGeneCluster, GetGeneClusterFeatureLabel }
+ { IsGeneCluster, GetGeneClusterFeatureLabel },
+ { IsRepeatRegion, GetRepeatRegionLabel }
};
typedef enum {
@@ -6816,6 +7086,7 @@ typedef enum {
DEFLINE_FEATLABEL_Promoter,
DEFLINE_FEATLABEL_IntergenicSpacer,
DEFLINE_FEATLABEL_GeneCluster,
+ DEFLINE_FEATLABEL_RepeatRegion,
NumDefLineFeatLabels
} DefLineFeatLabel;
@@ -7284,6 +7555,7 @@ static CharPtr misc_words [] = {
"external transcribed spacer",
"ribosomal RNA intergenic spacer",
"ribosomal RNA",
+ "intergenic spacer region",
"intergenic spacer"
};
@@ -7292,6 +7564,7 @@ typedef enum {
MISC_RNA_WORD_EXTERNAL_SPACER,
MISC_RNA_WORD_RNA_INTERGENIC_SPACER,
MISC_RNA_WORD_RNA,
+ MISC_RNA_WORD_INTERGENIC_SPACER_REGION,
MISC_RNA_WORD_INTERGENIC_SPACER,
NUM_MISC_RNA_WORDS
} MiscWord;
@@ -7398,7 +7671,8 @@ GetFeatureClausesFromMiscRNATokens
if (vnp->choice == MISC_RNA_WORD_INTERNAL_SPACER
|| vnp->choice == MISC_RNA_WORD_EXTERNAL_SPACER
|| vnp->choice == MISC_RNA_WORD_RNA_INTERGENIC_SPACER
- || vnp->choice == MISC_RNA_WORD_INTERGENIC_SPACER) {
+ || vnp->choice == MISC_RNA_WORD_INTERGENIC_SPACER
+ || vnp->choice == MISC_RNA_WORD_INTERGENIC_SPACER_REGION) {
if (word_loc == vnp->data.ptrvalue) {
fcp->feature_label_data.is_typeword_first = TRUE;
fcp->feature_label_data.typeword = StringSave (misc_words [vnp->choice]);
@@ -7416,6 +7690,8 @@ GetFeatureClausesFromMiscRNATokens
}
} else if (vnp->choice == MISC_RNA_WORD_RNA) {
fcp->feature_label_data.description = StringSave (vnp->data.ptrvalue);
+ fcp->feature_label_data.is_typeword_first = FALSE;
+ fcp->feature_label_data.typeword = StringSave ("gene");
}
if ((vnp == token_list && partial5) || (vnp->next == NULL && partial3)) {
fcp->interval = StringSave ("partial sequence");
@@ -7490,6 +7766,11 @@ static CharPtr GetRegionDescription
}
+/* Some misc_RNA clauses have a comment that actually lists multiple
+ * features. This function creates a clause for each element in the
+ * comment and inserts the list of new clauses into the feature list
+ * at the point where the single previous clause was.
+ */
static ValNodePtr GetMiscRNAelements
( SeqFeatPtr misc_rna,
BioseqPtr bsp,
@@ -7536,50 +7817,49 @@ static ValNodePtr GetMiscRNAelements
return clause_list;
}
-/* Some misc_RNA clauses have a comment that actually lists multiple
- * features. This function creates a clause for each element in the
- * comment and inserts the list of new clauses into the feature list
- * at the point where the single previous clause was.
+
+/* Some misc_feature clauses have a comment that actually lists a tRNA
+ * and either a control region or D-loop. This function creates a clause
+ * for each element in the comment and inserts the list of new clauses into
+ * the feature list at the point where the single previous clause was.
*/
-static void ReplaceRNAClauses (
- ValNodePtr PNTR clause_list,
- BioseqPtr bsp,
+static ValNodePtr GettRNAAndOtherElements
+( SeqFeatPtr misc_feat,
+ BioseqPtr bsp,
DeflineFeatureRequestListPtr rp)
{
+ ValNodePtr clause_list = NULL, cf_list = NULL, vnp;
FeatureClausePtr fcp;
- SeqFeatPtr main_feat;
- ValNodePtr clause, replacement_clauses, nextclause, vnp;
+ Boolean partial5, partial3, is_partial;
- if (clause_list == NULL || *clause_list == NULL) return;
- clause = *clause_list;
- while (clause != NULL)
- {
- nextclause = clause->next;
- fcp = (clause->data.ptrvalue);
- if (fcp == NULL
- || fcp->featlist == NULL
- || fcp->featlist->choice != DEFLINE_FEATLIST)
- {
- return;
- }
- main_feat = (SeqFeatPtr) fcp->featlist->data.ptrvalue;
-
- if (IsrRNA (main_feat) || IsMiscRNA (main_feat))
- {
- replacement_clauses = GetMiscRNAelements ( main_feat, bsp, rp );
- if (replacement_clauses != NULL)
- {
- for (vnp = replacement_clauses; vnp->next != NULL; vnp = vnp->next) {}
- vnp->next = clause->next;
- clause->next = replacement_clauses;
- fcp->delete_me = TRUE;
- }
+ if (misc_feat == NULL ||
+ misc_feat->idx.subtype != FEATDEF_misc_feature ||
+ StringHasNoText (misc_feat->comment)) {
+ return NULL;
+ }
+
+ cf_list = ParsetRNAAndOtherElement(misc_feat->comment);
+ if (cf_list == NULL) {
+ return NULL;
+ }
+
+ CheckSeqLocForPartial (misc_feat->location, &partial5, &partial3);
+
+ for (vnp = cf_list; vnp != NULL; vnp = vnp->next) {
+ is_partial = FALSE;
+ if (vnp == cf_list && partial5) {
+ is_partial = TRUE;
+ } else if (vnp->next == NULL && partial3) {
+ is_partial = TRUE;
}
- clause = nextclause;
+ fcp = FeatureClauseFromParsedComment (vnp->data.ptrvalue, misc_feat, is_partial, bsp, rp);
+ ValNodeAddPointer (&clause_list, DEFLINE_CLAUSEPLUS, fcp);
}
- DeleteFeatureClauses (clause_list);
+ cf_list = CommentFeatListFree(cf_list);
+ return clause_list;
}
+
/* Some misc_feat clauses have a comment that lists one or more tRNAs and
* an intergenic spacer. This function creates a clause for each element
* in the comment and inserts the list of new clauses into the feature list
@@ -7621,6 +7901,12 @@ static void ReplaceIntergenicSpacerClauses (
{
fcp->delete_me = TRUE;
}
+ } else if ((replacement_clauses = GetMiscRNAelements ( main_feat, bsp, rp )) != NULL ||
+ (replacement_clauses = GettRNAAndOtherElements ( main_feat, bsp, rp )) != NULL) {
+ for (vnp = replacement_clauses; vnp->next != NULL; vnp = vnp->next) {}
+ vnp->next = clause->next;
+ clause->next = replacement_clauses;
+ fcp->delete_me = TRUE;
}
clause = nextclause;
}
@@ -7744,7 +8030,8 @@ static void RemoveUnwantedMiscFeats (
&& ! IsControlRegion (sfp)
&& ! IsIntergenicSpacer (sfp)
&& ! IsGeneCluster (sfp)
- && ! IsParsableList (sfp))
+ && ! IsParsableList (sfp)
+ && ! IsTrnaPlusOther (sfp))
{
fcp->delete_me = TRUE;
}
@@ -7928,7 +8215,7 @@ static void ReplaceExonClauseList (
Int4 i;
CharPtr new_description;
Int4 new_description_len;
- CharPtr exdesc1, exdesc2;
+ CharPtr exdesc1 = NULL, exdesc2 = NULL;
if (fcp == NULL || clause == NULL) return;
@@ -7959,8 +8246,12 @@ static void ReplaceExonClauseList (
tmpclause = tmpclause->next;
}
- exdesc1 = GetExonDescription (bsp, fcp->featlist->data.ptrvalue);
- exdesc2 = GetExonDescription (bsp, lastfeat->data.ptrvalue);
+ if (fcp->featlist != NULL) {
+ exdesc1 = GetExonDescription (bsp, fcp->featlist->data.ptrvalue);
+ }
+ if (lastfeat != NULL) {
+ exdesc2 = GetExonDescription (bsp, lastfeat->data.ptrvalue);
+ }
if (exdesc1 == NULL || exdesc2 == NULL)
{
if (exdesc1 != NULL) MemFree (exdesc1);
@@ -8133,7 +8424,10 @@ AddProductEnding
StringCat (str, "; micronuclear");
}
else if (mitochloroflag > 0) {
- if (mitochloroflag > 9) {
+ if (mitochloroflag > DEFAULT_ORGANELLE_CLAUSE && mitochloroflag - DEFAULT_ORGANELLE_CLAUSE < DEFAULT_ORGANELLE_CLAUSE) {
+ sprintf(orgnelle, "; nuclear copy of %s gene", organelleByPopup[mitochloroflag - DEFAULT_ORGANELLE_CLAUSE]);
+ StringCat(str, orgnelle);
+ } else if (mitochloroflag > 9) {
/* beyond list */
}
else {
@@ -8881,6 +9175,29 @@ static Boolean LIBCALLBACK ShouldRemoveLTR (
return TRUE;
}
+
+static Boolean LIBCALLBACK ShouldRemoveRepeatRegion (
+ SeqFeatPtr sfp,
+ FeatureClausePtr parent_fcp,
+ FeatureClausePtr this_fcp,
+ BioseqPtr bsp,
+ Boolean isLonely,
+ Boolean isRequested,
+ Boolean isSegment,
+ DeflineFeatureRequestListPtr rp
+)
+{
+ if (isRequested)
+ {
+ return FALSE;
+ }
+ else
+ {
+ return TRUE;
+ }
+}
+
+
static Boolean LIBCALLBACK ShouldRemove3UTR (
SeqFeatPtr sfp,
FeatureClausePtr parent_fcp,
@@ -9067,7 +9384,7 @@ static Boolean LIBCALLBACK ShouldRemovePrecursorRNA
Boolean isSegment,
DeflineFeatureRequestListPtr rp)
{
- if (!isLonely && IsBioseqPrecursorRNA(bsp))
+ if (!isLonely && IsBioseqPrecursorRNA(bsp) && !isRequested)
{
return TRUE;
}
@@ -9102,10 +9419,13 @@ static RemovableItemGlobalData remove_items[] = {
{ IsNoncodingProductFeat, ShouldRemoveNoncodingProductFeat, "Misc feats with comments:" },
{ IsRemovableMobileElement, ShouldRemoveMobileElement, "Optional Mobile Element" },
{ IsPrecursorRNA, ShouldRemovePrecursorRNA, "Precursor RNAs" },
- { IsncRNA, ShouldRemovencRNA, "ncRNAs that overlap precursor RNAs"}
+ { IsncRNA, ShouldRemovencRNA, "ncRNAs that overlap precursor RNAs"},
+ { IsRepeatRegion, ShouldRemoveRepeatRegion, "Repeat regions" }
};
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup, used for GUI
NLM_EXTERN CharPtr GetRemovableItemName (Int4 i)
{
if (i < 0 || i >= NumRemovableItems) {
@@ -9114,6 +9434,7 @@ NLM_EXTERN CharPtr GetRemovableItemName (Int4 i)
return remove_items[i].group_name;
}
}
+//LCOV_EXCL_STOP
NLM_EXTERN void InitFeatureRequests (
DeflineFeatureRequestListPtr feature_requests
@@ -9136,6 +9457,8 @@ NLM_EXTERN void InitFeatureRequests (
}
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
NLM_EXTERN DeflineFeatureRequestListPtr FreeDeflineFeatureRequestList (DeflineFeatureRequestListPtr feature_requests)
{
if (feature_requests != NULL) {
@@ -9144,6 +9467,7 @@ NLM_EXTERN DeflineFeatureRequestListPtr FreeDeflineFeatureRequestList (DeflineFe
}
return feature_requests;
}
+//LCOV_EXCL_STOP
static Boolean RemoveCondition (
@@ -9829,6 +10153,9 @@ static void PluralizeConsolidatedClauseDescription (
/* don't pluralize tRNA names */
if (StringNCmp (fcp->feature_label_data.description, "tRNA-", 5) ==0) return;
+ /* don't pluralize if typeword present */
+ if (fcp->feature_label_data.typeword != NULL && !StringHasNoText(fcp->feature_label_data.typeword)) return;
+
new_desc = MemNew (StringLen (fcp->feature_label_data.description) + 2);
if (new_desc == NULL) return;
@@ -10159,6 +10486,8 @@ static Boolean FeatureIsOnSegment (
return FALSE;
}
+//LCOV_EXCL_START
+//Segsets no longer supported
static Boolean FeatureClauseIsOnSegment (
FeatureClausePtr fcp,
ValNodePtr segment_features
@@ -10185,6 +10514,7 @@ static Boolean FeatureClauseIsOnSegment (
return FALSE;
}
+//Segsets no longer supported
static FeatureClausePtr CopyMatchingClauses (
FeatureClausePtr fcp,
ValNodePtr segment_features
@@ -10267,6 +10597,7 @@ static FeatureClausePtr CopyMatchingClauses (
return new_fcp;
}
+//Segsets no longer supported
static void CopyFeatureList (
ValNodePtr match_features,
ValNodePtr parent_features,
@@ -10304,6 +10635,7 @@ static void CopyFeatureList (
}
+//Segsets no longer supported
static void ExtractSegmentClauses (
ValNodePtr segment_features,
ValNodePtr parent_features,
@@ -10312,6 +10644,7 @@ static void ExtractSegmentClauses (
{
CopyFeatureList (segment_features, parent_features, segment_clauses);
}
+//LCOV_EXCL_STOP
typedef struct segmentdeflinedata {
BioseqPtr parent_bsp;
@@ -10358,6 +10691,8 @@ NLM_EXTERN void DefLineFeatClauseListFree (ValNodePtr vnp)
}
+//LCOV_EXCL_START
+//Segsets no longer supported
static Boolean IntervalIntersectsIvals
(Int2 numivals,
Int4Ptr ivals,
@@ -10381,6 +10716,7 @@ static Boolean IntervalIntersectsIvals
}
+//Segsets no longer supported
/* if there are no features at all on this segment, select the genes that
* traverse the segment.
*/
@@ -10422,6 +10758,7 @@ static ValNodePtr GrabTraversingGenes
}
return segment_feature_list;
}
+//LCOV_EXCL_STOP
static CharPtr BuildFeatureClauses (
@@ -10437,6 +10774,8 @@ static CharPtr BuildFeatureClauses (
DeflineFeatureRequestList PNTR feature_requests
);
+//LCOV_EXCL_START
+//Segsets no longer supported
static Boolean LIBCALLBACK GetFeatureClauseForSeg (
SeqLocPtr slp,
SeqMgrSegmentContextPtr context)
@@ -10534,18 +10873,20 @@ static Boolean LIBCALLBACK GetFeatureClauseForSeg (
DeleteMarkedObjects (entityID, 0, NULL);
return TRUE;
}
+//LCOV_EXCL_STOP
static Boolean HasAnyPromoters (BioseqPtr bsp)
{
SeqFeatPtr sfp;
SeqMgrFeatContext fcontext;
-
- sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_promoter, &fcontext);
- if (sfp == NULL) {
- return FALSE;
- } else {
- return TRUE;
+ Boolean rval = FALSE;
+
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_regulatory, &fcontext);
+ sfp != NULL && !rval;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_regulatory, &fcontext)) {
+ rval = IsPromoter(sfp);
}
+ return rval;
}
static void AddFakePromoterClause (ValNodePtr PNTR feature_list, BioseqPtr bsp, DeflineFeatureRequestListPtr rp)
@@ -10595,47 +10936,54 @@ static Boolean IsInGenProdSet (BioseqPtr bsp)
NLM_EXTERN CharPtr BuildNonFeatureListClause (BioseqPtr bsp, DefLineType feature_list_type)
{
CharPtr str = NULL;
- BioSourcePtr biop;
+ BioSourcePtr biop = NULL;
SeqDescrPtr sdp;
SeqMgrDescContext context;
+ CharPtr organelle_name = NULL;
Char ending_str [200];
- CharPtr mol_name;
+ CharPtr mol_name = NULL;
MolInfoPtr molinfo;
ending_str [0] = 0;
- if (feature_list_type == DEFLINE_COMPLETE_SEQUENCE
- || feature_list_type == DEFLINE_PARTIAL_SEQUENCE
- || feature_list_type == DEFLINE_COMPLETE_GENOME
- || feature_list_type == DEFLINE_PARTIAL_GENOME)
+ biop = GetBiopForBsp (bsp);
+ if (biop != NULL)
{
- biop = GetBiopForBsp (bsp);
- if (biop != NULL)
- {
switch (biop->genome) {
- case GENOME_macronuclear :
- sprintf (ending_str, "macronuclear");
+ case GENOME_macronuclear :
+ if (feature_list_type != DEFLINE_SEQUENCE) {
+ organelle_name = "macronuclear";
+ }
break;
- case GENOME_nucleomorph :
- sprintf (ending_str, "nucleomorph");
+ case GENOME_nucleomorph :
+ if (feature_list_type != DEFLINE_SEQUENCE) {
+ organelle_name = "nucleomorph";
+ }
break;
- case GENOME_mitochondrion :
- sprintf (ending_str, "mitochondrion");
+ case GENOME_mitochondrion :
+ organelle_name = "mitochondrion";
break;
- case GENOME_apicoplast :
- case GENOME_chloroplast :
- case GENOME_chromoplast :
- case GENOME_kinetoplast :
- case GENOME_plastid :
- case GENOME_cyanelle :
- case GENOME_leucoplast :
- case GENOME_proplastid :
- case GENOME_hydrogenosome :
- case GENOME_chromatophore :
- sprintf (ending_str, "%s", organelleByGenome [biop->genome]);
+ case GENOME_apicoplast :
+ case GENOME_chloroplast :
+ case GENOME_kinetoplast :
+ case GENOME_plastid :
+ case GENOME_leucoplast :
+ organelle_name = organelleByGenome [biop->genome];
+ break;
+ case GENOME_cyanelle :
+ case GENOME_proplastid :
+ case GENOME_hydrogenosome :
+ case GENOME_chromatophore :
+ case GENOME_chromoplast :
+ if (feature_list_type != DEFLINE_SEQUENCE) {
+ organelle_name = organelleByGenome [biop->genome];
+ }
break;
}
- }
+ }
+
+ if (organelle_name != NULL) {
+ sprintf (ending_str, "%s", organelle_name);
}
if (feature_list_type == DEFLINE_COMPLETE_SEQUENCE)
@@ -10656,14 +11004,16 @@ NLM_EXTERN CharPtr BuildNonFeatureListClause (BioseqPtr bsp, DefLineType feature
}
else if (feature_list_type == DEFLINE_SEQUENCE)
{
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
- if (sdp == NULL || (molinfo = sdp->data.ptrvalue) == NULL)
- {
- mol_name = NULL;
- }
- else
- {
- mol_name = BiomolNameFromBiomol (molinfo->biomol);
+ if (organelle_name == NULL) {
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
+ if (sdp == NULL || (molinfo = sdp->data.ptrvalue) == NULL)
+ {
+ mol_name = NULL;
+ }
+ else
+ {
+ mol_name = BiomolNameFromBiomol (molinfo->biomol);
+ }
}
if (mol_name == NULL)
{
@@ -10671,7 +11021,7 @@ NLM_EXTERN CharPtr BuildNonFeatureListClause (BioseqPtr bsp, DefLineType feature
}
else
{
- sprintf (ending_str + StringLen (ending_str), "%s sequence", mol_name);
+ sprintf (ending_str + StringLen (ending_str), " %s sequence", mol_name);
}
}
@@ -10698,8 +11048,8 @@ static Boolean IsAmplifiedMiscFeat (ValNodePtr feature_list)
} else if (feature_list->choice == DEFLINE_FEATLIST) {
sfp = (SeqFeatPtr) feature_list->data.ptrvalue;
if (sfp != NULL && sfp->idx.subtype == FEATDEF_misc_feature
- && (StringNICmp (sfp->comment, phrase1, StringLen (phrase1)) == 0)
- || StringNICmp (sfp->comment, phrase2, StringLen (phrase2)) == 0) {
+ && (StringNICmp (sfp->comment, phrase1, StringLen (phrase1)) == 0
+ || StringNICmp (sfp->comment, phrase2, StringLen (phrase2)) == 0)) {
rval = TRUE;
}
}
@@ -10821,8 +11171,6 @@ static CharPtr BuildFeatureClauses (
RemoveUnwantedMiscFeats (feature_list, TRUE);
}
- ReplaceRNAClauses (feature_list, bsp, feature_requests);
-
/* take any exons on the minus strand */
/* and reverse their order within the clause */
ReverseClauses (feature_list, IsExonOrIntron);
@@ -10900,6 +11248,8 @@ static Int2 GetProductFlagFromCDSProductNames (BioseqPtr bsp)
}
+//LCOV_EXCL_START
+//Segsets no longer supported
static void BuildFeatClauseListForSegSet (
BioseqPtr bsp,
Uint2 entityID,
@@ -10951,6 +11301,7 @@ static void BuildFeatClauseListForSegSet (
vnp->data.ptrvalue = deflist;
FreeListElement (sdld.parent_feature_list);
}
+//LCOV_EXCL_STOP
static Boolean Is5SList (ValNodePtr feature_list)
@@ -11032,7 +11383,10 @@ static void BuildOneFeatClauseList (
if (bsp != NULL && bsp->repr == Seq_repr_seg &&
bsp->seq_ext != NULL && bsp->seq_ext_type == 1)
{
+ //LCOV_EXCL_START
+ //Segsets no longer supported
BuildFeatClauseListForSegSet (bsp, entityID, feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand, list);
+ //LCOV_EXCL_STOP
}
}
@@ -11062,7 +11416,7 @@ static void BuildOneFeatClauseList (
deflist->sep = SeqMgrGetSeqEntryForData (bsp),
deflist->bsp = bsp;
if (Is5SList(head)) {
- deflist->clauselist = StringSave ("5S ribosomal RNA gene region");
+ deflist->clauselist = StringSave ("5S ribosomal RNA gene region.");
} else {
deflist->clauselist = BuildFeatureClauses (bsp,
molecule_type,
@@ -11096,6 +11450,8 @@ static void RecurseForBuildingFeatClauseLists(
{
BioseqSetPtr bssp;
+ //LCOV_EXCL_START
+ //when regenerating, always calling at bioseq level
if ( IS_Bioseq_set (sep))
{
bssp = (BioseqSetPtr) sep->data.ptrvalue;
@@ -11113,10 +11469,12 @@ static void RecurseForBuildingFeatClauseLists(
return;
}
}
+ //LCOV_EXCL_STOP
BuildOneFeatClauseList (sep, entityID, feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand, list);
}
+
NLM_EXTERN void BuildDefLineFeatClauseList (
SeqEntryPtr sep,
Uint2 entityID,
@@ -11131,6 +11489,8 @@ NLM_EXTERN void BuildDefLineFeatClauseList (
DeleteMarkedObjects (entityID, 0, NULL);
}
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
static Boolean IdenticalExceptForPartialComplete (CharPtr str1, CharPtr str2)
{
CharPtr cp, word_in_first, word_in_second;
@@ -11178,6 +11538,7 @@ static Boolean IdenticalExceptForPartialComplete (CharPtr str1, CharPtr str2)
}
+//Not part of Autodef or Cleanup
static CharPtr GetTaxnameForBsp (BioseqPtr bsp)
{
SeqDescrPtr sdp;
@@ -11195,7 +11556,7 @@ static CharPtr GetTaxnameForBsp (BioseqPtr bsp)
return taxname;
}
-
+//Not part of Autodef or Cleanup (no more popset retros)
NLM_EXTERN Boolean AreFeatureClausesUnique (ValNodePtr list)
{
ValNodePtr vnp1, vnp2;
@@ -11221,6 +11582,7 @@ NLM_EXTERN Boolean AreFeatureClausesUnique (ValNodePtr list)
}
return TRUE;
}
+//LCOV_EXCL_STOP
NLM_EXTERN CharPtr GetKeywordPrefix (SeqEntryPtr sep)
@@ -11305,6 +11667,8 @@ typedef struct deflineclauseoptions {
Boolean gene_cluster_opp_strand;
} DefLineClauseOptions, PNTR DefLineClauseOptionsPtr;
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup (no more popset retros)
static DefLineClauseOptionsPtr DefLineClauseOptionsNew (void)
{
DefLineClauseOptionsPtr clause_options;
@@ -11330,6 +11694,7 @@ static DefLineClauseOptionsPtr DefLineClauseOptionsFree (DefLineClauseOptionsPtr
typedef void (*Nlm_SetFeatureRequestsProc) PROTO ((DefLineClauseOptionsPtr));
+//Not part of Autodef or Cleanup (no more popset retros)
static void DefaultClauseOptions (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11339,6 +11704,7 @@ static void DefaultClauseOptions (DefLineClauseOptionsPtr clause_options)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void CompleteSequenceClauseOptions (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11348,7 +11714,7 @@ static void CompleteSequenceClauseOptions (DefLineClauseOptionsPtr clause_option
clause_options->gene_cluster_opp_strand = FALSE;
}
-
+//Not part of Autodef or Cleanup (no more popset retros)
static void CompleteGenomeClauseOptions (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11358,7 +11724,7 @@ static void CompleteGenomeClauseOptions (DefLineClauseOptionsPtr clause_options)
clause_options->gene_cluster_opp_strand = FALSE;
}
-
+//Not part of Autodef or Cleanup (no more popset retros)
static void SequenceClauseOptions (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11369,6 +11735,7 @@ static void SequenceClauseOptions (DefLineClauseOptionsPtr clause_options)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void MiscFeatNonCodingOptions (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11377,6 +11744,7 @@ static void MiscFeatNonCodingOptions (DefLineClauseOptionsPtr clause_options)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void MiscFeatSemicolonOptions (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11386,7 +11754,7 @@ static void MiscFeatSemicolonOptions (DefLineClauseOptionsPtr clause_options)
}
-
+//Not part of Autodef or Cleanup (no more popset retros)
static void MitochondrialProductClauseOptions (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11396,6 +11764,7 @@ static void MitochondrialProductClauseOptions (DefLineClauseOptionsPtr clause_op
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void RequestPromoterAndExon (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11404,6 +11773,7 @@ static void RequestPromoterAndExon (DefLineClauseOptionsPtr clause_options)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void RequestIntronAndExon (DefLineClauseOptionsPtr clause_options)
{
InitFeatureRequests (&(clause_options->feature_requests));
@@ -11411,7 +11781,7 @@ static void RequestIntronAndExon (DefLineClauseOptionsPtr clause_options)
clause_options->feature_requests.keep_items[RemovableIntron] = TRUE;
}
-
+//Not part of Autodef or Cleanup (no more popset retros)
static Nlm_SetFeatureRequestsProc ClauseOptionSetList[] = {
DefaultClauseOptions,
RequestPromoterAndExon,
@@ -11427,6 +11797,7 @@ static Nlm_SetFeatureRequestsProc ClauseOptionSetList[] = {
typedef Boolean (*Nlm_SetOrgModifiersProc) PROTO ((OrganismDescriptionModifiersPtr, ValNodePtr PNTR, ModifierItemLocalPtr));
+//Not part of Autodef or Cleanup (no more popset retros)
static void DefaultOrgOptions (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list)
{
odmp->use_modifiers = TRUE;
@@ -11434,6 +11805,7 @@ static void DefaultOrgOptions (OrganismDescriptionModifiersPtr odmp, ValNodePtr
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean SubstituteMod (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available, Int4 mod1, Int4 mod2)
{
ValNodePtr vnp;
@@ -11466,24 +11838,27 @@ static Boolean SubstituteMod (OrganismDescriptionModifiersPtr odmp, ValNodePtr P
}
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of SubstituteMod called with mod1 = DEFLINE_POS_Clone and mod2 = DEFLINE_POS_Isolate.
static Boolean IsolateInsteadOfClone (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
return SubstituteMod (odmp, mod_list, available, DEFLINE_POS_Clone, DEFLINE_POS_Isolate);
}
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of SubstituteMod called with mod1 = DEFLINE_POS_Specimen_voucher and mod2 = DEFLINE_POS_Haplotype.
static Boolean HaplotypeInsteadOfVoucher (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
return SubstituteMod (odmp, mod_list, available, DEFLINE_POS_Specimen_voucher, DEFLINE_POS_Haplotype);
}
-
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of SubstituteMod called with mod1 = DEFLINE_POS_Isolate and mod2 = DEFLINE_POS_Specimen_voucher.
static Boolean VoucherInsteadOfIsolate (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
return SubstituteMod (odmp, mod_list, available, DEFLINE_POS_Isolate, DEFLINE_POS_Specimen_voucher);
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean UseNone (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
if (mod_list == NULL || *mod_list == NULL) {
@@ -11496,6 +11871,7 @@ static Boolean UseNone (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mo
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean AddOneAdjustmentMod (ValNodePtr PNTR mod_list, ModifierItemLocalPtr available, Int4 specific)
{
Boolean already_has_sv = FALSE;
@@ -11523,6 +11899,7 @@ static Boolean AddOneAdjustmentMod (ValNodePtr PNTR mod_list, ModifierItemLocalP
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean UseOneSpecific (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available, Int4 specific)
{
DefaultOrgOptions(odmp, mod_list);
@@ -11530,24 +11907,28 @@ static Boolean UseOneSpecific (OrganismDescriptionModifiersPtr odmp, ValNodePtr
}
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of UseOneSpecific called with specific = DEFLINE_POS_Strain.
static Boolean UseStrain (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
return UseOneSpecific (odmp, mod_list, available, DEFLINE_POS_Strain);
}
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of UseOneSpecific called with specific = DEFLINE_POS_Specimen_voucher.
static Boolean UseSpecimenVoucher (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
return UseOneSpecific (odmp, mod_list, available, DEFLINE_POS_Specimen_voucher);
}
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of UseOneSpecific called with specific = DEFLINE_POS_Haplotype.
static Boolean UseHaplotype (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
return UseOneSpecific (odmp, mod_list, available, DEFLINE_POS_Haplotype);
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean UseAutoDefId (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
size_t index;
@@ -11587,6 +11968,7 @@ static Boolean UseAutoDefId (OrganismDescriptionModifiersPtr odmp, ValNodePtr PN
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean DontExcludeSp (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
if (odmp == NULL || !odmp->exclude_sp) {
@@ -11598,6 +11980,7 @@ static Boolean DontExcludeSp (OrganismDescriptionModifiersPtr odmp, ValNodePtr P
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean UseCountryAndIsolate (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
Boolean add_country, add_isolate;
@@ -11613,6 +11996,7 @@ static Boolean UseCountryAndIsolate (OrganismDescriptionModifiersPtr odmp, ValNo
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Nlm_SetOrgModifiersProc OrgModifiersSetList[] = {
IsolateInsteadOfClone,
UseNone,
@@ -11629,6 +12013,7 @@ static Nlm_SetOrgModifiersProc OrgModifiersSetList[] = {
typedef Boolean (*Nlm_CompareDeflinesProc) PROTO ((CharPtr, CharPtr));
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean MatchExceptOldProduct (CharPtr old_str, CharPtr new_str)
{
Int4 old_len, new_len, pattern_len, new_pattern_len, organelle_len, i;
@@ -11683,6 +12068,7 @@ static Boolean MatchExceptOldProduct (CharPtr old_str, CharPtr new_str)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean NoSequenceForIntron (CharPtr old_str, CharPtr new_str)
{
Int4 old_len, new_len;
@@ -11701,6 +12087,7 @@ static Boolean NoSequenceForIntron (CharPtr old_str, CharPtr new_str)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean SkipPair (CharPtr PNTR a, CharPtr PNTR b, CharPtr a_start, CharPtr b_start, CharPtr val1, CharPtr val2)
{
Int4 len1, len2;
@@ -11736,6 +12123,7 @@ static Boolean SkipPair (CharPtr PNTR a, CharPtr PNTR b, CharPtr a_start, CharPt
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean AdjustForSpace (CharPtr PNTR a, CharPtr PNTR b, CharPtr a_start, CharPtr b_start)
{
Boolean rval = FALSE;
@@ -11754,6 +12142,7 @@ static Boolean AdjustForSpace (CharPtr PNTR a, CharPtr PNTR b, CharPtr a_start,
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean AdjustForCharBeforePhrase (CharPtr PNTR a, CharPtr PNTR b, CharPtr phrase, Char ch)
{
Boolean rval = FALSE;
@@ -11774,19 +12163,21 @@ static Boolean AdjustForCharBeforePhrase (CharPtr PNTR a, CharPtr PNTR b, CharPt
return rval;
}
-
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of AdjustForCharBeforePhrase with ch fixed to ','.
static Boolean AdjustForCommaBeforePhrase (CharPtr PNTR a, CharPtr PNTR b, CharPtr phrase)
{
return AdjustForCharBeforePhrase (a, b, phrase, ',');
}
+//Not part of Autodef or Cleanup (no more popset retros). Returns the result of AdjustForCommaBeforePhrase with phrase fixed to " and ".
static Boolean AdjustForCommaBeforeAnd (CharPtr PNTR a, CharPtr PNTR b)
{
return AdjustForCommaBeforePhrase (a, b, " and ");
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean AdjustForSkippableWord (CharPtr PNTR a, CharPtr PNTR b, CharPtr str1, CharPtr str2, CharPtr word)
{
Int4 len;
@@ -11810,6 +12201,7 @@ static Boolean AdjustForSkippableWord (CharPtr PNTR a, CharPtr PNTR b, CharPtr s
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean AdjustForKnownDiffs (CharPtr PNTR a, CharPtr PNTR b, CharPtr str1, CharPtr str2)
{
Boolean rval = SkipPair (a, b, str1, str2, " pseudogene, partial sequence", " gene, partial cds")
@@ -11843,6 +12235,7 @@ static Boolean AdjustForKnownDiffs (CharPtr PNTR a, CharPtr PNTR b, CharPtr str1
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean AdjustForCommaBeforeGenomicSequence (CharPtr PNTR a, CharPtr PNTR b)
{
return AdjustForCommaBeforePhrase (a, b, " genomic sequence");
@@ -11854,6 +12247,8 @@ static CharPtr defline_skippable_words[] = {
" mitochondrial",
NULL};
+
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean SkipWord (CharPtr PNTR a, CharPtr a_start, CharPtr PNTR b)
{
Int4 index, len;
@@ -11875,10 +12270,11 @@ static Boolean SkipWord (CharPtr PNTR a, CharPtr a_start, CharPtr PNTR b)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean MatchWithPhraseExceptions (CharPtr str1, CharPtr str2)
{
Int4 len_curr, len_new;
- CharPtr a, b;
+ CharPtr a = NULL, b = NULL;
CharPtr mitochondrial = "; mitochondrial";
Int4 len_mito = StringLen (mitochondrial);
@@ -11922,7 +12318,7 @@ static Boolean MatchWithPhraseExceptions (CharPtr str1, CharPtr str2)
}
}
-
+//Not part of Autodef or Cleanup (no more popset retros)
static Nlm_CompareDeflinesProc CompareDeflinesList[] = {
MatchExceptOldProduct,
NoSequenceForIntron,
@@ -11930,6 +12326,8 @@ static Nlm_CompareDeflinesProc CompareDeflinesList[] = {
NULL };
+
+//Not part of Autodef or Cleanup (no more popset retros)
static Boolean DeflinesMatch (CharPtr old_str, CharPtr new_str)
{
Int4 index;
@@ -11946,6 +12344,7 @@ static Boolean DeflinesMatch (CharPtr old_str, CharPtr new_str)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void ReplaceOldName (BioseqPtr bsp, CharPtr PNTR old_title)
{
SeqDescPtr sdp;
@@ -11970,6 +12369,7 @@ static void ReplaceOldName (BioseqPtr bsp, CharPtr PNTR old_title)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void AddMissingPeriod (CharPtr PNTR old_title)
{
Int4 len;
@@ -11989,6 +12389,7 @@ static void AddMissingPeriod (CharPtr PNTR old_title)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void ChangeExonList (CharPtr PNTR old_title)
{
CharPtr exon_start, first_and, second_and;
@@ -12017,6 +12418,7 @@ static void ChangeExonList (CharPtr PNTR old_title)
}
+//Not part of Autodef or Cleanup (no more popset retros)
NLM_EXTERN DefLineClauseOptionsPtr MakeFeatureRequestsMatchExpectedTitle (BioseqPtr bsp)
{
SeqEntryPtr sep;
@@ -12168,6 +12570,7 @@ NLM_EXTERN DefLineClauseOptionsPtr MakeFeatureRequestsMatchExpectedTitle (Bioseq
}
+//Not part of Autodef or Cleanup (no more popset retros)
static Int4 MatchlenForAutodef (CharPtr str1, CharPtr str2)
{
Int4 len_curr, len_new;
@@ -12201,6 +12604,7 @@ static Int4 MatchlenForAutodef (CharPtr str1, CharPtr str2)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void FindCommonTitleCallback (BioseqPtr bsp, Pointer data)
{
Int4 len_curr, len_new;
@@ -12265,13 +12669,12 @@ static void FindCommonTitleCallback (BioseqPtr bsp, Pointer data)
}
}
-
typedef struct verifycommonfeatureclause {
CharPtr common_clause;
Boolean is_ok;
} VerifyCommonFeatureClauseData, PNTR VerifyCommonFeatureClausePtr;
-
+//Not part of Autodef or Cleanup (no more popset retros)
static void PrintBioSource (BioSourcePtr biop)
{
OrgModPtr mod;
@@ -12288,6 +12691,7 @@ static void PrintBioSource (BioSourcePtr biop)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void VerifyCommonFeatureClauseCallback (BioseqPtr bsp, Pointer data)
{
VerifyCommonFeatureClausePtr v;
@@ -12365,6 +12769,7 @@ static void VerifyCommonFeatureClauseCallback (BioseqPtr bsp, Pointer data)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static CharPtr GetCommonFeatureClause (SeqEntryPtr sep)
{
CharPtr common_clause = NULL;
@@ -12396,6 +12801,7 @@ static CharPtr GetCommonFeatureClause (SeqEntryPtr sep)
}
return common_clause;
}
+//LCOV_EXCL_STOP
NLM_EXTERN void BuildDefinitionLinesFromFeatureClauseLists (
@@ -12422,6 +12828,8 @@ NLM_EXTERN void BuildDefinitionLinesFromFeatureClauseLists (
}
}
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
NLM_EXTERN void BuildDefLinesFromFeatClauseListsForOneBsp (
ValNodePtr list,
ModifierItemLocalPtr modList,
@@ -12454,6 +12862,7 @@ NLM_EXTERN void BuildDefLinesFromFeatClauseListsForOneBsp (
* visible in the flat file if all sequences in the nuc-prot set have
* their own title.
*/
+// Not actually called on nuc-prot sets during title regeneration
NLM_EXTERN void RemoveNucProtSetTitles (SeqEntryPtr sep)
{
BioseqSetPtr bssp;
@@ -12497,6 +12906,7 @@ NLM_EXTERN void RemoveNucProtSetTitles (SeqEntryPtr sep)
}
}
}
+//LCOV_EXCL_STOP
static void ProtTitleRemoveProc (BioseqPtr bsp, Pointer userdata)
@@ -12527,6 +12937,8 @@ NLM_EXTERN void RemoveProteinTitles (SeqEntryPtr sep)
DeleteMarkedObjects (entityID, 0, NULL);
}
+//LCOV_EXCL_START
+//not used in autodef or cleanup
static void MRnaTitleRemoveProc (BioseqPtr bsp, Pointer userdata)
{
@@ -12550,6 +12962,7 @@ static void MRnaTitleRemoveProc (BioseqPtr bsp, Pointer userdata)
}
}
+//not used in autodef or cleanup
NLM_EXTERN void RemoveMRnaTitles (SeqEntryPtr sep)
{
@@ -12570,7 +12983,7 @@ typedef struct popsetdefline {
} PopsetDeflineData, PNTR PopsetDeflinePtr;
-
+//Not regenerating popset titles
NLM_EXTERN Boolean GetsDocsumTitle(Uint1 set_class)
{
if (set_class == BioseqseqSet_class_pop_set
@@ -12583,7 +12996,7 @@ NLM_EXTERN Boolean GetsDocsumTitle(Uint1 set_class)
}
}
-
+//Not part of Autodef when not regenerating popset titles
static Boolean HasTitle(SeqDescrPtr descr)
{
while (descr != NULL) {
@@ -12596,6 +13009,7 @@ static Boolean HasTitle(SeqDescrPtr descr)
}
+//Not part of Autodef or Cleanup (no more popset retros)
NLM_EXTERN void AddPopsetDeflineWithClause (BioseqSetPtr bssp, CharPtr clause)
{
SeqEntryPtr set_sep;
@@ -12637,6 +13051,7 @@ NLM_EXTERN void AddPopsetDeflineWithClause (BioseqSetPtr bssp, CharPtr clause)
}
+//Not regenerating popset titles
static void AddPopsetCallback (BioseqSetPtr bssp, Pointer data)
{
SeqEntryPtr set_sep, first_sep;
@@ -12698,7 +13113,7 @@ static void AddPopsetCallback (BioseqSetPtr bssp, Pointer data)
MemFree (org_desc);
}
-
+//Not regenerating popset titles
NLM_EXTERN void AddPopsetTitles
(SeqEntryPtr sep,
DeflineFeatureRequestListPtr feature_requests,
@@ -12718,7 +13133,7 @@ NLM_EXTERN void AddPopsetTitles
VisitSetsInSep (sep, &pop, AddPopsetCallback);
}
-
+//Not regenerating popset titles
static void RemovePopsetTitlesCallback(BioseqSetPtr bssp, Pointer data)
{
SeqDescrPtr sdp;
@@ -12736,7 +13151,7 @@ static void RemovePopsetTitlesCallback(BioseqSetPtr bssp, Pointer data)
}
}
-
+//Not regenerating popset titles
NLM_EXTERN void RemovePopsetTitles(SeqEntryPtr sep)
{
Uint2 entityID;
@@ -12745,13 +13160,405 @@ NLM_EXTERN void RemovePopsetTitles(SeqEntryPtr sep)
entityID = ObjMgrGetEntityIDForChoice (sep);
DeleteMarkedObjects (entityID, 0, NULL);
}
+//LCOV_EXCL_STOP
+
+
+const CharPtr kAutoDefOptions = "AutodefOptions"; /* type string of the autodef options user object (compared in IsAutoDefOptions, stored in MakeAutoDefOptionsUserObject). NOTE(review): this and the k* names below are defined without static or NLM_EXTERN -- confirm that external linkage is intended */
+const CharPtr kAltSpliceFlag = "AltSpliceFlag"; /* from here to kSuppressAllele: names passed as user-field labels by the Add* helpers later in this file (not every name is referenced in this part of the file) */
+const CharPtr kDoNotApplyToAff = "DoNotApplyToAff";
+const CharPtr kDoNotApplyToCf = "DoNotApplyToCf";
+const CharPtr kDoNotApplyToNr = "DoNotApplyToNr";
+const CharPtr kDoNotApplyToSp = "DoNotApplyToSp";
+const CharPtr kFeatureListType = "FeatureListType";
+const CharPtr kGeneClusterOppStrand = "GeneClusterOppStrand";
+const CharPtr kHIVRule = "HIVRule";
+const CharPtr kIncludeCountryText = "IncludeCountryText";
+const CharPtr kKeep3UTRs = "Keep3UTRs";
+const CharPtr kKeep5UTRs = "Keep5UTRs";
+const CharPtr kKeepAfterSemicolon = "KeepAfterSemicolon";
+const CharPtr kKeepExons = "KeepExons";
+const CharPtr kKeepIntrons = "KeepIntrons";
+const CharPtr kKeepLTRs = "KeepLTRs";
+const CharPtr kKeepPromoters = "KeepPromoters";
+const CharPtr kLeaveParenthetical = "LeaveParenthetical";
+const CharPtr kMaxMods = "MaxMods";
+const CharPtr kMiscFeatRule = "MiscFeatRule";
+const CharPtr kModifierList = "ModifierList";
+const CharPtr kProductFlag = "ProductFlag";
+const CharPtr kNuclearCopyFlag = "NuclearCopyFlag";
+const CharPtr kSpecifyNuclearProduct = "SpecifyNuclearProduct";
+const CharPtr kSuppressedFeatures = "SuppressedFeatures";
+const CharPtr kSuppressFeatureAltSplice = "SuppressFeatureAltSplice";
+const CharPtr kSuppressLocusTags = "SuppressLocusTags";
+const CharPtr kSuppressMobileElementSubfeatures = "SuppressMobileElementSubfeatures";
+const CharPtr kUseFakePromoters = "UseFakePromoters";
+const CharPtr kUseLabels = "UseLabels";
+const CharPtr kUseNcRNAComment = "UseNcRNAComment";
+const CharPtr kAllowModAtEndOfTaxname = "AllowModAtEndOfTaxname";
+const CharPtr kKeepuOrf = "KeepuOrf";
+const CharPtr kKeepMobileElement = "KeepMobileElement";
+const CharPtr kKeepNoncodingProductFeat = "KeepNoncodingProductFeat";
+const CharPtr kKeepPrecursorRNA = "KeepPrecursorRNA";
+const CharPtr kKeepncRNA = "KeepncRNA";
+const CharPtr kKeepRepeatRegion = "KeepRepeatRegion";
+const CharPtr kSuppressAllele = "SuppressAllele";
+
+/* string values stored in the kHIVRule field; written by AddHIVRule */
+const CharPtr kPreferClone = "PreferClone";
+const CharPtr kPreferIsolate = "PreferIsolate";
+const CharPtr kWantBoth = "WantBoth";
+/* string values stored in the kFeatureListType field; written by AddFeatureListType */
+const CharPtr kCompleteGenome = "Complete Genome";
+const CharPtr kCompleteSequence = "Complete Sequence";
+const CharPtr kListAllFeatures = "List All Features";
+const CharPtr kPartialGenome = "Partial Genome";
+const CharPtr kPartialSequence = "Partial Sequence";
+const CharPtr kSequence = "Sequence";
+/* string values stored in the kMiscFeatRule field; written by AddMiscFeatParseRule */
+const CharPtr kCommentFeat = "CommentFeat";
+const CharPtr kDelete = "Delete";
+const CharPtr kNoncodingProductFeat = "NoncodingProductFeat";
+
+static Boolean IsAutoDefOptions(UserObjectPtr uop)
+{ /* Returns TRUE only when uop and uop->type are non-NULL and StringICmp reports uop->type->str equal to kAutoDefOptions; FALSE otherwise. */
+ if (uop != NULL && uop->type != NULL &&
+ StringICmp(uop->type->str, kAutoDefOptions) == 0) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+//LCOV_EXCL_START
+//Sets ufp->label to a new ObjectId whose str is StringSave(field_name). ufp is dereferenced without a NULL check. Not testing construction of autodef options object at this time.
+void LabelUserField(UserFieldPtr ufp, CharPtr field_name)
+{
+ ufp->label = ObjectIdNew(); /* overwrites any previous label pointer without freeing it; the callers visible in this file pass a field fresh from UserFieldNew() */
+ ufp->label->str = StringSave(field_name);
+}
-NLM_EXTERN void
-AutoDefForSeqEntry
-(SeqEntryPtr sep,
- Uint2 entityID,
- OrganismDescriptionModifiersPtr odmp,
+//Pushes ufp onto the front of uop->data: ufp->next takes the old head, so fields end up in reverse order of insertion. Neither pointer is NULL-checked. Not testing construction of autodef options object at this time.
+void AddFieldToUserObject(UserFieldPtr ufp, UserObjectPtr uop)
+{
+ ufp->next = uop->data;
+ uop->data = ufp;
+}
+
+//Adds to uop a new field labelled field_name with choice 4 and data.boolvalue TRUE. Callers in this file add it only when the option is set; presumably a missing field means FALSE -- confirm against the code that reads the object. Not testing construction of autodef options object at this time.
+void AddBooleanAutodefField(UserObjectPtr uop, CharPtr field_name)
+{
+ UserFieldPtr ufp = UserFieldNew();
+ LabelUserField(ufp, field_name);
+ ufp->choice = 4; /* paired with data.boolvalue below */
+ ufp->data.boolvalue = TRUE;
+ AddFieldToUserObject(ufp, uop);
+}
+
+//Records product_flag on uop in one of three ways: equal to DEFAULT_ORGANELLE_CLAUSE adds the boolean kSpecifyNuclearProduct; greater adds string field kNuclearCopyFlag from organelleByPopup[product_flag - DEFAULT_ORGANELLE_CLAUSE]; smaller adds string field kProductFlag from organelleByPopup[product_flag]. A NULL table entry adds nothing. Not testing construction of autodef options object at this time.
+void AddAutodefProductFlag(UserObjectPtr uop, Int2 product_flag)
+{
+ UserFieldPtr ufp;
+ CharPtr val;
+
+ if (product_flag == DEFAULT_ORGANELLE_CLAUSE) {
+ AddBooleanAutodefField(uop, kSpecifyNuclearProduct);
+ } else if (product_flag > DEFAULT_ORGANELLE_CLAUSE) {
+ val = organelleByPopup[product_flag - DEFAULT_ORGANELLE_CLAUSE]; /* NOTE(review): no upper-bound check on the index is visible here -- confirm callers keep it inside organelleByPopup */
+ if (val != NULL) {
+ ufp = UserFieldNew();
+ LabelUserField(ufp, kNuclearCopyFlag);
+ ufp->choice = 1; /* paired with a StringSave'd string in data.ptrvalue */
+ ufp->data.ptrvalue = StringSave(val);
+ AddFieldToUserObject(ufp, uop);
+ }
+ } else {
+ val = organelleByPopup[product_flag]; /* NOTE(review): a negative product_flag also reaches this branch and would index before the table -- confirm callers never pass one */
+ if (val != NULL) {
+ ufp = UserFieldNew();
+ LabelUserField(ufp, kProductFlag);
+ ufp->choice = 1;
+ ufp->data.ptrvalue = StringSave(val);
+ AddFieldToUserObject(ufp, uop);
+ }
+ }
+}
+
+//Adds to uop a kHIVRule field (choice 1) whose value is kPreferClone, kPreferIsolate or kWantBoth according to rule. Not testing construction of autodef options object at this time.
+void AddHIVRule(UserObjectPtr uop, Int4 rule)
+{
+ UserFieldPtr ufp;
+
+ ufp = UserFieldNew();
+ LabelUserField(ufp, kHIVRule);
+ ufp->choice = 1;
+ switch (rule) {
+ case clone_isolate_HIV_rule_prefer_clone:
+ ufp->data.ptrvalue = StringSave(kPreferClone);
+ break;
+ case clone_isolate_HIV_rule_prefer_isolate:
+ ufp->data.ptrvalue = StringSave(kPreferIsolate);
+ break;
+ case clone_isolate_HIV_rule_want_both:
+ ufp->data.ptrvalue = StringSave(kWantBoth);
+ break;
+ default:
+ break; /* NOTE(review): for an unrecognized rule the field is still added below with data.ptrvalue never set in this function -- confirm that a valueless string field is acceptable */
+ }
+ AddFieldToUserObject(ufp, uop);
+}
+
+//Copies the settings in odmp onto uop: one boolean field per flag that is set, the kMaxMods integer field (always), and the HIV rule via AddHIVRule (always). odmp is dereferenced without a NULL check. Not testing construction of autodef options object at this time.
+void AddOrganismDescriptionModifiersToAutoDefUserObject
+(UserObjectPtr uop,
+ OrganismDescriptionModifiersPtr odmp)
+{
+ UserFieldPtr ufp;
+
+ if (odmp->use_labels) {
+ AddBooleanAutodefField(uop, kUseLabels);
+ }
+ ufp = UserFieldNew();
+ LabelUserField(ufp, kMaxMods);
+ ufp->choice = 2; /* paired with data.intvalue below */
+ ufp->data.intvalue = odmp->max_mods;
+ AddFieldToUserObject(ufp, uop);
+ if (odmp->keep_paren) {
+ AddBooleanAutodefField(uop, kLeaveParenthetical);
+ }
+ if (odmp->exclude_sp) {
+ AddBooleanAutodefField(uop, kDoNotApplyToSp);
+ }
+ if (odmp->exclude_cf) {
+ AddBooleanAutodefField(uop, kDoNotApplyToCf);
+ }
+ if (odmp->exclude_aff) {
+ AddBooleanAutodefField(uop, kDoNotApplyToAff);
+ }
+ if (odmp->exclude_nr) {
+ AddBooleanAutodefField(uop, kDoNotApplyToNr);
+ }
+ if (odmp->include_country_extra) {
+ AddBooleanAutodefField(uop, kIncludeCountryText);
+ }
+ AddHIVRule(uop, odmp->clone_isolate_HIV_rule_num);
+ if (odmp->allow_semicolon_in_modifier) {
+ AddBooleanAutodefField(uop, kKeepAfterSemicolon);
+ }
+ if (odmp->allow_mod_at_end_of_taxname) {
+ AddBooleanAutodefField(uop, kAllowModAtEndOfTaxname);
+ }
+}
+
+
+//Adds to uop a kFeatureListType field (choice 1) holding the string that corresponds to the DEFLINE_* value in rule. Not testing construction of autodef options object at this time.
+void AddFeatureListType(UserObjectPtr uop, Int4 rule)
+{
+ UserFieldPtr ufp;
+
+ ufp = UserFieldNew();
+ LabelUserField(ufp, kFeatureListType);
+ ufp->choice = 1;
+ switch (rule) {
+ case DEFLINE_USE_FEATURES:
+ ufp->data.ptrvalue = StringSave(kListAllFeatures);
+ break;
+ case DEFLINE_COMPLETE_GENOME:
+ ufp->data.ptrvalue = StringSave(kCompleteGenome);
+ break;
+ case DEFLINE_COMPLETE_SEQUENCE:
+ ufp->data.ptrvalue = StringSave(kCompleteSequence);
+ break;
+ case DEFLINE_SEQUENCE:
+ ufp->data.ptrvalue = StringSave(kSequence);
+ break;
+ case DEFLINE_PARTIAL_GENOME:
+ ufp->data.ptrvalue = StringSave(kPartialGenome);
+ break;
+ case DEFLINE_PARTIAL_SEQUENCE:
+ ufp->data.ptrvalue = StringSave(kPartialSequence);
+ break;
+ default:
+ break; /* NOTE(review): an unrecognized rule still adds the field below with data.ptrvalue never set in this function -- confirm this is intended */
+ }
+ AddFieldToUserObject(ufp, uop);
+}
+
+
+//Adds to uop a kMiscFeatRule field (choice 1): 1 -> kCommentFeat, 2 -> kNoncodingProductFeat, 3 -> kDelete. NOTE(review): the numeric cases presumably mirror named misc-feature parse-rule constants defined elsewhere -- confirm and prefer the names. Not testing construction of autodef options object at this time.
+void AddMiscFeatParseRule(UserObjectPtr uop, Int4 misc_feat_parse_rule)
+{
+ UserFieldPtr ufp;
+
+ ufp = UserFieldNew();
+ LabelUserField(ufp, kMiscFeatRule);
+ ufp->choice = 1;
+ switch (misc_feat_parse_rule) {
+ case 1:
+ ufp->data.ptrvalue = StringSave(kCommentFeat);
+ break;
+ case 2:
+ ufp->data.ptrvalue = StringSave(kNoncodingProductFeat);
+ break;
+ case 3:
+ ufp->data.ptrvalue = StringSave(kDelete);
+ break;
+ default:
+ break; /* any other value still adds the field below with data.ptrvalue never set in this function */
+ }
+ AddFieldToUserObject(ufp, uop);
+}
+
+//Returns a new field labelled field_name with choice 7, num = ValNodeLen(vals), and data.ptrvalue pointing at a MemNew'd array holding a StringSave of each node's data.ptrvalue, in list order. The field is not attached to any user object here. Not testing construction of autodef options object at this time.
+UserFieldPtr BuildStringsField(CharPtr field_name, ValNodePtr vals)
+{
+ UserFieldPtr ufp;
+ CharPtr PNTR cpp;
+ ValNodePtr vnp;
+ Int4 i;
+
+ ufp = UserFieldNew();
+ LabelUserField(ufp, field_name);
+ ufp->choice = 7;
+ ufp->num = ValNodeLen(vals);
+ cpp = (CharPtr PNTR) MemNew(ufp->num * sizeof(CharPtr)); /* NOTE(review): result is not checked before the loop writes to it; with an empty list the loop body never runs */
+ for (i = 0, vnp = vals; vnp != NULL; vnp = vnp->next, i++) {
+ cpp[i] = StringSave(vnp->data.ptrvalue);
+ }
+ ufp->data.ptrvalue = cpp;
+ return ufp;
+}
+
+//For a non-NULL list, maps each node's choice through GetFeatureTypeFromFeatdef and GetFeatureNameFromFeatureType, drops names equal to "any" (StringICmp), and adds the remaining names to uop as the kSuppressedFeatures strings field. Does nothing when list is NULL. Not testing construction of autodef options object at this time.
+void AddSuppressedFeatures(UserObjectPtr uop, ValNodePtr list)
+{
+ UserFieldPtr ufp;
+ ValNodePtr vnp, val_list = NULL;
+ CharPtr val;
+ Int4 num_unrecognized = 0; /* NOTE(review): incremented below but never read */
+
+ if (list == NULL) {
+ return;
+ }
+ for (vnp = list; vnp != NULL; vnp = vnp->next) {
+ val = GetFeatureNameFromFeatureType(GetFeatureTypeFromFeatdef(vnp->choice));
+ if (StringICmp(val, "any") == 0) {
+ num_unrecognized++;
+ } else {
+ ValNodeAddPointer(&val_list, 0, val); /* stores the name pointer itself; BuildStringsField makes the copies */
+ }
+ }
+ ufp = BuildStringsField(kSuppressedFeatures, val_list); /* NOTE(review): runs even when every entry was filtered out, producing a field with num 0 -- confirm this is intended */
+ val_list = ValNodeFree(val_list);
+ AddFieldToUserObject(ufp, uop);
+}
+
+
+//Copies the settings in rq onto uop: one boolean field per keep_items[] entry and flag that is set, plus (always) the feature list type, the misc-feature parse rule, and the suppressed-feature list (the latter only produces a field when the list is non-NULL). rq is dereferenced without a NULL check. Not testing construction of autodef options object at this time.
+void AddDeflineFeatureRequestListToAutoDefUserObject
+(UserObjectPtr uop,
+DeflineFeatureRequestListPtr rq)
+{
+ if (rq->keep_items[RemovableExon]) {
+ AddBooleanAutodefField(uop, kKeepExons);
+ }
+ if (rq->keep_items[RemovableIntron]) {
+ AddBooleanAutodefField(uop, kKeepIntrons);
+ }
+ if (rq->keep_items[Removable5UTR]) {
+ AddBooleanAutodefField(uop, kKeep5UTRs);
+ }
+ if (rq->keep_items[Removable3UTR]) {
+ AddBooleanAutodefField(uop, kKeep3UTRs);
+ }
+ if (rq->keep_items[RemovablePromoter]) {
+ AddBooleanAutodefField(uop, kKeepPromoters);
+ if (rq->add_fake_promoters) { /* kUseFakePromoters is recorded only when promoters are kept */
+ AddBooleanAutodefField(uop, kUseFakePromoters);
+ }
+ }
+ if (rq->keep_items[RemovableLTR]) {
+ AddBooleanAutodefField(uop, kKeepLTRs);
+ }
+ if (rq->keep_items[RemovableuORF]) {
+ AddBooleanAutodefField(uop, kKeepuOrf);
+ }
+ if (rq->keep_items[RemovableNoncodingProductFeat]) {
+ AddBooleanAutodefField(uop, kKeepNoncodingProductFeat);
+ }
+ if (rq->keep_items[RemovableMobileElement]) {
+ AddBooleanAutodefField(uop, kKeepMobileElement);
+ }
+ if (rq->keep_items[RemovablePrecursorRNA]) {
+ AddBooleanAutodefField(uop, kKeepPrecursorRNA);
+ }
+ if (rq->keep_items[RemovablencRNA]) {
+ AddBooleanAutodefField(uop, kKeepncRNA);
+ }
+ if (rq->keep_items[RemovableRepeatRegion]) {
+ AddBooleanAutodefField(uop, kKeepRepeatRegion);
+ }
+
+ if (rq->suppress_alt_splice_phrase) {
+ AddBooleanAutodefField(uop, kSuppressFeatureAltSplice);
+ }
+ if (rq->remove_subfeatures) {
+ AddBooleanAutodefField(uop, kSuppressMobileElementSubfeatures);
+ }
+ AddFeatureListType(uop, rq->feature_list_type);
+ AddMiscFeatParseRule(uop, rq->misc_feat_parse_rule);
+ if (rq->suppress_locus_tags) {
+ AddBooleanAutodefField(uop, kSuppressLocusTags);
+ }
+ AddSuppressedFeatures(uop, rq->suppressed_feature_list);
+
+ if (rq->use_ncrna_note) {
+ AddBooleanAutodefField(uop, kUseNcRNAComment);
+ }
+}
+
+const CharPtr kSubSources = "SubSources"; /* label of the strings field built in AddModListToAutoDefUserObject from modifiers whose isOrgMod is false */
+const CharPtr kOrgMods = "OrgMods"; /* label of the strings field built there from modifiers whose isOrgMod is true */
+
+//Splits modifier_indices (each node's data.intvalue indexes DefLineModifiers) into names with isOrgMod set and names without, builds a kOrgMods and/or kSubSources strings field from them, and adds both to uop nested under one kModifierList field (choice 11). Adds nothing when both groups are empty. Not testing construction of autodef options object at this time.
+void AddModListToAutoDefUserObject(UserObjectPtr uop, ValNodePtr modifier_indices)
+{
+ UserFieldPtr ufp, ufp_ss = NULL, ufp_mod = NULL;
+ ValNodePtr ss_vals = NULL, mod_vals = NULL, vnp;
+
+ for (vnp = modifier_indices; vnp != NULL; vnp = vnp->next) {
+ if (DefLineModifiers[vnp->data.intvalue].isOrgMod) { /* NOTE(review): the index is used without a range check -- confirm callers only pass valid positions */
+ ValNodeAddPointer(&mod_vals, 0, DefLineModifiers[vnp->data.intvalue].name);
+ } else {
+ ValNodeAddPointer(&ss_vals, 0, DefLineModifiers[vnp->data.intvalue].name);
+ }
+ }
+ if (ss_vals != NULL) {
+ ufp_ss = BuildStringsField(kSubSources, ss_vals);
+ ss_vals = ValNodeFree(ss_vals);
+ }
+
+ if (mod_vals != NULL) {
+ ufp_mod = BuildStringsField(kOrgMods, mod_vals);
+ mod_vals = ValNodeFree(mod_vals);
+ }
+
+ if (ufp_ss != NULL || ufp_mod != NULL) {
+ ufp = UserFieldNew();
+ LabelUserField(ufp, kModifierList);
+ ufp->choice = 11; /* data.ptrvalue below holds a chain of child fields */
+ if (ufp_ss != NULL) {
+ ufp_ss->next = ufp_mod; /* SubSources first, then OrgMods (ufp_mod may be NULL) */
+ ufp->data.ptrvalue = ufp_ss;
+ } else {
+ ufp->data.ptrvalue = ufp_mod;
+ }
+ AddFieldToUserObject(ufp, uop);
+ }
+}
+
+//not testing construction of autodef options object at this time
+NLM_EXTERN UserObjectPtr MakeAutoDefOptionsUserObject
+(OrganismDescriptionModifiersPtr odmp,
ModifierItemLocalPtr modList,
ValNodePtr modifier_indices,
DeflineFeatureRequestListPtr feature_requests,
@@ -12759,7 +13566,107 @@ AutoDefForSeqEntry
Boolean alternate_splice_flag,
Boolean gene_cluster_opp_strand)
{
+ UserObjectPtr uop;
+
+ uop = UserObjectNew();
+ uop->type = ObjectIdNew();
+ uop->type->str = StringSave(kAutoDefOptions);
+ uop->_class = StringSave("1.0");
+
+ AddOrganismDescriptionModifiersToAutoDefUserObject(uop, odmp);
+ if (odmp->use_modifiers) {
+ AddModListToAutoDefUserObject(uop, modifier_indices);
+ }
+ AddDeflineFeatureRequestListToAutoDefUserObject(uop, feature_requests);
+ if (gene_cluster_opp_strand) {
+ AddBooleanAutodefField(uop, kGeneClusterOppStrand);
+ }
+ if (alternate_splice_flag) {
+ AddBooleanAutodefField(uop, kAltSpliceFlag);
+ }
+ AddAutodefProductFlag(uop, product_flag);
+
+
+ return uop;
+}
+
+//not testing construction of autodef options object at this time
+/* Descriptor visitor: flags an extended user descriptor for deletion when
+ * IsAutoDefOptions accepts its payload.  The data argument is not used. */
+static void RemoveAutoDefObjectCallback(SeqDescPtr sdp, Pointer data)
+{
+  if (sdp == NULL || !sdp->extended) {
+    return;
+  }
+  if (sdp->choice != Seq_descr_user) {
+    return;
+  }
+  if (!IsAutoDefOptions(sdp->data.ptrvalue)) {
+    return;
+  }
+  /* extended descriptors are ObjValNodes, which carry the deleteme flag */
+  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
+}
+
+//not testing construction of autodef options object at this time
+/* Removes the AutoDef options user descriptors found under sep.
+ * Works in two passes: first mark, then delete. */
+NLM_EXTERN void RemoveAutodefObjects(SeqEntryPtr sep)
+{
+ /* pass 1: RemoveAutoDefObjectCallback sets idx.deleteme on each match */
+ VisitDescriptorsInSep(sep, NULL, RemoveAutoDefObjectCallback);
+ /* pass 2: drop everything that was marked */
+ DeleteMarkedObjects(0, OBJ_SEQENTRY, (Pointer)sep);
+}
+
+
+//not testing construction of autodef options object at this time
+/*
+ * RemoveAutodefObjectsForDesc
+ *
+ * Removes the AutoDef options user objects from the SeqEntry that owns the
+ * descriptor sdp.  Does nothing when sdp is NULL, is not an extended
+ * (ObjValNode) descriptor, or when no SeqEntry is found for its parent.
+ */
+NLM_EXTERN void RemoveAutodefObjectsForDesc(SeqDescPtr sdp)
+{
+  ObjValNodePtr ovp;
+  SeqEntryPtr sep;
+
+  if (sdp == NULL || sdp->extended == 0) {
+    return;
+  }
+  /* only extended descriptors carry the idx block with the parent pointer */
+  ovp = (ObjValNodePtr) sdp;
+  sep = SeqMgrGetSeqEntryForData(ovp->idx.parentptr);
+  if (sep == NULL) {
+    return;
+  }
+  RemoveAutodefObjects(sep);
+}
+
+//not testing construction of autodef options object at this time
+/*
+ * AddAutoDefUserObjectCallback
+ *
+ * Bioseq visitor: attaches a private copy of the user object passed in data
+ * to bsp as a new user descriptor.
+ *
+ *   bsp  - Bioseq being visited; protein Bioseqs (ISA_aa) are skipped
+ *   data - the UserObjectPtr to copy; it is not consumed
+ *
+ * If the copy or the new descriptor cannot be created, bsp is left without
+ * a new payload and the copy is released.
+ */
+void AddAutoDefUserObjectCallback(BioseqPtr bsp, Pointer data)
+{
+  UserObjectPtr uop = (UserObjectPtr) data;
+  UserObjectPtr cpy;
+  SeqDescPtr sdp;
+
+  if (bsp == NULL || uop == NULL || ISA_aa(bsp->mol)) {
+    return;
+  }
+
+  /* each Bioseq gets its own copy so descriptors never share a payload */
+  cpy = (UserObjectPtr) AsnIoMemCopy(uop,
+          (AsnReadFunc) UserObjectAsnRead, (AsnWriteFunc) UserObjectAsnWrite);
+  if (cpy == NULL) {
+    return;
+  }
+
+  sdp = CreateNewDescriptorOnBioseq(bsp, Seq_descr_user);
+  if (sdp == NULL) {
+    /* nothing to hang the copy on: release it rather than leak it */
+    UserObjectFree(cpy);
+    return;
+  }
+  sdp->data.ptrvalue = cpy;
+}
+
+//not testing construction of autodef options object at this time
+/* Replaces the AutoDef options user objects under sep: existing ones are
+ * removed, then AddAutoDefUserObjectCallback is run on every Bioseq with
+ * uop as its data argument.  A NULL sep is ignored. */
+NLM_EXTERN void AddAutoDefUserObjectToSeqEntry(SeqEntryPtr sep, UserObjectPtr uop)
+{
+  if (sep != NULL) {
+    RemoveAutodefObjects(sep);
+    VisitBioseqsInSep(sep, uop, AddAutoDefUserObjectCallback);
+  }
+}
+//LCOV_EXCL_STOP
+
+NLM_EXTERN void
+AutoDefForSeqEntryEx
+(SeqEntryPtr sep,
+Uint2 entityID,
+OrganismDescriptionModifiersPtr odmp,
+ModifierItemLocalPtr modList,
+ValNodePtr modifier_indices,
+DeflineFeatureRequestListPtr feature_requests,
+Int2 product_flag,
+Boolean alternate_splice_flag,
+Boolean gene_cluster_opp_strand,
+Boolean update_options)
+{
+
ValNodePtr defline_clauses = NULL;
+ UserObjectPtr uop;
if (sep == NULL) return;
@@ -12767,6 +13674,7 @@ AutoDefForSeqEntry
SeqEntrySetScope (sep);
+
BuildDefLineFeatClauseList (sep, entityID,
feature_requests,
product_flag, alternate_splice_flag,
@@ -12783,10 +13691,314 @@ AutoDefForSeqEntry
AddPopsetTitles (sep, feature_requests, product_flag,
alternate_splice_flag, gene_cluster_opp_strand);
+
+ if (update_options) {
+ uop = MakeAutoDefOptionsUserObject(odmp, modList, modifier_indices,
+ feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand);
+ AddAutoDefUserObjectToSeqEntry(sep, uop);
+ uop = UserObjectFree(uop);
+ }
+}
+
+//LCOV_EXCL_START
+//not testing construction of autodef options object at this time
+/* Wrapper around AutoDefForSeqEntryEx that forwards every argument unchanged
+ * and always passes TRUE for update_options, so the options user object is
+ * rebuilt and attached to the entry after the definition lines are made. */
+NLM_EXTERN void
+AutoDefForSeqEntry
+(SeqEntryPtr sep,
+Uint2 entityID,
+OrganismDescriptionModifiersPtr odmp,
+ModifierItemLocalPtr modList,
+ValNodePtr modifier_indices,
+DeflineFeatureRequestListPtr feature_requests,
+Int2 product_flag,
+Boolean alternate_splice_flag,
+Boolean gene_cluster_opp_strand)
+{
+ /* final TRUE == update_options */
+ AutoDefForSeqEntryEx(sep, entityID, odmp, modList, modifier_indices,
+ feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand, TRUE);
+}
+//LCOV_EXCL_STOP
+
+/* Tests whether field is labelled field_name (case-insensitive).
+ * Returns FALSE when the label differs.  Returns TRUE when it matches, and
+ * in that case sets *val to TRUE if the field is a boolean (choice 4) whose
+ * value is set; *val is never reset to FALSE.
+ * The caller must pass a field with a non-NULL label. */
+Boolean SetBoolFromField(UserFieldPtr field, CharPtr field_name, BoolPtr val)
+{
+  if (StringICmp(field->label->str, field_name) != 0) {
+    return FALSE;
+  }
+  if (field->choice == 4 && field->data.boolvalue) {
+    *val = TRUE;
+  }
+  return TRUE;
+}
+
+/* Stores new_val in *val when field is a string field (choice 1) whose text
+ * equals match, ignoring case.  Otherwise *val is left untouched. */
+void SetDefLineTypeFromFieldString(UserFieldPtr field, CharPtr match, DefLineType new_val, DefLineType PNTR val)
+{
+  if (field->choice != 1) {
+    return;
+  }
+  if (StringICmp(field->data.ptrvalue, match) != 0) {
+    return;
+  }
+  *val = new_val;
+}
+
+
+/* Stores new_val in *val when field is a string field (choice 1) whose text
+ * equals match, ignoring case.  Returns TRUE only when the value was
+ * stored. */
+Boolean SetInt4FromFieldString(UserFieldPtr field, CharPtr match, Int4 new_val, Int4Ptr val)
+{
+  Boolean matched = FALSE;
+
+  if (field->choice == 1 && StringICmp(field->data.ptrvalue, match) == 0) {
+    *val = new_val;
+    matched = TRUE;
+  }
+  return matched;
+}
+
+
+/* Int2 counterpart of SetInt4FromFieldString without a return value:
+ * *val becomes new_val when field is a string field (choice 1) whose text
+ * equals match, ignoring case. */
+void SetInt2FromFieldString(UserFieldPtr field, CharPtr match, Int2 new_val, Int2Ptr val)
+{
+  if (field->choice == 1 && StringICmp(field->data.ptrvalue, match) == 0) {
+    *val = new_val;
+  }
+}
+
+
+/*
+ * SetModifierIndices
+ *
+ * Converts a modifier-list user field back into DefLineModifiers[] indices.
+ *
+ *   modifier_indices - list that receives one integer ValNode per
+ *                      recognised modifier name (appended)
+ *   field            - field of choice 11 whose data.ptrvalue is a chain of
+ *                      labelled sub-fields of choice 7 (string arrays of
+ *                      length num)
+ *
+ * Names that GetDeflinePosForFieldName does not recognise (it returns -1)
+ * are skipped: the resulting list is used to subscript DefLineModifiers[],
+ * so a negative entry must never be stored.
+ */
+void SetModifierIndices(ValNodePtr PNTR modifier_indices, UserFieldPtr field)
+{
+  UserFieldPtr curr;
+  CharPtr PNTR cpp;
+  Int4 i;
+  Int4 subtype;
+
+  if (modifier_indices == NULL || field == NULL || field->choice != 11) {
+    return;
+  }
+
+  for (curr = field->data.ptrvalue; curr != NULL; curr = curr->next) {
+    if (curr->label == NULL || curr->choice != 7) {
+      continue;
+    }
+    cpp = (CharPtr PNTR) curr->data.ptrvalue;
+    if (cpp == NULL) {
+      /* malformed field: a count without a string array */
+      continue;
+    }
+    for (i = 0; i < curr->num; i++) {
+      subtype = GetDeflinePosForFieldName(cpp[i]);
+      if (subtype >= 0) {
+        ValNodeAddInt(modifier_indices, 0, subtype);
+      }
+    }
+  }
+}
+
+/* Appends one ValNode per string in field (choice 7, num entries) to
+ * *suppressed_features.  Each name is mapped through GetFeatureTypeByName
+ * and GetFeatdefFromFeatureType; the result, narrowed to Uint1, is stored
+ * as the node's choice and the node's pointer is NULL. */
+void SetSuppressedFeatures(ValNodePtr PNTR suppressed_features, UserFieldPtr field)
+{
+  CharPtr PNTR names;
+  Int4 idx;
+  Uint1 featdef;
+
+  if (suppressed_features != NULL && field != NULL && field->choice == 7) {
+    names = (CharPtr PNTR) field->data.ptrvalue;
+    idx = 0;
+    while (idx < field->num) {
+      featdef = (Uint1) GetFeatdefFromFeatureType(GetFeatureTypeByName(names[idx]));
+      ValNodeAddPointer(suppressed_features, featdef, NULL);
+      idx++;
+    }
+  }
+}
+
+/*
+ * RegenerateAutoDef
+ *
+ * Re-runs AutoDef for the SeqEntry that contains bsp, using the settings
+ * stored in the AutoDef options user descriptor found on bsp.
+ *
+ * Does nothing when bsp is NULL or a protein, when no SeqEntry is found for
+ * it, when it carries no AutoDef options descriptor, or when the modifier
+ * table cannot be allocated.
+ *
+ * The stored fields are decoded into an OrganismDescriptionModifiers
+ * record, a DeflineFeatureRequestList, a modifier index list and a few
+ * scalar flags, which are handed to AutoDefForSeqEntryEx with
+ * update_options FALSE, so the stored options object is not rewritten.
+ */
+NLM_EXTERN void RegenerateAutoDef(BioseqPtr bsp)
+{
+  SeqMgrDescContext context;
+  SeqDescPtr sdp;
+  SeqEntryPtr sep;
+  UserObjectPtr uop;
+  OrganismDescriptionModifiers odm;
+  ModifierItemLocalPtr modlist;
+  ValNodePtr modifier_indices = NULL;
+  DeflineFeatureRequestList feature_request;
+  Int2 product_flag = DEFAULT_ORGANELLE_CLAUSE;
+  Boolean alternate_splice_flag = FALSE;
+  Boolean gene_cluster_opp_strand = FALSE;
+  UserFieldPtr field;
+  Int4 index;
+
+  if (bsp == NULL || ISA_aa(bsp->mol)) {
+    return;
+  }
+  sep = SeqMgrGetSeqEntryForData(bsp);
+  if (sep == NULL) {
+    return;
+  }
+
+  /* find the first user descriptor that is an AutoDef options object */
+  sdp = SeqMgrGetNextDescriptor(bsp, NULL, Seq_descr_user, &context);
+  while (sdp != NULL && !IsAutoDefOptions(sdp->data.ptrvalue)) {
+    sdp = SeqMgrGetNextDescriptor(bsp, sdp, Seq_descr_user, &context);
+  }
+  if (sdp == NULL) {
+    return;
+  }
+
+  uop = sdp->data.ptrvalue;
+
+  MemSet(&odm, 0, sizeof(OrganismDescriptionModifiers));
+  modlist = MemNew(NumDefLineModifiers() * sizeof(ModifierItemLocalData));
+  if (modlist == NULL) {
+    /* same policy as DoTbl2AsnAutoDef: no table, no AutoDef */
+    return;
+  }
+  InitFeatureRequests(&feature_request);
+
+  /* Each SetBoolFromField call returns TRUE when the label matches, which
+   * ends the else-if chain; the empty bodies are intentional. */
+  for (field = uop->data; field != NULL; field = field->next) {
+    if (field->label != NULL) {
+      /* organism */
+      if (SetBoolFromField(field, kDoNotApplyToAff, &(odm.exclude_aff))) {
+      } else if (SetBoolFromField(field, kDoNotApplyToCf, &(odm.exclude_cf))) {
+      } else if (SetBoolFromField(field, kDoNotApplyToNr, &(odm.exclude_nr))) {
+      } else if (SetBoolFromField(field, kDoNotApplyToSp, &(odm.exclude_sp))) {
+      } else if (SetBoolFromField(field, kGeneClusterOppStrand, &gene_cluster_opp_strand)) {
+      } else if (SetBoolFromField(field, kIncludeCountryText, &(odm.include_country_extra))) {
+      } else if (SetBoolFromField(field, kKeepAfterSemicolon, &(odm.allow_semicolon_in_modifier))) {
+      } else if (SetBoolFromField(field, kLeaveParenthetical, &(odm.keep_paren))) {
+      } else if (SetBoolFromField(field, kUseLabels, &(odm.use_labels))) {
+      } else if (SetBoolFromField(field, kAllowModAtEndOfTaxname, &(odm.allow_mod_at_end_of_taxname))) {
+      } else if (StringICmp(field->label->str, kHIVRule) == 0) {
+        if (SetInt4FromFieldString(field, kPreferClone, clone_isolate_HIV_rule_prefer_clone, &(odm.clone_isolate_HIV_rule_num)) ||
+            SetInt4FromFieldString(field, kPreferIsolate, clone_isolate_HIV_rule_prefer_isolate, &(odm.clone_isolate_HIV_rule_num)) ||
+            SetInt4FromFieldString(field, kWantBoth, clone_isolate_HIV_rule_want_both, &(odm.clone_isolate_HIV_rule_num))) {
+          odm.use_modifiers = TRUE;
+        }
+      } else if (StringICmp(field->label->str, kModifierList) == 0) {
+        SetModifierIndices(&modifier_indices, field);
+      /* features */
+      } else if (SetBoolFromField(field, kAltSpliceFlag, &(alternate_splice_flag))) {
+      } else if (SetBoolFromField(field, kKeep3UTRs, &(feature_request.keep_items[Removable3UTR]))) {
+      } else if (SetBoolFromField(field, kKeep5UTRs, &(feature_request.keep_items[Removable5UTR]))) {
+      } else if (SetBoolFromField(field, kKeepExons, &(feature_request.keep_items[RemovableExon]))) {
+      } else if (SetBoolFromField(field, kKeepIntrons, &(feature_request.keep_items[RemovableIntron]))) {
+      } else if (SetBoolFromField(field, kKeepLTRs, &(feature_request.keep_items[RemovableLTR]))) {
+      } else if (SetBoolFromField(field, kKeepPromoters, &(feature_request.keep_items[RemovablePromoter]))) {
+      } else if (SetBoolFromField(field, kKeepuOrf, &(feature_request.keep_items[RemovableuORF]))) {
+      } else if (SetBoolFromField(field, kKeepMobileElement, &(feature_request.keep_items[RemovableMobileElement]))) {
+      } else if (SetBoolFromField(field, kKeepNoncodingProductFeat, &(feature_request.keep_items[RemovableNoncodingProductFeat]))) {
+      } else if (SetBoolFromField(field, kKeepPrecursorRNA, &(feature_request.keep_items[RemovablePrecursorRNA]))) {
+      } else if (SetBoolFromField(field, kKeepncRNA, &(feature_request.keep_items[RemovablencRNA]))) {
+      } else if (SetBoolFromField(field, kKeepRepeatRegion, &(feature_request.keep_items[RemovableRepeatRegion]))) {
+      } else if (SetBoolFromField(field, kUseFakePromoters, &(feature_request.add_fake_promoters))) {
+      } else if (SetBoolFromField(field, kSuppressFeatureAltSplice, &(feature_request.suppress_alt_splice_phrase))) {
+      } else if (SetBoolFromField(field, kSuppressLocusTags, &(feature_request.suppress_locus_tags))) {
+      } else if (SetBoolFromField(field, kSuppressMobileElementSubfeatures, &(feature_request.remove_subfeatures))) {
+      } else if (SetBoolFromField(field, kUseNcRNAComment, &(feature_request.use_ncrna_note))) {
+      } else if (SetBoolFromField(field, kSuppressAllele, &(feature_request.suppress_allele))) {
+      } else if (StringICmp(field->label->str, kSpecifyNuclearProduct) == 0) {
+        /* NOTE(review): this assigns the same constant product_flag is
+         * initialised with, so it only resets an earlier kProductFlag
+         * value to the default - confirm that is the intended constant */
+        if (field->choice == 4 && field->data.boolvalue) {
+          product_flag = DEFAULT_ORGANELLE_CLAUSE;
+        }
+      } else if (StringICmp(field->label->str, kMaxMods) == 0) {
+        if (field->choice == 2) {
+          odm.max_mods = field->data.intvalue;
+        }
+      } else if (StringICmp(field->label->str, kFeatureListType) == 0) {
+        SetDefLineTypeFromFieldString(field, kCompleteGenome, DEFLINE_COMPLETE_GENOME, &(feature_request.feature_list_type));
+        SetDefLineTypeFromFieldString(field, kCompleteSequence, DEFLINE_COMPLETE_SEQUENCE, &(feature_request.feature_list_type));
+        SetDefLineTypeFromFieldString(field, kPartialGenome, DEFLINE_PARTIAL_GENOME, &(feature_request.feature_list_type));
+        SetDefLineTypeFromFieldString(field, kPartialSequence, DEFLINE_PARTIAL_SEQUENCE, &(feature_request.feature_list_type));
+        SetDefLineTypeFromFieldString(field, kSequence, DEFLINE_SEQUENCE, &(feature_request.feature_list_type));
+        SetDefLineTypeFromFieldString(field, kListAllFeatures, DEFLINE_USE_FEATURES, &(feature_request.feature_list_type));
+      } else if (StringICmp(field->label->str, kMiscFeatRule) == 0) {
+        SetInt4FromFieldString(field, kCommentFeat, 1, &(feature_request.misc_feat_parse_rule));
+        SetInt4FromFieldString(field, kDelete, 2, &(feature_request.misc_feat_parse_rule));
+        SetInt4FromFieldString(field, kNoncodingProductFeat, 3, &(feature_request.misc_feat_parse_rule));
+      } else if (StringICmp(field->label->str, kProductFlag) == 0) {
+        SetInt2FromFieldString(field, "mitochondrial", 1, &product_flag);
+        SetInt2FromFieldString(field, "chloroplast", 2, &product_flag);
+        SetInt2FromFieldString(field, "kinetoplast", 3, &product_flag);
+        SetInt2FromFieldString(field, "plastid", 4, &product_flag);
+        SetInt2FromFieldString(field, "chromoplast", 5, &product_flag);
+        SetInt2FromFieldString(field, "cyanelle", 6, &product_flag);
+        SetInt2FromFieldString(field, "apicoplast", 7, &product_flag);
+        SetInt2FromFieldString(field, "leucoplast", 8, &product_flag);
+        SetInt2FromFieldString(field, "proplastid", 9, &product_flag);
+      } else if (StringICmp(field->label->str, kSuppressedFeatures) == 0) {
+        SetSuppressedFeatures(&(feature_request.suppressed_feature_list), field);
+      }
+    }
+  }
+  /* a stored modifier list implies that modifiers are to be used */
+  if (modifier_indices != NULL) {
+    odm.use_modifiers = TRUE;
+  }
+
+  AutoDefForSeqEntryEx(sep, bsp->idx.entityID, &odm, modlist, modifier_indices,
+                       &feature_request, product_flag, alternate_splice_flag,
+                       gene_cluster_opp_strand, FALSE);
+
+  /* cleanup */
+  for (index = 0; index < NumDefLineModifiers(); index++) {
+    ValNodeFree(modlist[index].values_seen);
+  }
+  MemFree(modlist);
+
+  modifier_indices = ValNodeFree(modifier_indices);
+  /* NOTE(review): feature_request.suppressed_feature_list is filled above
+   * and not released here - confirm whether a callee takes ownership */
+
+}
+
+
+//LCOV_EXCL_START
+//Not part of Autodef or Cleanup
+/*
+ * DoTbl2AsnAutoDef
+ * Runs AutoDef over sep with default feature requests
+ * (DEFAULT_ORGANELLE_CLAUSE, both Boolean flags FALSE).
+ * Returns without doing anything when sep is NULL, entityID is below 1, or
+ * the modifier table cannot be allocated.  The scope set for the duration
+ * of the call is put back to its previous value before returning.
+ */
+NLM_EXTERN void DoTbl2AsnAutoDef(SeqEntryPtr sep, Uint2 entityID)
+
+{
+ ValNodePtr defline_clauses = NULL;
+ DeflineFeatureRequestList feature_requests;
+ size_t index;
+ ValNodePtr modifier_indices = NULL;
+ ModifierItemLocalPtr modList;
+ OrganismDescriptionModifiers odmp;
+ SeqEntryPtr oldscope;
+
+ if (sep == NULL) return;
+ if (entityID < 1) return;
+
+ /* one ModifierItemLocalData slot per entry of the modifier table */
+ modList = MemNew(NumDefLineModifiers() * sizeof(ModifierItemLocalData));
+ if (modList == NULL) return;
+
+ InitFeatureRequests(&feature_requests);
+
+ SetRequiredModifiers(modList);
+ CountModifiers(modList, sep);
+
+ InitOrganismDescriptionModifiers(&odmp, sep);
+
+ RemoveNucProtSetTitles(sep);
+ /* remember the previous scope so it can be restored at the end */
+ oldscope = SeqEntrySetScope(sep);
+
+ BuildDefLineFeatClauseList(sep, entityID, &feature_requests,
+ DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE,
+ &defline_clauses);
+ /* the modifier choice depends on whether the clauses alone are unique */
+ if (AreFeatureClausesUnique(defline_clauses)) {
+ modifier_indices = GetModifierIndicesFromModList(modList);
+ }
+ else {
+ modifier_indices = FindBestModifiers(sep, modList);
+ }
+
+ BuildDefinitionLinesFromFeatureClauseLists(defline_clauses, modList,
+ modifier_indices, &odmp);
+ DefLineFeatClauseListFree(defline_clauses);
+ /* modList cannot be NULL here (checked after MemNew); the test is kept as is */
+ if (modList != NULL) {
+ for (index = 0; index < NumDefLineModifiers(); index++) {
+ ValNodeFree(modList[index].values_seen);
+ }
+ MemFree(modList);
+ }
+ modifier_indices = ValNodeFree(modifier_indices);
+
+ ClearProteinTitlesInNucProts(entityID, NULL);
+ InstantiateProteinTitles(entityID, NULL);
+ /*
+ RemovePopsetTitles (sep);
+ */
+ AddPopsetTitles(sep, &feature_requests, DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE);
+
+ SeqEntrySetScope(oldscope);
}
/* Retro PopSet Title Functions */
+//Not part of Autodef or Cleanup (no more popset retros)
static SeqDescPtr BioseqHasTitleOrNucProtSetHasTitle (BioseqPtr bsp)
{
SeqDescPtr sdp = NULL;
@@ -12812,6 +14024,7 @@ static SeqDescPtr BioseqHasTitleOrNucProtSetHasTitle (BioseqPtr bsp)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static BioseqPtr FindFirstNucBioseqWithTitle (SeqEntryPtr sep)
{
@@ -12837,6 +14050,7 @@ static BioseqPtr FindFirstNucBioseqWithTitle (SeqEntryPtr sep)
}
+//Not part of Autodef or Cleanup (no more popset retros)
static void RetroPopSetAutoDefCallback (BioseqSetPtr bssp, Pointer data)
{
SeqEntryPtr set_sep;
@@ -12895,6 +14109,7 @@ static void RetroPopSetAutoDefCallback (BioseqSetPtr bssp, Pointer data)
}
+//Not part of Autodef or Cleanup (no more popset retros)
NLM_EXTERN void PopSetAutoDefRetro (SeqEntryPtr sep, PopSetRetroStatPtr stat)
{
@@ -12910,8 +14125,7 @@ NLM_EXTERN void PopSetAutoDefRetro (SeqEntryPtr sep, PopSetRetroStatPtr stat)
}
-
-
+//Not used for Autodef or Cleanup
/* functions for editing seq-locs */
NLM_EXTERN Int4 ExtendSeqLocToEnd (SeqLocPtr slp, BioseqPtr bsp, Boolean end5)
{
@@ -12992,9 +14206,8 @@ NLM_EXTERN Int4 ExtendSeqLocToEnd (SeqLocPtr slp, BioseqPtr bsp, Boolean end5)
return start_diff;
}
-
/* functions for feature conversion. shared by sequin5 and macroapi */
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean IsBioseqSetInGPS (BioseqSetPtr bssp)
{
if (bssp == NULL) return FALSE;
@@ -13003,7 +14216,7 @@ NLM_EXTERN Boolean IsBioseqSetInGPS (BioseqSetPtr bssp)
return IsBioseqSetInGPS ((BioseqSetPtr) bssp->idx.parentptr);
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean IsBioseqInGPS (BioseqPtr bsp)
{
if (bsp == NULL || bsp->idx.parentptr == NULL || bsp->idx.parenttype != OBJ_BIOSEQSET)
@@ -13016,14 +14229,14 @@ NLM_EXTERN Boolean IsBioseqInGPS (BioseqPtr bsp)
}
}
-
+//Not used for Autodef or Cleanup
/* Returns the IsBioseqInGPS result for the Bioseq that
 * BioseqFindFromSeqLoc finds for the feature's location; FALSE for a NULL
 * feature. */
NLM_EXTERN Boolean IsFeatInGPS (SeqFeatPtr sfp)
{
  BioseqPtr bsp;

  if (sfp == NULL) {
    return FALSE;
  }
  bsp = BioseqFindFromSeqLoc (sfp->location);
  return IsBioseqInGPS (bsp);
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN RnaRefPtr RnaRefFromLabel (Uint2 featdef_to, CharPtr label, BoolPtr add_label_to_comment)
{
RnaRefPtr rrp;
@@ -13098,7 +14311,7 @@ NLM_EXTERN RnaRefPtr RnaRefFromLabel (Uint2 featdef_to, CharPtr label, BoolPtr a
return rrp;
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean ConvertProtToProtFunc
(SeqFeatPtr sfp,
Uint2 featdef_to)
@@ -13125,13 +14338,16 @@ NLM_EXTERN Boolean ConvertProtToProtFunc
case FEATDEF_transit_peptide_aa :
prp->processed = 4;
break;
+ case FEATDEF_propeptide :
+ prp->processed = 5;
+ break;
default :
break;
}
return TRUE;
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN void
ApplyCDSOptionsToFeature
(SeqFeatPtr sfp,
@@ -13178,7 +14394,7 @@ ApplyCDSOptionsToFeature
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean
ConvertCDSToRNA
(SeqFeatPtr sfp,
@@ -13220,7 +14436,7 @@ ConvertCDSToRNA
return TRUE;
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean ConvertGeneToRNA (SeqFeatPtr sfp, Uint2 featdef_to)
{
Char label [256];
@@ -13324,7 +14540,7 @@ NLM_EXTERN Boolean ConvertGeneToRNA (SeqFeatPtr sfp, Uint2 featdef_to)
/* These functions are used for converting features on nucleotide sequences to
* features on protein sequences */
-
+//Not used for Autodef or Cleanup
/* copied from seqport.c, for the benefit of load_fuzz_to_DNA */
static Boolean add_fuzziness_to_loc (SeqLocPtr slp, Boolean less)
{
@@ -13361,7 +14577,7 @@ static Boolean add_fuzziness_to_loc (SeqLocPtr slp, Boolean less)
return TRUE;
}
-
+//Not used for Autodef or Cleanup
/* copied from seqport.c, for the benefit of MYdnaLoc_to_aaLoc */
static Boolean load_fuzz_to_DNA(SeqLocPtr dnaLoc, SeqLocPtr aaLoc, Boolean
first)
@@ -13418,7 +14634,7 @@ Seq-loc*/
return FALSE;
}
-
+//Not used for Autodef or Cleanup
static SeqLocPtr MYdnaLoc_to_aaLoc(SeqFeatPtr sfp,
SeqLocPtr location_loc,
Boolean merge,
@@ -13526,7 +14742,7 @@ codons */
return SeqLocPackage(aa_loc);
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN SeqLocPtr BuildProtLoc (SeqFeatPtr overlapping_cds, SeqLocPtr slp, Int4Ptr frame)
{
SeqLocPtr tmp_loc, aa_loc = NULL, prot_loc = NULL, last_loc = NULL, next_loc;
@@ -13580,6 +14796,7 @@ NLM_EXTERN SeqLocPtr BuildProtLoc (SeqFeatPtr overlapping_cds, SeqLocPtr slp, In
return prot_loc;
}
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean ConvertRegionToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to)
{
BioseqPtr bsp;
@@ -13634,6 +14851,9 @@ NLM_EXTERN Boolean ConvertRegionToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to)
case FEATDEF_transit_peptide_aa :
prp->processed = 4;
break;
+ case FEATDEF_propeptide :
+ prp->processed = 5;
+ break;
default :
break;
}
@@ -13645,7 +14865,7 @@ NLM_EXTERN Boolean ConvertRegionToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to)
return TRUE;
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN SeqLocPtr GetProteinLocationForNucleotideFeatureConversion (SeqLocPtr nuc_slp, BoolPtr no_cds)
{
SeqFeatPtr cds;
@@ -13679,7 +14899,7 @@ NLM_EXTERN SeqLocPtr GetProteinLocationForNucleotideFeatureConversion (SeqLocPtr
/* converted here. */
/* */
/*---------------------------------------------------------------------*/
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean ConvertImpToProtFunc
(SeqFeatPtr sfp,
Uint2 featdef_to)
@@ -13764,6 +14984,9 @@ NLM_EXTERN Boolean ConvertImpToProtFunc
case FEATDEF_transit_peptide_aa :
prp->processed = 4;
break;
+ case FEATDEF_propeptide :
+ prp->processed = 5;
+ break;
}
/* Transfer unchanged fields from old feature */
@@ -13799,7 +15022,7 @@ NLM_EXTERN Boolean ConvertImpToProtFunc
return TRUE;
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN SeqLocPtr FindNucleotideLocationForProteinFeatureConversion (SeqLocPtr slp)
{
SeqMgrFeatContext context;
@@ -13830,7 +15053,7 @@ NLM_EXTERN SeqLocPtr FindNucleotideLocationForProteinFeatureConversion (SeqLocPt
/* ConvertProtToImp () - */
/* */
/*---------------------------------------------------------------------*/
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
{
ProtRefPtr prp;
@@ -13842,7 +15065,7 @@ NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
ValNodePtr vnp;
GBQualPtr gbqual = NULL;
GBQualPtr prevGbq;
- GBQualPtr topOfGbqList;
+ GBQualPtr topOfGbqList = NULL;
DbtagPtr dbt;
Char idStr[64];
ObjectIdPtr oip;
@@ -13878,6 +15101,10 @@ NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
if (4 != prp->processed)
return FALSE;
break;
+ case FEATDEF_propeptide :
+ if (5 != prp->processed)
+ return FALSE;
+ break;
}
/* Convert the location from the protein */
@@ -13971,7 +15198,9 @@ NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
gbqual = GBQualNew ();
if (NULL == gbqual)
return FALSE;
- prevGbq->next = gbqual;
+ if (prevGbq != NULL) {
+ prevGbq->next = gbqual;
+ }
gbqual->qual = StringSave ("EC_number");
gbqual->val = StringSave (ec);
}
@@ -13987,7 +15216,9 @@ NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
gbqual = GBQualNew ();
if (NULL == gbqual)
return FALSE;
- prevGbq->next = gbqual;
+ if (prevGbq != NULL) {
+ prevGbq->next = gbqual;
+ }
gbqual->qual = StringSave ("function");
gbqual->val = StringSave (activity);
}
@@ -14002,7 +15233,9 @@ NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
gbqual = GBQualNew ();
if (NULL == gbqual)
continue;
- prevGbq->next = gbqual;
+ if (prevGbq != NULL) {
+ prevGbq->next = gbqual;
+ }
oip = dbt->tag;
if (oip->str != NULL && (! StringHasNoText (oip->str))) {
sprintf (idStr, "%s:%s", (CharPtr)dbt->tag, oip->str);
@@ -14019,7 +15252,9 @@ NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
/* Insert the new qualifiers in front of any existing ones */
- gbqual->next = sfp->qual;
+ if (gbqual != NULL) {
+ gbqual->next = sfp->qual;
+ }
sfp->qual = topOfGbqList;
/* Free the obsolete Protein reference */
@@ -14028,7 +15263,7 @@ NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
return TRUE;
}
-
+//Not used for Autodef and Cleanup
/* functions for converting from biosource */
NLM_EXTERN CharPtr SubSourceText (BioSourcePtr biop, Uint1 subtype, BoolPtr found)
{
@@ -14058,6 +15293,7 @@ NLM_EXTERN CharPtr SubSourceText (BioSourcePtr biop, Uint1 subtype, BoolPtr foun
return subtype_txt;
}
+//Not used for Autodef and Cleanup
NLM_EXTERN CharPtr OrgModText (BioSourcePtr biop, Uint1 subtype, BoolPtr found)
{
Int4 subtype_len = 0;
@@ -14092,6 +15328,7 @@ NLM_EXTERN CharPtr OrgModText (BioSourcePtr biop, Uint1 subtype, BoolPtr found)
return subtype_txt;
}
+//Not used for Autodef and Cleanup
NLM_EXTERN CharPtr NoteText (BioSourcePtr biop, CharPtr comment)
{
CharPtr orgmod_note, subsource_note;
@@ -14134,7 +15371,7 @@ NLM_EXTERN CharPtr NoteText (BioSourcePtr biop, CharPtr comment)
return note_text;
}
-
+//Not used for Autodef and Cleanup
/*---------------------------------------------------------------------*/
/* */
/* ConvertBioSrcToRepeatRegion () */
@@ -14207,7 +15444,7 @@ NLM_EXTERN Boolean ConvertBioSrcToRepeatRegion (SeqFeatPtr sfp, Uint2 featdef_to
return TRUE;
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Boolean
ConvertNonPseudoCDSToMiscFeat
(SeqFeatPtr sfp,
@@ -14308,7 +15545,7 @@ ConvertNonPseudoCDSToMiscFeat
return TRUE;
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Uint1 RnaTypeFromFeatdef (Uint2 featdef)
{
switch (featdef)
@@ -14347,7 +15584,7 @@ NLM_EXTERN Uint1 RnaTypeFromFeatdef (Uint2 featdef)
}
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Boolean ConvertRegionToRNAFunc
(SeqFeatPtr sfp,
Uint2 featdef_to)
@@ -14388,7 +15625,7 @@ NLM_EXTERN Boolean ConvertRegionToRNAFunc
return TRUE;
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN CharPtr GetImportFeatureName (Uint2 featdef_key)
{
FeatDefPtr curr;
@@ -14407,7 +15644,7 @@ NLM_EXTERN CharPtr GetImportFeatureName (Uint2 featdef_key)
return NULL;
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Boolean ConvertRegionToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
{
GBQualPtr gbqual;
@@ -14445,7 +15682,7 @@ NLM_EXTERN Boolean ConvertRegionToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
return TRUE;
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
{
ImpFeatPtr ifp;
@@ -14472,8 +15709,20 @@ NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
return TRUE;
}
+//Not used for Autodef and Cleanup
+/* Decides whether val may be copied into the comment of an import feature
+ * of type featdef_to.  Text-free values are rejected, and so is the value
+ * "D-Loop" (per StringsAreEquivalent) when the target is FEATDEF_D_loop. */
+static Boolean OkToAddToImpFeat (CharPtr val, Uint2 featdef_to)
+{
+  if (StringHasNoText (val)) {
+    return FALSE;
+  }
+  if (featdef_to == FEATDEF_D_loop && StringsAreEquivalent(val, "D-Loop")) {
+    return FALSE;
+  }
+  return TRUE;
+}
-NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc
+//Not used for Autodef and Cleanup
+NLM_EXTERN Boolean ConvertGeneToImpFeatFunc
(SeqFeatPtr sfp,
Uint2 featdef_to)
{
@@ -14481,6 +15730,7 @@ NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc
CharPtr new_comment;
GeneRefPtr grp;
Int4 comment_len = 0;
+ CharPtr featname;
if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE)
{
@@ -14495,11 +15745,11 @@ NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
if (grp != NULL)
{
- if (!StringHasNoText (grp->locus))
+ if (OkToAddToImpFeat (grp->locus, featdef_to))
{
comment_len += StringLen (grp->locus) + 2;
}
- if (!StringHasNoText (grp->desc))
+ if (OkToAddToImpFeat (grp->desc, featdef_to))
{
comment_len += StringLen (grp->desc) + 2;
}
@@ -14522,12 +15772,12 @@ NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc
* comment_len would only have been > 0 if grp had existed
* and had nonempty fields.
*/
- if (!StringHasNoText (grp->desc))
+ if (OkToAddToImpFeat (grp->desc, featdef_to))
{
StringCat (new_comment, grp->desc);
StringCat (new_comment, "; ");
}
- if (!StringHasNoText (grp->locus))
+ if (OkToAddToImpFeat (grp->locus, featdef_to))
{
StringCat (new_comment, grp->locus);
StringCat (new_comment, "; ");
@@ -14547,13 +15797,24 @@ NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc
GeneRefFree ((GeneRefPtr) sfp->data.value.ptrvalue);
sfp->data.choice = SEQFEAT_IMP;
sfp->data.value.ptrvalue = (Pointer) ifp;
- ifp->key = StringSave ("misc_feature");
+
+ featname = GetImportFeatureName (featdef_to);
+ ifp->key = MemFree (ifp->key);
+ if (featname == NULL)
+ {
+ ifp->key = StringSave ("misc_feature");
+ }
+ else
+ {
+ ifp->key = StringSave (featname);
+ }
+
return TRUE;
}
-
+//Not used for Autodef and Cleanup
/* For mat-peptide instantiation */
static SeqIdPtr MakeMatPeptideProductId (SeqLocPtr mat_peptide_loc)
{
@@ -14592,7 +15853,7 @@ static SeqIdPtr MakeMatPeptideProductId (SeqLocPtr mat_peptide_loc)
return sip;
}
-
+//Not used for Autodef and Cleanup
static void InstantiateMatPeptideProductForProteinFeature (SeqFeatPtr sfp, Pointer data)
{
BioseqPtr mat_bsp, prot_bsp;
@@ -14717,7 +15978,7 @@ static void InstantiateMatPeptideProductForProteinFeature (SeqFeatPtr sfp, Point
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep)
{
ByteStorePtr bs;
@@ -14831,7 +16092,7 @@ NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep)
}
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
{
SeqMgrFeatContext fcontext;
@@ -14860,7 +16121,7 @@ NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
return prot_sfp;
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp)
{
GeneRefPtr grp;
@@ -14890,7 +16151,7 @@ NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp)
return TRUE;
}
-
+//Not used for Autodef and Cleanup
NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp)
{
BioseqPtr bsp, prot_bsp;
@@ -14923,13 +16184,83 @@ NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp)
return TRUE;
}
+//Not used for Autodef and Cleanup
+/*
+ * ConvertmRNAToCodingRegion
+ *
+ * Turns an mRNA feature into a coding region in place.
+ *
+ * Returns FALSE when sfp is NULL or is not an mRNA feature, TRUE otherwise.
+ *
+ * The RnaRef is replaced by a new CdRegion.  When the feature's Bioseq is
+ * found, ExtraCDSCreationActions is run, and if a protein feature with a
+ * ProtRef is then found, the mRNA product name and the feature comment are
+ * moved into the ProtRef name list.  The name list takes ownership of the
+ * moved strings.
+ */
+NLM_EXTERN Boolean ConvertmRNAToCodingRegion (SeqFeatPtr sfp)
+{
+  BioseqPtr bsp, prot_bsp;
+  SeqFeatPtr prot;
+  ProtRefPtr prp;
+  RnaRefPtr rrp;
+  CharPtr product = NULL;
+
+  if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA) {
+    return FALSE;
+  }
+
+  /* copy the product name before the RnaRef that holds it is freed */
+  rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
+  if (rrp != NULL && rrp->ext.choice == 1) {
+    product = StringSave(rrp->ext.value.ptrvalue);
+  }
+
+  sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
+  sfp->data.value.ptrvalue = CdRegionNew ();
+  sfp->data.choice = SEQFEAT_CDREGION;
+  sfp->idx.subtype = 0;
+
+  bsp = BioseqFindFromSeqLoc (sfp->location);
+  if (bsp != NULL) {
+    ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp));
+    prot_bsp = BioseqFindFromSeqLoc (sfp->product);
+    prot = GetProtFeature (prot_bsp);
+    if (prot != NULL) {
+      prp = prot->data.value.ptrvalue;
+      /* a protein feature without a ProtRef has no name list to extend */
+      if (prp != NULL) {
+        if (!StringHasNoText(product)) {
+          ValNodeAddPointer (&prp->name, 0, product);
+          product = NULL;
+        }
+        if (!StringHasNoText (sfp->comment)) {
+          ValNodeAddPointer (&prp->name, 0, sfp->comment);
+          sfp->comment = NULL;
+        }
+      }
+    }
+  }
+  /* still owned here if it was not handed to the name list */
+  product = MemFree (product);
+
+  return TRUE;
+}
+
+//Not used for Autodef and Cleanup
+/*
+ * ConverttRNAToGene
+ *
+ * Turns a tRNA feature into a gene feature in place.  The new GeneRef's
+ * desc is the string returned by GetRNARefProductString for the old RnaRef.
+ *
+ * Returns FALSE, leaving the feature untouched, when sfp is NULL, is not a
+ * tRNA feature, has no RnaRef, has an RnaRef whose ext is not choice 2 with
+ * a non-NULL payload, or when the GeneRef cannot be allocated.  Returns
+ * TRUE after a successful conversion.
+ */
+NLM_EXTERN Boolean ConverttRNAToGene(SeqFeatPtr sfp)
+{
+  RnaRefPtr rrp;
+  GeneRefPtr grp;
+
+  if (sfp == NULL || sfp->idx.subtype != FEATDEF_tRNA
+      || (rrp = (RnaRefPtr)(sfp->data.value.ptrvalue)) == NULL
+      || rrp->ext.choice != 2
+      || rrp->ext.value.ptrvalue == NULL)
+  {
+    return FALSE;
+  }
+
+  grp = GeneRefNew();
+  if (grp == NULL) {
+    /* allocation failed: keep the tRNA feature intact */
+    return FALSE;
+  }
+  /* read the product string before the RnaRef is released */
+  grp->desc = GetRNARefProductString(rrp, NULL);
+  sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
+  sfp->data.value.ptrvalue = grp;
+  sfp->data.choice = SEQFEAT_GENE;
+  sfp->idx.subtype = 0;
+  return TRUE;
+}
+
+//Not used for Autodef and Cleanup
/* Runs InstantiateMatPeptideProductForProteinFeature on every feature under
 * sep; no user data is passed to the callback. */
NLM_EXTERN void InstantiateMatPeptideProducts (SeqEntryPtr sep)
{
VisitFeaturesInSep (sep, NULL, InstantiateMatPeptideProductForProteinFeature);
}
+//Not used for Autodef and Cleanup
NLM_EXTERN CharPtr GetTSAIDDB (BioseqPtr bsp)
{
CharPtr db = NULL, cp;
@@ -14953,6 +16284,7 @@ NLM_EXTERN CharPtr GetTSAIDDB (BioseqPtr bsp)
}
+//Not used for Autodef and Cleanup
static void ConvertLocalIdsToBarcodeIdsCallback (BioseqPtr bsp, Pointer data)
{
SeqIdPtr sip_local = NULL;
@@ -14990,33 +16322,45 @@ static void ConvertLocalIdsToBarcodeIdsCallback (BioseqPtr bsp, Pointer data)
}
+//Not used for Autodef and Cleanup
/* Runs ConvertLocalIdsToBarcodeIdsCallback on every Bioseq under sep; sep
 * itself is also handed to the callback as its data argument. */
NLM_EXTERN void ConvertLocalIdsToBarcodeIds (SeqEntryPtr sep)
{
VisitBioseqsInSep (sep, sep, ConvertLocalIdsToBarcodeIdsCallback);
}
+//LCOV_EXCL_STOP
+
+/* Maps a modifier name to its position in DefLineModifiers[], comparing
+ * case-insensitively.  "specimen-voucher" is special-cased to
+ * DEFLINE_POS_Specimen_voucher.  Returns -1 when no entry matches. */
+NLM_EXTERN Int4 GetDeflinePosForFieldName(CharPtr name)
+{
+  Int4 pos;
+
+  if (StringICmp(name, "specimen-voucher") == 0) {
+    return DEFLINE_POS_Specimen_voucher;
+  }
+  for (pos = 0; pos < numDefLineModifiers; pos++) {
+    if (StringICmp(name, DefLineModifiers[pos].name) == 0) {
+      return pos;
+    }
+  }
+  return -1;
+}
+//LCOV_EXCL_START
+//Not used in Autodef or Cleanup
NLM_EXTERN Int4 GetDeflinePosForFieldType (ValNodePtr field)
{
- Int4 i, rval = -1;
+ Int4 rval = -1;
CharPtr name;
name = SummarizeFieldType (field);
- if (StringICmp (name, "specimen-voucher") == 0) {
- rval = DEFLINE_POS_Specimen_voucher;
- } else {
- for (i = 0; i < numDefLineModifiers; i++) {
- if (StringICmp (name, DefLineModifiers[i].name) == 0) {
- rval = i;
- break;
- }
- }
- }
+ rval = GetDeflinePosForFieldName(name);
name = MemFree (name);
return rval;
}
-
+//Not used in Autodef or Cleanup
static void RemoveUnusedFieldTypes (FieldTypePtr PNTR orig_list)
{
ValNodePtr vnp, prev = NULL, vnp_next;
@@ -15040,7 +16384,7 @@ static void RemoveUnusedFieldTypes (FieldTypePtr PNTR orig_list)
}
}
-
+//Not used in Autodef or Cleanup
static Boolean RemoveMatchingFieldType (FieldTypePtr PNTR orig_list, FieldTypePtr match)
{
ValNodePtr vnp, prev = NULL, vnp_next;
@@ -15068,7 +16412,7 @@ static Boolean RemoveMatchingFieldType (FieldTypePtr PNTR orig_list, FieldTypePt
return rval;
}
-
+//Not used in Autodef or Cleanup
static Boolean ListHasMatchingFieldType (FieldTypePtr list, FieldTypePtr match)
{
Boolean rval = FALSE;
@@ -15100,7 +16444,7 @@ static Int4 DefLineFieldTypeSortOrder [] = {
Source_qual_breed
};
-
+//Not used in Autodef or Cleanup
static int CompareFieldTypeByImportance (FieldTypePtr field1, FieldTypePtr field2)
{
int rval = 0;
@@ -15129,6 +16473,7 @@ static int CompareFieldTypeByImportance (FieldTypePtr field1, FieldTypePtr field
return rval;
}
+//Not used in Autodef or Cleanup
static int LIBCALLBACK SortFieldTypeByImportance (
VoidPtr ptr1,
VoidPtr ptr2
@@ -15156,7 +16501,7 @@ typedef struct uniqbiosource {
ValNodePtr available_fields;
ValNodePtr strings;
} UniqBioSourceData, PNTR UniqBioSourcePtr;
-
+//Not used in Autodef or Cleanup
static Boolean AddQualToUniqBioSource (
UniqBioSourcePtr u,
FieldTypePtr field
@@ -15192,7 +16537,7 @@ static Boolean AddQualToUniqBioSource (
return rval;
}
-
+//Not used in Autodef or Cleanup
static UniqBioSourcePtr UniqBioSourceNew (BioSourcePtr biop)
{
UniqBioSourcePtr u;
@@ -15210,6 +16555,7 @@ static UniqBioSourcePtr UniqBioSourceNew (BioSourcePtr biop)
return u;
}
+//Not used in Autodef or Cleanup
static UniqBioSourcePtr UniqBioSourceFree (UniqBioSourcePtr u)
{
if (u != NULL) {
@@ -15220,7 +16566,7 @@ static UniqBioSourcePtr UniqBioSourceFree (UniqBioSourcePtr u)
return u;
}
-
+//Not used in Autodef or Cleanup
static UniqBioSourcePtr UniqBioSourceCopy (UniqBioSourcePtr u)
{
UniqBioSourcePtr u2;
@@ -15236,7 +16582,7 @@ static UniqBioSourcePtr UniqBioSourceCopy (UniqBioSourcePtr u)
return u2;
}
-
+//Not used in Autodef or Cleanup
/* The CompareOrganismDescriptors function compares the contents of the
* lists of strings for each BioSrcDesc item.
* The function returns:
@@ -15277,7 +16623,7 @@ static int CompareUniqBioSource (
}
}
-
+//Not used in Autodef or Cleanup
static Boolean RemoveFieldFromUniqBioSource (UniqBioSourcePtr u, FieldTypePtr field)
{
Boolean rval = FALSE;
@@ -15288,7 +16634,7 @@ static Boolean RemoveFieldFromUniqBioSource (UniqBioSourcePtr u, FieldTypePtr fi
return rval;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr UniqBioSourceListFree (ValNodePtr list)
{
ValNodePtr list_next;
@@ -15303,7 +16649,7 @@ static ValNodePtr UniqBioSourceListFree (ValNodePtr list)
return list;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr UniqBioSourceListCopy (ValNodePtr orig)
{
ValNodePtr list = NULL, prev = NULL, vnp;
@@ -15325,7 +16671,7 @@ static ValNodePtr UniqBioSourceListCopy (ValNodePtr orig)
return list;
}
-
+//Not used in Autodef or Cleanup
static int LIBCALLBACK SortUniqBioSource (VoidPtr ptr1, VoidPtr ptr2)
{
@@ -15341,14 +16687,14 @@ static int LIBCALLBACK SortUniqBioSource (VoidPtr ptr1, VoidPtr ptr2)
return 0;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr UniqBioSourceListSort (ValNodePtr orig)
{
orig = ValNodeSort (orig, SortUniqBioSource);
return orig;
}
-
+//Not used in Autodef or Cleanup
static Boolean AddQualToUniqBioSourceList (ValNodePtr list, FieldTypePtr field)
{
Boolean rval = FALSE;
@@ -15359,7 +16705,7 @@ static Boolean AddQualToUniqBioSourceList (ValNodePtr list, FieldTypePtr field)
return rval;
}
-
+//Not used in Autodef or Cleanup
static Boolean RemoveFieldFromUniqBioSourceList (ValNodePtr list, FieldTypePtr field)
{
Boolean rval = FALSE;
@@ -15385,7 +16731,7 @@ typedef struct uniqbiosrcgrp {
ValNodePtr biop_list;
} UniqBioSrcGrpData, PNTR UniqBioSrcGrpPtr;
-
+//Not used in Autodef or Cleanup
static UniqBioSrcGrpPtr UniqBioSrcGrpNew (ValNodePtr biop_list)
{
UniqBioSrcGrpPtr g;
@@ -15396,7 +16742,7 @@ static UniqBioSrcGrpPtr UniqBioSrcGrpNew (ValNodePtr biop_list)
return g;
}
-
+//Not used in Autodef or Cleanup
static UniqBioSrcGrpPtr UniqBioSrcGrpFree (UniqBioSrcGrpPtr g)
{
if (g != NULL) {
@@ -15406,7 +16752,7 @@ static UniqBioSrcGrpPtr UniqBioSrcGrpFree (UniqBioSrcGrpPtr g)
return g;
}
-
+//Not used in Autodef or Cleanup
static UniqBioSrcGrpPtr UniqBioSrcGrpCopy (UniqBioSrcGrpPtr orig)
{
UniqBioSrcGrpPtr g;
@@ -15419,7 +16765,7 @@ static UniqBioSrcGrpPtr UniqBioSrcGrpCopy (UniqBioSrcGrpPtr orig)
return g;
}
-
+//Not used in Autodef or Cleanup
static Boolean AddQualToUniqBioSrcGrp (UniqBioSrcGrpPtr g, FieldTypePtr field)
{
Boolean rval = FALSE;
@@ -15433,7 +16779,7 @@ static Boolean AddQualToUniqBioSrcGrp (UniqBioSrcGrpPtr g, FieldTypePtr field)
return rval;
}
-
+//Not used in Autodef or Cleanup
static Boolean RemoveFieldFromUniqBioSrcGrp (UniqBioSrcGrpPtr g, FieldTypePtr field)
{
Boolean rval = FALSE;
@@ -15444,7 +16790,7 @@ static Boolean RemoveFieldFromUniqBioSrcGrp (UniqBioSrcGrpPtr g, FieldTypePtr fi
return rval;
}
-
+//Not used in Autodef or Cleanup
static FieldTypePtr GetAllPresentQualsForGroup (UniqBioSrcGrpPtr g)
{
ValNodePtr vnp;
@@ -15480,7 +16826,7 @@ static FieldTypePtr GetAllPresentQualsForGroup (UniqBioSrcGrpPtr g)
return match_list;
}
-
+//Not used in Autodef or Cleanup
static FieldTypePtr GetAllQualsForGroup (UniqBioSrcGrpPtr g)
{
ValNodePtr vnp, tmp;
@@ -15511,7 +16857,7 @@ static FieldTypePtr GetAllQualsForGroup (UniqBioSrcGrpPtr g)
return field_list_head;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr UniqBioSrcGrpListFree (ValNodePtr list)
{
ValNodePtr list_next;
@@ -15526,7 +16872,7 @@ static ValNodePtr UniqBioSrcGrpListFree (ValNodePtr list)
return list;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr UniqBioSrcGrpListCopy (ValNodePtr orig)
{
ValNodePtr list = NULL, prev = NULL, vnp;
@@ -15548,7 +16894,7 @@ static ValNodePtr UniqBioSrcGrpListCopy (ValNodePtr orig)
return list;
}
-
+//Not used in Autodef or Cleanup
/* NOTE - we want to sort groups from most biops to least biops */
static int LIBCALLBACK SortUniqBioSrcGrp (VoidPtr ptr1, VoidPtr ptr2)
@@ -15573,14 +16919,14 @@ static int LIBCALLBACK SortUniqBioSrcGrp (VoidPtr ptr1, VoidPtr ptr2)
return rval;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr BioSrcGrpListSort (ValNodePtr orig)
{
orig = ValNodeSort (orig, SortUniqBioSrcGrp);
return orig;
}
-
+//Not used in Autodef or Cleanup
static Boolean RemoveFieldFromUniqBioSrcGrpList (ValNodePtr list, FieldTypePtr field)
{
Boolean rval = FALSE;
@@ -15592,7 +16938,7 @@ static Boolean RemoveFieldFromUniqBioSrcGrpList (ValNodePtr list, FieldTypePtr f
return rval;
}
-
+//Not used in Autodef or Cleanup
static void ReGroupUniqBioSrcGrpList (ValNodePtr list)
{
ValNodePtr list_next, vnp;
@@ -15620,7 +16966,7 @@ static void ReGroupUniqBioSrcGrpList (ValNodePtr list)
}
}
-
+//Not used in Autodef or Cleanup
static Int4 FindMaxOrgsInUniqBioSrcGrpList (ValNodePtr list)
{
Int4 max = 0;
@@ -15636,7 +16982,7 @@ static Int4 FindMaxOrgsInUniqBioSrcGrpList (ValNodePtr list)
return max;
}
-
+//Not used in Autodef or Cleanup
static Int4 CountUniqueOrgsInUniqBioSrcGrpList (ValNodePtr list)
{
Int4 count = 0;
@@ -15652,7 +16998,7 @@ static Int4 CountUniqueOrgsInUniqBioSrcGrpList (ValNodePtr list)
return count;
}
-
+//Not used in Autodef or Cleanup
static Boolean AddQualToUniqBioSrcGrpList (ValNodePtr list, FieldTypePtr field)
{
Boolean rval = FALSE;
@@ -15680,7 +17026,7 @@ typedef struct qualcombo {
ValNodePtr group_list;
} QualComboData, PNTR QualComboPtr;
-
+//Not used in Autodef or Cleanup
/* This function creates a new ModifierCombination item using the supplied
* OrgGroup list. It calculates the number of groups, maximum number of
* organisms in any one group, and number of unique organisms.
@@ -15706,7 +17052,7 @@ static QualComboPtr QualComboNew (ValNodePtr grp_list)
return newm;
}
-
+//Not used in Autodef or Cleanup
/* The CopyQualCombo creates a copy of a QualCombo item.
* This includes creating a copy of the number and list of modifiers
* and a copy of the number and list of OrgGroups, as well as copying the
@@ -15728,6 +17074,7 @@ static QualComboPtr QualComboCopy (
return newm;
}
+//Not used in Autodef or Cleanup
/* This function frees the memory associated with a list of
* ModifierCombination items.
*/
@@ -15743,7 +17090,7 @@ static QualComboPtr QualComboFree (
return m;
}
-
+//Not used in Autodef or Cleanup
static void TESTDisplayQualCombo (QualComboPtr q)
{
ValNodePtr vnp_t, vnp_b, vnp_q, vnp_f;
@@ -15763,7 +17110,7 @@ static void TESTDisplayQualCombo (QualComboPtr q)
}
}
-
+//Not used in Autodef or Cleanup
static Boolean AddQualToQualCombo (
QualComboPtr m,
FieldTypePtr field
@@ -15787,7 +17134,7 @@ static Boolean AddQualToQualCombo (
return rval;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr LIBCALLBACK QualComboListFree (ValNodePtr list)
{
ValNodePtr list_next;
@@ -15802,7 +17149,7 @@ static ValNodePtr LIBCALLBACK QualComboListFree (ValNodePtr list)
return list;
}
-
+//Not used in Autodef or Cleanup
/* NOTE - we want to sort groups from most unique organisms to least unique organisms */
/* secondary sort - most groups to least groups */
/* tertiary sort - fewer max orgs in group to most max orgs in group */
@@ -15859,7 +17206,7 @@ static int LIBCALLBACK SortQualCombo (VoidPtr ptr1, VoidPtr ptr2)
return rval;
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr QualComboListSort (ValNodePtr orig)
{
orig = ValNodeSort (orig, SortQualCombo);
@@ -15868,7 +17215,7 @@ static ValNodePtr QualComboListSort (ValNodePtr orig)
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr ExpandOneComboListUsingAllPresentQuals (QualComboPtr q)
{
ValNodePtr new_list = NULL, vnp, vnp_m;
@@ -15897,7 +17244,7 @@ static ValNodePtr ExpandOneComboListUsingAllPresentQuals (QualComboPtr q)
return new_list;
}
-
+//Not used in Autodef or Cleanup
static void TESTDisplayList (ValNodePtr new_list)
{
QualComboPtr q;
@@ -15910,7 +17257,7 @@ static void TESTDisplayList (ValNodePtr new_list)
}
-
+//Not used in Autodef or Cleanup
static Boolean IsQualOkForDefline (ValNodePtr vnp)
{
ValNodePtr scp;
@@ -15928,7 +17275,7 @@ static Boolean IsQualOkForDefline (ValNodePtr vnp)
}
-
+//Not used in Autodef or Cleanup
static ValNodePtr ExpandOneComboListUsingAnyPresentQuals (QualComboPtr q)
{
ValNodePtr new_list = NULL, vnp, vnp_m;
@@ -15962,7 +17309,7 @@ static ValNodePtr ExpandOneComboListUsingAnyPresentQuals (QualComboPtr q)
return new_list;
}
-
+//Not used in Autodef or Cleanup
static Boolean ExpandComboList (ValNodePtr PNTR list)
{
QualComboPtr q;
@@ -15998,7 +17345,7 @@ static Boolean ExpandComboList (ValNodePtr PNTR list)
return any_expansion;
}
-
+//Not used in Autodef or Cleanup
static void BuildUniqBioSrcList (
BioSourcePtr biop,
Pointer userdata
@@ -16017,7 +17364,7 @@ static void BuildUniqBioSrcList (
vnbp->tail = vnp;
}
-
+//Not used in Autodef or Cleanup
/* The function FindBestQualCombo tries to find the best combination of modifiers
* to create unique organism descriptions. This is accomplished by
* creating a list of required modifiers, and then creating a list of
@@ -16119,7 +17466,7 @@ static QualComboPtr FindBestQualComboEx(ValNodePtr PNTR biop_list, ModifierItemL
return best_combo;
}
-
+//Not used in Autodef or Cleanup
static QualComboPtr FindBestQualCombo(SeqEntryPtr sep, ModifierItemLocalPtr ItemList)
{
QualComboPtr best_combo;
@@ -16136,7 +17483,7 @@ static QualComboPtr FindBestQualCombo(SeqEntryPtr sep, ModifierItemLocalPtr Item
return best_combo;
}
-
+//Not used in Autodef or Cleanup
static ModifierCombinationPtr ModifierCombinationFromQualCombo (QualComboPtr q)
{
ModifierCombinationPtr m;
@@ -16165,6 +17512,7 @@ static ModifierCombinationPtr ModifierCombinationFromQualCombo (QualComboPtr q)
}
+//Not used in Autodef or Cleanup
NLM_EXTERN ValNodePtr FindBestModifiersForDeflineClauseList (
ValNodePtr defline_clauses,
ModifierItemLocalPtr ItemList
@@ -16204,7 +17552,7 @@ NLM_EXTERN ValNodePtr FindBestModifiersForDeflineClauseList (
return modifier_indices;
}
-
+//Not used in Autodef or Cleanup
NLM_EXTERN ValNodePtr FindBestModifiersEx(
SeqEntryPtr sep,
ModifierItemLocalPtr ItemList,
@@ -16223,12 +17571,14 @@ NLM_EXTERN ValNodePtr FindBestModifiersEx(
} else {
m = FindBestCombo (sep, ItemList);
}
- modifier_indices = CopyModifierIndices (m->modifier_indices);
+ if (m != NULL) {
+ modifier_indices = CopyModifierIndices (m->modifier_indices);
+ }
FreeModifierCombo (m);
return modifier_indices;
}
-
+//Not used in Autodef or Cleanup
NLM_EXTERN ValNodePtr FindBestModifiers(
SeqEntryPtr sep,
ModifierItemLocalPtr ItemList
@@ -16238,7 +17588,7 @@ NLM_EXTERN ValNodePtr FindBestModifiers(
return FindBestModifiersEx (sep, ItemList, FALSE);
}
-
+//Not used in Autodef or Cleanup
/* In this test function, we create a list of biosources with various combinations of modifiers,
* and then calculate the best combination to use for the organism description.
*/
@@ -16275,6 +17625,7 @@ static void ClearBiopQuals (BioSourcePtr biop)
}
+//Not used in Autodef or Cleanup
static void PrintBiopQuals (BioSourcePtr biop, FILE *fp)
{
OrgModPtr mod;
@@ -16290,7 +17641,7 @@ static void PrintBiopQuals (BioSourcePtr biop, FILE *fp)
fprintf (fp, "\n");
}
-
+//Not used in Autodef or Cleanup
static void PrintModifiers (ValNodePtr modifiers, FILE *fp)
{
ValNodePtr vnp;
@@ -16306,13 +17657,10 @@ static void PrintModifiers (ValNodePtr modifiers, FILE *fp)
}
+//Not used in Autodef or Cleanup
static Boolean IsNonTextDeflineQual (Int4 srcqual)
{
- if (srcqual == DEFLINE_POS_Transgenic
- || srcqual == DEFLINE_POS_Germline
- || srcqual == DEFLINE_POS_Metagenomic
- || srcqual == DEFLINE_POS_Environmental_sample
- || srcqual == DEFLINE_POS_Rearranged)
+ if (srcqual == DEFLINE_POS_Transgenic)
{
return TRUE;
}
@@ -16322,7 +17670,7 @@ static Boolean IsNonTextDeflineQual (Int4 srcqual)
}
}
-
+//Not used in Autodef or Cleanup
static void CreateOneTest (FILE *fp, Int4 i, Int4 j, Int4 k, BioSourcePtr PNTR biops, Int4 num_biops, Boolean vary1, Boolean vary2, Boolean vary3)
{
Int4 n;
@@ -16373,6 +17721,7 @@ static void CreateOneTest (FILE *fp, Int4 i, Int4 j, Int4 k, BioSourcePtr PNTR b
}
+//Not used in Autodef or Cleanup
extern void TestFindBestQualCombo (FILE *fp)
{
BioSourcePtr biops[3];
@@ -16425,7 +17774,6 @@ extern void TestFindBestQualCombo (FILE *fp)
-
/* collection_date has a controlled format.
* It is YYYY or Mmm-YYYY or DD-Mmm-YYYY where Mmm = Jan, Feb, Mar, Apr, May,
* Jun, Jul, Aug, Sep, Oct,
@@ -16436,7 +17784,7 @@ extern void TestFindBestQualCombo (FILE *fp)
*
* If the date supplied is ambiguous (01/03/05), can you allow the indexer to choose which field goes in Mmm and which in DD.
*/
-
+//Not used in Autodef or Cleanup
NLM_EXTERN Int4 ReadNumberFromToken (CharPtr token, Int4 token_len)
{
Int4 val = 0;
@@ -16456,6 +17804,7 @@ NLM_EXTERN Int4 ReadNumberFromToken (CharPtr token, Int4 token_len)
return val;
}
+//Not used in Autodef or Cleanup
static Int4 GetYearFromNumber(Int4 year)
{
Nlm_DayTime dt;
@@ -16475,11 +17824,12 @@ static Int4 GetYearFromNumber(Int4 year)
return year;
}
+//Not used in Autodef or Cleanup
NLM_EXTERN Int4 GetYearFromToken (CharPtr token, Int4 token_len)
{
Int4 year = 0;
- if (token == NULL || token_len == 0 || token_len > 4)
+ if (token == NULL || token_len == 0 || token_len > 4 || token_len == 3)
{
return 0;
}
@@ -16495,7 +17845,7 @@ static CharPtr month_abbrevs [12] =
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
-
+//Not used in Autodef or Cleanup
NLM_EXTERN CharPtr GetMonthAbbrev (Int4 n)
{
if (n > 0 && n <= 12) {
@@ -16512,7 +17862,7 @@ static Int4 days_in_month [12] =
31, 31, 30, 31, 30, 31
};
-
+//Not used in Autodef or Cleanup
NLM_EXTERN Int4 GetDaysInMonth (Int4 n)
{
if (n > 0 && n <= 12) {
@@ -16522,7 +17872,7 @@ NLM_EXTERN Int4 GetDaysInMonth (Int4 n)
}
}
-
+//Not used in Autodef or Cleanup
NLM_EXTERN Int4 GetMonthNumFromAbbrev (CharPtr month_abbrev)
{
Int4 i;
@@ -16535,6 +17885,7 @@ NLM_EXTERN Int4 GetMonthNumFromAbbrev (CharPtr month_abbrev)
return -1;
}
+//Not used in Autodef or Cleanup
static Int4 GetDaysInMonthByName (CharPtr month)
{
Int4 month_num;
@@ -16549,6 +17900,7 @@ static Int4 GetDaysInMonthByName (CharPtr month)
return 0;
}
+//Not used in Autodef or Cleanup
NLM_EXTERN CharPtr GetMonthFromToken (CharPtr token, Int4 token_len)
{
Int4 month_num;
@@ -16590,10 +17942,13 @@ NLM_EXTERN CharPtr GetMonthFromToken (CharPtr token, Int4 token_len)
}
}
+//Not used in Autodef or Cleanup
static Boolean
ChooseDayAndYear
(Int4 num_1,
+ Int4 num_1_len,
Int4 num_2,
+ Int4 num_2_len,
CharPtr month,
Boolean year_first,
Int4Ptr day,
@@ -16608,40 +17963,53 @@ ChooseDayAndYear
{
return FALSE;
}
- else if (num_1 == 0)
+ else if (num_1 == 0 && num_1_len == 2)
{
*year = 2000;
*day = num_2;
}
- else if (num_2 == 0)
+ else if (num_2 == 0 && num_2_len == 2)
{
*year = 2000;
*day = num_1;
}
- else if (num_1 > GetDaysInMonthByName (month))
+ else if (num_1 == 0 || num_2 == 0)
+ {
+ return FALSE;
+ }
+ else if (num_1 > GetDaysInMonthByName (month) && (num_2_len == 2 || num_2_len == 4))
{
+ if (num_2 > GetDaysInMonthByName (month))
+ {
+ return FALSE;
+ }
*year = num_1;
*day = num_2;
}
- else if (num_2 > GetDaysInMonthByName (month))
+ else if (num_2 > GetDaysInMonthByName (month) && (num_1_len == 2 || num_1_len == 4))
{
*year = num_2;
*day = num_1;
}
- else if (year_first)
+ else if (year_first && (num_1_len == 2 || num_1_len == 4))
{
*year = num_1;
*day = num_2;
}
- else
+ else if (num_2_len == 2 || num_2_len == 4)
{
*year = num_2;
*day = num_1;
}
+ else
+ {
+ return FALSE;
+ }
return TRUE;
}
+//Not used in Autodef or Cleanup
static Boolean
ChooseMonthAndYear
(Int4 num_1,
@@ -16702,6 +18070,7 @@ ChooseMonthAndYear
}
+//Not used in Autodef or Cleanup
static Boolean ChooseMonthAndDay
(Int4 num_1,
Int4 num_2,
@@ -16747,6 +18116,7 @@ static Boolean ChooseMonthAndDay
return TRUE;
}
+//Not used in Cleanup or Autodef
NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first, BoolPtr month_ambiguous)
{
CharPtr reformatted_date = NULL, cp;
@@ -16852,7 +18222,7 @@ NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first,
month = GetMonthFromToken (token_list [0], token_lens [0]);
num_1 = ReadNumberFromToken (token_list [1], token_lens [1]);
num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
- if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
+ if (!ChooseDayAndYear (num_1, token_lens[1], num_2, token_lens[2], month, FALSE, &day, &year))
{
return NULL;
}
@@ -16862,7 +18232,7 @@ NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first,
month = GetMonthFromToken (token_list [1], token_lens [1]);
num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
- if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
+ if (!ChooseDayAndYear (num_1, token_lens[0], num_2, token_lens[2], month, FALSE, &day, &year))
{
return NULL;
}
@@ -16872,7 +18242,7 @@ NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first,
month = GetMonthFromToken (token_list [2], token_lens [2]);
num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
num_2 = ReadNumberFromToken (token_list [1], token_lens [1]);
- if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
+ if (!ChooseDayAndYear (num_1, token_lens[0], num_2, token_lens[1], month, FALSE, &day, &year))
{
return NULL;
}
@@ -16991,6 +18361,37 @@ NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first,
}
+//Not used in Autodef or Cleanup
+/* Replaces *orig_date with AssemblyDateFromCollectionDate(ReformatDateStringEx(*orig_date)); returns TRUE only if *orig_date was replaced, otherwise leaves it untouched. */
+NLM_EXTERN Boolean ReformatAssemblyDate (CharPtr PNTR orig_date)
+{
+  CharPtr collection_date;
+  CharPtr assembly_date;
+  Boolean ambiguous = FALSE;
+
+  if (orig_date == NULL || StringHasNoText (*orig_date)) {
+    return FALSE;
+  }
+
+  collection_date = ReformatDateStringEx(*orig_date, TRUE, &ambiguous);
+  if (StringHasNoText(collection_date)) {
+    collection_date = MemFree (collection_date);
+    return FALSE;
+  }
+
+  assembly_date = AssemblyDateFromCollectionDate (collection_date, ambiguous);
+  collection_date = MemFree (collection_date);
+  if (StringHasNoText (assembly_date)) {
+    assembly_date = MemFree (assembly_date); /* a blank result is never handed to the caller, so release it here instead of leaking it */
+    return FALSE;
+  }
+  *orig_date = MemFree (*orig_date);
+  *orig_date = assembly_date; /* ownership of the new string passes to the caller */
+  return TRUE;
+}
+
+
+//Not used in Autodef or Cleanup
NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date)
{
CharPtr reformatted_date = NULL, cp;
@@ -17004,8 +18405,9 @@ NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date)
Int4 token_len;
Int4 month_token = -1;
Boolean is_num;
- Int4 num_1, num_2;
- Int4 i;
+ Int4 nums[2];
+ Int4 num_lens[2];
+ Int4 i, nums_pos;
if (StringHasNoText (orig_date))
{
@@ -17093,40 +18495,28 @@ NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date)
}
else if (num_tokens == 3)
{
- if (month_token == 0)
- {
- month = GetMonthFromToken (token_list [0], token_lens [0]);
- num_1 = ReadNumberFromToken (token_list [1], token_lens [1]);
- num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
- if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
- {
- return NULL;
- }
- }
- else if (month_token == 1)
+ if (month_token < 0 || month_token > 2)
{
- month = GetMonthFromToken (token_list [1], token_lens [1]);
- num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
- num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
- if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
- {
- return NULL;
- }
+ return NULL;
}
- else if (month_token == 2)
- {
- month = GetMonthFromToken (token_list [2], token_lens [2]);
- num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
- num_2 = ReadNumberFromToken (token_list [1], token_lens [1]);
- if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
- {
+ nums_pos = 0;
+ for (i = 0; i < 3; i++) {
+ if (i == month_token) {
+ month = GetMonthFromToken (token_list[i], token_lens[i]);
+ } else if (token_lens[i] == 3) {
return NULL;
+ } else {
+ nums[nums_pos] = ReadNumberFromToken(token_list[i], token_lens[i]);
+ num_lens[nums_pos] = token_lens[i];
+ nums_pos++;
}
}
- else
+
+ if (!ChooseDayAndYear (nums[0], num_lens[0], nums[1], num_lens[1], month, FALSE, &day, &year))
{
- return NULL;
+ return NULL;
}
+
year = GetYearFromNumber(year);
}
@@ -17160,7 +18550,7 @@ NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date)
return reformatted_date;
}
-
+//Not used in Autodef or Cleanup
NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp)
{
SeqFeatPtr orig_prot, new_prot;
@@ -17192,6 +18582,7 @@ NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp)
}
+//Not used in Autodef or Cleanup
NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqFeatPtr top_cds, Boolean remove_original)
{
BioseqPtr prot_bsp;
@@ -17242,6 +18633,7 @@ NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqF
}
+//Not used in Autodef or Cleanup
NLM_EXTERN Boolean AutoConvertCDSToMiscFeat (SeqFeatPtr cds, Boolean remove_original)
{
BioseqPtr bsp;
@@ -17277,7 +18669,7 @@ NLM_EXTERN Boolean AutoConvertCDSToMiscFeat (SeqFeatPtr cds, Boolean remove_orig
return rval;
}
-
+//Not part of Autodef or Cleanup
NLM_EXTERN SeqEntryPtr GetBestSeqEntryForItem (ValNodePtr vnp)
{
SeqFeatPtr sfp;
@@ -17310,9 +18702,30 @@ NLM_EXTERN SeqEntryPtr GetBestSeqEntryForItem (ValNodePtr vnp)
return sep;
}
+
+//Not part of Autodef or Cleanup
+/* Returns TRUE if some node of list compares equal to sdp under AsnIoMemComp with SeqDescrAsnWrite; list links are restored before returning. */
+static Boolean IsDescriptorInList(SeqDescPtr sdp, SeqDescPtr list)
+{
+  SeqDescPtr node, saved_next;
+  Boolean same;
+  for (node = list; node != NULL; node = saved_next) {
+    saved_next = node->next;
+    node->next = NULL; /* temporarily unlink; presumably so the comparison sees this node alone */
+    same = AsnIoMemComp (sdp, node, (AsnWriteFunc) SeqDescrAsnWrite);
+    node->next = saved_next;
+    if (same) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+//Not part of Autodef or Cleanup
NLM_EXTERN void AddNewUniqueDescriptors (SeqDescrPtr PNTR new_set, SeqDescrPtr parent_set)
{
- SeqDescrPtr sdp, sdp_next, sdp_tmp, sdp_tmp_next;
+ SeqDescrPtr sdp, sdp_next;
Boolean found_match;
if (new_set == NULL || parent_set == NULL) return;
@@ -17327,16 +18740,7 @@ NLM_EXTERN void AddNewUniqueDescriptors (SeqDescrPtr PNTR new_set, SeqDescrPtr p
while (sdp != NULL) {
sdp_next = sdp->next;
sdp->next = NULL;
- for (sdp_tmp = *new_set, found_match = FALSE;
- sdp_tmp != NULL && !found_match;
- sdp_tmp = sdp_tmp->next) {
- sdp_tmp_next = sdp_tmp->next;
- sdp_tmp->next = NULL;
- if (AsnIoMemComp (sdp, sdp_tmp, (AsnWriteFunc) SeqDescrAsnWrite)) {
- found_match = TRUE;
- }
- sdp_tmp->next = sdp_tmp_next;
- }
+ found_match = IsDescriptorInList(sdp, *new_set);
if (!found_match) {
ValNodeLink (new_set,
AsnIoMemCopy ((Pointer) sdp,
@@ -17349,6 +18753,8 @@ NLM_EXTERN void AddNewUniqueDescriptors (SeqDescrPtr PNTR new_set, SeqDescrPtr p
}
}
+
+//Not part of Autodef or Cleanup
static void AddNewUniqueDescriptorsToSeqEntry (SeqEntryPtr sep, SeqDescrPtr parent_set)
{
BioseqPtr bsp;
@@ -17367,6 +18773,8 @@ static void AddNewUniqueDescriptorsToSeqEntry (SeqEntryPtr sep, SeqDescrPtr pare
}
}
+
+//Not part of Autodef or Cleanup
NLM_EXTERN void AddNewUniqueAnnotations (SeqAnnotPtr PNTR new_set, SeqAnnotPtr parent_set)
{
SeqAnnotPtr sap, sap_next, sap_tmp, sap_tmp_next, sap_copy, last_sap;
@@ -17404,6 +18812,7 @@ NLM_EXTERN void AddNewUniqueAnnotations (SeqAnnotPtr PNTR new_set, SeqAnnotPtr p
}
+//Not part of Autodef or Cleanup
static void AddItemListToSet (ValNodePtr item_list, BioseqSetPtr newset, Boolean for_segregate)
{
ValNodePtr vnp_item;
@@ -17494,6 +18903,7 @@ static void AddItemListToSet (ValNodePtr item_list, BioseqSetPtr newset, Boolean
}
+//Not part of AutoDef or Cleanup
static void AddCategorySeqEntriesToSet (BioseqSetPtr newset, ClickableItemPtr category)
{
ValNodePtr vnp_item;
@@ -17509,6 +18919,7 @@ static void AddCategorySeqEntriesToSet (BioseqSetPtr newset, ClickableItemPtr ca
}
}
+//Not part of AutoDef or Cleanup
static Boolean NeedsNewSet (SeqEntryPtr sep)
{
BioseqSetPtr bssp;
@@ -17529,6 +18940,7 @@ static Boolean NeedsNewSet (SeqEntryPtr sep)
}
+//Not part of AutoDef or Cleanup
static Boolean IsSingletonSet (SeqEntryPtr sep)
{
BioseqSetPtr bssp;
@@ -17552,6 +18964,7 @@ static Boolean IsSingletonSet (SeqEntryPtr sep)
}
+//Not part of AutoDef or Cleanup
static void AddAnnotsToSeqEntry (SeqEntryPtr sep, SeqAnnotPtr sap)
{
BioseqPtr bsp;
@@ -17586,7 +18999,7 @@ static void AddAnnotsToSeqEntry (SeqEntryPtr sep, SeqAnnotPtr sap)
}
}
-
+//Not part of AutoDef or Cleanup
static void PromoteSingletonSetsInSet (SeqEntryPtr sep)
{
ObjMgrDataPtr omdptop;
@@ -17634,7 +19047,7 @@ static void PromoteSingletonSetsInSet (SeqEntryPtr sep)
RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
}
-
+//Not part of AutoDef or Cleanup
NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues
(BioseqSetPtr bssp,
ValNodePtr value_lists)
@@ -17736,7 +19149,7 @@ NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues
SeqMgrLinkSeqEntry (tmp, OBJ_BIOSEQSET, parent_set);
/* need to update GatherIndex values */
AssignIDsInEntity (entityID, 0, NULL);
- } else {
+ } else if (first_new_sep != NULL) {
sep = first_new_sep->next;
while (sep != NULL) {
AddNewUniqueDescriptorsToSeqEntry (sep, parent_set->descr);
@@ -17758,7 +19171,7 @@ NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues
return parent_set;
}
-
+//Not part of Autodef or Cleanup
static void RemoveBioseqFromAlignmentsCallback (SeqAnnotPtr sap, Pointer data)
{
BioseqPtr bsp;
@@ -17831,7 +19244,7 @@ static void RemoveBioseqFromAlignmentsCallback (SeqAnnotPtr sap, Pointer data)
}
}
-
+//Not part of Autodef or Cleanup
/* expect that list is a valnode list with choice OBJ_BIOSEQ and data.ptrvalue a bioseq */
NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID)
{
@@ -17875,7 +19288,7 @@ NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID)
}
-
+//Not part of Autodef or cleanup
static void GetBioseqListCallback (BioseqPtr bsp, Pointer userdata)
{
if (bsp != NULL && userdata != NULL && ! ISA_aa (bsp->mol))
@@ -17884,6 +19297,8 @@ static void GetBioseqListCallback (BioseqPtr bsp, Pointer userdata)
}
}
+
+//Not part of Autodef or cleanup
NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberOfSets (Int4 num_sets, SeqEntryPtr sep)
{
ValNodePtr cip_list = NULL;
@@ -17919,6 +19334,7 @@ NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberOfSets (Int4 num_se
}
+//Not part of Autodef or cleanup
NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberPerSet (Int4 num_per_set, SeqEntryPtr sep)
{
ValNodePtr cip_list = NULL;
@@ -17949,6 +19365,7 @@ NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberPerSet (Int4 num_pe
}
+//Not part of Autodef or cleanup
NLM_EXTERN void SegregateSetsByNumber (SeqEntryPtr sep, Int4 num_sets)
{
ValNodePtr set_list;
@@ -17968,6 +19385,7 @@ NLM_EXTERN void SegregateSetsByNumber (SeqEntryPtr sep, Int4 num_sets)
}
+//Not part of Autodef or cleanup
NLM_EXTERN void SegregateSetsByNumberPerSet (SeqEntryPtr sep, Int4 num_per_set)
{
ValNodePtr set_list;
@@ -18015,6 +19433,7 @@ static CharPtr s_PlantGroupList[] = {
};
+//Not used by Autodef or cleanup
static void AssignBioseqToLineageGroup (BioseqPtr bsp, Pointer data)
{
SeqDescPtr sdp;
@@ -18048,6 +19467,7 @@ static void AssignBioseqToLineageGroup (BioseqPtr bsp, Pointer data)
}
+//Not used by Autodef or cleanup
static ValNodePtr MakeLineageGroupList (SeqEntryPtr sep, CharPtr PNTR lineage_strings)
{
ClickableItemPtr cip;
@@ -18086,6 +19506,7 @@ static ValNodePtr MakeLineageGroupList (SeqEntryPtr sep, CharPtr PNTR lineage_st
}
+//Not used by Autodef or cleanup
static ValNodePtr MakePlantGroupList (SeqEntryPtr sep)
{
return MakeLineageGroupList (sep, s_PlantGroupList);
@@ -18149,6 +19570,7 @@ static CharPtr s_FungusGroupList[] = {
NULL
};
+//Not used by Autodef or cleanup
NLM_EXTERN void SegregateSetsByFungusGroup (SeqEntryPtr sep)
{
ValNodePtr set_list;
@@ -18167,6 +19589,7 @@ NLM_EXTERN void SegregateSetsByFungusGroup (SeqEntryPtr sep)
}
+//Not part of Autodef or Cleanup
NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByBioseqList (SeqEntryPtr sep, ValNodePtr bsp_list)
{
ValNodeBlock b_list;
@@ -18185,7 +19608,7 @@ NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByBioseqList (SeqEntryPtr s
return cip_list;
}
-
+//Not part of Autodef or Cleanup
static void RemoveFoundBioseqFromAlignment (BioseqPtr bsp, Pointer data)
{
SeqAlignPtr PNTR pSalp;
@@ -18200,6 +19623,7 @@ static void RemoveFoundBioseqFromAlignment (BioseqPtr bsp, Pointer data)
}
+//Not part of Autodef or Cleanup
static void RemoveSequencesFromOtherSepFromAlignment (SeqEntryPtr exclude, SeqAnnotPtr sap)
{
SeqAlignPtr salp;
@@ -18221,6 +19645,7 @@ static void RemoveSequencesFromOtherSepFromAlignment (SeqEntryPtr exclude, SeqAn
}
+//Not part of Autodef or Cleanup
static void CopyAnnotToOtherSeq (SeqAnnotPtr sap, SeqEntryPtr list, SeqEntryPtr not_this_one)
{
SeqEntryPtr sep;
@@ -18249,6 +19674,7 @@ static void CopyAnnotToOtherSeq (SeqAnnotPtr sap, SeqEntryPtr list, SeqEntryPtr
}
+//Not part of Autodef or Cleanup
static void FixOriginalCopiedAlignment (SeqAnnotPtr sap, SeqEntryPtr list, SeqEntryPtr not_this_one)
{
SeqEntryPtr sep;
@@ -18261,6 +19687,7 @@ static void FixOriginalCopiedAlignment (SeqAnnotPtr sap, SeqEntryPtr list, SeqEn
}
+//Not part of Autodef or Cleanup
static void CopyBioseqListAlignments (BioseqSetPtr parent)
{
SeqEntryPtr sep;
@@ -18286,6 +19713,7 @@ static void CopyBioseqListAlignments (BioseqSetPtr parent)
}
+//Not part of Autodef or Cleanup
NLM_EXTERN void SegregateSetsByBioseqList (SeqEntryPtr sep, ValNodePtr vnp)
{
ValNodePtr set_list;
@@ -18305,7 +19733,7 @@ NLM_EXTERN void SegregateSetsByBioseqList (SeqEntryPtr sep, ValNodePtr vnp)
}
-
+//Not used for Autodef or Cleanup
static void SeqAnnotIsPairwiseAlignment (SeqAnnotPtr sap, Pointer data)
{
BoolPtr is;
@@ -18326,7 +19754,7 @@ static void SeqAnnotIsPairwiseAlignment (SeqAnnotPtr sap, Pointer data)
}
}
-
+//Not used for Autodef or Cleanup
NLM_EXTERN Boolean SeqEntryHasPairwiseAlignments (SeqEntryPtr sep)
{
Boolean rval = FALSE;
@@ -18338,13 +19766,13 @@ NLM_EXTERN Boolean SeqEntryHasPairwiseAlignments (SeqEntryPtr sep)
typedef Boolean (*Nlm_ParseProc) PROTO ((CharPtr, Pointer));
-
+//Not used for Autodef or Cleanup
static Boolean SkipToken (CharPtr cp, Pointer data)
{
return TRUE;
}
-
+//Not used for Autodef or Cleanup
static Boolean ParseLineOfTokens (CharPtr line, Nlm_ParseProc PNTR token_funcs, Pointer data)
{
CharPtr cp, cp_next;
@@ -18400,7 +19828,7 @@ typedef struct extractorinfo {
Boolean is_complement;
} ExtractorInfoData, PNTR ExtractorInfoPtr;
-
+//Not used for Autodef or Cleanup
static ExtractorInfoPtr ExtractorInfoNew ()
{
ExtractorInfoPtr ep = (ExtractorInfoPtr) MemNew (sizeof (ExtractorInfoData));
@@ -18408,7 +19836,7 @@ static ExtractorInfoPtr ExtractorInfoNew ()
return ep;
}
-
+//Not used for Autodef or Cleanup
static ExtractorInfoPtr ExtractorInfoFree (ExtractorInfoPtr ep)
{
if (ep != NULL) {
@@ -18420,7 +19848,7 @@ static ExtractorInfoPtr ExtractorInfoFree (ExtractorInfoPtr ep)
return ep;
}
-
+//Not used for Autodef or Cleanup
static Boolean ParseExtractorIdAndLength (CharPtr cp, Pointer data)
{
ExtractorInfoPtr ep;
@@ -18485,7 +19913,7 @@ static Boolean ParseExtractorIdAndLength (CharPtr cp, Pointer data)
return TRUE;
}
-
+//Not used for Autodef or Cleanup
static Boolean ParseHasITS1 (CharPtr cp, Pointer data)
{
ExtractorInfoPtr ep;
@@ -18505,7 +19933,7 @@ static Boolean ParseHasITS1 (CharPtr cp, Pointer data)
return TRUE;
}
-
+//Not used for Autodef or Cleanup
static Boolean ParseHasITS2 (CharPtr cp, Pointer data)
{
ExtractorInfoPtr ep;
@@ -18525,7 +19953,7 @@ static Boolean ParseHasITS2 (CharPtr cp, Pointer data)
return TRUE;
}
-
+//Not used for Autodef or Cleanup
static Boolean ParseITS1Range (CharPtr cp, Pointer data)
{
ExtractorInfoPtr ep;
@@ -18553,7 +19981,7 @@ static Boolean ParseITS1Range (CharPtr cp, Pointer data)
return rval;
}
-
+//Not used for Autodef or Cleanup
static Boolean ParseITS2Range (CharPtr cp, Pointer data)
{
ExtractorInfoPtr ep;
@@ -18581,7 +20009,7 @@ static Boolean ParseITS2Range (CharPtr cp, Pointer data)
return rval;
}
-
+//Not used for Autodef or Cleanup
static Boolean ParseIsComplement (CharPtr cp, Pointer data)
{
ExtractorInfoPtr ep;
@@ -18630,6 +20058,34 @@ CharPtr extractor_feature_labels[] = {
};
+typedef struct rnafeatlist { /* one parsed row of the RNA feature-list table; filled in by the new_token_parsers callbacks */
+ CharPtr id; /* saved copy of the sequence ID text; released by RNAFeatListFree */
+ Boolean has_feat[eExtractorFeat28S + 1]; /* presence flag per feature slot, written in order by ParseRNARange */
+ Int4 feat_pos; /* index of the next has_feat slot ParseRNARange will write */
+ Boolean is_complement; /* set by ParseNewComplement from a "0"/"1" token */
+ CharPtr error; /* saved error text from ParseRNAError, or NULL; released by RNAFeatListFree */
+} RNAFeatListData, PNTR RNAFeatListPtr;
+
+//Not used for Autodef or Cleanup
+/* Allocates one RNAFeatListData record and clears it to zero.
+ * The caller releases the record with RNAFeatListFree. */
+static RNAFeatListPtr RNAFeatListNew ()
+{
+  RNAFeatListPtr new_list;
+
+  new_list = (RNAFeatListPtr) MemNew (sizeof (RNAFeatListData));
+  MemSet (new_list, 0, sizeof (RNAFeatListData));
+  return new_list;
+}
+
+//Not used for Autodef or Cleanup
+static RNAFeatListPtr RNAFeatListFree (RNAFeatListPtr ep)
+{
+ if (ep != NULL) {
+ ep->id = MemFree (ep->id);
+ ep->error = MemFree (ep->error);
+ ep = MemFree (ep);
+ }
+ return ep;
+}
+
+//Not used for Autodef or Cleanup
static CharPtr MakeLabelFromExtractorInfo (ExtractorInfoPtr ep)
{
Boolean feat_present[5];
@@ -18718,6 +20174,194 @@ static CharPtr MakeLabelFromExtractorInfo (ExtractorInfoPtr ep)
}
+//Not used for Autodef or Cleanup
+/* Token parser: stores a whitespace-trimmed copy of the ID token in the
+ * RNAFeatList record passed through data.
+ *
+ *   cp   - token text; leading and trailing whitespace is dropped
+ *   data - RNAFeatListPtr whose ->id receives the saved copy
+ *
+ * Returns FALSE when the token is blank, data is NULL, or the copy could
+ * not be allocated; TRUE otherwise.
+ *
+ * NOTE(review): a '|'-separated list of IDs is stored whole; nothing is
+ * truncated to the first ID. Confirm whether truncation is wanted before
+ * adding it.
+ */
+static Boolean ParseExtractorIdOnly (CharPtr cp, Pointer data)
+{
+  RNAFeatListPtr ep;
+  CharPtr id_start, id_last;
+
+  if (StringHasNoText (cp) || (ep = (RNAFeatListPtr) data) == NULL) {
+    return FALSE;
+  }
+
+  /* <ctype.h> classifiers need an unsigned char value */
+  id_start = cp;
+  while (isspace ((unsigned char) *id_start)) {
+    id_start++;
+  }
+
+  if (*id_start == 0) {
+    return FALSE;
+  }
+
+  ep->id = StringSave (id_start);
+  if (ep->id == NULL) {
+    return FALSE;
+  }
+
+  /* trim spaces from end of ID; the copy holds at least one character */
+  id_last = ep->id + StringLen (ep->id) - 1;
+  while (id_last > ep->id && isspace ((unsigned char) *id_last)) {
+    id_last--;
+  }
+  *(id_last + 1) = 0;
+  return TRUE;
+}
+
+//Not used for Autodef or Cleanup
+/* Token parser for the complement column.
+ * A blank token or "0" clears is_complement, "1" sets it; any other text
+ * (or a NULL data pointer) makes the parser return FALSE. */
+static Boolean ParseNewComplement (CharPtr cp, Pointer data)
+{
+  RNAFeatListPtr feat_list = (RNAFeatListPtr) data;
+
+  if (feat_list == NULL) {
+    return FALSE;
+  }
+  if (StringHasNoText (cp) || StringCmp (cp, "0") == 0) {
+    feat_list->is_complement = FALSE;
+    return TRUE;
+  }
+  if (StringCmp (cp, "1") == 0) {
+    feat_list->is_complement = TRUE;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+static CharPtr sIgnoreRNAErrors[] = { /* NULL-terminated list of error texts that ParseRNAError does not record; each entry is matched as a case-insensitive prefix */
+ "Broken or partial sequence, no 5.8S!",
+ "Broken or partial sequence, only partial 5.8S!",
+ NULL};
+
+//Not used for Autodef or Cleanup
+static Boolean ParseRNAError (CharPtr cp, Pointer data)
+{
+ RNAFeatListPtr ep;
+ Boolean rval = TRUE, ignore = FALSE;
+ Int4 j;
+
+ if ((ep = (RNAFeatListPtr) data) == NULL) {
+ return FALSE;
+ }
+ if (!StringHasNoText (cp)) {
+ for (j = 0; sIgnoreRNAErrors[j] != NULL && !ignore; j++) {
+ if (StringNICmp (cp, sIgnoreRNAErrors[j], StringLen (sIgnoreRNAErrors[j])) == 0) {
+ ignore = TRUE;
+ }
+ }
+ if (!ignore) {
+ ep->error = StringSave (cp);
+ }
+ }
+ return rval;
+}
+
+//Not used for Autodef or Cleanup
+/* Token parser for one "<label>: <value>" range column.
+ *
+ * The text before the first ':' is ignored. The value after it (leading
+ * whitespace skipped) decides the flag stored in the next has_feat slot:
+ *   "Not found"                        -> FALSE
+ *   "No end", "No start", or a digit   -> TRUE
+ * The keyword comparisons are case-insensitive and cover the whole
+ * remaining text. On success feat_pos advances by one.
+ *
+ * Returns FALSE when data is NULL, there is no ':', the value is not one
+ * of the forms above, or every has_feat slot has already been written.
+ */
+static Boolean ParseRNARange (CharPtr cp, Pointer data)
+{
+  RNAFeatListPtr ep;
+  CharPtr value;
+  Boolean found;
+
+  if ((ep = (RNAFeatListPtr) data) == NULL) {
+    return FALSE;
+  }
+  /* has_feat holds eExtractorFeat28S + 1 entries; never write past it */
+  if (ep->feat_pos < 0 || ep->feat_pos > eExtractorFeat28S) {
+    return FALSE;
+  }
+  value = StringChr (cp, ':');
+  if (value == NULL) {
+    return FALSE;
+  }
+  value++;
+  /* <ctype.h> classifiers need an unsigned char value */
+  while (isspace ((unsigned char) *value)) {
+    value++;
+  }
+
+  if (StringICmp (value, "Not found") == 0) {
+    found = FALSE;
+  } else if (StringICmp (value, "No end") == 0
+             || StringICmp (value, "No start") == 0
+             || isdigit ((unsigned char) *value)) {
+    found = TRUE;
+  } else {
+    return FALSE;
+  }
+
+  ep->has_feat[ep->feat_pos] = found;
+  ep->feat_pos++;
+  return TRUE;
+}
+
+
+static Nlm_ParseProc new_token_parsers[] = { /* NULL-terminated parser table that ParseRNAFeatListRowToFeatures hands to ParseLineOfTokens */
+ ParseExtractorIdOnly, /* saves the sequence ID */
+ SkipToken, /* token accepted and ignored */
+ ParseRNARange, /* five range tokens: each call fills the next has_feat slot */
+ ParseRNARange,
+ ParseRNARange,
+ ParseRNARange,
+ ParseRNARange,
+ ParseRNAError, /* keeps the error text unless it is on the ignore list */
+ ParseNewComplement, /* blank/"0"/"1" -> is_complement */
+ NULL};
+
+//Not used for Autodef or Cleanup
+/* Builds the comment text "contains A", "contains A and B" or
+ * "contains A, B, and C" from the first unbroken run of set has_feat
+ * flags, using the names in extractor_feature_labels.
+ *
+ * Leading unset slots are skipped; the run ends at the first unset slot
+ * that follows a set one. With no flags set the result is "contains ".
+ *
+ * Returns a newly allocated string the caller must MemFree, or NULL when
+ * ep is NULL or the allocation fails.
+ */
+static CharPtr MakeLabelFromRNAFeatList (RNAFeatListPtr ep)
+{
+  CharPtr label;
+  Int4 len, i, num_feat = 0, feat_num = 0;
+
+  if (ep == NULL) {
+    return NULL;
+  }
+
+  /* Sizing pass. 15 covers "contains ", the terminator and the extra
+   * bytes of the " and " / ", and " joiner beyond the 2 bytes budgeted
+   * per feature. */
+  len = 15;
+  for (i = 0; i < 5; i++) {
+    if (ep->has_feat[i]) {
+      len += StringLen (extractor_feature_labels[i]) + 2;
+      num_feat++;
+    } else if (num_feat > 0) {
+      break;
+    }
+  }
+  label = (CharPtr) MemNew (sizeof (Char) * len);
+  if (label == NULL) {
+    return NULL;
+  }
+  sprintf (label, "contains ");
+
+  /* Writing pass over the same run of features */
+  for (i = 0; i < 5; i++) {
+    if (ep->has_feat[i]) {
+      if (feat_num > 0) {
+        if (feat_num != num_feat - 1) {
+          StringCat (label, ", ");
+        } else if (num_feat == 2) {
+          /* exactly two items: no comma before the conjunction */
+          StringCat (label, " and ");
+        } else {
+          StringCat (label, ", and ");
+        }
+      }
+      StringCat (label, extractor_feature_labels[i]);
+      feat_num++;
+    } else if (feat_num > 0) {
+      break;
+    }
+  }
+  return label;
+}
+
+
+//Not used for Autodef or Cleanup
NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
{
@@ -18750,17 +20394,61 @@ NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2
}
}
+//Not used for Autodef or Cleanup
+/* Looks up the Bioseq named by the textual ID taken from a table row.
+ *
+ *   id   - ID text from the row
+ *   line - the full row, used only in error messages
+ *   sep  - entry passed to CreateSeqIdFromText when building the SeqId
+ *
+ * Posts an error through Message and returns NULL when the ID is blank,
+ * ends in "..." (treated as truncated), or matches no Bioseq.
+ */
+static BioseqPtr GetBioseqFromExtractorTextId (CharPtr id, CharPtr line, SeqEntryPtr sep)
+{
+  Int4 id_len;
+  SeqIdPtr text_sip;
+  BioseqPtr found;
+
+  if (StringHasNoText (id)) {
+    Message (MSG_POSTERR, "No id for line %s", line);
+    return NULL;
+  }
+
+  /* an ID of more than three characters ending in "..." was cut short */
+  id_len = StringLen (id);
+  if (id_len > 3 && StringCmp (id + id_len - 3, "...") == 0) {
+    Message (MSG_POSTERR, "ID was truncated for line %s", line);
+    return NULL;
+  }
+
+  text_sip = CreateSeqIdFromText (id, sep);
+  found = BioseqFind (text_sip);
+  SeqIdFree (text_sip);
+  if (found == NULL) {
+    Message (MSG_POSTERR, "ID for sequence not present in record in line %s", line);
+  }
+  return found;
+}
+
+
+//Not used for Autodef or Cleanup
+/* Creates an RNA feature on bsp, partial at both ends, whose comment is
+ * a copy of label.
+ *
+ * The caller keeps ownership of label (it is duplicated with StringSave).
+ * Returns the new feature, or NULL when the feature could not be created.
+ */
+static SeqFeatPtr MakeMiscRNAWithLabel(BioseqPtr bsp, CharPtr label)
+{
+  SeqFeatPtr sfp;
+  RnaRefPtr rrp;
+
+  /* make feature and attach to appropriate annots */
+  sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, NULL);
+  if (sfp == NULL) {
+    return NULL;
+  }
+  rrp = RnaRefNew ();
+  sfp->data.value.ptrvalue = rrp;
+  if (rrp != NULL) {
+    /* NOTE(review): 255 is presumably the "other" RNA type and ext
+     * choice 3 the RNA-gen variant (it pairs with the RNAGen object
+     * stored here) - confirm against the RnaRef definition. */
+    rrp->type = 255;
+    rrp->ext.choice = 3;
+    rrp->ext.value.ptrvalue = RNAGenNew ();
+  }
+  sfp->comment = StringSave(label);
+  SetSeqLocPartial (sfp->location, TRUE, TRUE);
+  return sfp;
+}
+
+//Not used for Autodef or Cleanup
static SeqFeatPtr ParseExtractorResultRowToFeatures (CharPtr line, SeqEntryPtr sep)
{
ExtractorInfoPtr ep;
SeqFeatPtr sfp = NULL;
CharPtr label;
- Int4 len;
- SeqIdPtr sip;
BioseqPtr bsp;
- RnaRefPtr rrp;
- RNAGenPtr rgp;
if (StringHasNoText (line)) {
return NULL;
@@ -18779,22 +20467,12 @@ static SeqFeatPtr ParseExtractorResultRowToFeatures (CharPtr line, SeqEntryPtr s
}
/* figure out ID */
- len = StringLen (ep->id);
- if (len > 3 && ep->id[len - 1] == '.' && ep->id[len - 2] == '.' && ep->id[len - 3] == '.') {
- ep = ExtractorInfoFree (ep);
- Message (MSG_POSTERR, "ID was truncated for line %s", line);
- return NULL;
- }
- sip = CreateSeqIdFromText (ep->id, sep);
- bsp = BioseqFind (sip);
- sip = SeqIdFree (sip);
+ bsp = GetBioseqFromExtractorTextId(ep->id, line, sep);
if (bsp == NULL) {
ep = ExtractorInfoFree (ep);
- Message (MSG_POSTERR, "ID for sequence not present in record in line %s", line);
return NULL;
}
-
/* calculate label */
label = MakeLabelFromExtractorInfo(ep);
@@ -18804,20 +20482,14 @@ static SeqFeatPtr ParseExtractorResultRowToFeatures (CharPtr line, SeqEntryPtr s
}
/* make feature and attach to appropriate annots */
- sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, NULL);
- rrp = RnaRefNew ();
- sfp->data.value.ptrvalue = rrp;
- rrp->type = 255;
- rgp = RNAGenNew ();
- rrp->ext.choice = 3;
- rrp->ext.value.ptrvalue = rgp;
- sfp->comment = label;
- SetSeqLocPartial (sfp->location, TRUE, TRUE);
+ sfp = MakeMiscRNAWithLabel(bsp, label);
+ label = MemFree (label);
ep = ExtractorInfoFree (ep);
return sfp;
}
+//Not used for Autodef or Cleanup
NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep)
{
ReadBufferData rbd;
@@ -18834,6 +20506,84 @@ NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep)
}
}
+//Not used for Autodef or Cleanup
+/* Turns one row of the RNA feature-list table into an RNA feature.
+ *
+ *   line - one row of the table; blank rows are ignored
+ *   sep  - entry used to resolve the row's sequence ID and to walk
+ *          features when the sequence is reverse-complemented
+ *   lip  - optional log; when NULL, parse and scan errors are posted with
+ *          Message, otherwise they are written to lip->fp (if set) and
+ *          lip->data_in_log is raised
+ *
+ * When the row is flagged as complement, the Bioseq is
+ * reverse-complemented and RevCompFeats is run over sep before the
+ * feature is added.
+ *
+ * Returns the new feature, or NULL when the row is blank, cannot be
+ * parsed, carries a recorded error, or names a sequence that cannot be
+ * found. ID lookup failures are reported by GetBioseqFromExtractorTextId
+ * through Message regardless of lip.
+ */
+static SeqFeatPtr ParseRNAFeatListRowToFeatures (CharPtr line, SeqEntryPtr sep, LogInfoPtr lip)
+{
+  RNAFeatListPtr ep;
+  SeqFeatPtr sfp = NULL;
+  CharPtr label;
+  BioseqPtr bsp;
+
+  if (StringHasNoText (line)) {
+    return NULL;
+  }
+
+  ep = RNAFeatListNew ();
+  if (!ParseLineOfTokens(line, new_token_parsers, ep)) {
+    ep = RNAFeatListFree (ep);
+    if (lip == NULL) {
+      Message (MSG_POSTERR, "Unable to parse extractor line %s", line);
+    } else {
+      if (lip->fp != NULL) {
+        fprintf (lip->fp, "Unable to parse extractor line %s\n", line);
+      }
+      lip->data_in_log = TRUE;
+    }
+    return NULL;
+  }
+  if (ep->error != NULL) {
+    if (lip == NULL) {
+      Message (MSG_POSTERR, "Error scanning for feature on %s: %s", ep->id, ep->error);
+    } else {
+      if (lip->fp != NULL) {
+        fprintf (lip->fp, "Error scanning for feature on %s: %s\n", ep->id, ep->error);
+      }
+      lip->data_in_log = TRUE;
+    }
+    /* release the record on both reporting paths, after its fields were used */
+    ep = RNAFeatListFree (ep);
+    return NULL;
+  }
+
+  /* figure out ID */
+  bsp = GetBioseqFromExtractorTextId(ep->id, line, sep);
+  if (bsp == NULL) {
+    ep = RNAFeatListFree (ep);
+    return NULL;
+  }
+
+  /* calculate label */
+  label = MakeLabelFromRNAFeatList(ep);
+
+  if (ep->is_complement) {
+    BioseqRevComp (bsp);
+    SeqEntryExplore (sep, (Pointer) bsp, RevCompFeats);
+  }
+
+  /* make feature and attach to appropriate annots; the label is copied */
+  sfp = MakeMiscRNAWithLabel(bsp, label);
+  label = MemFree (label);
+
+  ep = RNAFeatListFree (ep);
+  return sfp;
+}
+
+//Not used for Autodef or Cleanup
+/* Reads the RNA feature-list table from fp one line at a time and hands
+ * each line to ParseRNAFeatListRowToFeatures.
+ *
+ *   fp  - open table file
+ *   sep - entry the features are added to
+ *   lip - optional log passed through to the row parser
+ *
+ * Reading stops at the first NULL line or the first line whose first
+ * character equals EOF. Every line obtained from AbstractReadFunction is
+ * released, including the one that ends the loop.
+ */
+NLM_EXTERN void ParseRNAFeatListTableToFeatures (FILE *fp, SeqEntryPtr sep, LogInfoPtr lip)
+{
+  ReadBufferData rbd;
+  CharPtr line;
+
+  rbd.fp = fp;
+  rbd.current_data = NULL;
+  line = AbstractReadFunction (&rbd);
+  while (line != NULL && line[0] != EOF) {
+    /* TODO: skip intro lines */
+    ParseRNAFeatListRowToFeatures(line, sep, lip);
+    line = MemFree (line);
+    line = AbstractReadFunction (&rbd);
+  }
+  /* a terminating EOF-marker line is still an allocated buffer */
+  line = MemFree (line);
+}
+
extern CharPtr latlon_onedegree [];
CharPtr latlon_onedegree [] = {
"1",
@@ -18916,27 +20666,27 @@ CharPtr latlon_onedegree [] = {
"Antarctica",
"\t-59\t-47\t-43",
"\t-60\t-59\t-53\t-47\t-43",
- "\t-61\t-62\t-53\t-47\t-43",
- "\t-62\t-62\t-53",
+ "\t-61\t-63\t-53\t-47\t-43",
+ "\t-62\t-63\t-53",
"\t-63\t-65\t-54",
- "\t-64\t-66\t-54\t51\t56\t99\t104\t110\t114",
- "\t-65\t-69\t-56\t47\t58\t86\t117\t119\t144",
- "\t-66\t-70\t-59\t42\t70\t79\t147",
- "\t-67\t-91\t-89\t-73\t-59\t31\t35\t38\t71\t76\t156",
- "\t-68\t-91\t-89\t-76\t-60\t31\t161",
- "\t-69\t-91\t-89\t-77\t-60\t-11\t168",
+ "\t-64\t-67\t-54\t51\t56\t91\t93\t99\t104\t110\t114",
+ "\t-65\t-69\t-55\t47\t58\t84\t117\t119\t144\t161\t164",
+ "\t-66\t-70\t-58\t42\t70\t79\t147\t161\t165",
+ "\t-67\t-91\t-89\t-73\t-59\t31\t35\t38\t71\t76\t156\t161\t165",
+ "\t-68\t-91\t-89\t-76\t-59\t14\t17\t31\t161\t163\t165",
+ "\t-69\t-91\t-89\t-77\t-59\t-11\t168",
"\t-70\t-103\t-95\t-77\t-59\t-13\t171",
- "\t-71\t-106\t-87\t-81\t-79\t-77\t-58\t-15\t171",
- "\t-72\t-128\t-117\t-115\t-112\t-106\t-58\t-22\t-19\t-17\t171",
+ "\t-71\t-106\t-87\t-81\t-79\t-77\t-58\t-17\t171",
+ "\t-72\t-128\t-112\t-106\t-58\t-22\t-19\t-17\t171",
"\t-73\t-137\t-109\t-106\t-58\t-23\t171",
"\t-74\t-147\t-58\t-27\t170",
- "\t-75\t-150\t-59\t-32\t166",
- "\t-76\t-159\t-62\t-48\t-44\t-36\t170",
+ "\t-75\t-151\t-59\t-32\t167",
+ "\t-76\t-159\t-62\t-50\t-43\t-36\t170",
"\t-77\t-165\t-65\t-51\t-42\t-37\t170",
"\t-78\t-165\t-64\t-62\t-58\t-52\t-41\t-37\t170",
"\t-79\t-165\t-58\t-55\t168",
"\t-80\t-165\t-58\t-55\t164",
- "\t-81\t-175\t-170\t-164\t169",
+ "\t-81\t-175\t-169\t-164\t169",
"\t-82\t-175\t177",
"\t-83\t-180\t180",
"\t-84\t-180\t180",
@@ -19027,7 +20777,7 @@ CharPtr latlon_onedegree [] = {
"\t-26\t111\t154",
"\t-27\t112\t154",
"\t-28\t112\t154",
- "\t-29\t113\t154",
+ "\t-29\t112\t154",
"\t-30\t113\t154\t158\t160",
"\t-31\t113\t154\t158\t160",
"\t-32\t113\t154\t158\t160",
@@ -19088,13 +20838,14 @@ CharPtr latlon_onedegree [] = {
"\t-26\t128\t139",
"\t-27\t128\t139",
"Australia: Queensland",
- "\t-9\t141\t143",
- "\t-10\t140\t144",
- "\t-11\t140\t144",
- "\t-12\t140\t144",
+ "\t-8\t141\t145",
+ "\t-9\t141\t145",
+ "\t-10\t140\t145",
+ "\t-11\t140\t145",
+ "\t-12\t140\t145",
"\t-13\t140\t146",
"\t-14\t140\t146",
- "\t-15\t137\t146",
+ "\t-15\t137\t147",
"\t-16\t137\t147",
"\t-17\t137\t147",
"\t-18\t137\t149",
@@ -19147,7 +20898,7 @@ CharPtr latlon_onedegree [] = {
"Australia: Western Australia",
"\t-12\t124\t128",
"\t-13\t123\t130",
- "\t-14\t123\t130",
+ "\t-14\t122\t130",
"\t-15\t121\t130",
"\t-16\t121\t130",
"\t-17\t120\t130",
@@ -19162,7 +20913,7 @@ CharPtr latlon_onedegree [] = {
"\t-26\t111\t130",
"\t-27\t112\t130",
"\t-28\t112\t130",
- "\t-29\t113\t130",
+ "\t-29\t112\t130",
"\t-30\t113\t130",
"\t-31\t113\t130",
"\t-32\t113\t130",
@@ -19282,9 +21033,9 @@ CharPtr latlon_onedegree [] = {
"\t-23\t-69\t-61",
"Borneo",
"\t6\t113\t116",
- "\t5\t113\t116",
- "\t4\t113\t116",
- "\t3\t113\t116",
+ "\t5\t112\t116",
+ "\t4\t112\t116",
+ "\t3\t112\t116",
"Borneo",
"\t5\t114\t118",
"\t4\t107\t109\t114\t118",
@@ -19386,9 +21137,9 @@ CharPtr latlon_onedegree [] = {
"\t17\t-65\t-63",
"Brunei",
"\t6\t113\t116",
- "\t5\t113\t116",
- "\t4\t113\t116",
- "\t3\t113\t116",
+ "\t5\t112\t116",
+ "\t4\t112\t116",
+ "\t3\t112\t116",
"Bulgaria",
"\t45\t21\t28",
"\t44\t21\t29",
@@ -19504,7 +21255,7 @@ CharPtr latlon_onedegree [] = {
"\t59\t-140\t-119",
"\t58\t-140\t-119",
"\t57\t-138\t-119",
- "\t56\t-134\t-119",
+ "\t56\t-133\t-119",
"\t55\t-134\t-119",
"\t54\t-134\t-117",
"\t53\t-134\t-116",
@@ -19550,7 +21301,7 @@ CharPtr latlon_onedegree [] = {
"\t53\t-68\t-54",
"\t52\t-68\t-54",
"\t51\t-68\t-54",
- "\t50\t-65\t-63\t-59\t-52",
+ "\t50\t-66\t-63\t-59\t-52",
"\t49\t-60\t-51",
"\t48\t-60\t-51",
"\t47\t-60\t-51",
@@ -19566,7 +21317,7 @@ CharPtr latlon_onedegree [] = {
"\t73\t-126\t-109",
"\t72\t-126\t-109",
"\t71\t-132\t-109",
- "\t70\t-136\t-109",
+ "\t70\t-137\t-109",
"\t69\t-137\t-109",
"\t68\t-137\t-115\t-113\t-111",
"\t67\t-137\t-113",
@@ -19611,7 +21362,7 @@ CharPtr latlon_onedegree [] = {
"\t63\t-110\t-62",
"\t62\t-103\t-63",
"\t61\t-103\t-89\t-84\t-63",
- "\t60\t-103\t-91\t-81\t-77\t-69\t-63",
+ "\t60\t-103\t-91\t-81\t-76\t-69\t-63",
"\t59\t-103\t-93\t-81\t-76\t-69\t-63",
"\t58\t-81\t-75",
"\t57\t-81\t-75",
@@ -19651,7 +21402,7 @@ CharPtr latlon_onedegree [] = {
"\t63\t-79\t-71",
"\t62\t-79\t-68",
"\t61\t-79\t-68\t-66\t-63",
- "\t60\t-79\t-68\t-66\t-63",
+ "\t60\t-79\t-63",
"\t59\t-79\t-62",
"\t58\t-79\t-62",
"\t57\t-79\t-62",
@@ -19822,7 +21573,10 @@ CharPtr latlon_onedegree [] = {
"\t20\t98\t102\t105\t114",
"\t19\t107\t112",
"\t18\t107\t112",
- "\t17\t107\t111",
+ "\t17\t107\t113",
+ "\t16\t110\t113",
+ "\t15\t110\t113",
+ "\t14\t110\t112",
"China: Hainan",
"\t21\t108\t111",
"\t20\t107\t112",
@@ -19842,9 +21596,9 @@ CharPtr latlon_onedegree [] = {
"\t-12\t95\t97",
"\t-13\t95\t97",
"Colombia",
- "\t14\t-82\t-80",
- "\t13\t-82\t-80\t-73\t-70",
- "\t12\t-82\t-80\t-75\t-70",
+ "\t14\t-82\t-79",
+ "\t13\t-82\t-79\t-73\t-70",
+ "\t12\t-82\t-79\t-75\t-70",
"\t11\t-82\t-80\t-76\t-70",
"\t10\t-77\t-70",
"\t9\t-78\t-71",
@@ -19852,9 +21606,9 @@ CharPtr latlon_onedegree [] = {
"\t7\t-78\t-66",
"\t6\t-78\t-66",
"\t5\t-78\t-66",
- "\t4\t-78\t-66",
- "\t3\t-79\t-66",
- "\t2\t-80\t-65",
+ "\t4\t-82\t-66",
+ "\t3\t-82\t-66",
+ "\t2\t-82\t-65",
"\t1\t-80\t-65",
"\t0\t-80\t-65",
"\t-1\t-79\t-68",
@@ -19884,10 +21638,10 @@ CharPtr latlon_onedegree [] = {
"\t-16\t146\t151",
"\t-17\t146\t151",
"\t-18\t147\t149",
- "\t-20\t152\t156",
- "\t-21\t152\t156",
- "\t-22\t152\t156",
- "\t-23\t154\t156",
+ "\t-20\t151\t156",
+ "\t-21\t151\t156",
+ "\t-22\t151\t156",
+ "\t-23\t151\t156",
"Costa Rica",
"\t12\t-86\t-83",
"\t11\t-86\t-82",
@@ -19981,7 +21735,7 @@ CharPtr latlon_onedegree [] = {
"\t56\t7\t16",
"\t55\t7\t16",
"\t54\t7\t16",
- "\t53\t7\t13",
+ "\t53\t7\t16",
"Djibouti",
"\t13\t41\t44",
"\t12\t40\t44",
@@ -20101,7 +21855,7 @@ CharPtr latlon_onedegree [] = {
"\t-18\t-180\t-177\t176\t180",
"\t-19\t-180\t-177\t176\t180",
"\t-20\t-180\t-177\t173\t180",
- "\t-21\t173\t175",
+ "\t-21\t-179\t-177\t173\t175",
"\t-22\t173\t175",
"Finland",
"\t71\t26\t28",
@@ -20165,16 +21919,16 @@ CharPtr latlon_onedegree [] = {
"\t-10\t-141\t-137",
"\t-11\t-140\t-137",
"\t-13\t-149\t-140",
- "\t-14\t-149\t-139",
- "\t-15\t-152\t-139",
- "\t-16\t-152\t-137",
- "\t-17\t-152\t-135",
- "\t-18\t-150\t-148\t-146\t-135",
+ "\t-14\t-155\t-153\t-149\t-139",
+ "\t-15\t-155\t-139",
+ "\t-16\t-155\t-137",
+ "\t-17\t-154\t-135",
+ "\t-18\t-151\t-148\t-146\t-135",
"\t-19\t-142\t-135",
"\t-20\t-142\t-134",
"\t-21\t-152\t-150\t-141\t-134",
"\t-22\t-152\t-146\t-141\t-133",
- "\t-23\t-152\t-146\t-136\t-133",
+ "\t-23\t-152\t-146\t-137\t-133",
"\t-24\t-150\t-146\t-136\t-133",
"\t-26\t-145\t-143",
"\t-27\t-145\t-143",
@@ -20227,7 +21981,7 @@ CharPtr latlon_onedegree [] = {
"\t40\t40\t47",
"Germany",
"\t56\t7\t9",
- "\t55\t7\t15",
+ "\t55\t6\t15",
"\t54\t5\t15",
"\t53\t5\t15",
"\t52\t4\t16",
@@ -20585,7 +22339,7 @@ CharPtr latlon_onedegree [] = {
"\t40\t51\t56\t65\t71",
"\t39\t66\t69",
"Kenya",
- "\t6\t33\t36",
+ "\t6\t34\t36",
"\t5\t32\t42",
"\t4\t32\t42",
"\t3\t32\t42",
@@ -20609,15 +22363,16 @@ CharPtr latlon_onedegree [] = {
"Kiribati",
"\t5\t-161\t-159",
"\t4\t-161\t-158\t171\t173",
- "\t3\t-161\t-156\t171\t173",
+ "\t3\t-161\t-156\t171\t174",
"\t2\t-160\t-156\t171\t174",
- "\t1\t-158\t-156\t171\t175",
- "\t0\t-158\t-156\t171\t177",
- "\t-1\t-172\t-170\t171\t177",
+ "\t1\t-158\t-156\t168\t175",
+ "\t0\t-158\t-156\t168\t177",
+ "\t-1\t-172\t-170\t168\t177",
"\t-2\t-172\t-170\t173\t177",
- "\t-3\t-173\t-170\t-156\t-153\t174\t177",
- "\t-4\t-173\t-171\t-156\t-153",
- "\t-5\t-173\t-171\t-156\t-153",
+ "\t-3\t-175\t-170\t-156\t-153\t174\t177",
+ "\t-4\t-175\t-170\t-156\t-153",
+ "\t-5\t-175\t-170\t-156\t-153",
+ "\t-6\t-156\t-154",
"\t-10\t-152\t-150",
"\t-11\t-152\t-150",
"\t-12\t-152\t-150",
@@ -20738,7 +22493,7 @@ CharPtr latlon_onedegree [] = {
"\t-20\t42\t50",
"\t-21\t42\t49",
"\t-22\t42\t49",
- "\t-23\t42\t48",
+ "\t-23\t42\t49",
"\t-24\t42\t48",
"\t-25\t42\t48",
"\t-26\t43\t48",
@@ -20855,9 +22610,9 @@ CharPtr latlon_onedegree [] = {
"\t28\t-119\t-98",
"\t27\t-119\t-96",
"\t26\t-116\t-96",
- "\t25\t-115\t-96",
- "\t24\t-113\t-96",
- "\t23\t-113\t-96",
+ "\t25\t-116\t-96",
+ "\t24\t-116\t-96",
+ "\t23\t-116\t-96\t-90\t-88",
"\t22\t-111\t-96\t-91\t-85",
"\t21\t-111\t-95\t-91\t-85",
"\t20\t-111\t-109\t-107\t-94\t-92\t-85",
@@ -20869,13 +22624,18 @@ CharPtr latlon_onedegree [] = {
"\t14\t-98\t-90",
"\t13\t-93\t-91",
"Micronesia",
- "\t10\t137\t139",
- "\t9\t137\t139\t148\t151\t153\t155",
- "\t8\t137\t139\t148\t155",
- "\t7\t148\t159",
+ "\t10\t137\t141",
+ "\t9\t137\t141\t148\t151",
+ "\t8\t137\t141\t148\t152",
+ "\t7\t148\t152\t156\t159",
"\t6\t148\t154\t156\t159\t161\t164",
"\t5\t148\t150\t152\t154\t156\t159\t161\t164",
- "\t4\t152\t154\t156\t158\t161\t164",
+ "\t4\t152\t158\t161\t164",
+ "\t3\t153\t155",
+ "\t2\t153\t155",
+ "\t1\t153\t155",
+ "\t0\t153\t155",
+ "\t-1\t153\t155",
"Midway Islands",
"\t29\t-178\t-176",
"\t28\t-178\t-176",
@@ -21024,12 +22784,12 @@ CharPtr latlon_onedegree [] = {
"\t12\t-69\t-67",
"\t11\t-69\t-67",
"New Caledonia",
- "\t-18\t158\t160\t162\t164",
- "\t-19\t158\t160\t162\t168",
- "\t-20\t158\t160\t162\t169",
- "\t-21\t162\t169",
- "\t-22\t163\t169",
- "\t-23\t165\t168",
+ "\t-18\t162\t164",
+ "\t-19\t162\t168",
+ "\t-20\t162\t169",
+ "\t-21\t162\t172",
+ "\t-22\t163\t172",
+ "\t-23\t165\t168\t170\t172",
"New Zealand",
"\t-7\t-173\t-171",
"\t-8\t-173\t-170",
@@ -21045,7 +22805,7 @@ CharPtr latlon_onedegree [] = {
"\t-37\t172\t179",
"\t-38\t172\t179",
"\t-39\t171\t179",
- "\t-40\t170\t179",
+ "\t-40\t170\t178",
"\t-41\t169\t177",
"\t-42\t-177\t-175\t167\t177",
"\t-43\t-177\t-175\t166\t175",
@@ -21178,6 +22938,7 @@ CharPtr latlon_onedegree [] = {
"\t23\t65\t72",
"\t22\t66\t69",
"Palau",
+ "\t9\t133\t135",
"\t8\t133\t135",
"\t7\t133\t135",
"\t6\t131\t135",
@@ -21187,6 +22948,7 @@ CharPtr latlon_onedegree [] = {
"\t2\t130\t132",
"\t1\t130\t132",
"Palmyra Atoll",
+ "\t7\t-163\t-161",
"\t6\t-163\t-161",
"\t5\t-163\t-161",
"\t4\t-163\t-161",
@@ -21352,8 +23114,8 @@ CharPtr latlon_onedegree [] = {
"\t81\t35\t37\t43\t66\t77\t81\t88\t100",
"\t80\t35\t37\t43\t66\t75\t81\t88\t105",
"\t79\t35\t37\t43\t66\t75\t81\t89\t108",
- "\t78\t49\t52\t57\t60\t66\t68\t75\t78\t88\t108\t155\t157",
- "\t77\t59\t70\t88\t114\t136\t143\t147\t153\t155\t157",
+ "\t78\t49\t52\t57\t60\t66\t68\t75\t78\t87\t108\t155\t157",
+ "\t77\t59\t70\t87\t114\t136\t143\t147\t153\t155\t157",
"\t76\t54\t70\t80\t114\t134\t153\t155\t157",
"\t75\t53\t70\t78\t117\t134\t153",
"\t74\t52\t130\t134\t151",
@@ -21451,7 +23213,7 @@ CharPtr latlon_onedegree [] = {
"\t29\t33\t49",
"\t28\t33\t50",
"\t27\t33\t51",
- "\t26\t33\t51",
+ "\t26\t34\t51",
"\t25\t34\t52",
"\t24\t35\t53",
"\t23\t36\t56",
@@ -21569,20 +23331,20 @@ CharPtr latlon_onedegree [] = {
"\t-46\t36\t38",
"\t-47\t36\t38",
"South Georgia and the South Sandwich Islands",
- "\t-52\t-43\t-36",
- "\t-53\t-43\t-33",
- "\t-54\t-43\t-33",
- "\t-55\t-39\t-33\t-29\t-26",
- "\t-56\t-35\t-33\t-29\t-25",
+ "\t-52\t-39\t-36",
+ "\t-53\t-39\t-34",
+ "\t-54\t-39\t-34",
+ "\t-55\t-39\t-34\t-29\t-26",
+ "\t-56\t-29\t-25",
"\t-57\t-29\t-25",
"\t-58\t-28\t-25",
"\t-59\t-28\t-25",
"\t-60\t-28\t-25",
"South Korea",
"\t39\t125\t129",
- "\t38\t123\t131",
- "\t37\t123\t131",
- "\t36\t123\t131",
+ "\t38\t123\t132",
+ "\t37\t123\t132",
+ "\t36\t123\t132",
"\t35\t124\t130",
"\t34\t124\t130",
"\t33\t124\t129",
@@ -21598,7 +23360,7 @@ CharPtr latlon_onedegree [] = {
"\t6\t23\t36",
"\t5\t25\t36",
"\t4\t25\t36",
- "\t3\t26\t35",
+ "\t3\t26\t36",
"\t2\t29\t34",
"Spain",
"\t44\t-10\t0",
@@ -21654,7 +23416,7 @@ CharPtr latlon_onedegree [] = {
"\t8\t22\t35",
"\t7\t22\t25",
"Suriname",
- "\t7\t-56\t-54",
+ "\t7\t-57\t-55",
"\t6\t-58\t-52",
"\t5\t-59\t-52",
"\t4\t-59\t-52",
@@ -21776,9 +23538,9 @@ CharPtr latlon_onedegree [] = {
"\t-9\t-172\t-170",
"\t-10\t-172\t-170",
"Tonga",
- "\t-14\t-176\t-172",
- "\t-15\t-176\t-172",
- "\t-16\t-176\t-172",
+ "\t-14\t-176\t-174",
+ "\t-15\t-176\t-174",
+ "\t-16\t-176\t-174",
"\t-17\t-175\t-172",
"\t-18\t-176\t-172",
"\t-19\t-176\t-172",
@@ -21863,7 +23625,7 @@ CharPtr latlon_onedegree [] = {
"\t43\t32\t36",
"United Arab Emirates",
"\t27\t55\t57",
- "\t26\t53\t57",
+ "\t26\t54\t57",
"\t25\t50\t57",
"\t24\t50\t57",
"\t23\t50\t57",
@@ -21919,7 +23681,7 @@ CharPtr latlon_onedegree [] = {
"\t49\t-125\t-86",
"\t48\t-125\t-84\t-70\t-66",
"\t47\t-125\t-82\t-71\t-66",
- "\t46\t-125\t-81\t-75\t-66",
+ "\t46\t-125\t-81\t-75\t-65",
"\t45\t-125\t-81\t-77\t-65",
"\t44\t-125\t-65",
"\t43\t-125\t-65",
@@ -21927,7 +23689,7 @@ CharPtr latlon_onedegree [] = {
"\t41\t-125\t-68",
"\t40\t-125\t-68",
"\t39\t-125\t-71",
- "\t38\t-124\t-73",
+ "\t38\t-125\t-73",
"\t37\t-124\t-73",
"\t36\t-124\t-74",
"\t35\t-123\t-74",
@@ -21936,9 +23698,9 @@ CharPtr latlon_onedegree [] = {
"\t32\t-121\t-76",
"\t31\t-119\t-78",
"\t30\t-114\t-79",
- "\t29\t-106\t-79",
- "\t28\t-105\t-79",
- "\t27\t-174\t-172\t-104\t-94\t-90\t-88\t-83\t-79",
+ "\t29\t-179\t-177\t-106\t-79",
+ "\t28\t-179\t-177\t-105\t-79",
+ "\t27\t-179\t-177\t-174\t-172\t-104\t-94\t-90\t-88\t-83\t-79",
"\t26\t-174\t-166\t-100\t-95\t-83\t-79",
"\t25\t-174\t-166\t-100\t-96\t-83\t-79",
"\t24\t-172\t-160\t-98\t-96\t-83\t-79",
@@ -21984,8 +23746,8 @@ CharPtr latlon_onedegree [] = {
"\t50\t-180\t-174\t176\t180",
"USA: Alaska, Aleutian Islands",
"\t60\t-154\t-149\t-147\t-145",
- "\t59\t-162\t-159\t-154\t-149\t-147\t-145",
- "\t58\t-171\t-169\t-162\t-159\t-155\t-149\t-147\t-145",
+ "\t59\t-162\t-158\t-154\t-149\t-147\t-145",
+ "\t58\t-171\t-169\t-162\t-149\t-147\t-145",
"\t57\t-171\t-168\t-162\t-150",
"\t56\t-171\t-168\t-164\t-151",
"\t55\t-170\t-152",
@@ -22017,7 +23779,7 @@ CharPtr latlon_onedegree [] = {
"\t41\t-125\t-119",
"\t40\t-125\t-119",
"\t39\t-125\t-117",
- "\t38\t-124\t-116",
+ "\t38\t-125\t-116",
"\t37\t-124\t-115",
"\t36\t-124\t-113",
"\t35\t-123\t-113",
@@ -22068,12 +23830,14 @@ CharPtr latlon_onedegree [] = {
"\t30\t-86\t-79",
"\t29\t-86\t-80",
"USA: Hawaii",
- "\t27\t-174\t-172",
- "\t26\t-174\t-166",
+ "\t29\t-179\t-177",
+ "\t28\t-179\t-174",
+ "\t27\t-179\t-172",
+ "\t26\t-176\t-166",
"\t25\t-174\t-166",
"\t24\t-172\t-160",
- "\t23\t-165\t-158",
- "\t22\t-165\t-155",
+ "\t23\t-167\t-158",
+ "\t22\t-167\t-155",
"\t21\t-161\t-154",
"\t20\t-161\t-153",
"\t19\t-158\t-153",
@@ -22140,11 +23904,12 @@ CharPtr latlon_onedegree [] = {
"USA: Maine",
"\t48\t-70\t-66",
"\t47\t-71\t-66",
- "\t46\t-72\t-66",
+ "\t46\t-72\t-65",
"\t45\t-72\t-65",
"\t44\t-72\t-65",
"\t43\t-72\t-65",
- "\t42\t-71\t-68",
+ "\t42\t-71\t-67",
+ "\t41\t-71\t-69",
"USA: Maryland",
"\t40\t-80\t-74",
"\t39\t-80\t-74",
@@ -22285,7 +24050,7 @@ CharPtr latlon_onedegree [] = {
"\t33\t-101\t-93",
"\t32\t-98\t-93",
"USA: Oregon",
- "\t47\t-124\t-115",
+ "\t47\t-125\t-115",
"\t46\t-125\t-115",
"\t45\t-125\t-115",
"\t44\t-125\t-115",
@@ -22417,6 +24182,9 @@ CharPtr latlon_onedegree [] = {
"\t-20\t168\t170",
"\t-21\t168\t170",
"Venezuela",
+ "\t16\t-64\t-62",
+ "\t15\t-64\t-62",
+ "\t14\t-64\t-62",
"\t13\t-71\t-66",
"\t12\t-73\t-62",
"\t11\t-73\t-60",
@@ -22464,19 +24232,19 @@ CharPtr latlon_onedegree [] = {
"\t-12\t-177\t-175",
"\t-13\t-179\t-175",
"\t-14\t-179\t-175",
- "\t-15\t-179\t-176",
+ "\t-15\t-179\t-177",
"West Bank",
"\t33\t33\t36",
"\t32\t33\t36",
"\t31\t33\t36",
"\t30\t33\t36",
"Western Sahara",
- "\t28\t-14\t-7",
- "\t27\t-15\t-7",
- "\t26\t-15\t-7",
- "\t25\t-16\t-7",
- "\t24\t-17\t-7",
- "\t23\t-17\t-11",
+ "\t28\t-11\t-7",
+ "\t27\t-13\t-7",
+ "\t26\t-13\t-7",
+ "\t25\t-14\t-7",
+ "\t24\t-15\t-7",
+ "\t23\t-15\t-11",
"\t22\t-18\t-11",
"\t21\t-18\t-12",
"\t20\t-18\t-12",
@@ -22523,7 +24291,7 @@ extern CharPtr water_onedegree [];
CharPtr water_onedegree [] = {
"1",
"Adriatic Sea",
- "46\t11\t15",
+ "\t46\t11\t15",
"\t45\t11\t16",
"\t44\t11\t18",
"\t43\t11\t20",
@@ -23998,21 +25766,20 @@ CharPtr water_onedegree [] = {
"\t69\t-82\t-78",
"\t68\t-82\t-78",
"Internal Denmark Waters",
- "\t57\t9\t11",
- "\t56\t8\t12",
- "\t55\t8\t12",
- "\t54\t8\t12",
- "\t53\t8\t12",
+ "\t55\t9\t13",
+ "\t54\t9\t13",
+ "\t53\t9\t13",
+ "\t52\t9\t12",
"Internal Philippines Waters",
- "\t11\t124\t127",
- "\t10\t124\t127",
- "\t9\t124\t127",
- "\t8\t124\t127",
+ "\t11\t124\t126",
+ "\t10\t124\t126",
+ "\t9\t124\t126",
+ "\t8\t124\t126",
"Internal Philippines Waters",
+ "\t14\t121\t124",
+ "\t13\t121\t124",
+ "\t12\t121\t124",
"\t11\t122\t124",
- "\t10\t122\t124",
- "\t9\t122\t124",
- "\t8\t122\t124",
"Internal U.S. (Alaska) Waters",
"\t60\t-138\t-134",
"\t59\t-138\t-132",
@@ -25424,6 +27191,7 @@ CharPtr water_onedegree [] = {
};
+//Not part of AutoDef or Cleanup
static Uint4 sqn_binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
{
Uint4 L;
@@ -25449,6 +27217,7 @@ static Uint4 sqn_binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 lis
}
+//Not part of AutoDef or Cleanup
static Int4 MapRowCoordsSpecial(SeqAlignPtr sap, Uint4 pos, Int4 row, Boolean is_left_end)
{
DenseSegPtr dsp;
@@ -25502,6 +27271,7 @@ static Int4 MapRowCoordsSpecial(SeqAlignPtr sap, Uint4 pos, Int4 row, Boolean is
}
+//Not part of AutoDef or Cleanup
static Int4 MapBioseqToBioseqSpecial(SeqAlignPtr sap, Int4 begin, Int4 fin, Int4 pos, Boolean is_left_end)
{
Int4 bspos;
@@ -25530,7 +27300,7 @@ static Int4 MapBioseqToBioseqSpecial(SeqAlignPtr sap, Int4 begin, Int4 fin, Int4
return (stop2+1);
}
-
+//Not part of AutoDef or Cleanup
/* This function adjusts the endpoints of a location, as long as the
* endpoints are in the area represented by the alignment.
* When we are adjusting locations for an alignment of a part, we will
@@ -25592,6 +27362,7 @@ static Int4 AdjustEndpoint
}
+//Not part of AutoDef or Cleanup
static void ReplaceLocation (SeqAlignPtr salp, SeqLocPtr slp, Int4 length, Int4 begin, Int4 fin)
{
@@ -25633,6 +27404,7 @@ static void ReplaceLocation (SeqAlignPtr salp, SeqLocPtr slp, Int4 length, Int4
}
+//Not part of AutoDef or Cleanup
/* this function iterates through the pieces of a complex location
* and calls ReplaceLocation for each one. ReplaceLocation will only
* act on SEQLOC_INT, SEQLOC_PNT, and SEQLOC_PACKED_PNT and will ignore
@@ -25661,6 +27433,7 @@ ReplaceComplexLocation
}
+//Not part of AutoDef or Cleanup
static void UpdateOneFeatureForSequenceReplace
(SeqFeatPtr sfp,
SeqAlignPtr salp,
@@ -25705,6 +27478,7 @@ static void UpdateOneFeatureForSequenceReplace
}
+//Not part of AutoDef or Cleanup
static void UpdateLocationsForSequenceReplace
(SeqAlignPtr salp,
BioseqPtr oldbsp,
@@ -25746,6 +27520,7 @@ static void UpdateLocationsForSequenceReplace
}
+//Not part of AutoDef or Cleanup
NLM_EXTERN void
ReplaceOneSequence
(SeqAlignPtr salp,
@@ -25801,6 +27576,7 @@ ReplaceOneSequence
}
+//Not part of AutoDef or Cleanup
NLM_EXTERN Boolean AreSequenceResiduesIdentical (BioseqPtr bsp1, BioseqPtr bsp2)
{
SeqPortPtr spp1, spp2;
@@ -25874,7 +27650,7 @@ NLM_EXTERN Boolean AreSequenceResiduesIdentical (BioseqPtr bsp1, BioseqPtr bsp2)
return rval;
}
-
+//Not part of AutoDef or Cleanup
static Boolean FindBestCitSubCallback (GatherContextPtr gcp)
{
@@ -25909,7 +27685,7 @@ static Boolean FindBestCitSubCallback (GatherContextPtr gcp)
return TRUE;
}
-
+//Not part of AutoDef or Cleanup
static CitSubPtr FindBestCitSubForSeqEntry (SeqEntryPtr sep)
{
CitSubPtr best = NULL;
@@ -25927,7 +27703,7 @@ static CitSubPtr FindBestCitSubForSeqEntry (SeqEntryPtr sep)
return best;
}
-
+//Not part of AutoDef or Cleanup
NLM_EXTERN ValNodePtr CreateUpdateCitSubFromBestTemplate (
SeqEntryPtr top_sep,
SeqEntryPtr upd_sep,
@@ -25998,6 +27774,7 @@ NLM_EXTERN ValNodePtr CreateUpdateCitSubFromBestTemplate (
}
+//Not part of AutoDef or Cleanup
CharPtr kSubmitterUpdateText = "Sequence update by submitter";
NLM_EXTERN void AddCitSubToUpdatedSequence (BioseqPtr upd_bsp, Uint2 input_entityID, CharPtr update_txt)
@@ -26021,6 +27798,7 @@ NLM_EXTERN void AddCitSubToUpdatedSequence (BioseqPtr upd_bsp, Uint2 input_entit
}
+//Not used for Autodef or cleanup
static void ListPhrapGraphsCallback (SeqGraphPtr sgp, Pointer userdata)
{
ValNodePtr PNTR vnpp;
@@ -26033,7 +27811,7 @@ static void ListPhrapGraphsCallback (SeqGraphPtr sgp, Pointer userdata)
}
}
-
+//Not used for Autodef or cleanup
NLM_EXTERN void RemoveQualityScores
(BioseqPtr bsp,
FILE *log_fp,
@@ -26070,6 +27848,7 @@ NLM_EXTERN void RemoveQualityScores
}
+//Not used for Autodef or cleanup
static Char GetNextCharacterFromFile (FILE *fp, BoolPtr pIsASN)
{
FileCache fc;
@@ -26105,7 +27884,7 @@ static Char GetNextCharacterFromFile (FILE *fp, BoolPtr pIsASN)
return special_symbol;
}
-
+//Not used for Autodef or cleanup
NLM_EXTERN void ReplaceFakeIDWithIDFromTitle (BioseqPtr bsp)
{
SeqDescrPtr sdp;
@@ -26186,7 +27965,7 @@ NLM_EXTERN void ReplaceFakeIDWithIDFromTitle (BioseqPtr bsp)
}
}
-
+//Not used for Autodef or cleanup
static void PutDeflineIDBackInTitle (BioseqPtr bsp)
{
SeqDescrPtr sdp;
@@ -26234,7 +28013,7 @@ static void PutDeflineIDBackInTitle (BioseqPtr bsp)
}
-
+//Not used for Autodef or cleanup
static SeqEntryPtr
ImportOneNucBioseq
(FILE *fp,
@@ -26251,7 +28030,7 @@ ImportOneNucBioseq
ErrSev oldsev;
if (feof (fp)) {
- lastchar = 0;
+ *lastchar = 0;
return NULL;
}
oldsev = ErrSetMessageLevel (SEV_MAX);
@@ -26281,7 +28060,7 @@ ImportOneNucBioseq
sep = ReadOneSegSet (fp, parse_id, err_msg_list, this_chars_stripped);
}
else */
- if (lastchar == 0)
+ if (*lastchar == 0)
{
*lastchar = GetNextCharacterFromFile(fp, isAsn);
}
@@ -26323,7 +28102,7 @@ ImportOneNucBioseq
return sep;
}
-
+//Not used for Autodef or cleanup
static Boolean HasGapID (SeqEntryPtr sep)
{
BioseqPtr bsp;
@@ -26365,6 +28144,7 @@ static Boolean HasGapID (SeqEntryPtr sep)
}
+//Not used for Autodef or cleanup
static Boolean HasNoSeqID (SeqEntryPtr sep)
{
BioseqPtr bsp;
@@ -26384,6 +28164,7 @@ static Boolean HasNoSeqID (SeqEntryPtr sep)
}
+//Not used for Autodef or cleanup
static Int4 FindLineForStartOfBadRead (FILE *fp, Int4 pos)
{
FileCache fc;
@@ -26407,6 +28188,7 @@ static Int4 FindLineForStartOfBadRead (FILE *fp, Int4 pos)
}
+//Not used for Autodef or cleanup
static Int4 FindLineForBadReadChar (FILE *fp, Char badchar)
{
FileCache fc;
@@ -26430,18 +28212,21 @@ static Int4 FindLineForBadReadChar (FILE *fp, Char badchar)
}
+//Not used for Autodef or cleanup
NLM_EXTERN SeqEntryPtr
-ImportNucleotideFASTASequencesFromFile
+ImportNucleotideFASTASequencesFromFileEx
(FILE *fp,
Boolean parse_id,
CharPtr supplied_id_txt,
ValNodePtr PNTR err_msg_list,
BoolPtr chars_stripped,
- Boolean allow_char_stripping)
+ Boolean allow_char_stripping,
+ Nlm_ImportSeqCallbackProc callback,
+ Pointer callback_data)
{
- Int4 count;
+ Int4 seq_count = 0, nt_count = 0;
SeqEntryPtr last;
- Char lastchar;
+ Char lastchar = '\0';
SeqEntryPtr nextsep;
BioseqPtr bsp = NULL;
SeqEntryPtr new_sep_list = NULL;
@@ -26456,8 +28241,6 @@ ImportNucleotideFASTASequencesFromFile
*chars_stripped = FALSE;
}
- count = 0;
-
new_sep_list = NULL;
last = NULL;
@@ -26513,6 +28296,13 @@ ImportNucleotideFASTASequencesFromFile
last->next = nextsep;
last = nextsep;
}
+ seq_count++;
+ if (IS_Bioseq (nextsep) && (bsp = (BioseqPtr) nextsep->data.ptrvalue) != NULL) {
+ nt_count += bsp->length;
+ }
+ if (callback != NULL) {
+ callback (seq_count, nt_count, callback_data);
+ }
}
pos = ftell (fp);
if (!allow_char_stripping && this_chars_stripped)
@@ -26533,7 +28323,22 @@ ImportNucleotideFASTASequencesFromFile
return new_sep_list;
}
+//Not used for Autodef or cleanup
+/* ImportNucleotideFASTASequencesFromFile
+ *
+ * Backward-compatible entry point: forwards all arguments unchanged to
+ * ImportNucleotideFASTASequencesFromFileEx, passing NULL for both the
+ * callback and the callback data, and returns whatever the Ex function
+ * returns.
+ */
+NLM_EXTERN SeqEntryPtr
+ImportNucleotideFASTASequencesFromFile
+(FILE *fp,
+ Boolean parse_id,
+ CharPtr supplied_id_txt,
+ ValNodePtr PNTR err_msg_list,
+ BoolPtr chars_stripped,
+ Boolean allow_char_stripping)
+{
+ /* NULL callback / NULL callback_data: no per-sequence notification is requested */
+ return ImportNucleotideFASTASequencesFromFileEx (fp, parse_id, supplied_id_txt,
+ err_msg_list, chars_stripped, allow_char_stripping, NULL, NULL);
+}
+
+//Not used for Autodef or cleanup
static void StripStopCodons (SeqEntryPtr sep_list)
{
BioseqPtr pbsp;
@@ -26556,6 +28361,7 @@ static void StripStopCodons (SeqEntryPtr sep_list)
}
+//Not used for Autodef or cleanup
NLM_EXTERN SeqEntryPtr ImportProteinFASTASequences
(FILE *fp,
Boolean parse_id,
@@ -26636,6 +28442,7 @@ NLM_EXTERN SeqEntryPtr ImportProteinFASTASequences
}
+//Not used for Autodef or cleanup
NLM_EXTERN void AddUniqueUpdateSequenceIDs (SeqEntryPtr sep)
{
BioseqPtr bsp;
@@ -26666,7 +28473,7 @@ NLM_EXTERN void AddUniqueUpdateSequenceIDs (SeqEntryPtr sep)
AddUniqueUpdateSequenceIDs (sep->next);
}
-
+//Not used for Autodef or cleanup
NLM_EXTERN void
ListBioseqsInSeqEntry
(SeqEntryPtr sep,
@@ -26708,6 +28515,7 @@ ListBioseqsInSeqEntry
}
+//Not used for Autodef or cleanup
static Boolean SeqIdListsOverlap (SeqIdPtr sip1, SeqIdPtr sip2)
{
SeqIdPtr sip_next;
@@ -26739,6 +28547,7 @@ static Boolean SeqIdListsOverlap (SeqIdPtr sip1, SeqIdPtr sip2)
}
+//Not used for Autodef or cleanup
NLM_EXTERN ValNodePtr ShuffleUpdateBioseqList (ValNodePtr PNTR update_bioseq_list, ValNodePtr orig_bioseq_list)
{
ValNodePtr unmatched_list = NULL;
@@ -26807,7 +28616,7 @@ NLM_EXTERN ValNodePtr ShuffleUpdateBioseqList (ValNodePtr PNTR update_bioseq_lis
return unmatched_list;
}
-
+//Not used for Autodef or cleanup
/* This function compares the text from a local ID against the
* report string from non-local IDs in sip_list, useful when
* comparing values from a file in which the user did not specify
@@ -26866,6 +28675,7 @@ NLM_EXTERN Boolean RelaxedSeqIdIn (SeqIdPtr sip, SeqIdPtr sip_list)
}
+//Not used for Autodef or cleanup
NLM_EXTERN BioseqPtr FindBioseqInList (ValNodePtr bioseq_list, SeqIdPtr sip, Int4Ptr position)
{
ValNodePtr vnp;
@@ -26902,6 +28712,7 @@ NLM_EXTERN BioseqPtr FindBioseqInList (ValNodePtr bioseq_list, SeqIdPtr sip, Int
}
+//Not used for Autodef or cleanup
/* This function should find all update Bioseqs that have colliding sequence IDs and
* replace the colliding IDs with new sequence IDs.
*/
@@ -26946,6 +28757,7 @@ NLM_EXTERN void ReplaceCollidingUpdateIDs (ValNodePtr update_bioseq_list, ValNod
}
+//Not used for Autodef or cleanup
NLM_EXTERN void RemoveSequencesWithoutUpdates (ValNodePtr PNTR orig_bioseq_list, ValNodePtr PNTR update_bioseq_list)
{
ValNodePtr orig_prev = NULL, update_prev = NULL;
@@ -27003,6 +28815,7 @@ NLM_EXTERN void RemoveSequencesWithoutUpdates (ValNodePtr PNTR orig_bioseq_list,
}
+//Not used for Autodef or Cleanup
NLM_EXTERN SeqAlignPtr AlignForSequenceUpdate (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp, GlobalAlignFunc align_func)
{
SeqIdPtr old_id;
@@ -27029,6 +28842,7 @@ NLM_EXTERN SeqAlignPtr AlignForSequenceUpdate (BioseqPtr bsp1, BioseqPtr bsp2, B
}
+//Not used for Autodef or Cleanup
static AuthorPtr AuthorFromEndnoteString (CharPtr val)
{
AuthorPtr auth;
@@ -27078,6 +28892,7 @@ static AuthorPtr AuthorFromEndnoteString (CharPtr val)
}
+//Not used for Autodef or Cleanup
NLM_EXTERN PubPtr ParsePubFromEndnote (FILE *fp)
{
ReadBufferData rbd;
@@ -27239,6 +29054,330 @@ NLM_EXTERN PubPtr ParsePubFromEndnote (FILE *fp)
return pub;
}
+//Not used by Autodef or Cleanup
+/* ReplaceStopsWithSelenocysteine
+ *
+ * bsp must be a protein Bioseq.  The protein feature returned by
+ * SeqMgrGetNextFeature must have a name containing "seleno" (case
+ * insensitive); otherwise nothing is done.
+ *
+ * Every '*' in the protein sequence is mapped back to the nucleotide
+ * interval on the coding region.  When that interval reads TGA, a code
+ * break (transl_except) with residue 'U' is appended to the coding region;
+ * otherwise a message is written to log_fp (when log_fp is not NULL).
+ * If at least one code break was added, the coding region is retranslated.
+ *
+ * Returns the number of code breaks added; 0 when bsp is NULL, is not a
+ * protein, has no matching protein feature, or when its coding region or
+ * the Bioseq for the coding region location cannot be found.
+ */
+static Int4 ReplaceStopsWithSelenocysteine(BioseqPtr bsp, FILE *log_fp)
+{
+  SeqFeatPtr prot, cds;
+  SeqMgrFeatContext context;
+  ProtRefPtr prp;
+  CharPtr bases, cp;
+  Int4 pos, dna_len;
+  SeqLocPtr dna_loc;
+  Boolean partial5, partial3;
+  BioseqPtr nbsp;
+  Char nbases[10];
+  CdRegionPtr crp;
+  CodeBreakPtr cbp, last_cbp;
+  CharPtr fmt = "Unable to add transl_except for stop codon at position %d in protein %s because codon is not TGA\n";
+  Char id_buf[PATH_MAX];
+  Int4 num_replaced = 0;
+
+  if (bsp == NULL || !ISA_aa(bsp->mol)) {
+    return num_replaced;
+  }
+  prot = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &context);
+  if (prot == NULL || (prp = (ProtRefPtr) prot->data.value.ptrvalue) == NULL
+      || prp->name == NULL
+      || StringISearch (prp->name->data.ptrvalue, "seleno") == NULL) {
+    return num_replaced;
+  }
+  cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
+  if (cds == NULL) {
+    return num_replaced;
+  }
+  nbsp = BioseqFindFromSeqLoc (cds->location);
+  if (nbsp == NULL) {
+    return num_replaced;
+  }
+  crp = (CdRegionPtr) cds->data.value.ptrvalue;
+  if (crp == NULL) {
+    crp = CdRegionNew ();
+    cds->data.value.ptrvalue = crp;
+  }
+
+  /* append after any code breaks already present instead of replacing them */
+  last_cbp = crp->code_break;
+  while (last_cbp != NULL && last_cbp->next != NULL) {
+    last_cbp = last_cbp->next;
+  }
+
+  CheckSeqLocForPartial (prot->location, &partial5, &partial3);
+  /* find stop codons */
+  bases = GetSequenceByBsp(bsp);
+  if (bases == NULL) {
+    return num_replaced;
+  }
+  cp = StringChr (bases, '*');
+  while (cp != NULL) {
+    pos = (Int4) (cp - bases);
+    dna_loc = productInterval_to_locationIntervals(cds, pos, pos, partial5);
+    /* start from an empty string so a missing or oversized interval can
+     * never be mistaken for TGA and can never overrun the buffer */
+    MemSet (nbases, 0, sizeof (nbases));
+    dna_len = (dna_loc == NULL) ? 0 : SeqLocLen (dna_loc);
+    if (dna_len > 0 && dna_len < (Int4) sizeof (nbases)) {
+      SeqPortStreamLoc (dna_loc, STREAM_EXPAND_GAPS, (Pointer) nbases, NULL);
+    }
+    if (StringICmp (nbases, "TGA") == 0) {
+      cbp = CodeBreakNew ();
+      cbp->loc = dna_loc;   /* ownership of dna_loc passes to the code break */
+      cbp->aa.choice = 1; /* ncbieaa */
+      cbp->aa.value.intvalue = 'U';
+      if (last_cbp == NULL) {
+        crp->code_break = cbp;
+      } else {
+        last_cbp->next = cbp;
+      }
+      last_cbp = cbp;
+      num_replaced ++;
+    } else {
+      if (log_fp != NULL) {
+        SeqIdWrite (SeqIdFindBest (bsp->id, 0), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
+        fprintf (log_fp, fmt, pos + 1, id_buf);
+      }
+      dna_loc = SeqLocFree (dna_loc);
+    }
+    cp = StringChr (cp + 1, '*');
+  }
+  /* the sequence string is no longer needed once scanning is done */
+  bases = MemFree (bases);
+
+  if (num_replaced > 0) {
+    RetranslateOneCDS (cds, cds->idx.entityID, TRUE, TRUE);
+  }
+  return num_replaced;
+}
+
+
+/* Userdata handed to the Bioseq callbacks driven by
+ * ReplaceStopsWithSelenocysteineInSeqEntry and JoinShortTrnas.
+ */
+typedef struct fixlog {
+ Int4 num_replaced; /* running total of fixes; incremented by the callbacks */
+ FILE *log_fp; /* optional log file; the callers test it against NULL before writing */
+} FixLogData, PNTR FixLogPtr;
+
+//Not used by Autodef or Cleanup
+/* Bioseq visitor: runs ReplaceStopsWithSelenocysteine on bsp and adds the
+ * number of replacements to the FixLogData passed in data.  Does nothing
+ * when data is NULL.
+ */
+static void ReplaceStopsWithSelenocysteineCallback (BioseqPtr bsp, Pointer data)
+{
+  FixLogPtr tally = (FixLogPtr) data;
+
+  if (tally != NULL) {
+    tally->num_replaced += ReplaceStopsWithSelenocysteine(bsp, tally->log_fp);
+  }
+}
+
+//Not used by Autodef or Cleanup
+/* Visits every Bioseq in sep with ReplaceStopsWithSelenocysteineCallback.
+ * Returns TRUE when at least one stop was replaced (and, if log_fp is not
+ * NULL, writes the total to it); returns FALSE otherwise.
+ */
+NLM_EXTERN Boolean ReplaceStopsWithSelenocysteineInSeqEntry (SeqEntryPtr sep, FILE *log_fp)
+{
+  FixLogData totals;
+
+  MemSet (&totals, 0, sizeof (FixLogData));
+  totals.log_fp = log_fp;
+
+  VisitBioseqsInSep (sep, &totals, ReplaceStopsWithSelenocysteineCallback);
+
+  if (totals.num_replaced <= 0) {
+    return FALSE;
+  }
+  if (log_fp != NULL) {
+    fprintf (log_fp, "Replaced %d stops with selenocysteine\n", totals.num_replaced);
+  }
+  return TRUE;
+}
+
+
+/* One short tRNA feature collected by JoinShortTrnasCallback, with the
+ * values used to sort and pair it (see trnaMatchNew for how each field
+ * is filled in).
+ */
+typedef struct trnamatch {
+ CharPtr label; /* private copy of the feature context label; freed by trnaMatchFree */
+ Int4 left; /* context->left of the feature */
+ Int4 len; /* SeqLocLen of the feature location */
+ Uint1 strand; /* context->strand of the feature */
+ SeqFeatPtr sfp; /* the tRNA feature itself (not freed by trnaMatchFree) */
+ SeqFeatPtr gene; /* result of GetGeneForFeature for sfp; may be NULL */
+} trnaMatchData, PNTR trnaMatchPtr;
+
+//Not used by Autodef or Cleanup
+/* Allocates a trnaMatchData record for sfp.  The label is copied from
+ * context; left and strand are taken from context; len is the length of
+ * the feature location; gene is looked up with GetGeneForFeature.
+ * Release the record with trnaMatchFree.
+ */
+trnaMatchPtr trnaMatchNew (SeqFeatPtr sfp, SeqMgrFeatContextPtr context)
+{
+  trnaMatchPtr match;
+
+  match = (trnaMatchPtr) MemNew (sizeof (trnaMatchData));
+  match->label  = StringSave(context->label);
+  match->left   = context->left;
+  match->len    = SeqLocLen (sfp->location);
+  match->strand = context->strand;
+  match->sfp    = sfp;
+  match->gene   = GetGeneForFeature (sfp);
+  return match;
+}
+
+
+//Not used by Autodef or Cleanup
+/* Frees a record made by trnaMatchNew, including its label copy.  The
+ * features it points at are left alone.  Returns the result of MemFree
+ * (or t itself when t is NULL), so callers can assign the result back.
+ */
+trnaMatchPtr trnaMatchFree (trnaMatchPtr t)
+{
+  if (t == NULL) {
+    return t;
+  }
+  t->label = MemFree (t->label);
+  return (trnaMatchPtr) MemFree (t);
+}
+
+
+//Not used by Autodef or Cleanup
+/* ValNodeSort comparator for lists of trnaMatchPtr.
+ * Order: label (case-insensitive), then non-minus strand before minus
+ * strand, then by left position -- ascending on non-minus strands,
+ * descending on the minus strand.  Any NULL along the way compares equal.
+ */
+static int LIBCALLBACK SortVnpBytrnaMatch (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+  ValNodePtr   node_a, node_b;
+  trnaMatchPtr a, b;
+  Boolean      a_minus, b_minus;
+  int          cmp;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  node_a = *((ValNodePtr PNTR) ptr1);
+  node_b = *((ValNodePtr PNTR) ptr2);
+  if (node_a == NULL || node_b == NULL) {
+    return 0;
+  }
+  a = (trnaMatchPtr) node_a->data.ptrvalue;
+  b = (trnaMatchPtr) node_b->data.ptrvalue;
+  if (a == NULL || b == NULL) {
+    return 0;
+  }
+
+  cmp = StringICmp (a->label, b->label);
+  if (cmp != 0) {
+    return cmp;
+  }
+
+  a_minus = (Boolean) (a->strand == Seq_strand_minus);
+  b_minus = (Boolean) (b->strand == Seq_strand_minus);
+  if (a_minus && !b_minus) {
+    return 1;
+  }
+  if (!a_minus && b_minus) {
+    return -1;
+  }
+
+  if (a->left == b->left) {
+    return 0;
+  }
+  if (a_minus) {
+    /* minus strand: larger left coordinate sorts first */
+    return (a->left > b->left) ? -1 : 1;
+  }
+  return (a->left > b->left) ? 1 : -1;
+}
+
+
+//Not used by Autodef or Cleanup
+/* AddToLoc
+ *
+ * Replaces *loc with the SeqLocMerge of *loc and add on bsp, freeing the
+ * previous *loc.  single_interval is passed through to SeqLocMerge.
+ * Nothing happens when loc, *loc or add is NULL.  If the merge yields
+ * NULL the existing *loc is kept, so a feature is never left without a
+ * location.
+ */
+static void AddToLoc (SeqLocPtr PNTR loc, SeqLocPtr add, Boolean single_interval, BioseqPtr bsp)
+{
+  SeqLocPtr new_loc;
+
+  if (loc == NULL || *loc == NULL || add == NULL) {
+    return;
+  }
+  new_loc = SeqLocMerge (bsp, *loc, add, single_interval, FALSE, FALSE);
+  if (new_loc == NULL) {
+    /* merge failed: do not throw away the location we already have */
+    return;
+  }
+  SeqLocFree (*loc);
+  *loc = new_loc;
+}
+
+
+//Not used by Autodef or Cleanup
+/* JoinShortTrnasCallback
+ *
+ * Bioseq visitor.  Collects every tRNA feature on bsp whose location is
+ * shorter than 50, sorts them with SortVnpBytrnaMatch, and folds each
+ * feature into the preceding one when both have the same label
+ * (case-insensitive) and are both on / both off the minus strand:
+ *   - the later feature's location is merged into the earlier one,
+ *   - its comment is appended (semicolon separated),
+ *   - its gene, if different from the earlier feature's gene, is merged
+ *     into that gene (when there is one) and marked for deletion,
+ *   - the later feature is marked for deletion.
+ * Marked objects are not removed here; the caller is expected to run
+ * DeleteMarkedObjects.  data may be a FixLogPtr whose num_replaced is
+ * incremented once per join; a NULL data is tolerated.
+ */
+static void JoinShortTrnasCallback(BioseqPtr bsp, Pointer data)
+{
+  FixLogPtr rp;
+  SeqFeatPtr sfp;
+  SeqMgrFeatContext context;
+  ValNodePtr list = NULL, vnp;
+  trnaMatchPtr t_prev, t_this;
+
+  if (bsp == NULL) {
+    return;
+  }
+  rp = (FixLogPtr) data;
+
+  for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_tRNA, &context);
+       sfp != NULL;
+       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_tRNA, &context)) {
+    if (SeqLocLen (sfp->location) < 50) {
+      ValNodeAddPointer (&list, 0, trnaMatchNew(sfp, &context));
+    }
+  }
+
+  if (list != NULL && list->next != NULL) {
+    list = ValNodeSort (list, SortVnpBytrnaMatch);
+    t_prev = NULL;
+    for (vnp = list; vnp != NULL; vnp = vnp->next) {
+      t_this = (trnaMatchPtr) vnp->data.ptrvalue;
+      /* features already marked for deletion can neither absorb nor be absorbed */
+      if (t_this == NULL || t_this->sfp == NULL || t_this->sfp->idx.deleteme) {
+        continue;
+      }
+      if (t_prev != NULL
+          && StringICmp (t_prev->label, t_this->label) == 0
+          && ((t_prev->strand == Seq_strand_minus) == (t_this->strand == Seq_strand_minus))) {
+        AddToLoc (&(t_prev->sfp->location), t_this->sfp->location, FALSE, bsp);
+        if (t_this->gene != NULL && t_this->gene != t_prev->gene) {
+          if (t_prev->gene != NULL) {
+            AddToLoc (&(t_prev->gene->location), t_this->gene->location, TRUE, bsp);
+          }
+          t_this->gene->idx.deleteme = TRUE;
+        } else if (t_this->gene == NULL && t_prev->gene != NULL) {
+          AddToLoc (&(t_prev->gene->location), t_this->sfp->location, TRUE, bsp);
+        }
+        /* when both tRNAs share one gene it is kept as is: deleting it
+         * would strip the gene from the joined feature */
+
+        SetStringValue (&(t_prev->sfp->comment), t_this->sfp->comment, ExistingTextOption_append_semi);
+        t_this->sfp->idx.deleteme = TRUE;
+        if (rp != NULL) {
+          rp->num_replaced ++;
+        }
+      } else {
+        t_prev = t_this;
+      }
+    }
+  }
+
+  for (vnp = list; vnp != NULL; vnp = vnp->next) {
+    vnp->data.ptrvalue = trnaMatchFree (vnp->data.ptrvalue);
+  }
+  list = ValNodeFree (list);
+}
+
+//Not used by Autodef or Cleanup
+/* Runs JoinShortTrnasCallback over every Bioseq in sep, then deletes the
+ * objects that were marked for deletion in sep's entity.  Returns TRUE
+ * when at least one join was made (and, if log_fp is not NULL, writes the
+ * count to it); returns FALSE otherwise.
+ */
+NLM_EXTERN Boolean JoinShortTrnas (SeqEntryPtr sep, FILE *log_fp)
+{
+  FixLogData totals;
+
+  MemSet (&totals, 0, sizeof (FixLogData));
+  totals.log_fp = log_fp;
+
+  VisitBioseqsInSep (sep, &totals, JoinShortTrnasCallback);
+  DeleteMarkedObjects (ObjMgrGetEntityIDForChoice (sep), 0, NULL);
+
+  if (totals.num_replaced <= 0) {
+    return FALSE;
+  }
+  if (log_fp != NULL) {
+    fprintf (log_fp, "Joined %d short tRNAs\n", totals.num_replaced);
+  }
+  return TRUE;
+}
+
+
+//Not part of Autodef or Cleanup
+/* Returns TRUE when key is one of the feature definition subtypes listed
+ * in the switch below, FALSE for every other value.
+ */
+NLM_EXTERN Boolean IsRegulatorySubtype (Uint1 key)
+{
+  switch (key) {
+    case FEATDEF_enhancer:
+    case FEATDEF_promoter:
+    case FEATDEF_CAAT_signal:
+    case FEATDEF_TATA_signal:
+    case FEATDEF_35_signal:
+    case FEATDEF_10_signal:
+    case FEATDEF_RBS:
+    case FEATDEF_GC_signal:
+    case FEATDEF_polyA_signal:
+    case FEATDEF_attenuator:
+    case FEATDEF_terminator:
+    case FEATDEF_misc_signal:
+      return TRUE;
+    default:
+      return FALSE;
+  }
+}
+//LCOV_EXCL_STOP
diff --git a/api/sqnutils.h b/api/sqnutils.h
index 79653c13..b8bff98d 100644
--- a/api/sqnutils.h
+++ b/api/sqnutils.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.639 $
+* $Revision: 6.744 $
*
* File Description:
*
@@ -226,10 +226,21 @@ NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF);
NLM_EXTERN Uint1 CodonToGcIndex (CharPtr codon);
NLM_EXTERN CharPtr GcIndextoCodon (Uint1 index);
+NLM_EXTERN GBQualPtr SortFeatureGBQuals (GBQualPtr list);
+NLM_EXTERN void CleanupDuplicateGBQuals (GBQualPtr PNTR prevgbq);
+
/* finds bioseq from (cds) product, gets largest protein feature packaged on it */
NLM_EXTERN SeqFeatPtr LIBCALL GetBestProteinFeatureUnindexed (SeqLocPtr product);
+/* set coding region partial flags by initial dash and final star in translation */
+
+NLM_EXTERN void CodingRegionPartialsFromTranslation (SeqEntryPtr sep);
+
+/* impose coding region partial flags onto appropriate mRNA and gene features */
+
+NLM_EXTERN void ImposeCodingRegionPartials (SeqEntryPtr sep);
+
/* resynchronizes coding regions with product protein bioseq molinfo and protein feature */
NLM_EXTERN void ResynchCodingRegionPartials (SeqEntryPtr sep);
@@ -245,6 +256,9 @@ NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep);
/* individual feature callbacks for above functions */
+NLM_EXTERN void CDSPartialsFromTranslation (SeqFeatPtr sfp, Pointer userdata);
+NLM_EXTERN void ImposeCDSPartials (SeqFeatPtr sfp, Pointer userdata);
+NLM_EXTERN void ImposeGenePartials (SeqFeatPtr sfp, Pointer userdata);
NLM_EXTERN void ResynchMRNAPartials (SeqFeatPtr sfp, Pointer userdata);
NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata);
NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata);
@@ -281,7 +295,7 @@ NLM_EXTERN void StripSeqFeatSupportAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
/* functions to parse [org=Drosophila melanogaster] and [gene=lacZ] from titles */
/* for example, passing "gene" to SqnTagFind returns "lacZ" */
-#define MAX_SQN_TAGS 32
+#define MAX_SQN_TAGS 200
typedef struct sqntag {
CharPtr query;
@@ -307,7 +321,6 @@ NLM_EXTERN Uint1 EquivalentOrgMod (CharPtr str);
NLM_EXTERN Uint1 EquivalentSubSourceEx (CharPtr str, Boolean allow_discouraged_and_discontinued);
NLM_EXTERN Uint1 EquivalentOrgModEx (CharPtr str, Boolean allow_discouraged_and_discontinued);
-
/* functions to extract BioSource, MolInfo, and Bioseq information from parsed titles */
NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource (
@@ -469,6 +482,10 @@ NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCI (VoidPtr ptr1, VoidPtr ptr2);
NLM_EXTERN int LIBCALLBACK SortVnpByString (VoidPtr ptr1, VoidPtr ptr2);
NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list);
+/* for sorting valnode list by choice */
+
+NLM_EXTERN int LIBCALLBACK SortByChoice (VoidPtr ptr1, VoidPtr ptr2);
+
/* for sorting and uniquing valnode list by data.intvalue */
NLM_EXTERN int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2);
@@ -501,11 +518,13 @@ NLM_EXTERN CharPtr TagFromKey (KeyTag PNTR ktp, Int2 key);
#define BAD_INFERENCE_BODY 3
#define SINGLE_INFERENCE_FIELD 4
#define SPACES_IN_INFERENCE 5
-#define SAME_SPECIES_MISUSED 6
-#define BAD_INFERENCE_ACCESSION 7
-#define BAD_INFERENCE_ACC_VERSION 8
-#define ACC_VERSION_NOT_PUBLIC 9
-#define BAD_ACCESSION_TYPE 10
+#define INFERENCE_HAS_COMMENT 6
+#define SAME_SPECIES_MISUSED 7
+#define BAD_INFERENCE_ACCESSION 8
+#define BAD_INFERENCE_ACC_VERSION 9
+#define ACC_VERSION_NOT_PUBLIC 10
+#define BAD_ACCESSION_TYPE 11
+#define UNRECOGNIZED_DATABASE 12
NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn);
@@ -609,6 +628,7 @@ NLM_EXTERN BioseqPtr ReadDeltaFasta (FILE *fp, Uint2Ptr entityIDptr);
* the sequence, or FALSE if not.
*/
NLM_EXTERN BioseqPtr ReadDeltaFastaEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped);
+NLM_EXTERN BioseqPtr ReadDeltaFastaExEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped, BoolPtr cache_failed);
/* ReadDeltaFastaWithEmptyDefline reads just one delta sequence with an empty
* definition line.
@@ -685,6 +705,10 @@ require reindexing) */
NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep);
+/* AdvancedSeqEntryCleanup also resynchronizes CDS, mRNA, and protein partials */
+
+NLM_EXTERN void AdvancedSeqEntryCleanup (SeqEntryPtr sep);
+
/* cleanup for a single descriptor, after editing */
NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep);
@@ -971,7 +995,6 @@ extern CharPtr AlignmentStringToSequenceString (CharPtr aln_str, Uint1 moltype);
extern SeqEntryPtr MakeSequinDataFromAlignment (TAlignmentFilePtr afp, Uint1 moltype);
extern SeqEntryPtr MakeSequinDataFromAlignmentEx (TAlignmentFilePtr afp, Uint1 moltype, Boolean check_ids);
extern SeqEntryPtr make_seqentry_for_seqentry (SeqEntryPtr sep);
-extern void ProcessPseudoMiscFeatsForEntityID (Uint2 entityID);
extern Boolean ConvertOnePseudoCDSToMiscFeat (SeqFeatPtr sfp);
NLM_EXTERN Boolean ConvertOnePseudoCDSToMiscFeatEx (SeqFeatPtr sfp, Boolean remove_product);
extern void ConvertPseudoCDSToMiscFeatsForEntityID (Uint2 entityID);
@@ -1119,6 +1142,7 @@ typedef enum {
DISC_N_RUNS,
DISC_ZERO_BASECOUNT,
DISC_ADJACENT_PSEUDOGENE,
+ DISC_LONG_NO_ANNOTATION,
DISC_NO_ANNOTATION,
DISC_INFLUENZA_DATE_MISMATCH,
DISC_SHORT_INTRON,
@@ -1135,6 +1159,8 @@ typedef enum {
DISC_SPECVOUCHER_TAXNAME_MISMATCH,
DISC_GENE_PARTIAL_CONFLICT,
DISC_FLATFILE_FIND_ONCALLER,
+ DISC_FLATFILE_FIND_ONCALLER_FIXABLE,
+ DISC_FLATFILE_FIND_ONCALLER_UNFIXABLE,
DISC_CDS_PRODUCT_FIND,
DISC_DUP_DEFLINE,
DUP_DISC_ATCC_CULTURE_CONFLICT,
@@ -1247,6 +1273,27 @@ typedef enum {
ONCALLER_COUNTRY_COLON,
ONCALLER_BIOPROJECT_ID,
ONCALLER_STRAIN_TAXNAME_CONFLICT,
+ ONCALLER_MORE_NAMES_COLLECTED_BY,
+ ONCALLER_MORE_OR_SPEC_NAMES_IDENTIFIED_BY,
+ ONCALLER_SUSPECTED_ORG_IDENTIFIED,
+ ONCALLER_SUSPECTED_ORG_COLLECTED,
+ ONCALLER_SWITCH_STRUCTURED_COMMENT_PREFIX,
+ ONCALLER_CITSUB_AFFIL_DUP_TEXT,
+ ONCALLER_DUPLICATE_PRIMER_SET,
+ END_COLON_IN_COUNTRY,
+ DISC_PROTEIN_NAMES,
+ DISC_TITLE_ENDS_WITH_SEQUENCE,
+ DISC_INCONSISTENT_STRUCTURED_COMMENTS,
+ DISC_INCONSISTENT_DBLINK,
+ DISC_INCONSISTENT_MOLINFO_TECH,
+ DISC_GAPS,
+ DISC_BAD_BGPIPE_QUALS,
+ TEST_SHORT_LNCRNA,
+ TEST_TERMINAL_NS,
+ TEST_ALIGNMENT_HAS_SCORE,
+ UNCULTURED_NOTES_ONCALLER,
+ SEQ_ID_PHRASES,
+ NO_PRODUCT_STRING,
MAX_DISC_TYPE
} DiscrepancyType;
@@ -1254,7 +1301,8 @@ typedef enum {
eReportTypeDiscrepancy = 1,
eReportTypeOnCaller,
eReportTypeMegaReport,
- eReportTypeTSA
+ eReportTypeTSA,
+ eReportType_End
} EDiscrepancyReportType;
extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyReportType report_type);
@@ -1273,7 +1321,7 @@ typedef struct discrepancyconfig
Boolean conf_list[MAX_DISC_TYPE];
Boolean use_feature_table_format;
Boolean use_big_test_set;
- Boolean run_tsa_checks;
+ Boolean is_big_sequence;
} DiscrepancyConfigData, PNTR DiscrepancyConfigPtr;
extern DiscrepancyConfigPtr DiscrepancyConfigFree (DiscrepancyConfigPtr dcp);
@@ -1311,6 +1359,7 @@ extern const CharPtr kOverlappingCDSNeedsNoteFmt;
extern void AddOverlappingCodingRegionDiscrepancies (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void AddDiscrepanciesForMissingOrNonUniqueGeneLocusTagsEx (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list, Boolean exclude_dirsub);
extern void AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
+extern void FindShortIntronsEx (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list, Boolean check_organelles);
extern void FindShortIntrons (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void CheckBioSourceQuals (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void FindExtendablePartials (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
@@ -1328,6 +1377,8 @@ NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointe
NLM_EXTERN void OncallerToolFindEcoNoEnvFix (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void AddExceptionsToShortIntrons (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
+NLM_EXTERN Boolean IsShortrRNA (SeqFeatPtr sfp);
+
/* structure shared by tbl2asn and discrepancy report functions */
typedef struct genprodsetdiscrepancylists {
ValNodePtr cds_product_list;
@@ -1359,6 +1410,7 @@ extern void ConvertGlobalDiscrepancyListToText (ValNodePtr vnp, Boolean use_feat
extern ValNodePtr GetGlobalDiscrepancyItem (GlobalDiscrepancyPtr g);
extern CharPtr GetGlobalDiscrepancyStr (GlobalDiscrepancyPtr g);
NLM_EXTERN int LIBCALLBACK SortVnpByGlobalDiscrepancyString (VoidPtr ptr1, VoidPtr ptr2);
+NLM_EXTERN int LIBCALLBACK SortVnpByGlobalDiscrepancyStringCaseSensitive (VoidPtr ptr1, VoidPtr ptr2);
extern ClickableItemPtr
ReportNonUniqueGlobalDiscrepancy
(ValNodePtr vnp,
@@ -1414,26 +1466,30 @@ typedef struct discreportoutputconfig {
Boolean summary_report;
Boolean add_output_tag;
Boolean add_extra_output_tag;
+ Int4 num_nucs;
} DiscReportOutputConfigData, PNTR DiscReportOutputConfigPtr;
+NLM_EXTERN void AddToOutputConfig(SeqEntryPtr sep, DiscReportOutputConfigPtr c);
+NLM_EXTERN void AddListToOutputConfig(ValNodePtr list, DiscReportOutputConfigPtr c);
typedef struct globaldiscrepreport {
- ValNodePtr locus_tag_list;
- ValNodePtr missing_locus_tag;
- ValNodePtr cds_product_list;
- ValNodePtr missing_cds_product;
- ValNodePtr mrna_product_list;
- ValNodePtr missing_mrna_product;
+ ValNodeBlock locus_tag_list;
+ ValNodeBlock missing_locus_tag;
+ ValNodeBlock cds_product_list;
+ ValNodeBlock missing_cds_product;
+ ValNodeBlock mrna_product_list;
+ ValNodeBlock missing_mrna_product;
ValNodePtr adjacent_locus_tag_disc_list;
- ValNodePtr missing_gnl_list;
- ValNodePtr gnl_list;
+ ValNodeBlock missing_gnl_list;
+ ValNodeBlock gnl_list;
ValNodePtr global_src_qual_vals;
ValNodePtr global_srcs;
+ ValNodeBlock global_prot_name_list;
ValNodePtr src_qual_repeated_list;
ValNodePtr src_qual_multi_list;
- ValNodePtr feature_count_list;
- ValNodePtr discrepancy_list;
+ ValNodeBlock feature_count_list;
+ ValNodeBlock discrepancy_list;
PerformDiscrepancyTest taxlookup;
DiscrepancyConfigPtr test_config;
@@ -1443,10 +1499,12 @@ typedef struct globaldiscrepreport {
NLM_EXTERN GlobalDiscrepReportPtr GlobalDiscrepReportNew ();
NLM_EXTERN GlobalDiscrepReportPtr GlobalDiscrepReportFree (GlobalDiscrepReportPtr g);
NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrepReportPtr g, CharPtr filename);
+NLM_EXTERN Boolean WriteGlobalDiscrepancyReportEx (GlobalDiscrepReportPtr g, FILE *fp, CharPtr extra_comment);
NLM_EXTERN void WriteGlobalDiscrepancyReport (GlobalDiscrepReportPtr g, FILE *fp);
NLM_EXTERN Boolean CollectionDateIsInTheFuture (CharPtr name);
NLM_EXTERN Boolean CollectionDateIsValid (CharPtr name);
+NLM_EXTERN Boolean CollectionDatesInOrder (CharPtr name);
/* for the Barcode Discrepancy Test */
typedef enum {
@@ -1556,6 +1614,8 @@ extern Nlm_QualNameAssoc discontinued_orgmod_subtype_alist[];
extern Nlm_NameNameAssoc orgmod_aliases[];
extern CharPtr GetOrgModQualName (Uint1 subtype);
extern void BioSourceHasOldOrgModQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued);
+NLM_EXTERN void StringHasOrgModPrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref);
+NLM_EXTERN CharPtr StringHasPrefix (CharPtr str, CharPtr pref, Boolean novalneeded, Boolean skippref);
extern Nlm_QualNameAssoc current_subsource_subtype_alist [];
extern Nlm_QualNameAssoc discouraged_subsource_subtype_alist[];
@@ -1567,7 +1627,6 @@ extern Boolean GeneRefMatch (GeneRefPtr grp1, GeneRefPtr grp2);
extern Boolean DbxrefsMatch (ValNodePtr vnp1, ValNodePtr vnp2, Boolean case_sensitive);
extern Boolean XrefsMatch (SeqFeatXrefPtr x1, SeqFeatXrefPtr x2);
extern Boolean ProtRefMatch (ProtRefPtr prp1, ProtRefPtr prp2);
-NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial);
extern void IsCorrectLatLonFormat (CharPtr lat_lon, BoolPtr format_correct, BoolPtr precision_correct, BoolPtr lat_in_range, BoolPtr lon_in_range);
extern CharPtr FixLatLonFormat (CharPtr orig_lat_lon);
@@ -1580,6 +1639,7 @@ extern CharPtr GetCountryFix (CharPtr country, CharPtr PNTR country_list);
extern CharPtr ncrnaClassList[];
extern Int4 NcrnaOTHER;
extern Boolean IsStringInNcRNAClassList (CharPtr str);
+extern Boolean IsStringInRegulatoryClassList (CharPtr str);
extern ValNodePtr ListFeaturesInLocation (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice);
extern ValNodePtr ListCodingRegionsContainedInSourceFeatures (SeqEntryPtr sep);
extern ValNodePtr ListFeaturesOverlappingLocationEx (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice, ValNodePtr constraint);
@@ -1612,6 +1672,7 @@ NLM_EXTERN void ResetCapitalization (Boolean first_is_upper, CharPtr pString);
NLM_EXTERN SeqIdPtr CreateSeqIdFromText (CharPtr id_str, SeqEntryPtr sep);
NLM_EXTERN SeqLocPtr SeqLocWholeNew (BioseqPtr bsp);
NLM_EXTERN Int4 GetDeltaSeqLen (DeltaSeqPtr dsp);
+NLM_EXTERN DeltaSeqPtr GetDeltaSeqForPosition(Int4 pos, BioseqPtr bsp, Int4Ptr pStart);
typedef SeqAlignPtr (*GlobalAlignFunc) PROTO ((BioseqPtr, BioseqPtr, BoolPtr));
@@ -1643,6 +1704,7 @@ LocationContainsGaps
BoolPtr internal_gaps,
BoolPtr entirely_in_gap);
+NLM_EXTERN void SetPartialsAfterSplittingAtGap (SeqLocPtr before, SeqLocPtr after, Boolean set_partial_ends, Boolean partial5, Boolean partial3);
NLM_EXTERN void AdjustFeatureForGapsCallback (SeqFeatPtr sfp, Pointer data);
NLM_EXTERN void MarkFeaturesInGapsForDeletion (AdjustFeatForGapPtr afgp);
NLM_EXTERN void AdjustCDSLocationsForUnknownGapsCallback (SeqFeatPtr sfp, Pointer data);
@@ -1666,43 +1728,10 @@ NLM_EXTERN CharPtr GetStateAbbreviation (CharPtr state);
typedef SeqAlignPtr (*LocalAlignFunc) PROTO ((BioseqPtr, BioseqPtr));
-typedef struct transcriptomeids {
- BioseqPtr consensus_bsp;
- ValNodePtr token_list;
-} TranscriptomeIdsData, PNTR TranscriptomeIdsPtr;
-
-NLM_EXTERN TranscriptomeIdsPtr TranscriptomeIdsNew (BioseqPtr bsp, ValNodePtr token_list);
-NLM_EXTERN TranscriptomeIdsPtr TranscriptomeIdsFree (TranscriptomeIdsPtr t);
-
-NLM_EXTERN ValNodePtr TranscriptomeIdsListFree (ValNodePtr list);
-NLM_EXTERN ValNodePtr GetTranscriptomeIdsList (FILE *fp, SeqEntryPtr sep, ValNodePtr PNTR err_list);
-NLM_EXTERN ValNodePtr GetExistingTSATableIds (SeqEntryPtr sep);
-
-NLM_EXTERN ValNodePtr
-ApplyTranscriptomeIdsListToSeqEntrySeqHist
-(ValNodePtr list,
- LocalAlignFunc aln_func,
- Nlm_ChangeNotifyProc change_notify,
- Pointer change_userdata);
-NLM_EXTERN ValNodePtr
-MakeTranscriptomeAssemblySeqHist
-(TranscriptomeIdsPtr t,
- LocalAlignFunc aln_func,
- Nlm_ChangeNotifyProc change_notify,
- Pointer change_userdata);
-
extern void ReverseAlignmentStrand (SeqAlignPtr salp, Int4 nth);
-NLM_EXTERN Boolean HasExistingSeqHistAssembly (ValNodePtr list);
-NLM_EXTERN void DeleteSeqHistAssembliesForList (ValNodePtr list);
-NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr list);
-
NLM_EXTERN SeqAlignPtr SortPairwiseAlignmentsByFirstSeqRange (SeqAlignPtr salp);
NLM_EXTERN ValNodePtr ReportCoverageForBioseqSeqHist (BioseqPtr bsp);
-NLM_EXTERN ValNodePtr ReportConsensusMatchForBioseqSeqHist (BioseqPtr bsp);
-
-NLM_EXTERN ValNodePtr ReportGapsInSeqHistAlignmentForBioseq (BioseqPtr bsp);
-NLM_EXTERN ValNodePtr ReportGapsInSeqHistAlignmentsForIdsList (ValNodePtr list);
NLM_EXTERN void ConvertLocalIdsToBarcodeIds (SeqEntryPtr sep);
@@ -1730,6 +1759,7 @@ typedef enum {
RemovableMobileElement,
RemovablePrecursorRNA,
RemovablencRNA,
+ RemovableRepeatRegion,
NumRemovableItems
} RemovableList;
NLM_EXTERN CharPtr GetRemovableItemName (Int4 i);
@@ -1777,75 +1807,41 @@ typedef struct modifieritemlocal {
} ModifierItemLocalData, PNTR ModifierItemLocalPtr;
typedef enum {
- DEFLINE_POS_Acronym = 0,
- DEFLINE_POS_Anamorph,
- DEFLINE_POS_Authority,
- DEFLINE_POS_Bio_material,
+ DEFLINE_POS_Bio_material = 0,
DEFLINE_POS_Biotype,
DEFLINE_POS_Biovar,
DEFLINE_POS_Breed,
DEFLINE_POS_Cell_line,
- DEFLINE_POS_Cell_type,
DEFLINE_POS_Chemovar,
DEFLINE_POS_Chromosome,
DEFLINE_POS_Clone,
- DEFLINE_POS_Clone_lib,
- DEFLINE_POS_Collected_by,
- DEFLINE_POS_Collection_date,
- DEFLINE_POS_Common,
DEFLINE_POS_Country,
DEFLINE_POS_Cultivar,
DEFLINE_POS_Culture_collection,
DEFLINE_POS_Dev_stage,
DEFLINE_POS_Ecotype,
DEFLINE_POS_Endogenous_virus_name,
- DEFLINE_POS_Environmental_sample,
- DEFLINE_POS_Forma,
- DEFLINE_POS_Forma_specialis,
- DEFLINE_POS_Frequency,
DEFLINE_POS_Genotype,
- DEFLINE_POS_Germline,
- DEFLINE_POS_Group,
DEFLINE_POS_Haplogroup,
DEFLINE_POS_Haplotype,
- DEFLINE_POS_Specific_host,
- DEFLINE_POS_Identified_by,
DEFLINE_POS_Isolate,
- DEFLINE_POS_Isolation_source,
- DEFLINE_POS_Lab_host,
- DEFLINE_POS_Lat_lon,
DEFLINE_POS_Linkage_group,
DEFLINE_POS_Map,
- DEFLINE_POS_Mating_type,
- DEFLINE_POS_Metagenomic,
- DEFLINE_POS_Note_orgmod,
- DEFLINE_POS_Note_subsrc,
DEFLINE_POS_Pathovar,
DEFLINE_POS_Plasmid_name,
- DEFLINE_POS_Plastid_name,
DEFLINE_POS_Pop_variant,
- DEFLINE_POS_Rearranged,
DEFLINE_POS_Segment,
DEFLINE_POS_Serogroup,
DEFLINE_POS_Serotype,
DEFLINE_POS_Serovar,
- DEFLINE_POS_Sex,
DEFLINE_POS_Specimen_voucher,
DEFLINE_POS_Strain,
DEFLINE_POS_Subclone,
- DEFLINE_POS_Subgroup,
- DEFLINE_POS_Sub_species,
DEFLINE_POS_Substrain,
- DEFLINE_POS_Subtype,
- DEFLINE_POS_Synonym,
- DEFLINE_POS_Teleomorph,
- DEFLINE_POS_Tissue_lib,
- DEFLINE_POS_Tissue_type,
- DEFLINE_POS_Transgenic,
- DEFLINE_POS_Type,
- DEFLINE_POS_Variety
+ DEFLINE_POS_Transgenic
} DefLinePos;
+NLM_EXTERN Int4 GetDeflinePosForFieldName(CharPtr name);
NLM_EXTERN Int4 GetDeflinePosForFieldType (ValNodePtr field);
/* ModifierItemGlobalData is used to store information about the available
@@ -1960,6 +1956,23 @@ AutoDefForSeqEntry
Boolean alternate_splice_flag,
Boolean gene_cluster_opp_strand);
+NLM_EXTERN void
+AutoDefForSeqEntryEx
+(SeqEntryPtr sep,
+Uint2 entityID,
+OrganismDescriptionModifiersPtr odmp,
+ModifierItemLocalPtr modList,
+ValNodePtr modifier_indices,
+DeflineFeatureRequestListPtr feature_requests,
+Int2 product_flag,
+Boolean alternate_splice_flag,
+Boolean gene_cluster_opp_strand,
+Boolean update_options);
+
+NLM_EXTERN void RegenerateAutoDef(BioseqPtr bsp);
+NLM_EXTERN void RemoveAutodefObjects(SeqEntryPtr sep);
+NLM_EXTERN void RemoveAutodefObjectsForDesc(SeqDescPtr sdp);
+
NLM_EXTERN void AddPopsetTitles
(SeqEntryPtr sep,
DeflineFeatureRequestListPtr feature_requests,
@@ -1969,6 +1982,19 @@ NLM_EXTERN void AddPopsetTitles
NLM_EXTERN void RemovePopsetTitles(SeqEntryPtr sep);
+NLM_EXTERN UserObjectPtr MakeAutoDefOptionsUserObject
+(OrganismDescriptionModifiersPtr odmp,
+ModifierItemLocalPtr modList,
+ValNodePtr modifier_indices,
+DeflineFeatureRequestListPtr feature_requests,
+Int2 product_flag,
+Boolean alternate_splice_flag,
+Boolean gene_cluster_opp_strand);
+
+NLM_EXTERN void AddAutoDefUserObjectToSeqEntry(SeqEntryPtr sep, UserObjectPtr uop);
+
+NLM_EXTERN void DoTbl2AsnAutoDef(SeqEntryPtr sep, Uint2 entityID);
+
typedef struct popsetretrostat {
Int4 feature_clause;
Int4 common_title;
@@ -2074,10 +2100,12 @@ NLM_EXTERN Boolean ConvertRegionToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertRegionToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertRegionToRNAFunc (SeqFeatPtr sfp, Uint2 featdef_to);
-NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc (SeqFeatPtr sfp, Uint2 featdef_to);
+NLM_EXTERN Boolean ConvertGeneToImpFeatFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertProtToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp);
NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp);
+NLM_EXTERN Boolean ConvertmRNAToCodingRegion (SeqFeatPtr sfp);
+NLM_EXTERN Boolean ConverttRNAToGene(SeqFeatPtr sfp);
NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep);
NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp);
@@ -2090,9 +2118,6 @@ NLM_EXTERN SeqEntryPtr SequenceStringToSeqEntry (CharPtr str, SeqIdPtr sip, Uint
NLM_EXTERN void RevCompOneFeatForBioseq (SeqFeatPtr sfp, BioseqPtr bsp);
NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent);
-NLM_EXTERN ValNodePtr SplitPubListFree (ValNodePtr list);
-NLM_EXTERN ValNodePtr MakeSplitPubListFromTabList (ValNodePtr PNTR tab_table, SeqEntryPtr sep, ValNodePtr PNTR err_list);
-NLM_EXTERN void SplitPubsByList (ValNodePtr split_list);
/* for parsing collection dates */
NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first, BoolPtr month_ambiguous);
NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date);
@@ -2103,6 +2128,10 @@ NLM_EXTERN Int4 GetMonthNumFromAbbrev (CharPtr month_abbrev);
NLM_EXTERN CharPtr GetMonthAbbrev (Int4 n);
NLM_EXTERN Int4 GetDaysInMonth (Int4 n);
+/* for reformatting assembly date */
+NLM_EXTERN CharPtr AssemblyDateFromCollectionDate (CharPtr collection_date, Boolean ambiguous);
+NLM_EXTERN Boolean ReformatAssemblyDate (CharPtr PNTR orig_date);
+
NLM_EXTERN void CreateStructuredCommentsForAllFromTable (SeqEntryPtr sep, ValNodePtr header, ValNodePtr line, ValNodePtr PNTR err_list);
NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr sep, Boolean apply_to_all);
NLM_EXTERN void AddDatabaseNameToStructuredComment (UserObjectPtr uop, CharPtr dbname);
@@ -2124,14 +2153,15 @@ AlignmentIntervalToString
Int4 PNTR alnbuffer_len,
Boolean show_substitutions);
-/* Compare Functions */
-NLM_EXTERN int LIBCALL ObjectIdCompare (ObjectIdPtr a, ObjectIdPtr b);
-NLM_EXTERN int LIBCALL DbtagCompare (DbtagPtr a, DbtagPtr b);
-NLM_EXTERN int LIBCALL OrgModSetCompare (OrgModPtr mod1, OrgModPtr mod2);
-NLM_EXTERN int LIBCALL OrgNameCompare (OrgNamePtr onp1, OrgNamePtr onp2);
-NLM_EXTERN int LIBCALL OrgRefCompare (OrgRefPtr orp1, OrgRefPtr orp2);
-
-extern void CountNsInSequence (BioseqPtr bsp, Int4Ptr p_total, Int4Ptr p_max_stretch, Boolean expand_gaps);
+extern void CountNsInSequence (
+ BioseqPtr bsp,
+ Int4Ptr p_totalN,
+ Int4Ptr p_totalDash,
+ Int4Ptr p_totalTilde,
+ Int4Ptr p_max_stretch,
+ Boolean expand_gaps,
+ Boolean no_stretch_in_assembly_gap
+);
NLM_EXTERN Boolean IsTSA (BioseqPtr bsp);
NLM_EXTERN Boolean IsPseudo (SeqFeatPtr sfp);
@@ -2189,9 +2219,16 @@ NLM_EXTERN Boolean RemoveDuplicateNestedSetsForEntityID (Uint2 entityID);
NLM_EXTERN Boolean RemoveDuplicateNestedSetsForEntityIDNoUpdate (Uint2 entityID);
NLM_EXTERN void AddStructuredCommentKeywords (Uint2 entityID);
+NLM_EXTERN CharPtr KeywordForStructuredCommentPrefix (CharPtr prefix);
+NLM_EXTERN CharPtr StructuredCommentPrefixForKeyword (CharPtr keyword);
NLM_EXTERN CharPtr KeywordForStructuredCommentName (UserObjectPtr uop);
NLM_EXTERN Boolean HasKeywordForStructuredCommentName (BioseqPtr bsp, UserObjectPtr uop);
+NLM_EXTERN Boolean HasAllKeywordsForStructuredComment (BioseqPtr bsp, CharPtr keyword);
+NLM_EXTERN Boolean HasAnyKeywordForStructuredComment (BioseqPtr bsp, CharPtr keyword);
+NLM_EXTERN ValNodePtr GetAllStructuredCommentKeywords (void);
NLM_EXTERN void RemoveStructuredCommentKeywords (Uint2 entityID);
+NLM_EXTERN void RemoveAllStructuredCommentKeywords (Uint2 entityID);
+NLM_EXTERN ValNodePtr SplitStringAtSemicolon (CharPtr keyword);
NLM_EXTERN void ParseTaxNameToQuals (OrgRefPtr org, TextFsaPtr tags);
@@ -2235,9 +2272,8 @@ NLM_EXTERN Uint1 GetSpecialPlastidGenCode (
NLM_EXTERN Boolean TrimPrimerSeqJunkInSeqEntry (SeqEntryPtr sep, FILE *log_fp);
NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp);
-NLM_EXTERN Boolean TrimStopsFromCompleteCodingRegions (SeqEntryPtr sep, FILE *log_fp);
NLM_EXTERN void AdjustSeqEntryForConsensusSplice (SeqEntryPtr sep);
-NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *log_fp);
+NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *log_fp, Boolean strict);
NLM_EXTERN void
FixCapitalizationInTitle
@@ -2248,6 +2284,8 @@ FixCapitalizationInTitle
NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEntryPtr sep);
NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep);
+NLM_EXTERN void ParseRNAFeatListTableToFeatures (FILE *fp, SeqEntryPtr sep, LogInfoPtr lip);
+
#ifdef OS_MSWIN
NLM_EXTERN Int4 RunSilent(const char *cmdline);
@@ -2271,6 +2309,8 @@ NLM_EXTERN Boolean FixupMouseStrains (SeqEntryPtr sep, FILE *log_fp);
NLM_EXTERN CharPtr StructuredCommentDbnameFromString (CharPtr string);
NLM_EXTERN ValNodePtr GetStructuredCommentPrefixList (void);
+NLM_EXTERN void SetStructuredCommentPrefixAndSuffix (UserObjectPtr uop, CharPtr string);
+
extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields);
NLM_EXTERN Boolean RemoveCultureNotes (SeqEntryPtr sep);
@@ -2282,6 +2322,7 @@ NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp)
NLM_EXTERN Boolean IsNCBIFileID (SeqIdPtr sip);
NLM_EXTERN Boolean IsLocationOrganelle (Uint1 genome);
+NLM_EXTERN Boolean IsBioseqOrganelle (BioseqPtr bsp);
NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2);
NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp);
@@ -2304,7 +2345,9 @@ NLM_EXTERN void RevCompBioseqList (ValNodePtr bsp_list,
Boolean revCompFeats,
Boolean check_for_aln);
NLM_EXTERN Boolean IsBioseqInAnyAlignment (BioseqPtr bsp, Uint2 input_entityID);
+NLM_EXTERN Boolean AreAnyElementsOfSetInAnyAlignment (BioseqSetPtr bssp, Uint2 input_entityID);
NLM_EXTERN void RemoveAlignmentsWithSequence (BioseqPtr bsp, Uint2 input_entityID);
+NLM_EXTERN void RemoveAlignmentsWithElementsOfSet (BioseqSetPtr bssp, Uint2 input_entityID);
NLM_EXTERN void ReplaceComplexLocation (SeqLocPtr slp, SeqAlignPtr salp, Int4 new_len, Int4 begin, Int4 fin);
NLM_EXTERN void ReplaceOneSequence (SeqAlignPtr salp, BioseqPtr oldbsp, BioseqPtr newbsp);
@@ -2314,6 +2357,20 @@ NLM_EXTERN void AddCitSubToUpdatedSequence (BioseqPtr upd_bsp, Uint2 input_entit
NLM_EXTERN ValNodePtr CreateUpdateCitSubFromBestTemplate (SeqEntryPtr top_sep, SeqEntryPtr upd_sep, CharPtr update_txt);
NLM_EXTERN void RemoveQualityScores (BioseqPtr bsp, FILE *log_fp, BoolPtr data_in_log);
NLM_EXTERN void ReplaceFakeIDWithIDFromTitle (BioseqPtr bsp);
+
+typedef void (*Nlm_ImportSeqCallbackProc) PROTO ((Int4, Int4, Pointer));
+
+NLM_EXTERN SeqEntryPtr
+ImportNucleotideFASTASequencesFromFileEx
+(FILE *fp,
+ Boolean parse_id,
+ CharPtr supplied_id_txt,
+ ValNodePtr PNTR err_msg_list,
+ BoolPtr chars_stripped,
+ Boolean allow_char_stripping,
+ Nlm_ImportSeqCallbackProc callback,
+ Pointer callback_data);
+
NLM_EXTERN SeqEntryPtr
ImportNucleotideFASTASequencesFromFile
(FILE *fp,
@@ -2374,6 +2431,7 @@ NLM_EXTERN Int2 GetGenCodeForBsp (BioseqPtr bsp);
typedef enum unverifiedtype {
eUnverifiedType_Organism = 0,
eUnverifiedType_Features ,
+ eUnverifiedType_Misassembled ,
eUnverifiedType_Max
} UnverifiedMatchType;
NLM_EXTERN CharPtr GetUnverifiedMatchName (Int4 unverified_type);
@@ -2384,6 +2442,7 @@ NLM_EXTERN CharPtr GetRepliconLocation (BioSourcePtr biop);
NLM_EXTERN PubPtr ParsePubFromEndnote (FILE *fp);
NLM_EXTERN CharPtr GetDefinitionLineFASTAModifiers (BioseqPtr bsp, Boolean include_subsource);
+NLM_EXTERN CharPtr GetDefinitionLineFASTAModifiersByList (BioseqPtr bsp, ValNodePtr list);
/* for finding frameshifts */
typedef enum {
@@ -2400,10 +2459,87 @@ typedef struct frameshiftreport {
} FrameShiftReportData, PNTR FrameShiftReportPtr;
NLM_EXTERN ValNodePtr FrameShiftReportListFree (ValNodePtr vnp);
-NLM_EXTERN void PrintFrameShiftReportList (ValNodePtr list, Boolean has_exons, LogInfoPtr lip);
+NLM_EXTERN void PrintFrameShiftReportList (ValNodePtr list, Boolean has_exons, Boolean print_exons_only, LogInfoPtr lip);
NLM_EXTERN ValNodePtr FindFrameShiftsInAlignment (SeqAlignPtr salp, BoolPtr has_exons);
NLM_EXTERN Boolean PropagateMissingOldNames (ValNodePtr sep_list);
NLM_EXTERN CharPtr DescribeBioSourceDifferences (BioSourcePtr biop1, BioSourcePtr biop2);
+NLM_EXTERN CharPtr DescribeStructuredCommentDifferences (UserObjectPtr uop1, UserObjectPtr uop2);
+NLM_EXTERN Boolean RemoveDuplicateStructuredCommentsInSeqEntry (SeqEntryPtr sep);
+NLM_EXTERN ValNodePtr GetSUCCommonList (SeqEntryPtr sep, Boolean reverse, Boolean byblock, Boolean showsequence, Boolean byqual);
+
+NLM_EXTERN ValNodePtr LookupArticlesWithEutils (ValNodePtr orig_pub, LogInfoPtr lip);
+NLM_EXTERN Int4 LookupPubsInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
+
+NLM_EXTERN void LogTrimmedLocation (LogInfoPtr lip, SeqLocPtr slp);
+
+NLM_EXTERN void AddListOutputTags(ValNodePtr discrepancy_list, DiscReportOutputConfigPtr oc);
+NLM_EXTERN Boolean IsMrnaSequence (BioseqPtr bsp);
+NLM_EXTERN BioseqPtr BioseqFromAlignmentID (CharPtr PNTR id_str);
+
+NLM_EXTERN Int4 TrimNsFromNucsInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
+NLM_EXTERN void CorrectGenCodes (SeqEntryPtr sep, Uint2 entityID);
+NLM_EXTERN int CompareUserFields (UserFieldPtr ufp1, UserFieldPtr ufp2);
+
+NLM_EXTERN void RemoveEmptyStructuredComments (Uint2 entityID);
+
+NLM_EXTERN Boolean IsStructuredCommentPrefix (UserFieldPtr ufp);
+NLM_EXTERN Boolean IsStructuredCommentSuffix (UserFieldPtr ufp);
+NLM_EXTERN CharPtr GetStructuredCommentPrefix (UserObjectPtr uop);
+
+
+typedef struct fielddiff {
+ ValNodePtr field;
+ CharPtr seq_id;
+ CharPtr biosample_id;
+ CharPtr val1;
+ CharPtr val2;
+ ValNodePtr src;
+} FieldDiffData, PNTR FieldDiffPtr;
+
+NLM_EXTERN FieldDiffPtr FieldDiffFree (FieldDiffPtr diff);
+NLM_EXTERN ValNodePtr LIBCALL FieldDiffListFree (ValNodePtr list);
+NLM_EXTERN ValNodePtr GetBioSourceFieldDiffs (CharPtr seq_id, CharPtr biosample_id, BioSourcePtr biop1, BioSourcePtr biop2, ValNodePtr field_list, Uint1 src_type, Pointer src_data);
+NLM_EXTERN ValNodePtr GetStructuredCommentFieldDiffs (CharPtr seq_id, CharPtr biosample_id, UserObjectPtr uop1, UserObjectPtr uop2, ValNodePtr field_list, Uint1 src_type, Pointer src_data);
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffField (VoidPtr ptr1, VoidPtr ptr2);
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffBioIdThenField (VoidPtr ptr1, VoidPtr ptr2);
+NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffBiosampleIdThenFieldThenVal (VoidPtr ptr1, VoidPtr ptr2);
+
+NLM_EXTERN Boolean FindFlankingGenes (SeqLocPtr location, SeqFeatPtr PNTR firstP, SeqFeatPtr PNTR lastP);
+NLM_EXTERN void AssignGeneXrefToFeat (SeqFeatPtr sfp, SeqFeatPtr gene);
+
+
+/* for cleanup of BioSources */
+NLM_EXTERN void ConsolidateBioSourceNotes (BioSourcePtr biop);
+NLM_EXTERN void ConsolidateOneLikeOrganismModifier (OrgModPtr match_to, Boolean use_semicolon);
+NLM_EXTERN void ConsolidateOneLikeSubSourceModifier (SubSourcePtr match_to, Boolean use_semicolon);
+
+#define kAllowManualGenCodeException "genetic code exception"
+
+
+NLM_EXTERN Boolean ReplaceStopsWithSelenocysteineInSeqEntry (SeqEntryPtr sep, FILE *log_fp);
+NLM_EXTERN Boolean JoinShortTrnas (SeqEntryPtr sep, FILE *log_fp);
+
+NLM_EXTERN Boolean IsDBLinkObject (UserObjectPtr uop);
+
+typedef struct gaplocdata {
+ Int4 start;
+ Int4 length;
+ CharPtr estimated_length;
+ CharPtr gap_type;
+ CharPtr linkage_evidence;
+ Boolean unknown_length;
+} GapLocData, PNTR GapLocPtr;
+
+
+void PopulateGapLocQuals(GapLocPtr glp, SeqFeatPtr sfp, Int4 left, Int4 len);
+GapLocPtr GapLocFromSeqFeat(SeqFeatPtr sfp, Int4 left);
+Boolean IncompatibleGapFeatQuals (SeqFeatPtr sfp);
+void BioseqToDeltaByGapFeat (BioseqPtr bsp, Pointer userdata);
+void BioseqToDeltaMergeGapFeat (BioseqPtr bsp, Pointer userdata);
+Boolean DeltaLitOnly (BioseqPtr bsp);
+Boolean MergeAssemblyGapFeats (BioseqPtr bsp);
+
+NLM_EXTERN Boolean IsRegulatorySubtype (Uint1 key);
#ifdef __cplusplus
diff --git a/api/subutil.c b/api/subutil.c
index f9c74e1a..bd653106 100644
--- a/api/subutil.c
+++ b/api/subutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.99 $
+* $Revision: 6.104 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -2077,7 +2077,7 @@ NLM_EXTERN Boolean AddOrganismToEntry (
{
ValNodePtr vnp;
OrgRefPtr orp;
- Char buf[80];
+ Char buf[128];
if ((submission == NULL) || (entry == NULL))
return FALSE;
@@ -4476,7 +4476,9 @@ NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr
ufp->label = oip;
ufp->choice = 2; /* integer */
ufp->data.intvalue = gi;
- last->next = ufp;
+ if (last != NULL) {
+ last->next = ufp;
+ }
last = ufp;
}
@@ -4487,7 +4489,9 @@ NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr
ufp->label = oip;
ufp->choice = 1; /* visible string */
ufp->data.ptrvalue = (Pointer) StringSave (comment);
- last->next = ufp;
+ if (last != NULL) {
+ last->next = ufp;
+ }
last = ufp;
}
@@ -4501,7 +4505,9 @@ NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr
ufp->label = oip;
ufp->choice = 2; /* integer */
ufp->data.intvalue = from;
- last->next = ufp;
+ if (last != NULL) {
+ last->next = ufp;
+ }
last = ufp;
ufp = UserFieldNew ();
@@ -4516,7 +4522,7 @@ NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr
NLM_EXTERN UserObjectPtr CreateMrnaProteinLinkUserObject (BioseqPtr bsp)
{
- Char buf [80];
+ Char buf [128];
ObjectIdPtr oip;
SeqIdPtr sip;
UserFieldPtr ufp;
@@ -5370,12 +5376,13 @@ NLM_EXTERN UserObjectPtr CreateDBLinkUserObject (
return uop;
}
-NLM_EXTERN void AddTraceAssemblyIDsToDBLinkUserObject (
+
+NLM_EXTERN void AddIntListFieldToDBLinkUserObject (
UserObjectPtr uop,
Int4 num,
- Int4Ptr values
+ Int4Ptr values,
+ CharPtr field_name
)
-
{
UserFieldPtr curr;
Int4 i;
@@ -5389,7 +5396,7 @@ NLM_EXTERN void AddTraceAssemblyIDsToDBLinkUserObject (
for (curr = uop->data; curr != NULL; curr = curr->next) {
oip = curr->label;
- if (oip != NULL && StringICmp (oip->str, "Trace Assembly Archive") == 0) {
+ if (oip != NULL && StringICmp (oip->str, field_name) == 0) {
break;
}
prev = curr;
@@ -5398,7 +5405,7 @@ NLM_EXTERN void AddTraceAssemblyIDsToDBLinkUserObject (
if (curr == NULL) {
curr = UserFieldNew ();
oip = ObjectIdNew ();
- oip->str = StringSave ("Trace Assembly Archive");
+ oip->str = StringSave (field_name);
curr->label = oip;
curr->choice = 8; /* sequence of integer */
@@ -5423,63 +5430,23 @@ NLM_EXTERN void AddTraceAssemblyIDsToDBLinkUserObject (
curr->data.ptrvalue = (Pointer) ip;
}
-NLM_EXTERN void AddBioSampleIDsToDBLinkUserObject (
+
+NLM_EXTERN void AddTraceAssemblyIDsToDBLinkUserObject (
UserObjectPtr uop,
Int4 num,
- CharPtr PNTR values
+ Int4Ptr values
)
{
- CharPtr PNTR cpp;
- UserFieldPtr curr;
- Int4 i;
- UserFieldPtr prev = NULL;
- ObjectIdPtr oip;
-
- if (uop == NULL || values == NULL) return;
- oip = uop->type;
- if (oip == NULL || StringICmp (oip->str, "DBLink") != 0) return;
-
- for (curr = uop->data; curr != NULL; curr = curr->next) {
- oip = curr->label;
- if (oip != NULL && StringICmp (oip->str, "BioSample") == 0) {
- break;
- }
- prev = curr;
- }
-
- if (curr == NULL) {
- curr = UserFieldNew ();
- oip = ObjectIdNew ();
- oip->str = StringSave ("BioSample");
- curr->label = oip;
- curr->choice = 7; /* sequence of string */
-
- /* link new set at end of list */
-
- if (prev != NULL) {
- prev->next = curr;
- } else {
- uop->data = curr;
- }
- }
-
- if (curr == NULL || curr->choice != 7) return;
-
- cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num));
- if (cpp == NULL) return;
-
- curr->num = num;
- for (i = 0; i < num; i++) {
- cpp [i] = StringSaveNoNull (values [i]);
- }
- curr->data.ptrvalue = (Pointer) cpp;
+ AddIntListFieldToDBLinkUserObject (uop, num, values, "Trace Assembly Archive");
}
-NLM_EXTERN void AddSeqReadArchIDsToDBLinkUserObject (
+
+NLM_EXTERN void AddStringListFieldToDBLinkUserObject (
UserObjectPtr uop,
Int4 num,
- CharPtr PNTR values
+ CharPtr PNTR values,
+ CharPtr field_name
)
{
@@ -5495,7 +5462,7 @@ NLM_EXTERN void AddSeqReadArchIDsToDBLinkUserObject (
for (curr = uop->data; curr != NULL; curr = curr->next) {
oip = curr->label;
- if (oip != NULL && StringICmp (oip->str, "Sequence Read Archive") == 0) {
+ if (oip != NULL && StringICmp (oip->str, field_name) == 0) {
break;
}
prev = curr;
@@ -5504,7 +5471,7 @@ NLM_EXTERN void AddSeqReadArchIDsToDBLinkUserObject (
if (curr == NULL) {
curr = UserFieldNew ();
oip = ObjectIdNew ();
- oip->str = StringSave ("Sequence Read Archive");
+ oip->str = StringSave (field_name);
curr->label = oip;
curr->choice = 7; /* sequence of string */
@@ -5529,58 +5496,25 @@ NLM_EXTERN void AddSeqReadArchIDsToDBLinkUserObject (
curr->data.ptrvalue = (Pointer) cpp;
}
-NLM_EXTERN void AddFieldsToDBLinkUserObject (
+
+NLM_EXTERN void AddBioSampleIDsToDBLinkUserObject (
UserObjectPtr uop,
- CharPtr field_name,
Int4 num,
CharPtr PNTR values
)
{
- CharPtr PNTR cpp;
- UserFieldPtr curr;
- Int4 i;
- UserFieldPtr prev = NULL;
- ObjectIdPtr oip;
-
- if (uop == NULL || values == NULL) return;
- oip = uop->type;
- if (oip == NULL || StringICmp (oip->str, "DBLink") != 0) return;
-
- for (curr = uop->data; curr != NULL; curr = curr->next) {
- oip = curr->label;
- if (oip != NULL && StringICmp (oip->str, field_name) == 0) {
- break;
- }
- prev = curr;
- }
-
- if (curr == NULL) {
- curr = UserFieldNew ();
- oip = ObjectIdNew ();
- oip->str = StringSave (field_name);
- curr->label = oip;
- curr->choice = 7; /* sequence of string */
-
- /* link new set at end of list */
-
- if (prev != NULL) {
- prev->next = curr;
- } else {
- uop->data = curr;
- }
- }
-
- if (curr == NULL || curr->choice != 7) return;
+ AddStringListFieldToDBLinkUserObject(uop, num, values, "BioSample");
+}
- cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num));
- if (cpp == NULL) return;
+NLM_EXTERN void AddSeqReadArchIDsToDBLinkUserObject (
+ UserObjectPtr uop,
+ Int4 num,
+ CharPtr PNTR values
+)
- curr->num = num;
- for (i = 0; i < num; i++) {
- cpp [i] = StringSaveNoNull (values [i]);
- }
- curr->data.ptrvalue = (Pointer) cpp;
+{
+ AddStringListFieldToDBLinkUserObject(uop, num, values, "Sequence Read Archive");
}
@@ -5591,50 +5525,7 @@ NLM_EXTERN void AddProbeDBIDsToDBLinkUserObject (
)
{
- CharPtr PNTR cpp;
- UserFieldPtr curr;
- Int4 i;
- UserFieldPtr prev = NULL;
- ObjectIdPtr oip;
-
- if (uop == NULL || values == NULL) return;
- oip = uop->type;
- if (oip == NULL || StringICmp (oip->str, "DBLink") != 0) return;
-
- for (curr = uop->data; curr != NULL; curr = curr->next) {
- oip = curr->label;
- if (oip != NULL && StringICmp (oip->str, "ProbeDB") == 0) {
- break;
- }
- prev = curr;
- }
-
- if (curr == NULL) {
- curr = UserFieldNew ();
- oip = ObjectIdNew ();
- oip->str = StringSave ("ProbeDB");
- curr->label = oip;
- curr->choice = 7; /* sequence of string */
-
- /* link new set at end of list */
-
- if (prev != NULL) {
- prev->next = curr;
- } else {
- uop->data = curr;
- }
- }
-
- if (curr == NULL || curr->choice != 7) return;
-
- cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num));
- if (cpp == NULL) return;
-
- curr->num = num;
- for (i = 0; i < num; i++) {
- cpp [i] = StringSaveNoNull (values [i]);
- }
- curr->data.ptrvalue = (Pointer) cpp;
+ AddStringListFieldToDBLinkUserObject(uop, num, values, "ProbeDB");
}
NLM_EXTERN void AddSeqReadArchiveIDsToDBLinkUserObject (
@@ -5644,50 +5535,7 @@ NLM_EXTERN void AddSeqReadArchiveIDsToDBLinkUserObject (
)
{
- CharPtr PNTR cpp;
- UserFieldPtr curr;
- Int4 i;
- UserFieldPtr prev = NULL;
- ObjectIdPtr oip;
-
- if (uop == NULL || values == NULL) return;
- oip = uop->type;
- if (oip == NULL || StringICmp (oip->str, "DBLink") != 0) return;
-
- for (curr = uop->data; curr != NULL; curr = curr->next) {
- oip = curr->label;
- if (oip != NULL && StringICmp (oip->str, "Sequence Read Archive") == 0) {
- break;
- }
- prev = curr;
- }
-
- if (curr == NULL) {
- curr = UserFieldNew ();
- oip = ObjectIdNew ();
- oip->str = StringSave ("Sequence Read Archive");
- curr->label = oip;
- curr->choice = 7; /* sequence of string */
-
- /* link new set at end of list */
-
- if (prev != NULL) {
- prev->next = curr;
- } else {
- uop->data = curr;
- }
- }
-
- if (curr == NULL || curr->choice != 7) return;
-
- cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num));
- if (cpp == NULL) return;
-
- curr->num = num;
- for (i = 0; i < num; i++) {
- cpp [i] = StringSaveNoNull (values [i]);
- }
- curr->data.ptrvalue = (Pointer) cpp;
+ AddStringListFieldToDBLinkUserObject(uop, num, values, "Sequence Read Archive");
}
NLM_EXTERN void AddBioProjectIDsToDBLinkUserObject (
@@ -5697,50 +5545,7 @@ NLM_EXTERN void AddBioProjectIDsToDBLinkUserObject (
)
{
- CharPtr PNTR cpp;
- UserFieldPtr curr;
- Int4 i;
- UserFieldPtr prev = NULL;
- ObjectIdPtr oip;
-
- if (uop == NULL || values == NULL) return;
- oip = uop->type;
- if (oip == NULL || StringICmp (oip->str, "DBLink") != 0) return;
-
- for (curr = uop->data; curr != NULL; curr = curr->next) {
- oip = curr->label;
- if (oip != NULL && StringICmp (oip->str, "BioProject") == 0) {
- break;
- }
- prev = curr;
- }
-
- if (curr == NULL) {
- curr = UserFieldNew ();
- oip = ObjectIdNew ();
- oip->str = StringSave ("BioProject");
- curr->label = oip;
- curr->choice = 7; /* sequence of string */
-
- /* link new set at end of list */
-
- if (prev != NULL) {
- prev->next = curr;
- } else {
- uop->data = curr;
- }
- }
-
- if (curr == NULL || curr->choice != 7) return;
-
- cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num));
- if (cpp == NULL) return;
-
- curr->num = num;
- for (i = 0; i < num; i++) {
- cpp [i] = StringSaveNoNull (values [i]);
- }
- curr->data.ptrvalue = (Pointer) cpp;
+ AddStringListFieldToDBLinkUserObject(uop, num, values, "BioProject");
}
NLM_EXTERN UserObjectPtr CreateNcbiCleanupUserObject (
diff --git a/api/subutil.h b/api/subutil.h
index 8b7c1aaf..d21056b6 100644
--- a/api/subutil.h
+++ b/api/subutil.h
@@ -31,7 +31,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.88 $
+* $Revision: 6.93 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -624,6 +624,8 @@ NLM_EXTERN Boolean AddGenBankBlockToEntry (
#define GENOME_hydrogenosome 20
#define GENOME_chromosome 21
#define GENOME_chromatophore 22
+#define GENOME_plasmid_in_mitochondrion 23
+#define GENOME_plasmid_in_plastid 24
/********************************************
* Genome describes the type of genome from which the DNA or gene for
@@ -694,6 +696,7 @@ NLM_EXTERN Boolean AddGenomeToEntry (
#define SUBSRC_haplogroup 40
#define SUBSRC_whole_replicon 41
#define SUBSRC_phenotype 42
+#define SUBSRC_altitude 43
#define SUBSRC_other 255
/*********************************************
@@ -743,6 +746,7 @@ NLM_EXTERN Boolean AddGenomeToEntry (
haplogroup (40) ,
whole-replicon (41) ,
phenotype (42) ,
+ altitude (43) ,
other (255) } ,
* value is an optional string to give the name (eg. of the
@@ -790,6 +794,7 @@ NLM_EXTERN Boolean AddSubSourceToEntry (
#define ORGMOD_culture_collection 35
#define ORGMOD_bio_material 36
#define ORGMOD_metagenome_source 37
+#define ORGMOD_type_material 38
#define ORGMOD_old_lineage 253
#define ORGMOD_old_name 254
#define ORGMOD_other 255
@@ -1565,32 +1570,38 @@ NLM_EXTERN UserObjectPtr CreateDBLinkUserObject (
void
);
+NLM_EXTERN void AddIntListFieldToDBLinkUserObject (
+ UserObjectPtr uop,
+ Int4 num,
+ Int4Ptr values,
+ CharPtr field_name
+);
+
NLM_EXTERN void AddTraceAssemblyIDsToDBLinkUserObject (
UserObjectPtr uop,
Int4 num,
Int4Ptr values
);
-NLM_EXTERN void AddBioSampleIDsToDBLinkUserObject (
+NLM_EXTERN void AddStringListFieldToDBLinkUserObject (
UserObjectPtr uop,
Int4 num,
- CharPtr PNTR values
+ CharPtr PNTR values,
+ CharPtr field_name
);
-NLM_EXTERN void AddSeqReadArchIDsToDBLinkUserObject (
+NLM_EXTERN void AddBioSampleIDsToDBLinkUserObject (
UserObjectPtr uop,
Int4 num,
CharPtr PNTR values
);
-NLM_EXTERN void AddFieldsToDBLinkUserObject (
+NLM_EXTERN void AddSeqReadArchIDsToDBLinkUserObject (
UserObjectPtr uop,
- CharPtr field_name,
Int4 num,
CharPtr PNTR values
);
-
NLM_EXTERN void AddProbeDBIDsToDBLinkUserObject (
UserObjectPtr uop,
Int4 num,
diff --git a/api/tofasta.c b/api/tofasta.c
index 112a3ab6..fecf9d2f 100644
--- a/api/tofasta.c
+++ b/api/tofasta.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.256 $
+* $Revision: 6.313 $
*
* File Description: various sequence objects to fasta output
*
@@ -744,7 +744,7 @@ typedef struct streamfsa {
Int2 blocklen;
Int2 grouplen;
Int2 skip;
- Int4 gi;
+ BIG_ID gi;
Int4 start;
Int4 seqpos;
Boolean seqspans;
@@ -881,6 +881,7 @@ static void AddSubSourceValuesToNucTitle (
)
{
+ Boolean needsQuotes;
CharPtr ssp_name;
SubSourcePtr ssp;
Char text [256];
@@ -896,8 +897,20 @@ static void AddSubSourceValuesToNucTitle (
StringCat (text, ssp_name);
}
StringToLower (text);
+ needsQuotes = FALSE;
+ if (StringChr (ssp->name, '=') != NULL ||
+ StringChr (ssp->name, '[') != NULL ||
+ StringChr (ssp->name, ']') != NULL) {
+ needsQuotes = TRUE;
+ }
StringCat (text, "=");
+ if (needsQuotes) {
+ StringCat (text, "\"");
+ }
StringCat (text, ssp->name);
+ if (needsQuotes) {
+ StringCat (text, "\"");
+ }
StringCat (text, "] ");
StringCat (str, text);
ssp = ssp->next;
@@ -912,6 +925,7 @@ static void AddOrgModValuesToNucTitle (
{
CharPtr mod_name;
OrgModPtr mod;
+ Boolean needsQuotes;
Char text [256];
if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL || str == NULL) return;
@@ -921,8 +935,20 @@ static void AddOrgModValuesToNucTitle (
mod_name = GetOrgModQualName (mod->subtype);
StringCat (text, mod_name);
StringToLower (text);
+ needsQuotes = FALSE;
+ if (StringChr (mod->subname, '=') != NULL ||
+ StringChr (mod->subname, '[') != NULL ||
+ StringChr (mod->subname, ']') != NULL) {
+ needsQuotes = TRUE;
+ }
StringCat (text, "=");
+ if (needsQuotes) {
+ StringCat (text, "\"");
+ }
StringCat (text, mod->subname);
+ if (needsQuotes) {
+ StringCat (text, "\"");
+ }
StringCat (text, "] ");
StringCat (str, text);
mod = mod->next;
@@ -936,6 +962,7 @@ static CharPtr MakeNucleotideTitleInSequinStyle (
{
BioSourcePtr biop;
MolInfoPtr mip;
+ Boolean needsQuotes;
OrgNamePtr onp;
OrgRefPtr orp;
SeqDescrPtr sdp;
@@ -976,8 +1003,20 @@ static CharPtr MakeNucleotideTitleInSequinStyle (
orp = biop->org;
if (orp != NULL) {
+ needsQuotes = FALSE;
+ if (StringChr (orp->taxname, '=') != NULL ||
+ StringChr (orp->taxname, '[') != NULL ||
+ StringChr (orp->taxname, ']') != NULL) {
+ needsQuotes = TRUE;
+ }
StringCpy (text, "[organism=");
+ if (needsQuotes) {
+ StringCat (text, "\"");
+ }
StringCat (text, orp->taxname);
+ if (needsQuotes) {
+ StringCat (text, "\"");
+ }
StringCat (text, "] ");
StringCat (str, text);
}
@@ -1050,10 +1089,11 @@ static Int4 BioseqFastaStreamInternal (
Char buf [4096];
Char ch, ch1, ch2, ch3;
Int4 count = 0;
- Int4 gi = -1;
+ BIG_ID gi = -1;
SeqIdPtr gpp = NULL;
Char id [128];
Uint1 id_format = PRINTID_FASTA_LONG;
+ CharPtr original_id = NULL;
CharPtr ptr;
StreamFsa sf;
SeqIdPtr sip = NULL;
@@ -1198,6 +1238,9 @@ static Int4 BioseqFastaStreamInternal (
}
if (do_defline) {
id [0] = '\0';
+ if (ShouldUseOriginalID (bsp)) {
+ original_id = FastaGetOriginalId (bsp);
+ }
if (substitute_ids) {
sip = ChooseFastaID (bsp, sorted_prot);
} else if (bsp != NULL) {
@@ -1206,7 +1249,11 @@ static Int4 BioseqFastaStreamInternal (
if ((flags & STREAM_ALL_FASTA_IDS) != 0) {
id_format = PRINTID_FASTA_ALL;
}
- SeqIdWrite (sip, id, id_format, sizeof (id) - 1);
+ if (original_id != NULL && StringLen (original_id) + 5 < sizeof (id)) {
+ sprintf (id, "lcl|%s", original_id);
+ } else {
+ SeqIdWrite (sip, id, id_format, sizeof (id) - 1);
+ }
/* no longer need to do feature indexing if title not present to speed up creation */
/*
sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_title, NULL);
@@ -1227,6 +1274,12 @@ static Int4 BioseqFastaStreamInternal (
}
tmp = buf;
ch = *tmp;
+ if (ch == '>') {
+ *tmp = '_';
+ }
+ /*
+ tmp = buf;
+ ch = *tmp;
while (ch != '\0') {
if (ch == '>') {
*tmp = '_';
@@ -1234,6 +1287,7 @@ static Int4 BioseqFastaStreamInternal (
tmp++;
ch = *tmp;
}
+ */
if (sf.fp != NULL) {
fprintf (fp, ">%s %s\n", id, buf);
} else if (sf.bs != NULL) {
@@ -1406,7 +1460,7 @@ static void DoSpecialDefline (
Uint2 entityID;
SeqFeatPtr gene = NULL;
SeqMgrFeatContext genecontext;
- Int4 gi;
+ BIG_ID gi;
GeneRefPtr grp;
IntAsn2gbJob iaj;
SeqLocPtr loc;
@@ -1981,6 +2035,7 @@ static void FastaOneBioseq (
bsp->repr == Seq_repr_seg ||
bsp->repr == Seq_repr_const ||
bsp->repr == Seq_repr_delta ||
+ bsp->repr == Seq_repr_ref ||
bsp->repr == Seq_repr_virtual) {
count = BioseqFastaStreamEx (bsp, fsp->fp, fsp->flags, fsp->linelen, fsp->blocklen, fsp->grouplen,
TRUE, fsp->substitute_ids, fsp->sorted_prot);
@@ -2066,6 +2121,53 @@ NLM_EXTERN Int4 SeqEntryFastaStream (
{ return SeqEntryFastaStreamEx (sep, fp, flags, linelen, blocklen, grouplen, do_na, do_aa, master_style, FALSE, FALSE);
}
+NLM_EXTERN void MakeFastaStreamIdSuffix (
+ SeqFeatPtr sfp,
+ Uint4 idx,
+ CharPtr prefix,
+ CharPtr buf,
+ Boolean do_product,
+ Boolean do_feat_id
+)
+
+{
+ Char fbuf [64];
+ BIG_ID gi;
+ BioseqPtr pbsp;
+ Char pbuf [64];
+ SeqIdPtr sip;
+
+ if (sfp == NULL || buf == NULL) return;
+
+ StringCpy (buf, prefix);
+ fbuf [0] = '\0';
+ pbuf [0] = '\0';
+ if (do_product && sfp->product != NULL) {
+ pbsp = BioseqFindFromSeqLoc (sfp->product);
+ if (pbsp != NULL) {
+ SeqIdWrite (pbsp->id, pbuf, PRINTID_TEXTID_ACC_VER, sizeof (pbuf) - 1);
+ } else {
+ sip = SeqLocId (sfp->product);
+ if (sip != NULL && sip->choice == SEQID_GI) {
+ gi = sip->data.intvalue;
+ sip = GetSeqIdForGI (gi);
+ if (sip != NULL) {
+ SeqIdWrite (sip, pbuf, PRINTID_TEXTID_ACC_VER, sizeof (pbuf) - 1);
+ }
+ }
+ }
+ }
+ if (StringDoesHaveText (pbuf)) {
+ StringCat (buf, "_");
+ StringCat (buf, pbuf);
+ }
+ if (do_feat_id && idx > 0) {
+ sprintf (fbuf, "%ld", (long) idx);
+ StringCat (buf, "_");
+ StringCat (buf, fbuf);
+ }
+}
+
/*****************************************************************************
*
* Here are functions that convert FASTA format from file or from memory
@@ -2620,7 +2722,7 @@ static Boolean FastaReadSequenceInternalEx
}
static SeqIdPtr MakeTrustedID (CharPtr prefix, Int2Ptr ctrptr)
{
- Char buf[40];
+ Char buf[128];
ValNodePtr newid;
ObjectIdPtr oid;
Int2 start = 1;
@@ -2912,6 +3014,7 @@ NLM_EXTERN Boolean FastaId(BioseqPtr bsp, CharPtr buf, Uint4 buflen)
SeqIdWrite(bsp->id, buf, PRINTID_FASTA_LONG, buflen);
return TRUE;
}
+
static Boolean FastaIdX(BioseqPtr bsp, CharPtr buf, Uint4 buflen, Boolean printid_general, SeqLocPtr seqloc)
{
Int4 length;
@@ -2932,6 +3035,85 @@ static Boolean FastaIdX(BioseqPtr bsp, CharPtr buf, Uint4 buflen, Boolean printi
}
return TRUE;
}
+
+NLM_EXTERN CharPtr FastaGetOriginalId (BioseqPtr bsp)
+
+{
+ CharPtr id;
+ ObjectIdPtr oip;
+ SeqDescrPtr sdp;
+ UserFieldPtr ufp;
+ UserObjectPtr uop;
+
+ if (bsp == NULL) return NULL;
+
+ for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
+ if (sdp->choice != Seq_descr_user) continue;
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop == NULL) continue;
+ oip = uop->type;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "OrginalID") != 0 && StringCmp (oip->str, "OriginalID") != 0) continue;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ if (StringCmp (oip->str, "LocalId") != 0) continue;
+ if (ufp->choice != 1) continue;
+ id = (CharPtr) ufp->data.ptrvalue;
+ if (id == NULL) continue;
+ return id;
+ }
+ }
+
+ return NULL;
+}
+
+NLM_EXTERN Boolean ShouldUseOriginalID (BioseqPtr bsp)
+
+{
+ DbtagPtr dbt;
+ SeqIdPtr sip;
+
+ if (bsp == NULL) return FALSE;
+
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ switch (sip->choice) {
+ case SEQID_LOCAL :
+ break;
+ case SEQID_GENERAL :
+ dbt = (DbtagPtr) sip->data.ptrvalue;
+ if (dbt != NULL) {
+ if (! IsSkippableDbtag (dbt)) return FALSE;
+ }
+ break;
+ default :
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+NLM_EXTERN Boolean FastaIdEx(BioseqPtr bsp, CharPtr buf, Uint4 buflen, Boolean prefer_original_ID)
+{
+ CharPtr id;
+ SeqIdPtr sip;
+
+ if ((bsp == NULL) || (buf == NULL)) return FALSE;
+ if (prefer_original_ID) {
+ sip = bsp->id;
+ if (ShouldUseOriginalID (bsp)) {
+ id = FastaGetOriginalId (bsp);
+ if (id != NULL && StringLen (id) + 5 < buflen) {
+ sprintf (buf, "lcl|%s", id);
+ return TRUE;
+ }
+ }
+ }
+ SeqIdWrite(bsp->id, buf, PRINTID_FASTA_LONG, buflen);
+ return TRUE;
+}
+
/*****************************************************************************
*
* FastaDefLine(bsp, buf, buflen, accession, organism)
@@ -2951,7 +3133,7 @@ NLM_EXTERN Boolean FastaDefLine (BioseqPtr bsp, CharPtr buf, Uint4 buflen,
PatentSeqIdPtr psip;
Uint4 diff, phase;
Int4 num_segs, num_gaps;
- Char tbuf[80];
+ Char tbuf[128];
static CharPtr htgs[2] = {
"unordered", "ordered" };
if ((bsp == NULL) || (buf == NULL)) return FALSE;
@@ -4653,7 +4835,7 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
Uint4 diff, phase, i;
Boolean doit;
Int4 num_segs, num_gaps;
- static Char tbuf[80];
+ static Char tbuf[128];
static CharPtr htgs[2] = {
"unordered", "ordered" };
static CharPtr htg_phrase[3] = {
@@ -4728,6 +4910,14 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
if (StringCmp (tsip->accession + 6, "000000") == 0) {
wgsmaster = TRUE;
}
+ } else if (StringLen (tsip->accession) == 13) {
+ if (StringCmp (tsip->accession + 6, "0000000") == 0) {
+ wgsmaster = TRUE;
+ }
+ } else if (StringLen (tsip->accession) == 14) {
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ wgsmaster = TRUE;
+ }
}
}
break;
@@ -5254,7 +5444,7 @@ do_virtual)
Int2 ctr = 0;
Uint1 residue;
Int4 pos;
- Char idbuf[40];
+ Char idbuf[128];
if ((spp == NULL) || (buf == NULL)) return FALSE;
while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF)
{
@@ -5409,19 +5599,47 @@ NLM_EXTERN void ClearGenBankKeywords (Uint2 entityID, Pointer ptr)
VisitDescriptorsInSep (sep, NULL, ClearKeywordsProc);
DeleteMarkedObjects (entityID, 0, NULL);
}
+
+static void IsNcCallback (BioseqPtr bsp, Pointer userdata)
+
+{
+ BoolPtr is_ncP;
+ SeqIdPtr sip;
+ TextSeqIdPtr tsip;
+
+ if (bsp == NULL) return;
+ is_ncP = (BoolPtr) userdata;
+ if (is_ncP == NULL) return;
+
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice != SEQID_OTHER) continue;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip == NULL) continue;
+ if (StringNICmp (tsip->accession, "NC_", 3) == 0) {
+ *is_ncP = TRUE;
+ }
+ }
+}
+
NLM_EXTERN void NC_Cleanup (Uint2 entityID, Pointer ptr)
{
Boolean objMgrFilt [OBJ_MAX];
+ Boolean is_nc = FALSE;
SeqEntryPtr sep;
+
if (entityID == 0) {
entityID = ObjMgrGetEntityIDForPointer (ptr);
}
if (entityID == 0) return;
+
+ sep = GetTopSeqEntryForEntityID (entityID);
+ VisitBioseqsInSep (sep, (Pointer) &is_nc, IsNcCallback);
+ if (! is_nc) return;
+
AssignIDsInEntity (entityID, 0, NULL);
MemSet ((Pointer) objMgrFilt, FALSE, sizeof (objMgrFilt));
objMgrFilt [OBJ_SEQDESC] = TRUE;
GatherObjectsInEntity (entityID, 0, NULL, RemoveAllTitles, NULL, objMgrFilt);
- sep = GetTopSeqEntryForEntityID (entityID);
VisitDescriptorsInSep (sep, NULL, ClearKeywordsProc);
DeleteMarkedObjects (entityID, 0, NULL);
MemSet ((Pointer) objMgrFilt, FALSE, sizeof (objMgrFilt));
@@ -5627,6 +5845,9 @@ typedef struct deflinestruct {
Boolean m_reconstruct;
Boolean m_allprotnames;
+ Boolean m_gpipemode;
+ Boolean m_devmode;
+
/* seq-inst fields */
Boolean m_is_na;
Boolean m_is_aa;
@@ -5634,6 +5855,8 @@ typedef struct deflinestruct {
Boolean m_is_seg;
Boolean m_is_delta;
Boolean m_is_virtual;
+ Boolean m_is_map;
+ Uint1 m_topology;
/* seq-id fields */
Boolean m_is_nc;
@@ -5641,9 +5864,11 @@ typedef struct deflinestruct {
Boolean m_is_nr;
Boolean m_is_patent;
Boolean m_is_pdb;
+ Boolean m_is_wp;
Boolean m_third_party;
Boolean m_wgs_master;
Boolean m_tsa_master;
+ Boolean m_tls_master;
CharPtr m_general_str;
CharPtr m_patent_country;
@@ -5659,6 +5884,7 @@ typedef struct deflinestruct {
Boolean m_htg_tech;
Boolean m_htgs_unfinished;
+ Boolean m_is_tls;
Boolean m_is_tsa;
Boolean m_is_wgs;
Boolean m_is_est_sts_gss;
@@ -5672,13 +5898,23 @@ typedef struct deflinestruct {
Boolean m_tpa_exp;
Boolean m_tpa_inf;
Boolean m_tpa_reasm;
+ Boolean m_unordered;
/* pdb block fields */
CharPtr m_pdb_compound;
/* biosource fields */
CharPtr m_taxname;
+ Boolean m_multispecies;
int m_genome;
+ Boolean m_is_plasmid;
+ Boolean m_is_chromosome;
+
+ CharPtr m_organelle;
+
+ CharPtr m_first_super_kingdom;
+ CharPtr m_second_super_kingdom;
+ Boolean m_is_cross_kingdom;
/* subsource fields */
CharPtr m_chromosome;
@@ -5694,8 +5930,15 @@ typedef struct deflinestruct {
CharPtr m_isolate;
CharPtr m_strain;
+ /* map fields */
+ CharPtr m_enzyme;
+
/* user object fields */
Boolean m_is_unverified;
+ CharPtr m_targeted_locus;
+
+ /* comment fields */
+ Boolean m_is_pseudogene;
/* exception fields */
TextFsaPtr m_low_quality_fsa;
@@ -5733,12 +5976,121 @@ static Boolean x_CDShasLowQualityException (
return FALSE;
}
+static CharPtr x_OrganelleName (
+ DefLinePtr dlp,
+ Boolean has_plasmid,
+ Boolean virus_or_phage,
+ Boolean wgs_suffix
+)
+
+{
+ CharPtr result = NULL;
+
+ if (dlp == NULL) return NULL;
+
+ switch (dlp->m_genome) {
+ case GENOME_chloroplast :
+ result = "chloroplast";
+ break;
+ case GENOME_chromoplast :
+ result = "chromoplast";
+ break;
+ case GENOME_kinetoplast :
+ result = "kinetoplast";
+ break;
+ case GENOME_mitochondrion :
+ {
+ if (has_plasmid || wgs_suffix) {
+ result = "mitochondrial";
+ } else {
+ result = "mitochondrion";
+ }
+ break;
+ }
+ case GENOME_plastid :
+ result = "plastid";
+ break;
+ case GENOME_macronuclear :
+ {
+ result = "macronuclear";
+ break;
+ }
+ case GENOME_extrachrom :
+ {
+ if (! wgs_suffix) {
+ result = "extrachromosomal";
+ }
+ break;
+ }
+ case GENOME_plasmid :
+ {
+ if (! wgs_suffix) {
+ result = "plasmid";
+ }
+ break;
+ }
+ /* transposon and insertion-seq are obsolete */
+ case GENOME_cyanelle :
+ result = "cyanelle";
+ break;
+ case GENOME_proviral :
+ {
+ if (! virus_or_phage) {
+ if (has_plasmid || wgs_suffix) {
+ result = "proviral";
+ } else {
+ result = "provirus";
+ }
+ }
+ break;
+ }
+ case GENOME_virion :
+ {
+ if (! virus_or_phage) {
+ result = "virus";
+ }
+ break;
+ }
+ case GENOME_nucleomorph :
+ {
+ if (! wgs_suffix) {
+ result = "nucleomorph";
+ }
+ break;
+ }
+ case GENOME_apicoplast :
+ result = "apicoplast";
+ break;
+ case GENOME_leucoplast :
+ result = "leucoplast";
+ break;
+ case GENOME_proplastid :
+ result = "proplastid";
+ break;
+ case GENOME_endogenous_virus :
+ result = "endogenous virus";
+ break;
+ case GENOME_hydrogenosome :
+ result = "hydrogenosome";
+ break;
+ case GENOME_chromosome :
+ result = "chromosome";
+ break;
+ case GENOME_chromatophore :
+ result = "chromatophore";
+ break;
+ }
+
+ return result;
+}
+
/* set instance variables from Seq-inst, Seq-ids, MolInfo, etc., but not BioSource */
static void x_SetFlags (
DefLinePtr dlp
)
{
+ BioSourcePtr biop;
BioseqPtr bsp;
IdPatPtr cit;
ValNodePtr compound;
@@ -5747,15 +6099,24 @@ static void x_SetFlags (
GBBlockPtr gbp;
DbtagPtr general;
ValNodePtr keywords;
+ size_t len;
MolInfoPtr mip;
+ Int2 num_super_kingdom = 0;
ObjectIdPtr oip;
+ OrgNamePtr onp;
+ OrgRefPtr orp;
PdbBlockPtr pbp;
PDBSeqIdPtr pdbip;
PatentSeqIdPtr psip;
+ RsiteRefPtr rrp;
SeqDescrPtr sdp;
+ SeqFeatPtr sfp;
SeqIdPtr sip;
CharPtr str;
+ Boolean super_kingdoms_different = FALSE;
+ TaxElementPtr tep;
TextSeqIdPtr tsip;
+ UserFieldPtr ufp;
UserObjectPtr uop;
ValNodePtr vnp;
@@ -5766,10 +6127,12 @@ static void x_SetFlags (
dlp->m_is_na = (Boolean) ISA_na (bsp->mol);
dlp->m_is_aa = (Boolean) ISA_aa (bsp->mol);
+ dlp->m_topology = bsp->topology;
dlp->m_is_seg = (Boolean) (bsp->repr == Seq_repr_seg);
dlp->m_is_delta = (Boolean) (bsp->repr == Seq_repr_delta);
dlp->m_is_virtual = (Boolean) (bsp->repr == Seq_repr_virtual);
+ dlp->m_is_map = (Boolean) (bsp->repr == Seq_repr_map);
/* process Seq-ids */
for (sip = bsp->id; sip != NULL; sip = sip->next) {
@@ -5785,6 +6148,22 @@ static void x_SetFlags (
dlp->m_is_nm = TRUE;
} else if (StringNICmp (tsip->accession, "NR_", 3) == 0) {
dlp->m_is_nr = TRUE;
+ } else if (StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ dlp->m_is_wp = TRUE;
+ }
+ len = StringLen (tsip->accession);
+ if (len == 15) {
+ if (StringCmp (tsip->accession + 9, "000000") == 0) {
+ dlp->m_wgs_master = TRUE;
+ }
+ } else if (len == 16) {
+ if (StringCmp (tsip->accession + 9, "0000000") == 0) {
+ dlp->m_wgs_master = TRUE;
+ }
+ } else if (len == 17) {
+ if (StringCmp (tsip->accession + 10, "0000000") == 0) {
+ dlp->m_wgs_master = TRUE;
+ }
}
}
break;
@@ -5793,10 +6172,19 @@ static void x_SetFlags (
case SEQID_DDBJ :
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL && tsip->accession != NULL) {
- if (StringLen (tsip->accession) == 12) {
+ len = StringLen (tsip->accession);
+ if (len == 12) {
if (StringCmp (tsip->accession + 6, "000000") == 0) {
dlp->m_wgs_master = TRUE;
}
+ } else if (len == 13) {
+ if (StringCmp (tsip->accession + 6, "0000000") == 0) {
+ dlp->m_wgs_master = TRUE;
+ }
+ } else if (len == 14) {
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ dlp->m_wgs_master = TRUE;
+ }
}
}
break;
@@ -5886,6 +6274,13 @@ static void x_SetFlags (
dlp->m_tsa_master = TRUE;
}
break;
+ case MI_TECH_targeted :
+ dlp->m_is_tls = TRUE;
+ dlp->m_use_biosrc = TRUE;
+ if (dlp->m_is_virtual) {
+ dlp->m_tls_master = TRUE;
+ }
+ break;
default :
break;
}
@@ -5901,47 +6296,74 @@ static void x_SetFlags (
if (uop == NULL) continue;
oip = uop->type;
if (oip == NULL) continue;
- if (StringICmp (oip->str, "Unverified") != 0) continue;
- dlp->m_is_unverified = TRUE;
+ if (StringICmp (oip->str, "Unverified") == 0) {
+ dlp->m_is_unverified = TRUE;
+ } else if (StringICmp (oip->str, "AutodefOptions") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL) continue;
+ if (StringICmp (oip->str, "Targeted Locus Name") != 0) continue;
+ if (ufp->choice != 1) continue;
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ dlp->m_targeted_locus = str;
+ }
+ }
+ }
+
+ /* process comments */
+ for (sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_comment, NULL);
+ sdp != NULL;
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_comment, sdp)) {
+ if (sdp->choice != Seq_descr_comment) continue;
+ str = (CharPtr) sdp->data.ptrvalue;
+ if (str == NULL) continue;
+ if (StringISearch (str, "[CAUTION] Could be the product of a pseudogene") != 0) {
+ dlp->m_is_pseudogene = TRUE;
+ }
}
- if (dlp->m_htg_tech || dlp->m_third_party) {
- /* process keywords */
- keywords = NULL;
+ /* process keywords */
+ keywords = NULL;
- sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_genbank, NULL);
- if (sdp != NULL && sdp->choice == Seq_descr_genbank) {
- gbp = (GBBlockPtr) sdp->data.ptrvalue;
- if (gbp != NULL) {
- keywords = gbp->keywords;
- }
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_genbank, NULL);
+ if (sdp != NULL && sdp->choice == Seq_descr_genbank) {
+ gbp = (GBBlockPtr) sdp->data.ptrvalue;
+ if (gbp != NULL) {
+ keywords = gbp->keywords;
}
- if (keywords == NULL) {
- sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_embl, NULL);
- if (sdp != NULL && sdp->choice == Seq_descr_embl) {
- ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
- if (ebp != NULL) {
- keywords = ebp->keywords;
- }
+ }
+ if (keywords == NULL) {
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_embl, NULL);
+ if (sdp != NULL && sdp->choice == Seq_descr_embl) {
+ ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
+ if (ebp != NULL) {
+ keywords = ebp->keywords;
}
}
- if (keywords != NULL) {
- for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
- str = (CharPtr) vnp->data.ptrvalue;
- if (StringHasNoText (str)) continue;
- if (StringICmp (str, "HTGS_DRAFT") == 0) {
- dlp->m_htgs_draft = TRUE;
- } else if (StringICmp (str, "HTGS_CANCELLED") == 0) {
- dlp->m_htgs_cancelled = TRUE;
- } else if (StringICmp (str, "HTGS_POOLED_MULTICLONE") == 0) {
- dlp->m_htgs_pooled = TRUE;
- } else if (StringICmp (str, "TPA:experimental") == 0) {
- dlp->m_tpa_exp = TRUE;
- } else if (StringICmp (str, "TPA:inferential") == 0) {
- dlp->m_tpa_inf = TRUE;
- } else if (StringICmp (str, "TPA:reassembly") == 0) {
- dlp->m_tpa_reasm = TRUE;
- }
+ }
+ if (keywords != NULL) {
+ for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ if (StringICmp (str, "UNORDERED") == 0) {
+ dlp->m_unordered = TRUE;
+ }
+ if (! dlp->m_htg_tech && ! dlp->m_third_party) continue;
+ if (StringICmp (str, "HTGS_DRAFT") == 0) {
+ dlp->m_htgs_draft = TRUE;
+ } else if (StringICmp (str, "HTGS_CANCELLED") == 0) {
+ dlp->m_htgs_cancelled = TRUE;
+ } else if (StringICmp (str, "HTGS_POOLED_MULTICLONE") == 0) {
+ dlp->m_htgs_pooled = TRUE;
+ } else if (StringICmp (str, "TPA:experimental") == 0) {
+ dlp->m_tpa_exp = TRUE;
+ } else if (StringICmp (str, "TPA:inferential") == 0) {
+ dlp->m_tpa_inf = TRUE;
+ } else if (StringICmp (str, "TPA:reassembly") == 0) {
+ dlp->m_tpa_reasm = TRUE;
+ } else if (StringICmp (str, "TPA:assembly") == 0) {
+ dlp->m_tpa_reasm = TRUE;
}
}
}
@@ -5960,6 +6382,46 @@ static void x_SetFlags (
}
}
}
+
+ if (dlp->m_is_wp) {
+ for (sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
+ sdp != NULL;
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, sdp)) {
+ if (sdp->choice != Seq_descr_source) continue;
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop == NULL) continue;
+ orp = biop->org;
+ if (orp == NULL) continue;
+ onp = orp->orgname;
+ if (onp == NULL) continue;
+ if (onp->choice != 5) continue;
+ for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
+ if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
+ num_super_kingdom++;
+ if (dlp->m_first_super_kingdom == NULL) {
+ dlp->m_first_super_kingdom = tep->name;
+ } else if (StringICmp (dlp->m_first_super_kingdom, tep->name) != 0) {
+ dlp->m_second_super_kingdom = tep->name;
+ super_kingdoms_different = TRUE;
+ }
+ if (num_super_kingdom > 1 && super_kingdoms_different) {
+ dlp->m_is_cross_kingdom = TRUE;
+ }
+ }
+ }
+ }
+ }
+
+ if (dlp->m_is_map) {
+ for (sfp = (SeqFeatPtr) bsp->seq_ext; sfp != NULL; sfp = sfp->next) {
+ if (sfp->data.choice != SEQFEAT_RSITE) continue;
+ rrp = (RsiteRefPtr) sfp->data.value.ptrvalue;
+ if (rrp == NULL) continue;
+ if (rrp->choice == 1) {
+ dlp->m_enzyme = (CharPtr) rrp->data.ptrvalue;
+ }
+ }
+ }
}
/* set instance variables from BioSource */
@@ -5993,13 +6455,15 @@ static void x_SetBioSrc (
)
{
- BioSourcePtr biop;
- BioseqPtr bsp;
- OrgModPtr omp;
- OrgNamePtr onp;
- OrgRefPtr orp;
- SeqDescrPtr sdp;
- SubSourcePtr ssp;
+ BioSourcePtr biop;
+ BioseqPtr bsp;
+ Boolean has_plasmid = FALSE, wgs_suffix = FALSE, virus_or_phage = FALSE;
+ OrgModPtr omp;
+ OrgNamePtr onp;
+ OrgRefPtr orp;
+ SeqDescrPtr sdp;
+ SubSourcePtr ssp;
+ TaxElementPtr tep;
if (dlp == NULL) return;
@@ -6017,6 +6481,8 @@ static void x_SetBioSrc (
}
}
dlp->m_genome = biop->genome;
+ dlp->m_is_plasmid = (Boolean) (dlp->m_genome == GENOME_plasmid);
+ dlp->m_is_chromosome = (Boolean) (dlp->m_genome == GENOME_chromosome);
/* process SubSource */
for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
@@ -6047,6 +6513,15 @@ static void x_SetBioSrc (
if (orp != NULL) {
onp = orp->orgname;
if (onp != NULL) {
+ if (onp->choice == 5) {
+ for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
+ if (tep->fixed_level > 0) {
+ dlp->m_multispecies = TRUE;
+ } else if (StringDoesHaveText (tep->level) && StringICmp (tep->level, "species") != 0) {
+ dlp->m_multispecies = TRUE;
+ }
+ }
+ }
for (omp = onp->mod; omp != NULL; omp = omp->next) {
if (StringHasNoText (omp->subname)) continue;
switch (omp->subtype) {
@@ -6079,6 +6554,22 @@ static void x_SetBioSrc (
}
}
+ if (StringISearch (dlp->m_taxname, "virus") != NULL ||
+ StringISearch (dlp->m_taxname, "phage") != NULL) {
+ virus_or_phage = TRUE;
+ }
+
+ if (StringDoesHaveText (dlp->m_plasmid)) {
+ has_plasmid = TRUE;
+ }
+
+ if (dlp->m_is_wgs) {
+ wgs_suffix = TRUE;
+ }
+
+ dlp->m_organelle = x_OrganelleName (dlp, has_plasmid, virus_or_phage, wgs_suffix);
+
+
if (dlp->m_has_clone) return;
VisitFeaturesOnBsp (bsp, (Pointer) dlp, x_SetSrcClone);
@@ -6222,7 +6713,7 @@ static CharPtr x_DescribeClones (
)
{
- Char buf [40];
+ Char buf [128];
Char ch;
Int4 count;
size_t len;
@@ -6265,7 +6756,8 @@ static CharPtr x_DescribeClones (
}
static Boolean x_EndsWithStrain (
- DefLinePtr dlp
+ DefLinePtr dlp,
+ CharPtr strain
)
{
@@ -6274,9 +6766,9 @@ static Boolean x_EndsWithStrain (
CharPtr nxt;
CharPtr ptr;
- if (dlp == NULL) return FALSE;
+ if (dlp == NULL || strain == NULL) return FALSE;
- len = StringLen (dlp->m_strain);
+ len = StringLen (strain);
if (len >= StringLen (dlp->m_taxname)) return FALSE;
ptr = StringChr (dlp->m_taxname, ' ');
@@ -6286,13 +6778,13 @@ static Boolean x_EndsWithStrain (
if (ptr == NULL) return FALSE;
ptr++;
- ptr = StringISearch (dlp->m_taxname, dlp->m_strain);
+ ptr = StringISearch (dlp->m_taxname, strain);
if (ptr == NULL) return FALSE;
- nxt = StringISearch (ptr + 1, dlp->m_strain);
+ nxt = StringISearch (ptr + 1, strain);
while (nxt != NULL) {
ptr = nxt;
- nxt = StringISearch (ptr + 1, dlp->m_strain);
+ nxt = StringISearch (ptr + 1, strain);
}
ptr += len;
@@ -6323,7 +6815,6 @@ static CharPtr x_TitleFromBioSrc (
)
{
- Char ch;
CharPtr result = NULL, cln, stn, ptr;
ValNodePtr strings = NULL;
@@ -6332,15 +6823,33 @@ static CharPtr x_TitleFromBioSrc (
ValNodeCopyStr (&strings, 0, dlp->m_taxname);
if (StringDoesHaveText (dlp->m_strain)) {
- if (! x_EndsWithStrain (dlp)) {
+ stn = StringSave (dlp->m_strain);
+ ptr = StringChr (stn, ';');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ if (! x_EndsWithStrain (dlp, stn)) {
ValNodeCopyStr (&strings, 0, " strain ");
- stn = StringSave (dlp->m_strain);
- ptr = StringChr (stn, ';');
- if (ptr != NULL) {
- *ptr = '\0';
- }
ValNodeCopyStr (&strings, 0, stn);
- MemFree (stn);
+ }
+ MemFree (stn);
+ }
+
+ if (StringDoesHaveText (dlp->m_breed)) {
+ ValNodeCopyStr (&strings, 0, " breed ");
+ ValNodeCopyStr (&strings, 0, dlp->m_breed);
+ }
+
+ if (StringDoesHaveText (dlp->m_cultivar)) {
+ ValNodeCopyStr (&strings, 0, " cultivar ");
+ ValNodeCopyStr (&strings, 0, dlp->m_cultivar);
+ }
+
+ if (StringDoesHaveText (dlp->m_isolate)) {
+ /* x_EndsWithStrain just checks for supplied pattern, using here for isolate */
+ if (! x_EndsWithStrain (dlp, dlp->m_isolate)) {
+ ValNodeCopyStr (&strings, 0, " isolate ");
+ ValNodeCopyStr (&strings, 0, dlp->m_isolate);
}
}
@@ -6360,130 +6869,38 @@ static CharPtr x_TitleFromBioSrc (
ValNodeCopyStr (&strings, 0, dlp->m_map);
}
+ if (StringDoesHaveText (dlp->m_organelle)) {
+ if (StringCmp (dlp->m_organelle, "chromosome") == 0) {
+ /*
+ if (StringHasNoText (dlp->m_chromosome)) {
+ ValNodeCopyStr (&strings, 0, " ");
+ ValNodeCopyStr (&strings, 0, dlp->m_organelle);
+ }
+ */
+ } else if (StringCmp (dlp->m_organelle, "plasmid") == 0) {
+ if (StringHasNoText (dlp->m_plasmid) && StringHasNoText (dlp->m_chromosome)) {
+ ValNodeCopyStr (&strings, 0, " ");
+ ValNodeCopyStr (&strings, 0, dlp->m_organelle);
+ }
+ } else {
+ ValNodeCopyStr (&strings, 0, " ");
+ ValNodeCopyStr (&strings, 0, dlp->m_organelle);
+ }
+ }
+
if (StringDoesHaveText (dlp->m_plasmid)) {
- if (dlp->m_is_wgs) {
+ if (StringStr (dlp->m_plasmid, "plasmid") == NULL) {
ValNodeCopyStr (&strings, 0, " plasmid ");
- ValNodeCopyStr (&strings, 0, dlp->m_plasmid);
+ } else {
+ ValNodeCopyStr (&strings, 0, " ");
}
+ ValNodeCopyStr (&strings, 0, dlp->m_plasmid);
}
result = x_CatenateValNodeStrings (strings);
ValNodeFreeData (strings);
if (result == NULL) return NULL;
- ch = result [0];
- if (IS_LOWER (ch)) {
- result [0] = TO_UPPER (ch);
- }
-
- return result;
-}
-
-static CharPtr x_OrganelleName (
- DefLinePtr dlp,
- Boolean has_plasmid,
- Boolean virus_or_phage,
- Boolean wgs_suffix
-)
-
-{
- CharPtr result = NULL;
-
- if (dlp == NULL) return NULL;
-
- switch (dlp->m_genome) {
- case GENOME_chloroplast :
- result = "chloroplast";
- break;
- case GENOME_chromoplast :
- result = "chromoplast";
- break;
- case GENOME_kinetoplast :
- result = "kinetoplast";
- break;
- case GENOME_mitochondrion :
- {
- if (has_plasmid || wgs_suffix) {
- result = "mitochondrial";
- } else {
- result = "mitochondrion";
- }
- break;
- }
- case GENOME_plastid :
- result = "plastid";
- break;
- case GENOME_macronuclear :
- {
- result = "macronuclear";
- break;
- }
- case GENOME_extrachrom :
- {
- if (! wgs_suffix) {
- result = "extrachromosomal";
- }
- break;
- }
- case GENOME_plasmid :
- {
- if (! wgs_suffix) {
- result = "plasmid";
- }
- break;
- }
- /* transposon and insertion-seq are obsolete */
- case GENOME_cyanelle :
- result = "cyanelle";
- break;
- case GENOME_proviral :
- {
- if (! virus_or_phage) {
- if (has_plasmid || wgs_suffix) {
- result = "proviral";
- } else {
- result = "provirus";
- }
- }
- break;
- }
- case GENOME_virion :
- {
- if (! virus_or_phage) {
- result = "virus";
- }
- break;
- }
- case GENOME_nucleomorph :
- {
- if (! wgs_suffix) {
- result = "nucleomorph";
- }
- break;
- }
- case GENOME_apicoplast :
- result = "apicoplast";
- break;
- case GENOME_leucoplast :
- result = "leucoplast";
- break;
- case GENOME_proplastid :
- result = "proplastid";
- break;
- case GENOME_endogenous_virus :
- result = "endogenous virus";
- break;
- case GENOME_hydrogenosome :
- result = "hydrogenosome";
- break;
- case GENOME_chromosome :
- result = "chromosome";
- break;
- case GENOME_chromatophore :
- result = "chromatophore";
- break;
- }
-
return result;
}
@@ -6520,11 +6937,9 @@ static CharPtr x_TitleFromNC (
)
{
- Char ch;
CharPtr completeseq = ", complete sequence";
CharPtr completegen = ", complete genome";
- Boolean is_chromosome, is_plasmid, has_plasmid = FALSE, virus_or_phage = FALSE;
- CharPtr result = NULL, orgnl = NULL, pls_pfx = "";
+ CharPtr result = NULL, pls_pfx = "";
ValNodePtr strings = NULL;
if (dlp == NULL) return NULL;
@@ -6534,20 +6949,6 @@ static CharPtr x_TitleFromNC (
if (StringHasNoText (dlp->m_taxname)) return NULL;
- if (StringISearch (dlp->m_taxname, "virus") != NULL ||
- StringISearch (dlp->m_taxname, "phage") != NULL) {
- virus_or_phage = TRUE;
- }
-
- if (StringDoesHaveText (dlp->m_plasmid)) {
- has_plasmid = TRUE;
- }
-
- orgnl = x_OrganelleName (dlp, has_plasmid, virus_or_phage, FALSE);
-
- is_plasmid = (Boolean) (dlp->m_genome == GENOME_plasmid);
- is_chromosome = (Boolean) (dlp->m_genome == GENOME_chromosome);
-
if (dlp->m_mi_completeness == 2 ||
dlp->m_mi_completeness == 3 ||
dlp->m_mi_completeness == 4 ||
@@ -6569,7 +6970,7 @@ static CharPtr x_TitleFromNC (
ValNodeCopyStr (&strings, 0, dlp->m_taxname);
ValNodeCopyStr (&strings, 0, completeseq);
- } else if (is_plasmid) {
+ } else if (dlp->m_is_plasmid) {
if (StringDoesHaveText (dlp->m_plasmid)) {
ValNodeCopyStr (&strings, 0, dlp->m_taxname);
@@ -6585,10 +6986,10 @@ static CharPtr x_TitleFromNC (
} else if (StringDoesHaveText (dlp->m_plasmid)) {
- if (StringDoesHaveText (orgnl)) {
+ if (StringDoesHaveText (dlp->m_organelle)) {
ValNodeCopyStr (&strings, 0, dlp->m_taxname);
ValNodeCopyStr (&strings, 0, " ");
- ValNodeCopyStr (&strings, 0, orgnl);
+ ValNodeCopyStr (&strings, 0, dlp->m_organelle);
ValNodeCopyStr (&strings, 0, " ");
ValNodeCopyStr (&strings, 0, pls_pfx);
ValNodeCopyStr (&strings, 0, dlp->m_plasmid);
@@ -6601,21 +7002,29 @@ static CharPtr x_TitleFromNC (
ValNodeCopyStr (&strings, 0, completeseq);
}
- } else if (StringDoesHaveText (orgnl)) {
+ } else if (StringDoesHaveText (dlp->m_organelle)) {
if (StringDoesHaveText (dlp->m_chromosome)) {
ValNodeCopyStr (&strings, 0, dlp->m_taxname);
- if (! is_chromosome) {
+ if (! dlp->m_is_chromosome) {
ValNodeCopyStr (&strings, 0, " ");
- ValNodeCopyStr (&strings, 0, orgnl);
+ ValNodeCopyStr (&strings, 0, dlp->m_organelle);
}
ValNodeCopyStr (&strings, 0, " chromosome ");
ValNodeCopyStr (&strings, 0, dlp->m_chromosome);
ValNodeCopyStr (&strings, 0, completeseq);
} else {
ValNodeCopyStr (&strings, 0, dlp->m_taxname);
- ValNodeCopyStr (&strings, 0, " ");
- ValNodeCopyStr (&strings, 0, orgnl);
+ switch (dlp->m_genome) {
+ case GENOME_mitochondrion :
+ case GENOME_chloroplast :
+ case GENOME_kinetoplast :
+ case GENOME_plastid :
+ case GENOME_apicoplast :
+ ValNodeCopyStr (&strings, 0, " ");
+ ValNodeCopyStr (&strings, 0, dlp->m_organelle);
+ break;
+ }
ValNodeCopyStr (&strings, 0, completegen);
}
@@ -6655,11 +7064,6 @@ static CharPtr x_TitleFromNC (
x_LowercasePlasmidOrElement (result);
- ch = result [0];
- if (IS_LOWER (ch)) {
- result [0] = TO_UPPER (ch);
- }
-
return result;
}
@@ -6909,7 +7313,7 @@ static CharPtr x_TitleFromPatent (
)
{
- Char buf [80];
+ Char buf [128];
if (dlp == NULL) return NULL;
@@ -6926,7 +7330,7 @@ static CharPtr x_TitleFromPDB (
)
{
- Char buf [40];
+ Char buf [128];
Char ch;
CharPtr result = NULL;
ValNodePtr strings = NULL;
@@ -6946,6 +7350,68 @@ static CharPtr x_TitleFromPDB (
return result;
}
+static CharPtr x_TitleFromGPipe (
+ DefLinePtr dlp
+)
+
+{
+ CharPtr result = NULL, cln, stn, ptr;
+ ValNodePtr strings = NULL;
+
+ if (dlp == NULL) return NULL;
+
+ ValNodeCopyStr (&strings, 0, dlp->m_taxname);
+
+ if (StringDoesHaveText (dlp->m_organelle) && StringICmp (dlp->m_organelle, "plasmid") != 0) {
+ ValNodeCopyStr (&strings, 0, " ");
+ ValNodeCopyStr (&strings, 0, dlp->m_organelle);
+ }
+
+ if (StringDoesHaveText (dlp->m_strain)) {
+ stn = StringSave (dlp->m_strain);
+ ptr = StringChr (stn, ';');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ if (! x_EndsWithStrain (dlp, stn)) {
+ ValNodeCopyStr (&strings, 0, " strain ");
+ ValNodeCopyStr (&strings, 0, stn);
+ }
+ MemFree (stn);
+ }
+
+ if (StringDoesHaveText (dlp->m_chromosome)) {
+ ValNodeCopyStr (&strings, 0, " chromosome ");
+ ValNodeCopyStr (&strings, 0, dlp->m_chromosome);
+ }
+
+ cln = x_DescribeClones (dlp);
+ if (StringDoesHaveText (cln)) {
+ ValNodeCopyStr (&strings, 0, cln);
+ }
+ MemFree (cln);
+
+ if (StringDoesHaveText (dlp->m_map)) {
+ ValNodeCopyStr (&strings, 0, " map ");
+ ValNodeCopyStr (&strings, 0, dlp->m_map);
+ }
+
+ if (StringDoesHaveText (dlp->m_plasmid)) {
+ ValNodeCopyStr (&strings, 0, " plasmid ");
+ ValNodeCopyStr (&strings, 0, dlp->m_plasmid);
+ }
+
+ if (dlp->m_mi_completeness == 1) {
+ ValNodeCopyStr (&strings, 0, ", complete sequence");
+ }
+
+ result = x_CatenateValNodeStrings (strings);
+ ValNodeFreeData (strings);
+ if (result == NULL) return NULL;
+
+ return result;
+}
+
typedef struct udxfeatdata {
SeqIdPtr bspid;
Int4 longest;
@@ -7077,26 +7543,26 @@ static CharPtr proteinOrganellePrefix [] = {
NULL,
NULL,
"chloroplast",
- NULL,
- NULL,
+ "chromoplast",
+ "kinetoplast",
"mitochondrion",
+ "plastid",
+ "macronuclear",
NULL,
+ "plasmid",
NULL,
NULL,
+ "cyanelle",
NULL,
NULL,
+ "nucleomorph",
+ "apicoplast",
+ "leucoplast",
+ "protoplast",
+ "endogenous virus",
+ "hydrogenosome",
NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
+ "chromatophore"
};
static CharPtr x_TitleFromProtein (
@@ -7107,19 +7573,27 @@ static CharPtr x_TitleFromProtein (
BioSourcePtr biop;
BioseqPtr bsp;
SeqFeatPtr cds = NULL;
+ Char ch;
+ CharPtr comma = NULL;
Uint2 entityID;
SeqMgrFeatContext fcontext;
GeneRefPtr grp;
Boolean indexed;
+ CharPtr isoform = NULL;
size_t len;
CharPtr low_qual = "LOW QUALITY PROTEIN: ";
+ Int2 offset = 0;
CharPtr organelle = NULL;
OrgRefPtr orp;
Boolean partial = FALSE;
CharPtr prefix = "";
ProtRefPtr prp;
+ CharPtr ptr;
CharPtr result = NULL;
SeqFeatPtr sfp = NULL;
+ SeqIntPtr sintp;
+ SeqLocPtr slp, slpx;
+ SeqPntPtr spp;
CharPtr str;
ValNodePtr strings = NULL;
CharPtr taxname = NULL;
@@ -7175,6 +7649,36 @@ static CharPtr x_TitleFromProtein (
x_TrimPunctuationFromEnd (title);
/* if hypothetical protein, append locus_tag */
+ offset = 0;
+ if (StringNICmp (title, "hypothetical protein", 20) == 0) {
+ offset = 20;
+ } else if (StringNICmp (title, "uncharacterized protein", 23) == 0) {
+ offset = 23;
+ }
+ if (offset > 0) {
+ ptr = title + offset;
+ if (ptr [0] == ',' && ptr [1] == ' ') {
+ comma = ",";
+ ptr += 2;
+ }
+ if (ptr [0] == ' ') {
+ ptr++;
+ }
+ if (StringNCmp (ptr, "isoform ", 8) == 0) {
+ ptr += 8;
+ isoform = ptr;
+ ch = *ptr;
+ while (ch != '\0' && IS_ALPHANUM (ch)) {
+ ptr++;
+ ch = *ptr;
+ }
+ if (ch != '\0') {
+ isoform = NULL;
+ } else {
+ title [offset] = '\0';
+ }
+ }
+ }
if (StringICmp (title, "hypothetical protein") == 0 || StringICmp (title, "uncharacterized protein") == 0) {
if (! indexed) {
SeqMgrIndexFeatures (entityID, NULL);
@@ -7193,12 +7697,19 @@ static CharPtr x_TitleFromProtein (
}
if (grp != NULL) {
if (grp->locus_tag != NULL) {
- len = StringLen (title) + StringLen (grp->locus_tag) + 20;
+ len = StringLen (title) + StringLen (grp->locus_tag) + StringLen (isoform) + 35;
str = (CharPtr) MemNew (sizeof (Char) * len);
if (str != NULL) {
StringCat (str, title);
StringCat (str, " ");
StringCat (str, grp->locus_tag);
+ if (StringDoesHaveText (isoform)) {
+ if (comma != NULL) {
+ StringCat (str, comma);
+ }
+ StringCat (str, " isoform ");
+ StringCat (str, isoform);
+ }
MemFree (title);
title = str;
}
@@ -7258,6 +7769,35 @@ static CharPtr x_TitleFromProtein (
if (title == NULL) {
title = StringSave ("unnamed protein product");
+ if (! indexed) {
+ SeqMgrIndexFeatures (entityID, NULL);
+ indexed = TRUE;
+ }
+ if (cds == NULL) {
+ cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
+ }
+ if (cds != NULL) {
+ grp = SeqMgrGetGeneXref (cds);
+ if (grp == NULL) {
+ sfp = SeqMgrGetOverlappingFeature (cds->location, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, NULL);
+ if (sfp != NULL) {
+ grp = (GeneRefPtr) sfp->data.value.ptrvalue;
+ }
+ }
+ if (grp != NULL) {
+ if (grp->locus_tag != NULL) {
+ len = StringLen (title) + StringLen (grp->locus_tag) + 20;
+ str = (CharPtr) MemNew (sizeof (Char) * len);
+ if (str != NULL) {
+ StringCat (str, title);
+ StringCat (str, " ");
+ StringCat (str, grp->locus_tag);
+ MemFree (title);
+ title = str;
+ }
+ }
+ }
+ }
}
if (title != NULL) {
@@ -7274,24 +7814,46 @@ static CharPtr x_TitleFromProtein (
cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
}
if (cds != NULL) {
- sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
- if (sfp != NULL) {
- biop = (BioSourcePtr) sfp->data.value.ptrvalue;
- if (biop != NULL) {
- orp = biop->org;
- if (orp != NULL) {
- taxname = orp->taxname;
+ slp = AsnIoMemCopy ((Pointer) cds->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
+ if (slp != NULL) {
+ for (slpx = SeqLocFindNext (slp, NULL); slpx != NULL; slpx = SeqLocFindNext (slp, slpx)) {
+ if (slpx->choice == SEQLOC_INT) {
+ sintp = (SeqIntPtr) slpx->data.ptrvalue;
+ if (sintp != NULL) {
+ sintp->strand = Seq_strand_both;
+ }
+ } else if (slpx->choice == SEQLOC_PNT) {
+ spp = (SeqPntPtr) slpx->data.ptrvalue;
+ if (spp != NULL) {
+ spp->strand = Seq_strand_both;
+ }
+ }
+ }
+ /*
+ sfp = SeqMgrGetOverlappingSource (slp, &fcontext);
+ */
+ sfp = SeqMgrGetOverlappingFeature (slp, FEATDEF_BIOSRC, NULL, 0, NULL, LOCATION_SUBSET, &fcontext);
+ if (sfp != NULL) {
+ biop = (BioSourcePtr) sfp->data.value.ptrvalue;
+ if (biop != NULL) {
+ orp = biop->org;
+ if (orp != NULL) {
+ taxname = orp->taxname;
+ }
}
}
+ SeqLocFree (slp);
}
}
}
if (dlp->m_genome >= GENOME_chloroplast && dlp->m_genome <= GENOME_chromatophore) {
organelle = proteinOrganellePrefix [dlp->m_genome];
+ /*
if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0) {
organelle = NULL;
}
+ */
}
if (cds == NULL) {
@@ -7339,7 +7901,20 @@ static CharPtr x_TitleFromProtein (
}
}
- if (StringDoesHaveText (taxname)) {
+ if (dlp->m_is_cross_kingdom && StringDoesHaveText (dlp->m_first_super_kingdom) && StringDoesHaveText (dlp->m_second_super_kingdom)) {
+ len = StringLen (title) + StringLen (dlp->m_first_super_kingdom) + StringLen (dlp->m_second_super_kingdom) + 8;
+ tmp = (CharPtr) MemNew (sizeof (Char) * len);
+ if (tmp != NULL) {
+ StringCat (tmp, title);
+ StringCat (tmp, " [");
+ StringCat (tmp, dlp->m_first_super_kingdom);
+ StringCat (tmp, "][");
+ StringCat (tmp, dlp->m_second_super_kingdom);
+ StringCat (tmp, "]");
+ MemFree (title);
+ title = tmp;
+ }
+ } else if (StringDoesHaveText (taxname)) {
len = StringLen (title) + StringLen (taxname) + 6;
tmp = (CharPtr) MemNew (sizeof (Char) * len);
if (tmp != NULL) {
@@ -7423,7 +7998,7 @@ static CharPtr x_TitleFromSegSeq (
}
}
} else {
- if (StringDoesHaveText (dlp->m_strain) && (! x_EndsWithStrain (dlp))) {
+ if (StringDoesHaveText (dlp->m_strain) && (! x_EndsWithStrain (dlp, dlp->m_strain))) {
modifier = dlp->m_strain;
label = " strain ";
} else if (StringDoesHaveText (dlp->m_clone)) {
@@ -7480,12 +8055,31 @@ static CharPtr x_TitleFromSegSeq (
return result;
}
+ static Boolean x_StringInList (
+ ValNodePtr strings,
+ CharPtr str
+)
+
+{
+ CharPtr tmp;
+ ValNodePtr vnp;
+
+ if (strings == NULL || StringHasNoText (str)) return FALSE;
+
+ for (vnp = strings; vnp != NULL; vnp = vnp->next) {
+ tmp = (CharPtr) vnp->data.ptrvalue;
+ if (StringStr (tmp, str) != NULL) return TRUE;
+ }
+
+ return FALSE;
+}
+
+
static CharPtr x_TitleFromWGS (
DefLinePtr dlp
)
{
- Char ch;
CharPtr result = NULL, cln, mod, ptr;
ValNodePtr strings = NULL;
@@ -7494,16 +8088,16 @@ static CharPtr x_TitleFromWGS (
ValNodeCopyStr (&strings, 0, dlp->m_taxname);
if (StringDoesHaveText (dlp->m_strain)) {
- if (! x_EndsWithStrain (dlp)) {
+ mod = StringSave (dlp->m_strain);
+ ptr = StringChr (mod, ';');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ if (! x_EndsWithStrain (dlp, mod)) {
ValNodeCopyStr (&strings, 0, " strain ");
- mod = StringSave (dlp->m_strain);
- ptr = StringChr (mod, ';');
- if (ptr != NULL) {
- *ptr = '\0';
- }
ValNodeCopyStr (&strings, 0, mod);
- MemFree (mod);
}
+ MemFree (mod);
} else if (StringDoesHaveText (dlp->m_breed)) {
ValNodeCopyStr (&strings, 0, " breed ");
mod = StringSave (dlp->m_breed);
@@ -7524,6 +8118,14 @@ static CharPtr x_TitleFromWGS (
MemFree (mod);
}
+ if (StringDoesHaveText (dlp->m_isolate)) {
+ /* x_EndsWithStrain just checks for supplied pattern, using here for isolate */
+ if (! x_EndsWithStrain (dlp, dlp->m_isolate)) {
+ ValNodeCopyStr (&strings, 0, " isolate ");
+ ValNodeCopyStr (&strings, 0, dlp->m_isolate);
+ }
+ }
+
if (StringDoesHaveText (dlp->m_chromosome)) {
ValNodeCopyStr (&strings, 0, " chromosome ");
ValNodeCopyStr (&strings, 0, dlp->m_chromosome);
@@ -7547,7 +8149,9 @@ static CharPtr x_TitleFromWGS (
}
}
- if (StringDoesHaveText (dlp->m_general_str)) {
+ if (dlp->m_genome == GENOME_plasmid && dlp->m_topology == TOPOLOGY_CIRCULAR) {
+ } else if (dlp->m_genome == GENOME_chromosome) {
+ } else if (StringDoesHaveText (dlp->m_general_str) && StringICmp (dlp->m_general_str, dlp->m_chromosome) != 0) {
ValNodeCopyStr (&strings, 0, " ");
ValNodeCopyStr (&strings, 0, dlp->m_general_str);
}
@@ -7556,11 +8160,69 @@ static CharPtr x_TitleFromWGS (
ValNodeFreeData (strings);
if (result == NULL) return NULL;
- ch = result [0];
- if (IS_LOWER (ch)) {
- result [0] = TO_UPPER (ch);
+ return result;
+}
+
+static CharPtr x_TitleFromMap (
+ DefLinePtr dlp
+)
+
+{
+ BioseqPtr bsp;
+ CharPtr result = NULL, mod, ptr;
+ ValNodePtr strings = NULL;
+
+ if (dlp == NULL) return NULL;
+
+ bsp = dlp->m_bioseq;
+ if (bsp == NULL) return NULL;
+ if (bsp->seq_ext_type != 3) return NULL;
+ if (bsp->seq_ext == NULL) return NULL;
+
+ ValNodeCopyStr (&strings, 0, dlp->m_taxname);
+
+ if (StringDoesHaveText (dlp->m_strain)) {
+ mod = StringSave (dlp->m_strain);
+ ptr = StringChr (mod, ';');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ if (! x_EndsWithStrain (dlp, mod)) {
+ ValNodeCopyStr (&strings, 0, " strain ");
+ ValNodeCopyStr (&strings, 0, mod);
+ }
+ MemFree (mod);
}
+ if (StringDoesHaveText (dlp->m_chromosome)) {
+ ValNodeCopyStr (&strings, 0, " chromosome ");
+ ValNodeCopyStr (&strings, 0, dlp->m_chromosome);
+ } else if (dlp->m_is_chromosome) {
+ ValNodeCopyStr (&strings, 0, " chromosome");
+ }
+
+ if (StringDoesHaveText (dlp->m_plasmid)) {
+ ValNodeCopyStr (&strings, 0, " plasmid ");
+ ValNodeCopyStr (&strings, 0, dlp->m_plasmid);
+ } else if (dlp->m_is_plasmid) {
+ ValNodeCopyStr (&strings, 0, " plasmid");
+ }
+
+ if (StringDoesHaveText (dlp->m_isolate)) {
+ ValNodeCopyStr (&strings, 0, " isolate ");
+ ValNodeCopyStr (&strings, 0, dlp->m_isolate);
+ }
+
+ if (StringDoesHaveText (dlp->m_enzyme)) {
+ ValNodeCopyStr (&strings, 0, ", ");
+ ValNodeCopyStr (&strings, 0, dlp->m_enzyme);
+ ValNodeCopyStr (&strings, 0, " whole genome map");
+ }
+
+ result = x_CatenateValNodeStrings (strings);
+ ValNodeFreeData (strings);
+ if (result == NULL) return NULL;
+
return result;
}
@@ -7580,137 +8242,266 @@ static CharPtr x_SetPrefix (
}
} else if (dlp->m_is_tsa) {
prefix = "TSA: ";
+ } else if (dlp->m_is_tls) {
+ prefix = "TLS: ";
} else if (dlp->m_third_party) {
if (dlp->m_tpa_exp) {
prefix = "TPA_exp: ";
} else if (dlp->m_tpa_inf) {
prefix = "TPA_inf: ";
} else if (dlp->m_tpa_reasm) {
- prefix = "TPA_reasm: ";
+ prefix = "TPA_asm: ";
} else {
prefix = "TPA: ";
}
+ } else if (dlp->m_multispecies && dlp->m_is_wp) {
+ prefix = "MULTISPECIES: ";
+ } else if (dlp->m_is_pseudogene) {
+ if (StringStr (title, "PUTATIVE PSEUDOGENE") == NULL) {
+ prefix = "PUTATIVE PSEUDOGENE: ";
+ }
}
return StringSave (prefix);
}
+static Int4 CountDeltaGaps (
+ BioseqPtr bsp
+)
+
+{
+ DeltaSeqPtr dsp;
+ Int4 num_gaps = 0;
+ SeqLitPtr slitp;
+ SeqLocPtr slocp;
+
+ if (bsp == NULL) return 0;
+
+ if (bsp->repr == Seq_repr_delta) {
+ for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp = dsp->next) {
+ switch (dsp->choice) {
+ case 1:
+ slocp = (SeqLocPtr)(dsp->data.ptrvalue);
+ if (slocp == NULL) break;
+ if (slocp->choice == SEQLOC_NULL) {
+ num_gaps++;
+ }
+ break;
+ case 2:
+ slitp = (SeqLitPtr)(dsp->data.ptrvalue);
+ if (slitp == NULL) break;
+ if (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap) {
+ num_gaps++;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return num_gaps;
+}
+
static CharPtr x_SetSuffix (
DefLinePtr dlp,
- CharPtr title
+ CharPtr title,
+ Boolean appendComplete
)
{
- Char buf [80];
+ Char buf1 [512], buf2 [256];
+ CharPtr compl = "", study = "", type = "", un = "ordered", suffix;
size_t len;
Int4 num_segs, num_gaps;
- CharPtr orgnl = NULL, str, suffix = "", un = "";
if (dlp == NULL) return NULL;
+ buf1 [0] = '\0';
+ buf2 [0] = '\0';
+
switch (dlp->m_mi_tech) {
case MI_TECH_htgs_0 :
if (StringStr (title, "LOW-PASS") == NULL) {
- suffix = ", LOW-PASS SEQUENCE SAMPLING";
+ type = ", LOW-PASS SEQUENCE SAMPLING";
}
break;
case MI_TECH_htgs_1 :
- un = "un";
+ un = "unordered";
/* fall through */
case MI_TECH_htgs_2 :
if (dlp->m_htgs_draft) {
if (StringStr (title, "WORKING DRAFT") == NULL) {
- suffix = ", WORKING DRAFT SEQUENCE";
+ type = ", WORKING DRAFT SEQUENCE";
}
} else if (! dlp->m_htgs_cancelled) {
if (StringStr (title, "SEQUENCING IN") == NULL) {
- suffix = ", *** SEQUENCING IN PROGRESS ***";
+ type = ", *** SEQUENCING IN PROGRESS ***";
}
}
if (dlp->m_is_delta) {
if (CountGapsInDeltaSeq (dlp->m_bioseq, &num_segs, &num_gaps, NULL, NULL, NULL, 0)) {
if (num_gaps > 0) {
- sprintf (buf, "%s, %ld %sordered pieces", suffix, (long) (num_gaps + 1), un);
- suffix = StringSave (buf);
- return suffix;
+ sprintf (buf1, "%s, %ld %s pieces", type, (long) (num_gaps + 1), un);
+ type = buf1;
}
}
}
break;
case MI_TECH_htgs_3 :
if (StringStr (title, "complete sequence") == NULL) {
- suffix = ", complete sequence";
+ type = ", complete sequence";
}
break;
case MI_TECH_est :
if (StringStr (title, "mRNA sequence") == NULL) {
- suffix = ", mRNA sequence";
+ type = ", mRNA sequence";
}
break;
case MI_TECH_sts :
if (StringStr (title, "sequence tagged site") == NULL) {
- suffix = ", sequence tagged site";
+ type = ", sequence tagged site";
}
break;
case MI_TECH_survey :
if (StringStr (title, "genomic survey sequence") == NULL) {
- suffix = ", genomic survey sequence";
+ type = ", genomic survey sequence";
}
break;
case MI_TECH_wgs :
if (dlp->m_wgs_master) {
- if (StringStr (title, "whole genome shotgun sequencing project") == NULL) {
- suffix = ", whole genome shotgun sequencing project";
+ if (StringStr (title, "whole genome shotgun sequencing") == NULL) {
+ type = ", whole genome shotgun sequencing project";
}
- } else {
- if (StringStr (title, "whole genome shotgun sequence") == NULL) {
- orgnl = x_OrganelleName (dlp, FALSE, FALSE, TRUE);
- len = StringLen (", whole genome shotgun sequence") + StringLen (orgnl) + 5;
- str = (CharPtr) MemNew (sizeof (Char) * len);
- if (str != NULL) {
- if (StringDoesHaveText (orgnl)) {
- StringCat (str, " ");
- StringCat (str, orgnl);
- }
- StringCat (str, ", whole genome shotgun sequence");
- return str;
- }
+ } else if (StringStr (title, "whole genome shotgun sequence") == NULL) {
+ if (StringDoesHaveText (dlp->m_organelle) && StringStr (title, dlp->m_organelle) == NULL) {
+ StringCat (buf1, " ");
+ StringCat (buf1, dlp->m_organelle);
}
+ StringCat (buf1, ", whole genome shotgun sequence");
+ type = buf1;
}
break;
case MI_TECH_tsa :
- if (dlp->m_mi_biomol == MOLECULE_TYPE_MRNA) {
- if (dlp->m_tsa_master) {
- if (StringStr (title, "transcriptome shotgun assembly project") == NULL) {
- suffix = ", transcriptome shotgun assembly";
- }
- } else {
- if (StringStr (title, "mRNA sequence") == NULL) {
- suffix = ", mRNA sequence";
- }
+ if (dlp->m_tsa_master) {
+ if (StringStr (title, "transcriptome shotgun assembly") == NULL) {
+ type = ", transcriptome shotgun assembly";
+ }
+ } else if (StringStr (title, "RNA sequence") == NULL) {
+ switch (dlp->m_mi_biomol) {
+ case MOLECULE_TYPE_MRNA :
+ type = ", mRNA sequence";
+ break;
+ case MOLECULE_TYPE_RRNA :
+ type = ", rRNA sequence";
+ break;
+ case MOLECULE_TYPE_NCRNA :
+ type = ", ncRNA sequence";
+ break;
+ case MOLECULE_TYPE_PRE_MRNA :
+ case MOLECULE_TYPE_SNRNA :
+ case MOLECULE_TYPE_SCRNA :
+ case MOLECULE_TYPE_CRNA :
+ case MOLECULE_TYPE_SNORNA :
+ case MOLECULE_TYPE_TRANSCRIBED_RNA :
+ type = ", transcribed RNA sequence";
+ break;
+ default :
+ break;
+ }
+ }
+ break;
+ case MI_TECH_targeted :
+ if (dlp->m_tls_master) {
+ if (StringStr (title, "targeted locus study") == NULL) {
+ type = ", targeted locus study";
}
+ } else {
+ if (StringStr (title, "sequence") == NULL) {
+ type = ", sequence";
+ }
+ }
+ if (StringDoesHaveText (dlp->m_targeted_locus) && StringStr (title, dlp->m_targeted_locus) == NULL) {
+ study = dlp->m_targeted_locus;
}
break;
default :
break;
}
- return StringSave (suffix);
+ if (appendComplete && StringStr (title, "complete") == NULL && StringStr (title, "partial") == NULL) {
+ if (dlp->m_mi_completeness == 1) {
+ if (dlp->m_is_plasmid) {
+ compl = ", complete sequence";
+ } else if (dlp->m_genome == GENOME_mitochondrion ||
+ dlp->m_genome == GENOME_chloroplast ||
+ dlp->m_genome == GENOME_kinetoplast ||
+ dlp->m_genome == GENOME_plastid ||
+ dlp->m_genome == GENOME_apicoplast) {
+ compl = ", complete genome";
+ } else if (dlp->m_is_chromosome) {
+ if (StringDoesHaveText (dlp->m_chromosome)) {
+ compl = ", complete sequence";
+ } else {
+ compl = ", complete genome";
+ }
+ }
+ }
+ }
+
+ if (dlp->m_unordered && dlp->m_is_delta) {
+ num_gaps = CountDeltaGaps (dlp->m_bioseq);
+ if (num_gaps > 0) {
+ sprintf (buf1, ", %ld unordered pieces", (long) (num_gaps + 1));
+ type = buf1;
+ }
+ }
+
+ len = StringLen (type) + StringLen (study) + StringLen (compl) + 5;
+ suffix = (CharPtr) MemNew (len * sizeof (Char));
+ if (suffix == NULL) return NULL;
+
+ suffix [0] = '\0';
+ if (StringDoesHaveText (study)) {
+ StringCat (suffix, " ");
+ StringCat (suffix, study);
+ }
+ StringCat (suffix, type);
+ StringCat (suffix, compl);
+
+ return suffix;
}
-NLM_EXTERN CharPtr NewCreateDefLine (
+static CharPtr tpa_prefix_list [] = {
+ "TPA:",
+ "TPA_exp:",
+ "TPA_inf:",
+ "TPA_reasm:",
+ "TPA_asm:",
+ "TSA:",
+ "UNVERIFIED:",
+ NULL
+};
+
+NLM_EXTERN CharPtr NewCreateDefLineExEx (
ItemInfoPtr iip,
BioseqPtr bsp,
Boolean ignoreTitle,
- Boolean extProtTitle
+ Boolean extProtTitle,
+ Boolean gpipeMode,
+ Boolean devMode
)
{
+ Boolean appendComplete = FALSE;
+ Boolean capitalize = TRUE;
+ Char ch;
DefLinePtr dlp;
Uint2 entityID;
+ int i;
size_t len;
ObjValNodePtr ovp;
- CharPtr result = NULL, prefix = NULL, suffix = NULL, title = NULL;
+ CharPtr result = NULL, prefix = NULL, suffix = NULL, title = NULL, fix = NULL;
SeqDescrPtr sdp = NULL;
CharPtr str = NULL;
@@ -7739,6 +8530,9 @@ NLM_EXTERN CharPtr NewCreateDefLine (
dlp->m_reconstruct = ignoreTitle;
dlp->m_allprotnames = extProtTitle;
+ dlp->m_gpipemode = gpipeMode;
+ dlp->m_devmode = devMode;
+
/* clear ItemInfo fields */
if (iip != NULL) {
iip->entityID = 0;
@@ -7766,6 +8560,7 @@ NLM_EXTERN CharPtr NewCreateDefLine (
title = StringSave (str);
/* strip trailing periods, commas, semicolons, etc. */
x_TrimPunctuationFromEnd (title);
+ capitalize = FALSE;
/* set ItemInfo fields for selection */
if (iip != NULL && sdp != NULL && sdp->extended != 0) {
@@ -7801,35 +8596,40 @@ NLM_EXTERN CharPtr NewCreateDefLine (
title = x_TitleFromProtein (dlp);
} else if (dlp->m_is_seg && (! dlp->m_is_est_sts_gss)) {
title = x_TitleFromSegSeq (dlp);
- } else if (dlp->m_is_tsa || (dlp->m_is_wgs && (! dlp->m_wgs_master))) {
+ } else if (dlp->m_is_tsa || (dlp->m_is_wgs && (! dlp->m_wgs_master)) || (dlp->m_is_tls && (! dlp->m_tls_master))) {
title = x_TitleFromWGS (dlp);
+ } else if (dlp->m_is_map) {
+ title = x_TitleFromMap (dlp);
}
}
+ if (StringHasNoText (title) && dlp->m_gpipemode) {
+ /* title using gpipe policy */
+ title = x_TitleFromGPipe (dlp);
+ }
+
if (StringHasNoText (title)) {
/* default title using source fields */
title = x_TitleFromBioSrc (dlp);
+ if (dlp->m_mi_completeness == 1 && StringDoesHaveText (title)) {
+ appendComplete = TRUE;
+ }
}
if (StringHasNoText (title)) {
/* last resort title created here */
+ /*
title = StringSave ("No definition line found");
+ */
}
}
/* remove TPA or TSA prefix, will rely on other data in record to set */
- if (StringNICmp (title, "TPA:", 4) == 0) {
- x_TrimFirstNCharacters (title, 4);
- } else if (StringNICmp (title, "TPA_exp:", 8) == 0) {
- x_TrimFirstNCharacters (title, 8);
- } else if (StringNICmp (title, "TPA_inf:", 8) == 0) {
- x_TrimFirstNCharacters (title, 8);
- } else if (StringNICmp (title, "TPA_reasm:", 10) == 0) {
- x_TrimFirstNCharacters (title, 10);
- } else if (StringNICmp (title, "TSA:", 4) == 0) {
- x_TrimFirstNCharacters (title, 4);
- } else if (StringNICmp (title, "UNVERIFIED:", 11) == 0) {
- x_TrimFirstNCharacters (title, 11);
+ for (i = 0; tpa_prefix_list [i] != NULL; i++) {
+ len = StringLen (tpa_prefix_list [i]);
+ if (StringNICmp (title, tpa_prefix_list [i], len) == 0) {
+ x_TrimFirstNCharacters (title, len);
+ }
}
/* strip leading spaces remaining after removal of old TPA or TSA prefixes */
@@ -7842,7 +8642,7 @@ NLM_EXTERN CharPtr NewCreateDefLine (
prefix = x_SetPrefix (dlp, title);
/* calculate suffix */
- suffix = x_SetSuffix (dlp, title);
+ suffix = x_SetSuffix (dlp, title, appendComplete);
len = StringLen (prefix) + StringLen (title) + StringLen (suffix) + 4;
result = (CharPtr) MemNew (sizeof (Char) * len);
@@ -7851,6 +8651,27 @@ NLM_EXTERN CharPtr NewCreateDefLine (
StringCat (result, prefix);
StringCat (result, title);
StringCat (result, suffix);
+
+ if (dlp->m_is_aa) {
+ fix = StringStr (result, ". [");
+ if (fix == NULL) {
+ fix = StringStr (result, ", [");
+ }
+ if (fix != NULL) {
+ *fix = ' ';
+ }
+ }
+
+ fix = StringStr (result, " ,");
+ if (fix != NULL) {
+ fix [0] = ',';
+ fix [1] = ' ';
+ }
+
+ fix = StringStr (result, ",,");
+ if (fix != NULL) {
+ fix [1] = ' ';
+ }
}
MemFree (prefix);
@@ -7859,28 +8680,59 @@ NLM_EXTERN CharPtr NewCreateDefLine (
TextFsaFree (dlp->m_low_quality_fsa);
- dlp = MemFree (dlp);
-
Asn2gnbkCompressSpaces (result);
+ if (! dlp->m_is_pdb && ! dlp->m_is_patent && ! dlp->m_is_aa && ! dlp->m_is_seg) {
+ if (result != NULL) {
+ ch = result [0];
+ if (IS_LOWER (ch) && capitalize) {
+ result [0] = TO_UPPER (ch);
+ }
+ }
+ }
+
+ dlp = MemFree (dlp);
+
return result;
}
+NLM_EXTERN CharPtr NewCreateDefLineEx (
+ ItemInfoPtr iip,
+ BioseqPtr bsp,
+ Boolean ignoreTitle,
+ Boolean extProtTitle,
+ Boolean gpipeMode
+)
+
+{
+ return NewCreateDefLineExEx (iip, bsp, ignoreTitle, extProtTitle, gpipeMode, FALSE);
+}
+
+NLM_EXTERN CharPtr NewCreateDefLine (
+ ItemInfoPtr iip,
+ BioseqPtr bsp,
+ Boolean ignoreTitle,
+ Boolean extProtTitle
+)
+
+{
+ return NewCreateDefLineExEx (iip, bsp, ignoreTitle, extProtTitle, FALSE, FALSE);
+}
+
NLM_EXTERN Boolean NewCreateDefLineBuf (
ItemInfoPtr iip,
BioseqPtr bsp,
CharPtr buf,
Uint4 buflen,
Boolean ignoreTitle,
- Boolean extProtTitle
-)
+ Boolean extProtTitle)
{
CharPtr title = NULL;
if (bsp == NULL || buf == NULL|| buflen == 0) return FALSE;
- title = NewCreateDefLine (iip, bsp, ignoreTitle, extProtTitle);
+ title = NewCreateDefLineEx (iip, bsp, ignoreTitle, extProtTitle, FALSE);
StringNCpy_0 (buf, title, buflen);
MemFree (title);
diff --git a/api/tofasta.h b/api/tofasta.h
index b8152d33..4a6ff7d0 100644
--- a/api/tofasta.h
+++ b/api/tofasta.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.43 $
+* $Revision: 6.48 $
*
* File Description: various sequence objects to fasta output
*
@@ -306,6 +306,15 @@ NLM_EXTERN Int4 SeqEntryFastaStreamEx (
Boolean sorted_prot
);
+NLM_EXTERN void MakeFastaStreamIdSuffix (
+ SeqFeatPtr sfp,
+ Uint4 idx,
+ CharPtr prefix,
+ CharPtr buf,
+ Boolean do_product,
+ Boolean do_feat_id
+);
+
/*****************************************************************************
*
* FastaFileFunc(key, buf, data)
@@ -432,6 +441,10 @@ Boolean FastaReadSequenceMem
*****************************************************************************/
NLM_EXTERN Boolean FastaId PROTO((BioseqPtr bsp, CharPtr buf, Uint4 buflen));
+NLM_EXTERN CharPtr FastaGetOriginalId PROTO((BioseqPtr bsp));
+NLM_EXTERN Boolean ShouldUseOriginalID PROTO((BioseqPtr bsp));
+NLM_EXTERN Boolean FastaIdEx PROTO((BioseqPtr bsp, CharPtr buf, Uint4 buflen, Boolean prefer_original_ID));
+
/*****************************************************************************
*
* FastaDefLine(bsp, buf, buflen, accession, organism)
@@ -460,6 +473,23 @@ NLM_EXTERN Boolean CreateDefLineExEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr bu
*
*****************************************************************************/
+NLM_EXTERN CharPtr NewCreateDefLineExEx (
+ ItemInfoPtr iip,
+ BioseqPtr bsp,
+ Boolean ignoreTitle,
+ Boolean extProtTitle,
+ Boolean gpipeMode,
+ Boolean devMode
+);
+
+NLM_EXTERN CharPtr NewCreateDefLineEx (
+ ItemInfoPtr iip,
+ BioseqPtr bsp,
+ Boolean ignoreTitle,
+ Boolean extProtTitle,
+ Boolean gpipeMode
+);
+
NLM_EXTERN CharPtr NewCreateDefLine (
ItemInfoPtr iip,
BioseqPtr bsp,
diff --git a/api/tomedlin.c b/api/tomedlin.c
index f51fbf6b..012f2202 100644
--- a/api/tomedlin.c
+++ b/api/tomedlin.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/15/91
*
-* $Revision: 6.11 $
+* $Revision: 6.12 $
*
* File Description: conversion to medlars format
*
@@ -40,6 +40,9 @@
*
*
* $Log: tomedlin.c,v $
+* Revision 6.12 2012/08/21 19:18:45 kans
+* fixed several potential null dereferences found by clang
+*
* Revision 6.11 2004/03/10 15:19:47 kans
* ParseMedline loops on journal, only saves one of iso-jta or ml-jta to avoid memory leak
*
@@ -252,8 +255,8 @@ static Boolean MedlineEntryToDataFileEx (MedlineEntryPtr mep, Int4 pmid, FILE *f
AffilPtr affil;
AuthorPtr ap;
AuthListPtr authors = NULL;
- CitArtPtr cit;
- CitJourPtr citjour;
+ CitArtPtr cit = NULL;
+ CitJourPtr citjour = NULL;
Int2 count;
CharPtr curr;
DatePtr date = NULL;
@@ -531,7 +534,9 @@ static Boolean MedlineEntryToDataFileEx (MedlineEntryPtr mep, Int4 pmid, FILE *f
}
rsult = (Boolean) (SendTextToFile (fp, buffer, &para, table) && rsult);
ClearString ();
- citjour = cit->fromptr;
+ if (cit != NULL) {
+ citjour = cit->fromptr;
+ }
if (citjour != NULL) {
imp = citjour->imp;
if (imp != NULL) {
diff --git a/api/txalign.c b/api/txalign.c
index 48d87c9d..c0b95f0d 100644
--- a/api/txalign.c
+++ b/api/txalign.c
@@ -1,4 +1,4 @@
-/* $Id: txalign.c,v 6.96 2011/12/19 18:33:53 gouriano Exp $
+/* $Id: txalign.c,v 6.101 2016/09/02 15:01:22 ucko Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -27,530 +27,12 @@
*
* File Name: txalign.c
*
-* $Revision: 6.96 $
+* $Revision: 6.101 $
*
* File Description: Formating of text alignment for the BLAST output
*
* Modifications:
* --------------------------------------------------------------------------
-* $Log: txalign.c,v $
-* Revision 6.96 2011/12/19 18:33:53 gouriano
-* Corrected printf formatting. NOJIRA
-*
-* Revision 6.95 2008/01/07 23:20:06 bealer
-* - Fix condition found by valgrind - conditional statement on
-* unassigned location.
-*
-* Revision 6.94 2007/05/07 13:28:35 kans
-* added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
-*
-* Revision 6.93 2006/07/13 12:58:15 bollin
-* removed unused variables
-*
-* Revision 6.92 2006/01/24 18:37:08 papadopo
-* from Mike Gertz: Use enumerated values, rather than #define'd constants, to specify the composition adjustment method
-*
-* Revision 6.91 2005/05/16 17:39:20 papadopo
-* From Alejandro Schaffer: if matrix is adjusted due to composition in
-* blastpgp, then print the method for adjustment in the output alignments.
-*
-* Revision 6.90 2004/12/13 16:14:45 jianye
-* increase the width of new.gif so that it looks normal on window browsers
-*
-* Revision 6.89 2004/09/09 19:39:49 jianye
-* Added gene linkout
-*
-* Revision 6.88 2004/08/16 19:36:52 dondosha
-* Made CreateMaskByteStore function public: needed by web BLAST 2 sequences
-*
-* Revision 6.87 2004/08/11 18:14:55 jianye
-* not turn on gene linkout yet
-*
-* Revision 6.86 2004/08/10 20:02:03 jianye
-* Added gene linkout
-*
-* Revision 6.85 2004/07/06 19:12:13 dondosha
-* Correction for bit score formatting in one-line descriptions
-*
-* Revision 6.84 2004/06/24 21:15:44 dondosha
-* Changed last Boolean argument in ScoreAndEvalueToBuffers to Uint1, to allow different options for formatting
-*
-* Revision 6.83 2004/05/14 16:31:03 kans
-* ScoreAndEvalueToBuffers had a typo in OS_MAC specific code
-*
-* Revision 6.82 2004/05/14 15:38:09 dondosha
-* Made function ScoreAndEvalueToBuffers public
-*
-* Revision 6.81 2003/11/25 16:24:03 dondosha
-* Use query number for synchronizeCheck; do not show structure link if RID not available
-*
-* Revision 6.80 2003/11/20 22:09:26 dondosha
-* Added a PrindDefLinesFromSeqAlignWithPath function with an argument to provide root path for image links
-*
-* Revision 6.79 2003/09/26 20:54:10 dondosha
-* Revert change in revision 6.77, as it turned trace.cgi links should have stayed as they were
-*
-* Revision 6.78 2003/08/20 21:29:13 dondosha
-* Correction for OOF alignments with nucleotide coordinates starting at 1
-*
-* Revision 6.77 2003/07/30 14:07:36 dondosha
-* Changed hrefs to trace.cgi in accordance with the new taxonomy web interface
-*
-* Revision 6.76 2003/07/21 22:15:23 dondosha
-* Added support for out-of-frame tblastn alignments
-*
-* Revision 6.75 2003/07/15 14:36:06 dondosha
-* Added a #define for fprintf substitute, needed for gzip compression of Web BLAST results
-*
-* Revision 6.74 2003/06/11 20:15:35 jianye
-* changed unigene linkout
-*
-* Revision 6.73 2003/06/02 20:02:15 jianye
-* Added geo linkout
-*
-* Revision 6.72 2003/01/23 23:31:58 dondosha
-* Added a global variable for the query number, needed in make_dumpgnl_links
-*
-* Revision 6.71 2002/12/11 16:24:34 jianye
-* added structure linkout
-*
-* Revision 6.70 2002/11/12 22:37:35 dondosha
-* Compute number of identities from sequence data when formatting, not relying on the score set in seqalign
-*
-* Revision 6.69 2002/11/04 23:04:00 dondosha
-* Take number of identities directly from seqalign
-*
-* Revision 6.68 2002/10/17 16:57:49 jianye
-* added option for get sequence feature
-*
-* Revision 6.67 2002/09/09 21:59:21 jianye
-* fixed problem associated with query-anchored alignment for get sequence checkbox
-*
-* Revision 6.66 2002/09/04 20:32:52 jianye
-* added get sequence feature
-*
-* Revision 6.65 2002/07/24 21:08:47 kans
-* reverted ncbi URL
-*
-* Revision 6.64 2002/07/23 16:44:35 kans
-* changed www.ncbi.nlm.nih.gov to www.ncbi.nih.gov
-*
-* Revision 6.63 2002/07/09 16:38:05 dondosha
-* Use gather for all translated searches when parsing seqaligns - fixes a bug for ungapped blastx
-*
-* Revision 6.62 2002/05/21 20:34:33 jianye
-* Added download large sequence gif
-*
-* Revision 6.61 2002/05/02 18:06:02 dondosha
-* Print 2 sequences tblastx link and/or list of extra hits in a cluster only for the first HSP in a hit
-*
-* Revision 6.60 2002/04/26 16:26:30 madden
-* Add length check to for loop over fasta seqid
-*
-* Revision 6.59 2002/04/24 17:53:11 dondosha
-* Pass SeqIds in URL safe form to the wblast2.cgi link
-*
-* Revision 6.58 2002/04/15 20:22:05 dondosha
-* Changed link to wblast2.cgi to
-*
-* Revision 6.57 2002/04/05 15:53:53 camacho
-* Fixed Tx_PrintDefLine
-*
-* Revision 6.56 2002/03/26 23:26:37 dondosha
-* Added a possibility of a link to Blast 2 sequences from megablast output
-*
-* Revision 6.55 2002/02/21 17:48:52 camacho
-* Fixed UMR problem in PrintDefLinesFromSeqAlignEx2
-*
-* Revision 6.54 2002/02/15 14:18:24 camacho
-* Added RDBTaxNamesClone function
-*
-* Revision 6.53 2002/02/07 19:44:23 jianye
-* added discrimination between na and aa sequence for locus link linkout
-*
-* Revision 6.52 2002/02/05 19:50:50 camacho
-* Fix to Tx_PrintDefLine
-*
-* Revision 6.51 2002/02/01 20:04:56 jianye
-* Fixed getting wrong blast defline struct for non-redundant bioseq and adding utility function getBlastDefLineForSeqId(bdlp, sip)
-*
-* Revision 6.50 2002/01/31 21:17:00 camacho
-* Fixed minor memory leak
-*
-* Revision 6.49 2002/01/29 21:43:45 jianye
-* Changed some gif image path and get rid of unneeded readdb.h include
-*
-* Revision 6.48 2002/01/24 18:47:49 camacho
-* Moved RDBTaxNamesFree from readdb.[ch] to txalign.[ch]
-*
-* Revision 6.47 2002/01/23 20:29:14 madden
-* Add back 6.45 changes that were accidentally removed
-*
-* Revision 6.46 2002/01/23 19:42:57 jianye
-* Added checkLinkoutType(), addLinkoutForBioseq(). Changed PrintDefLinesFromSeqAlignEx2 and FSFPrintOneDefline for linkout info. Fixed bug in FDGetTaxNamesFromBioseq().
-*
-* Revision 6.45 2002/01/15 20:41:23 madden
-* If tool_url is dumpgnl.cgi do no set the NO_ENTREZ flag
-*
-* Revision 6.44 2002/01/10 20:59:05 camacho
-* Fixed problem with long deflines in Tx_PrintDefLine
-*
-* Revision 6.43 2001/12/13 21:05:38 madden
-* Comment out hyperlink to wgetorg for new db format
-*
-* Revision 6.42 2001/12/10 22:34:24 dondosha
-* Bug fix for Traces html links
-*
-* Revision 6.41 2001/11/14 17:53:21 camacho
-* One more minor fix to Tx_PrintDefLine.
-*
-* Revision 6.40 2001/11/09 22:07:39 camacho
-* Fixed Tx_PrintDefLine to properly format 1-line descriptions in the
-* default blast output for the new database format.
-*
-* Revision 6.39 2001/10/29 20:39:53 camacho
-* Fixed memory leak
-*
-* Revision 6.38 2001/10/12 15:35:38 dondosha
-* Added printing of cluster sequences deflines in ShowAlignNodeText2Ex
-*
-* Revision 6.37 2001/10/05 17:49:30 dondosha
-* Fixed bug in strand reporting for bl2seq
-*
-* Revision 6.36 2001/08/03 19:45:53 egorov
-* Change size of title_length and related variables to Int4
-*
-* Revision 6.35 2001/08/01 16:03:06 madden
-* Only call SeqAlignSegsStr for first alignment for each db sequence
-*
-* Revision 6.34 2001/07/23 20:20:11 dondosha
-* Made replace_bytestore_data function public for use in web blast2seq
-*
-* Revision 6.33 2001/07/09 14:17:55 madden
-* Fix memory leak
-*
-* Revision 6.32 2001/07/06 15:24:39 madden
-* Fix compiler warning
-*
-* Revision 6.31 2001/06/21 18:26:27 shavirin
-* Moved here functions to get Taxonomy names information encoded in
-* the Bioseq returned from the Blast database.
-*
-* Revision 6.30 2001/06/15 15:32:38 madden
-* Fix memory leaks
-*
-* Revision 6.29 2001/06/15 14:49:36 madden
-* Fix spacing for BLAST databases without -o option
-*
-* Revision 6.28 2001/06/04 21:29:42 dondosha
-* Add message about deleted hits with e-value below the low threshold
-*
-* Revision 6.27 2001/05/25 18:59:30 vakatov
-* Nested comment typo fixed
-*
-* Revision 6.26 2001/05/16 19:32:15 egorov
-* Added ALT tags to HTML images
-*
-* Revision 6.25 2001/05/15 17:16:41 egorov
-* TXALIGN_TARGET_IN_LINKS added
-*
-* Revision 6.24 2001/05/11 16:23:45 egorov
-* Out of bsp scope use stored txsp->is_na boolean value
-*
-* Revision 6.23 2001/05/04 14:12:57 madden
-* Fix problem with accessing already freed annot
-*
-* Revision 6.22 2001/04/26 13:55:02 madden
-* Use accession in URL for dumpgnl
-*
-* Revision 6.21 2001/03/29 21:56:33 madden
-* Minor fix if formatdb run without -o T
-*
-* Revision 6.20 2001/03/29 19:04:59 madden
-* Fixed problem in FilterAsn1DefLine, added Tx_PrintDefLine for one-line descriptions
-*
-* Revision 6.19 2001/03/23 21:20:56 madden
-* Print Length for old and new databases
-*
-* Revision 6.18 2001/03/23 17:48:34 kans
-* define NLM_GENERATED_CODE_PROTO so BlastDefLineSetAsnRead prototype is available to txalign.c
-*
-* Revision 6.17 2001/03/23 17:24:44 madden
-* Add FDGetDeflineAsnFromBioseq from readdb.[ch]
-*
-* Revision 6.16 2001/02/13 21:32:54 madden
-* Fix for tblastx
-*
-* Revision 6.15 2001/02/09 21:42:09 madden
-* Return from PrintDefLineEx2 if no descriptions demanded
-*
-* Revision 6.14 2001/02/07 17:22:39 shavirin
-* Changed order of Human genome viewer labels in case of minus strand.
-*
-* Revision 6.13 2001/02/05 21:40:14 madden
-* Correction to gather change for ungapped output
-*
-* Revision 6.12 2001/02/02 20:42:53 dondosha
-* Corrected count of number of descriptions in PrintDefLinesFromSeqAlignEx2
-*
-* Revision 6.11 2001/01/31 22:20:24 madden
-* Minimize calls to gather
-*
-* Revision 6.10 2001/01/31 18:43:48 dondosha
-* Test whether subject Bioseq is found before trying to show the hit
-*
-* Revision 6.9 2001/01/24 14:43:02 egorov
-* Do not overwrite bestid with a garbage, what happened with non-existing
-* (e.g. recently deleted) GIs.
-*
-* Revision 6.8 2001/01/23 16:32:37 dondosha
-* 1. Fixed bug in PrintDefLinesFromSeqAlignEx2
-* 2. Round percentages to nearest integer instead of casting
-*
-* Revision 6.7 2001/01/03 17:28:26 dondosha
-* Link gnl|ti ids to the Trace Archive web page
-*
-* Revision 6.6 2000/12/18 20:35:10 shavirin
-* Added +1 to from/to using for printing links to Genome viewer.
-*
-* Revision 6.5 2000/12/15 19:46:50 shavirin
-* Adeed missing "</a>" in the label to the single alignment.
-*
-* Revision 6.4 2000/12/14 17:08:52 shavirin
-* Added additinal label "<name=" for the single alignment. This link will
-* be shown only in Human Genome viewer.
-*
-* Revision 6.3 2000/11/27 17:18:16 madden
-* Do not strip directory name if dumpgnl.cgi is used
-*
-* Revision 6.2 2000/11/22 19:56:04 shavirin
-* Added possibility to print links to Taxonomy database in HTML output
-* used with ASN.1 structured deflines.
-*
-* Revision 6.1 2000/11/16 22:19:34 shavirin
-* File moved to distrib/tools directory and to libncbitool.a library.
-*
-* Revision 6.154 2000/11/14 17:03:21 shavirin
-* Fixed problem with uninitialized memory in the function ShowAlignNodeText2()
-* resulted in coredump of blastcl3 program.
-*
-* Revision 6.153 2000/11/14 16:57:55 madden
-* Init aso.blast_type to NULL if not set
-*
-* Revision 6.152 2000/11/13 18:01:43 madden
-* do not set blast_type to UNFIN_GEN by default
-*
-* Revision 6.151 2000/11/03 15:26:02 madden
-* Check TOOL_URL for ? before adding one
-*
-* Revision 6.150 2000/11/01 14:43:11 madden
-* Changes from Futamura for psitblastn
-*
-* Revision 6.149 2000/11/01 14:24:56 madden
-* Set options to TXALIGN_NO_ENTREZ if TOOL_URL defined
-*
-* Revision 6.148 2000/10/27 17:54:17 madden
-* Changes to make_dumpgnl_links for new hs genome page
-*
-* Revision 6.147 2000/10/12 21:37:32 shavirin
-* Adjusted calculation of ends of alignment in minus strand.
-*
-* Revision 6.146 2000/10/06 19:30:51 shavirin
-* Added printing of initial frame number in OOF case. Fixed some spacing
-* to be the same as in regular case.
-*
-* Revision 6.145 2000/10/06 17:55:44 shavirin
-* Added usage of correct matrix in OOF case.
-*
-* Revision 6.144 2000/10/06 17:23:21 shavirin
-* Added BioseqUnlock in printing OOF alignment.
-*
-* Revision 6.142 2000/10/02 22:03:26 shavirin
-* Changed function OOFShowSingleAlignment to have correct spacing.
-*
-* Revision 6.141 2000/09/28 15:51:44 dondosha
-* Open <PRE> block in PrintDefLinesFromSeqAlignEx2 - needed for PSI BLAST
-*
-* Revision 6.140 2000/09/28 15:03:15 dondosha
-* Added boolean splice_junction score type
-*
-* Revision 6.139 2000/09/27 20:57:55 shavirin
-* Fixed bug with printing DNA line ends in case of minus strand.
-*
-* Revision 6.138 2000/09/25 19:22:12 shavirin
-* Fixed start of protein sequencer in OOF alignment. Added check for NULL
-* return from BioseqLocById in FormatScoreFromSeqAlignEx() function.
-*
-* Revision 6.137 2000/09/13 22:24:30 dondosha
-* Corrected the printing of </PRE> at the end of PrintDefLinesFromSeqAlignEx2
-*
-* Revision 6.136 2000/09/13 21:15:39 dondosha
-* Removed opening <PRE> in PrintDefLinesFromSeqAlignEx2
-*
-* Revision 6.135 2000/09/01 18:43:38 shavirin
-* Adjusted start and stop of every line for OOF alignment printout.
-*
-* Revision 6.134 2000/08/31 16:53:10 shavirin
-* Fixed memory leak in OOFShowSingleAlignment().
-*
-* Revision 6.133 2000/08/30 14:18:42 shavirin
-* Fixed case for printing OOF alignment.
-*
-* Revision 6.132 2000/08/25 19:02:37 shavirin
-* Corrected calculation of number of mismatches, gaps,positives etc.
-* for discontinuous alignments.
-*
-* Revision 6.131 2000/08/24 18:14:54 shavirin
-* Added "<a name=..." links to every HSP Score for Greg's BLAST page.
-* This easyly may be changed for general Blast output case.
-*
-* Revision 6.130 2000/07/25 16:47:13 shavirin
-* Changed function to print OOF alignment.
-*
-* Revision 6.129 2000/07/18 22:37:22 shavirin
-* Adjusted end_of_line values in the function OOFShowSingleAlignment()
-*
-* Revision 6.128 2000/07/17 14:11:47 shavirin
-* Adjusted function OOFShowSingleAlignment()
-*
-* Revision 6.127 2000/07/14 16:02:41 shavirin
-* Initialixed variable ooframe to FALSE.
-*
-* Revision 6.126 2000/07/11 20:51:04 shavirin
-* Added major functions for displaying Out-Of-Frame alignments.
-*
-* Revision 6.125 2000/07/10 20:45:53 shavirin
-* Added parameter ooframe for Out-Of-frame alignment and corresponding changes
-* to accomodate this parameter.
-*
-* Revision 6.124 2000/06/22 18:56:55 egorov
-* Add a protection against empty deflines.
-*
-* Revision 6.123 2000/06/19 12:53:18 madden
-* Do SeqIdWrite for both HTML and text
-*
-* Revision 6.122 2000/06/16 18:25:43 shavirin
-* Fixed problem with removing full path of db in make_dumpgnl_links()
-*
-* Revision 6.121 2000/06/16 16:18:46 madden
-* Roll back change from rev. 6.114
-*
-* Revision 6.120 2000/06/15 15:35:47 shavirin
-* Fixed Uninitialized memory read error in make_dumpgnl_links() function
-*
-* Revision 6.119 2000/06/13 18:58:51 shavirin
-* Adjusted region of database sequence in the function
-* load_align_sum_for_DenseDiag()
-*
-* Revision 6.118 2000/06/12 16:50:03 shavirin
-* Fixed bug with calculation total length of the StdSeg alignment and
-* adjusted calculation of translation frame.
-*
-* Revision 6.117 2000/06/09 19:00:05 shavirin
-* Function GetGeneticCodeFromSeqId() made external and added to header file.
-*
-* Revision 6.116 2000/06/08 20:44:49 shavirin
-* Added calculation of start/stop values in the function find_score_in_align().
-*
-* Revision 6.115 2000/06/08 17:13:53 dondosha
-* Fixed bug with wrong scores reported for ungapped blastx alignments
-*
-* Revision 6.114 2000/06/06 16:40:20 shavirin
-* Use plain database name if TXALIGN_NO_ENTREZ option is set.
-*
-* Revision 6.113 2000/05/16 16:32:17 shavirin
-* Added check for WWW_ROOT_PATH environment for PSI Blast.
-*
-* Revision 6.112 2000/05/05 20:23:08 shavirin
-* Do not make gnl-link if passwd or tool_url == NULL.
-*
-* Revision 6.111 2000/05/05 20:03:48 shavirin
-* Rolled back revision 6.110.
-*
-* Revision 6.109 2000/05/01 19:09:58 shavirin
-* Removed function SeqIdSetDup()
-*
-* Revision 6.108 2000/05/01 16:26:14 shavirin
-* Added multiple-highligted deflines in the BLAST output.
-*
-* Revision 6.107 2000/04/25 18:13:43 shavirin
-* Do not link to anything ids with BL_ORD_ID.
-*
-* Revision 6.106 2000/04/04 21:52:50 madden
-* Roll-back last change
-*
-* Revision 6.105 2000/04/03 17:45:42 shavirin
-* Changed way to print multiple deflines. Removed define to print
-* old Entrez links.
-*
-* Revision 6.104 2000/03/24 16:04:24 shavirin
-* Added hack for Drosophila BLAST page.
-*
-* Revision 6.103 2000/03/23 15:02:12 shavirin
-* Added possibility to use environment variables in the function
-* make_dumpgnl_links()
-*
-* Revision 6.102 2000/03/14 17:16:11 shavirin
-* Cleared AlignSum buffer in the function FormatScoreFromSeqAlign
-*
-* Revision 6.101 2000/03/07 21:58:40 shavirin
-* Now will use PSSM Matrix to show positives in PSI Blast
-*
-* Revision 6.100 2000/03/02 16:25:09 shavirin
-* Fixed bug with very long deflines in FilterTheDefline() function.
-*
-* Revision 6.99 2000/01/19 21:54:31 madden
-* Moved vecscreen stuff to vecscrn.[ch]
-*
-* Revision 6.98 2000/01/07 21:08:28 shavirin
-* Fixed minor memory leak in ShowTextAlignFromAnnot2().
-*
-* Revision 6.97 1999/12/02 19:36:10 shavirin
-* Fixed hundreds of errors detected by C++ compiler. Fixed formating
-* of deflines in PHI/PSI Blast search.
-*
-* Revision 6.96 1999/11/24 21:24:31 vakatov
-* Fixed for the C++ and/or MSVC DLL compilation
-*
-* Revision 6.95 1999/11/16 20:44:07 egorov
-* Close all <PRE>'s
-*
-* Revision 6.94 1999/11/12 19:01:40 madden
-* Fix memory leaks
-*
-* Revision 6.93 1999/11/10 18:27:11 madden
-* Fix problem with determining if same sequence as last is being examined
-*
-* Revision 6.92 1999/11/09 22:15:07 shavirin
-* Added parameter follower to the Blast score printing function
-*
-* Revision 6.91 1999/11/01 16:50:25 shavirin
-* Fixed typo in the function get_seqid_for_textbuf()
-*
-* Revision 6.90 1999/11/01 15:38:13 shavirin
-* Turned on producing of new Entrez links in the BLAST output.
-*
-* Revision 6.89 1999/10/19 18:25:43 shavirin
-* Added prefix "images" to psi_blast gifs.
-*
-* Revision 6.88 1999/10/07 16:08:04 shavirin
-* Passed matrix to the function FormatScoreFromSeqAlign().
-*
-* Revision 6.87 1999/10/07 13:13:44 egorov
-* Fix bug when preprocessor direrective and C statement were at the same line.
-*
-* Revision 6.86 1999/09/30 20:43:34 madden
-* change ray links for VecScreen
-*
-* Revision 6.85 1999/09/29 17:15:37 shavirin
-* Added new funtion FormatScoreFromSeqAlign()
-*
-* Revision 6.84 1999/09/29 14:07:56 shavirin
-* Fixed typo in webb_blossum62 matrix.
-*
-* Revision 6.83 1999/09/28 20:11:34 shavirin
-* Added Id, Revision and Log information.
-*
*
* ==========================================================================
*/
@@ -704,7 +186,7 @@ BlastDefLinePtr getBlastDefLineForSeqId(BlastDefLinePtr bdlp, SeqIdPtr sip){
static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){
BlastDefLinePtr bdlp, bdlpTemp;
Boolean hasLinkout=FALSE;
- Int4 gi, firstGi=GetGIForSeqId(sip);
+ BIG_ID gi, firstGi=GetGIForSeqId(sip);
Char molType[8]={""};
if(bsp){
@@ -772,7 +254,7 @@ static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){
static void addLinkoutForBioseq(BioseqPtr bsp, SeqIdPtr sip, SeqIdPtr firstSip, FILE* fp){
BlastDefLinePtr bdlp, actualBdlp;
Boolean hasLinkout=FALSE;
- Int4 gi, firstGi;
+ BIG_ID gi, firstGi;
Char molType[8]={""};
if(bsp){
@@ -1769,14 +1251,14 @@ static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean
/*check box for getting sequence*/
if(options&TXALIGN_HTML&&options&TXALIGN_MASTER&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
Char checkboxBuf[200];
- sprintf(checkboxBuf, "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%d\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%d', 'getSeqGi', this.checked)\">", sip->data.intvalue, query_number_glb);
- sprintf(docbuf+pos,checkboxBuf);
+ snprintf(checkboxBuf, 200, "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%d\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%d', 'getSeqGi', this.checked)\">", sip->data.intvalue, query_number_glb);
+ snprintf(docbuf+pos, size-pos, "%s", checkboxBuf);
pos += StringLen(checkboxBuf);
}
html_len = StringLen(HTML_buffer);
- sprintf(docbuf+pos, HTML_buffer);
+ snprintf(docbuf+pos, size-pos, "%s", HTML_buffer);
pos += html_len;
pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
@@ -1793,7 +1275,8 @@ static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean
sprintf(HTML_buffer, "<a name = THC%ld></a><a href=\"http://www.tigr.org/docs/tigr-scripts/hgi_scripts/thc_report.spl?est=THC%ld&report_type=n\">", (long) oip->id, (long) oip->id);
html_len = StringLen(HTML_buffer);
- sprintf(docbuf+pos, HTML_buffer);
+ snprintf(docbuf+pos, size-pos, "%s",
+ HTML_buffer);
pos += html_len;
pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
@@ -1802,7 +1285,8 @@ static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean
sprintf(HTML_buffer, "<a name = TI%ld></a><a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id, (long) oip->id);
html_len = StringLen(HTML_buffer);
- sprintf(docbuf+pos, HTML_buffer);
+ snprintf(docbuf+pos, size-pos, "%s",
+ HTML_buffer);
pos += html_len;
pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
@@ -1818,14 +1302,14 @@ static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean
if(!load){
if(options&TXALIGN_HTML&&options&TXALIGN_MASTER&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
Char checkboxBuf[200];
- sprintf(checkboxBuf, "<input type=\"checkbox\" name=\"getSeqMaster\" value=\"\" onClick=\"uncheckable('getSeqAlignment%d', 'getSeqMaster')\">", query_number_glb);
- sprintf(docbuf+pos,checkboxBuf);
+ snprintf(checkboxBuf, 200, "<input type=\"checkbox\" name=\"getSeqMaster\" value=\"\" onClick=\"uncheckable('getSeqAlignment%d', 'getSeqMaster')\">", query_number_glb);
+ snprintf(docbuf+pos, size-pos, "%s", checkboxBuf);
pos += StringLen(checkboxBuf);
}
pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos, tdp->strand, FALSE, FALSE, label_size, num_size, show_strand, strip_semicolon);
}
- sprintf(docbuf+pos, "%s", tdp->buf);
+ snprintf(docbuf+pos, size-pos, "%s", tdp->buf);
pos += StringLen(tdp->buf);
if(stop_val >=0 && is_first)
{
@@ -6300,7 +5784,7 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
if(seq_int1 != NULL) {
/* This line should be checked for correctness */
- if((line1[line_index] = BSGetByte(b_store)) == EOF)
+ if((line1[line_index] = BSGetByte(b_store)) == (Char)EOF)
line1[line_index] = '?';
if(dna_strand != Seq_strand_minus)
diff --git a/api/utilpub.c b/api/utilpub.c
index 933ab96a..cb424f23 100644
--- a/api/utilpub.c
+++ b/api/utilpub.c
@@ -381,7 +381,7 @@ NLM_EXTERN ValNodePtr StorePub(BioseqPtr bsp, ValNodePtr vnp, ValNodePtr pub, Se
AddPubBsp (psp, bsp);
AddCitFeat(psp, sfp);
}
- } else {
+ } else if (sfp) {
PubSet = (ValNodePtr) sfp->cit;
Pub = (ValNodePtr) PubSet->data.ptrvalue;
for (v=Pub; v; v=v->next) {
@@ -1161,7 +1161,8 @@ NLM_EXTERN void DeleteSites (SeqEntryPtr sep, Pointer data, Int4 index, Int2 ind
for (ap = sap; ap != NULL; ap = apnext) {
apnext = ap->next;
- if (ap->data == NULL) {
+ /* now keep empty annot if annot_descr present */
+ if (ap->data == NULL && ap->desc == NULL) {
sap = remove_annot(sap, ap);
}
}
diff --git a/api/valapi.c b/api/valapi.c
index 17fc0dd4..85339206 100755
--- a/api/valapi.c
+++ b/api/valapi.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/7/2009
*
-* $Revision: 1.17 $
+* $Revision: 1.33 $
*
* File Description:
*
@@ -109,7 +109,10 @@ static void SortFieldsInCommentRule (CommentRulePtr cr)
ValNodePtr list = NULL, vnp;
if (cr == NULL || cr->fields == NULL) {
+ //LCOV_EXCL_START
+ //not valid, not testable
return;
+ //LCOV_EXCL_STOP
}
for (r = cr->fields; r != NULL; r = r->next) {
@@ -140,6 +143,8 @@ static void SortCommentRuleFields (CommentSetPtr cr_set)
}
+//LCOV_EXCL_START
+//not testable in regression
static Boolean LoadCommentRulesFromLocalString (void)
{
@@ -160,6 +165,7 @@ static Boolean LoadCommentRulesFromLocalString (void)
#endif
return (Boolean) (CommentRules != NULL);
}
+//LCOV_EXCL_STOP
NLM_EXTERN CommentRulePtr LoadCommentRuleSet (void)
@@ -172,25 +178,29 @@ NLM_EXTERN CommentRulePtr LoadCommentRuleSet (void)
if (! FindPath("ncbi", "ncbi", "data", buf, sizeof (buf)))
{
-
+ //LCOV_EXCL_START
+ //not testable in regression
if (LoadCommentRulesFromLocalString ()) {
return CommentRules;
}
ErrPostEx(SEV_WARNING, 0, 0, "FindPath failed in LoadCommentRuleSet - ncbi configuration file missing or incorrect");
return CommentRules;
+ //LCOV_EXCL_STOP
}
StringCat(buf, "validrules.prt");
if ((aip = AsnIoOpen(buf, "r")) == NULL)
{
-
+ //LCOV_EXCL_START
+ //not testable in regression
if (LoadCommentRulesFromLocalString ()) {
return CommentRules;
}
ErrPostEx(SEV_WARNING, 0, 0, "Couldn't open [%s]", buf);
return CommentRules;
+ //LCOV_EXCL_STOP
}
CommentRules = CommentSetAsnRead(aip, NULL);
@@ -361,7 +371,7 @@ static UserFieldPtr FindFieldForRuleName (UserFieldPtr ufp, CharPtr field_rule)
}
-static int CompareUserFields (UserFieldPtr ufp1, UserFieldPtr ufp2)
+static int VACompareUserFields (UserFieldPtr ufp1, UserFieldPtr ufp2)
{
int rval = 0;
CharPtr cp1, cp2;
@@ -415,7 +425,7 @@ NLM_EXTERN int LIBCALLBACK SortVnpByUserField (VoidPtr ptr1, VoidPtr ptr2)
vnp1 = *((ValNodePtr PNTR) ptr1);
vnp2 = *((ValNodePtr PNTR) ptr2);
if (vnp1 != NULL && vnp2 != NULL) {
- return CompareUserFields(vnp1->data.ptrvalue, vnp2->data.ptrvalue);
+ return VACompareUserFields(vnp1->data.ptrvalue, vnp2->data.ptrvalue);
}
}
return 0;
@@ -428,7 +438,10 @@ static void SortFieldsInUserObject (UserObjectPtr uop)
ValNodePtr list = NULL, vnp;
if (uop == NULL || uop->data == NULL) {
+ //LCOV_EXCL_START
+ //invalid
return;
+ //LCOV_EXCL_STOP
}
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
@@ -447,31 +460,101 @@ static void SortFieldsInUserObject (UserObjectPtr uop)
list = ValNodeFree (list);
}
+static Boolean PrefixOrSuffixInList (CharPtr val, CharPtr before, CharPtr after)
-NLM_EXTERN EFieldValid
-IsStructuredCommentValidForRule
+{
+ Char buf [1024];
+ size_t len, l_before, l_after;
+ ValNodePtr list, vnp;
+ Boolean rsult = FALSE;
+ CharPtr str;
+
+ if (val == NULL) return FALSE;
+ len = StringLen (val);
+ if (len < 10) return FALSE;
+ l_before = StringLen (before);
+ l_after = StringLen (after);
+ if (StringNCmp (val, before, l_before) != 0) return FALSE;
+ if (StringNCmp (val + len - l_after, after, l_after) != 0) return FALSE;
+
+ if (len > sizeof (buf)) return FALSE;
+ StringNCpy_0 (buf, val + l_before, sizeof (buf));
+ buf [len - l_before - l_after] = '\0';
+
+ list = GetStructuredCommentPrefixList ();
+ if (list == NULL) return FALSE;
+
+ for (vnp = list; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ if (StringCmp (buf, str) == 0) rsult = TRUE;
+ }
+
+ ValNodeFreeData (list);
+
+ return rsult;
+}
+
+
+static EFieldValid FindForbiddenPhrases
(UserObjectPtr uop,
CommentRulePtr comment_rule,
StructuredCommentCallback s_callback,
Pointer s_callback_data)
{
- UserFieldPtr ufp, ufp_tmp, depend_ufp;
- FieldRulePtr field_rule, rule_tmp;
- DependentFieldRulePtr depend_rule;
+ UserFieldPtr ufp;
+ ValNodePtr vnp;
EFieldValid rval = eFieldValid_Valid;
- if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) {
- return eFieldValid_Invalid;
- } else if (comment_rule == NULL) {
+ if (uop == NULL || comment_rule == NULL || comment_rule->forbidden_phrases == NULL) {
return eFieldValid_Valid;
}
- /* first, make sure comment rule prefix matches comment */
- if (!DoesStructuredCommentHavePrefix (uop, comment_rule->prefix)) {
+ /* examine fields for forbidden phrases */
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->label != NULL
+ && (StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0
+ || StringICmp (ufp->label->str, "StructuredCommentSuffix") == 0)) {
+ /* skip suffix and prefix */
+ continue;
+ } else {
+ if (ufp->choice == 1) {
+ /* compare string value */
+ for (vnp = comment_rule->forbidden_phrases; vnp != NULL; vnp = vnp->next) {
+ if (StringISearch(ufp->data.ptrvalue, vnp->data.ptrvalue) != NULL) {
+ rval = eFieldValid_Inappropriate;
+ if (s_callback == NULL) {
+ break;
+ } else {
+ s_callback (eFieldValid_Inappropriate, NULL, ufp, NULL, s_callback_data, uop);
+ }
+ }
+ }
+ }
+ }
+ }
+ return rval;
+}
+
+
+static EFieldValid AreStructuredCommentContentsValidForRule
+(UserObjectPtr uop,
+ CommentRulePtr comment_rule,
+ StructuredCommentCallback s_callback,
+ Pointer s_callback_data)
+{
+ UserFieldPtr ufp, ufp_tmp, depend_ufp;
+ FieldRulePtr field_rule, rule_tmp;
+ DependentFieldRulePtr depend_rule;
+ EFieldValid rval = eFieldValid_Valid, tmp_val;
+ Boolean free_uop = FALSE;
+
+ if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) {
return eFieldValid_Invalid;
}
if (!comment_rule->require_order) {
+ free_uop = TRUE;
uop = (UserObjectPtr) AsnIoMemCopy (uop, (AsnReadFunc) UserObjectAsnRead, (AsnWriteFunc) UserObjectAsnWrite);
SortFieldsInUserObject(uop);
}
@@ -497,7 +580,7 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_Invalid;
}
- s_callback (eFieldValid_Invalid, field_rule, ufp, NULL, s_callback_data);
+ s_callback (eFieldValid_Invalid, field_rule, ufp, NULL, s_callback_data, uop);
}
}
ufp = ufp->next;
@@ -511,7 +594,7 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_Invalid;
}
- s_callback (eFieldValid_Invalid, NULL, ufp, NULL, s_callback_data);
+ s_callback (eFieldValid_Invalid, NULL, ufp, NULL, s_callback_data, uop);
}
}
ufp = ufp->next;
@@ -526,12 +609,12 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_MissingRequiredField;
}
- s_callback (eFieldValid_MissingRequiredField, field_rule, NULL, NULL, s_callback_data);
+ s_callback (eFieldValid_MissingRequiredField, field_rule, NULL, NULL, s_callback_data, uop);
}
} else {
/* field wasn't required, it's ok */
}
- } else {
+ } else if (comment_rule->require_order) {
/* field is out of order */
if (s_callback == NULL) {
rval = eFieldValid_FieldOutOfOrder;
@@ -540,11 +623,20 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_FieldOutOfOrder;
}
- s_callback (eFieldValid_FieldOutOfOrder, field_rule, ufp_tmp, NULL, s_callback_data);
+ s_callback (eFieldValid_FieldOutOfOrder, field_rule, ufp_tmp, NULL, s_callback_data, uop);
if (!DoesFieldValueMatchRule (ufp_tmp, field_rule)) {
- s_callback (eFieldValid_Invalid, field_rule, ufp_tmp, NULL, s_callback_data);
+ s_callback (eFieldValid_Invalid, field_rule, ufp_tmp, NULL, s_callback_data, uop);
}
}
+ } else {
+ if (!DoesFieldValueMatchRule(ufp_tmp, field_rule)) {
+ if (s_callback == NULL) {
+ rval = eFieldValid_Invalid;
+ goto IsStructuredCommentValidForRule_exit;
+ } else {
+ s_callback(eFieldValid_Invalid, field_rule, ufp_tmp, NULL, s_callback_data, uop);
+ }
+ }
}
field_rule = field_rule->next;
}
@@ -560,7 +652,7 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_MissingRequiredField;
}
- s_callback (eFieldValid_MissingRequiredField, field_rule, NULL, NULL, s_callback_data);
+ s_callback (eFieldValid_MissingRequiredField, field_rule, NULL, NULL, s_callback_data, uop);
}
}
field_rule = field_rule->next;
@@ -583,7 +675,7 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_Invalid;
}
- s_callback (eFieldValid_Invalid, NULL, ufp, NULL, s_callback_data);
+ s_callback (eFieldValid_Invalid, NULL, ufp, NULL, s_callback_data, uop);
}
} else if (field_rule != NULL && FindFieldForRuleName(uop->data, field_rule->field_name) != ufp) {
if (s_callback == NULL) {
@@ -593,7 +685,7 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_DuplicateField;
}
- s_callback (eFieldValid_DuplicateField, field_rule, ufp, NULL, s_callback_data);
+ s_callback (eFieldValid_DuplicateField, field_rule, ufp, NULL, s_callback_data, uop);
}
}
ufp = ufp->next;
@@ -615,7 +707,7 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_MissingRequiredField;
}
- s_callback (eFieldValid_MissingRequiredField, field_rule, ufp, depend_ufp, s_callback_data);
+ s_callback (eFieldValid_MissingRequiredField, field_rule, ufp, depend_ufp, s_callback_data, uop);
}
} else if (!DoesFieldValueMatchRule (ufp, field_rule)) {
if (s_callback == NULL) {
@@ -625,11 +717,13 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_MissingRequiredField;
}
- s_callback (eFieldValid_Invalid, field_rule, ufp, depend_ufp, s_callback_data);
+ s_callback (eFieldValid_Invalid, field_rule, ufp, depend_ufp, s_callback_data, uop);
}
}
}
for (field_rule = depend_rule->disallowed_fields; field_rule != NULL; field_rule = field_rule->next) {
+ //LCOV_EXCL_START
+ //no rules currently have disallowed fields
ufp = FindFieldForRuleName (uop->data, field_rule->field_name);
if (ufp != NULL && DoesFieldValueMatchRule (ufp, field_rule)) {
if (s_callback == NULL) {
@@ -639,89 +733,101 @@ IsStructuredCommentValidForRule
if (rval == eFieldValid_Valid) {
rval = eFieldValid_Disallowed;
}
- s_callback (eFieldValid_Disallowed, field_rule, ufp, depend_ufp, s_callback_data);
+ s_callback (eFieldValid_Disallowed, field_rule, ufp, depend_ufp, s_callback_data, uop);
}
}
+ //LCOV_EXCL_STOP
}
}
}
+ tmp_val = FindForbiddenPhrases(uop, comment_rule, s_callback, s_callback_data);
+ if (rval == eFieldValid_Valid) {
+ rval = tmp_val;
+ }
+
IsStructuredCommentValidForRule_exit:
- if (!comment_rule->require_order) {
+ if (free_uop) {
uop = UserObjectFree (uop);
}
-
return rval;
}
-NLM_EXTERN EFieldValid IsStructuredCommentValid (UserObjectPtr uop, StructuredCommentCallback s_callback, Pointer s_callback_data)
+NLM_EXTERN EFieldValid
+IsStructuredCommentValidForRule
+(UserObjectPtr uop,
+ CommentRulePtr comment_rule,
+ StructuredCommentCallback s_callback,
+ Pointer s_callback_data)
{
- CommentRulePtr cr;
UserFieldPtr ufp;
- CharPtr prefix = NULL;
+ EFieldValid rval = eFieldValid_Valid;
- for (ufp = uop->data; ufp != NULL && prefix == NULL; ufp = ufp->next) {
- if (ufp->label != NULL
- && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0
- && ufp->choice == 1) {
- prefix = ufp->data.ptrvalue;
- }
+ if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) {
+ return eFieldValid_Invalid;
}
- if (prefix == NULL) {
- return TRUE;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->label != NULL && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0) {
+ /* check prefix */
+ if (! PrefixOrSuffixInList ((CharPtr) ufp->data.ptrvalue, "##", "-START##")) {
+ if (s_callback == NULL) {
+ return eFieldValid_Invalid;
+ } else {
+ if (rval == eFieldValid_Valid) {
+ rval = eFieldValid_Invalid;
+ }
+ s_callback (eFieldValid_Invalid, NULL, ufp, NULL, s_callback_data, uop);
+ }
+ }
+ } else if (ufp->label != NULL && StringICmp (ufp->label->str, "StructuredCommentSuffix") == 0) {
+ /* check suffix */
+ if (! PrefixOrSuffixInList ((CharPtr) ufp->data.ptrvalue, "##", "-END##")) {
+ if (s_callback == NULL) {
+ return eFieldValid_Invalid;
+ } else {
+ if (rval == eFieldValid_Valid) {
+ rval = eFieldValid_Invalid;
+ }
+ s_callback (eFieldValid_Invalid, NULL, ufp, NULL, s_callback_data, uop);
+ }
+ }
+ }
}
- cr = GetCommentRuleFromRuleSet (prefix);
- return IsStructuredCommentValidForRule (uop, cr, s_callback, s_callback_data);
-}
-
-static Boolean IsStructuredCommentPrefix (UserFieldPtr ufp)
-{
- if (ufp == NULL) {
- return FALSE;
- }
- if (ufp->label != NULL
- && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0
- && ufp->choice == 1) {
- return TRUE;
- } else {
- return FALSE;
+ if (comment_rule == NULL) {
+ return rval;
}
-}
-
-static Boolean IsStructuredCommentSuffix (UserFieldPtr ufp)
-{
- if (ufp == NULL) {
- return FALSE;
- }
- if (ufp->label != NULL
- && StringICmp (ufp->label->str, "StructuredCommentSuffix") == 0
- && ufp->choice == 1) {
- return TRUE;
- } else {
- return FALSE;
+ /* first, make sure comment rule prefix matches comment */
+ if (!DoesStructuredCommentHavePrefix (uop, comment_rule->prefix)) {
+ return eFieldValid_Invalid;
}
+
+ return AreStructuredCommentContentsValidForRule (uop, comment_rule, s_callback, s_callback_data);
}
-static CharPtr GetStructuredCommentPrefix (UserObjectPtr uop)
+NLM_EXTERN EFieldValid IsStructuredCommentValid (UserObjectPtr uop, StructuredCommentCallback s_callback, Pointer s_callback_data)
{
+ CommentRulePtr cr;
UserFieldPtr ufp;
CharPtr prefix = NULL;
- if (uop == NULL) {
- return NULL;
- }
-
for (ufp = uop->data; ufp != NULL && prefix == NULL; ufp = ufp->next) {
- if (IsStructuredCommentPrefix(ufp)) {
+ if (ufp->label != NULL
+ && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0
+ && ufp->choice == 1) {
prefix = ufp->data.ptrvalue;
}
}
- return prefix;
+
+ if (prefix == NULL) {
+ return TRUE;
+ }
+ cr = GetCommentRuleFromRuleSet (prefix);
+ return IsStructuredCommentValidForRule (uop, cr, s_callback, s_callback_data);
}
@@ -770,16 +876,20 @@ static Boolean MovePrefixAndSuffixFieldsToFlank (UserObjectPtr uop)
}
+//LCOV_EXCL_START
+//not used for validation
NLM_EXTERN Boolean ReorderStructuredCommentFields (UserObjectPtr uop)
{
CommentRulePtr cr;
FieldRulePtr rule;
- UserFieldPtr ufp, ufp_prev = NULL, new_list = NULL, new_prev = NULL, ufp_next, ufp_last = NULL;
+ UserFieldPtr ufp, ufp_prev = NULL, new_list = NULL, new_prev = NULL;
CharPtr prefix = NULL;
+ UserObjectPtr uop_orig;
Boolean changed = FALSE;
if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) return FALSE;
+ uop_orig = AsnIoMemCopy (uop, (AsnReadFunc) UserObjectAsnRead, (AsnWriteFunc) UserObjectAsnWrite);
prefix = GetStructuredCommentPrefix (uop);
if (prefix != NULL
&& (cr = GetCommentRuleFromRuleSet (prefix)) != NULL) {
@@ -812,10 +922,37 @@ NLM_EXTERN Boolean ReorderStructuredCommentFields (UserObjectPtr uop)
}
changed |= MovePrefixAndSuffixFieldsToFlank (uop);
+ changed = !AsnIoMemComp (uop, uop_orig, (AsnWriteFunc) UserObjectAsnWrite);
+ uop_orig = UserObjectFree (uop_orig);
+
return changed;
}
+//not used for validation
+static void ReorderStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer userdata)
+{
+ BoolPtr r = (BoolPtr) userdata;
+ if (sdp->choice == Seq_descr_user) {
+ if (ReorderStructuredCommentFields ((UserObjectPtr) sdp->data.ptrvalue)) {
+ if (r != NULL) {
+ *r = TRUE;
+ }
+ }
+ }
+}
+
+//not used for validation
+NLM_EXTERN Boolean ReorderStructuredCommentsInSeqEntry (SeqEntryPtr sep)
+{
+ Boolean rval = FALSE;
+
+ VisitDescriptorsInSep (sep, &rval, ReorderStructuredCommentFieldsCallback);
+ return rval;
+}
+
+
+//not used for validation
static Boolean DoesStructuredCommentHaveAnyPrefixOrSuffix (UserObjectPtr uop)
{
UserFieldPtr ufp;
@@ -836,11 +973,14 @@ static Boolean DoesStructuredCommentHaveAnyPrefixOrSuffix (UserObjectPtr uop)
}
+//LCOV_EXCL_START
+//not used in validation
static Boolean IsRuleOkForStructuredComment (UserObjectPtr uop, CommentRulePtr cr)
{
- UserFieldPtr ufp, ufp_last = NULL;
+ UserFieldPtr ufp;
FieldRulePtr field_rule;
Boolean rval = FALSE;
+ CommentRulePtr cr_tmp = NULL;
if (uop == NULL || uop->data == NULL || cr == NULL) {
return FALSE;
@@ -849,29 +989,31 @@ static Boolean IsRuleOkForStructuredComment (UserObjectPtr uop, CommentRulePtr c
/* all field names must be recognized */
while (ufp != NULL) {
- field_rule = FindRuleForFieldName(ufp, cr->fields);
- if (field_rule == NULL) {
- return FALSE;
+ if (!IsStructuredCommentPrefix(ufp) && !IsStructuredCommentSuffix(ufp)) {
+ /* ignore prefix and suffix if present */
+ field_rule = FindRuleForFieldName(ufp, cr->fields);
+ if (field_rule == NULL) {
+ return FALSE;
+ }
}
- ufp_last = ufp;
ufp = ufp->next;
}
- ufp = UserFieldNew ();
- ufp->label = ObjectIdNew ();
- ufp->label->str = StringSave ("StructuredCommentPrefix");
- ufp->choice = 1; /* visible string */
- ufp->data.ptrvalue = (Pointer) StringSave (cr->prefix);
- ufp_last->next = ufp;
-
- if (IsStructuredCommentValidForRule (uop, cr, NULL, NULL) == eFieldValid_Valid) {
+ /* don't be picky about order when assigning prefix */
+ if (cr->require_order) {
+ cr_tmp = (CommentRulePtr) AsnIoMemCopy (cr, (AsnReadFunc) CommentRuleAsnRead, (AsnWriteFunc) CommentRuleAsnWrite);
+ cr_tmp->require_order = FALSE;
+ cr = cr_tmp;
+ }
+ if (AreStructuredCommentContentsValidForRule (uop, cr, NULL, NULL) == eFieldValid_Valid) {
rval = TRUE;
}
- ufp_last->next = UserFieldFree (ufp_last->next);
+ cr_tmp = CommentRuleFree (cr_tmp);
return rval;
}
+//not used for validation
NLM_EXTERN CharPtr AutoapplyStructuredCommentPrefix (UserObjectPtr uop)
{
CommentRulePtr cr;
@@ -904,3 +1046,38 @@ NLM_EXTERN CharPtr AutoapplyStructuredCommentPrefix (UserObjectPtr uop)
return prefix;
}
+
+//not used for validation
+NLM_EXTERN CommentRulePtr NewRuleForStructuredComment (UserObjectPtr uop)
+{
+ CommentRulePtr cr;
+ CommentRulePtr new_cr = NULL;
+
+ if (uop == NULL || uop->type == NULL
+ || StringICmp (uop->type->str, "StructuredComment") != 0
+ || !DoesStructuredCommentHaveAnyPrefixOrSuffix (uop)
+ || IsStructuredCommentValid (uop, NULL, NULL) == eFieldValid_Valid) {
+ return NULL;
+ }
+
+ if (CommentRules == NULL) {
+ cr = LoadCommentRuleSet ();
+ } else {
+ cr = CommentRules;
+ }
+
+ while (cr != NULL) {
+ if (IsRuleOkForStructuredComment(uop, cr)) {
+ if (new_cr == NULL) {
+ new_cr = cr;
+ } else {
+ return NULL;
+ }
+ }
+ cr = cr->next;
+ }
+
+ return new_cr;
+}
+//LCOV_EXCL_STOP
+
diff --git a/api/valapi.h b/api/valapi.h
index ded438b8..d7f304ab 100755
--- a/api/valapi.h
+++ b/api/valapi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/8/2009
*
-* $Revision: 1.5 $
+* $Revision: 1.10 $
*
* File Description:
*
@@ -59,12 +59,13 @@ typedef enum {
eFieldValid_MissingRequiredField,
eFieldValid_FieldOutOfOrder,
eFieldValid_DuplicateField,
- eFieldValid_Disallowed
+ eFieldValid_Disallowed,
+ eFieldValid_Inappropriate
} EFieldValid;
/* error code, field rule violated, value of offending field (if any), extra data provided by user */
-typedef void (*StructuredCommentCallback) PROTO ((EFieldValid, FieldRulePtr, UserFieldPtr, UserFieldPtr, Pointer));
+typedef void (*StructuredCommentCallback) PROTO ((EFieldValid, FieldRulePtr, UserFieldPtr, UserFieldPtr, Pointer, UserObjectPtr));
NLM_EXTERN EFieldValid
IsStructuredCommentValidForRule
@@ -76,7 +77,9 @@ IsStructuredCommentValidForRule
NLM_EXTERN EFieldValid IsStructuredCommentValid (UserObjectPtr uop, StructuredCommentCallback s_callback, Pointer s_callback_data);
NLM_EXTERN Boolean ReorderStructuredCommentFields (UserObjectPtr uop);
+NLM_EXTERN Boolean ReorderStructuredCommentsInSeqEntry (SeqEntryPtr sep);
NLM_EXTERN CharPtr AutoapplyStructuredCommentPrefix (UserObjectPtr uop);
+NLM_EXTERN CommentRulePtr NewRuleForStructuredComment (UserObjectPtr uop);
#ifdef __cplusplus
diff --git a/api/valid.c b/api/valid.c
index 04dce7eb..1a0bfc2e 100644
--- a/api/valid.c
+++ b/api/valid.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.1636 $
+* $Revision: 6.2003 $
*
* File Description: Sequence editing utilities
*
@@ -65,6 +65,7 @@ static char *this_file = __FILE__;
#include <subutil.h>
#include <tofasta.h>
#include <findrepl.h>
+#include <edutil.h>
#define NLM_GENERATED_CODE_PROTO
#include <objmacro.h>
#include <macroapi.h>
@@ -102,7 +103,7 @@ static void SpellCheckSeqDescr (GatherContextPtr gcp);
NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp);
NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp);
NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp);
-NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefix);
+NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, Boolean report_abutting, CharPtr prefix);
NLM_EXTERN Boolean PatchBadSequence (BioseqPtr bsp);
NLM_EXTERN CharPtr FindIDForEntry (SeqEntryPtr sep, CharPtr buf);
NLM_EXTERN void SpellCheckSeqFeat (GatherContextPtr gcp);
@@ -116,10 +117,10 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, Pubde
static void LookForMultiplePubs (ValidStructPtr vsp, GatherContextPtr gcp, SeqDescrPtr sdp);
static void ValidateSfpCit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp);
static void ValidateAffil (ValidStructPtr vsp, AffilPtr ap);
-static TextFsaPtr GetSpecificECNumberFSA (void);
-static TextFsaPtr GetAmbiguousECNumberFSA (void);
-static TextFsaPtr GetDeletedECNumberFSA (void);
-static TextFsaPtr GetReplacedECNumberFSA (void);
+static TextFsaPtr GetSpecificECNumberFSA (ValidStructPtr vsp);
+static TextFsaPtr GetAmbiguousECNumberFSA (ValidStructPtr vsp);
+static TextFsaPtr GetDeletedECNumberFSA (ValidStructPtr vsp);
+static TextFsaPtr GetReplacedECNumberFSA (ValidStructPtr vsp);
static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp);
static Boolean HasFeatId(SeqFeatPtr sfp, Int4 num)
@@ -182,6 +183,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
Boolean indexerVersion;
Boolean disableSuppression;
Boolean genomeSubmission;
+ Boolean debugTestDuJour;
Int2 validationLimit;
ValidErrorFunc errfunc;
Pointer userdata;
@@ -192,15 +194,19 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
Boolean is_htg_in_sep;
Boolean is_barcode_sep;
Boolean is_refseq_in_sep;
+ Boolean is_wp_in_sep;
Boolean is_gpipe_in_sep;
Boolean is_gps_in_sep;
Boolean is_small_genome_set;
Boolean is_embl_ddbj_in_sep;
+ Boolean is_embl_tpe_in_sep;
Boolean is_old_gb_in_sep;
Boolean is_patent_in_sep;
Boolean other_sets_in_sep;
Boolean is_insd_in_sep;
+ Boolean is_pdb_in_sep;
Boolean only_lcl_gnl_in_sep;
+ Boolean has_gi_or_accn_ver;
Boolean has_gnl_prot_sep;
Boolean bsp_genomic_in_sep;
Boolean is_smupd_in_sep;
@@ -209,6 +215,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
Boolean has_multi_int_genes;
Boolean has_seg_bioseqs;
Boolean far_fetch_failure;
+ Boolean use_heartbeat;
+ Boolean is_geneious;
if (vsp == NULL)
return;
@@ -242,6 +250,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
indexerVersion = vsp->indexerVersion;
disableSuppression = vsp->disableSuppression;
genomeSubmission = vsp->genomeSubmission;
+ debugTestDuJour = vsp->debugTestDuJour;
validationLimit = vsp->validationLimit;
errfunc = vsp->errfunc;
userdata = vsp->userdata;
@@ -252,15 +261,19 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
is_htg_in_sep = vsp->is_htg_in_sep;
is_barcode_sep = vsp->is_barcode_sep;
is_refseq_in_sep = vsp->is_refseq_in_sep;
+ is_wp_in_sep = vsp->is_wp_in_sep;
is_gpipe_in_sep = vsp->is_gpipe_in_sep;
is_gps_in_sep = vsp->is_gps_in_sep;
is_small_genome_set = vsp->is_small_genome_set;
other_sets_in_sep = vsp->other_sets_in_sep;
is_embl_ddbj_in_sep = vsp->is_embl_ddbj_in_sep;
+ is_embl_tpe_in_sep = vsp->is_embl_tpe_in_sep;
is_old_gb_in_sep = vsp->is_old_gb_in_sep;
is_patent_in_sep = vsp->is_patent_in_sep;
is_insd_in_sep = vsp->is_insd_in_sep;
+ is_pdb_in_sep = vsp->is_pdb_in_sep;
only_lcl_gnl_in_sep = vsp->only_lcl_gnl_in_sep;
+ has_gi_or_accn_ver = vsp->has_gi_or_accn_ver;
has_gnl_prot_sep = vsp->has_gnl_prot_sep;
bsp_genomic_in_sep = vsp->bsp_genomic_in_sep;
is_smupd_in_sep = vsp->is_smupd_in_sep;
@@ -269,6 +282,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
has_multi_int_genes = vsp->has_multi_int_genes;
has_seg_bioseqs = vsp->has_seg_bioseqs;
far_fetch_failure = vsp->far_fetch_failure;
+ use_heartbeat = vsp->use_heartbeat;
+ is_geneious = vsp->is_geneious;
MemSet ((VoidPtr) vsp, 0, sizeof (ValidStruct));
vsp->errbuf = errbuf;
vsp->cutoff = cutoff;
@@ -299,6 +314,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->indexerVersion = indexerVersion;
vsp->disableSuppression = disableSuppression;
vsp->genomeSubmission = genomeSubmission;
+ vsp->debugTestDuJour = debugTestDuJour;
vsp->validationLimit = validationLimit;
vsp->errfunc = errfunc;
vsp->userdata = userdata;
@@ -309,15 +325,19 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->is_htg_in_sep = is_htg_in_sep;
vsp->is_barcode_sep = is_barcode_sep;
vsp->is_refseq_in_sep = is_refseq_in_sep;
+ vsp->is_wp_in_sep = is_wp_in_sep;
vsp->is_gpipe_in_sep = is_gpipe_in_sep;
vsp->is_gps_in_sep = is_gps_in_sep;
vsp->is_small_genome_set = is_small_genome_set;
vsp->other_sets_in_sep = other_sets_in_sep;
vsp->is_embl_ddbj_in_sep = is_embl_ddbj_in_sep;
+ vsp->is_embl_tpe_in_sep = is_embl_tpe_in_sep;
vsp->is_old_gb_in_sep = is_old_gb_in_sep;
vsp->is_patent_in_sep = is_patent_in_sep;
vsp->is_insd_in_sep = is_insd_in_sep;
+ vsp->is_pdb_in_sep = is_pdb_in_sep;
vsp->only_lcl_gnl_in_sep = only_lcl_gnl_in_sep;
+ vsp->has_gi_or_accn_ver = has_gi_or_accn_ver;
vsp->has_gnl_prot_sep = has_gnl_prot_sep;
vsp->bsp_genomic_in_sep = bsp_genomic_in_sep;
vsp->is_smupd_in_sep = is_smupd_in_sep;
@@ -326,6 +346,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->has_multi_int_genes = has_multi_int_genes;
vsp->has_seg_bioseqs = has_seg_bioseqs;
vsp->far_fetch_failure = far_fetch_failure;
+ vsp->use_heartbeat = use_heartbeat;
+ vsp->is_geneious = is_geneious;
return;
}
@@ -483,6 +505,8 @@ static void ChangeSeqLocToBestID (SeqLocPtr slp)
}
}
+//LCOV_EXCL_START
+//function associated with unused options
static Int2 WorstBioseqLabel (BioseqPtr bsp, CharPtr buffer, Int2 buflen, Uint1 content)
{
CharPtr tmp;
@@ -529,6 +553,7 @@ static Int2 WorstBioseqLabel (BioseqPtr bsp, CharPtr buffer, Int2 buflen, Uint1
return (len - buflen); /* SUMMARY not done yet */
}
+//LCOV_EXCL_STOP
static CharPtr categoryLabel [] = {
NULL, "SEQ_INST", "SEQ_DESCR", "GENERIC", "SEQ_PKG", "SEQ_FEAT", "SEQ_ALIGN", "SEQ_GRAPH", "SEQ_ANNOT"
@@ -614,7 +639,10 @@ static CharPtr err1Label [] = {
"HighNContentPercent",
"BadSegmentedSeq",
"SeqLitGapFuzzNot100",
- "SeqGapProblem"
+ "SeqGapProblem",
+ "WGSMasterLacksStrucComm",
+ "TSAMasterLacksStrucComm",
+ "AllNs"
};
static CharPtr err2Label [] = {
@@ -707,7 +735,21 @@ static CharPtr err2Label [] = {
"OrganismNotFound",
"TaxonomyIsSpeciesProblem",
"TaxonomyConsultRequired",
- "TaxonomyNucleomorphProblem"
+ "TaxonomyNucleomorphProblem",
+ "InconsistentMolTypeBiomol",
+ "BadInstitutionCountry",
+ "AmbiguousSpecificHost",
+ "BadAltitude",
+ "RefGeneTrackingOnNucProtSet",
+ "InconsistentDates",
+ "MultipleTaxonIDs",
+ "ScaffoldLacksBioProject",
+ "CompleteGenomeLacksBioProject",
+ "TaxonomyPlastidsProblem",
+ "OrganismIsUndefinedSpecies",
+ "WrongBiomolForTechnique",
+ "WrongOrganismFor16SrRNA",
+ "InconsistentWGSFlags"
};
static CharPtr err3Label [] = {
@@ -727,7 +769,11 @@ static CharPtr err3Label [] = {
"SgmlPresentInText",
"UnexpectedPubStatusComment",
"PastReleaseDate",
- "MissingISOJTA"
+ "MissingISOJTA",
+ "MissingVolume",
+ "MissingVolumeEpub",
+ "MissingPages",
+ "MissingPagesEpub"
};
static CharPtr err4Label [] = {
@@ -760,7 +806,8 @@ static CharPtr err4Label [] = {
"ComponentMissingTitle",
"SingleItemSet",
"MisplacedMolInfo",
- "ImproperlyNestedSets"
+ "ImproperlyNestedSets",
+ "SeqSubmitWithWgsSet"
};
static CharPtr err5Label [] = {
@@ -953,7 +1000,25 @@ static CharPtr err5Label [] = {
"InconsistentPseudogeneCounts",
"DeletedEcNumber",
"ReplacedEcNumber",
- "SplitEcNumber"
+ "SplitEcNumber",
+ "PeptideFeatureLacksCDS",
+ "EcNumberDataMissing",
+ "CDSnotBetweenUTRs",
+ "ShortExon",
+ "ExtraProteinFeature",
+ "AssemblyGapAdjacentToNs",
+ "AssemblyGapCoversSequence",
+ "FeatureBeginsOrEndsWithN",
+ "FeatureIsMostlyNs",
+ "CDSonMinusStrandTranscribedRNA",
+ "MultipleGenCodes",
+ "InvalidFuzz",
+ "BadComment",
+ "NonsenseIntron",
+ "InconsistentPseudogeneValue",
+ "MultiIntervalIntron",
+ "SeqLocTypeProblem",
+ "ColdShockProteinProblem"
};
static CharPtr err6Label [] = {
@@ -1047,17 +1112,20 @@ NLM_EXTERN CharPtr GetValidErrorName (int errcode, int subcode)
return NULL;
}
+//LCOV_EXCL_START
NLM_EXTERN CharPtr GetValidExplanation (int errcode, int subcode)
{
return Nlm_GetErrLongText (THIS_MODULE, errcode, subcode);
}
+//LCOV_EXCL_STOP
static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int subcode)
{
CharPtr accession = NULL, context = NULL, label = NULL, location = NULL,
- message = NULL, objtype = NULL, product = NULL, featureID = NULL;
+ message = NULL, objtype = NULL, product = NULL, featureID = NULL,
+ seqid = NULL;
BioseqPtr bsp;
BioseqSetPtr bssp;
Int2 buflen, diff, wrklen;
@@ -1066,7 +1134,7 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su
Uint2 entityID = 0, itemtype = 0;
ValidErrorFunc errfunc;
GatherContextPtr gcp;
- Char id [64], numbuf [15];
+ Char id [64], id2 [64], numbuf [15];
Uint4 itemID = 0;
ObjectIdPtr oip;
ObjValNodePtr ovp;
@@ -1139,9 +1207,25 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su
}
}
}
+ if (sip == NULL) {
+ sep = FindNthBioseq (vsp->sep, 1);
+ if (sep != NULL) {
+ bsp = (BioseqPtr) sep->data.ptrvalue;
+ if (bsp != NULL) {
+ sip = SeqIdFindWorst (bsp->id);
+ }
+ }
+ }
if (sip != NULL) {
SeqIdWrite (sip, id, PRINTID_REPORT, sizeof (id) - 1);
accession = id;
+ if (sip->choice == SEQID_GENERAL) {
+ SeqIdWrite (sip, id2, PRINTID_FASTA_GENERAL, sizeof (id2) - 1);
+ seqid = id2;
+ } else {
+ SeqIdWrite (sip, id2, PRINTID_FASTA_SHORT, sizeof (id2) - 1);
+ seqid = id2;
+ }
}
if (vsp->sfp != NULL) {
@@ -1229,7 +1313,10 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su
} else if (vsp->bsp != NULL) {
label = tmp;
if (vsp->convertGiToAccn) {
+ //LCOV_EXCL_START
+ // option not used
diff = WorstBioseqLabel (vsp->bsp, tmp, wrklen, OM_LABEL_CONTENT);
+ //LCOV_EXCL_STOP
} else {
diff = BioseqLabel (vsp->bsp, tmp, wrklen, OM_LABEL_BOTH);
}
@@ -1343,7 +1430,10 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su
buflen -= diff;
tmp += diff;
if (vsp->suppressContext || vsp->convertGiToAccn) {
- diff = WorstBioseqLabel (vsp->bsp, tmp, buflen, OM_LABEL_CONTENT);
+ //LCOV_EXCL_START
+ // option not used
+ diff = WorstBioseqLabel(vsp->bsp, tmp, buflen, OM_LABEL_CONTENT);
+ //LCOV_EXCL_STOP
} else {
diff = BioseqLabel (vsp->bsp, tmp, buflen, OM_LABEL_BOTH);
}
@@ -1371,7 +1461,7 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su
}
}
- (*errfunc) (severity, errcode, subcode, entityID, itemtype, itemID, accession,
+ (*errfunc) (severity, errcode, subcode, entityID, itemtype, itemID, accession, seqid,
featureID, message, objtype, label, context, location, product, vsp->userdata);
}
@@ -1400,12 +1490,18 @@ static Boolean IsUnclassifiedExcept (ValidStructPtr vsp)
if (vsp->sfp->excpt && (! vsp->ignoreExceptions)) {
if (vsp->sfp->data.choice == SEQFEAT_CDREGION) {
if (StringStr (vsp->sfp->except_text, "unclassified translation discrepancy") != NULL) {
+ //LCOV_EXCL_START
+ // if text was present, error to be suppressed would never have been calculated
rval = TRUE;
+ //LCOV_EXCL_STOP
}
+ //LCOV_EXCL_START
+ //errors suppressed with this function are never for mRNA features
} else if (vsp->sfp->idx.subtype == FEATDEF_mRNA) {
if (StringStr (vsp->sfp->except_text, "unclassified transcription discrepancy") != NULL) {
rval = TRUE;
}
+ //LCOV_EXCL_STOP
}
}
return rval;
@@ -1485,6 +1581,7 @@ static Boolean ShouldSuppressValidErr (ValidStructPtr vsp, int code1, int code2,
&& (valid_suppress[i].search_phrase == NULL || StringISearch (fmt, valid_suppress[i].search_phrase) != NULL)
&& (valid_suppress[i].func == NULL || valid_suppress[i].func(vsp))
&& (valid_suppress[i].exclude_phrase == NULL || StringISearch (fmt, valid_suppress[i].exclude_phrase) == NULL)) {
+ // note: all exclude phrases are NULL
rval = TRUE;
}
}
@@ -1613,7 +1710,6 @@ static ValidErrRaiseData valid_genome_raise [] = {
{ERR_SEQ_INST_TpaAssmeblyProblem},
{ERR_SEQ_INST_SeqLocLength},
{ERR_SEQ_INST_CompleteTitleProblem},
- {ERR_SEQ_INST_CompleteCircleProblem},
{ERR_SEQ_INST_BadHTGSeq},
{ERR_SEQ_INST_OverlappingDeltaRange},
{ERR_SEQ_INST_LeadingX},
@@ -1632,7 +1728,7 @@ static ValidErrRaiseData valid_genome_raise [] = {
{ERR_SEQ_DESCR_Inconsistent},
{ERR_SEQ_DESCR_ObsoleteSourceLocation},
{ERR_SEQ_DESCR_ObsoleteSourceQual},
- {ERR_SEQ_DESCR_StructuredSourceNote},
+ {ERR_SEQ_DESCR_UnwantedCompleteFlag},
{ERR_SEQ_DESCR_CollidingPublications},
{ERR_SEQ_DESCR_TransgenicProblem},
{ERR_SEQ_DESCR_BioSourceInconsistency},
@@ -1645,6 +1741,7 @@ static ValidErrRaiseData valid_genome_raise [] = {
{ERR_SEQ_DESCR_LatLonProblem},
{ERR_SEQ_DESCR_LatLonRange},
{ERR_SEQ_DESCR_LatLonValue},
+ {ERR_SEQ_DESCR_LatLonCountry},
{ERR_SEQ_DESCR_BadInstitutionCode},
{ERR_SEQ_DESCR_BadCollectionCode},
{ERR_SEQ_DESCR_BadVoucherID},
@@ -1653,7 +1750,10 @@ static ValidErrRaiseData valid_genome_raise [] = {
{ERR_SEQ_DESCR_WrongVoucherType},
{ERR_SEQ_DESCR_UserObjectProblem},
{ERR_SEQ_DESCR_BadKeyword},
+ {ERR_SEQ_DESCR_BioSourceNeedsChromosome},
{ERR_SEQ_DESCR_MolInfoConflictsWithBioSource},
+ {ERR_SEQ_DESCR_OrganismIsUndefinedSpecies},
+ {ERR_SEQ_DESCR_WrongBiomolForTechnique},
{ERR_GENERIC_MissingPubInfo},
{ERR_GENERIC_UnnecessaryPubEquiv},
{ERR_GENERIC_CollidingSerialNumbers},
@@ -1672,6 +1772,7 @@ static ValidErrRaiseData valid_genome_raise [] = {
{ERR_SEQ_PKG_SingleItemSet},
{ERR_SEQ_PKG_MisplacedMolInfo},
{ERR_SEQ_PKG_ImproperlyNestedSets},
+ {ERR_SEQ_PKG_SeqSubmitWithWgsSet},
{ERR_SEQ_FEAT_Range},
{ERR_SEQ_FEAT_MixedStrand},
{ERR_SEQ_FEAT_SeqLocOrder},
@@ -1712,9 +1813,6 @@ static ValidErrRaiseData valid_genome_raise [] = {
{ERR_SEQ_FEAT_BadFullLengthFeature},
{ERR_SEQ_FEAT_RedundantFields},
{ERR_SEQ_FEAT_CDSwithNoMRNAOverlap},
- /*
- {ERR_SEQ_FEAT_FeatureProductInconsistency},
- */
{ERR_SEQ_FEAT_ImproperBondLocation},
{ERR_SEQ_FEAT_GeneXrefWithoutGene},
{ERR_SEQ_FEAT_MissingTrnaAA},
@@ -1756,6 +1854,7 @@ static ValidErrRaiseData valid_genome_raise [] = {
{ERR_SEQ_FEAT_NeedsNote},
{ERR_SEQ_FEAT_RptUnitRangeProblem},
{ERR_SEQ_FEAT_InconsistentRRNAstrands},
+ {ERR_SEQ_FEAT_PeptideFeatureLacksCDS},
{ERR_SEQ_GRAPH_GraphAbove},
{ERR_SEQ_GRAPH_GraphOutOfOrder},
{ERR_SEQ_GRAPH_GraphSeqLocLen},
@@ -1864,9 +1963,11 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int
gcp = vsp->gcp;
buflen = 1023;
vsprintf (tmp, fmt, args);
- while (*tmp != '\0') {
- buflen--;
- tmp++;
+ if (tmp != NULL) {
+ while (*tmp != '\0') {
+ buflen--;
+ tmp++;
+ }
}
va_end (args);
@@ -1877,8 +1978,13 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int
return;
}
+ //LCOV_EXCL_START
+ //commandline tool always uses CustValErr
+
if (vsp->justShowAccession) {
- vsp->errbuf[0] = '\0';
+ if (vsp->errbuf != NULL) {
+ vsp->errbuf[0] = '\0';
+ }
tmp = vsp->errbuf;
sip = NULL;
@@ -1938,7 +2044,9 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int
}
ErrPostItem ((ErrSev) (severity), code1, code2, "%s", vsp->errbuf);
- vsp->errbuf[0] = '\0';
+ if (vsp->errbuf != NULL) {
+ vsp->errbuf[0] = '\0';
+ }
return;
}
@@ -2049,7 +2157,7 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int
if (vsp->bsp == NULL) {
diff = LabelCopy (tmp, "??", buflen);
} else if (vsp->suppressContext) {
- diff = WorstBioseqLabel (vsp->bsp, tmp, buflen, OM_LABEL_CONTENT);
+ diff = WorstBioseqLabel(vsp->bsp, tmp, buflen, OM_LABEL_CONTENT);
} else {
diff = BioseqLabel (vsp->bsp, tmp, buflen, OM_LABEL_BOTH);
}
@@ -2070,8 +2178,10 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int
}
}
+ if (vsp->errbuf == NULL) return;
ErrPostItem ((ErrSev) (severity), code1, code2, "%s", vsp->errbuf);
vsp->errbuf[0] = '\0';
+ //LCOV_EXCL_STOP
}
@@ -2133,7 +2243,29 @@ static ErrSev ErrorLevelFromFieldRuleSev (Uint2 severity)
return sev;
}
-static void StructuredCommentError (EFieldValid err_code, FieldRulePtr field_rule, UserFieldPtr ufp, UserFieldPtr depend_ufp, Pointer data)
+static Boolean IsGenomeAssembly (UserObjectPtr uop)
+
+{
+ UserFieldPtr curr;
+ CharPtr field;
+ ObjectIdPtr oip;
+
+ if (uop == NULL) return FALSE;
+
+ for (curr = uop->data; curr != NULL; curr = curr->next) {
+ if (curr->choice != 1) continue;
+ oip = curr->label;
+ if (oip == NULL) continue;
+ field = oip->str;
+ if (StringHasNoText (field)) continue;
+ if (StringCmp (field, "StructuredCommentPrefix") != 0) continue;
+ if (StringCmp ((CharPtr) curr->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void StructuredCommentError (EFieldValid err_code, FieldRulePtr field_rule, UserFieldPtr ufp, UserFieldPtr depend_ufp, Pointer data, UserObjectPtr uop)
{
ValidStructPtr vsp;
CharPtr label, val;
@@ -2161,29 +2293,45 @@ static void StructuredCommentError (EFieldValid err_code, FieldRulePtr field_rul
switch (err_code) {
case eFieldValid_Invalid:
label = GetUserFieldLabelString (ufp);
- if (field_rule == NULL) {
+ if (field_rule == NULL && StringCmp (label, "StructuredCommentPrefix") != 0 && StringCmp (label, "StructuredCommentSuffix") != 0) {
ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommInvalidFieldName, "%s is not a valid field name%s", label, depend_str == NULL ? "" : depend_str);
} else {
val = GetUserFieldValueString (ufp);
+ if (StringICmp (label, "Finishing Goal") == 0 && IsGenomeAssembly (uop)) {
+ sev = SEV_ERROR;
+ } else if (StringICmp (label, "Current Finishing Status") == 0 && IsGenomeAssembly (uop)) {
+ sev = SEV_ERROR;
+ }
ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue, "%s is not a valid value for %s%s", val, label, depend_str == NULL ? "" : depend_str);
val = MemFree (val);
}
label = MemFree (label);
break;
case eFieldValid_MissingRequiredField:
- ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommMissingField, "Required field %s is missing%s", field_rule->field_name, depend_str == NULL ? "" : depend_str);
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommMissingField, "Required field %s is missing%s", field_rule == NULL ? "" : field_rule->field_name, depend_str == NULL ? "" : depend_str);
break;
case eFieldValid_FieldOutOfOrder:
- ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommFieldOutOfOrder, "%s field is out of order%s", field_rule->field_name, depend_str == NULL ? "" : depend_str);
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommFieldOutOfOrder, "%s field is out of order%s", field_rule == NULL ? "" : field_rule->field_name, depend_str == NULL ? "" : depend_str);
break;
case eFieldValid_DuplicateField:
- ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommMultipleFields, "Multiple values for %s field%s", field_rule->field_name, depend_str == NULL ? "" : depend_str);
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommMultipleFields, "Multiple values for %s field%s", field_rule == NULL ? "" : field_rule->field_name, depend_str == NULL ? "" : depend_str);
break;
case eFieldValid_Disallowed:
+ //LCOV_EXCL_START
+ //no rules currently have disallowed fields
label = GetUserFieldLabelString (ufp);
ValidErr (vsp, sev, ERR_SEQ_DESCR_BadStrucCommInvalidFieldName, "%s is not a valid field name%s", label, depend_str == NULL ? "" : depend_str);
label = MemFree (label);
break;
+ //LCOV_EXCL_STOP
+ case eFieldValid_Inappropriate:
+ //LCOV_EXCL_START
+ // this code is not used
+ val = GetUserFieldValueString (ufp);
+ ValidErr(vsp, sev, ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue, "'%s' is inappropriate for a GenBank submisison", val);
+ val = MemFree (val);
+ break;
+ //LCOV_EXCL_STOP
default:
/* do nothing */
break;
@@ -2204,6 +2352,59 @@ static Boolean StringLooksLikeFakeStructuredComment (CharPtr str)
}
+/*
+ * ValidateUserObject
+ *
+ * Checks one user object and reports problems through ValidErr:
+ *   - no type                     -> "User object with no type"
+ *   - no data, unless the type string is "NcbiAutofix" or "Unverified"
+ *     (compared case-insensitively) -> "User object with no data"
+ *   - for a "StructuredComment" object: runs IsStructuredCommentValid with
+ *     StructuredCommentError as the callback, posts an informational
+ *     "Structured Comment invalid" when the result is not
+ *     eFieldValid_Valid and GetStructuredCommentPrefix returns non-blank
+ *     text, then rejects any choice-1 field whose label or value
+ *     contains "::".
+ *
+ * Returns immediately when either argument is NULL.
+ */
+static void ValidateUserObject(ValidStructPtr vsp, UserObjectPtr uop)
+{
+  ObjectIdPtr   type_id;
+  ObjectIdPtr   label;
+  UserFieldPtr  ufp;
+  CharPtr       sc_prefix;
+  CharPtr       name;
+  CharPtr       value;
+  Boolean       may_be_empty;
+
+  if (vsp == NULL || uop == NULL) {
+    return;
+  }
+
+  type_id = uop->type;
+  if (type_id == NULL) {
+    //LCOV_EXCL_START
+    //can't test with valid ASN.1
+    ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_UserObjectProblem, "User object with no type");
+    //LCOV_EXCL_STOP
+  }
+
+  if (uop->data == NULL) {
+    /* only these two named object types are allowed to carry no data */
+    may_be_empty = FALSE;
+    if (type_id != NULL && type_id->str != NULL) {
+      if (StringICmp (type_id->str, "NcbiAutofix") == 0
+          || StringICmp (type_id->str, "Unverified") == 0) {
+        may_be_empty = TRUE;
+      }
+    }
+    if (! may_be_empty) {
+      ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_UserObjectProblem, "User object with no data");
+    }
+  }
+
+  /* everything below applies to structured comments only */
+  if (type_id == NULL || StringICmp (type_id->str, "StructuredComment") != 0) {
+    return;
+  }
+
+  if (IsStructuredCommentValid (uop, StructuredCommentError, vsp) != eFieldValid_Valid) {
+    sc_prefix = GetStructuredCommentPrefix(uop);
+    if (!StringHasNoText(sc_prefix)) {
+      ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue, "Structured Comment invalid");
+    }
+  }
+
+  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+    label = ufp->label;
+    if (ufp->choice == 1 && label != NULL) {
+      name = label->str;
+      if (StringStr (name, "::") != NULL) {
+        ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_BadStrucCommInvalidFieldName, "Structured comment field '%s' contains double colons", name);
+      }
+      /* the data of a choice-1 field is read as a string */
+      value = (CharPtr) ufp->data.ptrvalue;
+      if (StringStr (value, "::") != NULL) {
+        ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue, "Structured comment value '%s' contains double colons", value);
+      }
+    }
+  }
+}
+
+
/*****************************************************************************
*
* Valid1GatherProc(gcp)
@@ -2214,9 +2415,7 @@ static Boolean StringLooksLikeFakeStructuredComment (CharPtr str)
static Boolean Valid1GatherProc (GatherContextPtr gcp)
{
ValidStructPtr vsp;
- UserFieldPtr curr;
AnnotDescrPtr desc;
- CharPtr field;
SeqAnnotPtr sap;
Boolean is_blast_align;
Int2 limit;
@@ -2230,12 +2429,10 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
BioseqPtr bsp;
SeqIdPtr sip;
CharPtr str;
- Char buf [64];
- Char tmp [64];
+ Char buf [128];
+ Char tmp [128];
ValNodePtr vnp2;
SeqMgrFeatContext context;
- UserObjectPtr uop;
- EFieldValid sc_valid;
vsp = (ValidStructPtr) (gcp->userdata);
vsp->gcp = gcp; /* needed for ValidErr */
@@ -2394,24 +2591,7 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
LookForMultiplePubs (vsp, gcp, sdp);
}
if (sdp->choice == Seq_descr_user) {
- uop = sdp->data.ptrvalue;
- if (uop != NULL && uop->type != NULL && StringICmp (uop->type->str, "StructuredComment") == 0) {
- sc_valid = IsStructuredCommentValid (uop, StructuredCommentError, vsp);
- /* report ? */
- for (curr = uop->data; curr != NULL; curr = curr->next) {
- if (curr->choice != 1) continue;
- oip = curr->label;
- if (oip == NULL) continue;
- field = oip->str;
- if (StringStr (field, "::") != NULL) {
- ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_BadStrucCommInvalidFieldName, "Structured comment field '%s' contains double colons", field);
- }
- str = (CharPtr) curr->data.ptrvalue;
- if (StringStr (str, "::") != NULL) {
- ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue, "Structured comment value '%s' contains double colons", str);
- }
- }
- }
+ ValidateUserObject(vsp, (UserObjectPtr) sdp->data.ptrvalue);
}
if (sdp->choice == Seq_descr_comment) {
str = (CharPtr) sdp->data.ptrvalue;
@@ -2461,6 +2641,7 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
static void DiscrepanciesToValidationErrs (ValNodePtr discrepancy_list, Uint4 item_type, ValidStructPtr vsp, int severity, int code1, int code2, char *msg)
{
ValNodePtr vnp, obj;
+ ValNodePtr tvnp;
ClickableItemPtr cip;
if (discrepancy_list == NULL || vsp == NULL) {
@@ -2484,7 +2665,16 @@ static void DiscrepanciesToValidationErrs (ValNodePtr discrepancy_list, Uint4 it
vsp->gcp->thistype = OBJ_SEQFEAT;
vsp->gcp->itemID = vsp->sfp->idx.itemID;
- ValidErr (vsp, severity, code1, code2, msg);
+ if(item_type == DISC_SHORT_INTRON)
+ {
+ for(tvnp = vsp->sisfp; tvnp != NULL; tvnp = tvnp->next)
+ if(tvnp->data.ptrvalue == vsp->sfp)
+ break;
+ }
+ else
+ tvnp = NULL;
+ if(tvnp == NULL)
+ ValidErr (vsp, severity, code1, code2, msg);
vsp->sfp = NULL;
}
}
@@ -2530,7 +2720,7 @@ static void ValidateShortIntrons (SeqEntryPtr sep, ValidStructPtr vsp)
vn.data.ptrvalue = sep;
vn.next = NULL;
- FindShortIntrons (&discrepancy_list, &vn);
+ FindShortIntronsEx (&discrepancy_list, &vn, vsp->indexerVersion);
DiscrepanciesToValidationErrs (discrepancy_list, DISC_SHORT_INTRON, vsp, SEV_WARNING, ERR_SEQ_FEAT_ShortIntron, "Introns should be at least 10 nt long");
@@ -2538,16 +2728,18 @@ static void ValidateShortIntrons (SeqEntryPtr sep, ValidStructPtr vsp)
}
-static void LookForAnyPubAndOrg (SeqEntryPtr sep, BoolPtr no_pub, BoolPtr no_biosrc)
+static void LookForAnyPubAndOrg (SeqEntryPtr sep, BoolPtr no_pub, BoolPtr no_cit_sub, BoolPtr no_biosrc)
{
BioseqPtr bsp;
BioseqSetPtr bssp;
+ PubdescPtr pdp;
SeqAnnotPtr sap = NULL;
ValNodePtr sdp = NULL;
SeqFeatPtr sfp;
SeqEntryPtr tmp;
+ ValNodePtr vnp;
- if (sep == NULL || no_pub == NULL || no_biosrc == NULL)
+ if (sep == NULL || no_pub == NULL || no_cit_sub == NULL || no_biosrc == NULL)
return;
if (IS_Bioseq (sep)) {
bsp = (BioseqPtr) sep->data.ptrvalue;
@@ -2560,7 +2752,7 @@ static void LookForAnyPubAndOrg (SeqEntryPtr sep, BoolPtr no_pub, BoolPtr no_bio
if (bssp == NULL)
return;
for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
- LookForAnyPubAndOrg (tmp, no_pub, no_biosrc);
+ LookForAnyPubAndOrg (tmp, no_pub, no_cit_sub, no_biosrc);
}
sap = bssp->annot;
sdp = bssp->descr;
@@ -2583,6 +2775,14 @@ static void LookForAnyPubAndOrg (SeqEntryPtr sep, BoolPtr no_pub, BoolPtr no_bio
while (sdp != NULL) {
if (sdp->choice == Seq_descr_pub) {
*no_pub = FALSE;
+ pdp = (PubdescPtr) sdp->data.ptrvalue;
+ if (pdp != NULL) {
+ for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == PUB_Sub) {
+ *no_cit_sub = FALSE;
+ }
+ }
+ }
} else if (sdp->choice == Seq_descr_source) {
*no_biosrc = FALSE;
}
@@ -2602,6 +2802,8 @@ typedef struct ftprob {
Uint4 num_tpa_without_hist;
Uint4 num_pseudo;
Uint4 num_pseudogene;
+ Uint4 first_taxid;
+ Int2 num_super_kingdom;
Boolean has_gi;
Boolean loc_has_gi;
Boolean loc_has_just_accn;
@@ -2609,6 +2811,9 @@ typedef struct ftprob {
Boolean prod_has_gi;
Boolean prod_has_just_accn;
Boolean prod_has_accn_ver;
+ Boolean mult_taxids;
+ Boolean super_kingdoms_different;
+ CharPtr super_kingdom_name;
} FeatProb, PNTR FeatProbPtr;
static void CheckFeatPacking (BioseqPtr bsp, SeqFeatPtr sfp, Uint4Ptr num_misplaced_features, Uint4Ptr num_small_genome_set_misplaced)
@@ -2806,7 +3011,10 @@ static void CountSfpLocIdTypes (SeqIdPtr sip, Pointer userdata)
if (tsip->version < 1) {
fpp->loc_has_just_accn = TRUE;
} else {
+ //LCOV_EXCL_START
+ //value not actually used anywhere
fpp->loc_has_accn_ver = TRUE;
+ //LCOV_EXCL_STOP
}
}
}
@@ -2958,6 +3166,22 @@ static Boolean IsWgsContig (BioseqPtr bsp)
return FALSE;
}
+/*
+ * IsTsaContig
+ *
+ * Returns TRUE when the first molinfo descriptor found for bsp by
+ * GetNextDescriptorUnindexed has tech == MI_TECH_tsa.  Returns FALSE for a
+ * NULL bsp, when no molinfo descriptor is found, or when the descriptor
+ * carries no MolInfo.  Virtual sequences are not excluded (that check was
+ * left disabled in the original).
+ */
+static Boolean IsTsaContig (BioseqPtr bsp)
+{
+  SeqDescrPtr  molinfo_descr;
+  MolInfoPtr   molinfo = NULL;
+
+  if (bsp != NULL) {
+    molinfo_descr = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
+    if (molinfo_descr != NULL) {
+      molinfo = (MolInfoPtr) molinfo_descr->data.ptrvalue;
+    }
+  }
+
+  return (Boolean) (molinfo != NULL && molinfo->tech == MI_TECH_tsa);
+}
+
typedef struct vfcdata {
ValNodePtr uids;
ValNodePtr unpub;
@@ -3001,12 +3225,15 @@ static void MakePubTags (PubdescPtr pdp, Pointer userdata)
} else if (vnp->choice == PUB_Gen) {
cgp = (CitGenPtr) vnp->data.ptrvalue;
if (cgp != NULL && cgp->serial_number > 0) {
+ //LCOV_EXCL_START
+ //serial numbers stripped by basic cleanup
tmp = ValNodeNew (NULL);
if (tmp != NULL) {
tmp->data.intvalue = (Int4) cgp->serial_number;
tmp->next = vfp->serial;
vfp->serial = tmp;
}
+ //LCOV_EXCL_STOP
}
}
}
@@ -3128,6 +3355,8 @@ static void CheckFeatCits (SeqFeatPtr sfp, Pointer userdata)
}
}
+//LCOV_EXCL_START
+//serial numbers are removed during basic cleanup
static void CheckForCollidingSerials (
ValidStructPtr vsp,
GatherContextPtr gcp,
@@ -3171,6 +3400,7 @@ static void CheckForCollidingSerials (
gcp->itemID = olditemid;
gcp->thistype = olditemtype;
}
+//LCOV_EXCL_STOP
static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp)
@@ -3332,6 +3562,8 @@ static ValNodePtr UniqueValNodeCaseSensitive (ValNodePtr list)
return list;
}
+//LCOV_EXCL_START
+//C++ Toolkit automatically resolves Seq-ids that match, even without case
static void ValidateSeqIdCase (SeqEntryPtr sep, ValidStructPtr vsp)
{
@@ -3385,15 +3617,14 @@ static void ValidateSeqIdCase (SeqEntryPtr sep, ValidStructPtr vsp)
ValNodeFreeData (vd.headid);
}
+//LCOV_EXCL_STOP
static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata)
{
DbtagPtr dbt;
- Boolean has_lcl_gnl = FALSE;
- Boolean has_others = FALSE;
SeqIdPtr sip;
- TextSeqIdPtr tsip;
+ TextSeqIdPtr tsip = NULL;
ValidStructPtr vsp;
if (bsp == NULL || userdata == NULL) return;
@@ -3402,6 +3633,8 @@ static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata)
for (sip = bsp->id; sip != NULL; sip = sip->next) {
switch (sip->choice) {
case SEQID_EMBL:
+ vsp->is_embl_tpe_in_sep = TRUE;
+ /* and fall through */
case SEQID_DDBJ:
vsp->is_embl_ddbj_in_sep = TRUE;
/* and fall through */
@@ -3416,14 +3649,23 @@ static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata)
}
break;
case SEQID_TPE:
+ vsp->is_embl_tpe_in_sep = TRUE;
+ /* and fall through */
case SEQID_TPD:
vsp->is_insd_in_sep = TRUE;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
break;
case SEQID_PATENT:
vsp->is_patent_in_sep = TRUE;
break;
case SEQID_OTHER:
vsp->is_refseq_in_sep = TRUE;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL && tsip->accession != NULL) {
+ if (StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ vsp->is_wp_in_sep = TRUE;
+ }
+ }
break;
case SEQID_GPIPE:
vsp->is_gpipe_in_sep = TRUE;
@@ -3436,17 +3678,23 @@ static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata)
vsp->has_gnl_prot_sep = TRUE;
}
break;
+ case SEQID_PDB:
+ vsp->is_pdb_in_sep = TRUE;
+ break;
+ case SEQID_GI:
+ vsp->has_gi_or_accn_ver = TRUE;
+ break;
default:
break;
}
- if (sip->choice == SEQID_LOCAL || sip->choice == SEQID_GENERAL) {
- has_lcl_gnl = TRUE;
- } else {
- has_others = TRUE;
+ if (tsip != NULL) {
+ if (StringDoesHaveText (tsip->accession) && tsip->version >= 1) {
+ vsp->has_gi_or_accn_ver = TRUE;
+ }
+ }
+ if (sip->choice != SEQID_LOCAL && sip->choice != SEQID_GENERAL) {
+ vsp->only_lcl_gnl_in_sep = FALSE;
}
- }
- if (has_lcl_gnl && ! has_others) {
- vsp->only_lcl_gnl_in_sep = TRUE;
}
}
@@ -3496,6 +3744,7 @@ static void LookForSeqDescrFields (SeqDescrPtr sdp, Pointer userdata)
BioSourcePtr biop;
MolInfoPtr mip;
ObjectIdPtr oip;
+ UserFieldPtr ufp;
UserObjectPtr uop;
ValidStructPtr vsp;
@@ -3513,6 +3762,17 @@ static void LookForSeqDescrFields (SeqDescrPtr sdp, Pointer userdata)
if (oip != NULL) {
if (StringICmp (oip->str, "GenomeBuild") == 0) {
vsp->is_gpipe_in_sep = TRUE;
+ } else if (StringICmp (oip->str, "StructuredComment") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || oip->str == NULL) continue;
+ if (StringICmp (oip->str, "Annotation Pipeline") == 0) {
+ if (ufp->choice == 1 &&
+ StringCmp ((CharPtr) ufp->data.ptrvalue, "NCBI eukaryotic genome annotation pipeline") == 0) {
+ vsp->is_gpipe_in_sep = TRUE;
+ }
+ }
+ }
}
}
break;
@@ -3572,6 +3832,8 @@ static void FindMultiIntervalGenes (
}
}
+//LCOV_EXCL_START
+// Only for SegSets
static void FindSegmentedBioseqs (
BioseqPtr bsp,
Pointer userdata
@@ -3585,6 +3847,8 @@ static void FindSegmentedBioseqs (
if (segmentedBioseqsP == NULL) return;
*segmentedBioseqsP = TRUE;
}
+//LCOV_EXCL_STOP
+
static void SetPubScratchData (SeqDescrPtr sdp, Pointer userdata)
{
@@ -3610,7 +3874,10 @@ static void SetPubScratchData (SeqDescrPtr sdp, Pointer userdata)
if (cgp != NULL) {
if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
+ //LCOV_EXCL_START
+ //serial numbers are stripped by basic cleanup
vnp = vnp->next;
+ //LCOV_EXCL_STOP
}
}
}
@@ -3864,6 +4131,8 @@ static void SetupFeatureScratchData (
static Boolean using_ec_from_file = FALSE;
+//LCOV_EXCL_START
+//internal check of data
static void TestDeletedOrReplacedECnumbers (ValidStructPtr vsp)
{
@@ -3886,10 +4155,10 @@ static void TestDeletedOrReplacedECnumbers (ValidStructPtr vsp)
fsa = (TextFsaPtr) GetAppProperty ("ReplacedEECNumberFSA");
if (fsa != NULL) return;
- GetSpecificECNumberFSA ();
- GetAmbiguousECNumberFSA ();
- GetDeletedECNumberFSA ();
- GetReplacedECNumberFSA ();
+ GetSpecificECNumberFSA (vsp);
+ GetAmbiguousECNumberFSA (vsp);
+ GetDeletedECNumberFSA (vsp);
+ GetReplacedECNumberFSA (vsp);
if (using_ec_from_file) {
if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
@@ -4024,6 +4293,8 @@ static void TestDeletedOrReplacedECnumbers (ValidStructPtr vsp)
}
}
+//LCOV_EXCL_STOP
+
typedef struct collisioninfo {
CharPtr str;
@@ -4121,6 +4392,8 @@ static void FindLongIdsThatCollideWhenTruncated (SeqEntryPtr sep, ValidStructPtr
}
+//LCOV_EXCL_START
+//used for locking procedures specific to C Toolkit
static void ValLookForBigFarSeqs (
BioseqPtr bsp,
Pointer userdata
@@ -4157,6 +4430,7 @@ static void ValLookForBigFarSeqs (
*toomanyfarP = TRUE;
}
}
+//LCOV_EXCL_STOP
static Boolean ValTooManyFarComponents (
SeqEntryPtr sep
@@ -4331,6 +4605,7 @@ NLM_EXTERN Boolean TooManyInferenceAccessions (
return FALSE;
}
+/*
static void CountPseudogenes (SeqFeatPtr sfp, Pointer userdata)
{
@@ -4351,6 +4626,146 @@ static void CountPseudogenes (SeqFeatPtr sfp, Pointer userdata)
(fpp->num_pseudogene)++;
}
}
+*/
+
+/*
+ * CheckTaxIDs
+ *
+ * BioSource visitor; userdata is the FeatProb accumulator shared across
+ * all sources of the record.
+ *
+ *  - Every "taxon" db xref with an integer tag is compared against the
+ *    first taxon id seen so far (first_taxid); a differing id sets
+ *    mult_taxids.  Xrefs whose tag is a string are skipped.
+ *  - When the OrgName choice is 5, each element whose fixed_level is 0 and
+ *    whose level is "superkingdom" bumps num_super_kingdom; the first
+ *    name is remembered (pointer only, not copied) and a later name that
+ *    differs case-insensitively sets super_kingdoms_different.
+ */
+static void CheckTaxIDs (BioSourcePtr biop, Pointer userdata)
+{
+  FeatProbPtr    totals;
+  OrgRefPtr      org;
+  OrgNamePtr     orgname;
+  ValNodePtr     xref;
+  DbtagPtr       tag;
+  ObjectIdPtr    tag_id;
+  TaxElementPtr  elem;
+
+  if (biop == NULL || userdata == NULL) return;
+  totals = (FeatProbPtr) userdata;
+
+  org = biop->org;
+  if (org == NULL) return;
+
+  xref = org->db;
+  while (xref != NULL) {
+    tag = (DbtagPtr) xref->data.ptrvalue;
+    xref = xref->next;
+    if (tag == NULL || StringICmp (tag->db, "taxon") != 0) continue;
+    tag_id = tag->tag;
+    /* only integer taxon tags take part in the comparison */
+    if (tag_id == NULL || tag_id->str != NULL) continue;
+    if (totals->first_taxid == 0) {
+      totals->first_taxid = tag_id->id;
+    } else if (totals->first_taxid != tag_id->id) {
+      totals->mult_taxids = TRUE;
+    }
+  }
+
+  orgname = org->orgname;
+  if (orgname == NULL || orgname->choice != 5) return;
+
+  for (elem = (TaxElementPtr) orgname->data; elem != NULL; elem = elem->next) {
+    if (elem->fixed_level != 0) continue;
+    if (StringICmp (elem->level, "superkingdom") != 0) continue;
+    (totals->num_super_kingdom)++;
+    if (totals->super_kingdom_name == NULL) {
+      totals->super_kingdom_name = elem->name;
+    } else if (StringICmp (totals->super_kingdom_name, elem->name) != 0) {
+      totals->super_kingdoms_different = TRUE;
+    }
+  }
+}
+
+
+//LCOV_EXCL_START
+/*
+ * Heartbeat
+ *
+ * Posts an informational progress message through ValidErr when
+ * vsp->use_heartbeat is set; otherwise does nothing.
+ *
+ *   vsp - validator state; a NULL vsp is ignored.
+ *   msg - text to post verbatim.  When NULL, "Processing <id>" is posted
+ *         using the best Seq-id of vsp->bsp, or plain "Processing" when no
+ *         Bioseq or id is available.
+ *
+ * msg is passed as an argument to a "%s" format rather than as the format
+ * itself, so a '%' in the caller's text cannot be interpreted as a
+ * conversion specifier.
+ */
+NLM_EXTERN void Heartbeat(ValidStructPtr vsp, CharPtr msg)
+{
+  Char id_buf[255];
+
+  if (vsp == NULL || ! vsp->use_heartbeat) {
+    return;
+  }
+
+  if (msg != NULL) {
+    ValidErr (vsp, SEV_INFO, 0, 0, "%s", msg);
+    return;
+  }
+
+  if (vsp->bsp == NULL || vsp->bsp->id == NULL) {
+    ValidErr (vsp, SEV_INFO, 0, 0, "Processing");
+  } else {
+    SeqIdWrite (SeqIdFindBest (vsp->bsp->id, 0), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
+    ValidErr (vsp, SEV_INFO, 0, 0, "Processing %s", id_buf);
+  }
+}
+//LCOV_EXCL_STOP
+
+
+/*
+ * IsWgsIntermediate
+ *
+ * Looks at the nucleotide Bioseq returned by FindNucBioseq (sep) and
+ * returns TRUE only when all of the following hold:
+ *   - one of its own molinfo descriptors has tech == MI_TECH_wgs,
+ *   - it carries a SEQID_OTHER id,
+ *   - it carries no SEQID_GI id.
+ * Returns FALSE when no nucleotide Bioseq is found.
+ */
+static Boolean IsWgsIntermediate (SeqEntryPtr sep)
+{
+  BioseqPtr    nuc;
+  SeqDescrPtr  descr;
+  SeqIdPtr     id;
+  MolInfoPtr   molinfo;
+  Boolean      found_wgs = FALSE;
+  Boolean      found_other = FALSE;
+
+  nuc = FindNucBioseq (sep);
+  if (nuc == NULL) return FALSE;
+
+  /* one matching molinfo is enough, so stop scanning once it is seen */
+  for (descr = nuc->descr; descr != NULL && ! found_wgs; descr = descr->next) {
+    if (descr->choice != Seq_descr_molinfo) continue;
+    molinfo = (MolInfoPtr) descr->data.ptrvalue;
+    if (molinfo != NULL && molinfo->tech == MI_TECH_wgs) {
+      found_wgs = TRUE;
+    }
+  }
+  if (! found_wgs) return FALSE;
+
+  for (id = nuc->id; id != NULL; id = id->next) {
+    switch (id->choice) {
+      case SEQID_GI:
+        /* a GI anywhere in the id chain rules the record out */
+        return FALSE;
+      case SEQID_OTHER:
+        found_other = TRUE;
+        break;
+      default:
+        break;
+    }
+  }
+
+  return found_other;
+}
+
+/*
+ * IsTsaIntermediate
+ *
+ * Looks at the nucleotide Bioseq returned by FindNucBioseq (sep) and
+ * returns TRUE only when all of the following hold:
+ *   - one of its own molinfo descriptors has tech == MI_TECH_tsa,
+ *   - it carries a SEQID_OTHER id,
+ *   - it carries no SEQID_GI id.
+ * Returns FALSE when no nucleotide Bioseq is found.
+ */
+static Boolean IsTsaIntermediate (SeqEntryPtr sep)
+{
+  BioseqPtr    nuc;
+  SeqDescrPtr  descr;
+  SeqIdPtr     id;
+  MolInfoPtr   molinfo;
+  Boolean      found_tsa = FALSE;
+  Boolean      found_other = FALSE;
+
+  nuc = FindNucBioseq (sep);
+  if (nuc == NULL) return FALSE;
+
+  /* one matching molinfo is enough, so stop scanning once it is seen */
+  for (descr = nuc->descr; descr != NULL && ! found_tsa; descr = descr->next) {
+    if (descr->choice != Seq_descr_molinfo) continue;
+    molinfo = (MolInfoPtr) descr->data.ptrvalue;
+    if (molinfo != NULL && molinfo->tech == MI_TECH_tsa) {
+      found_tsa = TRUE;
+    }
+  }
+  if (! found_tsa) return FALSE;
+
+  for (id = nuc->id; id != NULL; id = id->next) {
+    switch (id->choice) {
+      case SEQID_GI:
+        /* a GI anywhere in the id chain rules the record out */
+        return FALSE;
+      case SEQID_OTHER:
+        found_other = TRUE;
+        break;
+      default:
+        break;
+    }
+  }
+
+  return found_other;
+}
NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
@@ -4372,6 +4787,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
Int2 i;
Boolean inferenceAccnCheck;
Boolean suppress_no_pubs = TRUE;
+ Boolean suppress_no_cit_subs = TRUE;
Boolean suppress_no_biosrc = TRUE;
FeatProb featprob;
GatherContextPtr gcp = NULL;
@@ -4384,6 +4800,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
ObjMgrDataPtr omdp;
SeqEntryPtr topsep = NULL;
SeqEntryPtr tmp;
+ TextSeqIdPtr tsip;
ValNodePtr bsplist;
SubmitBlockPtr sbp;
ErrSev sev;
@@ -4393,6 +4810,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
Boolean isGPS = FALSE;
Boolean isPatent = FALSE;
Boolean isPDB = FALSE;
+ Boolean isWP = FALSE;
FindRepData frd;
Int4 numInferences;
Int4 numAccessions;
@@ -4422,16 +4840,22 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
VisitGraphsInSep (topsep, (Pointer) &featprob, CheckGraphPacking);
VisitFeaturesInSep (topsep, (Pointer) &featprob, CountGeneXrefs);
VisitFeaturesInSep (topsep, (Pointer) &featprob, CountFeatLocIdTypes);
+ /*
VisitFeaturesInSep (topsep, (Pointer) &featprob, CountPseudogenes);
+ */
VisitBioseqsInSep (topsep, (Pointer) &featprob, CheckTpaHist);
+ VisitBioSourcesInSep (topsep, (Pointer) &featprob, CheckTaxIDs);
} else {
-
+//LCOV_EXCL_START
/* if not using indexing, still need feature->idx.subtype now */
entityID = ObjMgrGetEntityIDForChoice (sep);
AssignIDsInEntity (entityID, 0, NULL);
+//LCOV_EXCL_STOP
}
+ Heartbeat(vsp, "Processing");
+
/* Seq-submit can have multiple entries with no Bioseq-set wrapper */
omdp = ObjMgrGetData (entityID);
@@ -4443,6 +4867,9 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
mult_subs = TRUE;
}
}
+ if (ssp != NULL && ssp->sub != NULL && StringNICmp (ssp->sub->tool, "Geneious", 8) == 0) {
+ vsp->is_geneious = TRUE;
+ }
}
if (IS_Bioseq_set (sep)) {
@@ -4456,6 +4883,11 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
sep = bssp->seq_set;
do_many = TRUE;
break;
+ case BioseqseqSet_class_wgs_set:
+ if(ssp != NULL) /* Seq-submit on top */
+ ValidErr(vsp, SEV_WARNING, ERR_SEQ_PKG_SeqSubmitWithWgsSet,
+ "File was created as a wgs-set, but should be a batch submission instead.");
+ break;
case BioseqseqSet_class_gen_prod_set:
isGPS = TRUE;
default:
@@ -4467,10 +4899,10 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
if (mult_subs) {
for (tmp = sep; tmp != NULL; tmp = tmp->next) {
- LookForAnyPubAndOrg (tmp, &suppress_no_pubs, &suppress_no_biosrc);
+ LookForAnyPubAndOrg (tmp, &suppress_no_pubs, &suppress_no_cit_subs, &suppress_no_biosrc);
}
} else {
- LookForAnyPubAndOrg (sep, &suppress_no_pubs, &suppress_no_biosrc);
+ LookForAnyPubAndOrg (sep, &suppress_no_pubs, &suppress_no_cit_subs, &suppress_no_biosrc);
}
if (GetAppProperty ("ValidateExons") != NULL) {
@@ -4480,17 +4912,22 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
vsp->is_htg_in_sep = FALSE;
vsp->is_barcode_sep = FALSE;
vsp->is_refseq_in_sep = FALSE;
+ vsp->is_wp_in_sep = FALSE;
vsp->is_gpipe_in_sep = FALSE;
vsp->is_gps_in_sep = FALSE;
vsp->other_sets_in_sep = FALSE;
vsp->is_embl_ddbj_in_sep = FALSE;
+ vsp->is_embl_tpe_in_sep = FALSE;
vsp->is_old_gb_in_sep = FALSE;
vsp->is_insd_in_sep = FALSE;
- vsp->only_lcl_gnl_in_sep = FALSE;
+ vsp->is_pdb_in_sep = FALSE;
+ vsp->has_gi_or_accn_ver = FALSE;
vsp->has_gnl_prot_sep = FALSE;
vsp->bsp_genomic_in_sep = FALSE;
vsp->is_smupd_in_sep = FALSE;
+ vsp->only_lcl_gnl_in_sep = TRUE;
+
VisitBioseqsInSep (sep, (Pointer) vsp, LookForBioseqFields);
VisitSetsInSep (sep, (Pointer) vsp, LookForBioseqSetFields);
VisitDescriptorsInSep (sep, (Pointer) vsp, LookForSeqDescrFields);
@@ -4500,29 +4937,6 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
VisitBioseqsInSep (sep, (Pointer) &has_seg_bioseqs, FindSegmentedBioseqs);
vsp->has_seg_bioseqs = has_seg_bioseqs;
- /*
- vsp->is_htg_in_sep = FALSE;
- VisitDescriptorsInSep (sep, (Pointer) &(vsp->is_htg_in_sep), LookForHTG);
- vsp->is_barcode_sep = FALSE;
- VisitDescriptorsInSep (sep, (Pointer) &(vsp->is_barcode_sep), LookForBarcode);
- vsp->is_smupd_in_sep = FALSE;
- VisitDescriptorsInSep (sep, (Pointer) &(vsp->is_smupd_in_sep), LookForSMUPD);
- vsp->is_gps_in_sep = FALSE;
- SeqEntryExplore (sep, (Pointer) &(vsp->is_gps_in_sep), LookForGPS);
- vsp->other_sets_in_sep = FALSE;
- SeqEntryExplore (sep, (Pointer) &(vsp->other_sets_in_sep), LookForNonGPS);
- vsp->is_refseq_in_sep = FALSE;
- VisitBioseqsInSep (sep, (Pointer) &(vsp->is_refseq_in_sep), LookForNC);
- vsp->is_embl_ddbj_in_sep = FALSE;
- VisitBioseqsInSep (sep, (Pointer) &(vsp->is_embl_ddbj_in_sep), LookForEmblDdbj);
- vsp->is_insd_in_sep = FALSE;
- VisitBioseqsInSep (sep, (Pointer) &(vsp->is_insd_in_sep), LookForGEDseqID);
- vsp->only_lcl_gnl_in_sep = FALSE;
- VisitBioseqsInSep (sep, (Pointer) &(vsp->only_lcl_gnl_in_sep), LookForLclGnl);
- vsp->has_gnl_prot_sep = FALSE;
- VisitBioseqsInSep (sep, (Pointer) &(vsp->has_gnl_prot_sep), LookForProteinGnl);
- */
-
vsp->feat_loc_has_gi = featprob.loc_has_gi;
vsp->feat_prod_has_gi = featprob.prod_has_gi;
@@ -4553,6 +4967,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
}
vsp->gcp = gcp; /* above needed for ValidErr */
vsp->suppress_no_pubs = suppress_no_pubs;
+ vsp->suppress_no_cit_subs = suppress_no_cit_subs;
vsp->suppress_no_biosrc = suppress_no_biosrc;
if (vsp->is_refseq_in_sep && vsp->is_insd_in_sep) {
@@ -4572,9 +4987,12 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
entityID = ObjMgrGetEntityIDForChoice (sep);
if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
+ //LCOV_EXCL_START
+ //specific to C Toolkit indexing
oldsev = ErrSetMessageLevel (SEV_MAX);
SeqMgrIndexFeatures (entityID, NULL);
ErrSetMessageLevel (oldsev);
+ //LCOV_EXCL_STOP
}
/* lock all remote genome components, locations, and products in advance */
@@ -4607,21 +5025,40 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
isPatent = TRUE;
} else if (sip->choice == SEQID_PDB) {
isPDB = TRUE;
+ } else if (sip->choice == SEQID_OTHER) {
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL && tsip->accession != NULL) {
+ if (StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ isWP = TRUE;
+ }
+ }
}
}
if (first) {
TestDeletedOrReplacedECnumbers (vsp);
- if (suppress_no_pubs && (! vsp->seqSubmitParent)) {
+ if (! vsp->seqSubmitParent) {
omdp = ObjMgrGetData (gc.entityID);
if (omdp == NULL || omdp->datatype != OBJ_SEQSUB) {
- sev = SEV_ERROR;
- if ((!isGPS) && (!IsNoncuratedRefSeq (fbsp, &sev)) && (! IsGpipe (fbsp)) && (! IsWgsContig (fbsp))) {
- ValidErr (vsp, sev, ERR_SEQ_DESCR_NoPubFound, "No publications anywhere on this entire record.");
+ if (suppress_no_pubs) {
+ sev = SEV_ERROR;
+ if ((!isGPS) && (!IsNoncuratedRefSeq (fbsp, &sev)) && (! IsGpipe (fbsp)) && (! IsWgsContig (fbsp)) && (! IsTsaContig (fbsp))) {
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_NoPubFound, "No publications anywhere on this entire record.");
+ }
+ }
+ if (suppress_no_cit_subs) {
+ sev = SEV_INFO;
+ if (vsp->genomeSubmission) {
+ sev = SEV_ERROR;
+ }
+ if ((! IsNoncuratedRefSeq (fbsp, &sev)) && (! IsWgsContig (fbsp)) && (! IsTsaContig (fbsp)) && (! IsWgsIntermediate (vsp->sep)) && (! IsTsaIntermediate (vsp->sep))) {
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "No submission citation anywhere on this entire record.");
+ }
}
}
}
+
if (suppress_no_biosrc) {
if ((!isPatent) && ((!isPDB))) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_NoOrgFound, "No organism name anywhere on this entire record.");
@@ -4673,11 +5110,20 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
(long) featprob.num_tpa_without_hist);
}
+ /*
if (featprob.num_pseudo != featprob.num_pseudogene && featprob.num_pseudo > 0 && featprob.num_pseudogene > 0) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InconsistentPseudogeneCounts,
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InconsistentPseudogeneCounts,
"There are %ld pseudo features with %ld pseudogene qualifiers in this record.",
(long) featprob.num_pseudo, (long) featprob.num_pseudogene);
- }
+ }
+ */
+
+ if (featprob.mult_taxids && vsp->is_refseq_in_sep) {
+ if (featprob.num_super_kingdom > 1 && featprob.super_kingdoms_different && isWP) {
+ } else {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_MultipleTaxonIDs, "There are multiple taxonIDs in this RefSeq record.");
+ }
+ }
if (vsp->indexerVersion && vsp->has_gnl_prot_sep && (! vsp->is_refseq_in_sep)) {
if (FindNucBioseq (sep) != NULL) {
@@ -4699,7 +5145,6 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
/* do validator tests using Discrepancy Report */
ValidateGeneLocusTags (topsep, vsp);
- ValidateShortIntrons (topsep, vsp);
VisitFeaturesInSep (sep, NULL, AddScratchToFeatures);
VisitBioseqsInSep (sep, NULL, SetupFeatureScratchData);
@@ -4719,7 +5164,14 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
}
}
+ vsp->sisfp = NULL;
GatherSeqEntry (sep, (Pointer) vsp, Valid1GatherProc, &gs);
+ ValidateShortIntrons (topsep, vsp);
+ if(vsp->sisfp != NULL)
+ {
+ ValNodeFree(vsp->sisfp);
+ vsp->sisfp = NULL;
+ }
/* restore inferenceAccnCheck flag for next record */
vsp->inferenceAccnCheck = inferenceAccnCheck;
@@ -4782,8 +5234,11 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
}
if (vsp->far_fetch_failure) {
+ //LCOV_EXCL_START
+ //not testable in regression
vsp->gcp = NULL;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_FarFetchFailure, "Far fetch failures caused some validator tests to be bypassed");
+ //LCOV_EXCL_STOP
}
VisitFeaturesInSep (sep, NULL, ClearScratchOnFeatures);
@@ -4848,14 +5303,19 @@ static void ValidateSetContents (SeqEntryPtr sep, Pointer data, Int4 index, Int2
vsp->protcnt++;
else
vsp->nuccnt++;
- if (bsp->repr == Seq_repr_seg)
- vsp->segcnt++;
-
+ if (bsp->repr == Seq_repr_seg){
+ //LCOV_EXCL_START
+ // Only for SegSets
+ vsp->segcnt++;
+ //LCOV_EXCL_STOP
+ }
}
return;
}
+//LCOV_EXCL_START
+// Only for SegSets
static CharPtr GetBioseqSetClass (Uint1 cl)
{
if (cl == BioseqseqSet_class_nuc_prot)
@@ -4900,7 +5360,7 @@ static CharPtr GetBioseqSetClass (Uint1 cl)
return ("other");
return ("not-set");
}
-
+//LCOV_EXCL_STOP
static BioseqSetPtr FindGenProdSetParentOfBioseqSet (BioseqSetPtr bssp)
{
@@ -4978,13 +5438,18 @@ NLM_EXTERN ValNodePtr BioseqGetSeqDescr(BioseqPtr bsp, Int2 type, ValNodePtr cur
static void ValidateNucProtSet (BioseqSetPtr bssp, ValidStructPtr vsp)
{
- SeqDescrPtr sdp;
- SeqEntryPtr sep;
- BioSourcePtr biop;
- BioseqPtr bsp;
- BioseqSetPtr bssp1;
- OrgRefPtr orp;
- Int4 prot_biosource = 0;
+ SeqDescrPtr sdp;
+ SeqEntryPtr sep;
+ SeqIdPtr sip;
+ BioSourcePtr biop;
+ BioseqPtr bsp;
+ BioseqSetPtr bssp1;
+ Boolean is_nm = FALSE;
+ ObjectIdPtr oip;
+ OrgRefPtr orp;
+ Int4 prot_biosource = 0;
+ TextSeqIdPtr tsip;
+ UserObjectPtr uop;
if (bssp->_class != BioseqseqSet_class_nuc_prot)
return;
@@ -4995,6 +5460,14 @@ static void ValidateNucProtSet (BioseqSetPtr bssp, ValidStructPtr vsp)
if (bsp == NULL) continue;
if (ISA_na (bsp->mol)) {
IfInGPSmustBeMrnaProduct (vsp, bsp);
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice != SEQID_OTHER) continue;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip == NULL) continue;
+ if (StringNCmp (tsip->accession, "NM_", 3) == 0) {
+ is_nm = TRUE;
+ }
+ }
} else if (ISA_aa (bsp->mol)) {
IfInGPSmustBeCDSProduct (vsp, bsp);
sdp = BioseqGetSeqDescr (bsp, Seq_descr_source, NULL);
@@ -5033,6 +5506,21 @@ static void ValidateNucProtSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+ if (! is_nm) {
+ for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
+ if (sdp->choice == Seq_descr_user) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL && StringICmp (oip->str, "RefGeneTracking") == 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_RefGeneTrackingOnNucProtSet,
+ "Nuc-prot set should not have RefGeneTracking user object");
+ }
+ }
+ }
+ }
+ }
+
for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_source) {
biop = (BioSourcePtr) sdp->data.ptrvalue;
@@ -5066,6 +5554,8 @@ typedef struct incons {
MolInfoPtr mip;
} Incons, PNTR InconsPtr;
+//LCOV_EXCL_START
+// Only for SegSets
static void FindInconsistMolInfos (SeqDescrPtr sdp, Pointer userdata)
{
@@ -5085,6 +5575,7 @@ static void FindInconsistMolInfos (SeqDescrPtr sdp, Pointer userdata)
}
}
+// Only for SegSets
static void ValidateSegmentedSet (BioseqSetPtr bssp, ValidStructPtr vsp)
{
@@ -5133,6 +5624,7 @@ static void ValidateSegmentedSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+// Only for SegSets
static void ValidatePartsSet (BioseqSetPtr bssp, ValidStructPtr vsp)
{
@@ -5172,6 +5664,7 @@ static void ValidatePartsSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
}
+//LCOV_EXCL_STOP
static Boolean CheckForInconsistentBiosources (SeqEntryPtr sep, ValidStructPtr vsp, OrgRefPtr PNTR orpp, BioseqSetPtr top)
@@ -5429,6 +5922,7 @@ static void ValidatePopSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+//LCOV_EXCL_START
typedef struct mutsetsrcdata {
CharPtr taxname;
Int2 num_not_mut_origin;
@@ -5455,6 +5949,7 @@ static void CheckMutSetSources (BioSourcePtr biop, Pointer userdata)
(mssp->num_not_mut_origin)++;
}
}
+//LCOV_EXCL_STOP
static void ValidateMutSet (BioseqSetPtr bssp, ValidStructPtr vsp)
@@ -5649,7 +6144,7 @@ static void CheckForNestedSets (BioseqSetPtr bssp, Pointer userdata)
}
}
-static void FindDBlinkUserObject (SeqDescrPtr sdp, Pointer userdata)
+static void FindDBlinkUserObjectOnBsp (SeqDescrPtr sdp, Pointer userdata)
{
GatherContextPtr gcp;
@@ -5670,7 +6165,31 @@ static void FindDBlinkUserObject (SeqDescrPtr sdp, Pointer userdata)
gcp = vsp->gcp;
if (gcp == NULL) return;
- ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "DBLink user object should not be on this set");
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "DBLink user object should not be on a Bioseq");
+}
+
+/*
+ * FindDBlinkUserObjectInSet
+ *
+ * Descriptor visitor; userdata is the ValidStructPtr.  Posts a SEV_ERROR
+ * DBLinkProblem when sdp is a user descriptor whose object type string is
+ * "DBLink" (case-insensitive).  Nothing is reported when the validator
+ * state or its gather context is missing.
+ */
+static void FindDBlinkUserObjectInSet (SeqDescrPtr sdp, Pointer userdata)
+{
+  ValidStructPtr  vsp = (ValidStructPtr) userdata;
+  UserObjectPtr   user_obj;
+  Boolean         is_dblink;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_user) return;
+
+  user_obj = (UserObjectPtr) sdp->data.ptrvalue;
+  is_dblink = (Boolean) (user_obj != NULL
+                         && user_obj->type != NULL
+                         && StringICmp (user_obj->type->str, "DBLink") == 0);
+  if (! is_dblink) return;
+
+  /* ValidErr is only called when a gather context is present */
+  if (vsp == NULL || vsp->gcp == NULL) return;
+
+  ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "DBLink user object should not be on this set");
}
static void ShouldHaveNoDblink (BioseqSetPtr bssp, Pointer userdata)
@@ -5682,7 +6201,7 @@ static void ShouldHaveNoDblink (BioseqSetPtr bssp, Pointer userdata)
vsp = (ValidStructPtr) userdata;
if (vsp == NULL) return;
- VisitDescriptorsOnSet (bssp, vsp, FindDBlinkUserObject);
+ VisitDescriptorsOnSet (bssp, vsp, FindDBlinkUserObjectInSet);
}
static void ValidateBioseqSet (GatherContextPtr gcp)
@@ -5703,8 +6222,11 @@ static void ValidateBioseqSet (GatherContextPtr gcp)
vsp->sfp = NULL;
if (vsp->non_ascii_chars) { /* non_ascii chars in AsnRead step */
+ //LCOV_EXCL_START
+ //reader strips non-ascii characters, can't test in regression
ValidErr (vsp, SEV_ERROR, ERR_GENERIC_NonAsciiAsn, "Non-ascii chars in input ASN.1 strings");
vsp->non_ascii_chars = FALSE; /* only do once */
+ //LCOV_EXCL_STOP
}
vsp->nuccnt = 0;
@@ -5717,8 +6239,11 @@ static void ValidateBioseqSet (GatherContextPtr gcp)
switch (bssp->_class) {
case BioseqseqSet_class_not_set:
+ //LCOV_EXCL_START
+ //BasicCleanup fixes not-set to genbank
ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_BioseqSetClassNotSet, "Bioseq_set class not set");
break;
+ //LCOV_EXCL_STOP
case BioseqseqSet_class_nuc_prot:
if (vsp->nuccnt == 0) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_PKG_NucProtProblem, "No nucleotides in nuc-prot set");
@@ -5732,19 +6257,25 @@ static void ValidateBioseqSet (GatherContextPtr gcp)
ValidateNucProtSet (bssp, vsp);
break;
case BioseqseqSet_class_segset:
+ //LCOV_EXCL_START
+ //segsets are obsolete
if (vsp->segcnt == 0) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_PKG_SegSetProblem, "No segmented Bioseq in segset");
}
ValidateSegmentedSet (bssp, vsp);
break;
+ //LCOV_EXCL_STOP
case BioseqseqSet_class_conset:
if (vsp->indexerVersion && (! vsp->is_refseq_in_sep)) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_PKG_ConSetProblem, "Set class should not be conset");
}
break;
case BioseqseqSet_class_parts:
- ValidatePartsSet (bssp, vsp);
+ //LCOV_EXCL_START
+ //segsets are obsolete
+ ValidatePartsSet(bssp, vsp);
break;
+ //LCOV_EXCL_STOP
case BioseqseqSet_class_genbank:
ValidateGenbankSet (bssp, vsp);
ShouldHaveNoDblink (bssp, vsp);
@@ -5826,6 +6357,8 @@ static Boolean SuppressTrailingXMessage (BioseqPtr bsp)
cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
if (cds != NULL) {
+ //LCOV_EXCL_START
+ // bug in C code prevents this from being called
bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
if (bs != NULL) {
str = BSMerge (bs, NULL);
@@ -5840,6 +6373,7 @@ static Boolean SuppressTrailingXMessage (BioseqPtr bsp)
MemFree (str);
return hasstar;
}
+ //LCOV_EXCL_STOP
}
sdp = BioseqGetSeqDescr (bsp, Seq_descr_molinfo, NULL);
if (sdp != NULL) {
@@ -5871,6 +6405,8 @@ static void LookForSecondaryConflict (ValidStructPtr vsp, GatherContextPtr gcp,
}
}
+//LCOV_EXCL_START
+// Only for SegSets
static void CheckSegBspAgainstParts (ValidStructPtr vsp, GatherContextPtr gcp, BioseqPtr bsp)
{
BioseqSetPtr bssp;
@@ -5949,6 +6485,7 @@ static void CheckSegBspAgainstParts (ValidStructPtr vsp, GatherContextPtr gcp, B
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_PartsOutOfOrder, "Parts set contains too many Bioseqs");
}
}
+//LCOV_EXCL_STOP
/*****************************************************************************
*
@@ -5960,7 +6497,7 @@ static void ValidateBioseqHist (GatherContextPtr gcp)
{
BioseqPtr bsp;
- Int4 gi = 0;
+ BIG_ID gi = 0;
SeqHistPtr hist;
SeqIdPtr sip;
ValidStructPtr vsp;
@@ -5980,7 +6517,7 @@ static void ValidateBioseqHist (GatherContextPtr gcp)
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
}
}
if (gi == 0) return;
@@ -5989,7 +6526,7 @@ static void ValidateBioseqHist (GatherContextPtr gcp)
for (sip = hist->replaced_by_ids; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- if (gi == (Int4) sip->data.intvalue) {
+ if (gi == (BIG_ID) sip->data.intvalue) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_HistoryGiCollision, "Replaced by gi (%ld) is same as current Bioseq", (long) gi);
}
}
@@ -6000,7 +6537,7 @@ static void ValidateBioseqHist (GatherContextPtr gcp)
for (sip = hist->replace_ids; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_GI) {
- if (gi == (Int4) sip->data.intvalue) {
+ if (gi == (BIG_ID) sip->data.intvalue) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_HistoryGiCollision, "Replaces gi (%ld) is same as current Bioseq", (long) gi);
}
}
@@ -6093,8 +6630,8 @@ static void ValidateIDSetAgainstDb (GatherContextPtr gcp, ValidStructPtr vsp, Bi
SeqIdPtr dbGbId;
DbtagPtr generalID = NULL;
DbtagPtr dbGeneralID;
- Int4 gi = 0;
- Int4 dbGI;
+ BIG_ID gi = 0;
+ BIG_ID dbGI;
Char oldGenID [128], newGenID [128];
if (gcp != NULL && vsp != NULL && bsp != NULL && vsp->validateIDSet) {
@@ -6104,7 +6641,7 @@ static void ValidateIDSetAgainstDb (GatherContextPtr gcp, ValidStructPtr vsp, Bi
gbId = sip;
break;
case SEQID_GI :
- gi = (Int4) sip->data.intvalue;
+ gi = (BIG_ID) sip->data.intvalue;
break;
case SEQID_GENERAL :
generalID = (DbtagPtr) sip->data.ptrvalue;
@@ -6127,7 +6664,7 @@ static void ValidateIDSetAgainstDb (GatherContextPtr gcp, ValidStructPtr vsp, Bi
for (sip = sipset; sip != NULL; sip = sip->next) {
switch (sip->choice) {
case SEQID_GI :
- dbGI = (Int4) sip->data.intvalue;
+ dbGI = (BIG_ID) sip->data.intvalue;
break;
case SEQID_GENBANK:
dbGbId = sip;
@@ -6291,41 +6828,6 @@ static CharPtr GetSequencePlusGapByFeature (SeqFeatPtr sfp)
return str;
}
-static Boolean IsWgsIntermediate (SeqEntryPtr sep)
-
-{
- BioseqPtr bsp;
- Boolean has_gi = FALSE, is_other = FALSE, is_wgs = FALSE;
- MolInfoPtr mip;
- SeqDescrPtr sdp;
- SeqIdPtr sip;
-
- bsp = FindNucBioseq (sep);
- if (bsp == NULL) return FALSE;
-
- for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
- if (sdp->choice != Seq_descr_molinfo) continue;
- mip = (MolInfoPtr) sdp->data.ptrvalue;
- if (mip == NULL) continue;
- if (mip->tech == MI_TECH_wgs) {
- is_wgs = TRUE;
- }
- }
- if (! is_wgs) return FALSE;
-
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
- if (sip->choice == SEQID_OTHER) {
- is_other = TRUE;
- } else if (sip->choice == SEQID_GI) {
- has_gi = TRUE;
- }
- }
- if (! is_other) return FALSE;
- if (has_gi) return FALSE;
-
- return TRUE;
-}
-
typedef struct reusedata {
CharPtr seqidstr;
Int4 from;
@@ -6372,7 +6874,7 @@ static int LIBCALLBACK SortVnpByDeltaLoc (VoidPtr ptr1, VoidPtr ptr2)
static void CheckDeltaForReuse (ValidStructPtr vsp, GatherContextPtr gcp, BioseqPtr bsp)
{
- Char buf [80];
+ Char buf [128];
ValNodePtr head = NULL;
ValNodePtr last = NULL;
ReuseDataPtr lastrdp = NULL;
@@ -6482,6 +6984,256 @@ static Boolean SequenceHasGaps (BioseqPtr bsp)
}
}
+/*
+ * IsConWithGaps
+ *
+ * Returns TRUE when bsp is a delta sequence (seq_ext_type 4) that is not
+ * literal-only and that contains at least one literal component of positive
+ * length which either has no sequence data or is coded as Seq_code_gap.
+ * Returns FALSE otherwise.  bsp is dereferenced without a NULL check.
+ */
+static Boolean IsConWithGaps (BioseqPtr bsp)
+
+{
+  DeltaSeqPtr  component;
+  SeqLitPtr    literal;
+
+  /* DeltaLitOnly is only consulted once the representation checks pass */
+  if (bsp->repr != Seq_repr_delta || bsp->seq_ext_type != 4 || DeltaLitOnly (bsp)) {
+    return FALSE;
+  }
+
+  component = (DeltaSeqPtr) bsp->seq_ext;
+  while (component != NULL) {
+    /* choice 2 carries a SeqLit in data.ptrvalue; other choices are ignored */
+    if (component->choice == 2) {
+      literal = (SeqLitPtr) component->data.ptrvalue;
+      if (literal != NULL) {
+        if ((literal->seq_data == NULL || literal->seq_data_type == Seq_code_gap) && literal->length > 0) {
+          return TRUE;
+        }
+      }
+    }
+    component = component->next;
+  }
+
+  return FALSE;
+}
+
+
+/*
+ * CheckBioseqEndsForNAndGap
+ *
+ * Inspects the first ten and last ten residues of a nucleotide Bioseq and
+ * reports, through the four out-parameters, whether each end starts/ends
+ * with an N (either case) or with a gap character ('-').
+ *
+ * Every non-NULL out-parameter is first reset to eEndIsChar_No.  It is then
+ * set to eEndIsChar_All when the whole ten-residue window consists of that
+ * character, or to eEndIsChar_Last when only the terminal residue matches.
+ * Any out-parameter may be NULL if the caller does not need that answer.
+ *
+ * The ends are examined only when bsp is a nucleic acid, is raw or a
+ * literal-only delta, is longer than 10 residues, and has topology != 2
+ * (NOTE(review): 2 is presumably the circular topology - confirm).
+ */
+NLM_EXTERN void CheckBioseqEndsForNAndGap (BioseqPtr bsp, Uint1Ptr begin_n, Uint1Ptr begin_gap, Uint1Ptr end_n, Uint1Ptr end_gap)
+{
+  SeqFeatPtr sfp;
+  CharPtr    str;
+  Int4       len;
+
+  if (begin_n != NULL) {
+    *begin_n = eEndIsChar_No;
+  }
+  if (begin_gap != NULL) {
+    *begin_gap = eEndIsChar_No;
+  }
+  if (end_n != NULL) {
+    *end_n = eEndIsChar_No;
+  }
+  if (end_gap != NULL) {
+    *end_gap = eEndIsChar_No;
+  }
+  if (bsp == NULL) {
+    return;
+  }
+  if (ISA_na (bsp->mol)
+      && (bsp->repr == Seq_repr_raw || (bsp->repr == Seq_repr_delta && DeltaLitOnly (bsp)))
+      && bsp->length > 10 && bsp->topology != 2) {
+    /* check for N bases at start or stop of sequence */
+    /* a scratch comment feature is used only as a carrier for the location */
+    sfp = (SeqFeatPtr) MemNew (sizeof (SeqFeat));
+    if (sfp == NULL) return;
+    sfp->data.choice = SEQFEAT_COMMENT;
+
+    /* leading window: residues 0..9 */
+    sfp->location = AddIntervalToLocation (NULL, bsp->id, 0, 9, FALSE, FALSE);
+    str = GetSequencePlusGapByFeature (sfp);
+    if (str != NULL) {
+      /* parenthesized so that the NULL test guards BOTH 'n' and 'N';
+         without the parentheses && bound tighter than || and a lowercase
+         'n' with begin_n == NULL dereferenced a NULL pointer */
+      if ((str [0] == 'n' || str [0] == 'N') && begin_n != NULL) {
+        if (StringICmp (str, "NNNNNNNNNN") == 0) {
+          *begin_n = eEndIsChar_All;
+        } else {
+          *begin_n = eEndIsChar_Last;
+        }
+      } else if (str [0] == '-' && begin_gap != NULL) {
+        if (StringICmp (str, "----------") == 0) {
+          *begin_gap = eEndIsChar_All;
+        } else {
+          *begin_gap = eEndIsChar_Last;
+        }
+      }
+    }
+    MemFree (str);
+    sfp->location = SeqLocFree (sfp->location);
+
+    /* trailing window: the last ten residues */
+    sfp->location = AddIntervalToLocation (NULL, bsp->id, bsp->length - 10, bsp->length - 1, FALSE, FALSE);
+    str = GetSequencePlusGapByFeature (sfp);
+    len = StringLen (str);
+    if (str != NULL && len > 0) {
+      /* same precedence fix as for the leading window */
+      if ((str [len - 1] == 'n' || str [len - 1] == 'N') && end_n != NULL) {
+        if (StringICmp (str, "NNNNNNNNNN") == 0) {
+          *end_n = eEndIsChar_All;
+        } else {
+          *end_n = eEndIsChar_Last;
+        }
+      } else if (str [len - 1] == '-' && end_gap != NULL) {
+        if (StringICmp (str, "----------") == 0) {
+          *end_gap = eEndIsChar_All;
+        } else {
+          *end_gap = eEndIsChar_Last;
+        }
+      }
+    }
+
+    MemFree (str);
+    sfp->location = SeqLocFree (sfp->location);
+
+    MemFree (sfp);
+  }
+
+}
+
+
+/*
+ * GetBioseqEndWarning
+ *
+ * Chooses the severity for a terminal-N / terminal-gap message.
+ * SEV_ERROR is returned only when the record is not an NC accession, not a
+ * patent, not circular, not local-ID-only, and the whole end window is the
+ * offending character (end_is_char == eEndIsChar_All).  Every other
+ * combination yields SEV_WARNING.  bsp is dereferenced only when neither
+ * isNC nor isPatent is set.
+ */
+static ErrSev GetBioseqEndWarning (Boolean isNC, Boolean isPatent, Boolean only_local, BioseqPtr bsp, Uint1 end_is_char)
+{
+  if (isNC || isPatent) {
+    return SEV_WARNING;
+  }
+  if (bsp->topology == TOPOLOGY_CIRCULAR || only_local) {
+    return SEV_WARNING;
+  }
+  return (end_is_char == eEndIsChar_All) ? SEV_ERROR : SEV_WARNING;
+}
+
+
+/*
+ * IsAllNsProc
+ *
+ * Stream callback used by IsSequenceAllNs.  userdata points at a Boolean
+ * flag; the flag is cleared to FALSE as soon as a chunk of sequence contains
+ * any character other than uppercase 'N'.  The flag is never set to TRUE
+ * here, so the caller must initialize it.
+ */
+static void LIBCALLBACK IsAllNsProc (CharPtr sequence, Pointer userdata)
+
+{
+  BoolPtr  all_ns_flag;
+  Int4     leading_n_count;
+
+  all_ns_flag = (BoolPtr) userdata;
+  if (sequence == NULL) return;
+  if (all_ns_flag == NULL) return;
+
+  /* length of the initial run of 'N'; equals the chunk length iff all N */
+  leading_n_count = StringSpn (sequence, "N");
+  if (StringLen (sequence) == leading_n_count) return;
+
+  *all_ns_flag = FALSE;
+}
+
+
+/*
+ * IsSequenceAllNs
+ *
+ * Streams bsp with gaps expanded and returns TRUE unless IsAllNsProc saw a
+ * character other than 'N'.  Returns FALSE for a NULL bsp and for virtual or
+ * map representations.  Because the result starts out TRUE, a sequence for
+ * which the callback is never invoked is also reported as TRUE.
+ */
+static Boolean IsSequenceAllNs (BioseqPtr bsp)
+{
+  Boolean  all_ns = TRUE;
+  ErrSev   saved_log_level;
+  ErrSev   saved_msg_level;
+
+  if (bsp == NULL) {
+    return FALSE;
+  }
+  if (bsp->repr == Seq_repr_virtual || bsp->repr == Seq_repr_map) {
+    return FALSE;
+  }
+
+  /* raise both thresholds to SEV_MAX while streaming (presumably to keep
+     the stream from posting diagnostics), then restore them in reverse */
+  saved_msg_level = ErrSetMessageLevel (SEV_MAX);
+  saved_log_level = ErrSetLogLevel (SEV_MAX);
+  SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) &all_ns, IsAllNsProc);
+  ErrSetLogLevel (saved_log_level);
+  ErrSetMessageLevel (saved_msg_level);
+
+  return all_ns;
+}
+
+
+/*
+ * ValidateBioseqEnds
+ *
+ * Posts validator messages about the ends of a nucleotide Bioseq:
+ *   - ERR_SEQ_INST_AllNs (SEV_ERROR) when the whole sequence is N, after
+ *     which no further end checks are made;
+ *   - ERR_SEQ_INST_TerminalNs / ERR_SEQ_INST_TerminalGap for an N or gap at
+ *     the beginning and/or end, with the severity chosen by
+ *     GetBioseqEndWarning.
+ * Proteins and a NULL bsp are ignored.  isPatent is supplied by the caller
+ * and, like an NC_ RefSeq accession or a local-ID-only record, caps the
+ * severity at SEV_WARNING.
+ */
+static void ValidateBioseqEnds (BioseqPtr bsp, ValidStructPtr vsp, Boolean isPatent)
+{
+  Uint1         begin_n, begin_gap, end_n, end_gap;
+  ErrSev        sev;
+  Boolean       only_local = TRUE;
+  Boolean       isNC = FALSE;
+  SeqIdPtr      sip1;
+  TextSeqIdPtr  tsip;
+
+  if (bsp == NULL || ISA_aa(bsp->mol)) {
+    return;
+  }
+
+  if (IsSequenceAllNs(bsp)) {
+    ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_AllNs, "Sequence is all Ns");
+    return;
+  }
+
+  /* all four out-parameters are non-NULL, so each is always initialized */
+  CheckBioseqEndsForNAndGap (bsp, &begin_n, &begin_gap, &end_n, &end_gap);
+
+  for (sip1 = bsp->id; sip1 != NULL; sip1 = sip1->next) {
+    if (sip1->choice != SEQID_LOCAL) {
+      only_local = FALSE;
+    }
+    /* must be an independent test: as an "else if" of the check above it
+       could only run for SEQID_LOCAL ids, so isNC was never set */
+    if (sip1->choice == SEQID_OTHER) {
+      tsip = (TextSeqIdPtr) sip1->data.ptrvalue;
+      if (tsip != NULL && tsip->accession != NULL && StringNICmp (tsip->accession, "NC_", 3) == 0) {
+        isNC = TRUE;
+      }
+    }
+  }
+
+  /* an N report takes precedence over a gap report at the same end */
+  if (begin_n != eEndIsChar_No) {
+    sev = GetBioseqEndWarning(isNC, isPatent, only_local, bsp, begin_n);
+    ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at beginning of sequence");
+  } else if (begin_gap != eEndIsChar_No) {
+    sev = GetBioseqEndWarning(isNC, isPatent, only_local, bsp, begin_gap);
+    ValidErr (vsp, sev, ERR_SEQ_INST_TerminalGap, "Gap at beginning of sequence");
+  }
+
+  if (end_n != eEndIsChar_No) {
+    sev = GetBioseqEndWarning(isNC, isPatent, only_local, bsp, end_n);
+    ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at end of sequence");
+  } else if (end_gap != eEndIsChar_No) {
+    sev = GetBioseqEndWarning(isNC, isPatent, only_local, bsp, end_gap);
+    ValidErr (vsp, sev, ERR_SEQ_INST_TerminalGap, "Gap at end of sequence");
+  }
+
+}
+
+
+/*
+ * s_IsInNucProtSet
+ *
+ * Returns TRUE when the immediate parent of bsp (per bsp->idx) is a
+ * BioseqSet whose class is BioseqseqSet_class_nuc_prot; FALSE for a NULL
+ * bsp, a non-set parent, a NULL parent pointer, or any other set class.
+ */
+static Boolean s_IsInNucProtSet (BioseqPtr bsp)
+{
+  BioseqSetPtr  parent_set;
+
+  if (bsp == NULL) return FALSE;
+  if (bsp->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
+
+  parent_set = (BioseqSetPtr) bsp->idx.parentptr;
+  if (parent_set == NULL) return FALSE;
+
+  return (parent_set->_class == BioseqseqSet_class_nuc_prot) ? TRUE : FALSE;
+}
+
+
+/* NULL-terminated table of twelve linkage-evidence labels: eleven named
+   evidence kinds followed by "UNKNOWN VALUE".
+   NOTE(review): presumably indexed by a linkage-evidence type code, with the
+   last entry used for unrecognized codes - confirm against the code that
+   reads this table. */
+static CharPtr linkEvStrings [] = {
+ "paired-ends",
+ "align genus",
+ "align xgenus",
+ "align trnscpt",
+ "within clone",
+ "clone contig",
+ "map",
+ "strobe",
+ "unspecified",
+ "pcr",
+ "other",
+ "UNKNOWN VALUE",
+ NULL
+};
+
+/*
+ * CheckForBadSeqIdChars
+ *
+ * Scans the identifier string and posts one SEV_WARNING
+ * ERR_SEQ_INST_BadSeqIdFormat message for every '|' or ',' found in it, so
+ * an identifier with several such characters produces several messages.
+ * Does nothing when vsp or id is NULL.
+ */
+static void CheckForBadSeqIdChars (ValidStructPtr vsp, CharPtr id)
+
+{
+  CharPtr  cursor;
+
+  if (vsp == NULL || id == NULL) return;
+
+  for (cursor = id; *cursor != '\0'; cursor++) {
+    if (*cursor == '|' || *cursor == ',') {
+      ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadSeqIdFormat, "Bad character '%c' in accession '%s'", *cursor, id);
+    }
+  }
+}
static void ValidateBioseqInst (GatherContextPtr gcp)
{
@@ -6508,7 +7260,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
SeqIdPtr sip1, sip2, sip3;
SeqLocPtr slp;
SeqIntPtr sintp;
- Char buf1[41], buf2[41];
+ Char buf1[128], buf2[128];
SeqLitPtr slitp;
SeqGapPtr sgp;
SeqCodeTablePtr sctp;
@@ -6546,34 +7298,39 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Boolean isGB = FALSE;
Boolean isGIBBMT = FALSE;
Boolean isGIBBSQ = FALSE;
+ Boolean is_grc_title = FALSE;
Boolean isPatent = FALSE;
Boolean isPDB = FALSE;
Boolean isPreFin = FALSE;
- Boolean isNC = FALSE;
Boolean isNG = FALSE;
Boolean isNTorNC = FALSE;
Boolean isNZ;
Boolean is_gps = FALSE;
Boolean isRefSeq = FALSE;
Boolean isSwissProt = FALSE;
- Boolean is_genome_assembly;
- Boolean is_finished_status;
- Boolean only_local = TRUE;
+ Boolean isWP = FALSE;
+ Boolean isYP = FALSE;
+ Boolean is_assembly = FALSE;
+ Boolean is_genome_assembly = FALSE;
+ Boolean is_finished_status = FALSE;
+ Boolean is_unspec;
+ Boolean this_is_gen_asm;
Boolean isLRG = FALSE;
ValNodePtr keywords;
Boolean last_is_gap;
Boolean non_interspersed_gaps;
Int2 num_adjacent_gaps;
Int2 num_gaps;
+ Int2 num_gap_known_or_spec;
+ Int2 num_gap_unknown_unspec;
Boolean reportFastaBracket;
- SeqFeatPtr sfp;
SeqEntryPtr sep;
ErrSev sev;
DbtagPtr dbt;
SeqIdPtr sip;
Int2 trailingX = 0;
Int2 numletters, numdigits, numunderscores;
- Boolean letterAfterDigit, badIDchars;
+ Boolean letterAfterDigit, badIDchars, internalS;
EMBLBlockPtr ebp;
SeqDescrPtr sdp;
SeqMgrDescContext dcontext;
@@ -6597,6 +7354,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Int4 runsofn;
Int4 segnum;
StreamCache sc;
+ ValNodePtr sc_head = NULL;
+ ValNodePtr sc_tail = NULL;
RunOfNs ron;
Boolean leadingX;
Boolean isLower;
@@ -6612,10 +7371,21 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Int4 dblink_count = 0;
Int4 taa_count = 0;
Int4 bs_count = 0;
+ Int4 as_count = 0;
Int4 pdb_count = 0;
Int4 sra_count = 0;
Int4 bp_count = 0;
Int4 unknown_count = 0;
+ Boolean is_master = FALSE;
+ Boolean tsa_master = FALSE;
+ Boolean wgs_master = FALSE;
+ int linktype;
+ Int4 linkcount;
+ Int2 linkevarray [12];
+ ValNodePtr linkvnp;
+ LinkageEvidencePtr lep;
+ CharPtr curr_str, prev_str;
+ Int2 num_seen;
/* set up data structures */
@@ -6627,22 +7397,26 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
vsp->bssp = (BioseqSetPtr) (gcp->parentitem);
vsp->bsp_partial_val = 0;
+ Heartbeat (vsp, NULL);
sep = vsp->sep;
if (vsp->non_ascii_chars) { /* non_ascii chars in AsnRead step */
+ //LCOV_EXCL_START
+ //reader strips non-ascii characters, can't test in regression
ValidErr (vsp, SEV_REJECT, ERR_GENERIC_NonAsciiAsn, "Non-ascii chars in input ASN.1 strings");
vsp->non_ascii_chars = FALSE; /* only do once */
+ //LCOV_EXCL_STOP
}
if (bsp->id == NULL) {
+ //LCOV_EXCL_START
+ //C Toolkit can't get here from reading file
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_NoIdOnBioseq, "No ids on a Bioseq");
return;
+ //LCOV_EXCL_STOP
}
for (sip1 = bsp->id; sip1 != NULL; sip1 = sip1->next) {
- if (sip1->choice != SEQID_LOCAL) {
- only_local = FALSE;
- }
if (sip1->choice == SEQID_OTHER) {
isRefSeq = TRUE;
tsip = (TextSeqIdPtr) sip1->data.ptrvalue;
@@ -6651,9 +7425,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
isNTorNC = TRUE;
} else if (StringNICmp (tsip->accession, "NC_", 3) == 0) {
isNTorNC = TRUE;
- isNC = TRUE;
} else if (StringNICmp (tsip->accession, "NG_", 3) == 0) {
isNG = TRUE;
+ } else if (StringNICmp (tsip->accession, "WP_", 3) == 0) {
+ isWP = TRUE;
+ } else if (StringNICmp (tsip->accession, "YP_", 3) == 0) {
+ isYP = TRUE;
}
}
} else if (sip1->choice == SEQID_GI) {
@@ -6684,6 +7461,54 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
for (sip1 = bsp->id; sip1 != NULL; sip1 = sip1->next) {
+ switch (sip1->choice) {
+ case SEQID_OTHER :
+ tsip = (TextSeqIdPtr) sip1->data.ptrvalue;
+ if (tsip != NULL && tsip->accession != NULL) {
+ len = StringLen (tsip->accession);
+ if (len == 15) {
+ if (StringCmp (tsip->accession + 9, "000000") == 0) {
+ is_master = TRUE;
+ }
+ } else if (len == 16) {
+ if (StringCmp (tsip->accession + 9, "0000000") == 0) {
+ is_master = TRUE;
+ }
+ } else if (len == 17) {
+ if (StringCmp (tsip->accession + 10, "0000000") == 0) {
+ is_master = TRUE;
+ }
+ }
+ }
+ break;
+ case SEQID_GENBANK :
+ case SEQID_EMBL :
+ case SEQID_DDBJ :
+ tsip = (TextSeqIdPtr) sip1->data.ptrvalue;
+ if (tsip != NULL && tsip->accession != NULL) {
+ len = StringLen (tsip->accession);
+ if (len == 12) {
+ if (StringCmp (tsip->accession + 6, "000000") == 0) {
+ is_master = TRUE;
+ }
+ } else if (len == 13) {
+ if (StringCmp (tsip->accession + 6, "0000000") == 0) {
+ is_master = TRUE;
+ }
+ } else if (len == 14) {
+ if (StringCmp (tsip->accession + 6, "00000000") == 0) {
+ is_master = TRUE;
+ }
+ }
+ }
+ break;
+ default :
+ break;
+ }
+ }
+
+
+ for (sip1 = bsp->id; sip1 != NULL; sip1 = sip1->next) {
/* disabled for now
ReportLongSeqId (sip1, vsp, 40);
*/
@@ -6733,34 +7558,57 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
case SEQID_DDBJ:
tsip = (TextSeqIdPtr) sip1->data.ptrvalue;
if (tsip != NULL && tsip->accession != NULL) {
+ CheckForBadSeqIdChars (vsp, tsip->accession);
numletters = 0;
numdigits = 0;
+ numunderscores = 0;
+ internalS = FALSE;
letterAfterDigit = FALSE;
badIDchars = FALSE;
for (ptr = tsip->accession, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
if (IS_UPPER (ch)) {
numletters++;
- if (numdigits > 0) {
- letterAfterDigit = TRUE;
+ if (numdigits > 0 || numunderscores > 0) {
+ if (ch == 'S' && numletters == 5 && numdigits == 2 && (! internalS)) {
+ numletters--;
+ internalS = TRUE;
+ } else {
+ letterAfterDigit = TRUE;
+ }
}
} else if (IS_DIGIT (ch)) {
numdigits++;
+ } else if (ch == '_') {
+ numunderscores++;
+ if (numdigits > 0 || numunderscores > 1) {
+ letterAfterDigit = TRUE;
+ }
} else {
badIDchars = TRUE;
}
}
if (letterAfterDigit || badIDchars) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession);
+ } else if (numunderscores > 0) {
+ if (StringNCmp (tsip->accession, "MAP_", 4) != 0 || numdigits != 6) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession);
+ }
} else if (numletters == 1 && numdigits == 5 && ISA_na (bsp->mol)) {
} else if (numletters == 2 && numdigits == 6 && ISA_na (bsp->mol)) {
} else if (numletters == 3 && numdigits == 5 && ISA_aa (bsp->mol)) {
} else if (numletters == 2 && numdigits == 6 && ISA_aa (bsp->mol) && bsp->repr == Seq_repr_seg) {
+ } else if (numletters == 4 && internalS && (numdigits == 8 || numdigits == 9 || numdigits == 10) && ISA_na (bsp->mol) &&
+ (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ ||
+ sip1->choice == SEQID_TPG || sip1->choice == SEQID_TPE || sip1->choice == SEQID_TPD)) {
} else if (numletters == 4 && numdigits == 8 && ISA_na (bsp->mol) &&
(sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ ||
sip1->choice == SEQID_TPG || sip1->choice == SEQID_TPE || sip1->choice == SEQID_TPD)) {
} else if (numletters == 4 && numdigits == 9 && ISA_na (bsp->mol) &&
(sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ ||
sip1->choice == SEQID_TPG || sip1->choice == SEQID_TPE || sip1->choice == SEQID_TPD)) {
+ } else if (numletters == 4 && numdigits == 10 && ISA_na (bsp->mol) &&
+ (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ ||
+ sip1->choice == SEQID_TPG || sip1->choice == SEQID_TPE || sip1->choice == SEQID_TPD)) {
} else if (numletters == 5 && numdigits == 7 && ISA_na (bsp->mol) &&
(sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) {
} else {
@@ -6803,6 +7651,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
if (tsip != NULL && tsip->accession != NULL && sip1->choice == SEQID_OTHER) {
+ CheckForBadSeqIdChars (vsp, tsip->accession);
numletters = 0;
numdigits = 0;
numunderscores = 0;
@@ -6843,10 +7692,10 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (hasGi && tsip != NULL && tsip->accession == NULL && (! StringHasNoText (tsip->name))) {
if (sip1->choice == SEQID_DDBJ && bsp->repr == Seq_repr_seg) {
+ //LCOV_EXCL_START
+ // Only for SegSets
sev = SEV_WARNING;
- /*
- ValidErr (vsp, sev, ERR_SEQ_INST_BadSeqIdFormat, "Missing accession for %s", tsip->name);
- */
+ //LCOV_EXCL_STOP
} else {
sev = SEV_REJECT;
ValidErr (vsp, sev, ERR_SEQ_INST_BadSeqIdFormat, "Missing accession for %s", tsip->name);
@@ -6891,28 +7740,50 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
case SEQID_GENERAL:
dbt = (DbtagPtr) sip1->data.ptrvalue;
if (dbt != NULL) {
+ if (StringHasNoText (dbt->db)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "General identifier missing database field");
+ }
if (StringICmp (dbt->db, "LRG") == 0) {
isLRG = TRUE;
}
+ sev = SEV_ERROR;
+ if (vsp->only_lcl_gnl_in_sep) {
+ sev = SEV_REJECT;
+ } else if (vsp->is_refseq_in_sep) {
+ sev = SEV_ERROR;
+ } else if (vsp->is_insd_in_sep) {
+ sev = SEV_ERROR;
+ } else if (vsp->indexerVersion) {
+ sev = SEV_ERROR;
+ }
if (StringLen (dbt->db) > 20) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadSeqIdFormat, "Database name longer than 20 characters");
+ ValidErr (vsp, sev, ERR_SEQ_INST_BadSeqIdFormat, "General database longer than 20 characters");
}
if (StringICmp (dbt->db, "BankIt") != 0 && StringICmp (dbt->db, "TMSMART") != 0 && StringICmp (dbt->db, "NCBIFILE") != 0) {
oip = dbt->tag;
- if (oip != NULL && StringLen (dbt->db) + StringLen (oip->str) > 64 && (! vsp->indexerVersion) && (! IsNCBIFileID (sip1))) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "General identifier longer than 64 characters");
+ if (oip != NULL && StringLen (oip->str) > 64) {
+ ValidErr (vsp, sev, ERR_SEQ_INST_BadSeqIdFormat, "General identifier longer than 64 characters");
}
}
+ oip = dbt->tag;
+ if (oip != NULL && oip->str != NULL) {
+ CheckForBadSeqIdChars (vsp, oip->str);
+ }
}
break;
case SEQID_LOCAL:
oip = (ObjectIdPtr) sip1->data.ptrvalue;
- if (oip != NULL && StringLen (oip->str) > 64) {
+ if (oip != NULL && oip->str != NULL) {
+ CheckForBadSeqIdChars (vsp, oip->str);
+ }
+ if (oip != NULL && StringLen (oip->str) > 50) {
+ sev = SEV_ERROR;
if (! vsp->is_insd_in_sep) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Local identifier longer than 64 characters");
+ sev = SEV_REJECT;
} else if (! vsp->indexerVersion) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Local identifier longer than 64 characters");
+ sev = SEV_ERROR;
}
+ ValidErr (vsp, sev, ERR_SEQ_INST_BadSeqIdFormat, "Local identifier longer than 50 characters");
}
break;
default:
@@ -6947,13 +7818,43 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
vnp = NULL;
if (vsp->useSeqMgrIndexes) {
vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
+ if (vnp != NULL) {
+ mip = (MolInfoPtr) vnp->data.ptrvalue;
+ if (mip != NULL && bsp->mol == MOLECULE_CLASS_DNA) {
+ switch (mip->biomol) {
+ case MOLECULE_TYPE_PRE_MRNA:
+ case MOLECULE_TYPE_MRNA:
+ case MOLECULE_TYPE_RRNA:
+ case MOLECULE_TYPE_TRNA:
+ case MOLECULE_TYPE_SNRNA:
+ case MOLECULE_TYPE_SCRNA:
+ case MOLECULE_TYPE_CRNA:
+ case MOLECULE_TYPE_SNORNA:
+ case MOLECULE_TYPE_TRANSCRIBED_RNA:
+ case MOLECULE_TYPE_NCRNA:
+ case MOLECULE_TYPE_TMRNA:
+ olditemid = gcp->itemID;
+ olditemtype = gcp->thistype;
+ gcp->itemID = context.itemID;
+ gcp->thistype = OBJ_SEQDESC;
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InconsistentMolTypeBiomol, "Molecule type (DNA) does not match biomol (RNA)");
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ break;
+ default:
+ break;
+ }
+ }
+ }
} else {
+//LCOV_EXCL_START
bcp = BioseqContextNew (bsp);
vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_molinfo, NULL, NULL);
BioseqContextFree (bcp);
- }
- if (vnp != NULL) {
- mip = (MolInfoPtr) vnp->data.ptrvalue;
+ if (vnp != NULL) {
+ mip = (MolInfoPtr) vnp->data.ptrvalue;
+ }
+//LCOV_EXCL_STOP
}
if (vsp->useSeqMgrIndexes) {
@@ -7031,35 +7932,47 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
if (oip == NULL || oip->str == NULL) continue;
- if (StringICmp (oip->str, "Trace Assembly Archive") == 0 && ufp->choice == 8) {
+ if (StringICmp (oip->str, "Trace Assembly Archive") == 0 && (ufp->choice == 2 || ufp->choice == 8)) {
taa_count++;
- } else if (StringICmp (oip->str, "BioSample") == 0 && ufp->choice == 7) {
+ } else if (StringICmp (oip->str, "BioSample") == 0 && (ufp->choice == 1 || ufp->choice == 7)) {
bs_count++;
- } else if (StringICmp (oip->str, "ProbeDB") == 0 && ufp->choice == 7) {
+ } else if (StringICmp (oip->str, "Assembly") == 0 && (ufp->choice == 1 || ufp->choice == 7)) {
+ as_count++;
+ } else if (StringICmp (oip->str, "ProbeDB") == 0 && (ufp->choice == 1 || ufp->choice == 7)) {
pdb_count++;
- } else if (StringICmp (oip->str, "Sequence Read Archive") == 0 && ufp->choice == 7) {
+ } else if (StringICmp (oip->str, "Sequence Read Archive") == 0 && (ufp->choice == 1 || ufp->choice == 7)) {
sra_count++;
- } else if (StringICmp (oip->str, "BioProject") == 0 && ufp->choice == 7) {
+ } else if (StringICmp (oip->str, "BioProject") == 0 && (ufp->choice == 1 || ufp->choice == 7)) {
bp_count++;
} else {
unknown_count++;
}
}
} else if (oip != NULL && StringICmp (oip->str, "StructuredComment") == 0) {
- is_genome_assembly = FALSE;
- is_finished_status = FALSE;
+ this_is_gen_asm = FALSE;
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
if (oip == NULL || oip->str == NULL) continue;
if (StringICmp (oip->str, "StructuredCommentPrefix") == 0) {
+ ValNodeCopyStrEx (&sc_head, &sc_tail, 0, (CharPtr) ufp->data.ptrvalue);
if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) {
is_genome_assembly = TRUE;
+ this_is_gen_asm = TRUE;
+ } else if (StringCmp ((CharPtr) ufp->data.ptrvalue, "##Assembly-Data-START##") == 0) {
+ is_assembly = TRUE;
+ }
+ }
+ }
+ if (this_is_gen_asm) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || oip->str == NULL) continue;
+ if (StringICmp (oip->str, "Current Finishing Status") == 0) {
+ if (StringCmp ((CharPtr) ufp->data.ptrvalue, "Finished") == 0) {
+ is_finished_status = TRUE;
+ }
}
- } else if (StringICmp (oip->str, "Current Finishing Status") == 0) {
- if (StringCmp ((CharPtr) ufp->data.ptrvalue, "Finished") == 0) {
- is_finished_status = TRUE;
- }
- }
+ }
}
if (is_genome_assembly && is_finished_status && mip != NULL && mip->tech == MI_TECH_wgs) {
olditemid = gcp->itemID;
@@ -7074,22 +7987,63 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if ((keyword = KeywordForStructuredCommentName (uop)) != NULL) {
if (IsStructuredCommentValid(uop, NULL, NULL) == eFieldValid_Valid) {
- if (HasKeywordForStructuredCommentName(bsp, uop)) {
- /* as it should be */
- } else {
+ if (! HasAllKeywordsForStructuredComment (bsp, keyword)) {
+ /*
ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_MissingKeyword, "Structured Comment compliant, keyword should be added");
+ */
}
} else {
- if (HasKeywordForStructuredCommentName(bsp, uop)) {
+ if (HasAnyKeywordForStructuredComment (bsp, keyword)) {
ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_BadKeyword, "Structured Comment is non-compliant, keyword should be removed");
}
}
}
+ keyword = MemFree (keyword);
}
vnp = SeqMgrGetNextDescriptor (bsp, vnp, Seq_descr_user, &context);
}
}
+ if (sc_head != NULL) {
+ sc_head = ValNodeSort (sc_head, SortVnpByString);
+
+ prev_str = NULL;
+ num_seen = 0;
+
+ for (vnp = sc_head; vnp != NULL; vnp = vnp->next) {
+ curr_str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (curr_str)) continue;
+
+ if (StringICmp (curr_str, prev_str) == 0) {
+ num_seen++;
+ } else {
+ if (num_seen > 1) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_MultipleComments, "Multiple structured comments with prefix %s", prev_str);
+ }
+ prev_str = curr_str;
+ num_seen = 1;
+ }
+ }
+ if (num_seen > 1) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_MultipleComments, "Multiple structured comments with prefix %s", prev_str);
+ }
+
+ sc_head = ValNodeFreeData (sc_head);
+ }
+
+ vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);
+ while (vnp != NULL) {
+ title = (CharPtr) vnp->data.ptrvalue;
+ if (StringNCmp (title, "GRC", 3) == 0 && StringLen (title) > 3) {
+ is_grc_title = TRUE;
+ }
+ vnp = SeqMgrGetNextDescriptor (bsp, vnp, Seq_descr_title, &context);
+ }
+
+ if (s_IsInNucProtSet (bsp)) {
+ VisitDescriptorsOnBsp (bsp, vsp, FindDBlinkUserObjectOnBsp);
+ }
+
if (dblink_count > 1) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "%ld DBLink user objects apply to a Bioseq", (long) dblink_count);
}
@@ -7099,6 +8053,9 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (bs_count > 1) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "BioSample entries appear in %ld DBLink user objects", (long) bs_count);
}
+ if (as_count > 1) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "Assembly entries appear in %ld DBLink user objects", (long) as_count);
+ }
if (pdb_count > 1) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "ProbeDB entries appear in %ld DBLink user objects", (long) pdb_count);
}
@@ -7114,11 +8071,26 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "Unrecognized entries appear in %ld DBLink user object", (long) unknown_count);
}
+ if (bp_count == 0 && isRefSeq && (! isNG)) {
+ if ((bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) ||
+ (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) ||
+ (bsp->repr == Seq_repr_ref)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_ScaffoldLacksBioProject, "BioProject entries not present on CON record");
+ }
+ }
+ if (bp_count == 0 && (isGB || isEMBL || isDDBJ) && ((mip != NULL && mip->tech == MI_TECH_wgs) || is_grc_title)) {
+ if ((bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) ||
+ (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) ||
+ (bsp->repr == Seq_repr_ref)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_ScaffoldLacksBioProject, "BioProject entries not present on CON record");
+ }
+ }
+
for (sip1 = bsp->id; sip1 != NULL; sip1 = sip1->next) {
bsp2 = BioseqFindSpecial (sip1);
if (bsp2 == NULL) {
if (!isPatent) {
- SeqIdWrite (sip1, buf1, PRINTID_FASTA_SHORT, 40);
+ SeqIdWrite (sip1, buf1, PRINTID_FASTA_SHORT, sizeof (buf1) -1);
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_IdOnMultipleBioseqs, "BioseqFind (%s) unable to find itself - possible internal error", buf1);
}
} else if (bsp2 != bsp) {
@@ -7126,7 +8098,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
dbt = (DbtagPtr) sip1->data.ptrvalue;
if (dbt != NULL && StringICmp (dbt->db, "NCBIFILE") == 0) continue;
}
- SeqIdWrite (sip1, buf1, PRINTID_FASTA_SHORT, 40);
+ SeqIdWrite (sip1, buf1, PRINTID_FASTA_SHORT, sizeof (buf2) -1);
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_IdOnMultipleBioseqs, "SeqID %s is present on multiple Bioseqs in record", buf1);
}
}
@@ -7142,8 +8114,11 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
errors[3] = TRUE;
break;
case Seq_repr_map:
+ //LCOV_EXCL_START
+ //C Toolkit gather assumes correct ext type, crashes if other
if ((bsp->seq_ext_type != 3) || (bsp->seq_ext == NULL))
errors[1] = TRUE;
+ //LCOV_EXCL_STOP
if ((bsp->seq_data_type) || (bsp->seq_data != NULL))
errors[3] = TRUE;
break;
@@ -7154,11 +8129,14 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
errors[3] = TRUE;
break;
case Seq_repr_seg:
+ //LCOV_EXCL_START
+ //segsets are obsolete
if ((bsp->seq_ext_type != 1) || (bsp->seq_ext == NULL))
errors[1] = TRUE;
if ((bsp->seq_data_type) || (bsp->seq_data != NULL))
errors[3] = TRUE;
break;
+ //LCOV_EXCL_STOP
case Seq_repr_raw:
case Seq_repr_const:
if ((bsp->seq_ext_type) || (bsp->seq_ext != NULL))
@@ -7179,7 +8157,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (errors[0] == TRUE) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_ExtNotAllowed, "Bioseq-ext not allowed on %s Bioseq", repr[bsp->repr - 1]);
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_ExtNotAllowed, "Bioseq-ext not allowed on %s Bioseq", repr[bsp->repr - 1]);
retval = FALSE;
}
@@ -7241,7 +8219,10 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (bsp->length < 1) {
+ //LCOV_EXCL_START
+ //can't test in regression, C toolkit can't read it
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_InvalidLen, "Invalid Bioseq length [%ld]", (long) bsp->length);
+ //LCOV_EXCL_STOP
}
seqtype = (int) (bsp->seq_data_type);
@@ -7269,8 +8250,11 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
case Seq_code_gap:
break;
default:
+ //LCOV_EXCL_START
+ //not readable by C Toolkit
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_InvalidAlphabet, "Using illegal sequence alphabet [%d]", (int) bsp->seq_data_type);
return;
+ //LCOV_EXCL_STOP
}
check_alphabet = FALSE;
@@ -7334,8 +8318,11 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
break;
}
if (! StreamCacheSetup (bsp, NULL, STREAM_EXPAND_GAPS, &sc)) {
+ //LCOV_EXCL_START
+ //C toolkit specific
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqPortFail, "Can't open StreamCache");
return;
+ //LCOV_EXCL_STOP
}
/*
spp = SeqPortNew (bsp, 0, -1, 0, 0);
@@ -7358,12 +8345,15 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
residue = SeqPortGetResidue (spp);
*/
if (!IS_residue (residue)) {
+ //LCOV_EXCL_START
+ //code never reached, StreamCache ignores invalid residues silently
i++;
if (i > 10) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_InvalidResidue, "More than 10 invalid residues. Checking stopped");
/*
SeqPortFree (spp);
*/
+ // patch_seq is never set
if (vsp->patch_seq)
PatchBadSequence (bsp);
return;
@@ -7378,6 +8368,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_InvalidResidue, "Invalid residue [%d] at position [%ld]", (int) x, (long) (len + 1));
}
}
+ //LCOV_EXCL_STOP
} else if (residue == termination) {
terminations++;
trailingX = 0; /* suppress if followed by terminator */
@@ -7491,9 +8482,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
return;
}
if (i) {
+ //LCOV_EXCL_START
+ // patch_seq is never set
if (vsp->patch_seq)
PatchBadSequence (bsp);
return;
+ //LCOV_EXCL_STOP
}
}
@@ -7501,8 +8495,11 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (ISA_na (bsp->mol) && bsp->repr == Seq_repr_delta && DeltaLitOnly (bsp)) {
if (! StreamCacheSetup (bsp, NULL, EXPAND_GAPS_TO_DASHES, &sc)) {
+ //LCOV_EXCL_START
+ //C toolkit specific
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqPortFail, "Can't open StreamCache");
return;
+ //LCOV_EXCL_STOP
}
in_gap = FALSE;
in_N = FALSE;
@@ -7532,10 +8529,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_ref)) { /* check segmented sequence */
+ //LCOV_EXCL_START
+ //segmented sequences are obsolete
head.choice = SEQLOC_MIX;
head.data.ptrvalue = bsp->seq_ext;
head.next = NULL;
- ValidateSeqLoc (vsp, (SeqLocPtr) & head, "Segmented Bioseq");
+ ValidateSeqLoc (vsp, (SeqLocPtr) & head, TRUE, "Segmented Bioseq");
/* check the length */
len = 0;
vnp = NULL;
@@ -7616,6 +8615,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_PartialInconsistent, "Partial segmented sequence without MolInfo partial");
}
}
+ //LCOV_EXCL_STOP
}
if (bsp->repr == Seq_repr_delta || bsp->repr == Seq_repr_raw) {
@@ -7624,9 +8624,11 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (vsp->useSeqMgrIndexes) {
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context);
} else {
+//LCOV_EXCL_START
bcp = BioseqContextNew (bsp);
sdp = BioseqContextGetSeqDescr (bcp, Seq_descr_genbank, NULL, NULL);
BioseqContextFree (bcp);
+//LCOV_EXCL_STOP
}
if (sdp != NULL) {
gbp = (GBBlockPtr) sdp->data.ptrvalue;
@@ -7668,6 +8670,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (bsp->repr == Seq_repr_delta) {
len = 0;
count = 0;
+ num_gap_known_or_spec = 0;
+ num_gap_unknown_unspec = 0;
doNotSkip = TRUE;
for (vnp = (ValNodePtr) (bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
if (vnp->choice == 1) {
@@ -7684,9 +8688,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (doNotSkip) {
for (vnp = (ValNodePtr) (bsp->seq_ext), segnum = 1; vnp != NULL; vnp = vnp->next, segnum++) {
- if (vnp->data.ptrvalue == NULL)
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_SeqDataLenWrong, "NULL pointer in delta seq_ext valnode");
- else {
+ if (vnp->data.ptrvalue == NULL) {
+ //LCOV_EXCL_START
+ //not possible reading from file
+ ValidErr(vsp, SEV_ERROR, ERR_SEQ_INST_SeqDataLenWrong, "NULL pointer in delta seq_ext valnode");
+ //LCOV_EXCL_STOP
+ } else {
switch (vnp->choice) {
case 1: /* SeqLocPtr */
slp = (SeqLocPtr) (vnp->data.ptrvalue);
@@ -7736,13 +8743,17 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
}
- if (len2 <= 10) {
+ if (len2 <= 10 && DeltaLitOnly (bsp)) {
+ //LCOV_EXCL_START
+ //by definition this code cannot be reached, because it is examining a segment
+ //that is a Seq-loc, and DeltaLitOnly only returns true if there are no Seq-loc segments
str = SeqLocPrint ((SeqLocPtr) (vnp->data.ptrvalue));
if (str == NULL) {
str = StringSave ("?");
}
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqLocLength, "Short length (%ld) on seq-loc (%s) of delta seq_ext", (long) len2, str);
MemFree (str);
+ //LCOV_EXCL_STOP
}
break;
case 2: /* SeqLitPtr */
@@ -7753,9 +8764,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
sctp = SeqCodeTableFind (slitp->seq_data_type);
if (sctp == NULL) {
+ //LCOV_EXCL_START
+ //not readable by C Toolkit
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_InvalidAlphabet, "Using illegal sequence alphabet [%d] in SeqLitPtr", (int) slitp->seq_data_type);
len += slitp->length;
break;
+ //LCOV_EXCL_STOP
}
start_at = (Int2) (sctp->start_at);
@@ -7811,12 +8825,44 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
} else if (slitp->seq_data != NULL && slitp->seq_data_type == Seq_code_gap) {
sgp = (SeqGapPtr) slitp->seq_data;
+ is_unspec = FALSE;
if (sgp->linkage_evidence != NULL) {
+ MemSet ((Pointer) &linkevarray, 0, sizeof (linkevarray));
+ linkcount = 0;
+ for (linkvnp = sgp->linkage_evidence; linkvnp != NULL; linkvnp = linkvnp->next) {
+ lep = (LinkageEvidencePtr) linkvnp->data.ptrvalue;
+ if (lep == NULL) continue;
+ linktype = (int) lep->type;
+ if (linktype == 8) {
+ is_unspec = TRUE;
+ }
+ linkcount++;
+ if (linktype == 255) {
+ (linkevarray [10])++;
+ } else if (linktype < 0 || linktype > 9) {
+ (linkevarray [11])++;
+ } else {
+ (linkevarray [linktype])++;
+ }
+ }
if (sgp->linkage != 1) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqGapProblem, "Seq-gap with linkage evidence must have linkage field set to linked");
}
- if (sgp->type == 3 || sgp->type == 4 || sgp->type == 5 || sgp->type == 6 || sgp->type == 8 || sgp->type == 255) {
- ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqGapProblem, "Seq-gap of type %d should not have linkage evidence", (int) sgp->type);
+ if (sgp->type != 1 && sgp->type != 2 && sgp->type != 7 && sgp->type != 9) {
+ sev = SEV_REJECT;
+ if (sgp->type == 0 && is_unspec) {
+ /* suppress for legacy records */
+ } else {
+ ValidErr (vsp, sev, ERR_SEQ_INST_SeqGapProblem, "Seq-gap of type %d should not have linkage evidence", (int) sgp->type);
+ }
+ }
+ if (linkevarray [8] > 0 && linkcount > linkevarray [8]) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_SeqGapProblem, "Seq-gap type has unspecified and additional linkage evidence");
+ }
+ for (i = 0; i < 12; i++) {
+ if (linkevarray [i] > 1) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_SeqGapProblem, "Linkage evidence '%s' appears %ld times", linkEvStrings [i], (long) linkevarray [i]); /* %ld matches the (long) argument */
+ }
}
} else {
if (sgp->type == 9) {
@@ -7826,6 +8872,11 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqGapProblem, "Seq-gap type == repeat and linkage == linked is missing required linkage evidence");
}
}
+ if (sgp->type == 0 && is_unspec) {
+ num_gap_unknown_unspec++;
+ } else {
+ num_gap_known_or_spec++;
+ }
} else if (slitp->length == 0) {
if (isSwissProt) {
sev = SEV_WARNING;
@@ -7857,6 +8908,14 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqDataLenWrong, "Bioseq.seq_data is larger [%ld] than given length [%ld]", (long) (len), (long) bsp->length);
}
+ if (num_gap_unknown_unspec > 0 && num_gap_known_or_spec == 0) {
+ if (num_gap_unknown_unspec > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqGapProblem, "All %ld Seq-gaps have unknown type and unspecified linkage", (long) num_gap_unknown_unspec);
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqGapProblem, "Single Seq-gap has unknown type and unspecified linkage");
+ }
+ }
+
} else {
for (vnp = (ValNodePtr) (bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
@@ -7907,7 +8966,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
is_gps = TRUE;
}
}
- if ((!isNTorNC) && (! is_gps) && mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 &&
+ if ((!isNTorNC) && (! is_gps) && ISA_na (bsp->mol) && mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 &&
mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3 && mip->tech != MI_TECH_wgs &&
mip->tech != MI_TECH_composite_wgs_htgs && mip->tech != MI_TECH_unknown && mip->tech != MI_TECH_standard
&& mip->tech != MI_TECH_htc && mip->tech != MI_TECH_barcode && mip->tech != MI_TECH_tsa) {
@@ -7934,7 +8993,9 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ron.isWGS = TRUE;
}
- SeqPortStream (bsp, EXPAND_GAPS_TO_DASHES, (Pointer) &ron, CountAdjacentProc);
+ if (ISA_na (bsp->mol)) {
+ SeqPortStream (bsp, EXPAND_GAPS_TO_DASHES, (Pointer) &ron, CountAdjacentProc);
+ }
/*
if (ron.inNrun && ron.showAll && ron.ncount >= 100) {
@@ -7970,7 +9031,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
vnp = (DeltaSeqPtr) bsp->seq_ext;
if (vnp != NULL && vnp->choice == 2) {
slitp = (SeqLitPtr) vnp->data.ptrvalue;
- if (slitp != NULL && (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap)) {
+ if (slitp != NULL && (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap) && bsp->topology != TOPOLOGY_CIRCULAR) {
ValidErr (vsp, sev, ERR_SEQ_INST_BadDeltaSeq, "First delta seq component is a gap");
}
}
@@ -8011,7 +9072,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (vnp != NULL && vnp->choice == 2) {
slitp = (SeqLitPtr) vnp->data.ptrvalue;
- if (slitp != NULL && (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap)) {
+ if (slitp != NULL && (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap) && bsp->topology != TOPOLOGY_CIRCULAR) {
ValidErr (vsp, sev, ERR_SEQ_INST_BadDeltaSeq, "Last delta seq component is a gap");
}
}
@@ -8062,7 +9123,34 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
- if (ISA_aa (bsp->mol)) {
+ if (is_master && bsp->repr == Seq_repr_virtual && mip != NULL) {
+ if (mip->tech == MI_TECH_wgs) {
+ wgs_master = TRUE;
+ }
+ if (mip->tech == MI_TECH_tsa) {
+ tsa_master = TRUE;
+ }
+ }
+
+ if (wgs_master && ! is_genome_assembly) {
+ sev = SEV_ERROR;
+ if (isEMBL || isDDBJ) {
+ sev = SEV_WARNING;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_INST_WGSMasterLacksStrucComm, "WGS master without Genome Assembly Data user object");
+ }
+
+ if (tsa_master && ! is_assembly) {
+ sev = SEV_ERROR;
+ if (isEMBL || isDDBJ) {
+ sev = SEV_WARNING;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_INST_TSAMasterLacksStrucComm, "TSA master without Assembly Data user object");
+ }
+
+ if (wgs_master) {
+ /* ignore ShortSeq for WGS master - length is actually the number of contigs in the project */
+ } else if (ISA_aa (bsp->mol)) {
if ((bsp->length <= 3) && (bsp->length >= 0) && (!isPDB)) {
if (mip == NULL || mip->completeness < 2 || mip->completeness > 5) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_ShortSeq, "Sequence only %ld residues", (long) (bsp->length));
@@ -8146,7 +9234,10 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
#endif
if (bsp->repr == Seq_repr_seg) {
+//LCOV_EXCL_START
+// Only for SegSets
CheckSegBspAgainstParts (vsp, gcp, bsp);
+//LCOV_EXCL_STOP
}
if (ISA_na (bsp->mol) || ISA_aa (bsp->mol)) {
@@ -8172,9 +9263,11 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (vsp->useSeqMgrIndexes) {
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
} else {
+//LCOV_EXCL_START
bcp = BioseqContextNew (bsp);
sdp = BioseqContextGetSeqDescr (bcp, Seq_descr_source, NULL, NULL);
BioseqContextFree (bcp);
+//LCOV_EXCL_STOP
}
if (sdp != NULL) {
biop = (BioSourcePtr) sdp->data.ptrvalue;
@@ -8207,6 +9300,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (StringISearch (title, "complete genome") != NULL && SequenceHasGaps (bsp)) {
+ //LCOV_EXCL_START
+ //bug in C Toolkit - only works if gaps are instantiated
/* warning if title contains complete genome but sequence contains gap features */
olditemid = gcp->itemID;
olditemtype = gcp->thistype;
@@ -8215,6 +9310,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_CompleteTitleProblem, "Title contains 'complete genome' but sequence has gaps");
gcp->itemID = olditemid;
gcp->thistype = olditemtype;
+ //LCOV_EXCL_STOP
}
}
} else {
@@ -8267,11 +9363,15 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
bufplus += 11;
} else if (StringNCmp (bufplus, "UNVERIFIED: ", 12) == 0) {
bufplus += 12;
+ } else if (StringNCmp (bufplus, "PUTATIVE PSEUDOGENE: ", 21) == 0) { /* compare the full 21-character prefix that is skipped below */
+ bufplus += 21;
}
if (StringNCmp (title, "PREDICTED: ", 11) == 0) {
title += 11;
} else if (StringNCmp (title, "UNVERIFIED: ", 12) == 0) {
title += 12;
+ } else if (StringNCmp (title, "PUTATIVE PSEUDOGENE: ", 21) == 0) {
+ title += 21;
}
if (StringICmp (bufplus, title) != 0) {
olditemid = gcp->itemID;
@@ -8312,7 +9412,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
if (! in_nps) {
if (isGB || isEMBL || isDDBJ || isRefSeq) {
- if (! isGIBBMT && ! isGIBBSQ && ! isPatent) {
+ if (! isGIBBMT && ! isGIBBSQ && ! isPatent && ! isWP && ! isYP) {
olditemid = gcp->itemID;
olditemtype = gcp->thistype;
gcp->itemID = bsp->idx.itemID;
@@ -8347,7 +9447,9 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
MemFree (buf);
}
- if (mip->completeness != 1 && bsp->topology == 2) {
+ if (mip->completeness != 1 && bsp->topology == 2 &&
+ (! IsConWithGaps (bsp)) &&
+ !vsp->is_embl_ddbj_in_sep) {
olditemid = gcp->itemID;
olditemtype = gcp->thistype;
if (vnp->extended != 0) {
@@ -8362,81 +9464,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
- if (ISA_na (bsp->mol) && (bsp->repr == Seq_repr_raw || (bsp->repr == Seq_repr_delta && DeltaLitOnly (bsp))) && bsp->length > 10 && bsp->topology != 2) {
- /* check for N bases at start or stop of sequence */
- sfp = (SeqFeatPtr) MemNew (sizeof (SeqFeat));
- if (sfp == NULL) return;
- sfp->data.choice = SEQFEAT_COMMENT;
-
- sfp->location = AddIntervalToLocation (NULL, bsp->id, 0, 9, FALSE, FALSE);
- str = GetSequencePlusGapByFeature (sfp);
- if (str != NULL) {
- if (str [0] == 'n' || str [0] == 'N') {
- if (isNC || isPatent) {
- sev = SEV_WARNING;
- } else if (bsp->topology == TOPOLOGY_CIRCULAR) {
- sev = SEV_WARNING;
- } else if (only_local) {
- sev = SEV_WARNING;
- } else if (StringICmp (str, "NNNNNNNNNN") == 0) {
- sev = SEV_ERROR;
- } else {
- sev = SEV_WARNING;
- }
- ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at beginning of sequence");
- }
- if (str [0] == '-' || str [0] == '-') {
- if (isNC || isPatent) {
- sev = SEV_WARNING;
- } else if (bsp->topology == TOPOLOGY_CIRCULAR) {
- sev = SEV_WARNING;
- } else if (StringICmp (str, "----------") == 0) {
- sev = SEV_ERROR;
- } else {
- sev = SEV_WARNING;
- }
- ValidErr (vsp, sev, ERR_SEQ_INST_TerminalGap, "Gap at beginning of sequence");
- }
- }
- MemFree (str);
- sfp->location = SeqLocFree (sfp->location);
-
- sfp->location = AddIntervalToLocation (NULL, bsp->id, bsp->length - 10, bsp->length - 1, FALSE, FALSE);
- str = GetSequencePlusGapByFeature (sfp);
- len = StringLen (str);
- if (str != NULL && len > 0) {
- if (str [len - 1] == 'n' || str [len - 1] == 'N') {
- if (isNC || isPatent) {
- sev = SEV_WARNING;
- } else if (bsp->topology == TOPOLOGY_CIRCULAR) {
- sev = SEV_WARNING;
- } else if (only_local) {
- sev = SEV_WARNING;
- } else if (StringICmp (str, "NNNNNNNNNN") == 0) {
- sev = SEV_ERROR;
- } else {
- sev = SEV_WARNING;
- }
- ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at end of sequence");
- }
- if (str [len - 1] == '-' || str [len - 1] == '-') {
- if (isNC || isPatent) {
- sev = SEV_WARNING;
- } else if (bsp->topology == TOPOLOGY_CIRCULAR) {
- sev = SEV_WARNING;
- } else if (StringICmp (str, "----------") == 0) {
- sev = SEV_ERROR;
- } else {
- sev = SEV_WARNING;
- }
- ValidErr (vsp, sev, ERR_SEQ_INST_TerminalGap, "Gap at end of sequence");
- }
- }
- MemFree (str);
- sfp->location = SeqLocFree (sfp->location);
-
- MemFree (sfp);
- }
+ /* check for N bases at start or stop of sequence */
+ ValidateBioseqEnds(bsp, vsp, isPatent);
}
/*****************************************************************************
@@ -8632,13 +9661,19 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp)
if (vsp == NULL || csp == NULL) return;
- sev = SEV_ERROR;
+ sev = SEV_REJECT;
if (vsp->is_refseq_in_sep) {
sev = SEV_WARNING;
}
+ if (vsp->is_insd_in_sep) {
+ sev = SEV_WARNING;
+ }
if (vsp->is_htg_in_sep) {
sev = SEV_WARNING;
}
+ if (vsp->is_pdb_in_sep) {
+ sev = SEV_WARNING;
+ }
alp = csp->authors;
if (alp != NULL) {
@@ -8661,6 +9696,9 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp)
ap->sub == NULL && ap->postal_code == NULL && ap->country == NULL &&
ap->phone == NULL && ap->fax == NULL && ap->email == NULL) {
/* no affiliation */
+ if (sev == SEV_REJECT) {
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission has no affiliation");
+ }
} else {
hasAffil = TRUE;
if (ap->choice == 2) {
@@ -8670,7 +9708,10 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp)
}
*/
if (StringHasNoText (ap->country)) {
- ValidErr (vsp, SEV_WARNING, ERR_GENERIC_MissingPubInfo, "Submission citation affiliation has no country");
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation affiliation has no country");
+ }
+ if (StringHasNoText (ap->div) && StringHasNoText (ap->affil)) {
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation affiliation has no institution");
}
if (StringCmp (ap->country, "USA") == 0) {
if (StringHasNoText (ap->sub)) {
@@ -8680,9 +9721,17 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp)
}
}
}
+ } else {
+ //LCOV_EXCL_START
+ //not valid ASN.1
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation affiliation has no authors");
+ //LCOV_EXCL_STOP
}
if (!hasName) {
- ValidErr (vsp, SEV_ERROR, ERR_GENERIC_MissingPubInfo, "Submission citation has no author names");
+ //LCOV_EXCL_START
+ //BasicCleanup inserts a "?" if there are no authors
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation has no author names");
+ //LCOV_EXCL_STOP
}
if (!hasAffil) {
if (! vsp->is_patent_in_sep) {
@@ -8694,6 +9743,8 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp)
if (DateIsBad (dp, FALSE, &baddate)) {
PrintBadDateError (vsp, baddate, SEV_ERROR, ERR_GENERIC_BadDate, "Submission citation date has error");
}
+ } else {
+ ValidErr (vsp, SEV_ERROR, ERR_GENERIC_MissingPubInfo, "Submission citation has no date");
}
}
@@ -8809,6 +9860,8 @@ static Boolean BadCharsInAuth (CharPtr str, CharPtr PNTR badauthor, Boolean allo
stp = StringISearch(str, "St.");
if (stp == str) {
stp += 2; /* point to the period */
+ } else if ((stp = StringISearch(str, "de M.")) == str) {
+ stp += 4; /* point to the period */
}
}
@@ -8820,6 +9873,12 @@ static Boolean BadCharsInAuth (CharPtr str, CharPtr PNTR badauthor, Boolean allo
} else if (ch == '-' || ch == '\'' || ch == ' ') {
} else if (ch == ',' && allowcomma) {
} else if (ch == '.' && (allowperiod || stp == ptr)) {
+ } else if (StringCmp (ptr, "2nd") == 0 ||
+ StringCmp (ptr, "3rd") == 0 ||
+ StringCmp (ptr, "4th") == 0 ||
+ StringCmp (ptr, "5th") == 0 ||
+ StringCmp (ptr, "6th") == 0) {
+ return FALSE;
} else {
/* bad character found */
*badauthor = str;
@@ -8864,7 +9923,7 @@ static Boolean BadCharsInName (ValNodePtr name, CharPtr PNTR badauthor, BoolPtr
}
static CharPtr suffixList [] = {
- "Jr.", "Sr.", "II", "III", "IV", "V", "VI", NULL
+ "Jr.", "Sr.", "II", "III", "IV", "V", "VI", "2nd", "3rd", "4th", "5th", "6th", NULL
};
static void ValidateSuffix (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPtr pdp, ValNodePtr name)
@@ -9122,25 +10181,27 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPt
if (StringDoesHaveText (imp->pages)) {
ValidErr (vsp, SEV_WARNING, ERR_GENERIC_PublicationInconsistency, "In-press is not expected to have page numbers");
}
+ dp = imp->date;
+ if (dp == NULL || StringCmp (dp->str, "?") == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_GENERIC_MissingPubInfo, "In-press is missing the date");
+ }
}
if (imp->prepub == 0 && imp->pubstatus != PUBSTATUS_aheadofprint) {
noVol = StringHasNoText (imp->volume);
noPages = StringHasNoText (imp->pages);
- sev = SEV_ERROR;
- if (vsp->is_refseq_in_sep) {
- sev = SEV_WARNING;
- }
- if (imp->pubstatus == PUBSTATUS_epublish) {
- sev = SEV_WARNING;
+ if (noVol) {
+ if (electronic_journal) {
+ ValidErr (vsp, SEV_INFO, ERR_GENERIC_MissingVolumeEpub, "Electronic journal volume missing");
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_GENERIC_MissingVolume, "Journal volume missing");
+ }
}
- if (noVol && noPages) {
- ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Journal volume and pages missing");
- } else if (noVol) {
- if (! electronic_journal) {
- ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Journal volume missing");
+ if (noPages) {
+ if (electronic_journal) {
+ ValidErr (vsp, SEV_INFO, ERR_GENERIC_MissingPagesEpub, "Electronic journal pages missing");
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_GENERIC_MissingPages, "Journal pages missing");
}
- } else if (noPages) {
- ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Journal pages missing");
}
if ((! noPages) && (! electronic_journal)) {
sev = SEV_WARNING;
@@ -9185,19 +10246,22 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPt
}
if (imp->pubstatus == PUBSTATUS_aheadofprint && imp->prepub != 2) {
if (noVol || noPages) {
- } else {
+ } else if (thepmid == 0) {
ValidErr (vsp, SEV_WARNING, ERR_GENERIC_PublicationInconsistency, "Ahead-of-print without in-press");
}
}
if (imp->pubstatus == PUBSTATUS_epublish && imp->prepub == 2) {
+ //LCOV_EXCL_START
+ //BasicCleanup removes prepub = 2 if pubstatus is epublish
ValidErr (vsp, SEV_WARNING, ERR_GENERIC_PublicationInconsistency, "Electronic-only publication should not also be in-press");
+ //LCOV_EXCL_STOP
}
if (imp->pubstatus == PUBSTATUS_epublish || imp->pubstatus == PUBSTATUS_ppublish || imp->pubstatus == PUBSTATUS_aheadofprint) {
if (StringDoesHaveText (pdp->comment)) {
if (StringStr (pdp->comment, "Publication Status") != NULL ||
StringStr (pdp->comment, "Publication-Status") != NULL ||
StringStr (pdp->comment, "Publication_Status") != NULL) {
- ValidErr (vsp, SEV_ERROR, ERR_GENERIC_UnexpectedPubStatusComment, "Publication status is in comment for pmid %ld", (long) thepmid);
+ ValidErr (vsp, SEV_WARNING, ERR_GENERIC_UnexpectedPubStatusComment, "Publication status is in comment for pmid %ld", (long) thepmid);
}
}
}
@@ -9264,8 +10328,11 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPt
if (pid->choice == 5) {
str = (CharPtr) pid->data;
if (StringHasNoText (str)) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts empty consortium to std author with last name "?"
ValidErr (vsp, SEV_WARNING, ERR_GENERIC_PublicationInconsistency, "Empty consortium");
continue;
+ //LCOV_EXCL_STOP
}
if (StringAlreadyInList (conslist, str)) {
ValidErr (vsp, SEV_WARNING, ERR_GENERIC_PublicationInconsistency, "Duplicate consortium '%s'", str);
@@ -9329,6 +10396,7 @@ typedef struct bioseqvalid
}
BioseqValidStr , PNTR BioseqValidStrPtr;
+// Used by DeltaOrFarSeg
static void CheckForNucProt (BioseqSetPtr bssp, Pointer userdata)
{
BoolPtr hasPartsP;
@@ -9339,6 +10407,8 @@ static void CheckForNucProt (BioseqSetPtr bssp, Pointer userdata)
}
}
+//LCOV_EXCL_START
+// Only for SegSets
static void CheckForParts (BioseqSetPtr bssp, Pointer userdata)
{
BoolPtr hasPartsP;
@@ -9348,6 +10418,7 @@ static void CheckForParts (BioseqSetPtr bssp, Pointer userdata)
*hasPartsP = TRUE;
}
}
+//LCOV_EXCL_STOP
static Boolean DeltaOrFarSeg (SeqEntryPtr sep, SeqLocPtr location)
{
@@ -9362,29 +10433,18 @@ static Boolean DeltaOrFarSeg (SeqEntryPtr sep, SeqLocPtr location)
return TRUE;
}
if (bsp->repr == Seq_repr_seg) {
+//LCOV_EXCL_START
+// Only for SegSets
VisitSetsInSep (sep, (Pointer) &hasParts, CheckForParts);
if (!hasParts)
return TRUE;
+//LCOV_EXCL_STOP
}
}
return FALSE;
}
-static Boolean IsOrganelleBioseq (BioseqPtr bsp)
-{
- SeqDescrPtr sdp;
- SeqMgrDescContext dcontext;
- BioSourcePtr biop;
-
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || !IsLocationOrganelle (biop->genome)) {
- return FALSE;
- } else {
- return TRUE;
- }
-}
-
static Boolean ConsistentWithA (Char ch)
{
@@ -9453,7 +10513,7 @@ ValidateIntronEndsAtSpliceSiteOrGap
if (bsp == NULL)
return;
- if (IsOrganelleBioseq(bsp)) {
+ if (IsBioseqOrganelle(bsp)) {
BioseqUnlock (bsp);
return;
}
@@ -9509,6 +10569,8 @@ ValidateIntronEndsAtSpliceSiteOrGap
BioseqUnlock (bsp);
}
+//LCOV_EXCL_START
+//farloc is never set during indexing, so this function is never called
static Boolean IsLocInSmallGenomeSet (
SeqLocPtr loc
)
@@ -9532,6 +10594,7 @@ static Boolean IsLocInSmallGenomeSet (
return TRUE;
}
+//farloc is never set during indexing, so this function is never called
static Boolean AllPartsInSmallGenomeSet (
SeqLocPtr loc,
ValidStructPtr vsp,
@@ -9572,6 +10635,72 @@ static Boolean AllPartsInSmallGenomeSet (
return rsult;
}
+//LCOV_EXCL_STOP
+
+
+/*
+ * HasTerminalException
+ *
+ * Scans the code-break list of a coding region and reports whether any
+ * entry has aa.choice == 1 with aa.value.intvalue == 42 (42 is the ASCII
+ * code of '*'; presumably the ncbieaa stop symbol - confirm against the
+ * CodeBreak definition).
+ *
+ * Returns TRUE if such a code-break exists, FALSE otherwise (including
+ * when crp is NULL).
+ */
+static Boolean HasTerminalException (CdRegionPtr crp)
+{
+  CodeBreakPtr cbp;
+  Boolean rval = FALSE;
+
+  if (crp == NULL) {
+    return FALSE;
+  }
+
+  for (cbp = crp->code_break; cbp != NULL && !rval; cbp = cbp->next) {
+    if (cbp->aa.choice == 1 && cbp->aa.value.intvalue == 42) {
+      rval = TRUE;
+    }
+  }
+  /* return the computed result; an unconditional TRUE made the scan pointless */
+  return rval;
+}
+
+
+/*
+ * IsAmbiguous
+ *
+ * Returns FALSE when ch is exactly one of the upper-case letters
+ * 'A', 'T', 'G', 'C' or 'U'; every other character (including
+ * lower-case letters and gap dashes) yields TRUE.
+ */
+static Boolean IsAmbiguous(Char ch)
+{
+  switch (ch) {
+    case 'A':
+    case 'T':
+    case 'G':
+    case 'C':
+    case 'U':
+      return FALSE;
+    default:
+      return TRUE;
+  }
+}
+
+
+/*
+ * CheckCommentForAmbiguityPhrase
+ *
+ * For a coding-region feature whose comment contains the phrase
+ * "ambiguity in stop codon" and whose CdRegion passes
+ * HasTerminalException, fetches the sequence under the feature location
+ * and checks the bases of the final (possibly partial) codon with
+ * IsAmbiguous.  If none of them is ambiguous, an ERR_SEQ_FEAT_BadComment
+ * error is posted.
+ *
+ * sfp - feature to examine; ignored unless it is a SEQFEAT_CDREGION
+ * vsp - validator state passed through to ValidErr
+ *
+ * Nothing is reported when the arguments are NULL, the phrase is absent,
+ * the location is too short to contain a codon base, or the sequence
+ * buffer cannot be allocated.
+ */
+static void CheckCommentForAmbiguityPhrase(SeqFeatPtr sfp, ValidStructPtr vsp)
+{
+  CdRegionPtr crp;
+  Int4 loc_len, coding_len, last_codon_len, j;
+  CharPtr underlying;
+  Boolean has_ambig = FALSE;
+
+  if (sfp == NULL || vsp == NULL || sfp->data.choice != SEQFEAT_CDREGION
+      || StringSearch (sfp->comment, "ambiguity in stop codon") == NULL
+      || (crp = (CdRegionPtr)(sfp->data.value.ptrvalue)) == NULL
+      || !HasTerminalException(crp)) {
+    return;
+  }
+
+  /* loc_len is the full location length; coding_len drops the bases skipped by the reading frame */
+  loc_len = SeqLocLen(sfp->location);
+  coding_len = loc_len;
+  if (crp->frame == 2) {
+    coding_len -= 1;
+  } else if (crp->frame == 3) {
+    coding_len -= 2;
+  }
+  if (coding_len < 1) {
+    /* no coding bases (or SeqLocLen failed): indexing below would run before the buffer */
+    return;
+  }
+
+  last_codon_len = coding_len % 3;
+  if (last_codon_len == 0) {
+    last_codon_len = 3;
+  }
+
+  /*
+   * NOTE(review): SeqPortStreamLoc with a NULL callback is assumed to copy
+   * the whole location (loc_len characters plus terminator) into the buffer,
+   * so the buffer is sized and the last codon is indexed from loc_len rather
+   * than from the frame-adjusted length - confirm against seqport.h.
+   */
+  underlying = MemNew (sizeof (Char) * (loc_len + 2));
+  if (underlying == NULL) {
+    /* cannot inspect the sequence; do not report a spurious error */
+    return;
+  }
+  SeqPortStreamLoc (sfp->location, EXPAND_GAPS_TO_DASHES, (Pointer) underlying, NULL);
+  for (j = 0; j < last_codon_len && !has_ambig; j++) {
+    has_ambig = IsAmbiguous(*(underlying + loc_len - 1 - j));
+  }
+  underlying = MemFree (underlying);
+  if (!has_ambig) {
+    ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_BadComment,
+              "Feature comment indicates ambiguity in stop codon but no ambiguities are present in stop codon.");
+  }
+}
/*****************************************************************************
@@ -9601,6 +10730,10 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
Boolean is_refseq = FALSE;
ErrSev sev;
Boolean no_nonconsensus_except;
+ GBQualPtr gbq;
+ GeneRefPtr grp;
+ CharPtr sfp_pseudo, gene_pseudo;
+ SeqFeatPtr gene;
vsp->descr = NULL;
@@ -9751,7 +10884,10 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
if (ifp != NULL && ifp->key != NULL && (!HasNoText (ifp->key))) {
if (StringCmp (ifp->key, "CAAT_signal") == 0) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts this feature to regulatory class, code not reachable
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidForType, "Invalid feature for an pre-RNA Bioseq.");
+ //LCOV_EXCL_STOP
}
}
break;
@@ -9761,7 +10897,10 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
}
if (farloc && (! is_nc) && (! is_emb) && (! AllPartsInSmallGenomeSet (sfp->location, vsp, bsp))) {
+ //LCOV_EXCL_START
+ //farloc is never set during indexing
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FarLocation, "Feature has 'far' location - accession not packaged in record");
+ //LCOV_EXCL_STOP
}
if ((sfp->data.choice == SEQFEAT_PUB) || (sfp->cit != NULL))
@@ -9772,8 +10911,11 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_SerialInComment,
"Feature comment may refer to reference by serial number - attach reference specific comments to the reference REMARK instead.");
}
+ CheckCommentForAmbiguityPhrase(sfp, vsp);
if (bsp != NULL && bsp->repr == Seq_repr_seg) {
+//LCOV_EXCL_START
+// Only for SegSets
slp = SeqLocFindNext (sfp->location, NULL);
while (slp != NULL) {
sip = SeqLocId (slp);
@@ -9793,6 +10935,7 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
ValidErr (vsp, sev, ERR_SEQ_FEAT_LocOnSegmentedBioseq, "Feature location on segmented bioseq, not on parts");
}
}
+//LCOV_EXCL_STOP
}
if (sfp->idx.subtype == FEATDEF_intron) {
@@ -9807,6 +10950,45 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
}
}
+ if (sfp->idx.subtype == FEATDEF_CDS || sfp->idx.subtype == FEATDEF_mRNA) {
+ gene = NULL;
+ grp = SeqMgrGetGeneXref (sfp);
+ if (grp == NULL) {
+ gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ } else if (! SeqMgrGeneIsSuppressed (grp)) {
+ if (StringDoesHaveText (grp->locus_tag)) {
+ gene = SeqMgrGetGeneByLocusTag (bsp, grp->locus_tag, NULL);
+ } else if (StringDoesHaveText (grp->locus)) {
+ gene = SeqMgrGetFeatureByLabel (bsp, grp->locus, SEQFEAT_GENE, 0, NULL);
+ }
+ }
+ if (gene != NULL && gene->pseudo && sfp->pseudo) {
+ sfp_pseudo = "unqualified";
+ gene_pseudo = "unqualified";
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "pseudogene") != 0) continue;
+ if (StringHasNoText (gbq->val)) continue;
+ sfp_pseudo = gbq->val;
+ }
+ for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "pseudogene") != 0) continue;
+ if (StringHasNoText (gbq->val)) continue;
+ gene_pseudo = gbq->val;
+ }
+ if (StringCmp (sfp_pseudo, gene_pseudo) != 0) {
+ if (sfp->idx.subtype == FEATDEF_CDS) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InconsistentPseudogeneValue,
+ "Different pseudogene values on CDS (%s) and gene (%s)",
+ sfp_pseudo, gene_pseudo);
+ } else if (sfp->idx.subtype == FEATDEF_mRNA) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InconsistentPseudogeneValue,
+ "Different pseudogene values on mRNA (%s) and gene (%s)",
+ sfp_pseudo, gene_pseudo);
+ }
+ }
+ }
+ }
+
if (gcp != NULL) {
gcp->itemID = olditemid;
gcp->thistype = olditemtype;
@@ -9855,6 +11037,8 @@ static void CheckMultiIntervalGene (SeqFeatPtr sfp, SeqMgrFeatContextPtr context
if (SeqLocId (sfp->location) == NULL) {
bsp = context->bsp;
if (bsp == NULL || bsp->repr != Seq_repr_seg) return;
+//LCOV_EXCL_START
+// Only for SegSets
mappedloc = SeqLocMerge (bsp, sfp->location, NULL, FALSE, TRUE, FALSE);
if (mappedloc == NULL) return;
count = 0;
@@ -9866,6 +11050,7 @@ static void CheckMultiIntervalGene (SeqFeatPtr sfp, SeqMgrFeatContextPtr context
SeqLocFree (mappedloc);
if (count < 2) return;
segmented = TRUE;
+//LCOV_EXCL_STOP
}
bsp = context->bsp;
@@ -9897,8 +11082,11 @@ static void CheckMultiIntervalGene (SeqFeatPtr sfp, SeqMgrFeatContextPtr context
vsp->sfp = sfp;
if (segmented) {
+ //LCOV_EXCL_START
+ //segmented sets are obsolete
ValidErr (vsp, sev, ERR_SEQ_FEAT_SegmentedGeneProblem,
"Gene feature on segmented sequence should cover all bases within its extremes");
+ //LCOV_EXCL_STOP
} else if (vsp->is_small_genome_set) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_MultiIntervalGene,
"Multiple interval gene feature in small genome set - set trans-splicing exception if appropriate");
@@ -9929,6 +11117,7 @@ static Boolean LIBCALLBACK ValidateSeqFeatIndexed (SeqFeatPtr sfp, SeqMgrFeatCon
return ValidateSeqFeatCommon (sfp, bvsp, vsp, context->left, context->right, context->numivals, context->itemID, context->farloc, context->bsp);
}
+//LCOV_EXCL_START
static void ValidateSeqFeatContext (GatherContextPtr gcp)
{
ValidStructPtr vsp;
@@ -9941,6 +11130,7 @@ static void ValidateSeqFeatContext (GatherContextPtr gcp)
ValidateSeqFeatCommon (sfp, bvsp, vsp, gcp->extremes.left, gcp->extremes.right, 0, 0, FALSE, NULL);
}
+//LCOV_EXCL_STOP
/*****************************************************************************
*
@@ -10094,6 +11284,7 @@ static CharPtr Nlm_valid_country_codes [] = {
"Liberia",
"Libya",
"Liechtenstein",
+ "Line Islands",
"Lithuania",
"Luxembourg",
"Macau",
@@ -10141,6 +11332,7 @@ static CharPtr Nlm_valid_country_codes [] = {
"Pacific Ocean",
"Pakistan",
"Palau",
+ "Palestine",
"Palmyra Atoll",
"Panama",
"Papua New Guinea",
@@ -10159,9 +11351,11 @@ static CharPtr Nlm_valid_country_codes [] = {
"Ross Sea",
"Russia",
"Rwanda",
+ "Saint Barthelemy",
"Saint Helena",
"Saint Kitts and Nevis",
"Saint Lucia",
+ "Saint Martin",
"Saint Pierre and Miquelon",
"Saint Vincent and the Grenadines",
"Samoa",
@@ -10186,6 +11380,7 @@ static CharPtr Nlm_valid_country_codes [] = {
"Spain",
"Spratly Islands",
"Sri Lanka",
+ "State of Palestine",
"Sudan",
"Suriname",
"Svalbard",
@@ -10244,7 +11439,8 @@ static CharPtr Nlm_formerly_valid_country_codes [] = {
NULL
};
-NLM_EXTERN CharPtr PNTR GetValidCountryList (void)
+//LCOV_EXCL_START
+NLM_EXTERN CharPtr PNTR GetValidCountryList(void)
{
return (CharPtr PNTR) Nlm_valid_country_codes;
@@ -10255,6 +11451,7 @@ NLM_EXTERN CharPtr PNTR GetFormerCountryList (void)
{
return (CharPtr PNTR) Nlm_formerly_valid_country_codes;
}
+//LCOV_EXCL_STOP
NLM_EXTERN Boolean CountryIsValid (CharPtr name, BoolPtr old_countryP, BoolPtr bad_capP)
{
@@ -10319,6 +11516,7 @@ NLM_EXTERN Boolean CountryIsValid (CharPtr name, BoolPtr old_countryP, BoolPtr b
}
+//LCOV_EXCL_START
NLM_EXTERN CharPtr GetCorrectedCountryCapitalization (CharPtr name)
{
Int2 L, R, mid;
@@ -10440,6 +11638,7 @@ NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str)
return FALSE;
}
+//LCOV_EXCL_STOP
/* BEGINNING OF NEW LATITUDE-LONGITUDE COUNTRY VALIDATION CODE */
@@ -11049,6 +12248,7 @@ NLM_EXTERN Boolean CountryIsInLatLonList (
return FALSE;
}
+//LCOV_EXCL_START
NLM_EXTERN Boolean IsCountryInLatLonList (
CharPtr country
)
@@ -11056,6 +12256,7 @@ NLM_EXTERN Boolean IsCountryInLatLonList (
{
return CountryIsInLatLonList (country);
}
+//LCOV_EXCL_STOP
NLM_EXTERN Boolean WaterIsInLatLonList (
CharPtr country
@@ -11196,6 +12397,7 @@ NLM_EXTERN Boolean CountryContainsLatLon (
return RegionContainsLatLon (country, lat, lon, csp);
}
+//LCOV_EXCL_START
NLM_EXTERN Boolean TestLatLonForCountry (
CharPtr country,
FloatHi lat,
@@ -11205,6 +12407,7 @@ NLM_EXTERN Boolean TestLatLonForCountry (
{
return CountryContainsLatLon (country, lat, lon);
}
+//LCOV_EXCL_STOP
NLM_EXTERN Boolean WaterContainsLatLon (
CharPtr country,
@@ -11338,7 +12541,8 @@ static CtyBlockPtr GuessWaterByLatLon (
return LookupRegionByLatLon (lat, lon, country, NULL, csp);
}
-NLM_EXTERN CharPtr LookupCountryByLatLon (
+//LCOV_EXCL_START
+NLM_EXTERN CharPtr LookupCountryByLatLon(
FloatHi lat,
FloatHi lon
)
@@ -11352,7 +12556,7 @@ NLM_EXTERN CharPtr LookupCountryByLatLon (
return cbp->name;
}
-NLM_EXTERN CharPtr GuessCountryForLatLon (
+NLM_EXTERN CharPtr GuessCountryForLatLon(
FloatHi lat,
FloatHi lon
)
@@ -11361,7 +12565,7 @@ NLM_EXTERN CharPtr GuessCountryForLatLon (
return LookupCountryByLatLon (lat, lon);
}
-NLM_EXTERN CharPtr LookupWaterByLatLon (
+NLM_EXTERN CharPtr LookupWaterByLatLon(
FloatHi lat,
FloatHi lon
)
@@ -11375,7 +12579,7 @@ NLM_EXTERN CharPtr LookupWaterByLatLon (
return cbp->name;
}
-NLM_EXTERN FloatHi CountryDataScaleIs (void)
+NLM_EXTERN FloatHi CountryDataScaleIs(void)
{
CtrySetPtr csp;
@@ -11386,7 +12590,7 @@ NLM_EXTERN FloatHi CountryDataScaleIs (void)
return csp->scale;
}
-NLM_EXTERN FloatHi WaterDataScaleIs (void)
+NLM_EXTERN FloatHi WaterDataScaleIs(void)
{
CtrySetPtr csp;
@@ -11398,7 +12602,7 @@ NLM_EXTERN FloatHi WaterDataScaleIs (void)
}
-static Boolean RegionExtremesOverlap (
+static Boolean RegionExtremesOverlap(
CharPtr first,
CharPtr second,
CtrySetPtr csp
@@ -11462,6 +12666,8 @@ NLM_EXTERN Boolean WaterExtremesOverlap (
return RegionExtremesOverlap (first, second, csp);
}
+//LCOV_EXCL_STOP
+
/*
Distance on a spherical surface calculation adapted from
@@ -11630,6 +12836,8 @@ static CtyBlockPtr NearestCountryByLatLon (
return RegionClosestToLatLon (lat, lon, range, distanceP, csp);
}
+//LCOV_EXCL_START
+//map used during regression is too good, no areas of data insufficiency
static CtyBlockPtr NearestWaterByLatLon (
FloatHi lat,
FloatHi lon,
@@ -11646,6 +12854,7 @@ static CtyBlockPtr NearestWaterByLatLon (
return RegionClosestToLatLon (lat, lon, range, distanceP, csp);
}
+
NLM_EXTERN CharPtr CountryClosestToLatLon (
FloatHi lat,
FloatHi lon,
@@ -11677,6 +12886,8 @@ NLM_EXTERN CharPtr WaterClosestToLatLon (
return cbp->name;
}
+//LCOV_EXCL_STOP
+
static CtyBlockPtr RegionIsNearLatLon (
CharPtr country,
@@ -11792,6 +13003,8 @@ static CtyBlockPtr WaterToLatLonDistance (
return RegionIsNearLatLon (country, NULL, lat, lon, range, distanceP, csp);
}
+//LCOV_EXCL_START
+//map used during regression is too good, no areas of data insufficiency
NLM_EXTERN Boolean CountryIsNearLatLon (
CharPtr country,
FloatHi lat,
@@ -11809,6 +13022,7 @@ NLM_EXTERN Boolean CountryIsNearLatLon (
return TRUE;
}
+//map used during regression is too good, no areas of data insufficiency
NLM_EXTERN Boolean WaterIsNearLatLon (
CharPtr country,
FloatHi lat,
@@ -11825,6 +13039,7 @@ NLM_EXTERN Boolean WaterIsNearLatLon (
return TRUE;
}
+//LCOV_EXCL_STOP
/*
static void WriteLatLonRegionData (
@@ -11948,6 +13163,7 @@ static CharPtr modified_base_abbrevs [] = {
"<m2a>",
"<m2g>",
"<m3c>",
+ "<m4c>",
"<m5c>",
"<m6a>",
"<m7g>",
@@ -11990,6 +13206,102 @@ static void InitializeModBaseFSA (ValidStructPtr vsp)
}
}
+/*
+ * AltitudeIsValid
+ *
+ * Returns TRUE when name has exactly the form
+ *   [+|-] digits [ '.' digits ] ' ' 'm'
+ * with nothing before or after it; any other text returns FALSE.
+ */
+NLM_EXTERN Boolean AltitudeIsValid (CharPtr name)
+
+{
+  CharPtr  cp;
+
+  if (StringHasNoText (name)) return FALSE;
+
+  cp = name;
+
+  /* optional leading sign */
+  if (*cp == '+' || *cp == '-') {
+    cp++;
+  }
+
+  /* integer part: at least one digit is required */
+  if (! IS_DIGIT (*cp)) return FALSE;
+  while (IS_DIGIT (*cp)) {
+    cp++;
+  }
+
+  /* optional fraction: a period must be followed by at least one digit */
+  if (*cp == '.') {
+    cp++;
+    if (! IS_DIGIT (*cp)) return FALSE;
+    while (IS_DIGIT (*cp)) {
+      cp++;
+    }
+  }
+
+  /* the number must be followed by exactly " m" and then end of string */
+  if (cp [0] != ' ') return FALSE;
+  if (cp [1] != 'm') return FALSE;
+  if (cp [2] != '\0') return FALSE;
+
+  return TRUE;
+}
+
+/*
+ * Leading phrases accepted by TypeMaterialIsValid, which compares each one
+ * case-insensitively against the start of the value. The list is terminated
+ * by NULL.
+ */
+static CharPtr type_prefixes [] = {
+ "type strain",
+ "neotype strain",
+ "holotype",
+ "paratype",
+ "neotype",
+ "allotype",
+ "hapanotype",
+ "syntype",
+ "lectotype",
+ "paralectotype",
+ "isotype",
+ "epitype",
+ "isosyntype",
+ "ex-type",
+ "reference strain",
+ "type material",
+ NULL
+};
+
+/*
+ * TypeMaterialIsValid
+ *
+ * Returns TRUE when name begins (ignoring case) with one of the entries in
+ * type_prefixes, FALSE otherwise or when name has no text.
+ */
+NLM_EXTERN Boolean TypeMaterialIsValid (CharPtr name)
+
+{
+  CharPtr PNTR  pfx;
+
+  if (StringHasNoText (name)) return FALSE;
+
+  /* walk the NULL-terminated prefix table */
+  for (pfx = type_prefixes; *pfx != NULL; pfx++) {
+    if (StringNICmp (name, *pfx, StringLen (*pfx)) == 0) return TRUE;
+  }
+
+  return FALSE;
+}
+
static Boolean PrimerSeqIsValid (ValidStructPtr vsp, CharPtr name, Char PNTR badch)
{
@@ -12213,6 +13525,7 @@ static Boolean LatLonIsValid (CharPtr name)
static CharPtr source_qual_prefixes [] = {
"acronym:",
+ "altitude:",
"anamorph:",
"authority:",
"biotype:",
@@ -12262,6 +13575,7 @@ static CharPtr source_qual_prefixes [] = {
"metagenomic:",
"nat_host:",
"pathovar:",
+ "phenotype:",
"placement:",
"plasmid_name:",
"plastid_name:",
@@ -12293,6 +13607,7 @@ static CharPtr source_qual_prefixes [] = {
"transposon_name:",
"type:",
"variety:",
+ "whole_replicon:",
NULL
};
@@ -12549,11 +13864,102 @@ static CharPtr CheckInstCollName (CharPtr name, Uint1Ptr typeP)
return ic_code_data [(int) R];
}
+
+//LCOV_EXCL_START
+/*
+ * FixOrgModVoucher
+ *
+ * Applies only to bio_material, culture_collection and specimen_voucher
+ * OrgMods with a non-empty subname. When the structured voucher's
+ * institution code is not returned verbatim by CheckInstCollName and it
+ * contains a <...> section, that section is removed and the lookup is
+ * repeated; if the shortened code is then returned verbatim, mod->subname
+ * is replaced with "institution:id".
+ *
+ * Returns TRUE only when mod->subname was rewritten; mod is left untouched
+ * in every other case (including allocation failure).
+ */
+NLM_EXTERN Boolean FixOrgModVoucher (OrgModPtr mod)
+{
+  Boolean rval = FALSE;
+  CharPtr cpy, inst = NULL, id = NULL, ptr, ptr2, match, fixed;
+  Uint1   type = 0, allowed_type = 0;
+
+  if (mod == NULL || StringHasNoText (mod->subname)) {
+    return FALSE;
+  }
+
+  switch (mod->subtype) {
+    case ORGMOD_bio_material:
+      type = BIO_MATERIAL_TYPE;
+      break;
+    case ORGMOD_culture_collection:
+      type = CULTURE_COLLECTION_TYPE;
+      break;
+    case ORGMOD_specimen_voucher:
+      type = SPECIMEN_VOUCHER_TYPE;
+      break;
+    default:
+      break;
+  }
+  if (type == 0) {
+    return FALSE;
+  }
+
+  /* work on a private copy so the original subname survives until replaced */
+  cpy = StringSave(mod->subname);
+  if (ParseStructuredVoucher (cpy, &inst, &id) && inst != NULL && inst[0] != ':') {
+    /* see if we need to eliminate unnecessary country code */
+    match = CheckInstCollName (inst, &type);
+    if (match == NULL || StringCmp (match, inst) != 0) {
+      if ((ptr = StringChr(inst, '<')) != NULL
+          && (ptr2 = StringChr(ptr, '>')) != NULL) {
+        /*
+         * Close the gap left by "<...>". Source and destination lie in the
+         * same buffer and overlap, so an overlap-safe move is required
+         * (a string copy is undefined here); the terminator is moved too.
+         */
+        MemMove (ptr, ptr2 + 1, StringLen (ptr2 + 1) + 1);
+        match = CheckInstCollName (inst, &type);
+        if (match != NULL && StringCmp (match, inst) == 0) {
+          /* build the replacement first; only swap it in if allocation worked */
+          fixed = (CharPtr) MemNew(sizeof (Char) * (StringLen(inst) + StringLen (id) + 2));
+          if (fixed != NULL) {
+            sprintf (fixed, "%s:%s", inst, id);
+            mod->subname = MemFree (mod->subname);
+            mod->subname = fixed;
+            rval = TRUE;
+          }
+        }
+      }
+    }
+  } else {
+#if 0
+    /* removed from BasicCleanup */
+    /* add structure if missing */
+    ptr = cpy;
+    inst_len = 0;
+    while (*ptr != 0 && isalpha(*ptr)) {
+      ++ptr;
+      ++inst_len;
+    }
+
+    if (inst_len >= 3) {
+      /* can only continue if three or more characters in institution code */
+      while (*ptr != 0 && isspace (*ptr)) {
+        ++ptr;
+      }
+      if (*ptr != 0) {
+        id = ptr;
+        while (*ptr != 0 && isdigit(*ptr)) {
+          ptr++;
+        }
+        if (*ptr == 0 && ptr - id > 0) {
+          /* can only continue if ID is non-empty and all numbers */
+          inst = (CharPtr) MemNew (sizeof (Char) * inst_len + 1);
+          StringNCpy (inst, cpy, inst_len);
+          inst[inst_len] = 0;
+          match = CheckInstCollName(inst, &allowed_type);
+          if (match != NULL && StringCmp (match, inst) == 0 && (type & allowed_type)) {
+            mod->subname = MemFree (mod->subname);
+            mod->subname = (CharPtr) MemNew(sizeof (Char) * (StringLen(inst) + StringLen (id) + 2));
+            sprintf (mod->subname, "%s:%s", inst, id);
+            rval = TRUE;
+          }
+          inst = MemFree (inst);
+        }
+      }
+    }
+#endif
+  }
+  cpy = MemFree (cpy);
+  return rval;
+}
+//LCOV_EXCL_STOP
+
+
static void ValidateOrgModVoucher (ValidStructPtr vsp, OrgModPtr mod)
{
Char buf [512];
- CharPtr inst = NULL, id = NULL, coll = NULL, str;
+ CharPtr inst = NULL, id = NULL, coll = NULL, ptr, str;
size_t len1, len2;
Uint1 type;
@@ -12620,6 +14026,17 @@ static void ValidateOrgModVoucher (ValidStructPtr vsp, OrgModPtr mod)
coll = StringChr (inst, ':');
if (coll == NULL) {
+ ptr = StringChr (inst, '<');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ str = CheckInstCollName (inst, &type);
+ if (StringCmp (str, inst) == 0) {
+ *ptr = '<';
+ ValidErr(vsp, SEV_WARNING, ERR_SEQ_DESCR_BadInstitutionCountry, "Institution code %s should not should not be qualified with a <COUNTRY> designation", inst, ptr + 1);
+ return;
+ }
+ *ptr = '<';
+ }
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadInstitutionCode, "Institution code %s is not in list", inst);
return;
}
@@ -12653,6 +14070,160 @@ static void ValidateOrgModVoucher (ValidStructPtr vsp, OrgModPtr mod)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadInstitutionCode, "Institution code %s:%s is not in list", inst, coll);
}
+/*
+ * returns reconstructed institution:id if institution is valid but
+ * institution:collection:id is invalid; returns NULL when nothing needs
+ * to be (or can be) repaired. The returned string is allocated with MemNew.
+ */
+//LCOV_EXCL_START
+// not part of validation
+NLM_EXTERN CharPtr RemoveBadInstitutionCollection (OrgModPtr mod)
+
+{
+  Char     buf [512];
+  CharPtr  inst = NULL, id = NULL, coll, match, result;
+  size_t   inst_len;
+  Uint1    type;
+
+  if (mod == NULL) return NULL;
+
+  /* only the three voucher-style modifiers are handled */
+  switch (mod->subtype) {
+    case ORGMOD_bio_material :
+    case ORGMOD_culture_collection :
+    case ORGMOD_specimen_voucher :
+      break;
+    default :
+      return NULL;
+  }
+
+  /* parse a local copy, since the pieces are cut apart in place below */
+  StringNCpy_0 (buf, mod->subname, sizeof (buf));
+  if (StringChr (buf, ':') == NULL) return NULL;
+  if (! ParseStructuredVoucher (buf, &inst, &id)) return NULL;
+  if (inst == NULL || inst [0] == ':') return NULL;
+
+  /* full institution text already matches (exactly or ignoring case) */
+  match = CheckInstCollName (inst, &type);
+  if (StringCmp (match, inst) == 0 || StringICmp (match, inst) == 0) return NULL;
+
+  /* previously ignored personal collections, now complain if name missing */
+  if (StringNICmp (inst, "personal", 8) == 0) return NULL;
+
+  /* lookup result is the same code followed by a '<' qualifier */
+  inst_len = StringLen (inst);
+  if (inst_len < StringLen (match) &&
+      StringNICmp (match, inst, inst_len) == 0 &&
+      match [inst_len] == '<') {
+    return NULL;
+  }
+
+  /* split institution from collection at the first colon */
+  coll = StringChr (inst, ':');
+  if (coll == NULL) return NULL;
+  *coll = '\0';
+  coll++;
+
+  match = CheckInstCollName (inst, &type);
+  if (StringCmp (match, inst) != 0) return NULL;
+
+  /* DNA is a valid collection for any institution (using bio_material) */
+  if (StringCmp (coll, "DNA") == 0) return NULL;
+
+  result = (CharPtr) MemNew (sizeof (Char) * (StringLen (inst) + StringLen (id) + 10));
+  if (result == NULL) return NULL;
+
+  StringCpy (result, inst);
+  StringCat (result, ":");
+  StringCat (result, id);
+  return result;
+}
+
+/*
+ * returns reconstructed institution:id if institution is valid but
+ * institution<country>:id is invalid; returns NULL when nothing needs
+ * to be (or can be) repaired. The returned string is allocated with MemNew.
+ */
+// not part of validation
+NLM_EXTERN CharPtr RemoveBadInstitutionCountry (OrgModPtr mod)
+
+{
+  Char     buf [512];
+  CharPtr  inst = NULL, id = NULL, ctry, match, result;
+  size_t   inst_len;
+  Uint1    type;
+
+  if (mod == NULL) return NULL;
+
+  /* only the three voucher-style modifiers are handled */
+  switch (mod->subtype) {
+    case ORGMOD_bio_material :
+    case ORGMOD_culture_collection :
+    case ORGMOD_specimen_voucher :
+      break;
+    default :
+      return NULL;
+  }
+
+  /* parse a local copy, since the institution text is truncated in place below */
+  StringNCpy_0 (buf, mod->subname, sizeof (buf));
+  if (StringChr (buf, ':') == NULL) return NULL;
+  if (! ParseStructuredVoucher (buf, &inst, &id)) return NULL;
+  if (inst == NULL || inst [0] == ':') return NULL;
+
+  /* full institution text already matches (exactly or ignoring case) */
+  match = CheckInstCollName (inst, &type);
+  if (StringCmp (match, inst) == 0 || StringICmp (match, inst) == 0) return NULL;
+
+  /* previously ignored personal collections, now complain if name missing */
+  if (StringNICmp (inst, "personal", 8) == 0) return NULL;
+
+  /* lookup result is the same code followed by a '<' qualifier */
+  inst_len = StringLen (inst);
+  if (inst_len < StringLen (match) &&
+      StringNICmp (match, inst, inst_len) == 0 &&
+      match [inst_len] == '<') {
+    return NULL;
+  }
+
+  /* cut the institution text off at the '<' that opens the country part */
+  ctry = StringChr (inst, '<');
+  if (ctry == NULL) return NULL;
+  *ctry = '\0';
+
+  match = CheckInstCollName (inst, &type);
+  if (StringCmp (match, inst) != 0) return NULL;
+
+  result = (CharPtr) MemNew (sizeof (Char) * (StringLen (inst) + StringLen (id) + 10));
+  if (result == NULL) return NULL;
+
+  StringCpy (result, inst);
+  StringCat (result, ":");
+  StringCat (result, id);
+  return result;
+}
+
+// not part of validation
NLM_EXTERN Boolean VoucherInstitutionIsValid (CharPtr inst)
{
@@ -12666,6 +14237,7 @@ NLM_EXTERN Boolean VoucherInstitutionIsValid (CharPtr inst)
return FALSE;
}
+//LCOV_EXCL_STOP
/* works on subname copy that it can change */
@@ -12935,10 +14507,15 @@ static CharPtr valid_sex_values [] = {
"unisexual",
"bisexual",
"asexual",
+ "intersex",
+ "mixed",
"monoecious",
"monecious",
"dioecious",
"diecious",
+ "neuter",
+ "pooled males and females",
+ "pooled male and female",
NULL
};
@@ -13161,6 +14738,8 @@ static void CalculateLatLonMap (
}
}
} else {
+ //LCOV_EXCL_START
+ //map used during regression is too good, no areas of data insufficiency
/* may be coastal inlet, area of data insufficiency */
cbp = NearestCountryByLatLon (lat, lon, 5.0, &landdistance);
if (cbp != NULL) {
@@ -13180,6 +14759,7 @@ static void CalculateLatLonMap (
goodmatch = TRUE;
}
}
+ //LCOV_EXCL_STOP
}
}
/* if guess is not the provided country or province, calculate distance to claimed country */
@@ -13395,7 +14975,10 @@ static void LatLonLandErrors (
ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, fmt, lat_lon, lmp->fullguess, fullname);
}
} else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, fullname);
+ if (StringNCmp (fullname, "Norway: Svalbard", 16) == 0 && StringCmp (lmp->fullguess, "Svalbard") == 0) {
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, fullname);
+ }
}
}
}
@@ -13408,6 +14991,182 @@ typedef enum {
eLatLonAdjust_negate_lon = 4
} ELatLonAdjust;
+
+/*
+ * CheckForFlippedCoordinates
+ *
+ * Recomputes the lat/lon map into *adjusted for three alternative readings
+ * of the coordinates, in this order: lat and lon swapped, lat negated,
+ * lon negated. It stops at the first reading for which ClassifyLatLonMap
+ * returns non-zero; *adjust_test receives that result and *adjust the
+ * matching eLatLonAdjust_* value. If none classifies, *adjust_test is 0,
+ * *adjust is eLatLonAdjust_none and *adjusted holds the last attempt.
+ */
+static void
+CheckForFlippedCoordinates
+(FloatHi lat,
+ FloatHi lon,
+ FloatHi scale,
+ CharPtr country,
+ CharPtr province,
+ CharPtr fullname,
+ LatLonMapPtr adjusted,
+ Uint4Ptr adjust_test,
+ Uint4Ptr adjust)
+{
+  *adjust_test = 0;
+  *adjust = eLatLonAdjust_none;
+
+  /* try using lon for lat, lat for lon */
+  CalculateLatLonMap (lon, lat, country, province, scale, adjusted);
+  *adjust_test = ClassifyLatLonMap (fullname, country, province, adjusted);
+  if (*adjust_test != 0) {
+    *adjust = eLatLonAdjust_flip;
+    return;
+  }
+
+  /* try negative lat */
+  CalculateLatLonMap (-lat, lon, country, province, scale, adjusted);
+  *adjust_test = ClassifyLatLonMap (fullname, country, province, adjusted);
+  if (*adjust_test != 0) {
+    *adjust = eLatLonAdjust_negate_lat;
+    return;
+  }
+
+  /* try negative lon */
+  CalculateLatLonMap (lat, -lon, country, province, scale, adjusted);
+  *adjust_test = ClassifyLatLonMap (fullname, country, province, adjusted);
+  if (*adjust_test != 0) {
+    *adjust = eLatLonAdjust_negate_lon;
+  }
+}
+
+/* One entry pairing a named body of water with the larger body it is listed under. */
+typedef struct waterpair {
+ CharPtr sea;
+ CharPtr ocean;
+} WaterPairData, PNTR WaterPairPtr;
+
+/*
+ * Lookup table used by FindSurroundingOcean, which binary-searches it on the
+ * sea field with StringICmp. Entries must therefore stay sorted by sea name
+ * in case-insensitive order; insert new rows in the right place.
+ */
+static WaterPairData sea_parents [] = {
+ {"Adriatic Sea", "Mediterranean Sea"},
+ {"Aegean Sea", "Mediterranean Sea"},
+ {"Alboran Sea", "Mediterranean Sea"},
+ {"Andaman Sea", "Indian Ocean"},
+ {"Arabian Sea", "Indian Ocean"},
+ {"Argentine Sea", "Atlantic Ocean"},
+ {"Ariake Sea", "Pacific Ocean"},
+ {"Baffin Bay", "Atlantic Ocean"},
+ {"Balearic Sea", "Mediterranean Sea"},
+ {"Baltic Sea", "Atlantic Ocean"},
+ {"Barents Sea", "Arctic Ocean"},
+ {"Bay of Bengal", "Indian Ocean"},
+ {"Beaufort Sea", "Arctic Ocean"},
+ {"Bering Sea", "Pacific Ocean"},
+ {"Bismarck Sea", "Pacific Ocean"},
+ {"Black Sea", "Mediterranean Sea"},
+ {"Bohai Sea", "Pacific Ocean"},
+ {"Caribbean Sea", "Atlantic Ocean"},
+ {"Celebes Sea", "Pacific Ocean"},
+ {"Champlain Sea", "Atlantic Ocean"},
+ {"Chilean Sea", "Pacific Ocean"},
+ {"China Seas", "Pacific Ocean"},
+ {"Chukchi Sea", "Arctic Ocean"},
+ {"Coral Sea", "Pacific Ocean"},
+ {"Davis Strait", "Atlantic Ocean"},
+ {"East China Sea", "Pacific Ocean"},
+ {"East Siberian Sea", "Arctic Ocean"},
+ {"English Channel", "Atlantic Ocean"},
+ {"Erythraean Sea", "Indian Ocean"},
+ {"Greenland Sea", "Arctic Ocean"},
+ {"Gulf of Mexico", "Atlantic Ocean"},
+ {"Gulf of Thailand", "Pacific Ocean"},
+ {"Gulf of Tonkin", "Pacific Ocean"},
+ {"Hudson Bay", "Arctic Ocean"},
+ {"Ionian Sea", "Mediterranean Sea"},
+ {"Irish Sea", "Atlantic Ocean"},
+ {"Irminger Sea", "Atlantic Ocean"},
+ {"James Bay", "Atlantic Ocean"},
+ {"Java Sea", "Indian Ocean"},
+ {"Kara Sea", "Arctic Ocean"},
+ {"Koro Sea", "Pacific Ocean"},
+ {"Labrador Sea", "Atlantic Ocean"},
+ {"Laccadive Sea", "Indian Ocean"},
+ {"Laptev Sea", "Arctic Ocean"},
+ {"Ligurian Sea", "Mediterranean Sea"},
+ {"Lincoln Sea", "Arctic Ocean"},
+ {"Myrtoan Sea", "Mediterranean Sea"},
+ {"North Sea", "Atlantic Ocean"},
+ {"Norwegian Sea", "Atlantic Ocean"},
+ {"Pechora Sea", "Arctic Ocean"},
+ {"Persian Gulf", "Indian Ocean"},
+ {"Philippine Sea", "Pacific Ocean"},
+ {"Red Sea", "Indian Ocean"},
+ {"Salish Sea", "Pacific Ocean"},
+ {"Sargasso Sea", "Atlantic Ocean"},
+ {"Scotia Sea", "Southern Ocean"},
+ {"Sea of Azov", "Black Sea"},
+ {"Sea of Chiloe", "Pacific Ocean"},
+ {"Sea of Crete", "Mediterranean Sea"},
+ {"Sea of Japan", "Pacific Ocean"},
+ {"Sea of Okhotsk", "Pacific Ocean"},
+ {"Sea of the Hebrides", "Atlantic Ocean"},
+ {"Sea of Zanj", "Indian Ocean"},
+ {"Seas of Greenland", "Atlantic Ocean"},
+ {"Sethusamudram", "Indian Ocean"},
+ {"Sibutu Passage", "Pacific Ocean"},
+ {"Solomon Sea", "Pacific Ocean"},
+ {"South China Sea", "Pacific Ocean"},
+ {"Sulu Sea", "Pacific Ocean"},
+ {"Tasman Sea", "Pacific Ocean"},
+ {"Thracian Sea", "Mediterranean Sea"},
+ {"Timor Sea", "Indian Ocean"},
+ {"Tyrrhenian Sea", "Mediterranean Sea"},
+ {"Wandel Sea", "Arctic Ocean"},
+ {"White Sea", "Arctic Ocean"},
+ {"Yellow Sea", "Pacific Ocean"}
+};
+
+/*
+ * FindSurroundingOcean
+ *
+ * Looks country up (ignoring case) in the sea column of sea_parents by
+ * binary search and returns the paired ocean string, or NULL when the name
+ * has no text or is not in the table.
+ */
+static CharPtr FindSurroundingOcean (
+  CharPtr country
+)
+
+{
+  Int2  lo, hi, mid;
+
+  if (StringHasNoText (country)) return NULL;
+
+  lo = 0;
+  hi = sizeof (sea_parents) / sizeof (sea_parents [0]) - 1;
+
+  /* narrow to the first entry that does not sort before country */
+  while (lo < hi) {
+    mid = (lo + hi) / 2;
+    if (StringICmp (sea_parents [mid].sea, country) >= 0) {
+      hi = mid;
+    } else {
+      lo = mid + 1;
+    }
+  }
+
+  if (StringICmp (sea_parents [hi].sea, country) != 0) return NULL;
+
+  return sea_parents [hi].ocean;
+}
+
+/*
+ * RepairCountryName
+ *
+ * Rewrites two known USA spellings into cbuf and returns cbuf:
+ *   "USA: Washington DC" / "USA: Washington, DC"  -> "USA: District of Columbia"
+ *   "USA:" name whose text from the first comma is ", Puerto Rico"
+ *                                                 -> "USA: Puerto Rico"
+ * (comparisons ignore case). Returns NULL when countryname has no text, and
+ * countryname itself, unchanged, in every other case (including names
+ * longer than 400 characters or not starting with "USA:").
+ */
+static CharPtr RepairCountryName (
+  CharPtr countryname,
+  CharPtr cbuf
+)
+
+{
+  CharPtr  comma;
+
+  if (StringHasNoText (countryname)) return NULL;
+  if (StringLen (countryname) > 400) return countryname;
+  if (StringNCmp (countryname, "USA:", 4) != 0) return countryname;
+
+  if (StringICmp (countryname, "USA: Washington DC") == 0 ||
+      StringICmp (countryname, "USA: Washington, DC") == 0) {
+    StringCpy (cbuf, "USA: District of Columbia");
+    return cbuf;
+  }
+
+  /* comma may be NULL here; it is handed to StringICmp exactly as before */
+  comma = StringChr (countryname, ',');
+  if (StringICmp (comma, ", Puerto Rico") == 0) {
+    StringCpy (cbuf, "USA: Puerto Rico");
+    return cbuf;
+  }
+
+  return countryname;
+}
+
static void NewerValidateCountryLatLon (
ValidStructPtr vsp,
GatherContextPtr gcp,
@@ -13417,7 +15176,7 @@ static void NewerValidateCountryLatLon (
{
Char buf0 [256], buf1 [256], buf2 [256];
- CharPtr country = NULL, province = NULL, fullname = NULL;
+ CharPtr country = NULL, province = NULL, fullname = NULL, parent;
CtrySetPtr csp;
Boolean format_ok = FALSE, lat_in_range = FALSE, lon_in_range = FALSE, precision_ok = FALSE;
FloatHi lat = 0.0;
@@ -13426,8 +15185,8 @@ static void NewerValidateCountryLatLon (
CharPtr ptr;
FloatHi scale = 1.0;
FloatHi neardist = 0.0;
- ELatLonAdjust adjust = eLatLonAdjust_none;
- Uint4 test, adjust_test = 0;
+ Uint4 adjust = eLatLonAdjust_none, adjust_test = eLatLonAdjust_none;
+ ELatLonAdjust test;
CharPtr fmt;
if (vsp == NULL || gcp == NULL) return;
@@ -13513,15 +15272,16 @@ static void NewerValidateCountryLatLon (
return;
}
- if (! CountryIsInLatLonList (country)) {
- if (! WaterIsInLatLonList (country)) {
- /* report unrecognized country */
- return;
- } else {
- /* report that it may refer to specific small body of water */
- /* continue to look for nearby country for proximity report */
- /* (do not return) */
- }
+ if (province != NULL) {
+ /* do not attempt quick exit */
+ } else if (CountryIsInLatLonList (country)) {
+ if (CountryContainsLatLon (country, lat, lon)) return;
+ } else if (WaterIsInLatLonList (country)) {
+ if (WaterContainsLatLon (country, lat, lon)) return;
+ } else if (StringICmp (country, "Palestine") == 0 || StringICmp (country, "State of Palestine") == 0) {
+ } else {
+ /* report unrecognized country */
+ return;
}
csp = GetLatLonCountryData ();
@@ -13536,37 +15296,42 @@ static void NewerValidateCountryLatLon (
/* calculate assignment or proximity by coordinates */
CalculateLatLonMap (lat, lon, country, province, scale, &llm);
+ if (llm.guesscountry == NULL && llm.guesswater != NULL) {
+ parent = FindSurroundingOcean (llm.guesswater);
+ if (parent != NULL) {
+ if (StringICmp (parent, country) == 0) return;
+ }
+ }
+
/* compare indicated country/province to guess/proximate country/water */
test = ClassifyLatLonMap (fullname, country, province, &llm);
+ if (!test /* && lat < 5.0 */ && llm.guesscountry != NULL && llm.guesswater == NULL) {
+ CheckForFlippedCoordinates (lat, lon, scale, country, province, fullname, &adjusted, &adjust_test, &adjust);
+ if (adjust_test && adjusted.guesscountry != NULL && adjusted.guesswater == NULL) {
+ test = adjust_test;
+ MemCopy (&llm, &adjusted, sizeof (LatLonMap));
+ } else {
+ adjust = eLatLonAdjust_none;
+ }
+ }
+
if (!test && CountryIsNearLatLon(country, lat, lon, 2.0, &neardist) && neardist < 5.0) {
llm.guesscountry = country;
llm.guessprovince = NULL;
test = ClassifyLatLonMap (fullname, country, province, &llm);
}
- if (!test && !CountryIsNearLatLon(country, lat, lon, 20.0, &neardist) && !WaterIsNearLatLon(country, lat, lon, 20.0, &neardist)) {
- CalculateLatLonMap (lon, lat, country, province, scale, &adjusted);
- adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted);
- if (adjust_test) {
- adjust = eLatLonAdjust_flip;
- } else {
- CalculateLatLonMap (-lat, lon, country, province, scale, &adjusted);
- adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted);
- if (adjust_test) {
- adjust = eLatLonAdjust_negate_lat;
- } else {
- CalculateLatLonMap (lat, -lon, country, province, scale, &adjusted);
- adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted);
- if (adjust_test) {
- adjust = eLatLonAdjust_negate_lon;
- }
- }
- }
-
- if (adjust_test) {
+ if (!test
+ && !CountryIsNearLatLon(country, lat, lon, 20.0, &neardist)
+ && !WaterIsNearLatLon(country, lat, lon, 20.0, &neardist)
+ /* && lat >= 5.0 */ && llm.guesscountry != NULL && llm.guesswater == NULL) {
+ CheckForFlippedCoordinates (lat, lon, scale, country, province, fullname, &adjusted, &adjust_test, &adjust);
+ if (adjust_test && adjusted.guesscountry != NULL && adjusted.guesswater == NULL) {
test = adjust_test;
MemCopy (&llm, &adjusted, sizeof (LatLonMap));
+ } else {
+ adjust = eLatLonAdjust_none;
}
}
@@ -13599,7 +15364,16 @@ static void NewerValidateCountryLatLon (
} else if (llm.guesswater != NULL) {
LatLonWaterErrors(vsp, &llm, test, neardist, country, province, lat_lon, fullname, scale);
} else if (llm.guesscountry != NULL) {
- LatLonLandErrors (vsp, &llm, country, province, lat_lon, fullname);
+ if (StringICmp (llm.guesscountry, "Hong Kong") == 0 && StringICmp (country, "China") == 0) {
+ /* Hong Kong okay as China */
+ } else if (StringICmp (llm.guesscountry, "Puerto Rico") == 0 && StringICmp (country, "USA") == 0) {
+ } else if ((StringICmp (llm.guesscountry, "Gaza Strip") == 0 ||
+ StringICmp (llm.guesscountry, "West Bank") == 0) &&
+ (StringICmp (country, "Palestine") == 0 ||
+ StringICmp (country, "State of Palestine") == 0)) {
+ } else {
+ LatLonLandErrors (vsp, &llm, country, province, lat_lon, fullname);
+ }
} else if (llm.closestcountry != NULL) {
fmt = "Lat_lon '%s' is closest to '%s' instead of '%s'";
ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, llm.closestcountry, fullname);
@@ -13660,14 +15434,46 @@ static Boolean IsUnexpectedViralOrgModQualifier (Uint1 subtype)
return rval;
}
+/*
+ * ValGetDbtagStr
+ *
+ * Returns the text of a Dbtag's tag: the tag's own string when it has one,
+ * otherwise its non-zero integer id printed in decimal into buf. Returns ""
+ * when dbt, buf or the tag is NULL, or when the tag has neither a string
+ * nor a non-zero id. buf must be large enough for a decimal long.
+ */
+static CharPtr ValGetDbtagStr (DbtagPtr dbt, CharPtr buf)
+
+{
+  ObjectIdPtr  tag;
+
+  if (dbt == NULL || buf == NULL) return "";
+
+  tag = dbt->tag;
+  if (tag == NULL) return "";
+
+  /* a string tag takes precedence over the numeric id */
+  if (tag->str != NULL) return tag->str;
+
+  if (tag->id == 0) return "";
+
+  sprintf (buf, "%ld", (long) tag->id);
+  return buf;
+}
+
+/**********************************************************/
+/*
+ * Returns TRUE when StringISearch finds pat in name and the match sits
+ * either at the very start of name or immediately after a space;
+ * FALSE otherwise.
+ */
+static Boolean s_IfContains(CharPtr name, CharPtr pat)
+{
+    CharPtr hit;
+
+    hit = StringISearch(name, pat);
+    if(hit == NULL)
+        return(FALSE);
+
+    /* match at the beginning of the string */
+    if(hit == name)
+        return(TRUE);
+
+    /* otherwise the preceding character must be a space */
+    if(hit[-1] == ' ')
+        return(TRUE);
+
+    return(FALSE);
+}
static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSourcePtr biop, SeqFeatPtr sfp, ValNodePtr sdp)
{
Char badch;
Boolean bad_cap = FALSE;
Boolean bad_frequency;
- BioseqPtr bsp;
+ BioseqPtr bsp = NULL;
BioseqSetPtr bssp;
+ Char buf [32];
+ Char cbuf [512];
Char ch;
Boolean chromconf = FALSE;
Int2 chromcount = 0;
@@ -13677,26 +15483,26 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValNodePtr db;
DbtagPtr dbt;
CharPtr gb_synonym = NULL;
- Boolean germline = FALSE;
CharPtr good;
+ Boolean has_isolate = FALSE;
Boolean has_strain = FALSE;
+ Boolean has_taxon = FALSE;
Boolean has_fwd_pcr_seq = FALSE;
Boolean has_rev_pcr_seq = FALSE;
Boolean has_pcr_name = FALSE;
Boolean has_metagenome_source = FALSE;
- Boolean has_plasmid = FALSE;
Int4 id;
- Boolean is_env_sample = FALSE;
Boolean is_iso_source = FALSE;
Boolean is_mating_type = FALSE;
- Boolean is_metagenomic = FALSE;
Boolean is_sex = FALSE;
Boolean is_specific_host = FALSE;
- Boolean is_transgenic = FALSE;
Boolean isAnimal = FALSE;
Boolean isArchaea = FALSE;
Boolean isBacteria = FALSE;
+ Boolean isBioSample = FALSE;
+ Boolean isEukaryote = FALSE;
Boolean isFungal = FALSE;
+ Boolean isMicrosporidia = FALSE;
Boolean isPlant = FALSE;
Boolean isViral = FALSE;
Boolean is_bc;
@@ -13704,6 +15510,8 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
Boolean is_sc;
CharPtr last_db = NULL;
CharPtr lat_lon = NULL;
+ size_t len;
+ Int2 num_altitude = 0;
Int2 num_bio_material = 0;
Int2 num_collection_dates = 0;
Int2 num_culture_collection = 0;
@@ -13714,6 +15522,12 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
Int2 num_rev_primer_seq = 0;
Int2 num_fwd_primer_name = 0;
Int2 num_rev_primer_name = 0;
+ Int2 num_plasmid_name = 0;
+ Int2 num_germline = 0;
+ Int2 num_rearranged = 0;
+ Int2 num_transgenic = 0;
+ Int2 num_metagenomic = 0;
+ Int2 num_env_sample = 0;
ObjectIdPtr oip;
Boolean old_country = FALSE;
OrgNamePtr onp;
@@ -13723,23 +15537,28 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
Int4 primer_len_before;
Int4 primer_len_after;
ValNodePtr pset;
- Boolean rearranged = FALSE;
SeqEntryPtr sep;
ErrSev sev;
SubSourcePtr ssp;
CharPtr str;
CharPtr synonym = NULL;
+ UserFieldPtr ufp;
+ UserObjectPtr uop;
+ ValNodePtr vnp;
Boolean varietyOK;
CharPtr inst1, inst2, id1, id2, coll1, coll2;
Char buf1 [512], buf2 [512];
PCRPrimerPtr ppp;
PCRReactionSetPtr prp;
+ SeqMgrDescContext dcontext;
+ CharPtr p;
+ if (vsp == NULL) return;
if (vsp->sourceQualTags == NULL) {
InitializeSourceQualTags (vsp);
}
- if (biop == NULL)
- return;
+ if (biop == NULL) return;
+
if (biop->genome == GENOME_transposon || biop->genome == GENOME_insertion_seq) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_ObsoleteSourceLocation,
"Transposon and insertion sequence are no longer legal locations");
@@ -13751,38 +15570,103 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
orp = biop->org;
if (orp != NULL) {
+
+ for (db = orp->db; db != NULL; db = db->next) {
+ dbt = (DbtagPtr) db->data.ptrvalue;
+ if (dbt == NULL) continue;
+ if (StringICmp (dbt->db, "taxon") == 0) {
+ has_taxon = TRUE;
+ }
+ }
+
onp = orp->orgname;
if (onp != NULL) {
- if (StringNICmp (onp->lineage, "Viruses; ", 9) == 0) {
- isViral = TRUE;
- } else if (StringNICmp (onp->lineage, "Eukaryota; Metazoa; ", 20) == 0) {
- isAnimal = TRUE;
- } else if (StringNICmp (onp->lineage, "Eukaryota; Viridiplantae; Streptophyta; Embryophyta; ", 53) == 0 ||
- StringNICmp (onp->lineage, "Eukaryota; Rhodophyta; ", 23) == 0 ||
- StringNICmp (onp->lineage, "Eukaryota; stramenopiles; Phaeophyceae; ", 40) == 0) {
- isPlant = TRUE;
+ if (StringNICmp (onp->lineage, "Eukaryota; ", 11) == 0) {
+ isEukaryote = TRUE;
+ if (StringNICmp (onp->lineage, "Eukaryota; Metazoa; ", 20) == 0) {
+ isAnimal = TRUE;
+ } else if (StringNICmp (onp->lineage, "Eukaryota; Viridiplantae; Streptophyta; Embryophyta; ", 53) == 0 ||
+ StringNICmp (onp->lineage, "Eukaryota; Rhodophyta; ", 23) == 0 ||
+ StringNICmp (onp->lineage, "Eukaryota; stramenopiles; Phaeophyceae; ", 40) == 0) {
+ isPlant = TRUE;
+ } else if (StringNICmp (onp->lineage, "Eukaryota; Fungi; ", 18) == 0) {
+ isFungal = TRUE;
+ if (StringNICmp (onp->lineage, "Eukaryota; Fungi; Microsporidia; ", 33) == 0) {
+ isMicrosporidia = TRUE;
+ }
+ }
} else if (StringNICmp (onp->lineage, "Bacteria; ", 10) == 0) {
isBacteria = TRUE;
} else if (StringNICmp (onp->lineage, "Archaea; ", 9) == 0) {
isArchaea = TRUE;
- } else if (StringNICmp (onp->lineage, "Eukaryota; Fungi; ", 18) == 0) {
- isFungal = TRUE;
+ } else if (StringNICmp (onp->lineage, "Viruses; ", 9) == 0) {
+ isViral = TRUE;
+ }
+ }
+ }
+
+ if (isBacteria) {
+ bsp = NULL;
+ if (sfp != NULL) {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ } else if (sdp != NULL && sdp->extended != 0) {
+ ovp = (ObjValNodePtr) sdp;
+ if (ovp->idx.parenttype == OBJ_BIOSEQ) {
+ bsp = (BioseqPtr) ovp->idx.parentptr;
+ } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
+ bssp = (BioseqSetPtr) ovp->idx.parentptr;
+ if (bssp != NULL) {
+ sep = bssp->seqentry;
+ if (sep != NULL) {
+ sep = FindNthBioseq (sep, 1);
+ if (sep != NULL && IS_Bioseq (sep)) {
+ bsp = (BioseqPtr) sep->data.ptrvalue;
+ }
+ }
+ }
+ }
+ }
+ if (bsp != NULL) {
+ vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ while (vnp != NULL) {
+ uop = (UserObjectPtr) vnp->data.ptrvalue;
+ if (uop != NULL) {
+ oip = uop->type;
+ if (oip != NULL && StringICmp (oip->str, "DBLink") == 0) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip == NULL || oip->str == NULL) continue;
+ if (StringICmp (oip->str, "BioSample") == 0 && (ufp->choice == 1 || ufp->choice == 7)) {
+ isBioSample = TRUE;
+ }
+ }
+ }
+ }
+ vnp = SeqMgrGetNextDescriptor (bsp, vnp, Seq_descr_user, &dcontext);
}
}
}
ssp = biop->subtype;
while (ssp != NULL) {
+ str = ssp->name;
+ if (StringCmp (str, "N/A") == 0 || StringCmp (str, "Missing") == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Subsource name should not be %s", str);
+ }
if (ssp->subtype == SUBSRC_country) {
num_country++;
- if (countryname != NULL) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCountryCode, "Multiple country names on BioSource");
- }
countryname = ssp->name;
if (CountryIsValid (countryname, &old_country, &bad_cap)) {
if (bad_cap) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCountryCapitalization, "Bad country capitalization [%s]", countryname);
}
+ len = StringLen (countryname);
+ if (len > 0 && countryname [len - 1] == ':') {
+ //LCOV_EXCL_START
+ //BasicCleanup strips colon from end of country name
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCountryCode, "Colon at end of country name [%s]", countryname);
+ //LCOV_EXCL_STOP
+ }
} else {
if (StringHasNoText (countryname)) {
countryname = "?";
@@ -13796,7 +15680,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
sev = SEV_WARNING;
}
*/
- sev = SEV_WARNING;
+ sev = SEV_ERROR;
ValidErr (vsp, sev, ERR_SEQ_DESCR_BadCountryCode, "Bad country name [%s]", countryname);
}
}
@@ -13813,54 +15697,83 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_ObsoleteSourceQual,
"Transposon name and insertion sequence name are no longer legal qualifiers");
} else if (ssp->subtype == 0) {
+ //LCOV_EXCL_START
+ //not valid ASN.1
ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_BadSubSource, "Unknown subsource subtype %d", (int) (ssp->subtype));
+ //LCOV_EXCL_STOP
} else if (ssp->subtype == SUBSRC_other) {
ValidateSourceQualTags (vsp, gcp, biop, ssp->name);
} else if (ssp->subtype == SUBSRC_germline) {
- germline = TRUE;
+ num_germline++;
str = ssp->name;
if (str == NULL || str [0] != '\0') {
+ //LCOV_EXCL_START
+ //BasicCleanup removes germline text
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Germline qualifier should not have descriptive text");
+ //LCOV_EXCL_STOP
}
} else if (ssp->subtype == SUBSRC_rearranged) {
- rearranged = TRUE;
+ num_rearranged++;
str = ssp->name;
if (str == NULL || str [0] != '\0') {
+ //LCOV_EXCL_START
+ //BasicCleanup removes rearranged text
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Rearranged qualifier should not have descriptive text");
+ //LCOV_EXCL_STOP
}
} else if (ssp->subtype == SUBSRC_transgenic) {
- is_transgenic = TRUE;
+ num_transgenic++;
str = ssp->name;
if (str == NULL || str [0] != '\0') {
+ //LCOV_EXCL_START
+ //BasicCleanup removes transgenic text
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Transgenic qualifier should not have descriptive text");
+ //LCOV_EXCL_STOP
}
} else if (ssp->subtype == SUBSRC_environmental_sample) {
- is_env_sample = TRUE;
+ num_env_sample++;
str = ssp->name;
if (str == NULL || str [0] != '\0') {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Environmental_sample qualifier should not have descriptive text");
+ //LCOV_EXCL_START
+ //BasicCleanup removes environmental-sample text
+ ValidErr(vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Environmental_sample qualifier should not have descriptive text");
+ //LCOV_EXCL_STOP
}
} else if (ssp->subtype == SUBSRC_metagenomic) {
- is_metagenomic = TRUE;
+ num_metagenomic++;
str = ssp->name;
if (str == NULL || str [0] != '\0') {
+ //LCOV_EXCL_START
+ //BasicCleanup removes metagenomic text
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Metagenomic qualifier should not have descriptive text");
+ //LCOV_EXCL_STOP
}
} else if (ssp->subtype == SUBSRC_isolation_source) {
is_iso_source = TRUE;
} else if (ssp->subtype == SUBSRC_sex) {
is_sex = TRUE;
str = ssp->name;
+ if (StringHasNoText (str)) {
+ str = "?";
+ }
+ sev = SEV_WARNING;
+ if (IsGenomicPipeline (vsp)) {
+ sev = SEV_ERROR;
+ }
if (isAnimal || isPlant) {
- /* always use /sex, do not check values at this time */
+ /* always allow /sex, but now check values */
+ if (! IsValidSexValue (str)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "Invalid value (%s) for /sex qualifier", str);
+ }
} else if (isViral) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected sex qualifier");
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected Sex qualifier");
} else if (isBacteria || isArchaea || isFungal) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Unexpected use of /sex qualifier");
- } else if (IsValidSexValue (str)) {
- /* otherwise expect male or female, or a few others */
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_BioSourceInconsistency, "Unexpected use of /sex qualifier");
} else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Unexpected use of /sex qualifier");
+ if (! IsValidSexValue (str)) {
+ /* otherwise expect male or female, or a few others */
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "Invalid value (%s) for /sex qualifier", str);
+ }
}
} else if (ssp->subtype == SUBSRC_mating_type) {
is_mating_type = TRUE;
@@ -13872,7 +15785,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Unexpected use of /mating_type qualifier");
}
} else if (ssp->subtype == SUBSRC_plasmid_name) {
- has_plasmid = TRUE;
+ num_plasmid_name++;
if (biop->genome != GENOME_plasmid) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plasmid subsource but not plasmid location");
}
@@ -13920,7 +15833,12 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
else if (CollectionDateIsInTheFuture (ssp->name)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCollectionDate, "Collection_date is in the future");
}
+ else if (! CollectionDatesInOrder (ssp->name)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCollectionDate, "Collection_dates are out of order");
+ }
} else if (ssp->subtype == SUBSRC_fwd_primer_seq) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts to biosource.pcr_primers structure
num_fwd_primer_seq++;
has_fwd_pcr_seq = TRUE;
if (! PrimerSeqIsValid (vsp, ssp->name, &badch)) {
@@ -13936,7 +15854,10 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
"PCR forward primer sequence has duplicates");
}
*/
+ //LCOV_EXCL_STOP
} else if (ssp->subtype == SUBSRC_rev_primer_seq) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts to biosource.pcr_primers structure
num_rev_primer_seq++;
has_rev_pcr_seq = TRUE;
if (! PrimerSeqIsValid (vsp, ssp->name, &badch)) {
@@ -13952,7 +15873,10 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
"PCR reverse primer sequence has duplicates");
}
*/
+ //LCOV_EXCL_STOP
} else if (ssp->subtype == SUBSRC_fwd_primer_name) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts to biosource.pcr_primers structure
num_fwd_primer_name++;
if (StringLen (ssp->name) > 10 && PrimerSeqIsValid (vsp, ssp->name, &badch)) {
if (badch < ' ' || badch > '~') {
@@ -13961,7 +15885,10 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerName, "PCR primer name appears to be a sequence");
}
has_pcr_name = TRUE;
+ //LCOV_EXCL_STOP
} else if (ssp->subtype == SUBSRC_rev_primer_name) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts to biosource.pcr_primers structure
num_rev_primer_name++;
if (StringLen (ssp->name) > 10 && PrimerSeqIsValid (vsp, ssp->name, &badch)) {
if (badch < ' ' || badch > '~') {
@@ -13970,6 +15897,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerName, "PCR primer name appears to be a sequence");
}
has_pcr_name = TRUE;
+ //LCOV_EXCL_STOP
} else if (ssp->subtype == SUBSRC_lat_lon) {
num_lat_lon++;
if (lat_lon != NULL) {
@@ -13977,6 +15905,11 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
lat_lon = ssp->name;
ValidateLatLon (vsp, lat_lon);
+ } else if (ssp->subtype == SUBSRC_altitude) {
+ num_altitude++;
+ if (! AltitudeIsValid (ssp->name)) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_BadAltitude, "bad altitude qualifier value %s", ssp->name);
+ }
} else if (ssp->subtype == SUBSRC_frequency) {
str = ssp->name;
if (StringDoesHaveText (str)) {
@@ -14022,8 +15955,9 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
if (biop->genome == GENOME_plasmid) {
- if (! has_plasmid) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plasmid location but not plasmid subsource");
+ if (!num_plasmid_name) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency,
+ "Plasmid location set but plasmid name missing. Add a plasmid source modifier with the plasmid name. Use unnamed if the name is not known.");
}
}
@@ -14033,6 +15967,11 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (num_lat_lon > 1) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple lat_lon qualifiers present");
}
+ if (num_altitude > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple altitude qualifiers present");
+ }
+ //LCOV_EXCL_START
+ //BasicCleanup converts these to Biosource.pcr_primers structure
if (num_fwd_primer_seq > 1) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple fwd_primer_seq qualifiers present");
}
@@ -14045,14 +15984,39 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (num_rev_primer_name > 1) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple rev_primer_name qualifiers present");
}
+ //LCOV_EXCL_STOP
+ if (num_plasmid_name > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple plasmid_name qualifiers present");
+ }
+ //LCOV_EXCL_START
+ //BasicCleanup removes duplicates
+ if (num_germline > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple germline qualifiers present");
+ }
+ if (num_rearranged > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple rearranged qualifiers present");
+ }
+ if (num_transgenic > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple transgenic qualifiers present");
+ }
+ if (num_metagenomic > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple metagenomic qualifiers present");
+ }
+ if (num_env_sample > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple environtal_sample qualifiers present");
+ }
+ //LCOV_EXCL_STOP
if (countryname != NULL && lat_lon != NULL) {
csp = GetLatLonCountryData ();
if (csp != NULL) {
+ countryname = RepairCountryName (countryname, cbuf);
NewerValidateCountryLatLon (vsp, gcp, countryname, lat_lon);
}
}
+ //LCOV_EXCL_START
+ //BasicCleanup converts these to Biosource.pcr_primers structure
if (has_pcr_name) {
if ((! has_fwd_pcr_seq) || (! has_rev_pcr_seq)) {
/*
@@ -14077,6 +16041,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
"PCR primer sequence has duplicates");
}
}
+ //LCOV_EXCL_STOP
for (prp = biop->pcr_primers; prp != NULL; prp = prp->next) {
@@ -14113,13 +16078,13 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
}
- if (germline && rearranged) {
+ if (num_germline && num_rearranged) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Germline and rearranged should not both be present");
}
- if (is_transgenic && is_env_sample) {
+ if (num_transgenic && num_env_sample) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Transgenic and environmental sample should not both be present");
}
- if (is_metagenomic && (! is_env_sample)) {
+ if (num_metagenomic && (! num_env_sample)) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_BioSourceInconsistency, "Metagenomic should also have environmental sample annotated");
}
if (is_sex && is_mating_type) {
@@ -14129,14 +16094,17 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (biop->org != NULL
&& biop->org->orgname != NULL
&& StringISearch (biop->org->orgname->lineage, "metagenomes") != NULL
- && !is_metagenomic) {
+ && !num_metagenomic) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "If metagenomes appears in lineage, BioSource should have metagenomic qualifier");
}
if (chromcount > 1) {
if (chromconf) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleChromosomes, "Multiple conflicting chromosome qualifiers");
} else {
+ //LCOV_EXCL_START
+ //cleanup removes identical chromosome subsources
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleChromosomes, "Multiple identical chromosome qualifiers");
+ //LCOV_EXCL_STOP
}
}
orp = biop->org;
@@ -14173,7 +16141,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
}
if (StringNICmp (orp->taxname, "uncultured ", 11) == 0) {
- if (! is_env_sample) {
+ if (! num_env_sample) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Uncultured should also have /environmental_sample");
}
}
@@ -14186,6 +16154,17 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (StringHasSgml (vsp, str)) {
ValidErr (vsp, SEV_WARNING, ERR_GENERIC_SgmlPresentInText, "taxname %s has SGML", str);
}
+
+ p = StringRChr(str, ' ');
+ if(p != NULL &&
+ (StringICmp(p + 1, "sp.") == 0 || StringICmp(p + 1, "sp") == 0) &&
+ StringNICmp(str, "uncultured ", 11) != 0 &&
+ StringICmp(str, "Haemoproteus sp.") != 0 &&
+ s_IfContains(str, "endosymbiont ") == FALSE &&
+ s_IfContains(str, "symbiont ") == FALSE)
+ ValidErr (vsp, SEV_INFO,
+ ERR_SEQ_DESCR_OrganismIsUndefinedSpecies, "Organism '%s' is undefined species and does not have a specific identifier.",
+ str);
}
}
@@ -14210,29 +16189,29 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_NoOrgFound, "No organism name has been applied to this Bioseq. Other qualifiers may exist.");
}
if (orp == NULL) {
- if (is_env_sample && (! is_iso_source) && (! is_specific_host)) {
+ //LCOV_EXCL_START
+ //Not valid ASN.1
+ if (num_env_sample && (! is_iso_source) && (! is_specific_host)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Environmental sample should also have isolation source or specific host annotated");
}
return;
+ //LCOV_EXCL_STOP
}
onp = orp->orgname;
if (onp == NULL || StringHasNoText (onp->lineage)) {
if (! vsp->seqSubmitParent && vsp->indexerVersion) { /* suppress when validator run from tbl2asn or when not indexer version */
sev = SEV_ERROR;
if (vsp->is_refseq_in_sep) {
- for (db = orp->db; db != NULL; db = db->next) {
- dbt = (DbtagPtr) db->data.ptrvalue;
- if (dbt != NULL) {
- if (StringICmp (dbt->db, "taxon") == 0) {
- sev = SEV_REJECT;
- }
- }
+ if (has_taxon) {
+ sev = SEV_REJECT;
}
}
if (vsp->is_embl_ddbj_in_sep) {
sev = SEV_WARNING;
}
- ValidErr (vsp, sev, ERR_SEQ_DESCR_MissingLineage, "No lineage for this BioSource.");
+ if (! vsp->is_wp_in_sep) {
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_MissingLineage, "No lineage for this BioSource.");
+ }
}
} else {
if (biop->genome == GENOME_kinetoplast) {
@@ -14255,6 +16234,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
|| biop->genome == GENOME_genomic
|| biop->genome == GENOME_plasmid
|| biop->genome == GENOME_chromosome
+ || (biop->genome == GENOME_extrachrom && StringCmp (onp->div, "BCT") == 0)
|| (biop->genome == GENOME_proviral && StringCmp (onp->div, "VRL") == 0)) {
/* it's ok */
} else {
@@ -14262,7 +16242,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
}
- if (StringCmp (onp->div, "ENV") == 0 && (! is_env_sample)) {
+ if (StringCmp (onp->div, "ENV") == 0 && (! num_env_sample)) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "BioSource with ENV division is missing environmental sample subsource");
}
}
@@ -14281,19 +16261,43 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
omp = onp->mod;
varietyOK = FALSE;
while (omp != NULL) {
+ str = omp->subname;
+ if (StringCmp (str, "N/A") == 0 || StringCmp (str, "Missing") == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Orgmod name should not be %s", str);
+ }
if (omp->subtype == 0 || omp->subtype == 1) {
+ //LCOV_EXCL_START
+ //Not valid ASN.1
ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_BadOrgMod, "Unknown orgmod subtype %d", (int) (omp->subtype));
+ //LCOV_EXCL_STOP
} else if (omp->subtype == ORGMOD_strain) {
+ str = omp->subname;
+ if (StringNCmp (str, "subsp. ", 7) == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Orgmod.strain should not start with subsp.");
+ } else if (StringNCmp (str, "serovar ", 8) == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Orgmod.strain should not start with serovar");
+ }
if (has_strain) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrgMod, "Multiple strain qualifiers on the same BioSource");
}
has_strain = TRUE;
+ } else if (omp->subtype == ORGMOD_isolate) {
+ has_isolate = TRUE;
+ } else if (omp->subtype == ORGMOD_serovar) {
+ str = omp->subname;
+ if (StringNCmp (str, "subsp. ", 7) == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Orgmod.serovar should not start with subsp.");
+ } else if (StringNCmp (str, "strain ", 7) == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Orgmod.serovar should not start with strain");
+ }
} else if (omp->subtype == ORGMOD_variety) {
if ((StringHasNoText (onp->div) || StringICmp (onp->div, "PLN") != 0) &&
StringStr (onp->lineage, "Cyanobacteria") == 0 &&
StringStr (onp->lineage, "Myxogastria") == 0 &&
StringStr (onp->lineage, "Oomycetes") == 0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrgMod, "Orgmod variety should only be in plants, fungi, or cyanobacteria");
+ if (! has_taxon) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrgMod, "Orgmod variety should only be in plants, fungi, or cyanobacteria");
+ }
}
varietyOK = ValidateOrgModInTaxName (vsp, omp, orp->taxname, varietyOK);
} else if (omp->subtype == ORGMOD_nat_host) {
@@ -14306,9 +16310,14 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
} else if (omp->subtype == ORGMOD_biovar ||
omp->subtype == ORGMOD_forma ||
omp->subtype == ORGMOD_forma_specialis ||
- omp->subtype == ORGMOD_sub_species ||
omp->subtype == ORGMOD_pathovar) {
ValidateOrgModInTaxName (vsp, omp, orp->taxname, varietyOK);
+ } else if (omp->subtype == ORGMOD_sub_species) {
+ str = omp->subname;
+ if (StringStr (str, "subsp. ") != NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Orgmod.sub-species should not contain subsp.");
+ }
+ ValidateOrgModInTaxName (vsp, omp, orp->taxname, varietyOK);
} else if (omp->subtype == ORGMOD_specimen_voucher) {
num_specimen_voucher++;
ValidateOrgModVoucher (vsp, omp);
@@ -14320,10 +16329,19 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidateOrgModVoucher (vsp, omp);
} else if (omp->subtype == ORGMOD_metagenome_source) {
has_metagenome_source = TRUE;
+ } else if (omp->subtype == ORGMOD_type_material) {
+ if (! TypeMaterialIsValid (omp->subname)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrgMod, "Bad value for type_material");
+ }
} else if (omp->subtype == ORGMOD_common) {
+ //LCOV_EXCL_START
+ //don't care
if (StringICmp (omp->subname, orp->common) == 0) {
+ /*
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrgMod, "OrgMod common is identical to Org-ref common");
+ */
}
+ //LCOV_EXCL_STOP
} else if (omp->subtype == ORGMOD_synonym) {
synonym = omp->subname;
} else if (omp->subtype == ORGMOD_gb_synonym) {
@@ -14393,6 +16411,14 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
}
+ if (vsp->indexerVersion) {
+ if (isBacteria && isBioSample) {
+ if ( ! has_strain && ! has_isolate && ! num_env_sample ) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "Bacteria should have strain or isolate or environmental sample");
+ }
+ }
+ }
+
/*
if (num_bio_material > 1) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple bio_material qualifiers present");
@@ -14407,13 +16433,13 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (num_collection_dates > 1) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple collection_date qualifiers present");
}
- if (is_env_sample && has_strain) {
+ if (num_env_sample && has_strain) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "Strain should not be present in an environmental sample");
}
- if (is_env_sample && (! is_iso_source) && (! is_specific_host)) {
+ if (num_env_sample && (! is_iso_source) && (! is_specific_host)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Environmental sample should also have isolation source or specific host annotated");
}
- if (has_metagenome_source && (! is_metagenomic)) {
+ if (has_metagenome_source && (! num_metagenomic)) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "Metagenome source should also have metagenomic qualifier");
}
if (StringDoesHaveText (synonym) && StringDoesHaveText (gb_synonym)) {
@@ -14434,27 +16460,27 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
if (is_sc) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "Illegal db_xref type %s, legal capitalization is %s", dbt->db, good);
+ "Illegal db_xref type %s (%s), legal capitalization is %s", dbt->db, ValGetDbtagStr (dbt, buf), good);
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "Illegal db_xref type %s, legal capitalization is %s, but should not be used on an OrgRef",
- dbt->db, good);
+ "Illegal db_xref type %s (%s), legal capitalization is %s, but should not be used on an OrgRef",
+ dbt->db, ValGetDbtagStr (dbt, buf), good);
}
} else if (is_rf) {
if (vsp->is_refseq_in_sep || vsp->is_gps_in_sep) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "RefSeq-specific db_xref type %s should not used on an OrgRef", dbt->db);
+ "RefSeq-specific db_xref type %s (%s) should not be used on an OrgRef", dbt->db, ValGetDbtagStr (dbt, buf));
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "RefSeq-specific db_xref type %s should not used on a non-RefSeq OrgRef", dbt->db);
+ "RefSeq-specific db_xref type %s (%s) should not be used on a non-RefSeq OrgRef", dbt->db, ValGetDbtagStr (dbt, buf));
}
} else if (is_sc) {
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "db_xref type %s should not used on an OrgRef", dbt->db);
+ "db_xref type %s (%s) should not be used on an OrgRef", dbt->db, ValGetDbtagStr (dbt, buf));
}
} else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s", dbt->db);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s (%s)", dbt->db, ValGetDbtagStr (dbt, buf));
}
if (StringDoesHaveText (dbt->db)) {
@@ -14483,13 +16509,8 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (! vsp->indexerVersion) return;
- for (db = orp->db; db != NULL; db = db->next) {
- dbt = (DbtagPtr) db->data.ptrvalue;
- if (dbt != NULL) {
- if (StringICmp (dbt->db, "taxon") == 0)
- return;
- }
- }
+ if (has_taxon) return;
+
if (! vsp->seqSubmitParent) { /* suppress when validator run from tbl2asn */
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_NoTaxonID, "BioSource is missing taxon ID");
}
@@ -14638,6 +16659,15 @@ static Boolean BadBSFormat (CharPtr str)
if (ch != 'E' && ch != 'N' && ch != 'D') return TRUE;
ptr = str + 4;
+
+ /* EBI alternative format */
+ if (ch == 'E') {
+ ch = *ptr;
+ if (IS_ALPHA (ch)) {
+ ptr++;
+ }
+ }
+
ch = *ptr;
while (ch != '\0') {
if (! IS_DIGIT (ch)) return TRUE;
@@ -14730,6 +16760,16 @@ static Boolean BadBPFormat (CharPtr str)
return FALSE;
}
+static CharPtr dblink_names [] = {
+ "Trace Assembly Archive",
+ "ProbeDB",
+ "Assembly",
+ "BioSample",
+ "Sequence Read Archive",
+ "BioProject",
+ NULL
+};
+
static void ValidateDblink (ValidStructPtr vsp, UserObjectPtr uop)
{
@@ -14746,37 +16786,76 @@ static void ValidateDblink (ValidStructPtr vsp, UserObjectPtr uop)
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
if (oip == NULL || oip->str == NULL) continue;
- if (StringICmp (oip->str, "Trace Assembly Archive") == 0 && ufp->choice == 8) {
- } else if (StringICmp (oip->str, "BioSample") == 0 && ufp->choice == 7) {
- cpp = (CharPtr PNTR) ufp->data.ptrvalue;
- if (ufp->num < 1 || cpp == NULL) continue;
- for (i = 0; i < ufp->num; i++) {
- str = cpp [i];
- if (StringHasNoText (str)) continue;
- if (BadBSFormat (str) && BadAltBSFormat (str)) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad BioSample format - %s", str);
+ if (StringICmp (oip->str, "Trace Assembly Archive") == 0) {
+ } else if (StringICmp (oip->str, "ProbeDB") == 0) {
+ } else if (StringICmp (oip->str, "Assembly") == 0) {
+ } else if (StringICmp (oip->str, "BioSample") == 0) {
+ if (ufp->choice == 1) {
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ if (BadBSFormat (str) && BadAltBSFormat (str)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad BioSample format - %s", str);
+ }
+ }
+ } else if (ufp->choice == 7) {
+ cpp = (CharPtr PNTR) ufp->data.ptrvalue;
+ if (ufp->num < 1 || cpp == NULL) continue;
+ for (i = 0; i < ufp->num; i++) {
+ str = cpp [i];
+ if (StringHasNoText (str)) continue;
+ if (BadBSFormat (str) && BadAltBSFormat (str)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad BioSample format - %s", str);
+ }
}
}
- } else if (StringICmp (oip->str, "ProbeDB") == 0 && ufp->choice == 7) {
} else if (StringICmp (oip->str, "Sequence Read Archive") == 0 && ufp->choice == 7) {
- cpp = (CharPtr PNTR) ufp->data.ptrvalue;
- if (ufp->num < 1 || cpp == NULL) continue;
- for (i = 0; i < ufp->num; i++) {
- str = cpp [i];
- if (StringHasNoText (str)) continue;
- if (BadSRAFormat (str)) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad Sequence Read Archive format - %s", str);
+ if (ufp->choice == 1) {
+ //LCOV_EXCL_START
+ //DUH. choice is required to be 7, will not be 1
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ if (BadSRAFormat (str)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad Sequence Read Archive format - %s", str);
+ }
+ }
+ //LCOV_EXCL_STOP
+ } else if (ufp->choice == 7) {
+ cpp = (CharPtr PNTR) ufp->data.ptrvalue;
+ if (ufp->num < 1 || cpp == NULL) continue;
+ for (i = 0; i < ufp->num; i++) {
+ str = cpp [i];
+ if (StringHasNoText (str)) continue;
+ if (BadSRAFormat (str)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad Sequence Read Archive format - %s", str);
+ }
}
}
} else if (StringICmp (oip->str, "BioProject") == 0 && ufp->choice == 7) {
- cpp = (CharPtr PNTR) ufp->data.ptrvalue;
- if (ufp->num < 1 || cpp == NULL) continue;
- for (i = 0; i < ufp->num; i++) {
- str = cpp [i];
- if (StringHasNoText (str)) continue;
- if (BadBPFormat (str)) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_DBLinkProblem, "Bad BioProject format - %s", str);
+ if (ufp->choice == 1) {
+ //LCOV_EXCL_START
+ //DUH. choice is required to be 7, will not be 1
+ str = (CharPtr) ufp->data.ptrvalue;
+ if (StringDoesHaveText (str)) {
+ if (BadBPFormat (str)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad BioProject format - %s", str);
+ }
}
+ //LCOV_EXCL_STOP
+ } else if (ufp->choice == 7) {
+ cpp = (CharPtr PNTR) ufp->data.ptrvalue;
+ if (ufp->num < 1 || cpp == NULL) continue;
+ for (i = 0; i < ufp->num; i++) {
+ str = cpp [i];
+ if (StringHasNoText (str)) continue;
+ if (BadBPFormat (str)) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_DBLinkProblem, "Bad BioProject format - %s", str);
+ }
+ }
+ }
+ }
+ for (i = 0; dblink_names [i] != NULL; i++) {
+ if (StringICmp (oip->str, dblink_names [i]) == 0 && StringCmp (oip->str, dblink_names [i]) != 0) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_DESCR_DBLinkProblem, "Bad DBLink capitalization - %s", oip->str);
}
}
}
@@ -14810,6 +16889,7 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
SeqMgrFeatContext fcontext;
Int2 baddate;
static char *badmod = "Inconsistent GIBB-mod [%d] and [%d]";
+ CharPtr p;
vsp->sfp = NULL;
vnp = sdp;
@@ -14990,19 +17070,19 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
}
if (bvsp->last_create != NULL) {
tmpval = (int) DateMatch ((DatePtr) vnp->data.ptrvalue, (DatePtr) (bvsp->last_create->data.ptrvalue), FALSE);
- if (tmpval) {
+ if (tmpval && vsp->has_gi_or_accn_ver) {
DatePrint ((DatePtr) (vnp->data.ptrvalue), buf1);
DatePrint ((DatePtr) (bvsp->last_create->data.ptrvalue), buf2);
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_Inconsistent, "Inconsistent create_dates [%s] and [%s]", buf1, buf2);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InconsistentDates, "Inconsistent create_dates [%s] and [%s]", buf1, buf2);
}
} else
bvsp->last_create = vnp;
if (bvsp->last_update != NULL) {
tmpval = (int) DateMatch ((DatePtr) vnp->data.ptrvalue, (DatePtr) (bvsp->last_update->data.ptrvalue), FALSE);
- if (tmpval == 1) {
+ if (tmpval == 1 && vsp->has_gi_or_accn_ver) {
DatePrint ((DatePtr) (vnp->data.ptrvalue), buf1);
DatePrint ((DatePtr) (bvsp->last_update->data.ptrvalue), buf2);
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_Inconsistent, "Inconsistent create_date [%s] and update_date [%s]", buf1, buf2);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InconsistentDates, "Inconsistent create_date [%s] and update_date [%s]", buf1, buf2);
}
}
break;
@@ -15013,10 +17093,10 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
}
if (bvsp->last_create != NULL) {
tmpval = (int) DateMatch ((DatePtr) bvsp->last_create->data.ptrvalue, (DatePtr) (vnp->data.ptrvalue), FALSE);
- if (tmpval == 1) {
+ if (tmpval == 1 && vsp->has_gi_or_accn_ver) {
DatePrint ((DatePtr) (bvsp->last_create->data.ptrvalue), buf1);
DatePrint ((DatePtr) (vnp->data.ptrvalue), buf2);
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_Inconsistent, "Inconsistent create_date [%s] and update_date [%s]", buf1, buf2);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InconsistentDates, "Inconsistent create_date [%s] and update_date [%s]", buf1, buf2);
}
}
if (bvsp->last_update == NULL)
@@ -15037,7 +17117,7 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
}
if (biop != NULL && biop->origin == 5) {
bsp = bvsp->bsp;
- if (! IsOtherDNA (bsp) && !ISA_aa (bsp->mol)) {
+ if (bsp != NULL && ! IsOtherDNA (bsp) && !ISA_aa (bsp->mol)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol other should be used if Biosource-location is synthetic");
}
}
@@ -15052,7 +17132,9 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
if (bvsp->last_org != NULL) {
if ((this_org->taxname != NULL) && (bvsp->last_org->taxname != NULL)) {
if (StringCmp (this_org->taxname, bvsp->last_org->taxname)) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_Inconsistent, "Inconsistent taxnames [%s] and [%s]", this_org->taxname, bvsp->last_org->taxname);
+ if (! vsp->is_wp_in_sep) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_Inconsistent, "Inconsistent taxnames [%s] and [%s]", this_org->taxname, bvsp->last_org->taxname);
+ }
}
}
} else
@@ -15219,6 +17301,40 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
case MI_TECH_concept_trans_a:
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "Nucleic acid with protein sequence method");
break;
+ case MI_TECH_tsa:
+ if(mip->biomol == 0)
+ p = "unknown";
+ else if(mip->biomol == 1)
+ p = "genomic";
+ else if(mip->biomol == 2)
+ p = "pre-RNA";
+ else if(mip->biomol == 5)
+ p = "tRNA";
+ else if(mip->biomol == 6)
+ p = "snRNA";
+ else if(mip->biomol == 7)
+ p = "scRNA";
+ else if(mip->biomol == 8)
+ p = "peptide";
+ else if(mip->biomol == 9)
+ p = "other-genetic";
+ else if(mip->biomol == 10)
+ p = "genomic-mRNA";
+ else if(mip->biomol == 11)
+ p = "cRNA";
+ else if(mip->biomol == 12)
+ p = "snoRNA";
+ else if(mip->biomol == 15)
+ p = "tmRNA";
+ else if(mip->biomol == 255)
+ p = "other";
+ else
+ p = NULL;
+ if(p != NULL)
+ ValidErr(vsp, SEV_ERROR, ERR_SEQ_DESCR_WrongBiomolForTechnique,
+ "Biomol \"%s\" is not appropriate for sequences that use the TSA technique.",
+ p);
+ break;
default:
break;
}
@@ -15298,6 +17414,8 @@ static Boolean LIBCALLBACK ValidateSeqDescrIndexed (ValNodePtr sdp, SeqMgrDescCo
return ValidateSeqDescrCommon (sdp, bvsp, vsp, context->itemID);
}
+//LCOV_EXCL_START
+//only used when indexing not available
static void ValidateSeqDescrContext (GatherContextPtr gcp)
{
ValidStructPtr vsp;
@@ -15310,6 +17428,7 @@ static void ValidateSeqDescrContext (GatherContextPtr gcp)
ValidateSeqDescrCommon (sdp, bvsp, vsp, 0);
}
+//LCOV_EXCL_STOP
/*****************************************************************************
*
@@ -15632,7 +17751,7 @@ static void ValidateLocusTagGeneral (ValidStructPtr vsp, BioseqPtr bsp)
if (grp == NULL) {
gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
if (gene != NULL) {
- grp = (GeneRefPtr) sfp->data.value.ptrvalue;
+ grp = (GeneRefPtr) gene->data.value.ptrvalue;
}
}
if (grp != NULL && StringDoesHaveText (grp->locus_tag)) {
@@ -15765,7 +17884,7 @@ static CharPtr ValGetAuthorsPlusConsortium (
return tmp;
}
-static Boolean IsIdenticalPublication (PubdescPtr pdp1, PubdescPtr pdp2)
+NLM_EXTERN Boolean IsIdenticalPublication (PubdescPtr pdp1, PubdescPtr pdp2)
{
AuthListPtr alp1, alp2;
@@ -15932,7 +18051,7 @@ static Int2 IdXrefsNotReciprocal (
)
{
- Int4 giu = 0, gip = 0;
+ BIG_ID giu = 0, gip = 0;
SeqFeatPtr matchsfp;
ObjectIdPtr oip;
SeqIdPtr sip;
@@ -15969,7 +18088,7 @@ static Int2 IdXrefsNotReciprocal (
sip = SeqLocId (cds->product);
if (sip == NULL) return 0;
if (sip->choice == SEQID_GI) {
- gip = (Int4) sip->data.intvalue;
+ gip = (BIG_ID) sip->data.intvalue;
} else {
gip = GetGIForSeqId (sip);
}
@@ -15983,7 +18102,7 @@ static Int2 IdXrefsNotReciprocal (
sip = MakeSeqID (tmp);
if (sip == NULL) return 0;
if (sip->choice == SEQID_GI) {
- giu = (Int4) sip->data.intvalue;
+ giu = (BIG_ID) sip->data.intvalue;
} else {
giu = GetGIForSeqId (sip);
}
@@ -16156,13 +18275,51 @@ static Boolean LIBCALLBACK DummyCM121Proc (
}
*/
+/* TRUE when a mobile_element_type qualifier on the feature has a value that
+   begins, ignoring case, with "transposon" or "retrotransposon". */
+static Boolean IsTransposonOrRetro (
+  SeqFeatPtr mbl_element
+)
+{
+  GBQualPtr  qual;
+
+  qual = (mbl_element != NULL) ? mbl_element->qual : NULL;
+  while (qual != NULL) {
+    if (StringICmp (qual->qual, "mobile_element_type") == 0 &&
+        (StringNICmp (qual->val, "transposon", 10) == 0 ||
+         StringNICmp (qual->val, "retrotransposon", 15) == 0)) return TRUE;
+    qual = qual->next;
+  }
+  return FALSE;
+}
+
+/* TRUE when the CDS comment mentions "transposon" (any case) and the CDS
+   carries at least one db_xref whose database name is exactly "SGD". */
+static Boolean IsSGDTransposonOrRetro (
+  SeqFeatPtr cds
+)
+{
+  DbtagPtr    tag;
+  ValNodePtr  xref;
+
+  if (cds == NULL || StringHasNoText (cds->comment)) return FALSE;
+
+  /* the comment test does not depend on the db_xref, so do it once up front */
+  if (StringISearch (cds->comment, "transposon") == NULL) return FALSE;
+
+  for (xref = cds->dbxref; xref != NULL; xref = xref->next) {
+    tag = (DbtagPtr) xref->data.ptrvalue;
+    if (tag != NULL && StringCmp (tag->db, "SGD") == 0) return TRUE;
+  }
+  return FALSE;
+}
+
static void ValidateCDSmRNAmatch (
ValidStructPtr vsp,
BioseqPtr bsp,
Int2 numgene,
Int2 numcds,
- Int2 nummrna,
- Boolean suppress_duplicate_messages
+ Int2 nummrna
)
{
@@ -16179,15 +18336,15 @@ static void ValidateCDSmRNAmatch (
Int2 i, j, k, numfeats, tmpnumcds, tmpnummrna, count;
Boolean is_genbank = FALSE;
LpData ld;
+ SeqFeatPtr mbl_element, rpt_region;
+ VoidPtr mobile_element_array, repeat_region_array;
+ Int4 num_mobile_elements, num_repeat_regions;
Int2 num_no_mrna = 0;
- Int4 num_repeat_regions;
Uint2 olditemtype = 0;
Uint4 olditemid = 0;
OrgNamePtr onp;
OrgRefPtr orp;
Int2 recip;
- VoidPtr repeat_region_array;
- SeqFeatPtr rpt_region;
SeqDescrPtr sdp;
ErrSev sev = /* SEV_INFO */ SEV_WARNING;
SeqFeatPtr sfp;
@@ -16239,6 +18396,7 @@ static void ValidateCDSmRNAmatch (
}
repeat_region_array = SeqMgrBuildFeatureIndex (bsp, &num_repeat_regions, 0, FEATDEF_repeat_region);
+ mobile_element_array = SeqMgrBuildFeatureIndex (bsp, &num_mobile_elements, 0, FEATDEF_mobile_element);
if (numgene > 0 && numcds > 0 && nummrna > 0) {
numfeats = numcds + nummrna;
@@ -16388,12 +18546,6 @@ static void ValidateCDSmRNAmatch (
if (vdp->featid_matched) {
/* presence of reciprocal link suppresses warnings */
} else if (vdp->products_unique) {
- /*
- if (! suppress_duplicate_messages) {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_CDSwithMultipleMRNAs,
- "CDS overlapped by %d mRNAs, but product locations are unique", (int) count);
- }
- */
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_CDSwithMultipleMRNAs,
"CDS overlapped by %d mRNAs, but product locations are unique", (int) count);
} else {
@@ -16404,7 +18556,9 @@ static void ValidateCDSmRNAmatch (
if (! pseudo) {
rpt_region = SeqMgrGetOverlappingFeature (sfp->location, 0, repeat_region_array, num_repeat_regions,
NULL, CONTAINED_WITHIN, &rcontext);
- if (rpt_region == NULL) {
+ mbl_element = SeqMgrGetOverlappingFeature (sfp->location, 0, mobile_element_array, num_mobile_elements,
+ NULL, CONTAINED_WITHIN, &rcontext);
+ if (rpt_region == NULL && (! IsTransposonOrRetro (mbl_element)) && (! IsSGDTransposonOrRetro (sfp))) {
if (StringStr (sfp->except_text, "rearrangement required for product") == NULL) {
/*
if (gcp != NULL) {
@@ -16430,6 +18584,7 @@ static void ValidateCDSmRNAmatch (
}
MemFree (repeat_region_array);
+ MemFree (mobile_element_array);
if (num_no_mrna > 0) {
if (num_no_mrna >= 10) {
@@ -16639,6 +18794,44 @@ static Boolean BaseRangeIsVirtual (BioseqPtr bsp, Int4 left, Int4 right)
return TRUE;
}
+
+/* Streams slp with gaps expanded through IsAllNsProc and returns the flag that callback was given. */
+static Boolean IsAllNs (SeqLocPtr slp)
+{
+  Boolean  all_ns = TRUE;
+  ErrSev   saved_log, saved_msg;
+  /* raise both thresholds to SEV_MAX while streaming, then put them back */
+  saved_msg = ErrSetMessageLevel (SEV_MAX);
+  saved_log = ErrSetLogLevel (SEV_MAX);
+  SeqPortStreamLoc (slp, STREAM_EXPAND_GAPS, (Pointer) &all_ns, IsAllNsProc);
+  ErrSetLogLevel (saved_log);
+  ErrSetMessageLevel (saved_msg);
+  return all_ns;
+}
+
+
+/* Walks the delta segments beginning with dsp (the segment for the start of
+   slp; dsp_start is the offset that goes with it) and returns TRUE as soon as
+   a segment for which IsDeltaSeqUnknownGap is true is met.  Only segments
+   whose starting offset is below SeqLocStop (slp) are examined.
+   NOTE(review): a segment that starts exactly at the stop position is not
+   examined here - confirm that this is intended. */
+static Boolean DoesLocIntersectGapOfUnknownLength (SeqLocPtr slp, DeltaSeqPtr dsp, Int4 dsp_start)
+{
+  Int4  loc_stop;
+  Int4  seg_start;
+
+  if (dsp == NULL) return FALSE;
+
+  loc_stop = SeqLocStop (slp);
+  for (seg_start = dsp_start; dsp != NULL && seg_start < loc_stop; dsp = dsp->next) {
+    if (IsDeltaSeqUnknownGap (dsp)) return TRUE;
+    seg_start += GetDeltaSeqLen (dsp);
+  }
+  return FALSE;
+}
+
+
static Boolean LIBCALLBACK GetFeatsInGaps (
SeqFeatPtr sfp,
SeqMgrFeatContextPtr fcontext
@@ -16647,29 +18840,19 @@ static Boolean LIBCALLBACK GetFeatsInGaps (
{
BioseqPtr bsp;
Int4 dashes;
- Int2 first = 0;
GatherContextPtr gcp;
- Int2 last = 0;
- Int4 len;
SeqLocPtr loc;
- Int2 localfirst;
- Int2 locallast;
- ErrSev logsev;
- ErrSev msgsev;
- Boolean needToStream = TRUE;
Int4 Ns;
Uint2 olditemtype = 0;
Uint4 olditemid = 0;
Int4 plusses;
- Int2 prefix = 0;
- Int2 suffix = 0;
Int4 realBases;
- Int2 res;
- StreamCache sc;
- SeqIntPtr sintp;
SeqLocPtr slp;
Boolean startsOrEndsInGap = FALSE;
ValidStructPtr vsp;
+ DeltaSeqPtr dsp;
+ Int4 dsp_start;
+ Int4 start, stop;
if (sfp == NULL || fcontext == NULL) return FALSE;
vsp = (ValidStructPtr) fcontext->userdata;
@@ -16694,120 +18877,58 @@ static Boolean LIBCALLBACK GetFeatsInGaps (
Ns = 0;
realBases = 0;
- msgsev = ErrSetMessageLevel (SEV_MAX);
- logsev = ErrSetLogLevel (SEV_MAX);
+ bsp = BioseqFindFromSeqLoc (loc);
+ start = SeqLocStart(loc);
+ stop = SeqLocStop(loc);
+ dsp = GetDeltaSeqForPosition(start, bsp, &dsp_start);
/* special check for single interval misc_features that may exactly cover a gap */
- if (loc->choice == SEQLOC_INT && sfp->idx.subtype == FEATDEF_misc_feature) {
- sintp = (SeqIntPtr) loc->data.ptrvalue;
- if (sintp != NULL) {
- bsp = BioseqFind (sintp->id);
- if (bsp != NULL && sintp->from > 0 && sintp->to < bsp->length - 1) {
- len = SeqLocLen (loc);
- if (StreamCacheSetup (bsp, NULL, EXPAND_GAPS_TO_DASHES | KNOWN_GAP_AS_PLUS, &sc)) {
- StreamCacheSetPosition (&sc, sintp->from - 1);
- prefix = StreamCacheGetResidue (&sc);
- while ((res = StreamCacheGetResidue (&sc)) != '\0' && len > 0) {
- if (IS_LOWER (res)) {
- res = TO_UPPER (res);
- }
- if (first == 0) {
- first = res;
- }
- last = res;
- if (res == '-') {
- dashes++;
- } else if (res == '+') {
- plusses++;
- } else if (res == 'N') {
- Ns++;
- } else if (res != 0) {
- realBases++;
- }
- len--;
- }
- suffix = StreamCacheGetResidue (&sc);
- needToStream = FALSE;
- }
- }
- }
+ if (loc->choice == SEQLOC_INT && sfp->idx.subtype == FEATDEF_misc_feature
+ && dsp != NULL
+ && dsp_start == start && SeqLocLen(loc) == GetDeltaSeqLen(dsp)) {
+ /* single interval misc_feature covers a delta segment exactly, can ignore it */
+ return TRUE;
}
- /*
- if (needToStream && StreamCacheSetup (NULL, loc, EXPAND_GAPS_TO_DASHES | KNOWN_GAP_AS_PLUS, &sc)) {
- while ((res = StreamCacheGetResidue (&sc)) != '\0') {
- if (IS_LOWER (res)) {
- res = TO_UPPER (res);
- }
- if (first == 0) {
- first = res;
- }
- last = res;
- if (res == '-') {
- dashes++;
- } else if (res == '+') {
- plusses++;
- } else if (res == 'N') {
- Ns++;
- } else if (res != 0) {
- realBases++;
- }
- }
+ if (SeqLocLen (loc) >= 50 && IsAllNs(loc)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureInsideGap, "Feature inside gap of Ns");
+ return TRUE;
}
- */
- if (needToStream) {
- for (slp = SeqLocFindNext (loc, NULL); slp != NULL; slp = SeqLocFindNext (loc, slp)) {
- if (StreamCacheSetup (NULL, slp, EXPAND_GAPS_TO_DASHES | KNOWN_GAP_AS_PLUS, &sc)) {
- localfirst = 0;
- locallast = 0;
- while ((res = StreamCacheGetResidue (&sc)) != '\0') {
- if (IS_LOWER (res)) {
- res = TO_UPPER (res);
- }
- if (first == 0) {
- first = res;
- }
- if (localfirst == 0) {
- localfirst = res;
- }
- last = res;
- locallast = res;
- if (res == '-') {
- dashes++;
- } else if (res == '+') {
- plusses++;
- } else if (res == 'N') {
- Ns++;
- } else if (res != 0) {
- realBases++;
- }
- }
- if (localfirst == '-' || localfirst == '+' || locallast == '-' || locallast == '+') {
- startsOrEndsInGap = TRUE;
- }
+ if (dsp == NULL) {
+ /* not a delta sequence, no other errors possible */
+ return TRUE;
+ }
+
+ if (IsDeltaSeqGap(dsp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureBeginsOrEndsInGap, "Feature begins or ends in gap starting at %d", dsp_start + 1);
+ return TRUE;
+ }
+ dsp = GetDeltaSeqForPosition(stop, bsp, &dsp_start);
+ if (IsDeltaSeqGap(dsp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureBeginsOrEndsInGap, "Feature begins or ends in gap starting at %d", dsp_start + 1);
+ return TRUE;
+ }
+
+ for (slp = SeqLocFindNext (loc, NULL); slp != NULL; slp = SeqLocFindNext (loc, slp)) {
+ start = SeqLocStart(slp);
+ dsp = GetDeltaSeqForPosition(start, bsp, &dsp_start);
+ if (DoesLocIntersectGapOfUnknownLength(slp, dsp, dsp_start)) {
+ if (sfp->data.choice != SEQFEAT_GENE) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureCrossesGap, "Feature crosses gap of unknown length");
+ return TRUE;
}
+ } else if (IsDeltaSeqGap(dsp)) {
+ startsOrEndsInGap = TRUE;
+ }
+ stop = SeqLocStop (slp);
+ dsp = GetDeltaSeqForPosition(stop, bsp, &dsp_start);
+ if (IsDeltaSeqGap(dsp)) {
+ startsOrEndsInGap = TRUE;
}
}
- ErrSetLogLevel (logsev);
- ErrSetMessageLevel (msgsev);
-
- if (dashes == 0 && plusses == 0 && Ns == 0) {
- /* ignore features that do not cover any gap characters */
- } else if (first == '-' || first == '+' || last == '-' || last == '+') {
- if (realBases > 0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureBeginsOrEndsInGap, "Feature begins or ends in gap");
- } else if (IS_ALPHA (prefix) && IS_ALPHA (suffix)) {
- /* ignore (misc_) features that exactly cover the gap */
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureInsideGap, "Feature inside sequence gap");
- }
- } else if (realBases == 0 && dashes == 0 && plusses == 0 && Ns >= 50) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureInsideGap, "Feature inside gap of Ns");
- } else if ((sfp->data.choice == SEQFEAT_CDREGION || sfp->data.choice == SEQFEAT_RNA) && dashes > 0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureCrossesGap, "Feature crosses gap of unknown length");
- } else if (startsOrEndsInGap) {
+ if (startsOrEndsInGap) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IntervalBeginsOrEndsInGap, "Internal interval begins or ends in gap");
}
@@ -16840,6 +18961,95 @@ static void CheckBioseqForFeatsInGap (
}
}
+/*
+ * Per-feature check: posts ERR_SEQ_FEAT_FeatureIsMostlyNs (SEV_WARNING) when
+ * the sequence fetched for a feature holds more 'N' characters than other letters.
+ *
+ *   sfp      - feature to examine; gap and misc_feature subtypes are skipped
+ *   fcontext - feature context; userdata must carry the ValidStructPtr
+ *
+ * Returns FALSE when sfp, fcontext, the validator struct or its gather context
+ * is missing, TRUE in every other case (whether or not a warning was posted).
+ * The gather context item id/type and vsp->sfp are pointed at the feature only
+ * for the duration of the check and are put back before returning.
+ */
+static Boolean LIBCALLBACK GetFeatsInNs (
+  SeqFeatPtr sfp,
+  SeqMgrFeatContextPtr fcontext
+)
+{
+  Char              ch;
+  GatherContextPtr  gcp;
+  size_t            i;    /* same type as len, so the loop test is not signed vs unsigned */
+  size_t            len;
+  Int4              Ns = 0;
+  Uint2             olditemtype = 0;
+  Uint4             olditemid = 0;
+  Int4              realBases = 0;
+  CharPtr           str;
+  ValidStructPtr    vsp;
+
+  if (sfp == NULL || fcontext == NULL) return FALSE;
+  vsp = (ValidStructPtr) fcontext->userdata;
+  if (vsp == NULL) return FALSE;
+  gcp = vsp->gcp;
+  if (gcp == NULL) return FALSE;
+
+  if (sfp->idx.subtype == FEATDEF_gap || sfp->idx.subtype == FEATDEF_misc_feature) return TRUE;
+
+  str = GetSequenceByFeatureEx (sfp, STREAM_EXPAND_GAPS | SEQ_GAP_AS_TILDE);
+  if (str == NULL) return TRUE;
+
+  olditemid = gcp->itemID;
+  olditemtype = gcp->thistype;
+  gcp->itemID = fcontext->itemID;
+  gcp->thistype = OBJ_SEQFEAT;
+  vsp->sfp = sfp;
+
+  /* count 'N' against every other alphabetic character; non-letters are not counted */
+  len = StringLen (str);
+  for (i = 0; i < len; i++) {
+    ch = str [i];
+    if (ch == 'N') {
+      Ns++;
+    } else if (IS_ALPHA (ch)) {
+      realBases++;
+    }
+  }
+  if (Ns > realBases) {
+    /* the percent sign is passed as an argument rather than written in the format */
+    ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureIsMostlyNs, "Feature contains more than 50%s Ns", "%");
+  }
+
+  MemFree (str);
+  gcp->itemID = olditemid;
+  gcp->thistype = olditemtype;
+  vsp->sfp = NULL;
+  return TRUE;
+}
+
+/* Calls GetFeatsInNs on each feature SeqMgrGetNextFeature returns for a
+   non-protein Bioseq; vsp is handed over through the context's userdata. */
+static void CheckBioseqForFeatsInNs (
+  BioseqPtr bsp,
+  ValidStructPtr vsp
+)
+{
+  SeqMgrFeatContext  ctx;
+  SeqFeatPtr         feat;
+
+  if (bsp == NULL) return;
+  if (ISA_aa (bsp->mol)) return;
+  if (SeqIdFindBest (bsp->id, 0) == NULL) return;
+
+  feat = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &ctx);
+  while (feat != NULL) {
+    ctx.userdata = (Pointer) vsp;
+    GetFeatsInNs (feat, &ctx);
+    feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &ctx);
+  }
+}
+
static Boolean ReportGeneCollision (GeneRefPtr grp, GeneRefPtr lastgrp)
{
@@ -17101,6 +19311,66 @@ static void LookForViralMolInfoInLineage (BioseqPtr bsp, ValidStructPtr vsp, Gat
}
+/* TRUE for taxname "unidentified phage" (any case) or for an organism whose
+   lineage starts, ignoring case, with "Viruses"; FALSE otherwise, including
+   when the BioSource or its org is missing. */
+static Boolean SuppressMultipleEquivBioSources (BioSourcePtr biop)
+{
+  CharPtr    prefix = "Viruses";
+  OrgRefPtr  orp;
+
+  orp = (biop != NULL) ? biop->org : NULL;
+  if (orp == NULL) return FALSE;
+  if (StringICmp (orp->taxname, "unidentified phage") == 0) return TRUE;
+  if (orp->orgname == NULL) return FALSE;
+  /* only the leading StringLen (prefix) characters of the lineage are compared */
+  return (Boolean) (StringNICmp (orp->orgname->lineage, prefix, StringLen (prefix)) == 0);
+}
+
+
+/* On a protein Bioseq with two or more FEATDEF_PROT features, posts
+   ERR_SEQ_FEAT_ExtraProteinFeature on each of them; vsp->sfp ends up NULL. */
+static void ReportMultipleUnprocessedProteinFeatures (BioseqPtr bsp, ValidStructPtr vsp)
+{
+  SeqMgrFeatContext  ctx;
+  SeqFeatPtr         prot;
+  Boolean            several = FALSE;
+
+  if (bsp == NULL || vsp == NULL) return;
+  if (! ISA_aa (bsp->mol)) return;
+
+  /* probe for a second protein feature before reporting anything */
+  prot = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &ctx);
+  if (prot != NULL) {
+    several = (Boolean) (SeqMgrGetNextFeature (bsp, prot, 0, FEATDEF_PROT, &ctx) != NULL);
+  }
+  prot = several ? SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &ctx) : NULL;
+  while (prot != NULL) {
+    vsp->sfp = prot;
+    ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_ExtraProteinFeature, "Protein sequence has multiple unprocessed protein features");
+    prot = SeqMgrGetNextFeature (bsp, prot, 0, FEATDEF_PROT, &ctx);
+  }
+  vsp->sfp = NULL;
+}
+
+
+/* TRUE when sfp carries a feature xref of choice SEQFEAT_RNA whose RnaRef type
+   is 3 (presumably tRNA, going by the function name); a NULL sfp gives FALSE. */
+static Boolean FeatureHastRNAXref(SeqFeatPtr sfp)
+{
+  SeqFeatXrefPtr xref;
+  RnaRefPtr rrp;
+
+  if (sfp == NULL) return FALSE;   /* do not dereference a missing feature */
+  for (xref = sfp->xref; xref != NULL; xref = xref->next) {
+    if (xref->data.choice != SEQFEAT_RNA) continue;
+    rrp = (RnaRefPtr) xref->data.value.ptrvalue;
+    if (rrp != NULL && rrp->type == 3) return TRUE;
+  }
+  return FALSE;
+}
+
+
static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bvsp)
{
@@ -17127,17 +19397,26 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
Int4Ptr ivals = NULL;
Boolean ivalssame;
SeqAnnotPtr sap = NULL;
+ Boolean no_cit_sub;
Uint2 olditemtype = 0;
Uint4 olditemid = 0;
CharPtr lastLabel;
+ Boolean isSplitGene, lastIsSplitGene;
CharPtr message;
Int2 i;
Boolean isCuratedFlybase = FALSE;
Boolean isDrosophila = FALSE;
+ Boolean isEukaryote = FALSE;
Boolean isGenBankAccn = FALSE;
Boolean isGeneralAccn = FALSE;
Boolean isGPSorNTorNCorNGorNW = FALSE;
+ Boolean isMicrosporidia = FALSE;
Boolean isViral = FALSE;
+ Boolean non_pseudo_16S_rRNA = FALSE;
+ Uint1 genome = 0;
+ RnaRefPtr rrp;
+ RNAGenPtr rgp;
+ CharPtr str;
Int2 j;
CdRegionPtr crp;
Uint1 frame = 0;
@@ -17162,7 +19441,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
numcdspseudo, nummrnapseudo, numrearrangedcds, lastrnatype,
thisrnatype;
Boolean cds_products_unique = TRUE, mrna_products_unique = TRUE,
- suppress_duplicate_messages = FALSE, pseudo, suppressed;
+ pseudo, suppressed;
SeqIdPtr sip;
Char buf [96];
SeqFeatXrefPtr xref = NULL;
@@ -17184,6 +19463,14 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
BioseqPtr nbsp;
Boolean last_reported;
Boolean found_overlapping_peptide;
+ SeqFeatPtr utr5plus, cdsplus, utr3plus, utr5minus, cdsminus, utr3minus;
+ SeqFeatPtr utr5pgene, cdspgene, utr3pgene, utr5mgene, cdsmgene, utr3mgene;
+ Int2 cdsgencode, firstcdsgencode = 0;
+ Boolean mixedcdsgencodes = FALSE;
+ GeneticCodePtr gc;
+ Uint1 origin;
+ ErrSev sev;
+
gcp = bvsp->gcp;
vsp = bvsp->vsp;
@@ -17195,6 +19482,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
vsp->trna_array = SeqMgrBuildFeatureIndex (bsp, &(vsp->numtrna), 0, FEATDEF_tRNA);
SeqMgrExploreFeatures (bsp, (Pointer) bvsp, ValidateSeqFeatIndexed, NULL, NULL, NULL);
+ ReportMultipleUnprocessedProteinFeatures(bsp, vsp);
vsp->rrna_array = MemFree (vsp->rrna_array);
vsp->trna_array = MemFree (vsp->trna_array);
@@ -17245,6 +19533,25 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
numcdspseudo++;
}
}
+ crp = (CdRegionPtr) sfp->data.value.ptrvalue;
+ if (crp != NULL) {
+ cdsgencode = 0;
+ gc = crp->genetic_code;
+ if (gc != NULL) {
+ for (vnp = gc->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == 2) {
+ cdsgencode = (Int2) vnp->data.intvalue;
+ }
+ }
+ }
+ if (cdsgencode != 0) {
+ if (firstcdsgencode == 0) {
+ firstcdsgencode = cdsgencode;
+ } else if (firstcdsgencode != cdsgencode) {
+ mixedcdsgencodes = TRUE;
+ }
+ }
+ }
break;
case FEATDEF_mRNA :
nummrna++;
@@ -17266,6 +19573,25 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
}
}
break;
+ case FEATDEF_rRNA :
+ if (! sfp->pseudo) {
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (rrp != NULL) {
+ str = NULL;
+ if (rrp->ext.choice == 1) {
+ str = (CharPtr) rrp->ext.value.ptrvalue;
+ } else if (rrp->ext.choice == 3) {
+ rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
+ if (rgp != NULL) {
+ str = rgp->product;
+ }
+ }
+ if (str != NULL && StringICmp (str, "16S ribosomal RNA") == 0) {
+ non_pseudo_16S_rRNA = TRUE;
+ }
+ }
+ }
+ break;
default :
break;
}
@@ -17287,11 +19613,6 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
}
if (numcds > 0 && nummrna > 1) {
- if (numcdsproducts + numcdspseudo == numcds &&
- (nummrnaproducts + nummrnapseudo == nummrna || nummrnaproducts == 0) &&
- cds_products_unique && mrna_products_unique) {
- suppress_duplicate_messages = TRUE;
- }
if (numcdsproducts > 0 && numcdsproducts + numcdspseudo != numcds && numcdsproducts + numcdspseudo + numrearrangedcds != numcds) {
if (gcp != NULL) {
gcp->itemID = olditemid;
@@ -17344,15 +19665,21 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
ValNodeFreeData (cds_prod_head);
ValNodeFreeData (mrna_prod_head);
+ sev = SEV_ERROR;
/*
SeqEntryToBioSource (vsp->sep, NULL, NULL, 0, &biop);
*/
BioseqToGeneticCode (bsp, NULL, NULL, NULL, NULL, 0, &biop);
if (biop != NULL) {
+ genome = biop->genome;
+ origin = biop->origin;
+ if (origin == ORG_MUT || origin == ORG_ARTIFICIAL || origin == ORG_SYNTHETIC) {
+ sev = SEV_WARNING;
+ }
orp = biop->org;
if (orp != NULL) {
/* curated fly source still has duplicate features */
- if (StringICmp (orp->taxname, "Drosophila melanogaster") == 0) {
+ if (StringNICmp (orp->taxname, "Drosophila ", 11) == 0) {
isDrosophila = TRUE;
}
onp = orp->orgname;
@@ -17360,9 +19687,75 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if (StringNICmp (onp->lineage, "Viruses; ", 9) == 0) {
isViral = TRUE;
}
+ if (StringNICmp (onp->lineage, "Eukaryota; ", 11) == 0) {
+ isEukaryote = TRUE;
+ if (StringNICmp (onp->lineage, "Eukaryota; Fungi; Microsporidia; ", 33) == 0) {
+ isMicrosporidia = TRUE;
+ }
+ }
+ if (StringICmp (onp->div, "SYN") == 0) {
+ sev = SEV_WARNING;
+ }
+ }
+ }
+ for (sbsp = biop->subtype; sbsp != NULL; sbsp = sbsp->next) {
+ if (sbsp->subtype == SUBSRC_transgenic) {
+ sev = SEV_WARNING;
+ }
+ }
+ }
+
+ if (mixedcdsgencodes) {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_MultipleGenCodes, "Multiple CDS genetic codes on sequence");
+ }
+
+ if (isEukaryote && (! isMicrosporidia) && non_pseudo_16S_rRNA &&
+ genome != GENOME_mitochondrion &&
+ genome != GENOME_chloroplast &&
+ genome != GENOME_chromoplast &&
+ genome != GENOME_kinetoplast &&
+ genome != GENOME_plastid &&
+ genome != GENOME_apicoplast &&
+ genome != GENOME_leucoplast &&
+ genome != GENOME_proplastid &&
+ genome != GENOME_chromatophore) {
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+ if (sdp != NULL) {
+ if (gcp != NULL) {
+ gcp->itemID = dcontext.itemID;
+ gcp->thistype = OBJ_SEQDESC;
+ }
+ }
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_WrongOrganismFor16SrRNA, "Improper 16S ribosomal RNA");
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
+ }
+
+ if (! vsp->suppress_no_cit_subs) {
+ no_cit_sub = TRUE;
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
+ while (sdp != NULL) {
+ pdp = (PubdescPtr) sdp->data.ptrvalue;
+ if (pdp != NULL) {
+ for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == PUB_Sub) {
+ no_cit_sub = FALSE;
+ }
+ }
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_pub, &dcontext);
+ }
+ if (no_cit_sub) {
+ sev = SEV_INFO;
+ if (vsp->genomeSubmission) {
+ sev = SEV_ERROR;
}
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Expected submission citation is missing for this Bioseq");
}
}
+
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
mip = (MolInfoPtr) sdp->data.ptrvalue;
@@ -17493,12 +19886,15 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if (GeneXrefsDifferent (sfp, last)) {
severity = SEV_WARNING;
}
+ if (vsp->is_small_genome_set) {
+ severity = SEV_WARNING;
+ }
ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature");
} else if (featdeftype != FEATDEF_PUB) {
if (fcontext.partialL != partialL || fcontext.partialR != partialR) {
/* do not report if partial flags are different */
} else {
- if (suppress_duplicate_messages && (featdeftype == FEATDEF_CDS || featdeftype == FEATDEF_mRNA) && HaveUniqueFeatIDXrefs (xref, sfp->xref)) {
+ if ((featdeftype == FEATDEF_CDS || featdeftype == FEATDEF_mRNA) && HaveUniqueFeatIDXrefs (xref, sfp->xref)) {
/* do not report CDS or mRNA if every one has a unique product and unique featID xrefs */
} else if (featdeftype == FEATDEF_GENE &&
StringStr (sfp->except_text, "dicistronic gene") != NULL &&
@@ -17543,9 +19939,12 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if (GeneXrefsDifferent (sfp, last)) {
severity = SEV_WARNING;
}
+ if (vsp->is_small_genome_set) {
+ severity = SEV_WARNING;
+ }
ValidErr (vsp, severity, ERR_SEQ_FEAT_FeatContentDup, "Duplicate feature (packaged in different feature table)");
} else if (featdeftype != FEATDEF_PUB) {
- if (suppress_duplicate_messages && (featdeftype == FEATDEF_CDS || featdeftype == FEATDEF_mRNA) && HaveUniqueFeatIDXrefs (xref, sfp->xref)) {
+ if ((featdeftype == FEATDEF_CDS || featdeftype == FEATDEF_mRNA) && HaveUniqueFeatIDXrefs (xref, sfp->xref)) {
/* do not report CDS or mRNA if every one has a unique product and unique featID xrefs */
} else {
ValidErr (vsp, /* severity */ SEV_WARNING, ERR_SEQ_FEAT_DuplicateFeat, "Features have identical intervals, but labels differ (packaged in different feature table)");
@@ -17660,9 +20059,17 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
} else if (vsp->is_small_genome_set && StringISearch (lastsfp->except_text, "trans-splicing") != NULL && StringISearch (sfp->except_text, "trans-splicing") != NULL) {
/* suppress for trans-spliced genes on small genome set */
} else if (FeatureSequencesIdentical (sfp, lastsfp)) {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_ReplicatedGeneSequence, "%s, but underlying sequences are identical", message);
+ if (vsp->is_gpipe_in_sep && FeatureHastRNAXref(sfp) && FeatureHastRNAXref(lastsfp)) {
+ /* suppress for gpipe */
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_ReplicatedGeneSequence, "%s, but underlying sequences are identical", message);
+ }
} else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CollidingGeneNames, "%s", message);
+ if (vsp->is_gpipe_in_sep && FeatureHastRNAXref(sfp) && FeatureHastRNAXref(lastsfp)) {
+ /* suppress for gpipe */
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CollidingGeneNames, "%s", message);
+ }
}
vsp->sfp = NULL;
if (gcp != NULL) {
@@ -17678,14 +20085,35 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
}
lastLabel = NULL;
+ lastIsSplitGene = FALSE;
sfp = SeqMgrGetNextGeneByLocusTag (bsp, NULL, &fcontext);
while (sfp != NULL) {
label = NULL;
+ isSplitGene = FALSE;
if (sfp->data.choice == SEQFEAT_GENE) {
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
if (grp != NULL) {
label = grp->locus_tag;
}
+ if (sfp->excpt) {
+ if (StringStr (sfp->except_text, "gene split at ") != NULL) {
+ isSplitGene = TRUE;
+ }
+ }
+ }
+ if (isSplitGene && label == NULL) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ExceptionProblem, "Gene has split exception but no locus_tag");
+ vsp->sfp = NULL;
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
}
if (lastLabel != NULL) {
message = NULL;
@@ -17695,24 +20123,112 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
message = "Colliding locus_tags (with different capitalization) in gene features";
}
if (message != NULL) {
- if (gcp != NULL) {
- gcp->itemID = fcontext.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_CollidingLocusTags, "%s", message);
- vsp->sfp = NULL;
- if (gcp != NULL) {
- gcp->itemID = olditemid;
- gcp->thistype = olditemtype;
+ if (isSplitGene && lastIsSplitGene) {
+ /* suppress if colliding locus_tags have split gene exception */
+ } else {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_CollidingLocusTags, "%s", message);
+ vsp->sfp = NULL;
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
}
}
}
lastLabel = label;
+ lastIsSplitGene = isSplitGene;
sfp = SeqMgrGetNextGeneByLocusTag (bsp, sfp, &fcontext);
}
+ utr5plus = NULL;
+ cdsplus = NULL;
+ utr3plus = NULL;
+ utr5minus = NULL;
+ cdsminus = NULL;
+ utr3minus = NULL;
+
+ utr5pgene = NULL;
+ cdspgene = NULL;
+ utr3pgene = NULL;
+ utr5mgene = NULL;
+ cdsmgene = NULL;
+ utr3mgene = NULL;
+
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+ while (sfp != NULL) {
+ strand = fcontext.strand;
+ if (sfp->idx.subtype == FEATDEF_CDS) {
+ if (strand == Seq_strand_minus) {
+ cdsminus = sfp;
+ cdsmgene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ } else {
+ cdsplus = sfp;
+ cdspgene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ }
+ } else if (sfp->idx.subtype == FEATDEF_5UTR) {
+ if (strand == Seq_strand_minus) {
+ utr5minus = sfp;
+ utr5mgene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ if (utr3minus != NULL && cdsminus == NULL && utr3mgene == utr5mgene && utr5mgene != NULL) {
+ if (gcp != NULL) {
+ gcp->itemID = utr5mgene->idx.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CDSnotBetweenUTRs, "CDS not between 5'UTR and 3'UTR on minus strand");
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
+ }
+ utr5minus = NULL;
+ cdsminus = NULL;
+ utr3minus = NULL;
+ utr5mgene = NULL;
+ cdsmgene = NULL;
+ utr3mgene = NULL;
+ } else {
+ utr5plus = sfp;
+ utr5pgene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ }
+ } else if (sfp->idx.subtype == FEATDEF_3UTR) {
+ if (strand == Seq_strand_minus) {
+ utr3minus = sfp;
+ utr3mgene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ } else {
+ utr3plus = sfp;
+ utr3pgene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ if (utr5plus != NULL && cdsplus == NULL && utr5pgene == utr3pgene && utr3pgene != NULL) {
+ if (gcp != NULL) {
+ gcp->itemID = utr3pgene->idx.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CDSnotBetweenUTRs, "CDS not between 5'UTR and 3'UTR on plus strand");
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
+ }
+ utr5plus = NULL;
+ cdsplus = NULL;
+ utr3plus = NULL;
+ utr5minus = NULL;
+ cdsminus = NULL;
+ utr3minus = NULL;
+ }
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext);
+ }
+
/* do UTR vs. CDS check on genomic if only one CDS, still need separate minus strand logic */
cdscount = 0;
genecount = 0;
@@ -18157,7 +20673,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if (lastsfp != NULL) {
if (StringDoesHaveText (lastsfp->comment) && StringDoesHaveText (sfp->comment) && StringICmp (lastsfp->comment, sfp->comment) != 0) {
/* different comments, so ignore */
- } else if (IsIdenticalBioSource (biop, lastbiop) && (! bvsp->is_synthetic) && (!bvsp->is_artificial)) {
+ } else if (IsIdenticalBioSource (biop, lastbiop) && (! bvsp->is_synthetic) && (!bvsp->is_artificial)
+ && !SuppressMultipleEquivBioSources(biop)) {
if (gcp != NULL) {
gcp->itemID = fcontext.itemID;
gcp->thistype = OBJ_SEQFEAT;
@@ -18226,18 +20743,20 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
SeqMgrExploreDescriptors (bsp, (Pointer) bvsp, ValidateSeqDescrIndexed, NULL);
- omdp = ObjMgrGetData (gcp->entityID);
- if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
- ssp = (SeqSubmitPtr) omdp->dataptr;
- if (ssp != NULL) {
- sbp = ssp->sub;
- if (sbp != NULL) {
- bvsp->got_a_pub = TRUE;
+ if (gcp != NULL) {
+ omdp = ObjMgrGetData (gcp->entityID);
+ if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
+ ssp = (SeqSubmitPtr) omdp->dataptr;
+ if (ssp != NULL) {
+ sbp = ssp->sub;
+ if (sbp != NULL) {
+ bvsp->got_a_pub = TRUE;
+ }
}
}
}
- ValidateCDSmRNAmatch (vsp, bsp, numgene, numcds, nummrna, suppress_duplicate_messages);
+ ValidateCDSmRNAmatch (vsp, bsp, numgene, numcds, nummrna);
if (vsp->locusTagGeneralMatch) {
ValidateLocusTagGeneral (vsp, bsp);
@@ -18251,6 +20770,12 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
CheckBioseqForFeatsInGap (bsp, vsp);
}
+if (! vsp->debugTestDuJour) {
+ if (bsp->repr == Seq_repr_raw || (bsp->repr == Seq_repr_delta && DeltaLitOnly (bsp))) {
+ CheckBioseqForFeatsInNs (bsp, vsp);
+ }
+}
+
CheckForNonViralComplete (bsp, vsp, gcp);
LookForViralMolInfoInLineage (bsp, vsp, gcp);
@@ -18258,6 +20783,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
return TRUE;
}
+//LCOV_EXCL_START
static Boolean ValidateBioseqContextGather (GatherContextPtr gcp)
{
ValidStructPtr vsp;
@@ -18287,6 +20813,7 @@ static Boolean ValidateBioseqContextGather (GatherContextPtr gcp)
}
return TRUE;
}
+//LCOV_EXCL_STOP
static ValNodePtr ListFeaturesContainedInLocation (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice)
@@ -18465,9 +20992,208 @@ static void FindMultiGeneOverlaps (BioseqPtr bsp, ValidStructPtr vsp)
}
+/*
+ * LocationIsFar
+ *
+ * Walks the intervals of `location` (skipping SEQLOC_NULL pieces) and returns
+ * TRUE as soon as one of them is "far":
+ *   - BioseqFind cannot find the interval's Seq-id, or
+ *   - the Bioseq found is a delta sequence (seq_ext_type 4) with a location
+ *     component (choice 1) whose Seq-id BioseqFind cannot find, or
+ *   - the Bioseq found is a segmented sequence (seq_ext_type 1) with a
+ *     segment location whose Seq-id BioseqFind cannot find.
+ * Returns FALSE for a NULL location or when nothing far is found.
+ *
+ * SeqEntrySetScope (NULL) is called before the walk and the value it returned
+ * is handed back to SeqEntrySetScope afterwards, so the caller's scope is
+ * restored on exit.
+ */
+static Boolean LocationIsFar (SeqLocPtr location)
+
+{
+  BioseqPtr    bsp;
+  DeltaSeqPtr  dsp;
+  Boolean      is_far = FALSE;
+  SeqLocPtr    loc;
+  SeqEntryPtr  oldscope;
+  SeqLocPtr    slp;
+
+  if (location == NULL) return FALSE;
+
+  oldscope = SeqEntrySetScope (NULL);
+
+  /* one far reference settles the answer, so stop looking once it is seen */
+  for (slp = SeqLocFindNext (location, NULL);
+       slp != NULL && (! is_far);
+       slp = SeqLocFindNext (location, slp)) {
+    if (slp->choice == SEQLOC_NULL) continue;
+    bsp = BioseqFind (SeqLocId (slp));
+    if (bsp == NULL) {
+      is_far = TRUE;
+    } else if (bsp->repr == Seq_repr_delta && bsp->seq_ext_type == 4) {
+      for (dsp = (DeltaSeqPtr) bsp->seq_ext;
+           dsp != NULL && (! is_far);
+           dsp = dsp->next) {
+        if (dsp->choice != 1) continue;
+        loc = (SeqLocPtr) dsp->data.ptrvalue;
+        if (loc == NULL) continue;
+        if (loc->choice == SEQLOC_NULL) continue;
+        /* test the component lookup directly; bsp keeps naming the delta */
+        if (BioseqFind (SeqLocId (loc)) == NULL) {
+          is_far = TRUE;
+        }
+      }
+    } else if (bsp->repr == Seq_repr_seg && bsp->seq_ext_type == 1) {
+//LCOV_EXCL_START
+// Only for SegSets
+      for (loc = (SeqLocPtr) bsp->seq_ext;
+           loc != NULL && (! is_far);
+           loc = loc->next) {
+        if (loc->choice == SEQLOC_NULL) continue;
+        if (BioseqFind (SeqLocId (loc)) == NULL) {
+          is_far = TRUE;
+        }
+      }
+//LCOV_EXCL_STOP
+    }
+  }
+
+  SeqEntrySetScope (oldscope);
+
+  return is_far;
+}
+
+/*
+ * NoFetchFunctions
+ *
+ * Returns TRUE when ObjMgrProcFindNext finds no OMPROC_FETCH procedure for
+ * the OBJ_SEQID / OBJ_BIOSEQ pair, FALSE when it returns one.
+ */
+static Boolean NoFetchFunctions (void)
+
+{
+  if (ObjMgrProcFindNext (NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_BIOSEQ, NULL) != NULL) {
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/*
+ * HasAssemblyOrNullGap
+ *
+ * Returns TRUE when a delta Bioseq has at least one literal component
+ * (choice 2) whose seq_data is NULL or whose seq_data_type is Seq_code_gap.
+ * Returns FALSE for a NULL or non-delta Bioseq, and when no such literal
+ * exists. Components with a NULL literal pointer are ignored.
+ */
+static Boolean HasAssemblyOrNullGap (BioseqPtr bsp)
+
+{
+  SeqLitPtr    lit;
+  DeltaSeqPtr  part;
+
+  if (bsp == NULL) return FALSE;
+  if (bsp->repr != Seq_repr_delta) return FALSE;
+
+  part = (DeltaSeqPtr) bsp->seq_ext;
+  while (part != NULL) {
+    if (part->choice == 2) {
+      lit = (SeqLitPtr) part->data.ptrvalue;
+      if (lit != NULL && (lit->seq_data == NULL || lit->seq_data_type == Seq_code_gap)) {
+        return TRUE;
+      }
+    }
+    part = part->next;
+  }
+
+  return FALSE;
+}
+
+/*
+ * ReportBadAssemblyGap
+ *
+ * For a delta Bioseq, posts an ERR_SEQ_INST_SeqGapProblem warning for every
+ * literal component (choice 2) that is a NULL literal, has NULL seq_data, or
+ * is a Seq_code_gap whose Seq-gap type is 0 (reported as "unknown") or 255
+ * (reported as "other"). Does nothing when bsp is NULL or not a delta, or
+ * when vsp or gcp is NULL.
+ *
+ * While reporting, gcp is pointed at the Bioseq; its entityID, itemID and
+ * thistype are put back before returning. vsp->bsp, vsp->descr and vsp->sfp
+ * are overwritten and not restored.
+ */
+static void ReportBadAssemblyGap (BioseqPtr bsp, ValidStructPtr vsp, GatherContextPtr gcp)
+
+{
+  SeqGapPtr    gap;
+  SeqLitPtr    lit;
+  DeltaSeqPtr  part;
+  Uint2        savedEntityID, savedItemtype;
+  Uint4        savedItemID;
+
+  if (bsp == NULL || vsp == NULL || gcp == NULL) return;
+  if (bsp->repr != Seq_repr_delta) return;
+
+  savedEntityID = gcp->entityID;
+  savedItemID = gcp->itemID;
+  savedItemtype = gcp->thistype;
+
+  vsp->bsp = bsp;
+  vsp->descr = NULL;
+  vsp->sfp = NULL;
+  gcp->entityID = bsp->idx.entityID;
+  gcp->itemID = bsp->idx.itemID;
+  gcp->thistype = OBJ_BIOSEQ;
+
+  part = (DeltaSeqPtr) bsp->seq_ext;
+  while (part != NULL) {
+    if (part->choice == 2) {
+      lit = (SeqLitPtr) part->data.ptrvalue;
+      if (lit == NULL) {
+        ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqGapProblem, "TSA gap not assembly_gap");
+      } else if (lit->seq_data == NULL) {
+        ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqGapProblem, "TSA Seq_data NULL");
+      } else if (lit->seq_data_type == Seq_code_gap) {
+        gap = (SeqGapPtr) lit->seq_data;
+        if (gap == NULL) {
+          ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqGapProblem, "TSA Seq_gap NULL");
+        } else if (gap->type == 0) {
+          ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqGapProblem, "TSA Seq_gap.unknown");
+        } else if (gap->type == 255) {
+          ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_SeqGapProblem, "TSA Seq_gap.other");
+        }
+      }
+    }
+    part = part->next;
+  }
+
+  gcp->entityID = savedEntityID;
+  gcp->itemID = savedItemID;
+  gcp->thistype = savedItemtype;
+}
+
+
+/*
+ * ReportBadWGSGap
+ *
+ * Posts a single SEV_ERROR ERR_SEQ_INST_SeqGapProblem on a delta Bioseq when
+ * one of its literal components has no linkage evidence: the literal pointer
+ * is NULL, its seq_data is NULL, or it is a Seq_code_gap whose
+ * linkage_evidence is NULL.
+ *
+ * Nothing is checked unless the Bioseq's MolInfo descriptor has
+ * tech == MI_TECH_wgs, and Bioseqs carrying a DDBJ, EMBL or OTHER Seq-id are
+ * skipped. Returns silently when bsp is NULL or not a delta, or when vsp or
+ * gcp is NULL.
+ *
+ * When the error is posted, gcp is pointed at the Bioseq for the report and
+ * then restored; vsp->bsp, vsp->descr and vsp->sfp are overwritten.
+ */
+static void ReportBadWGSGap(BioseqPtr bsp, ValidStructPtr vsp, GatherContextPtr gcp)
+
+{
+  SeqMgrDescContext  context;
+  DeltaSeqPtr        dsp;
+  Boolean            linkage_evidence_missing = FALSE;
+  SeqLitPtr          litp;
+  MolInfoPtr         mip;
+  Uint2              oldEntityID, oldItemtype;
+  Uint4              oldItemID;
+  SeqDescrPtr        sdp;
+  SeqGapPtr          sgp;
+  SeqIdPtr           sip;
+
+  if (bsp == NULL || bsp->repr != Seq_repr_delta) return;
+  if (vsp == NULL || gcp == NULL) return;
+
+  sdp = SeqMgrGetNextDescriptor(bsp, NULL, Seq_descr_molinfo, &context);
+  if (sdp == NULL) return;
+  mip = (MolInfoPtr)sdp->data.ptrvalue;
+  if (mip == NULL || mip->tech != MI_TECH_wgs) return;
+
+  for (sip = bsp->id; sip != NULL; sip = sip->next) {
+    if (sip->choice == SEQID_DDBJ || sip->choice == SEQID_EMBL || sip->choice == SEQID_OTHER) {
+      return;
+    }
+  }
+
+  for (dsp = (DeltaSeqPtr)bsp->seq_ext; dsp && !linkage_evidence_missing; dsp = dsp->next) {
+    /* NOTE(review): a non-literal component reached before any bad gap ends
+       the whole check with no report, while one placed after a bad gap does
+       not; the left-over "continue" marker suggests this was deliberate, but
+       confirm the intended rule */
+    if (dsp->choice != 2) /* continue */ return;
+    litp = (SeqLitPtr)dsp->data.ptrvalue;
+    if (litp == NULL || litp->seq_data == NULL) {
+      linkage_evidence_missing = TRUE;
+    } else if (litp->seq_data_type == Seq_code_gap) {
+      /* seq_data is known to be non-NULL here, so sgp can be dereferenced */
+      sgp = (SeqGapPtr)litp->seq_data;
+      if (sgp->linkage_evidence == NULL) {
+        linkage_evidence_missing = TRUE;
+      }
+    }
+  }
+
+  if (linkage_evidence_missing) {
+    oldEntityID = gcp->entityID;
+    oldItemID = gcp->itemID;
+    oldItemtype = gcp->thistype;
+
+    vsp->bsp = bsp;
+    vsp->descr = NULL;
+    vsp->sfp = NULL;
+    gcp->entityID = bsp->idx.entityID;
+    gcp->itemID = bsp->idx.itemID;
+    gcp->thistype = OBJ_BIOSEQ;
+
+    ValidErr(vsp, SEV_ERROR, ERR_SEQ_INST_SeqGapProblem, "WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence.");
+    gcp->entityID = oldEntityID;
+    gcp->itemID = oldItemID;
+    gcp->thistype = oldItemtype;
+  }
+}
+
+
static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
{
- Int4 total = 0, max_stretch = 0;
+ Int4 total = 0, totalN = 0, totalDash = 0, totalBang = 0, max_stretch = 0;
GatherContextPtr gcp;
ErrSev logsev;
ErrSev msgsev;
@@ -18483,6 +21209,8 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
return;
}
if (bsp->repr == Seq_repr_virtual) return;
+ if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp)) && NoFetchFunctions ()) return;
+ if (bsp->repr == Seq_repr_ref && NoFetchFunctions ()) return;
gcp = vsp->gcp;
@@ -18491,10 +21219,15 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
oldItemtype = gcp->thistype;
if (IsTSA (bsp)) {
+ ReportBadAssemblyGap (bsp, vsp, gcp);
+
+ if (HasAssemblyOrNullGap (bsp)) return;
+
msgsev = ErrSetMessageLevel (SEV_MAX);
logsev = ErrSetLogLevel (SEV_MAX);
- CountNsInSequence (bsp, &total, &max_stretch, FALSE);
+ CountNsInSequence (bsp, &totalN, &totalDash, &totalBang, &max_stretch, /* FALSE */ TRUE, TRUE);
+ total = totalN + totalDash + totalBang;
ErrSetLogLevel (logsev);
ErrSetMessageLevel (msgsev);
@@ -18510,7 +21243,7 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentPercent, "Sequence contains %d percent Ns", percent_N);
}
- if (max_stretch > 15) {
+ if (max_stretch >= 15) {
vsp->bsp = bsp;
vsp->descr = NULL;
vsp->sfp = NULL;
@@ -18519,7 +21252,7 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
gcp->thistype = OBJ_BIOSEQ;
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentStretch, "Sequence has a stretch of %d Ns", max_stretch);
- } else if (bsp->length > 10) {
+ } else if (bsp->length > 20) {
vsp->bsp = bsp;
vsp->descr = NULL;
vsp->sfp = NULL;
@@ -18535,17 +21268,17 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
vn.data.ptrvalue = (Pointer) &si;
si.id = bsp->id;
si.from = 0;
- si.to = 9;
- str = GetSequenceByFeature (&sf);
- if (StringStr (str, "NNNNN") != NULL) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentStretch, "Sequence has a stretch of at least 5 Ns within the first 10 bases");
+ si.to = 19;
+ str = GetSequenceByFeatureEx (&sf, STREAM_EXPAND_GAPS | SEQ_GAP_AS_TILDE);
+ if (StringStr (str, "NNNNNNNNNN") != NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentStretch, "Sequence has a stretch of at least 10 Ns within the first 20 bases");
}
MemFree (str);
- si.from = bsp->length - 10;
+ si.from = bsp->length - 20;
si.to = bsp->length - 1;
- str = GetSequenceByFeature (&sf);
- if (StringStr (str, "NNNNN") != NULL) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentStretch, "Sequence has a stretch of at least 5 Ns within the last 10 bases");
+ str = GetSequenceByFeatureEx (&sf, STREAM_EXPAND_GAPS | SEQ_GAP_AS_TILDE);
+ if (StringStr (str, "NNNNNNNNNN") != NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentStretch, "Sequence has a stretch of at least 10 Ns within the last 20 bases");
}
MemFree (str);
}
@@ -18553,7 +21286,8 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
msgsev = ErrSetMessageLevel (SEV_MAX);
logsev = ErrSetLogLevel (SEV_MAX);
- CountNsInSequence (bsp, &total, &max_stretch, FALSE);
+ CountNsInSequence (bsp, &totalN, &totalDash, &totalBang, &max_stretch, FALSE, TRUE);
+ total = totalN + totalDash + totalBang;
ErrSetLogLevel (logsev);
ErrSetMessageLevel (msgsev);
@@ -18570,12 +21304,31 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentPercent, "Sequence contains %d percent Ns", percent_N);
}
}
+
+ ReportBadWGSGap(bsp, vsp, gcp);
+
gcp->entityID = oldEntityID;
gcp->itemID = oldItemID;
gcp->thistype = oldItemtype;
}
+/*
+ * FindBracketed
+ *
+ * Returns TRUE when `title` contains `taxname` immediately preceded by '['
+ * and immediately followed by ']' (case-sensitive match via StringStr).
+ * Returns FALSE when either string has no text or no bracketed occurrence
+ * exists.
+ *
+ * Every occurrence of taxname is examined, not just the first, so a title
+ * that also mentions the organism outside brackets (for example
+ * "Bacillus toxin [Bacillus]") is still recognised.
+ */
+static Boolean FindBracketed (CharPtr title, CharPtr taxname)
+
+{
+  size_t   len;
+  CharPtr  ptr;
+
+  if (StringHasNoText (title) || StringHasNoText (taxname)) return FALSE;
+
+  len = StringLen (taxname);
+
+  /* taxname is non-empty, so ptr + 1 never steps past the terminating NUL */
+  for (ptr = StringStr (title, taxname); ptr != NULL; ptr = StringStr (ptr + 1, taxname)) {
+    /* a match at the very start of title has no character before it */
+    if (ptr == title) continue;
+    if (*(ptr - 1) == '[' && *(ptr + len) == ']') return TRUE;
+  }
+
+  return FALSE;
+}
+
static void ValidateRefSeqTitle (BioseqPtr bsp, ValidStructPtr vsp, Boolean is_virus)
{
SeqDescrPtr sdp;
@@ -18629,7 +21382,10 @@ static void ValidateRefSeqTitle (BioseqPtr bsp, ValidStructPtr vsp, Boolean is_v
StringNICmp (title + tlen - len - 1, taxname, len) != 0 ||
title [tlen - len - 2] != '[' ||
title [tlen - 1] != ']') {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_NoOrganismInTitle, "RefSeq protein title does not end with organism name");
+ if (vsp->is_wp_in_sep && FindBracketed (title, taxname)) {
+ } else {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_NoOrganismInTitle, "RefSeq protein title does not end with organism name");
+ }
}
}
}
@@ -18719,6 +21475,161 @@ static void ValidateStructuredCommentsInContext (BioseqPtr bsp, ValidStructPtr v
}
+/*
+ * TestForUnwantedCompleteFlag
+ *
+ * Checks a nucleotide Bioseq whose MolInfo has completeness == 1 and whose
+ * title does not contain "complete sequence" or "complete genome"
+ * (case-insensitive search). Two warnings can result:
+ *
+ *   - ERR_SEQ_INST_CompleteCircleProblem, for genomic biomol with a GenBank
+ *     Seq-id and topology == 2;
+ *   - ERR_SEQ_DESCR_UnwantedCompleteFlag, for genomic biomol with a GenBank
+ *     Seq-id and any other topology, or (SQD-1484) when the BioSource lineage
+ *     contains neither "Viruses" nor "Viroids", origin is not 4, and genome
+ *     is GENOME_unknown or GENOME_genomic.
+ *
+ * Both reports are attached to the MolInfo descriptor by temporarily
+ * pointing gcp at it; gcp is restored afterwards. Returns without doing
+ * anything when bsp or gcp is NULL or the Bioseq is not nucleic acid.
+ * The ValidStruct is taken from gcp->userdata.
+ */
+static void TestForUnwantedCompleteFlag (BioseqPtr bsp, GatherContextPtr gcp)
+{
+  MolInfoPtr         mip;
+  ErrSev             sev = SEV_WARNING;
+  Boolean            do_report = FALSE;
+  SeqMgrDescContext  dcontext;
+  ValNodePtr         vnp = NULL;
+  SeqIdPtr           sip;
+  Boolean            is_gb = FALSE;
+  CharPtr            str;
+  BioSourcePtr       biop;
+  OrgNamePtr         onp;
+  OrgRefPtr          orp;
+  ValidStructPtr     vsp;
+  Uint2              oldEntityID, oldItemtype;
+  Uint4              oldItemID;
+  Uint2              mipEntityID = 0, mipItemtype = 0;
+  Uint4              mipItemID = 0;
+
+
+  /* gcp is dereferenced below, so reject NULL along with bsp */
+  if (bsp == NULL || gcp == NULL || !ISA_na (bsp->mol)) {
+    return;
+  }
+  vsp = (ValidStructPtr) (gcp->userdata);
+
+  vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
+  if (vnp == NULL || (mip = (MolInfoPtr) vnp->data.ptrvalue) == NULL || mip->completeness != 1) {
+    /* nothing to validate if no molinfo or not complete */
+    return;
+  }
+  /* remember where the MolInfo lives; dcontext is reused by later lookups */
+  mipEntityID = dcontext.entityID;
+  mipItemID = dcontext.itemID;
+  mipItemtype = OBJ_SEQDESC;
+
+  vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext);
+  if (vnp != NULL) {
+    str = (CharPtr) vnp->data.ptrvalue;
+    if (StringDoesHaveText (str)) {
+      if (StringISearch (str, "complete sequence") != NULL || StringISearch (str, "complete genome") != NULL) {
+        /* complete sequence or complete genome in title suppresses warning */
+        return;
+      }
+    }
+  }
+
+  if (mip->biomol == MOLECULE_TYPE_GENOMIC) {
+    sev = /* SEV_ERROR */ SEV_WARNING;
+    if (mip->tech == MI_TECH_htgs_3) {
+      sev = SEV_WARNING;
+    }
+    for (sip = bsp->id; sip != NULL; sip = sip->next) {
+      if (sip->choice == SEQID_GENBANK) {
+        is_gb = TRUE;
+      }
+    }
+
+    if (is_gb) {
+      if (bsp->topology == 2) {
+        /* attach this report to the MolInfo descriptor too, the same item
+           the UnwantedCompleteFlag report below is attached to */
+        oldEntityID = gcp->entityID;
+        oldItemID = gcp->itemID;
+        oldItemtype = gcp->thistype;
+
+        gcp->entityID = mipEntityID;
+        gcp->itemID = mipItemID;
+        gcp->thistype = mipItemtype;
+
+        ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_CompleteCircleProblem, "Circular topology has complete flag set, but title should say complete sequence or complete genome");
+
+        gcp->entityID = oldEntityID;
+        gcp->itemID = oldItemID;
+        gcp->thistype = oldItemtype;
+      } else {
+        do_report = TRUE;
+      }
+    }
+  }
+  if (!do_report) {
+    /* for SQD-1484
+     * warn if completeness = complete, organism not viral and not artificial, no location set or location is genomic
+     */
+    /* NOTE(review): this branch is also reached after the circular-topology
+       warning above, so such a record can receive both messages - confirm
+       that is intended */
+    vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+    if (vnp != NULL) {
+      biop = (BioSourcePtr) vnp->data.ptrvalue;
+      if (biop != NULL) {
+        orp = biop->org;
+        if (orp != NULL) {
+          onp = orp->orgname;
+          if (onp != NULL) {
+            if (StringSearch(onp->lineage, "Viruses") == NULL &&
+                StringSearch(onp->lineage, "Viroids") == NULL &&
+                biop->origin != 4 /* not artificial */ &&
+                (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic)) { /* location not set or genomic */
+              do_report = TRUE;
+              sev = SEV_WARNING;
+            }
+          }
+        }
+      }
+    }
+  }
+  if (do_report) {
+    oldEntityID = gcp->entityID;
+    oldItemID = gcp->itemID;
+    oldItemtype = gcp->thistype;
+
+    gcp->entityID = mipEntityID;
+    gcp->itemID = mipItemID;
+    gcp->thistype = mipItemtype;
+
+    ValidErr (vsp, sev, ERR_SEQ_DESCR_UnwantedCompleteFlag, "Suspicious use of complete");
+
+    gcp->entityID = oldEntityID;
+    gcp->itemID = oldItemID;
+    gcp->thistype = oldItemtype;
+  }
+}
+
+/*
+ * IsBspWGS
+ *
+ * Bioseq visitor used by WGSinBssp. `userdata` is treated as a pointer to a
+ * Boolean, which is set to TRUE when the Bioseq's MolInfo descriptor has
+ * tech == MI_TECH_wgs. The flag is never cleared here, and nothing happens
+ * when bsp, the descriptor, its MolInfo, or userdata is NULL.
+ */
+static void IsBspWGS (BioseqPtr bsp, Pointer userdata)
+
+{
+  SeqMgrDescContext  ctx;
+  SeqDescrPtr        desc;
+  BoolPtr            flag;
+  MolInfoPtr         info;
+
+  if (bsp == NULL) return;
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &ctx);
+  if (desc == NULL) return;
+
+  info = (MolInfoPtr) desc->data.ptrvalue;
+  if (info == NULL || info->tech != MI_TECH_wgs) return;
+
+  flag = (BoolPtr) userdata;
+  if (flag != NULL) {
+    *flag = TRUE;
+  }
+}
+
+/*
+ * WGSinBssp
+ *
+ * Runs IsBspWGS over the set with VisitBioseqsInSet and returns the flag it
+ * sets: TRUE when some visited Bioseq has MolInfo tech == MI_TECH_wgs.
+ * Returns FALSE for a NULL set.
+ */
+static Boolean WGSinBssp (BioseqSetPtr bssp)
+
+{
+  Boolean  found = FALSE;
+
+  if (bssp != NULL) {
+    VisitBioseqsInSet (bssp, (Pointer) (&found), IsBspWGS);
+  }
+
+  return found;
+}
+
+/*
+ * HasBadPlasmidChromLinkName
+ *
+ * Returns TRUE when `name` looks unsuitable as a plasmid, chromosome or
+ * linkage-group name: it is longer than 33 characters, it contains one of
+ * the words "plasmid", "chromosome", "linkage group" or "chr"
+ * (case-sensitive StringStr search), or it contains `taxname` when taxname
+ * has text. A name with no text is never reported.
+ */
+static Boolean HasBadPlasmidChromLinkName (CharPtr name, CharPtr taxname)
+
+{
+  static CharPtr  unwanted [] = {
+    "plasmid", "chromosome", "linkage group", "chr", NULL
+  };
+  int             i;
+
+  if (StringHasNoText (name)) return FALSE;
+
+  if (StringLen (name) > 33) return TRUE;
+
+  for (i = 0; unwanted [i] != NULL; i++) {
+    if (StringStr (name, unwanted [i]) != NULL) return TRUE;
+  }
+
+  if (StringDoesHaveText (taxname)) {
+    if (StringStr (name, taxname) != NULL) return TRUE;
+  }
+
+  return FALSE;
+}
+
+
/*****************************************************************************
*
* ValidateBioseqContext(gcp)
@@ -18739,6 +21650,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
SeqIdPtr sip;
ValNodePtr vnp = NULL;
MolInfoPtr mip = NULL;
+ DbtagPtr dbt;
SeqMgrDescContext dcontext;
SeqMgrFeatContext fcontext;
BioseqContextPtr bcp;
@@ -18753,6 +21665,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
Boolean isPDB = FALSE;
Boolean is_wgs = FALSE;
Boolean is_gb = FALSE;
+ Boolean is_eb_db = FALSE;
Boolean is_ac = FALSE;
Boolean is_ch_or_cm = FALSE;
Boolean is_nc = FALSE;
@@ -18769,6 +21682,8 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
Boolean is_virus = FALSE;
Boolean has_cds = FALSE;
Boolean has_chromosome = FALSE;
+ Boolean has_linkage_group = FALSE;
+ Boolean has_wgs_general = FALSE;
ErrSev sev;
SubSourcePtr ssp;
CharPtr str;
@@ -18781,6 +21696,12 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
/*
Char buf1[255];
*/
+ EMBLBlockPtr ebp;
+ GBBlockPtr gbp;
+ Boolean okay;
+ Char prefix [32];
+ ValNodePtr secondaries;
+ Uint4 whichdb;
vsp = (ValidStructPtr) (gcp->userdata);
bsp = (BioseqPtr) (gcp->thisitem);
@@ -18822,9 +21743,11 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
mipItemtype = OBJ_SEQDESC;
}
} else {
+//LCOV_EXCL_START
bcp = BioseqContextNew (bsp);
vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_molinfo, NULL, NULL);
BioseqContextFree (bcp);
+//LCOV_EXCL_STOP
}
if (vnp != NULL) {
mip = (MolInfoPtr) vnp->data.ptrvalue;
@@ -18919,9 +21842,13 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
if (ssp->subtype == SUBSRC_transgenic) {
is_transgenic = TRUE;
} else if (ssp->subtype == SUBSRC_chromosome) {
- if (StringDoesHaveText (ssp->name)) {
- has_chromosome = TRUE;
- }
+ if (StringDoesHaveText(ssp->name)) {
+ has_chromosome = TRUE;
+ }
+ } else if (ssp->subtype == SUBSRC_linkage_group) {
+ if (StringDoesHaveText(ssp->name)) {
+ has_linkage_group = TRUE;
+ }
} else if (ssp->subtype == SUBSRC_other) {
if (mip != NULL && (StringICmp (ssp->name, "cRNA") == 0)) {
oldEntityID = gcp->entityID;
@@ -18943,6 +21870,23 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
gcp->thistype = oldItemtype;
}
}
+ if (ssp->subtype == SUBSRC_chromosome || ssp->subtype == SUBSRC_plasmid_name || ssp->subtype == SUBSRC_linkage_group) {
+ if (HasBadPlasmidChromLinkName (ssp->name, taxname)) {
+ oldEntityID = gcp->entityID;
+ oldItemID = gcp->itemID;
+ oldItemtype = gcp->thistype;
+
+ gcp->entityID = dcontext.entityID;
+ gcp->itemID = dcontext.itemID;
+ gcp->thistype = OBJ_SEQDESC;
+
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "Problematic plasmid/chromosome/linkage group name '%s'", ssp->name);
+
+ gcp->entityID = oldEntityID;
+ gcp->itemID = oldItemID;
+ gcp->thistype = oldItemtype;
+ }
+ }
}
if (is_transgenic && ISA_na (bsp->mol)) {
if (SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext) == NULL) {
@@ -18961,12 +21905,37 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
gcp->itemID = mipItemID;
gcp->thistype = mipItemtype;
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_ConflictingBiomolTech, "TSA sequence should not be DNA");
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_ConflictingBiomolTech, "TSA sequence should not be DNA");
gcp->entityID = oldEntityID;
gcp->itemID = oldItemID;
gcp->thistype = oldItemtype;
}
+
+ if (mip != NULL && mip->tech == MI_TECH_tsa && mip->biomol == MOLECULE_TYPE_TRANSCRIBED_RNA) {
+ oldEntityID = gcp->entityID;
+ oldItemID = gcp->itemID;
+ oldItemtype = gcp->thistype;
+
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
+ while (sfp != NULL) {
+
+ if (SeqLocStrand (sfp->location) == Seq_strand_minus) {
+ gcp->entityID = sfp->idx.entityID;
+ gcp->itemID = sfp->idx.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CDSonMinusStrandTranscribedRNA, "Coding region on TSA transcribed RNA should not be on the minus strand");
+ }
+
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext);
+ }
+
+ gcp->entityID = oldEntityID;
+ gcp->itemID = oldItemID;
+ gcp->thistype = oldItemtype;
+ }
+
if (BioseqHasKeyword(bsp, "BARCODE") && BioseqHasKeyword(bsp, "UNVERIFIED")) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadKeyword, "Sequence has both BARCODE and UNVERIFIED keywords");
}
@@ -19018,50 +21987,13 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
gcp->thistype = oldItemtype;
}
+ TestForUnwantedCompleteFlag(bsp, gcp);
+
bvs.is_mrna = FALSE;
bvs.is_prerna = FALSE;
if (bsp != NULL && ISA_na (bsp->mol)) {
if (mip != NULL) {
- if (mip->biomol == MOLECULE_TYPE_GENOMIC && mip->completeness == 1) {
- sev = SEV_ERROR;
- if (mip->tech == MI_TECH_htgs_3) {
- sev = SEV_WARNING;
- }
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
- if (sip->choice == SEQID_GENBANK) {
- is_gb = TRUE;
- }
- }
- if (is_gb) {
- vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext);
- if (vnp != NULL) {
- str = (CharPtr) vnp->data.ptrvalue;
- if (! StringHasNoText (str)) {
- if (StringISearch (str, "complete sequence") == NULL &&
- StringISearch (str, "complete genome") == NULL) {
-
- oldEntityID = gcp->entityID;
- oldItemID = gcp->itemID;
- oldItemtype = gcp->thistype;
-
- gcp->entityID = mipEntityID;
- gcp->itemID = mipItemID;
- gcp->thistype = mipItemtype;
-
- if (bsp->topology == 2) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_CompleteCircleProblem, "Circular topology has complete flag set, but title should say complete sequence or complete genome");
- } else {
- ValidErr (vsp, sev, ERR_SEQ_DESCR_UnwantedCompleteFlag, "Suspicious use of complete");
- }
-
- gcp->entityID = oldEntityID;
- gcp->itemID = oldItemID;
- gcp->thistype = oldItemtype;
- }
- }
- }
- }
- } else if (mip->biomol == MOLECULE_TYPE_MRNA) {
+ if (mip->biomol == MOLECULE_TYPE_MRNA) {
bvs.is_mrna = TRUE;
} else if (mip->biomol == MOLECULE_TYPE_PRE_MRNA) {
bvs.is_prerna = TRUE;
@@ -19113,16 +22045,24 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
/* check proteins in nuc-prot set have a CdRegion */
if (vsp->bssp != NULL) {
if (vsp->bssp->_class == 1) { /* in a nuc-prot set */
+ sfp = NULL;
if (vsp->useSeqMgrIndexes) {
sfp = SeqMgrGetCDSgivenProduct (bsp, NULL);
if (sfp == NULL) {
sfp = SeqMgrGetPROTgivenProduct (bsp, NULL); /* now instantiating and indexing products of protein processing */
}
} else {
+//LCOV_EXCL_START
sfp = SeqEntryGetSeqFeat (vsp->sep, 3, NULL, NULL, 1, bsp);
+//LCOV_EXCL_STOP
+ }
+ if (sfp == NULL) { /* no CdRegion points to this bsp */
+ sev = SEV_ERROR;
+ if (WGSinBssp (vsp->bssp)) {
+ sev = SEV_REJECT;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_PKG_NoCdRegionPtr, "No CdRegion in nuc-prot set points to this protein");
}
- if (sfp == NULL) /* no CdRegion points to this bsp */
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_PKG_NoCdRegionPtr, "No CdRegion in nuc-prot set points to this protein");
}
}
}
@@ -19132,7 +22072,9 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
bvs.bsp = bsp;
ValidateBioseqContextIndexed (bsp, &bvs);
} else {
+//LCOV_EXCL_START
GatherSeqEntry (vsp->sep, &bvs, ValidateBioseqContextGather, &gs);
+//LCOV_EXCL_STOP
}
vsp->gcp = gcp; /* reset the gcp pointer changed in previous gather */
@@ -19146,10 +22088,8 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
}
if (omdp == NULL || omdp->datatype != OBJ_SEQSUB) {
sev = SEV_ERROR;
- if (!IsNoncuratedRefSeq (bsp, &sev)) {
- if (! IsWgsIntermediate (vsp->sep)) {
- ValidErr (vsp, sev, ERR_SEQ_DESCR_NoPubFound, "No publications refer to this Bioseq.");
- }
+ if ((! IsNoncuratedRefSeq (bsp, &sev)) && (! IsWgsIntermediate (vsp->sep)) && (! IsTsaIntermediate (vsp->sep))) {
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_NoPubFound, "No publications refer to this Bioseq.");
}
}
}
@@ -19174,6 +22114,9 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
sip->choice == SEQID_EMBL ||
sip->choice == SEQID_DDBJ) {
is_gb = TRUE;
+ if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
+ is_eb_db = TRUE;
+ }
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL && tsip->accession != NULL) {
acclen = StringLen (tsip->accession);
@@ -19181,6 +22124,10 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
is_wgs = TRUE;
} else if (acclen == 13) {
is_wgs = TRUE;
+ } else if (acclen == 14) {
+ is_wgs = TRUE;
+ } else if (acclen == 15) {
+ is_wgs = TRUE;
/*
} else if (StringNCmp (tsip->accession, "CH", 2) == 0 ||
StringNCmp (tsip->accession, "CM", 2) == 0) {
@@ -19209,10 +22156,17 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
is_ac = TRUE;
}
}
+ } else if (sip->choice == SEQID_GENERAL) {
+ dbt = (DbtagPtr) sip->data.ptrvalue;
+ if (dbt != NULL) {
+ if (StringNICmp (dbt->db, "WGS:", 4) == 0) {
+ has_wgs_general = TRUE;
+ }
+ }
}
}
if (is_nc || is_ac) {
- if (! is_prokaryote && ! is_organelle && ! has_chromosome && ! is_plasmid) {
+ if (!is_prokaryote && !is_organelle && !has_chromosome && !is_plasmid && !has_linkage_group) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_MissingChromosome, "Missing chromosome qualifier on NC or AC RefSeq record");
}
}
@@ -19224,10 +22178,50 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_Inconsistent, "WGS accession should have Mol-info.tech of wgs");
}
} else if (mip != NULL && mip->tech == MI_TECH_wgs && is_gb) {
- if (is_ch_or_cm || is_local_only) {
- /* skip warning if CH or CM or SEQID_LOCAL only */
+ if (is_ch_or_cm || is_local_only || has_wgs_general) {
+ /* skip warning if CH or CM (or other segset ID) or SEQID_LOCAL only */
} else {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_Inconsistent, "Mol-info.tech of wgs should have WGS accession");
+ secondaries = NULL;
+ vnp = GetNextDescriptorUnindexed (bsp, Seq_descr_genbank, NULL);
+ if (vnp != NULL && vnp->choice == Seq_descr_genbank) {
+ gbp = (GBBlockPtr) vnp->data.ptrvalue;
+ if (gbp != NULL) {
+ secondaries = gbp->extra_accessions;
+ }
+ }
+ if (secondaries == NULL) {
+ vnp = GetNextDescriptorUnindexed (bsp, Seq_descr_embl, NULL);
+ if (vnp != NULL && vnp->choice == Seq_descr_embl) {
+ ebp = (EMBLBlockPtr) vnp->data.ptrvalue;
+ if (ebp != NULL) {
+ secondaries = ebp->extra_acc;
+ }
+ }
+ }
+ okay = TRUE;
+ if (secondaries != NULL) {
+ for (vnp = secondaries; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ StringNCpy_0 (prefix, str, sizeof (prefix));
+ whichdb = WHICH_db_accession (prefix);
+ if (ACCN_IS_WGS (whichdb)) {
+ acclen = StringLen (prefix);
+ if (acclen > 8 && StringCmp (prefix + acclen - 6, "000000") == 0) {
+ okay = FALSE;
+ }
+ }
+ }
+ }
+ if (okay) {
+ sev = SEV_ERROR;
+ if (is_eb_db) {
+ sev = SEV_WARNING;
+ }
+ if (! is_eb_db) {
+ ValidErr (vsp, sev, ERR_SEQ_DESCR_InconsistentWGSFlags, "Mol-info.tech of wgs should have WGS accession");
+ }
+ }
}
}
if (is_nc) {
@@ -19330,9 +22324,6 @@ static void CheckPeptideOnCodonBoundary (ValidStructPtr vsp, GatherContextPtr gc
Boolean partial5, partial3;
Int4 pos1, pos2, adjust = 0, mod1, mod2;
- if (SeqLocStop (sfp->location) == 2150166) {
- mod1 = 0;
- }
cds = SeqMgrGetOverlappingCDS (sfp->location, NULL);
if (cds == NULL)
return;
@@ -19379,8 +22370,22 @@ static void CheckPeptideOnCodonBoundary (ValidStructPtr vsp, GatherContextPtr gc
}
static CharPtr legal_repeat_types[] = {
- "tandem", "inverted", "flanking", "terminal",
- "direct", "dispersed", "other", NULL
+ "tandem",
+ "inverted",
+ "flanking",
+ "nested",
+ "terminal",
+ "direct",
+ "dispersed",
+ "long_terminal_repeat",
+ "non_LTR_retrotransposon_polymeric_tract",
+ "X_element_combinatorial_repeat",
+ "Y_prime_element",
+ "telomeric_repeat",
+ "centromeric_repeat",
+ "engineered_foreign_repetitive_element",
+ "other",
+ NULL
};
static CharPtr legal_cons_splice_strings [] = {
@@ -19409,6 +22414,58 @@ static CharPtr legal_mobile_element_strings [] = {
NULL
};
+static CharPtr legal_modified_bases[] = { /* NULL-terminated table of accepted mod_base qualifier values; the GBQUAL_mod_base checks match gbqual->val against each entry with StringICmp */
+ "ac4c",
+ "chm5u",
+ "cm",
+ "cmnm5s2u",
+ "cmnm5u",
+ "d",
+ "fm",
+ "gal q", /* entry contains an embedded space */
+ "gm",
+ "i",
+ "i6a",
+ "m1a",
+ "m1f",
+ "m1g",
+ "m1i",
+ "m22g",
+ "m2a",
+ "m2g",
+ "m3c",
+ "m4c",
+ "m5c",
+ "m6a",
+ "m7g",
+ "mam5u",
+ "mam5s2u",
+ "man q", /* entry contains an embedded space */
+ "mcm5s2u",
+ "mcm5u",
+ "mo5u",
+ "ms2i6a",
+ "ms2t6a",
+ "mt6a",
+ "mv",
+ "o5u",
+ "osyw",
+ "p",
+ "q",
+ "s2c",
+ "s2t",
+ "s2u",
+ "s4u",
+ "t",
+ "t6a",
+ "tm",
+ "um",
+ "yw",
+ "x",
+ "OTHER", /* the only entry stored in upper case; the lookups use StringICmp on this table */
+ NULL /* sentinel that ends the lookup loops */
+};
+
NLM_EXTERN Boolean LookForECnumberPattern (CharPtr str)
{
@@ -19610,19 +22667,19 @@ NLM_EXTERN void ECNumberFSAFreeAll (void)
ic_code_list = ValNodeFreeData (ic_code_list);
}
-static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local, size_t numitems, Boolean trimAtTab)
+static TextFsaPtr GetECNumberFSA (ValidStructPtr vsp, CharPtr prop, CharPtr file, CharPtr PNTR local, size_t numitems, Boolean trimAtTab)
{
FileCache fc;
FILE *fp = NULL;
TextFsaPtr fsa;
Int2 i;
- Char line [512];
+ Char line [1024];
Char path [PATH_MAX];
CharPtr ptr;
ErrSev sev;
CharPtr str;
- Char tmp [128];
+ Char tmp [512];
Boolean use_data_dir_first = FALSE;
fsa = (TextFsaPtr) GetAppProperty (prop);
@@ -19635,11 +22692,18 @@ static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local
}
#endif
- if (use_data_dir_first && FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
- FileBuildPath (path, NULL, file);
- sev = ErrSetMessageLevel (SEV_ERROR);
- fp = FileOpen (path, "r");
- ErrSetMessageLevel (sev);
+ if (use_data_dir_first) {
+ if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
+ FileBuildPath (path, NULL, file);
+ sev = ErrSetMessageLevel (SEV_ERROR);
+ fp = FileOpen (path, "r");
+ ErrSetMessageLevel (sev);
+ if (fp == NULL && vsp != NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberDataMissing, "Unable to use EC number file '%s' in data directory", file);
+ }
+ } else if (vsp != NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberDataMissing, "Unable to find EC number file '%s' in data directory", file);
+ }
}
fsa = TextFsaNew ();
@@ -19697,28 +22761,28 @@ static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local
return fsa;
}
-static TextFsaPtr GetSpecificECNumberFSA (void)
+static TextFsaPtr GetSpecificECNumberFSA (ValidStructPtr vsp) /* Thin wrapper: returns GetECNumberFSA's result for property "SpecificECNumberFSA", file ecnum_specific.txt and table kECNum_specific; vsp is forwarded unchanged (the caller visible in this diff passes NULL) */
{
- return (GetECNumberFSA ("SpecificECNumberFSA", "ecnum_specific.txt", (CharPtr PNTR) kECNum_specific, sizeof (kECNum_specific) / sizeof (char*), TRUE));
+ return (GetECNumberFSA (vsp, "SpecificECNumberFSA", "ecnum_specific.txt", (CharPtr PNTR) kECNum_specific, sizeof (kECNum_specific) / sizeof (char*), TRUE));
}
-static TextFsaPtr GetAmbiguousECNumberFSA (void)
+static TextFsaPtr GetAmbiguousECNumberFSA (ValidStructPtr vsp) /* Thin wrapper: returns GetECNumberFSA's result for property "AmbiguousECNumberFSA", file ecnum_ambiguous.txt and table kECNum_ambiguous; vsp is forwarded unchanged (the caller visible in this diff passes NULL) */
{
- return (GetECNumberFSA ("AmbiguousECNumberFSA", "ecnum_ambiguous.txt", (CharPtr PNTR) kECNum_ambiguous, sizeof (kECNum_ambiguous) / sizeof (char*), TRUE));
+ return (GetECNumberFSA (vsp, "AmbiguousECNumberFSA", "ecnum_ambiguous.txt", (CharPtr PNTR) kECNum_ambiguous, sizeof (kECNum_ambiguous) / sizeof (char*), TRUE));
}
-static TextFsaPtr GetDeletedECNumberFSA (void)
+static TextFsaPtr GetDeletedECNumberFSA (ValidStructPtr vsp) /* Thin wrapper: returns GetECNumberFSA's result for property "DeletedECNumberFSA", file ecnum_deleted.txt and table kECNum_deleted; vsp is forwarded unchanged (the caller visible in this diff passes NULL) */
{
- return (GetECNumberFSA ("DeletedECNumberFSA", "ecnum_deleted.txt", (CharPtr PNTR) kECNum_deleted, sizeof (kECNum_deleted) / sizeof (char*), TRUE));
+ return (GetECNumberFSA (vsp, "DeletedECNumberFSA", "ecnum_deleted.txt", (CharPtr PNTR) kECNum_deleted, sizeof (kECNum_deleted) / sizeof (char*), TRUE));
}
-static TextFsaPtr GetReplacedECNumberFSA (void)
+static TextFsaPtr GetReplacedECNumberFSA (ValidStructPtr vsp) /* Thin wrapper: returns GetECNumberFSA's result for file ecnum_replaced.txt and table kECNum_replaced; vsp is forwarded unchanged (the caller visible in this diff passes NULL) */
{
- return (GetECNumberFSA ("ReplacedEECNumberFSA", "ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*), TRUE));
+ return (GetECNumberFSA (vsp, "ReplacedEECNumberFSA", "ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*), TRUE)); /* NOTE(review): the property key is spelled with a doubled 'E'; it is the same on the removed and added lines, so this commit did not introduce it */
}
NLM_EXTERN Boolean ECnumberNotInList (CharPtr str)
@@ -19730,7 +22794,9 @@ NLM_EXTERN Boolean ECnumberNotInList (CharPtr str)
CharPtr ptr;
Int4 state;
- fsa = GetSpecificECNumberFSA ();
+ if (StringHasNoText (str)) return FALSE;
+
+ fsa = GetSpecificECNumberFSA (NULL);
if (fsa == NULL) return FALSE;
state = 0;
@@ -19742,7 +22808,7 @@ NLM_EXTERN Boolean ECnumberNotInList (CharPtr str)
state = TextFsaNext (fsa, state, ' ', &matches);
if (matches != NULL) return FALSE;
- fsa = GetAmbiguousECNumberFSA ();
+ fsa = GetAmbiguousECNumberFSA (NULL);
if (fsa == NULL) return FALSE;
state = 0;
@@ -19766,7 +22832,9 @@ NLM_EXTERN Boolean ECnumberWasDeleted (CharPtr str)
CharPtr ptr;
Int4 state;
- fsa = GetDeletedECNumberFSA ();
+ if (StringHasNoText (str)) return FALSE;
+
+ fsa = GetDeletedECNumberFSA (NULL);
if (fsa == NULL) return FALSE;
state = 0;
@@ -19790,7 +22858,9 @@ NLM_EXTERN Boolean ECnumberWasReplaced (CharPtr str)
CharPtr ptr;
Int4 state;
- fsa = GetReplacedECNumberFSA ();
+ if (StringHasNoText (str)) return FALSE;
+
+ fsa = GetReplacedECNumberFSA (NULL);
if (fsa == NULL) return FALSE;
state = 0;
@@ -19840,7 +22910,7 @@ static int LIBCALLBACK SortVnpByEcBefore (VoidPtr ptr1, VoidPtr ptr2)
return StringCmp (str1, str2);
}
-static void SetupECReplacementTable (CharPtr file, CharPtr PNTR local, size_t numitems)
+static void SetupECReplacementTable (ValidStructPtr vsp, CharPtr file, CharPtr PNTR local, size_t numitems)
{
Char buf [256];
@@ -19867,11 +22937,18 @@ static void SetupECReplacementTable (CharPtr file, CharPtr PNTR local, size_t nu
}
#endif
- if (use_data_dir_first && FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
- FileBuildPath (path, NULL, file);
- sev = ErrSetMessageLevel (SEV_ERROR);
- fp = FileOpen (path, "r");
- ErrSetMessageLevel (sev);
+ if (use_data_dir_first) {
+ if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
+ FileBuildPath (path, NULL, file);
+ sev = ErrSetMessageLevel (SEV_ERROR);
+ fp = FileOpen (path, "r");
+ ErrSetMessageLevel (sev);
+ if (fp == NULL && vsp != NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberDataMissing, "Unable to use EC number file '%s' in data directory", file);
+ }
+ } else if (vsp != NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberDataMissing, "Unable to find EC number file '%s' in data directory", file);
+ }
}
if (fp != NULL) {
@@ -19978,7 +23055,7 @@ static EcRepPtr GetECReplacement (CharPtr str, BoolPtr splitp)
return erp;
}
-static Boolean EcCnumberWasSplit (CharPtr str)
+static Boolean EcCnumberWasSplit (ValidStructPtr vsp, CharPtr str)
{
EcRepPtr erp;
@@ -19986,7 +23063,7 @@ static Boolean EcCnumberWasSplit (CharPtr str)
if (StringHasNoText (str)) return FALSE;
- SetupECReplacementTable ("ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*));
+ SetupECReplacementTable (vsp, "ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*));
split = FALSE;
erp = GetECReplacement (str, &split);
@@ -19995,6 +23072,7 @@ static Boolean EcCnumberWasSplit (CharPtr str)
return split;
}
+//LCOV_EXCL_START
static Boolean SqnGetLocusTagFromProtRef (SeqFeatPtr sfp, CharPtr buf, size_t len)
{
@@ -20167,7 +23245,7 @@ NLM_EXTERN Int4 UpdateReplacedECNumbersEx (SeqEntryPtr sep, ValNodePtr PNTR head
ed.only_unambig = only_unambig;
ed.justwarn = justwarn;
- SetupECReplacementTable ("ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*));
+ SetupECReplacementTable (NULL, "ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*));
if (ec_rep_data != NULL && ec_rep_len > 0) {
VisitFeaturesInSep (sep, (Pointer) &ed, UpdateProtEC);
}
@@ -20264,6 +23342,7 @@ NLM_EXTERN Int4 DeleteBadECNumbers (SeqEntryPtr sep)
{
return DeleteBadECNumbersEx (sep, NULL, NULL, FALSE);
}
+//LCOV_EXCL_STOP
@@ -20320,11 +23399,12 @@ static void ValidatePseudogene (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
static void ValidateRptUnit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, GBQualPtr gbqual, Int2 qual, CharPtr key)
{
- Boolean badchars, found, just_nuc_letters, multi_rpt_unit;
+ Boolean badchars, found, just_nuc_letters, multi_rpt_unit, in_range;
Char ch;
SeqMgrFeatContext context;
Int4 from = -1, to = -1, ffrom, fto, ftmp;
CharPtr ptr, tmp;
+ SeqLocPtr slp;
if (vsp == NULL || gcp == NULL || sfp == NULL || gbqual == NULL || gbqual->val == NULL || key == NULL) return;
@@ -20386,10 +23466,32 @@ static void ValidateRptUnit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (from < ffrom || from > fto || to < ffrom || to > fto) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_RptUnitRangeProblem, "/rpt_unit_range is not within sequence length");
}
+ } else if (LocationHasNullsBetween (sfp->location)) {
+ in_range = FALSE;
+ slp = SeqLocFindNext (sfp->location, NULL);
+ while (slp != NULL) {
+ ffrom = SeqLocStart (slp);
+ fto = SeqLocStop (slp);
+ if (ffrom > fto) {
+ ftmp = ffrom;
+ ffrom = fto;
+ fto = ftmp;
+ }
+ if (from >= ffrom && from <= fto && to >= ffrom && to <= fto) {
+ in_range = TRUE;
+ }
+ slp = SeqLocFindNext (sfp->location, slp);
+ }
+ if (! in_range) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_RptUnitRangeProblem, "/rpt_unit_range is not within ordered intervals");
+ }
}
}
} else {
+ //LCOV_EXCL_START
+ //if not base range, BasicCleanup converts qual to rpt_unit_seq
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "/rpt_unit_range is not a base range");
+ //LCOV_EXCL_STOP
}
}
@@ -20458,7 +23560,8 @@ NLM_EXTERN CharPtr GetGBFeatKeyForFeature (SeqFeatPtr sfp)
return key;
}
-
+//LCOV_EXCL_START
+//not used for validation
NLM_EXTERN Boolean ShouldSuppressGBQual(Uint1 subtype, CharPtr qual_name)
{
if (StringHasNoText (qual_name)) {
@@ -20491,7 +23594,7 @@ NLM_EXTERN Boolean ShouldSuppressGBQual(Uint1 subtype, CharPtr qual_name)
return FALSE;
}
-
+//not used for validation
NLM_EXTERN Boolean ShouldBeAGBQual (Uint1 subtype, Int2 qual, Boolean allowProductGBQual)
{
@@ -20556,7 +23659,7 @@ NLM_EXTERN Boolean ShouldBeAGBQual (Uint1 subtype, Int2 qual, Boolean allowProdu
return TRUE;
}
-
+//LCOV_EXCL_STOP
static CharPtr sWrongQualReasons[] = {
"conflicting codon_start values",
@@ -20629,18 +23732,88 @@ NLM_EXTERN Int4 IsQualValidForFeature (GBQualPtr gbqual, SeqFeatPtr sfp)
return rval;
}
+static void AssemblyGapFeatValidate ( /* Checks an assembly_gap feature whose location is a single interval: widens a copy of the location by one base on each side, fetches that sequence, then reports 'N' residues touching the interval and non-'N' residues inside it */
+ ValidStructPtr vsp, /* validation state handed to ValidErr; vsp->genomeSubmission raises the flanking-N severity from SEV_WARNING to SEV_ERROR */
+ GatherContextPtr gcp, /* only NULL-checked in this function */
+ SeqFeatPtr sfp, /* feature under test; only a copy of sfp->location is modified */
+ BioseqPtr bsp /* its length decides whether the interval can be widened on the right */
+)
+
+{
+ Char ch;
+ Int4 count = 0; /* number of non-'N' residues seen between the two flanking positions */
+ int i;
+ Boolean is5 = FALSE; /* set when the first fetched residue is 'N' */
+ Boolean is3 = FALSE; /* set when the last fetched residue is 'N' */
+ size_t len;
+ CharPtr seq;
+ ErrSev sev = SEV_WARNING; /* severity of the flanking-N messages only */
+ SeqIntPtr sintp;
+ SeqLocPtr slp;
+
+ if (vsp == NULL || gcp == NULL || sfp == NULL || sfp->location == NULL || bsp == NULL) return;
+
+ slp = (SeqLocPtr) AsnIoMemCopy ((Pointer) sfp->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite); /* private copy, so the widening below leaves the feature's own location alone */
+ if (slp == NULL) return;
+
+ if (vsp->genomeSubmission) {
+ sev = SEV_ERROR;
+ }
+
+ if (slp->choice == SEQLOC_INT) { /* any other location type is not examined */
+ sintp = (SeqIntPtr) slp->data.ptrvalue;
+ if (sintp != NULL && sintp->from > 0 && sintp->to < bsp->length - 1) { /* requires at least one base on each side of the interval */
+ (sintp->from)--;
+ (sintp->to)++;
+ seq = GetSequenceByLocation (slp); /* presumably returns an allocated residue string for the widened interval (it is released with MemFree below) - confirm */
+ if (seq != NULL) {
+ len = StringLen (seq);
+ if (len > 0 && len == SeqLocLen (slp)) { /* all checks are skipped unless the string length equals the widened location length */
+ ch = seq [0]; /* the extra base added before the original interval */
+ if (ch == 'N') {
+ is5 = TRUE;
+ }
+ ch = seq [len - 1]; /* the extra base added after the original interval */
+ if (ch == 'N') {
+ is3 = TRUE;
+ }
+ if (is5 && is3) {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_AssemblyGapAdjacentToNs, "Assembly_gap flanked by Ns on 5' and 3' sides");
+ } else if (is5) {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_AssemblyGapAdjacentToNs, "Assembly_gap flanked by Ns on 5' side");
+ } else if (is3) {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_AssemblyGapAdjacentToNs, "Assembly_gap flanked by Ns on 3' side");
+ }
+ for (i = 1; i < len - 1; i++) { /* indices 1..len-2 skip the two added flanking bases; NOTE(review): int compared with size_t, but len > 0 was checked above so len - 1 cannot wrap */
+ ch = seq [i];
+ if (ch != 'N') {
+ count++;
+ }
+ }
+ if (count > 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_AssemblyGapCoversSequence, "Assembly_gap extends into sequence"); /* always SEV_ERROR; the sev variable is not used for this message */
+ }
+ }
+ }
+ MemFree (seq);
+ }
+ }
+
+ SeqLocFree (slp); /* releases the copy made above */
+}
static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, ImpFeatPtr ifp)
{
Int2 adv;
BioseqPtr bsp;
+ SeqFeatPtr cds;
Char ch;
Boolean failed;
Boolean found;
IntFuzzPtr fuzz;
GBQualPtr gbqual;
- SeqMgrFeatContext gcontext;
+ SeqMgrFeatContext fcontext, gcontext;
SeqFeatPtr gene;
GeneRefPtr grp;
Int2 i;
@@ -20648,21 +23821,31 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
Boolean just_nuc_letters;
Boolean just_prt_letters;
CharPtr key;
+ Int4 left;
size_t len;
Boolean multi_compare;
Boolean no_white_space;
+ Int2 num_intervals;
+ SeqFeatPtr nxt;
+ Boolean ok;
Boolean only_digits;
+ ProtRefPtr prp;
+ SeqFeatPtr prt;
CharPtr ptr;
Int2 qual;
Char range[32];
+ Int4 right;
ErrSev sev;
SeqIntPtr sint;
SeqIdPtr sip;
SeqLocPtr slp;
SeqPntPtr spp;
CharPtr str;
+ Uint1 strand;
CharPtr tmp;
+ Boolean twintron;
Int2 val;
+ ValNodePtr vnp;
Int4 qvalid;
if (vsp == NULL || gcp == NULL || sfp == NULL || ifp == NULL)
@@ -20707,15 +23890,32 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (vsp->is_refseq_in_sep) {
sev = SEV_ERROR;
}
- ValidErr (vsp, sev, ERR_SEQ_FEAT_InvalidForType, "Peptide processing feature should be converted to the appropriate protein feature subtype");
+ if (vsp->is_embl_ddbj_in_sep) {
+ if (SeqMgrGetOverlappingCDS (sfp->location, NULL) == NULL) {
+ sev = SEV_ERROR;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_PeptideFeatureLacksCDS, "sig/mat/transit_peptide feature cannot be associated with a protein product of a coding region feature");
+ } else {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_PeptideFeatureLacksCDS, "Peptide processing feature should be converted to the appropriate protein feature subtype");
+ }
CheckPeptideOnCodonBoundary (vsp, gcp, sfp, key);
} else if (StringICmp (key, "preprotein") == 0 ||
StringICmp (key, "proprotein") == 0) {
+ //LCOV_EXCL_START
+ //preprotein and proprotein are unknown feature keys, this code is never reached
sev = SEV_WARNING;
if (vsp->is_refseq_in_sep) {
sev = SEV_ERROR;
}
- ValidErr (vsp, sev, ERR_SEQ_FEAT_InvalidForType, "Peptide processing feature should be converted to the appropriate protein feature subtype");
+ if (vsp->is_embl_ddbj_in_sep) {
+ if (SeqMgrGetOverlappingCDS (sfp->location, NULL) == NULL) {
+ sev = SEV_ERROR;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_PeptideFeatureLacksCDS, "Pre/pro protein feature cannot be associated with a protein product of a coding region feature");
+ } else {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_PeptideFeatureLacksCDS, "Peptide processing feature should be converted to the appropriate protein feature subtype");
+ }
+ //LCOV_EXCL_STOP
} else if (StringICmp (key, "mRNA") == 0 ||
StringICmp (key, "tRNA") == 0 ||
StringICmp (key, "rRNA") == 0 ||
@@ -20724,9 +23924,14 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
StringICmp (key, "snoRNA") == 0 ||
StringICmp (key, "misc_RNA") == 0 ||
StringICmp (key, "precursor_RNA") == 0) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts imp RNA to real RNA
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidForType,
"RNA feature should be converted to the appropriate RNA feature subtype, location should be converted manually");
+ //LCOV_EXCL_STOP
} else if (StringICmp (key, "CDS") == 0) {
+ //LCOV_EXCL_START
+ // Basic Cleanup converts imp CDS to real CDS
failed = TRUE; /* impfeat CDS must be pseudo; fail if not */
if (sfp->pseudo) {
failed = FALSE;
@@ -20756,6 +23961,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (failed) {
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_ImpCDSnotPseudo, "ImpFeat CDS should be pseudo");
}
+ //LCOV_EXCL_STOP
} else if (StringICmp (key, "misc_feature") == 0) {
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
if (StringCmp (gbqual->qual, "standard_name") == 0) {
@@ -20767,11 +23973,60 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (StringHasNoText(sfp->comment) && sfp->qual == NULL && sfp->dbxref == NULL) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NeedsNote, "A note or other qualifier is required for a misc_feature");
}
+ } else if (StringICmp (key, "intron") == 0) {
+ num_intervals = 0;
+ for (slp = SeqLocFindNext (sfp->location, NULL); slp != NULL; slp = SeqLocFindNext (sfp->location, slp)) {
+ num_intervals++;
+ }
+ if (num_intervals > 1) {
+ sev = SEV_ERROR;
+ if (vsp->is_embl_ddbj_in_sep) {
+ sev = SEV_WARNING;
+ }
+ twintron = FALSE;
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &gcontext) == sfp && gcontext.numivals == 2 && gcontext.ivals != NULL) {
+ left = gcontext.ivals [1];
+ right = gcontext.ivals [2];
+ strand = gcontext.strand;
+ nxt = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_intron, &gcontext);
+ if (nxt != NULL) {
+ if (strand == gcontext.strand) {
+ if (left + 1 == gcontext.left && right - 1 == gcontext.right) {
+ twintron = TRUE;
+ }
+ }
+ }
+ }
+ if (twintron) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_MultiIntervalIntron, "Multi-interval intron contains possible twintron");
+ } else {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_MultiIntervalIntron, "An intron should not have multiple intervals");
+ }
+ }
+ } else if (StringICmp (key, "repeat_region") == 0) {
+ if (StringHasNoText(sfp->comment) && sfp->qual == NULL && sfp->dbxref == NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NeedsNote, "repeat_region has no qualifiers");
+ }
+ } else if (StringICmp (key, "regulatory") == 0) {
+ for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
+ if (StringICmp (gbqual->qual, "regulatory_class") != 0) continue;
+ if (StringHasNoText (gbqual->val)) continue;
+ if (IsStringInRegulatoryClassList (gbqual->val)) continue;
+ if (StringICmp (gbqual->val, "other") == 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "The regulatory_class value should not be '%s'", gbqual->val);
+ } else {
+ /*
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_InvalidQualifierValue, "Other regulatory_class value '%s'", gbqual->val);
+ */
+ }
+ }
} else if (StringICmp (key, "assembly_gap") == 0) {
bsp = BioseqFindFromSeqLoc (sfp->location);
if (! IsDeltaSeqWithFarpointers (bsp)) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_GapFeatureProblem, "An assembly_gap feature should only be on a contig record");
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_GapFeatureProblem, "An assembly_gap feature should only be on a contig record");
}
+ AssemblyGapFeatValidate (vsp, gcp, sfp, bsp);
}
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
qvalid = IsQualValidForFeature (gbqual, sfp);
@@ -20828,6 +24083,8 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
} else if (val == GBQUAL_pseudogene) {
ValidatePseudogene (vsp, gcp, sfp, gbqual);
} else if (val == GBQUAL_label) {
+ //LCOV_EXCL_START
+ //BasicCleanup removes label qualifier, puts contents in note
no_white_space = TRUE;
only_digits = TRUE;
for (ptr = gbqual->val, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
@@ -20841,6 +24098,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (only_digits || (! no_white_space)) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Illegal value for qualifier %s", gbqual->qual);
}
+ //LCOV_EXCL_STOP
} else if (val == GBQUAL_replace) {
bsp = BioseqFindFromSeqLoc (sfp->location);
if (bsp != NULL) {
@@ -20912,6 +24170,17 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (!found) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "%s is not a legal value for qualifier %s", gbqual->val, gbqual->qual);
}
+ } else if (val == GBQUAL_mod_base) {
+ found = FALSE;
+ for (i = 0; legal_modified_bases[i] != NULL; i++) {
+ if (StringICmp (gbqual->val, legal_modified_bases[i]) == 0) {
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "%s is not a legal value for qualifier %s", gbqual->val, gbqual->qual);
+ }
} else if (val == GBQUAL_mobile_element_type) {
found = FALSE;
str = NULL;
@@ -21024,6 +24293,49 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
}
}
}
+
+ if (StringICmp (key, "misc_feature") == 0) {
+ tmp = StringStr (sfp->comment, "cspA");
+ if (tmp != NULL) {
+ ok = FALSE;
+ if (tmp == sfp->comment) {
+ ch = tmp[4];
+ if (ch == '\0' || IS_WHITESP (ch)) {
+ ok = TRUE;
+ }
+ } else {
+ ptr = tmp-1;
+ ch = *ptr;
+ if (IS_WHITESP (ch)) {
+ ch = tmp[4];
+ if (ch == '\0' || IS_WHITESP (ch)) {
+ ok = TRUE;
+ }
+ }
+ }
+ if (ok) {
+ cds = SeqMgrGetOverlappingFeature (sfp->location, FEATDEF_CDS, NULL, 0, NULL, SIMPLE_OVERLAP, &fcontext);
+ if (cds != NULL) {
+ bsp = BioseqFindFromSeqLoc (cds->product);
+ if (bsp != NULL) {
+ prt = SeqMgrGetBestProteinFeature (bsp, NULL);
+ if (prt != NULL) {
+ prp = (ProtRefPtr) prt->data.value.ptrvalue;
+ if (prp != NULL) {
+ vnp = prp->name;
+ if (vnp != NULL) {
+ if (StringICmp ((CharPtr) vnp->data.ptrvalue, "cold-shock protein") == 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_ColdShockProteinProblem, "cspA misc_feature overlapped by cold-shock protein CDS");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
MemFree (key);
}
@@ -21068,8 +24380,10 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "Wrong qualifier %s for feature %s", gbqual->qual, key);
} else if (qvalid > 1) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, sWrongQualReasons[qvalid - 2]);
+ } else if (sfp->data.choice == SEQFEAT_GENE && StringCmp (gbqual->qual, "product") == 0) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_SuspiciousQualifierValue, "A product qualifier is not normally used on a gene feature");
}
-
+
if (StringCmp (gbqual->qual, "gsdb_id") == 0) {
continue;
}
@@ -21155,6 +24469,17 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
if (!found) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "%s is not a legal value for qualifier %s", gbqual->val, gbqual->qual);
}
+ } else if (val == GBQUAL_mod_base) {
+ found = FALSE;
+ for (i = 0; legal_modified_bases[i] != NULL; i++) {
+ if (StringICmp (gbqual->val, legal_modified_bases[i]) == 0) {
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "%s is not a legal value for qualifier %s", gbqual->val, gbqual->qual);
+ }
} else if (val == GBQUAL_compare) {
multi_compare = FALSE;
ptr = gbqual->val;
@@ -21194,6 +24519,9 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
}
if (!found) {
if (qual == GBQUAL_citation) {
+ //LCOV_EXCL_START
+ //citation is only mandatory for old_sequence and conflict, which are
+ // import features, which are not handled here
if (sfp->cit != NULL) {
found = TRUE;
} else if (! StringHasNoText (sfp->comment)) {
@@ -21209,16 +24537,20 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
}
}
}
+ //LCOV_EXCL_STOP
}
}
if (!found) {
if (StringICmp (key, "conflict") == 0 || StringICmp (key, "old_sequence") == 0) {
+ //LCOV_EXCL_START
+ //conflict and old_sequence are import features not handled by this function
/* compare qualifier can now substitute for citation qualifier for conflict and old_sequence */
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
if (StringICmp (gbqual->qual, "compare") == 0 && StringDoesHaveText (gbqual->val)) {
found = TRUE;
}
}
+ //LCOV_EXCL_STOP
}
}
if (!found) {
@@ -21387,13 +24719,13 @@ static Boolean PartialAtSpliceSiteOrGap (ValidStructPtr vsp, SeqLocPtr head, Uin
residue1 = SeqPortGetResidue (spp);
residue2 = SeqPortGetResidue (spp);
*/
- if (residue1 == '-' && residue2 == '-') {
+ if (residue1 == '-' || residue2 == '-') {
if (isgapP != NULL) {
*isgapP = TRUE;
}
rsult = TRUE;
- } else if (IS_residue (residue1) && IS_residue (residue2)) {
- if (ConsistentWithG (residue1) && ConsistentWithT (residue2)) {
+ } else if (IS_residue (residue1) && IS_residue (residue2) && IS_ALPHA ((Char) residue1) && IS_ALPHA ((Char) residue2)) {
+ if (ConsistentWithG ((Char)residue1) && ConsistentWithT ((Char)residue2)) {
rsult = TRUE;
} else if ((residue1 == 'G') && (residue2 == 'C')) {
rsult = TRUE;
@@ -21410,13 +24742,13 @@ static Boolean PartialAtSpliceSiteOrGap (ValidStructPtr vsp, SeqLocPtr head, Uin
residue1 = SeqPortGetResidue (spp);
residue2 = SeqPortGetResidue (spp);
*/
- if (residue1 == '-' && residue2 == '-') {
+ if (residue1 == '-' || residue2 == '-') {
if (isgapP != NULL) {
*isgapP = TRUE;
}
rsult = TRUE;
} else if (IS_residue (residue1) && IS_residue (residue2) && IS_ALPHA ((Char) residue1) && IS_ALPHA ((Char) residue2)) {
- if (ConsistentWithA (residue1) && ConsistentWithG (residue2)) {
+ if (ConsistentWithA ((Char)residue1) && ConsistentWithG ((Char)residue2)) {
rsult = TRUE;
}
} else if (badseqP != NULL) {
@@ -22121,11 +25453,32 @@ static Boolean CDS3primePartialTest (
return FALSE;
}
+static CharPtr bypass_cds_partial_check [] = { /* NULL-terminated list of exception-text phrases; CheckCDSPartial returns early when StringISearch finds any of them in sfp->except_text */
+ "RNA editing",
+ "reasons given in citation",
+ "artificial frameshift",
+ "rearrangement required for product",
+ "translated product replaced",
+ "unclassified translation discrepancy",
+ "mismatches in translation",
+ /*
+ "adjusted for low-quality genome",
+ */
+ "annotated by transcript or proteomic data",
+ /*
+ "heterogeneous population sequenced",
+ "low-quality sequence region",
+ "artificial location",
+ */
+ NULL /* sentinel that ends the lookup loop */
+};
+
static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp)
{
BioseqPtr bsp;
SeqMgrDescContext context;
+ Int4 i;
MolInfoPtr mip;
Boolean partial5;
Boolean partial3;
@@ -22136,6 +25489,15 @@ static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp)
if (vsp == NULL || sfp == NULL) return;
if (sfp->product == NULL) return;
if (!vsp->useSeqMgrIndexes) return;
+
+ if (sfp->excpt && (! vsp->ignoreExceptions) && (! StringHasNoText (sfp->except_text))) {
+ for (i = 0; bypass_cds_partial_check [i] != NULL; i++) {
+ if (StringISearch (sfp->except_text, bypass_cds_partial_check [i]) != NULL) {
+ return; /* biological exception */
+ }
+ }
+ }
+
bsp = BioseqFindFromSeqLoc (sfp->product);
if (bsp == NULL && vsp->farFetchCDSproducts) {
bsp = BioseqLockById (SeqLocId(sfp->product));
@@ -22490,14 +25852,20 @@ static void CheckForBadMRNAOverlap (ValidStructPtr vsp, SeqFeatPtr sfp)
if (mrna != NULL) {
if (StringISearch (sfp->except_text, "ribosomal slippage") == NULL && StringISearch (sfp->except_text, "trans-splicing") == NULL) {
if (pseudo) {
+ //LCOV_EXCL_START
+ //code never reached, because if pseudo function would have returned
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PseudoCDSmRNArange, "mRNA contains CDS but internal intron-exon boundaries do not match");
+ //LCOV_EXCL_STOP
} else {
ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA contains CDS but internal intron-exon boundaries do not match");
}
}
} else {
if (pseudo) {
+ //LCOV_EXCL_START
+ //code never reached, because if pseudo function would have returned
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PseudoCDSmRNArange, "mRNA overlaps or contains CDS but does not completely contain intervals");
+ //LCOV_EXCL_STOP
} else {
ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA overlaps or contains CDS but does not completely contain intervals");
}
@@ -22575,12 +25943,15 @@ static void CheckForBothOrBothRev (ValidStructPtr vsp, SeqFeatPtr sfp)
} else if (bothreverse) {
suffix = "(reverse)";
}
+ //LCOV_EXCL_START
+ //cannot test with regression because basic cleanup corrects problems
if (bothstrands || bothreverse) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_BothStrands, "%s may not be on both %s strands", prefix, suffix);
}
if (iswhole) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WholeLocation, "%s may not have whole location", prefix);
}
+ //LCOV_EXCL_STOP
}
static Boolean OverlappingGeneIsPseudo (SeqFeatPtr sfp)
@@ -22612,6 +25983,7 @@ static Boolean OverlappingGeneIsPseudo (SeqFeatPtr sfp)
static void CheckForIllegalDbxref (ValidStructPtr vsp, GatherContextPtr gcp, ValNodePtr dbxref)
{
+ Char buf [32];
DbtagPtr db;
CharPtr good;
Int4 id;
@@ -22633,26 +26005,26 @@ static void CheckForIllegalDbxref (ValidStructPtr vsp, GatherContextPtr gcp, Val
}
if (is_sc && StringICmp (db->db, "taxon") == 0) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "Illegal db_xref type %s, legal capitalization is %s, but should only be used on an OrgRef",
- db->db, good);
+ "Illegal db_xref type %s (%s), legal capitalization is %s, but should only be used on an OrgRef",
+ db->db, ValGetDbtagStr (db, buf), good);
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "Illegal db_xref type %s, legal capitalization is %s",
- db->db, good);
+ "Illegal db_xref type %s (%s), legal capitalization is %s",
+ db->db, ValGetDbtagStr (db, buf), good);
}
} else if (is_rf) {
if (vsp->is_refseq_in_sep || vsp->is_gps_in_sep) {
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "db_xref type %s is only legal for RefSeq", db->db);
+ "db_xref type %s (%s) is only legal for RefSeq", db->db, ValGetDbtagStr (db, buf));
}
} else if (is_sc && StringICmp (db->db, "taxon") == 0) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref,
- "db_xref type %s should only be used on an OrgRef", db->db);
+ "db_xref type %s (%s) should only be used on an OrgRef", db->db, ValGetDbtagStr (db, buf));
} else {
}
} else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s", db->db);
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s (%s)", db->db, ValGetDbtagStr (db, buf));
}
if (StringDoesHaveText (db->db)) {
@@ -22726,6 +26098,9 @@ static CharPtr legal_exception_strings [] = {
"low-quality sequence region",
"unextendable partial coding region",
"artificial location",
+ "gene split at contig boundary",
+ "gene split at sequence boundary",
+ kAllowManualGenCodeException,
NULL
};
@@ -22735,6 +26110,13 @@ static CharPtr refseq_exception_strings [] = {
"mismatches in transcription",
"mismatches in translation",
"adjusted for low-quality genome",
+ "translation initiation by tRNA-Leu at CUG codon",
+ "16S ribosomal RNA and 23S ribosomal RNA overlap",
+ "16S ribosomal RNA and 5S ribosomal RNA overlap",
+ "23S ribosomal RNA and 16S ribosomal RNA overlap",
+ "23S ribosomal RNA and 5S ribosomal RNA overlap",
+ "5S ribosomal RNA and 16S ribosomal RNA overlap",
+ "5S ribosomal RNA and 23S ribosomal RNA overlap",
NULL
};
@@ -22799,6 +26181,7 @@ static void ValidateExceptText (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
if (sfp->comment != NULL && StringISearch (sfp->comment, tmp) != NULL) {
if (StringICmp (tmp, "ribosomal slippage") != 0 &&
StringICmp (tmp, "trans-splicing") != 0 &&
+ StringICmp (tmp, "RNA editing") != 0 &&
StringICmp (tmp, "artificial location") != 0) {
redundant_with_comment = TRUE;
} else if (StringICmp (sfp->comment, tmp) == 0) {
@@ -23255,6 +26638,25 @@ static Boolean SplicingNotExpected (SeqFeatPtr sfp)
return FALSE;
}
+static Boolean FeatureOnOrganelle (SeqFeatPtr sfp) /* Returns IsLocationOrganelle's verdict on the genome field of the BioSource descriptor found for the Bioseq at sfp->location; returns FALSE when the Bioseq, the descriptor, or the BioSource is missing */
+
+{
+ BioSourcePtr biop;
+ BioseqPtr bsp;
+ SeqMgrDescContext dcontext;
+ SeqDescrPtr sdp;
+
+ bsp = BioseqFindFromSeqLoc (sfp->location); /* NOTE(review): sfp is dereferenced without a NULL check - confirm callers never pass NULL */
+ if (bsp == NULL) return FALSE;
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); /* NULL is passed as the previous descriptor, so only one source descriptor is ever consulted */
+ if (sdp == NULL) return FALSE;
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop == NULL) return FALSE;
+
+ return IsLocationOrganelle (biop->genome);; /* NOTE(review): the second ';' is a stray empty statement */
+}
+
static Boolean RareConsensusNotExpected (SeqFeatPtr sfp)
{
@@ -23309,6 +26711,8 @@ static Boolean IsNumericChar (Char ch)
}
*/
+//LCOV_EXCL_START
+// Not used for validation
NLM_EXTERN Boolean IsNuclAcc (CharPtr name)
{
@@ -23320,6 +26724,7 @@ NLM_EXTERN Boolean IsNuclAcc (CharPtr name)
return TRUE;
}
+//LCOV_EXCL_STOP
static Boolean IsCddFeat (
SeqFeatPtr sfp
@@ -23501,7 +26906,8 @@ static void ValidateAnticodon (ValidStructPtr vsp, SeqLocPtr slp)
/* newer check for intervals out of order on segmented bioseq */
if (bsp == NULL || bsp->repr != Seq_repr_seg) return;
-
+//LCOV_EXCL_START
+// Only for SegSets
if (SeqLocBadSortOrder (bsp, slp)) {
ctmp = SeqLocPrint (slp);
if (ctmp != NULL && StringLen (ctmp) > 800) {
@@ -23521,6 +26927,7 @@ static void ValidateAnticodon (ValidStructPtr vsp, SeqLocPtr slp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_MixedStrand, "Mixed strands in Anticodon [%s]", ctmp);
MemFree (ctmp);
}
+//LCOV_EXCL_STOP
}
static Boolean JustQuotes (CharPtr str)
@@ -23896,74 +27303,6 @@ static void LookForAccnLocs (SeqIdPtr sip, Pointer userdata)
}
}
-static Boolean LocationIsFar (SeqLocPtr location)
-
-{
- BioseqPtr bsp;
- DeltaSeqPtr dsp;
- Boolean is_far = FALSE;
- SeqLocPtr loc;
- SeqEntryPtr oldscope;
- SeqIdPtr sip;
- SeqLocPtr slp;
-
- if (location == NULL) return FALSE;
-
- oldscope = SeqEntrySetScope (NULL);
-
- slp = SeqLocFindNext (location, NULL);
- while (slp != NULL) {
- if (slp->choice != SEQLOC_NULL) {
- sip = SeqLocId (slp);
- bsp = BioseqFind (sip);
- if (bsp == NULL) {
- is_far = TRUE;
- } else if (bsp->repr == Seq_repr_delta && bsp->seq_ext_type == 4) {
- for (dsp = (DeltaSeqPtr) bsp->seq_ext;
- dsp != NULL && (! is_far);
- dsp = dsp->next) {
- if (dsp->choice != 1) continue;
- loc = (SeqLocPtr) dsp->data.ptrvalue;
- if (loc == NULL) continue;
- if (loc->choice == SEQLOC_NULL) continue;
- sip = SeqLocId (loc);
- bsp = BioseqFind (sip);
- if (bsp == NULL) {
- is_far = TRUE;
- }
- }
- } else if (bsp->repr == Seq_repr_seg && bsp->seq_ext_type == 1) {
- for (loc = (SeqLocPtr) bsp->seq_ext;
- loc != NULL && (! is_far);
- loc = loc->next) {
- if (loc == NULL) continue;
- if (loc->choice == SEQLOC_NULL) continue;
- sip = SeqLocId (loc);
- bsp = BioseqFind (sip);
- if (bsp == NULL) {
- is_far = TRUE;
- }
- }
- }
- }
- slp = SeqLocFindNext (location, slp);
- }
-
- SeqEntrySetScope (oldscope);
-
- return is_far;
-}
-
-static Boolean NoFetchFunctions (void)
-
-{
- ObjMgrProcPtr ompp = NULL;
-
- ompp = ObjMgrProcFindNext (NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_BIOSEQ, NULL);
-
- return (Boolean) (ompp == NULL);
-}
-
static CharPtr infMessage [] = {
"unknown error",
"empty inference string",
@@ -23971,11 +27310,13 @@ static CharPtr infMessage [] = {
"bad inference body",
"single inference field",
"spaces in inference",
+ "possible comment in inference",
"same species misused",
"bad inference accession",
"bad inference accession version",
"accession.version not public",
"bad accession type",
+ "unrecognized database",
NULL
};
@@ -24401,19 +27742,30 @@ static void TestForBracketsInProductName (CharPtr str, ValidStructPtr vsp)
static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gcp)
{
+ Uint1 aa;
RnaRefPtr rrp;
Boolean pseudo, ovgenepseudo = FALSE;
Boolean protidqual = FALSE, transidqual = FALSE;
GBQualPtr gbq;
- tRNAPtr trp;
- Boolean badanticodon, anticodonqual, productqual;
+ tRNAPtr trp = NULL;
+ Boolean badanticodon, anticodonqual, productqual, mustbemethionine;
Int4 anticodonlen;
SeqLocPtr slp;
RNAGenPtr rgp;
Int2 i;
- CharPtr str;
+ CharPtr str, three_letter_aa;
rrp = (RnaRefPtr) (sfp->data.value.ptrvalue);
+ if (rrp == NULL) return;
+
+ if (rrp->type != RNA_TYPE_tRNA && rrp->ext.choice == 2) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidForType, "tRNA data structure on non-tRNA feature");
+ }
+ /*
+ if (rrp->type == RNA_TYPE_misc_RNA && rrp->ext.choice == 3) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidForType, "RNA-gen data structure on miscRNA feature");
+ }
+ */
pseudo = sfp->pseudo;
ovgenepseudo = FALSE;
@@ -24449,13 +27801,15 @@ static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gc
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "transcript_id should not be a gbqual on an mRNA feature");
}
str = (CharPtr) rrp->ext.value.ptrvalue;
- if (StringDoesHaveText (str) && StringNICmp (str, "transfer RNA ", 13) == 0) {
+ if (StringDoesHaveText (str) && StringNICmp (str, "transfer RNA ", 13) == 0 &&
+ StringICmp (str, "transfer RNA nucleotidyltransferase") != 0 &&
+ StringICmp (str, "transfer RNA methyltransferase") != 0) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_tRNAmRNAmixup, "mRNA feature product indicates it should be a tRNA feature");
}
}
if (rrp->ext.choice == 2) { /* tRNA */
trp = (tRNAPtr) (rrp->ext.value.ptrvalue);
- if (trp->anticodon != NULL) {
+ if (trp != NULL && trp->anticodon != NULL) {
badanticodon = FALSE;
anticodonlen = 0;
slp = SeqLocFindNext (trp->anticodon, NULL);
@@ -24480,12 +27834,17 @@ static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gc
if (rrp->type == 3) { /* tRNA */
anticodonqual = FALSE;
productqual = FALSE;
+ mustbemethionine = FALSE;
gbq = sfp->qual;
while (gbq != NULL) {
if (StringICmp (gbq->qual, "anticodon") == 0) {
anticodonqual = TRUE;
} else if (StringICmp (gbq->qual, "product") == 0) {
- productqual = TRUE;
+ if (StringICmp (gbq->val, "tRNA-fMet") != 0 && StringICmp (gbq->val, "tRNA-iMet") != 0) {
+ productqual = TRUE;
+ } else {
+ mustbemethionine = TRUE;
+ }
}
gbq = gbq->next;
}
@@ -24495,6 +27854,18 @@ static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gc
if (productqual) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Unparsed product qualifier in tRNA");
}
+ if (mustbemethionine) {
+ if (trp != NULL) {
+ aa = GetAaFromtRNA (trp);
+ if (aa != 'M') {
+ three_letter_aa = Get3LetterSymbol (NULL, Seq_code_ncbieaa, NULL, aa);
+ if (StringHasNoText (three_letter_aa)) {
+ three_letter_aa = "?";
+ }
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Initiation tRNA claims to be tRNA-%s, but should be tRNA-Met", three_letter_aa);
+ }
+ }
+ }
}
if (rrp->type == 3 && rrp->ext.choice == 1) { /* tRNA with string extension */
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Unparsed product qualifier in tRNA");
@@ -24520,7 +27891,7 @@ static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gc
if (StringICmp (gbq->qual, "ncRNA_class") != 0) continue;
if (StringHasNoText (gbq->val)) continue;
if (IsStringInNcRNAClassList (gbq->val)) continue;
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_InvalidQualifierValue, "Illegal ncRNA_class value '%s'", gbq->val);
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_InvalidQualifierValue, "Other ncRNA_class value '%s'", gbq->val);
}
}
}
@@ -24588,6 +27959,8 @@ static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gc
}
+//LCOV_EXCL_START
+// Not part of validation
NLM_EXTERN Boolean IsGeneXrefRedundant (SeqFeatPtr sfp)
{
GeneRefPtr grp;
@@ -24644,6 +28017,7 @@ NLM_EXTERN Boolean IsGeneXrefRedundant (SeqFeatPtr sfp)
}
return redundantgenexref;
}
+//LCOV_EXCL_STOP
static void CheckCodingRegionAndProteinFeaturePartials (SeqFeatPtr sfp, ValidStructPtr vsp)
@@ -24699,6 +28073,149 @@ static void CheckCodingRegionAndProteinFeaturePartials (SeqFeatPtr sfp, ValidStr
}
+/*
+ * CheckForShortExons
+ *
+ * Walks the intervals of loc with SeqLocFindNext and counts internal
+ * intervals whose SeqLocLen is below 16.  The first and the last interval
+ * are deliberately left out of the count.  One ERR_SEQ_FEAT_ShortExon
+ * warning is posted through vsp: one wording when a single short internal
+ * interval was seen, another when there were several.
+ */
+static void CheckForShortExons (ValidStructPtr vsp, SeqLocPtr loc)
+{
+  const Int4  min_exon_len = 16;
+  SeqLocPtr   curr;
+  Int4        pending_len;
+  Int4        short_count = 0;
+
+  curr = SeqLocFindNext (loc, NULL);
+  if (curr == NULL) return;
+
+  /*
+   * pending_len holds the length of the interval before curr.  It is seeded
+   * at the threshold so the first interval is never counted, and the length
+   * stored for the last interval is never examined because the loop ends.
+   */
+  pending_len = min_exon_len;
+  for (curr = SeqLocFindNext (loc, curr); curr != NULL; curr = SeqLocFindNext (loc, curr)) {
+    if (pending_len < min_exon_len) {
+      short_count++;
+    }
+    pending_len = SeqLocLen (curr);
+  }
+
+  if (short_count == 0) return;
+
+  if (short_count > 1) {
+    ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ShortExon, "Coding region has multiple internal exons that are too short");
+  } else {
+    ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ShortExon,
+              "Internal coding region exon is too short");
+  }
+}
+
+
+/*
+ * FeaturePairIsTwoTypes
+ *
+ * Returns TRUE when the idx.subtype values of sfp1 and sfp2 are ftype1 and
+ * ftype2, in either order.  Returns FALSE otherwise, including when either
+ * feature pointer is NULL.
+ */
+static Boolean FeaturePairIsTwoTypes (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Uint1 ftype1, Uint1 ftype2)
+{
+  if (sfp1 == NULL) return FALSE;
+  if (sfp2 == NULL) return FALSE;
+
+  /* same order as the arguments */
+  if (sfp1->idx.subtype == ftype1 && sfp2->idx.subtype == ftype2) return TRUE;
+
+  /* swapped order */
+  if (sfp1->idx.subtype == ftype2 && sfp2->idx.subtype == ftype1) return TRUE;
+
+  return FALSE;
+}
+
+
+/*
+ * s_GeneRefsAreEquivalent
+ *
+ * Compares two gene references case-insensitively on exactly one field,
+ * chosen in this order of preference:
+ *   1. locus_tag, when both references have text there;
+ *   2. locus, when both references have text there;
+ *   3. the first synonym, when both synonym lists are non-NULL.
+ * Only the first field that qualifies is compared; a mismatch there is
+ * final and later fields are not consulted.
+ *
+ * On a match, *label (when label is not NULL) is pointed at grp's copy of
+ * the matching string; nothing is allocated.  On a mismatch *label is left
+ * untouched.  Returns FALSE when either reference is NULL.
+ */
+static Boolean s_GeneRefsAreEquivalent (GeneRefPtr grp, GeneRefPtr grpx, CharPtr PNTR label)
+{
+  CharPtr  lhs = NULL;
+  CharPtr  rhs = NULL;
+
+  if (grp == NULL || grpx == NULL) {
+    return FALSE;
+  }
+
+  /* pick the single pair of strings that decides equivalence */
+  if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
+    lhs = grp->locus_tag;
+    rhs = grpx->locus_tag;
+  } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
+    lhs = grp->locus;
+    rhs = grpx->locus;
+  } else if (grp->syn != NULL && grpx->syn != NULL) {
+    lhs = (CharPtr) grp->syn->data.ptrvalue;
+    rhs = (CharPtr) grpx->syn->data.ptrvalue;
+    if (! ((StringDoesHaveText (lhs)) && StringDoesHaveText (rhs))) {
+      return FALSE;
+    }
+  } else {
+    return FALSE;
+  }
+
+  if (StringICmp (lhs, rhs) != 0) {
+    return FALSE;
+  }
+
+  if (label != NULL) {
+    *label = lhs;
+  }
+  return TRUE;
+}
+
+
+/*
+ * s_GeneXrefConflictsWithFeatureXref
+ *
+ * Scans the xref list of sfp and returns TRUE as soon as it finds an entry
+ * whose data choice is SEQFEAT_GENE and whose gene reference is not
+ * equivalent (per s_GeneRefsAreEquivalent) to the data pointer of gene.
+ * Returns FALSE when no such entry exists, including when sfp has no gene
+ * xrefs at all.
+ *
+ * NOTE(review): gene's data pointer is treated as a GeneRefPtr without
+ * checking gene->data.choice here - the caller is expected to have done so.
+ */
+static Boolean s_GeneXrefConflictsWithFeatureXref(SeqFeatPtr sfp, SeqFeatPtr gene)
+{
+  SeqFeatXrefPtr  curr;
+
+  curr = sfp->xref;
+  while (curr != NULL) {
+    if (curr->data.choice == SEQFEAT_GENE) {
+      if (! s_GeneRefsAreEquivalent ((GeneRefPtr) curr->data.value.ptrvalue,
+                                     (GeneRefPtr) gene->data.value.ptrvalue,
+                                     NULL)) {
+        return TRUE;
+      }
+    }
+    curr = curr->next;
+  }
+
+  return FALSE;
+}
+
+
+/*
+ * ValidateSeqFeatXref
+ *
+ * Checks every SeqFeatXref on sfp and posts warnings through vsp:
+ *   - an xref with neither an id nor a data choice is reported;
+ *   - an xref with only a data choice is accepted silently;
+ *   - an xref with an id is resolved with SeqMgrGetFeatureByFeatID, and a
+ *     failed lookup is reported.
+ *
+ * For a resolved target, each of its id-bearing xrefs is resolved in turn.
+ * Those that lead back to sfp are checked:
+ *   - the pair must be CDS/mRNA, CDS/gene or mRNA/gene, else a warning;
+ *   - when the target is a gene, the gene xrefs on sfp must not conflict
+ *     with it (s_GeneXrefConflictsWithFeatureXref), else a warning.
+ *
+ * After that scan, one warning is posted if the target has no id-bearing
+ * xref at all, or a different one if it has some but none leads back to sfp.
+ */
+static void ValidateSeqFeatXref (SeqFeatPtr sfp, ValidStructPtr vsp)
+{
+  SeqFeatXrefPtr  fwd, back;
+  SeqFeatPtr      target;
+  Boolean         target_has_id_xref, target_links_back, allowed_pair;
+
+  for (fwd = sfp->xref; fwd != NULL; fwd = fwd->next) {
+
+    if (fwd->id.choice == 0) {
+      /* no feature id: only an entirely empty xref is a problem */
+      if (fwd->data.choice == 0) {
+        ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "SeqFeatXref with no id or data field");
+      }
+      continue;
+    }
+
+    target = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, fwd, NULL);
+    if (target == NULL) {
+      ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefFeatureMissing, "Cross-referenced feature cannot be found");
+      continue;
+    }
+
+    target_has_id_xref = FALSE;
+    target_links_back = FALSE;
+
+    for (back = target->xref; back != NULL; back = back->next) {
+      if (back->id.choice == 0) continue;
+      target_has_id_xref = TRUE;
+
+      /* only xrefs that resolve back to sfp are examined further */
+      if (SeqMgrGetFeatureByFeatID (target->idx.entityID, NULL, NULL, back, NULL) != sfp) continue;
+      target_links_back = TRUE;
+
+      allowed_pair = (Boolean) (FeaturePairIsTwoTypes (sfp, target, FEATDEF_CDS, FEATDEF_mRNA)
+                                || FeaturePairIsTwoTypes (sfp, target, FEATDEF_CDS, FEATDEF_GENE)
+                                || FeaturePairIsTwoTypes (sfp, target, FEATDEF_mRNA, FEATDEF_GENE));
+      if (! allowed_pair) {
+        ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "Cross-references are not between CDS and mRNA pair or between a gene and a CDS or mRNA");
+        continue;
+      }
+
+      if (target->data.choice == SEQFEAT_GENE
+          && s_GeneXrefConflictsWithFeatureXref (sfp, target)) {
+        ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "Feature gene xref does not match Feature ID cross-referenced gene feature");
+      }
+    }
+
+    if (! target_has_id_xref) {
+      ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "Cross-referenced feature does not have its own cross-reference");
+    } else if (! target_links_back) {
+      ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefNotReciprocal, "Cross-referenced feature does not link reciprocally");
+    }
+  }
+}
+
+
NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
{
Int2 type, i, j;
@@ -24709,7 +28226,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
"Improper use of partial (greater than or less than)"
};
Uint2 partials[2], errtype;
- Char buf[80];
+ Char buf[128];
CharPtr tmp;
ValidStructPtr vsp;
SeqFeatPtr sfp;
@@ -24721,7 +28238,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
GBQualPtr gbq;
Boolean pseudo, excpt, conflict, codonqual,
protidqual,
- transidqual, ovgenepseudo;
+ transidqual, ovgenepseudo, gene_synonym_on_cds;
ImpFeatPtr ifp;
GeneRefPtr grp;
SeqFeatPtr gene;
@@ -24749,7 +28266,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
SeqFeatPtr operon;
Boolean redundantgenexref;
SeqMgrFeatContext fcontext, gcontext;
- CharPtr syn1, syn2, label = NULL, genexref_label;
+ CharPtr label = NULL, genexref_label;
Uint2 oldEntityID;
Uint4 oldItemID;
SeqIdPtr sip;
@@ -24767,9 +28284,6 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
Boolean badseq;
Boolean is_seqloc_bond;
SeqBondPtr sbp;
- SeqFeatXrefPtr xref, matchxref;
- SeqFeatPtr matchsfp, origsfp;
- Boolean hasxref;
CharPtr sfp_old_locus_tag;
CharPtr gene_old_locus_tag;
Boolean bypassGeneTest;
@@ -24792,9 +28306,9 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
vsp->sfp = sfp;
type = (Int2) (sfp->data.choice);
- ValidateSeqLoc (vsp, sfp->location, "Location");
+ ValidateSeqLoc (vsp, sfp->location, (sfp->data.choice == SEQFEAT_GENE || !IsGenomicPipeline(vsp)), "Location");
- ValidateSeqLoc (vsp, sfp->product, "Product");
+ ValidateSeqLoc (vsp, sfp->product, TRUE, "Product");
CheckForBothOrBothRev (vsp, sfp);
@@ -24802,7 +28316,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
accn_seqid = FALSE;
VisitSeqIdsInSeqLoc (sfp->location, (Pointer) &accn_seqid, LookForAccnLocs);
if (accn_seqid) {
- if (! vsp->is_smupd_in_sep) {
+ if (! vsp->is_smupd_in_sep && !vsp->is_gpipe_in_sep) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureRefersToAccession, "Feature location refers to accession");
}
}
@@ -24812,7 +28326,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
accn_seqid = FALSE;
VisitSeqIdsInSeqLoc (sfp->product, (Pointer) &accn_seqid, LookForAccnLocs);
if (accn_seqid) {
- if (! vsp->is_smupd_in_sep) {
+ if (! vsp->is_smupd_in_sep && !vsp->is_gpipe_in_sep) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FeatureRefersToAccession, "Feature product refers to accession");
}
}
@@ -24827,7 +28341,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if ((partials[0] != SLP_COMPLETE) || (partials[1] != SLP_COMPLETE) || (sfp->partial)) { /* partialness */
/* a feature on a partial sequence should be partial -- if often isn't */
if ((!sfp->partial) && (partials[1] != SLP_COMPLETE) && (sfp->location->choice == SEQLOC_WHOLE)) {
+ //LCOV_EXCL_START
+ //BasicCleanup changes whole locations to ints
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem, "On partial Bioseq, SeqFeat.partial should be TRUE");
+ //LCOV_EXCL_STOP
}
/* a partial feature, with complete location, but partial product */
else if ((sfp->partial) && (sfp->product != NULL) && (partials[1] == SLP_COMPLETE) && (sfp->product->choice == SEQLOC_WHOLE)
@@ -24837,7 +28354,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
/* gene on segmented set is now 'order', should also be partial */
else if (type == SEQFEAT_GENE && sfp->product == NULL && partials[1] == SLP_INTERNAL) {
if (!sfp->partial) {
+ //LCOV_EXCL_START
+ //BasicCleanup sets partial flag for ordered locations
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "Gene of 'order' with otherwise complete location should have partial flag set");
+ //LCOV_EXCL_STOP
}
}
/* inconsistent combination of partial/complete product,location,partial flag - part 1 */
@@ -24925,6 +28445,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
errtype = SLP_NOSTART;
for (j = 0; j < 4; j++) {
bypassGeneTest = FALSE;
+ badseq = FALSE;
if (partials[1] & errtype) {
if (j == 3) {
if (LocationIsFar (sfp->location) && NoFetchFunctions ()) {
@@ -24956,10 +28477,20 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) {
} else if (PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) {
if (! isgap) {
- if (sfp->idx.subtype != FEATDEF_CDS || SplicingNotExpected (sfp)) {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
+ if (sfp->idx.subtype == FEATDEF_tRNA && j == 0 && AdjacentToIntron (sfp)) {
+ } else if (sfp->idx.subtype == FEATDEF_CDS && FeatureOnOrganelle (sfp)) {
+ if (AdjacentToIntron (sfp)) {
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: %s (organelle does not use standard splice site convention)",
+ parterrs[j]);
+ }
+ } else if (sfp->idx.subtype != FEATDEF_CDS || SplicingNotExpected (sfp)) {
+ if ( ! sfp->pseudo) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
"PartialLocation: %s (but is at consensus splice site)",
parterrs[j]);
+ }
} else if (sfp->idx.subtype == FEATDEF_CDS) {
bsp = BioseqFindFromSeqLoc (sfp->location);
if (bsp != NULL) {
@@ -24968,9 +28499,11 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
mip = (MolInfoPtr) sdp->data.ptrvalue;
if (mip != NULL) {
if (mip->biomol == MOLECULE_TYPE_MRNA) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
- "PartialLocation: %s (but is at consensus splice site, but is on an mRNA that is already spliced)",
- parterrs[j]);
+ if ( ! sfp->pseudo) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: %s (but is at consensus splice site, but is on an mRNA that is already spliced)",
+ parterrs[j]);
+ }
}
}
}
@@ -24984,23 +28517,31 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
} else if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt &&
StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
} else if (sfp->data.choice == SEQFEAT_CDREGION && j == 0) {
- if (PartialAtGapOrNs (vsp, sfp->location, errtype) && StringStr (sfp->comment, "coding region disrupted by sequencing gap") != NULL) {
+ if (PartialAtGapOrNs (vsp, sfp->location, errtype) || StringStr (sfp->comment, "coding region disrupted by sequencing gap") != NULL) {
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
"PartialLocation: 5' partial is not at start AND is not at consensus splice site");
}
} else if (sfp->data.choice == SEQFEAT_CDREGION && j == 1) {
- if (PartialAtGapOrNs (vsp, sfp->location, errtype) && StringStr (sfp->comment, "coding region disrupted by sequencing gap") != NULL) {
+ if (PartialAtGapOrNs (vsp, sfp->location, errtype) || StringStr (sfp->comment, "coding region disrupted by sequencing gap") != NULL) {
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
"PartialLocation: 3' partial is not at stop AND is not at consensus splice site");
}
} else if (sfp->idx.subtype == FEATDEF_tRNA && j == 0 && AdjacentToIntron (sfp)) {
} else if (j == 0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ sev = SEV_WARNING;
+ if (vsp->genomeSubmission && sfp->idx.subtype == FEATDEF_rRNA) {
+ sev = SEV_ERROR;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem,
"PartialLocation: Start does not include first/last residue of sequence");
} else if (j == 1) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ sev = SEV_WARNING;
+ if (vsp->genomeSubmission && sfp->idx.subtype == FEATDEF_rRNA) {
+ sev = SEV_ERROR;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem,
"PartialLocation: Stop does not include first/last residue of sequence");
}
}
@@ -25047,7 +28588,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
if (StringCmp (gbq->qual, "allele") == 0 && StringDoesHaveText (gbq->val)) {
if (StringICmp (gbq->val, grp->allele) == 0) {
+ //LCOV_EXCL_START
+ //BasicCleanup removes redundant allele qualifier
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "Redundant allele qualifier (%s) on gene", gbq->val);
+ //LCOV_EXCL_STOP
} else if (sfp->idx.subtype != FEATDEF_variation) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "Hidden allele qualifier (%s) on gene", gbq->val);
}
@@ -25089,7 +28633,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
if (grp->locus_tag != NULL && sfp->comment != NULL) {
if (StringCmp (grp->locus_tag, sfp->comment) == 0) {
+ //LCOV_EXCL_START
+ //BasicCleanup removes redundant comment
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_RedundantFields, "Comment has same value as gene locus_tag");
+ //LCOV_EXCL_STOP
}
}
if (StringDoesHaveText (grp->locus_tag)) {
@@ -25106,7 +28653,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
str = (CharPtr) vnp->data.ptrvalue;
if (StringHasNoText (str)) continue;
if (NameInList (str, badGeneSyn, sizeof (badGeneSyn) / sizeof (badGeneSyn [0]))) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UndesiredGeneSynonym, "Uninformative gene synonym '%s'", str);
+ ValidErr (vsp, vsp->is_gpipe_in_sep ? SEV_INFO : SEV_WARNING,
+ ERR_SEQ_FEAT_UndesiredGeneSynonym, "Uninformative gene synonym '%s'", str);
}
}
}
@@ -25115,22 +28663,27 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
str = (CharPtr) vnp->data.ptrvalue;
if (StringHasNoText (str)) continue;
if (StringDoesHaveText (grp->locus) && StringCmp (grp->locus, str) == 0) {
+ //LCOV_EXCL_START
+ //BasicCleanup removes redundant gene synonym
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UndesiredGeneSynonym, "gene synonym has same value as gene locus");
+ //LCOV_EXCL_STOP
}
}
}
- if (grp->syn != NULL) {
- bsp = BioseqFindFromSeqLoc (sfp->location);
- for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
- str = (CharPtr) vnp->data.ptrvalue;
- if (StringHasNoText (str)) continue;
- sfpx = SeqMgrGetFeatureByLabel (bsp, str, SEQFEAT_GENE, 0, NULL);
- if (sfpx != NULL && sfpx != sfp) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IdenticalGeneSymbolAndSynonym, "gene synonym has same value (%s) as locus of another gene feature", str);
+ if (!vsp->is_gpipe_in_sep) {
+ if (grp->syn != NULL) {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ sfpx = SeqMgrGetFeatureByLabel (bsp, str, SEQFEAT_GENE, 0, NULL);
+ if (sfpx != NULL && sfpx != sfp) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IdenticalGeneSymbolAndSynonym, "gene synonym has same value (%s) as locus of another gene feature", str);
+ }
+ }
}
- }
}
- if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grp->desc) && StringCmp (grp->locus, grp->desc) == 0) {
+ if (!vsp->is_gpipe_in_sep && StringDoesHaveText (grp->locus) && StringDoesHaveText (grp->desc) && StringCmp (grp->locus, grp->desc) == 0) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UndesiredGeneSynonym, "gene description has same value as gene locus");
}
if (StringHasNoText (grp->locus) && StringHasNoText (grp->desc) && grp->syn != NULL) {
@@ -25186,6 +28739,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
case 2: /* Org-ref */
break;
case 3: /* Cdregion */
+ CheckForShortExons(vsp, sfp->location);
pseudo = sfp->pseudo; /* now also uses new feature pseudo flag */
excpt = FALSE;
conflict = FALSE;
@@ -25197,6 +28751,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
protidqual = FALSE;
transidqual = FALSE;
ovgenepseudo = FALSE;
+ gene_synonym_on_cds = FALSE;
gbq = sfp->qual;
while (gbq != NULL) {
if (StringICmp (gbq->qual, "pseudo") == 0) {
@@ -25214,6 +28769,9 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (StringICmp (gbq->qual, "transcript_id") == 0) {
transidqual = TRUE;
}
+ if (StringICmp (gbq->qual, "gene_synonym") == 0) {
+ gene_synonym_on_cds = TRUE;
+ }
gbq = gbq->next;
}
if (protidqual) {
@@ -25222,6 +28780,9 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (transidqual) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "transcript_id should not be a gbqual on a CDS feature");
}
+ if (gene_synonym_on_cds) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "gene_synonym should not be a gbqual on a CDS feature");
+ }
if (OverlappingGeneIsPseudo (sfp)) {
pseudo = TRUE;
ovgenepseudo = TRUE;
@@ -25265,7 +28826,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
prevcbp = cbp;
}
if (excpt && (!sfp->excpt)) {
+ //LCOV_EXCL_START
+ //BasicCleanup converts "exception" gbqual to except_text and sets except flag
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ExceptInconsistent, "Exception flag should be set in coding region");
+ //LCOV_EXCL_STOP
}
if (crp->orf && sfp->product != NULL) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_OrfCdsHasProduct, "An ORF coding region should not have a product");
@@ -25281,17 +28845,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PseudoCdsHasProduct, "A pseudo coding region should not have a product");
}
}
- if (pseudo && SeqMgrGetProtXref (sfp) != NULL) {
- is_nc = FALSE;
- if (NGorNT (vsp->sep, sfp->location, &is_nc) || IsEMBLAccn (vsp->sep, sfp->location)) {
- sev = SEV_WARNING;
- } else if (is_nc) {
- sev = SEV_WARNING;
- } else {
- sev = SEV_ERROR;
- }
- ValidErr (vsp, sev, ERR_SEQ_FEAT_PseudoCdsHasProtXref, "A pseudo coding region should not have a protein xref");
- }
+
if (codonqual) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_CodonQualifierUsed, "Use the proper genetic code, if available, or set transl_excepts on specific codons");
}
@@ -25303,8 +28857,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (vsp->useSeqMgrIndexes) {
vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
} else {
+//LCOV_EXCL_START
bcp = BioseqContextNew (bsp);
vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_source, NULL, NULL);
+//LCOV_EXCL_STOP
}
if (vnp != NULL && vnp->data.ptrvalue != NULL) {
plastid = FALSE;
@@ -25341,7 +28897,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
}
}
- if (biopgencode != cdsgencode) {
+ if (biopgencode != cdsgencode && StringISearch (sfp->except_text, kAllowManualGenCodeException) == NULL) {
if (! vsp->seqSubmitParent) { /* suppress when validator run from tbl2asn */
if (plastid) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_GenCodeMismatch,
@@ -25402,7 +28958,9 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (vnp != NULL) {
str = (CharPtr) vnp->data.ptrvalue;
if (StringDoesHaveText (str)) {
- TestForBracketsInProductName (str, vsp);
+ if (! vsp->is_embl_tpe_in_sep) {
+ TestForBracketsInProductName (str, vsp);
+ }
if (StringNICmp (str, "hypothetical protein XP_", 24) == 0) {
bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID);
if (bsp != NULL) {
@@ -25428,6 +28986,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Apparent EC number in protein title");
}
if (vsp->rubiscoTest && StringStr (str, "ribulose") != NULL && StringStr (str, "bisphosphate") != NULL) {
+ //LCOV_EXCL_START
+ //no option to enable rubisco test, problems handled by basic cleanup
if (StringStr (str, "methyltransferase") == NULL && StringStr (str, "activase") == NULL) {
if (StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase") == 0) {
/* allow standard name without large or small subunit designation - later need kingdom test */
@@ -25436,6 +28996,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_RubiscoProblem, "Nonstandard ribulose bisphosphate protein name");
}
}
+ //LCOV_EXCL_STOP
}
if (StringHasPMID (str)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ProteinNameHasPMID, "Protein name has internal PMID");
@@ -25443,7 +29004,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
if (str != NULL && sfp->comment != NULL) {
if (StringCmp (str, sfp->comment) == 0) {
+ //LCOV_EXCL_START
+ //BasicCleanup removes redundant comment
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_RedundantFields, "Comment has same value as protein name");
+ //LCOV_EXCL_STOP
}
}
if (StringDoesHaveText (sfp->comment)) {
@@ -25496,7 +29060,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (ECnumberWasDeleted (str)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_DeletedEcNumber, "EC_number %s was deleted", str);
} else if (ECnumberWasReplaced (str)) {
- if (EcCnumberWasSplit (str)) {
+ if (EcCnumberWasSplit (vsp, str)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SplitEcNumber, "EC_number %s was transferred and is no longer valid", str);
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ReplacedEcNumber, "EC_number %s was transferred and is no longer valid", str);
@@ -25517,7 +29081,10 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
}
} else {
+ //LCOV_EXCL_START
+ //BasicCleanup removes empty EC numbers
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "EC number should not be empty");
+ //LCOV_EXCL_STOP
}
}
}
@@ -25537,7 +29104,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
}
}
- if (prp != NULL && prp->name != NULL) {
+ if (prp != NULL) {
for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
str = (CharPtr) vnp->data.ptrvalue;
if (StringHasNoText (str)) continue;
@@ -25551,15 +29118,15 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_BadTrailingHyphen, "Protein name ends with hyphen");
}
}
- }
- for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
- str = (CharPtr) vnp->data.ptrvalue;
- if (StringHasSgml (vsp, str)) {
- ValidErr (vsp, SEV_WARNING, ERR_GENERIC_SgmlPresentInText, "protein name %s has SGML", str);
+ for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasSgml (vsp, str)) {
+ ValidErr (vsp, SEV_WARNING, ERR_GENERIC_SgmlPresentInText, "protein name %s has SGML", str);
+ }
+ }
+ if (StringHasSgml (vsp, prp->desc)) {
+ ValidErr (vsp, SEV_WARNING, ERR_GENERIC_SgmlPresentInText, "protein description %s has SGML", prp->desc);
}
- }
- if (StringHasSgml (vsp, prp->desc)) {
- ValidErr (vsp, SEV_WARNING, ERR_GENERIC_SgmlPresentInText, "protein description %s has SGML", prp->desc);
}
break;
case 5: /* RNA-ref */
@@ -25580,19 +29147,19 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (ifp != NULL && StringICmp (ifp->key, "exon") == 0 && (! sfp->pseudo)) {
skip = FALSE;
- bsp = BioseqFindFromSeqLoc (sfp->location);
- if (bsp != NULL) {
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
- if (sdp != NULL) {
- mip = (MolInfoPtr) sdp->data.ptrvalue;
- if (mip != NULL) {
- if (mip->biomol == MOLECULE_TYPE_MRNA) {
- skip = TRUE;
- }
- }
- }
- }
- if (! skip) {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp != NULL) {
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
+ if (sdp != NULL) {
+ mip = (MolInfoPtr) sdp->data.ptrvalue;
+ if (mip != NULL) {
+ if (mip->biomol == MOLECULE_TYPE_MRNA) {
+ skip = TRUE;
+ }
+ }
+ }
+ }
+ if (! skip) {
SpliceCheckEx (vsp, sfp, TRUE);
}
}
@@ -25687,8 +29254,11 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
break;
default:
+ //LCOV_EXCL_START
+ //invalid ASN.1 cannot be tested in regression
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidType, "Invalid SeqFeat type [%d]", (int) (type));
break;
+ //LCOV_EXCL_STOP
}
if (type == SEQFEAT_HET) {
/* heterogen can have mix of bonds with just "a" point specified */
@@ -25734,38 +29304,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidateExceptText (vsp, gcp, sfp);
}
- for (xref = sfp->xref; xref != NULL; xref = xref->next) {
- if (xref->id.choice == 0 && xref->data.choice == 0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "SeqFeatXref with no id or data field");
- } else if (xref->id.choice != 0) {
- matchsfp = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL);
- if (matchsfp == NULL) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefFeatureMissing, "Cross-referenced feature cannot be found");
- } else {
- hasxref = FALSE;
- for (matchxref = matchsfp->xref; matchxref != NULL; matchxref = matchxref->next) {
- if (matchxref->id.choice != 0) {
- hasxref = TRUE;
- origsfp = SeqMgrGetFeatureByFeatID (matchsfp->idx.entityID, NULL, NULL, matchxref, NULL);
- if (origsfp != sfp) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefNotReciprocal, "Cross-referenced feature does not link reciprocally");
- } else {
- if (sfp->idx.subtype == FEATDEF_CDS && matchsfp->idx.subtype == FEATDEF_mRNA) {
- /* okay */
- } else if (sfp->idx.subtype == FEATDEF_mRNA && matchsfp->idx.subtype == FEATDEF_CDS) {
- /* okay */
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "Cross-references are not between CDS and mRNA pair");
- }
- }
- }
- }
- if (! hasxref) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqFeatXrefProblem, "Cross-referenced feature does not have its own cross-reference");
- }
- }
- }
- }
+ ValidateSeqFeatXref(sfp, vsp);
if (StringHasSgml (vsp, sfp->comment)) {
ValidErr (vsp, SEV_WARNING, ERR_GENERIC_SgmlPresentInText, "feature comment %s has SGML", sfp->comment);
@@ -25783,7 +29322,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
hasInference = TRUE;
inferenceCode = ValidateInferenceQualifier (gbq->val, vsp->inferenceAccnCheck);
if (inferenceCode != VALID_INFERENCE) {
- if (inferenceCode < VALID_INFERENCE || inferenceCode > BAD_ACCESSION_TYPE) {
+ if (inferenceCode < VALID_INFERENCE || inferenceCode > UNRECOGNIZED_DATABASE) {
inferenceCode = VALID_INFERENCE;
}
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidInferenceValue, "Inference qualifier problem - %s (%s)",
@@ -25800,7 +29339,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (ECnumberWasDeleted (str)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_DeletedEcNumber, "EC_number %s was deleted", str);
} else if (ECnumberWasReplaced (str)) {
- if (EcCnumberWasSplit (str)) {
+ if (EcCnumberWasSplit (vsp, str)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SplitEcNumber, "EC_number %s was replaced", str);
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ReplacedEcNumber, "EC_number %s was replaced", str);
@@ -25810,27 +29349,40 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
}
} else {
+ //LCOV_EXCL_START
+ //BasicCleanup removes empty EC numbers
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "EC number should not be empty");
+ //LCOV_EXCL_STOP
}
} else if (StringICmp (gbq->qual, "old_locus_tag") == 0) {
if (StringChr (gbq->val, ',') != NULL) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_LocusTagProblem,
"old_locus_tag has comma, may contain multiple values");
}
+ pseudo = FALSE;
+ if (sfp->pseudo) {
+ pseudo = TRUE;
+ }
grp = SeqMgrGetGeneXref (sfp);
if (grp == NULL) {
if (sfp->data.choice == SEQFEAT_GENE) {
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
} else {
gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
- if (gene != NULL && ! gene->pseudo) {
- grp = (GeneRefPtr) gene->data.value.ptrvalue;
+ if (gene != NULL) {
+ if (gene->pseudo) {
+ pseudo = TRUE;
+ } else {
+ grp = (GeneRefPtr) gene->data.value.ptrvalue;
+ }
}
}
}
if (grp == NULL || SeqMgrGeneIsSuppressed (grp) || StringHasNoText (grp->locus_tag)) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_LocusTagProblem,
- "old_locus_tag without inherited locus_tag");
+ if (! pseudo) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_LocusTagProblem,
+ "old_locus_tag without inherited locus_tag");
+ }
}
}
if (StringHasSgml (vsp, gbq->val)) {
@@ -26070,26 +29622,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
return;
redundantgenexref = FALSE;
label = gcontext.label;
- if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grp->locus_tag)) {
- if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) {
- redundantgenexref = TRUE;
- label = grp->locus_tag;
- }
- } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grp->locus)) {
- if (StringICmp (grp->locus, grpx->locus) == 0) {
- redundantgenexref = TRUE;
- label = grp->locus;
- }
- } else if (grp->syn != NULL && grpx->syn != NULL) {
- syn1 = (CharPtr) grp->syn->data.ptrvalue;
- syn2 = (CharPtr) grpx->syn->data.ptrvalue;
- if ((StringDoesHaveText (syn1)) && StringDoesHaveText (syn2)) {
- if (StringICmp (syn1, syn2) == 0) {
- redundantgenexref = TRUE;
- label = syn1;
- }
- }
- }
+ redundantgenexref = s_GeneRefsAreEquivalent(grp, grpx, &label);
if (redundantgenexref) {
MemSet ((Pointer) &dsd, 0, sizeof (DummySmfeData));
dsd.max = INT4_MAX;
@@ -26119,7 +29652,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
orp = biop->org;
if (orp != NULL) {
/* curated fly source still has duplicate features */
- if (StringICmp (orp->taxname, "Drosophila melanogaster") == 0) {
+ if (StringNICmp (orp->taxname, "Drosophila ", 11) == 0) {
if (StringHasNoText (label)) {
label = "?";
}
@@ -26181,6 +29714,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
BioseqPtr bsp;
Char ch;
Int4 counta, countnona;
+ DbtagPtr dbt;
CharPtr farstr = "";
ErrSev fetchsev;
GatherContextPtr gcp;
@@ -26248,8 +29782,11 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
ErrSetMessageLevel (msgsev);
if (mrseq == NULL) {
+ //LCOV_EXCL_START
+ //this measures a failure in the C Toolkit, not a problem with the ASN.1
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_MrnaTransFail, "Unable to transcribe mRNA");
return;
+ //LCOV_EXCL_STOP
}
bsp = BioseqFindFromSeqLoc (sfp->location);
@@ -26309,7 +29846,14 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
if (bsp == NULL && sfp->product != NULL && vsp->farFetchMRNAproducts) {
SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id));
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_ProductFetchFailure, "Unable to fetch mRNA transcript '%s'", id);
+ sev = SEV_ERROR;
+ if (sip != NULL && sip->choice == SEQID_GENERAL) {
+ dbt = (DbtagPtr) sip->data.ptrvalue;
+ if (dbt != NULL && StringICmp (dbt->db, "ti") != 0 && StringICmp (dbt->db, "SRA") != 0) {
+ sev = SEV_WARNING;
+ }
+ }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_ProductFetchFailure, "Unable to fetch mRNA transcript '%s'", id);
goto erret;
}
}
@@ -26328,6 +29872,8 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
pdseq = GetSequenceByFeature (&sf);
if (pdseq == NULL) {
+ //LCOV_EXCL_START
+ //this measures a failure in the C Toolkit, not a problem with the ASN.1
has_errors = TRUE;
other_than_mismatch = TRUE;
if (report_errors || unclassified_except) {
@@ -26337,6 +29883,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
ValidErr (vsp, fetchsev, ERR_SEQ_FEAT_MrnaTransFail, "Unable to fetch mRNA transcript");
}
+ //LCOV_EXCL_STOP
}
if (pdseq != NULL) {
mlen = StringLen (mrseq);
@@ -26594,6 +30141,11 @@ static void ValidateTranslExcept (
prot2len = StringLen (protseq);
for (vnp = codebreakhead; vnp != NULL; vnp = vnp->next) {
i = vnp->data.intvalue;
+ if (i == 0 && ! sfp->partial && (char) vnp->choice != 'M') {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept,
+ "Suspicious transl_except %c at first codon of complete CDS",
+ (char) vnp->choice);
+ }
if (i >= 0 && i < prot2len) {
if (protseq [i] == (Char) vnp->choice) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryTranslExcept,
@@ -26611,6 +30163,240 @@ static void ValidateTranslExcept (
MemFree (protseq);
}
+/* unusual translation start without initiator tRNA: returns TRUE only when sfp has excpt set, its except_text contains the tRNA-Leu CUG initiation phrase searched for below, and it carries an experiment qualifier */
+static Boolean LeuCUGstart (SeqFeatPtr sfp)
+{
+ GBQualPtr gbq;
+
+ if (sfp == NULL) return FALSE;
+ if (! sfp->excpt) return FALSE;
+ if (StringISearch (sfp->except_text, "translation initiation by tRNA-Leu at CUG codon") == NULL) return FALSE;
+ /* the exception text alone is not sufficient; an experiment qualifier must also be present */
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "experiment") == 0) return TRUE;
+ }
+
+ return FALSE;
+}
+
+/* ValidateCodeBreaks: for each code break of cds, warns when a non-M break sits on the first codon of a CDS that is not 5' partial, when the break location cannot be translated, when the break repeats what plain translation already gives, and when a non-stop break lies at or past the computed protein length */
+static void ValidateCodeBreaks (ValidStructPtr vsp, SeqFeatPtr cds, Boolean farFetchProd)
+{
+ CdRegionPtr crp, tmp_crp;
+ CodeBreakPtr cbp;
+ SeqFeatPtr tmp;
+ ByteStorePtr newprot = NULL;
+ CharPtr protseq = NULL;
+ Int4 pos, prot_len = 0;
+ char aa;
+ Boolean alt_start;
+ Boolean partial5, partial3;
+ /* the guard also assigns crp and cbp as a side effect of its conditions */
+ if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION
+ || (crp = (CdRegionPtr) (cds->data.value.ptrvalue)) == NULL
+ || (cbp = crp->code_break) == NULL) {
+ //LCOV_EXCL_START
+ //condition never met given how function is called
+ return;
+ //LCOV_EXCL_STOP
+ }
+ CheckSeqLocForPartial (cds->location, &partial5, &partial3);
+ /* don't copy code break when copying for tmp; the list is detached here and re-attached right after the copy */
+ crp->code_break = NULL;
+ tmp = (SeqFeatPtr)AsnIoMemCopy (cds, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
+ crp->code_break = cbp;
+ /* calculate expected protein translation without breaks */
+ newprot = ProteinFromCdRegionExEx (tmp, TRUE, FALSE, &alt_start, farFetchProd);
+ if (newprot != NULL) {
+ protseq = BSMerge (newprot, NULL);
+ BSFree (newprot);
+ prot_len = StringLen (protseq);
+ /* shorten prot len if ends with stop codon */
+ if (prot_len > 0 && protseq[prot_len - 1] == '*' && !partial3) {
+ prot_len--;
+ }
+ protseq = MemFree (protseq);
+ }
+ /* NOTE(review): tmp is dereferenced below without a NULL check - confirm that AsnIoMemCopy cannot fail here */
+ /* free tmp location; will use code break locations instead */
+ tmp->location = SeqLocFree (tmp->location);
+ /* clear frame - locations should already be offset */
+ tmp_crp = (CdRegionPtr) tmp->data.value.ptrvalue;
+ tmp_crp->frame = 0;
+ /* pos is the break's offset within the CDS location divided by 3; NOTE(review): if GetOffsetInLoc can return a small negative value the division truncates it to 0 - confirm */
+ for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
+ pos = (GetOffsetInLoc (cbp->loc, cds->location, SEQLOC_START)) / 3;
+ aa = (char) cbp->aa.value.intvalue;
+ if (pos == 0 && ! partial5 && aa != 'M') {
+ if (pos == 0 && aa == 'L' && LeuCUGstart (cds) && vsp->is_refseq_in_sep) {
+ /* do not warn on explicitly documented unusual translation initiation at CUG without initiator tRNA-Met */
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TranslExcept,
+ "Suspicious transl_except %c at first codon of complete CDS",
+ aa);
+ }
+ }
+ if (pos < prot_len) {
+ tmp->location = SeqLocCopy (cbp->loc); /* translate only the code break location, using the copy that has no code breaks */
+ SetSeqLocPartial (tmp->location, TRUE, TRUE);
+ newprot = ProteinFromCdRegionExEx (tmp, TRUE, FALSE, &alt_start, farFetchProd);
+ if (newprot == NULL) {
+ /* do something about inability to translate? */
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryTranslExcept,
+ "Unable to translate location for transl_except %c at position %ld",
+ aa, (long) (pos + 1));
+ } else {
+ protseq = BSMerge (newprot, NULL);
+ BSFree (newprot);
+ if (protseq == NULL) {
+ /* do something about inability to translate? */
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryTranslExcept,
+ "Unable to translate location for transl_except %c at position %ld",
+ aa, (long) (pos + 1));
+ } else {
+ if (protseq[0] == cbp->aa.value.intvalue) { /* the break asks for the residue that plain translation already yields */
+ if (pos == 0 && aa == 'L' && LeuCUGstart (cds) && vsp->is_refseq_in_sep) {
+ /* do not warn on explicitly documented unusual translation initiation at CUG without initiator tRNA-Met */
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryTranslExcept,
+ "Unnecessary transl_except %c at position %ld",
+ protseq[0], (long) (pos + 1));
+ }
+ }
+ protseq = MemFree (protseq);
+ }
+ }
+ tmp->location = SeqLocFree (tmp->location);
+ } else if (aa != '*') { /* at or beyond prot_len only a stop residue is accepted silently */
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryTranslExcept,
+ "Unexpected transl_except %c at position %ld just past end of protein",
+ aa, (long) (pos + 1));
+ }
+
+ }
+ tmp = SeqFeatFree (tmp);
+}
+/* CheckForThreeBaseNonsense: translates the interval from stop to start on the bioseq found from sfp->location using the genetic code of crp; when the translation is exactly one stop it reports ERR_SEQ_FEAT_NonsenseIntron and pushes sfp onto the vsp->sisfp list */
+static void CheckForThreeBaseNonsense (ValidStructPtr vsp, SeqFeatPtr sfp, CdRegionPtr crp, Int4 start, Int4 stop, Uint1 strand)
+
+{
+ ByteStorePtr bs;
+ BioseqPtr bsp;
+ Int2 genCode = 0;
+ SeqInt sint;
+ CharPtr res;
+ ErrSev sev;
+ Char str [32];
+ Boolean tableExists = FALSE;
+ TransTablePtr tbl = NULL;
+ ValNode vn;
+ ValNodePtr vnp;
+ ValNodePtr tvnp;
+
+ if (vsp == NULL || sfp == NULL || crp == NULL) return;
+
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp == NULL) return;
+
+ MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
+ MemSet ((Pointer) &vn, 0, sizeof (ValNode));
+ /* describe the interval with a local SeqInt wrapped in a local ValNode; note that from takes stop and to takes start */
+ sint.from = stop;
+ sint.to = start;
+ sint.strand = strand;
+ sint.id = bsp->id;
+
+ vn.choice = SEQLOC_INT;
+ vn.data.ptrvalue = (Pointer) &sint;
+ vn.next = NULL;
+
+ SetSeqLocPartial (&vn, TRUE, TRUE);
+ /* read the genetic code id from the CDS; when several list entries have choice 2 the last one wins */
+ if (crp->genetic_code != NULL) {
+ vnp = (ValNodePtr) crp->genetic_code->data.ptrvalue;
+ while (vnp != NULL) {
+ if (vnp->choice == 2) {
+ genCode = (Int2) vnp->data.intvalue;
+ }
+ vnp = vnp->next;
+ }
+ }
+ /* remap code 7 to 4 and code 8 to 1, and fall back to code 1 when none was found */
+ if (genCode == 7) {
+ genCode = 4;
+ } else if (genCode == 8) {
+ genCode = 1;
+ } else if (genCode == 0) {
+ genCode = 1;
+ }
+
+ /* set app property name for storing desired FSA */
+
+ sprintf (str, "TransTableFSAforGenCode%d", (int) genCode);
+
+ /* get FSA for desired genetic code if it already exists */
+
+ tbl = (TransTablePtr) GetAppProperty (str);
+ tableExists = (Boolean) (tbl != NULL);
+
+ bs = TransTableTranslateSeqLoc (&tbl, &vn, genCode, 1, TRUE, TRUE);
+ res = BSMerge (bs, NULL);
+ BSFree (bs);
+
+ /* save FSA in genetic code-specific app property name */
+
+ if (! tableExists) {
+ SetAppProperty (str, (Pointer) tbl);
+ }
+ /* severity is SEV_REJECT unless vsp->is_embl_ddbj_in_sep is set, in which case it is SEV_ERROR */
+ if (StringCmp (res, "*") == 0) {
+ sev = SEV_REJECT;
+ if (vsp->is_embl_ddbj_in_sep) {
+ sev = SEV_ERROR;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_NonsenseIntron, "Triplet intron encodes stop codon");
+ tvnp = ValNodeNew(NULL); /* NOTE(review): result is used without a NULL check - confirm ValNodeNew cannot fail */
+ tvnp->data.ptrvalue = sfp;
+ tvnp->next = vsp->sisfp;
+ vsp->sisfp = tvnp;
+ }
+
+ MemFree (res);
+}
+/* TranslateTripletIntrons: walks the intervals of sfp->location in SeqLocFindNext order and calls CheckForThreeBaseNonsense on every gap of exactly three bases between an interval and its predecessor; does nothing for excepted or pseudo features or when the CDS has code breaks */
+static void TranslateTripletIntrons (ValidStructPtr vsp, SeqFeatPtr sfp, CdRegionPtr crp)
+
+{
+ SeqLocPtr slp;
+ Int4 last_start, last_stop, start, stop;
+ Uint1 strand;
+
+ if (vsp == NULL || sfp == NULL || crp == NULL || sfp->excpt || IsPseudo (sfp) || crp->code_break != NULL) return;
+ /* remember the first interval, then compare each later interval with the one before it */
+ slp = SeqLocFindNext (sfp->location, NULL);
+ last_start = SeqLocStart (slp);
+ last_stop = SeqLocStop (slp);
+
+ slp = SeqLocFindNext (sfp->location, slp);
+ while (slp != NULL) {
+ start = SeqLocStart (slp);
+ stop = SeqLocStop (slp);
+ strand = SeqLocStrand (slp);
+ if (strand == Seq_strand_minus) {
+ if (last_start - stop == 4) { /* a difference of 4 leaves three bases between the two intervals */
+ CheckForThreeBaseNonsense (vsp, sfp, crp, last_start - 1, stop + 1, strand);
+ }
+ } else {
+ if (start - last_stop == 4) { /* a difference of 4 leaves three bases between the two intervals */
+ CheckForThreeBaseNonsense (vsp, sfp, crp, start - 1, last_stop + 1, strand);
+ }
+ }
+ last_start = start;
+ last_stop = stop;
+ slp = SeqLocFindNext (sfp->location, slp);
+ }
+}
+
typedef struct cdsmismatch {
Int4 pos;
Int2 cds_residue;
@@ -26660,8 +30446,9 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
Boolean unlockProd = FALSE;
StreamCache sc;
Boolean isgap;
- Boolean badseq;
+ Boolean badseq = FALSE;
BioseqPtr bsp;
+ DbtagPtr dbt;
SeqIdPtr sip, sip3;
Char id [64];
Boolean is_ged = FALSE;
@@ -26736,14 +30523,15 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
annotated_by_transcript_or_proteomic = TRUE;
}
- if (crp->code_break == NULL) { /* check for unparsed transl_except */
- for (gb = sfp->qual; gb != NULL; gb = gb->next) {
- if (StringCmp (gb->qual, "transl_except") == 0) {
- transl_except = TRUE;
- break;
- }
+ /* check for unparsed transl_except */
+ for (gb = sfp->qual; gb != NULL; gb = gb->next) {
+ if (StringCmp (gb->qual, "transl_except") == 0) {
+ transl_except = TRUE;
+ break;
}
- } else {
+ }
+
+ if (crp->code_break != NULL) {
codebreakhead = MakeCodeBreakList (sfp->location, SeqLocLen (sfp->location), crp->code_break, crp->frame);
}
@@ -26784,7 +30572,8 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
if (codebreakhead != NULL) {
- ValidateTranslExcept (vsp, sfp, codebreakhead, farFetchProd, crp->frame, crp->genetic_code);
+ /*ValidateTranslExcept (vsp, sfp, codebreakhead, farFetchProd, crp->frame, crp->genetic_code); */
+ ValidateCodeBreaks (vsp, sfp, farFetchProd);
}
protid = SeqLocId (sfp->product);
@@ -26823,6 +30612,8 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
if (alt_start && gccode == 1) {
+ //LCOV_EXCL_START
+ //sev is always set to none, so error won't be reported
/* sev = SEV_WARNING; */
sev = SEV_NONE; /* only enable for RefSeq, leave old code in for now */
if (Loc_is_RefSeq (sfp->location)) {
@@ -26842,6 +30633,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
ValidErr (vsp, sev, ERR_SEQ_FEAT_AltStartCodon, "Alternative start codon used");
}
}
+ //LCOV_EXCL_STOP
} else if (! alt_start) {
if (sfp->excpt && StringDoesHaveText (sfp->except_text)) {
if (StringStr (sfp->except_text, "alternative start codon") != NULL) {
@@ -26928,26 +30720,29 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if (crp->frame > 1) {
if (!(part_loc & SLP_START)) {
+ sev = SEV_ERROR;
+ /*
sev = SEV_WARNING;
if (Loc_is_RefSeq (sfp->location)) {
sev = SEV_ERROR;
}
+ */
has_errors = TRUE;
other_than_mismatch = TRUE;
if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_SuspiciousFrame, "Suspicious CDS location - frame > 1 but not 5' partial");
}
} else if ((part_loc & SLP_NOSTART) && (!PartialAtSpliceSiteOrGap (vsp, sfp->location, SLP_NOSTART, &isgap, &badseq))) {
- if (PartialAtGapOrNs (vsp, sfp->location, SLP_NOSTART) && StringStr (sfp->comment, "coding region disrupted by sequencing gap") != NULL) {
+ if (PartialAtGapOrNs (vsp, sfp->location, SLP_NOSTART) || StringStr (sfp->comment, "coding region disrupted by sequencing gap") != NULL) {
/* suppress */
} else {
- sev = SEV_INFO;
+ sev = SEV_WARNING;
if (Loc_is_RefSeq (sfp->location)) {
sev = SEV_ERROR;
}
has_errors = TRUE;
other_than_mismatch = TRUE;
- if (report_errors) {
+ if (report_errors) {
ValidErr (vsp, sev, ERR_SEQ_FEAT_SuspiciousFrame, "Suspicious CDS location - frame > 1 and not at consensus splice site");
}
}
@@ -26986,7 +30781,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
}
- if (annotated_by_transcript_or_proteomic) {
+ if (! annotated_by_transcript_or_proteomic) {
if (1.2 * prot2len < prot1len) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_ProductLength, "Protein product length [%ld] is more than 120%% of the %stranslation length [%ld]", prot1len, farstr, prot2len);
}
@@ -27096,7 +30891,14 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
if (prot1seq == NULL && sfp->product != NULL && vsp->farFetchCDSproducts) {
SeqIdWrite (protid, id, PRINTID_FASTA_LONG, sizeof (id));
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_ProductFetchFailure, "Unable to fetch CDS product '%s'", id);
+ sev = SEV_ERROR;
+ if (protid != NULL && protid->choice == SEQID_GENERAL) {
+ dbt = (DbtagPtr) protid->data.ptrvalue;
+ if (dbt != NULL && StringICmp (dbt->db, "ti") != 0 && StringICmp (dbt->db, "SRA") != 0) {
+ sev = SEV_WARNING;
+ }
+ }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_ProductFetchFailure, "Unable to fetch CDS product '%s'", id);
goto erret;
}
if (prot1seq != NULL)
@@ -27267,7 +31069,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if (report_errors && !mismatch_except) {
nuclocstr = MapToNTCoords (sfp, protid, mismatches[0].pos);
loc2str = MapToNTCoords (sfp, protid, mismatches[10].pos);
- ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
+ ValidErr (vsp, vsp->is_geneious ? SEV_WARNING : sev, ERR_SEQ_FEAT_MisMatchAA,
"%d mismatches found. First mismatch at %ld, residue in protein [%c] != translation [%c]%s%s. Last mismatch at %ld, residue in protein [%c] != translation [%c]%s%s. Genetic code [%d]",
mismatch,
(long) (mismatches[0].pos + 1), mismatches[0].prot_residue, mismatches[0].cds_residue,
@@ -27281,7 +31083,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
} else {
for (i = 0; i < mismatch; i++) {
nuclocstr = MapToNTCoords (sfp, protid, mismatches[i].pos);
- ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
+ ValidErr (vsp, vsp->is_geneious ? SEV_WARNING : sev, ERR_SEQ_FEAT_MisMatchAA,
"%sResidue %ld in protein [%c] != translation [%c]%s%s", farstr,
(long) (mismatches[i].pos + 1),
(char) mismatches[i].prot_residue,
@@ -27297,7 +31099,10 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
has_errors = TRUE;
other_than_mismatch = TRUE;
if (report_errors || (rna_editing && (prot1len < len - 1 || prot1len > len))) {
- ValidErr (vsp, rna_editing ? SEV_WARNING : trans_len_sev, ERR_SEQ_FEAT_TransLen, "Given protein length [%ld] does not match %stranslation length [%ld]", prot1len, farstr, len);
+ ValidErr (vsp, rna_editing ? SEV_WARNING : trans_len_sev, ERR_SEQ_FEAT_TransLen,
+ "Given protein length [%ld] does not match %stranslation length [%ld]%s",
+ prot1len, farstr, len,
+ rna_editing ? " (RNA editing present)" : "");
}
}
@@ -27383,7 +31188,8 @@ erret:
if (! report_errors) {
if (! has_errors) {
- if ((! frameshift_except) && (! rearrange_except) && (! mixed_population) && (! low_quality) && (! artificial_location)) {
+ if ((! frameshift_except) && (! rearrange_except) && (! mixed_population) &&
+ (! low_quality) && (! artificial_location) && (! annotated_by_transcript_or_proteomic)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "CDS has exception but passes translation test");
}
} else if (unclassified_except && (! other_than_mismatch)) {
@@ -27396,6 +31202,8 @@ erret:
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnqualifiedException, "CDS has unqualified translated product replaced exception");
}
}
+
+ TranslateTripletIntrons (vsp, sfp, crp);
}
@@ -27471,7 +31279,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
Int2 residue1, residue2;
Char tbuf[40];
Boolean reportAsError, first, last, firstPartial, lastPartial, has_errors = FALSE,
- report_errors = TRUE, checkExonDonor, checkExonAcceptor, pseudo;
+ report_errors = TRUE, checkExonDonor, checkExonAcceptor, pseudo, ribo_slip = FALSE;
int severity;
Uint2 partialflag;
SeqEntryPtr sep;
@@ -27491,7 +31299,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
/* suppress if organelle */
bsp = BioseqFindFromSeqLoc (sfp->location);
- if (bsp != NULL && IsOrganelleBioseq(bsp)) {
+ if (bsp != NULL && IsBioseqOrganelle(bsp)) {
return;
}
@@ -27501,6 +31309,10 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
if (StringISearch (sfp->except_text, "low-quality sequence region") != NULL) {
return;
}
+ if (StringISearch (sfp->except_text, "ribosomal slippage") != NULL) {
+ report_errors = FALSE;
+ ribo_slip = TRUE;
+ }
if (StringISearch (sfp->except_text, "ribosomal slippage") != NULL||
StringISearch (sfp->except_text, "artificial frameshift") != NULL ||
StringISearch (sfp->except_text, "nonconsensus splice site") != NULL ||
@@ -27590,8 +31402,11 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
len = bsp->length;
if (strand != Seq_strand_minus) {
if (! StreamCacheSetup (bsp, NULL, EXPAND_GAPS_TO_DASHES, &sc)) {
+ //LCOV_EXCL_START
+ //C Toolkit specific
BioseqUnlock (bsp);
break;
+ //LCOV_EXCL_STOP
}
} else {
sint.from = 0;
@@ -27602,8 +31417,11 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
vn.data.ptrvalue = (Pointer) &sint;
vn.next = NULL;
if (! StreamCacheSetup (NULL, &vn, EXPAND_GAPS_TO_DASHES, &sc)) {
- BioseqUnlock (bsp);
+ //LCOV_EXCL_START
+ //C Toolkit specific
+ BioseqUnlock(bsp);
break;
+ //LCOV_EXCL_STOP
}
}
/* spp = SeqPortNew (bsp, 0, -1, strand, Seq_code_ncbi4na); */
@@ -27690,14 +31508,17 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
/* ignore gap, and suppress UnnecessaryException message */
has_errors = TRUE;
} else if (IS_residue (residue1) && IS_residue (residue2)) {
- if (ConsistentWithG (residue1) && ConsistentWithT (residue2)) {
+ if (ConsistentWithG ((Char)residue1) && ConsistentWithT ((Char)residue2)) {
} else { /* not T */
if (residue1 == 'G' && residue2 == 'C') { /* GC minor splice site */
tbuf[0] = '\0';
if (bsp == NULL) {
StringCpy (tbuf, "?");
} else if (vsp->suppressContext || vsp->convertGiToAccn) {
- WorstBioseqLabel (bsp, tbuf, 39, OM_LABEL_CONTENT);
+ //LCOV_EXCL_START
+ // option not used
+ WorstBioseqLabel(bsp, tbuf, 39, OM_LABEL_CONTENT);
+ //LCOV_EXCL_STOP
} else {
BioseqLabel (bsp, tbuf, 39, OM_LABEL_CONTENT);
}
@@ -27721,7 +31542,10 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
if (bsp == NULL) {
StringCpy (tbuf, "?");
} else if (vsp->suppressContext || vsp->convertGiToAccn) {
- WorstBioseqLabel (bsp, tbuf, 39, OM_LABEL_CONTENT);
+ //LCOV_EXCL_START
+ // option not used
+ WorstBioseqLabel(bsp, tbuf, 39, OM_LABEL_CONTENT);
+ //LCOV_EXCL_STOP
} else {
BioseqLabel (bsp, tbuf, 39, OM_LABEL_CONTENT);
}
@@ -27734,11 +31558,14 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
}
}
} else {
+ //LCOV_EXCL_START
+ //StreamCacheGetResidue converts bad residues to "good"
has_errors = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusDonor,
"Bad sequence at splice donor after exon ending at position %ld of %s", (long) (donor + 1), tbuf);
}
+ //LCOV_EXCL_STOP
}
}
@@ -27759,7 +31586,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
/* ignore gap, and suppress UnnecessaryException message */
has_errors = TRUE;
} else if (IS_residue (residue1) && IS_residue (residue2)) {
- if (ConsistentWithA (residue1) && ConsistentWithG (residue2)) {
+ if (ConsistentWithA ((Char)residue1) && ConsistentWithG ((Char)residue2)) {
} else {
if (checkExonAcceptor) {
severity = SEV_WARNING;
@@ -27773,7 +31600,10 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
StringCpy (tbuf, "?");
SeqIdWrite (sip, tbuf, PRINTID_FASTA_SHORT, 39);
} else if (vsp->suppressContext || vsp->convertGiToAccn) {
- WorstBioseqLabel (bsp, tbuf, 39, OM_LABEL_CONTENT);
+ //LCOV_EXCL_START
+ // option not used
+ WorstBioseqLabel(bsp, tbuf, 39, OM_LABEL_CONTENT);
+ //LCOV_EXCL_STOP
} else {
BioseqLabel (bsp, tbuf, 39, OM_LABEL_CONTENT);
}
@@ -27785,11 +31615,14 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
}
}
} else {
+ //LCOV_EXCL_START
+ //StreamCacheGetResidue converts bad residues to "good"
has_errors = TRUE;
if (report_errors) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor,
"Bad sequence at splice acceptor before exon starting at position %ld of %s", (long) (acceptor + 1), tbuf);
}
+ //LCOV_EXCL_STOP
}
}
@@ -27801,15 +31634,20 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
if (! report_errors) {
if (! has_errors) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "feature has exception but passes splice site test");
+ if (! ribo_slip) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "feature has exception but passes splice site test");
+ }
}
}
}
+//LCOV_EXCL_START
+//lcov is just being weird; SpliceCheck only forwards to SpliceCheckEx with checkAll set to FALSE
NLM_EXTERN void SpliceCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
{
SpliceCheckEx (vsp, sfp, FALSE);
}
+//LCOV_EXCL_STOP
/*****************************************************************************
*
@@ -27851,7 +31689,8 @@ static void CdsProductIdCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
}
if (! okay) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_MissingCDSproduct, "Expected CDS product absent");
+ ValidErr (vsp, vsp->is_geneious ? SEV_WARNING : SEV_ERROR,
+ ERR_SEQ_FEAT_MissingCDSproduct, "Expected CDS product absent");
}
}
@@ -27881,7 +31720,7 @@ static Int2 SeqLocMixCount (SeqLocPtr slp)
return count;
}
-NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefix)
+NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, Boolean report_abutting, CharPtr prefix)
{
SeqLocPtr tmp, prev;
Boolean retval = TRUE, tmpval, mixed_strand = FALSE, unmarked_strand = FALSE,
@@ -27935,6 +31774,8 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
}
if (SeqLocMixCount (slp) > 1) {
+ //LCOV_EXCL_START
+ //C code fails before location with multiple mixes can be validated
retval = FALSE;
ctmp = SeqLocPrint (slp);
if (ctmp != NULL && StringLen (ctmp) > 800) {
@@ -27942,6 +31783,7 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
}
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_NestedSeqLocMix, "%s: SeqLoc [%s] has nested SEQLOC_MIX elements", prefix, ctmp);
MemFree (ctmp);
+ //LCOV_EXCL_STOP
}
tmp = NULL;
@@ -27957,6 +31799,28 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
sip2 = (SeqIntPtr) (tmp->data.ptrvalue);
strand2 = sip2->strand;
id2 = sip2->id;
+
+ if (sip2->from == sip2->to) {
+ /*
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_SeqLocTypeProblem, "Seq-loc.int has identical from and to values, should be Seq-loc.pt");
+ */
+ }
+
+ /* for SQD-663 */
+ if (sip2->if_from != NULL && sip2->if_to != NULL) {
+ if (sip2->if_from->choice == sip2->if_to->choice && sip2->if_from->choice == 4) {
+ if(sip2->if_from->a == sip2->if_to->a) {
+ if (sip2->if_from->a == 4) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidFuzz,
+ "Should not specify 'space to left' for both ends of interval");
+ } else if (sip2->if_from->a == 3) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidFuzz,
+ "Should not specify 'space to right' for both ends of interval");
+ }
+ }
+ }
+ }
+
tmpval = SeqIntCheck (sip2);
if ((tmpval) && (sip1 != NULL)) {
if (SeqIdForSameBioseq (sip1->id, sip2->id)) {
@@ -28099,7 +31963,7 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
}
}
- if (adjacent) {
+ if (adjacent && report_abutting) {
ctmp = SeqLocPrint (slp);
if (exception) {
sev = SEV_WARNING;
@@ -28133,7 +31997,8 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
if (vsp->is_small_genome_set) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_MixedStrand, "%s: Mixed strands in SeqLoc [%s] in small genome set - set trans-splicing exception if appropriate", prefix, ctmp);
} else {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_MixedStrand, "%s: Mixed strands in SeqLoc [%s]", prefix, ctmp);
+ ValidErr (vsp, vsp->is_geneious ? SEV_WARNING : SEV_ERROR,
+ ERR_SEQ_FEAT_MixedStrand, "%s: Mixed strands in SeqLoc [%s]", prefix, ctmp);
}
} else if (unmarked_strand) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_MixedStrand, "%s: Mixed plus and unknown strands in SeqLoc [%s]", prefix, ctmp);
@@ -28162,6 +32027,8 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
/* newer check for intervals out of order on segmented bioseq */
if (bsp == NULL || bsp->repr != Seq_repr_seg) return;
+//LCOV_EXCL_START
+// Only for SegSets
oldsev = ErrSetMessageLevel (SEV_ERROR);
bad = SeqLocBadSortOrder (bsp, slp);
@@ -28188,6 +32055,7 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_MixedStrand, "%s: Mixed strands in SeqLoc [%s]", prefix, ctmp);
MemFree (ctmp);
}
+//LCOV_EXCL_STOP
}
/*****************************************************************************
@@ -28401,7 +32269,10 @@ static void ValidateGraphsOnBioseq (GatherContextPtr gcp)
if (outOfOrder) {
gcp->itemID = firstsgitemid;
if (fa2htgsBug) {
+ //LCOV_EXCL_START
+ //fa2htgs bug no longer seen
ValidErr (vsp, SEV_ERROR, ERR_SEQ_GRAPH_GraphOutOfOrder, "Graph components are out of order - probably caused by old fa2htgs bug");
+ //LCOV_EXCL_STOP
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_GRAPH_GraphOutOfOrder, "Graph components are out of order - may be a software bug");
}
@@ -28735,6 +32606,8 @@ static void ValidateGraphsOnBioseq (GatherContextPtr gcp)
ValNodeFreeData (head);
}
+//LCOV_EXCL_START
+// patch_seq is never set, function is never called
/*****************************************************************************
*
* PatchBadSequence(bsp)
@@ -28808,6 +32681,7 @@ static void FindABioseq (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
return;
}
+
NLM_EXTERN CharPtr FindIDForEntry (SeqEntryPtr sep, CharPtr buf)
{
BioseqPtr bsp = NULL;
@@ -28824,6 +32698,7 @@ NLM_EXTERN CharPtr FindIDForEntry (SeqEntryPtr sep, CharPtr buf)
SeqIdPrint (bsp->id, buf, PRINTID_FASTA_LONG);
return buf;
}
+//LCOV_EXCL_STOP
static CharPtr TrimSpacesOnEitherSide (CharPtr str)
{
@@ -28955,6 +32830,8 @@ static void LookForEtAl (ValidStructPtr vsp, ValNodePtr tmp)
}
}
+//LCOV_EXCL_START
+// spellcheck function never supplied
static void SpellCheckPub (ValidStructPtr vsp, ValNodePtr tmp)
{
CitArtPtr cap;
@@ -28997,6 +32874,7 @@ static void SpellCheckPub (ValidStructPtr vsp, ValNodePtr tmp)
return;
}
+// spellcheck function never supplied
static void SpellCheckSeqDescr (GatherContextPtr gcp)
{
PubdescPtr pdp;
@@ -29043,6 +32921,7 @@ static void SpellCheckSeqDescr (GatherContextPtr gcp)
return;
}
+// spellcheck function never supplied
NLM_EXTERN void SpellCheckSeqFeat (GatherContextPtr gcp)
{
PubdescPtr pdp;
@@ -29132,6 +33011,7 @@ NLM_EXTERN void SpellCheckSeqFeat (GatherContextPtr gcp)
return;
}
+// spellcheck function never supplied
NLM_EXTERN void SpellCheckString (ValidStructPtr vsp, CharPtr str)
{
if ((vsp == NULL) || (str == NULL))
@@ -29145,6 +33025,7 @@ NLM_EXTERN void SpellCheckString (ValidStructPtr vsp, CharPtr str)
return;
}
+// spellcheck function never supplied
NLM_EXTERN void SpellCallBack (char *str)
{
ErrSev sev;
@@ -29156,3 +33037,246 @@ NLM_EXTERN void SpellCallBack (char *str)
ValidErr (globalvsp, sev, ERR_GENERIC_Spell, "[ %s ]", (CharPtr) str);
return;
}
+
+
+// This section of code is used for converting features with
+// certain types of validation errors to misc_features
+typedef struct intpair { /* one validator error identified by an (errcode, subcode) pair */
+ Int4 errcode; /* matched against the errcode argument in s_ErrorQualifiesForConversion */
+ Int4 subcode; /* matched against the subcode argument in s_ErrorQualifiesForConversion */
+} Int4PairData, PNTR Int4PairPtr;
+
+
+static Int4PairData s_ErrCodeList[] = { /* validator errors that qualify a feature for conversion */
+ {ERR_SEQ_INST_StopInProtein}, /* ERR_* macros in validerr.h expand to "code,subcode", so one macro fills both members */
+ {ERR_SEQ_FEAT_InternalStop},
+ {ERR_SEQ_FEAT_StartCodon},
+ {ERR_SEQ_INST_BadProteinStart},
+ {ERR_SEQ_FEAT_NoStop},
+ {0,0} /* terminator: s_ErrorQualifiesForConversion stops scanning at errcode == 0 */
+};
+
+
+
+typedef struct conversionlists { /* the two feature lists gathered before conversion to misc_features */
+ ValNodePtr remove_gene; /* features later passed to ConvertListToMiscFeat with TRUE as its second argument */
+ ValNodePtr keep_gene; /* features later passed to ConvertListToMiscFeat with FALSE as its second argument */
+} ConversionListsData, PNTR ConversionListsPtr;
+
+
+/* Report whether the (errcode, subcode) pair is listed in s_ErrCodeList.
+ * The table is walked up to its zero-errcode terminator, and both members
+ * of an entry must match for the error to qualify.
+ */
+static Boolean s_ErrorQualifiesForConversion(Int4 errcode, Int4 subcode)
+{
+  Int4PairPtr entry;
+
+  for (entry = s_ErrCodeList; entry->errcode != 0; entry++) {
+    if (entry->errcode != errcode) {
+      continue;
+    }
+    if (entry->subcode == subcode) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+static void LIBCALLBACK ValidCountProblemsCallback(
+ ErrSev severity,
+ int errcode,
+ int subcode,
+ Uint2 entityID,
+ Uint2 itemtype,
+ Uint4 itemID,
+ CharPtr accession,
+ CharPtr seqid,
+ CharPtr featureID,
+ CharPtr message,
+ CharPtr objtype,
+ CharPtr label,
+ CharPtr context,
+ CharPtr location,
+ CharPtr product,
+ Pointer userdata
+)
+
+{
+ SeqFeatPtr sfp;
+ SeqMgrFeatContext fcontext;
+ BioseqPtr bsp;
+ ConversionListsPtr lists;
+
+ if ((lists = (ConversionListsPtr) userdata) == NULL) {
+ return;
+ }
+
+ if (itemtype != OBJ_SEQFEAT) {
+ return;
+ }
+ /* limit the errors we pay attention to by severity, errcode and subcode */
+ if (severity < SEV_NONE || severity > SEV_MAX) {
+ severity = SEV_MAX;
+ }
+ if (!s_ErrorQualifiesForConversion(errcode, subcode)) {
+ return;
+ }
+
+ sfp = SeqMgrGetDesiredFeature (entityID, NULL, itemID, 0, NULL, &fcontext);
+ if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ sfp = SeqMgrGetCDSgivenProduct (bsp, NULL);
+ }
+ if (sfp != NULL && (sfp->data.choice == SEQFEAT_CDREGION || sfp->data.choice == SEQFEAT_RNA)) {
+ ValNodeAddPointer (&(lists->keep_gene), OBJ_SEQFEAT, sfp);
+ }
+
+}
+
+
+/* Feature visitor: increments the Int4 counter that data points to for
+ * every coding region or RNA feature.  A NULL feature or NULL counter is
+ * ignored.
+ */
+static void CountAllCDSAndRna (SeqFeatPtr sfp, Pointer data)
+{
+  Int4Ptr counter = (Int4Ptr) data;
+
+  if (sfp == NULL || counter == NULL) {
+    return;
+  }
+  if (sfp->data.choice != SEQFEAT_CDREGION && sfp->data.choice != SEQFEAT_RNA) {
+    return;
+  }
+  *counter += 1;
+}
+
+
+/* Flatten a list of clickable items into a single list of objects.
+ * For each non-NULL ClickableItem a copy of its item_list is appended,
+ * followed by the flattened contents of its own subcategories (recursive).
+ * Returns the combined list, or NULL when nothing was collected.
+ */
+static ValNodePtr ItemListFromAllSubcategories (ValNodePtr subcategories)
+{
+  ValNodePtr       collected = NULL;
+  ValNodePtr       node = subcategories;
+  ClickableItemPtr item;
+
+  while (node != NULL) {
+    item = (ClickableItemPtr) node->data.ptrvalue;
+    if (item != NULL) {
+      ValNodeLink (&collected, ClickableItemObjectListCopy(item->item_list));
+      ValNodeLink (&collected, ItemListFromAllSubcategories(item->subcategories));
+    }
+    node = node->next;
+  }
+  return collected;
+}
+
+
+/* Run the discrepancy tests enabled in config over sep_list and return the
+ * coding region and RNA features they reported.
+ *
+ * The discrepancy results are flattened with ItemListFromAllSubcategories
+ * and then freed.  Every node that is not an OBJ_SEQFEAT node holding a
+ * non-NULL coding region or RNA feature is marked with choice 0, extracted
+ * from the list and freed; the remaining nodes are returned.
+ */
+static ValNodePtr ListFeaturesWithConfigProblems(ValNodePtr sep_list, DiscrepancyConfigPtr config)
+{
+  ValNodePtr problems, features, node, discarded;
+  SeqFeatPtr feat;
+  Boolean    keep;
+
+  problems = CollectDiscrepancies (config, sep_list, NULL);
+  features = ItemListFromAllSubcategories (problems);
+  problems = FreeClickableList(problems);
+
+  for (node = features; node != NULL; node = node->next) {
+    keep = FALSE;
+    if (node->choice == OBJ_SEQFEAT) {
+      feat = (SeqFeatPtr) node->data.ptrvalue;
+      if (feat != NULL) {
+        keep = (Boolean) (feat->data.choice == SEQFEAT_CDREGION || feat->data.choice == SEQFEAT_RNA);
+      }
+    }
+    if (!keep) {
+      /* choice 0 flags the node for the extraction below */
+      node->choice = 0;
+    }
+  }
+
+  discarded = ValNodeExtractList (&features, 0);
+  discarded = ValNodeFree (discarded);
+  return features;
+}
+
+
+/* Remove from *list every OBJ_SEQFEAT node whose feature has the given
+ * data choice.  Matching nodes are marked with choice 0, then all choice-0
+ * nodes are extracted from the list and freed.  A NULL or empty list is
+ * left untouched.
+ */
+static void FilterOutFeatures (ValNodePtr PNTR list, Uint1 datachoice)
+{
+  ValNodePtr node, doomed;
+  SeqFeatPtr feat;
+
+  if (list == NULL || *list == NULL) {
+    return;
+  }
+
+  node = *list;
+  while (node != NULL) {
+    if (node->choice == OBJ_SEQFEAT) {
+      feat = (SeqFeatPtr) node->data.ptrvalue;
+      if (feat != NULL && feat->data.choice == datachoice) {
+        node->choice = 0;
+      }
+    }
+    node = node->next;
+  }
+
+  doomed = ValNodeExtractList (list, 0);
+  doomed = ValNodeFree (doomed);
+}
+
+
+/* Collect coding regions and RNAs flagged by three discrepancy tests, each
+ * run on its own with every other test disabled:
+ *   DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS -> appended to keep_gene
+ *   DISC_SHORT_RRNA                               -> appended to remove_gene
+ *   DISC_RNA_CDS_OVERLAP (RNA features dropped)   -> appended to remove_gene
+ */
+static void GetCodingRegionsAndRNAsWithDiscrepancies (SeqEntryPtr sep, ConversionListsPtr lists)
+{
+  DiscrepancyConfigData config;
+  ValNodePtr            entries = NULL;
+  ValNodePtr            overlapping;
+
+  MemSet (&config, 0, sizeof (DiscrepancyConfigData));
+  ValNodeAddPointer (&entries, 0, sep);
+
+  /* pass 1: non-extendable partials */
+  config.conf_list[DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS] = TRUE;
+  ValNodeLink (&(lists->keep_gene), ListFeaturesWithConfigProblems(entries, &config));
+  config.conf_list[DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS] = FALSE;
+
+  /* pass 2: short rRNAs */
+  config.conf_list[DISC_SHORT_RRNA] = TRUE;
+  ValNodeLink (&(lists->remove_gene), ListFeaturesWithConfigProblems(entries, &config));
+  config.conf_list[DISC_SHORT_RRNA] = FALSE;
+
+  /* pass 3: RNA/CDS overlaps, with the RNA features filtered out of the result */
+  config.conf_list[DISC_RNA_CDS_OVERLAP] = TRUE;
+  overlapping = ListFeaturesWithConfigProblems(entries, &config);
+  FilterOutFeatures(&overlapping, SEQFEAT_RNA);
+  ValNodeLink (&(lists->remove_gene), overlapping);
+
+  entries = ValNodeFree (entries);
+}
+
+
+/* Convert coding regions and RNA features that fail validation or selected
+ * discrepancy tests into misc_features.
+ *
+ * sep - Seq-entry to validate and modify
+ * lip - log info handed through to ConvertListToMiscFeat
+ *
+ * Features are gathered from two sources: validator errors accepted by
+ * ValidCountProblemsCallback, and the discrepancy tests run by
+ * GetCodingRegionsAndRNAsWithDiscrepancies.  If the number of distinct bad
+ * features exceeds half of all coding region and RNA features in sep, an
+ * error message is posted and nothing is converted.
+ */
+NLM_EXTERN void ConvertFailedCodingRegionsAndRNAsToMiscFeatures(SeqEntryPtr sep, LogInfoPtr lip)
+{
+  ValidStructPtr      vsp;
+  ConversionListsData lists;
+  ValNodePtr          all_list;
+  Int4                num_total = 0, num_bad = 0;
+
+  MemSet (&lists, 0, sizeof (ConversionListsData));
+  vsp = ValidStructNew ();
+  if (vsp == NULL) {
+    return;
+  }
+  vsp->errfunc = ValidCountProblemsCallback;
+  vsp->userdata = &lists;
+
+  ValidateSeqEntry (sep, vsp);
+  /* the validator is only needed to drive the callback; release it here so
+   * that none of the exit paths below leaks it
+   */
+  ValidStructFree (vsp);
+  vsp = NULL;
+
+  GetCodingRegionsAndRNAsWithDiscrepancies(sep, &lists);
+
+  if (lists.keep_gene == NULL && lists.remove_gene == NULL) {
+    /* nothing to do here */
+    return;
+  }
+
+  /* consolidate lists */
+  lists.keep_gene = ValNodeSort (lists.keep_gene, SortVnpByChoiceAndPtrvalue);
+  ValNodeUnique(&(lists.keep_gene), SortVnpByChoiceAndPtrvalue, ValNodeFree);
+  lists.remove_gene = ValNodeSort (lists.remove_gene, SortVnpByChoiceAndPtrvalue);
+  ValNodeUnique(&(lists.remove_gene), SortVnpByChoiceAndPtrvalue, ValNodeFree);
+
+  /* check to see if there are too many bad features in total; a feature
+   * present in both lists is counted once
+   */
+  all_list = ValNodeCopyPtr(lists.keep_gene);
+  ValNodeLink (&all_list, ValNodeCopyPtr(lists.remove_gene));
+  all_list = ValNodeSort (all_list, SortVnpByChoiceAndPtrvalue);
+  ValNodeUnique(&(all_list), SortVnpByChoiceAndPtrvalue, ValNodeFree);
+  num_bad = ValNodeLen (all_list);
+  all_list = ValNodeFree (all_list);
+
+  /* count total number of features, compare with number bad, do nothing if more
+   * than 50% are bad
+   */
+  VisitFeaturesInSep (sep, &num_total, CountAllCDSAndRna);
+  if (num_total < 2 * num_bad) {
+    Message (MSG_ERROR, "More than 50%% of coding regions and RNA features are bad");
+    lists.keep_gene = ValNodeFree (lists.keep_gene);
+    lists.remove_gene = ValNodeFree (lists.remove_gene);
+    return;
+  }
+
+  /* convert bad features to misc */
+  ConvertListToMiscFeat (lists.keep_gene, FALSE, lip);
+  ConvertListToMiscFeat (lists.remove_gene, TRUE, lip);
+
+  lists.keep_gene = ValNodeFree(lists.keep_gene);
+  lists.remove_gene = ValNodeFree (lists.remove_gene);
+}
+//LCOV_EXCL_STOP
diff --git a/api/valid.h b/api/valid.h
index c94e3d99..d35ec9b2 100644
--- a/api/valid.h
+++ b/api/valid.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.78 $
+* $Revision: 6.100 $
*
* File Description: Sequence editing utilities
*
@@ -90,6 +90,7 @@ typedef void (LIBCALLBACK *ValidErrorFunc) (
Uint2 itemtype,
Uint4 itemID,
CharPtr accession,
+ CharPtr seqid,
CharPtr featureID,
CharPtr message,
CharPtr objtype,
@@ -125,6 +126,7 @@ typedef struct validstruct {
Boolean patch_seq; /* repair invalid sequence residues? */
Boolean non_ascii_chars; /* non ascii chars found in read? */
Boolean suppress_no_pubs; /* internal use for no pub anywhere message */
+ Boolean suppress_no_cit_subs; /* internal use for no genome submission citation message */
Boolean suppress_no_biosrc; /* internal use for no biosource anywhere message */
SpellCheckFunc spellfunc;
SpellCallBackFunc spellcallback;
@@ -155,6 +157,7 @@ typedef struct validstruct {
Boolean indexerVersion; /* special tests for GenBank indexers */
Boolean disableSuppression; /* disables suppression of message by ShouldSuppressValidErr */
Boolean genomeSubmission; /* raise severity on numerous warnings for automated genome center submissions */
+ Boolean debugTestDuJour; /* used for turning on or off specific tests suspected of causing performance hit */
Int2 validationLimit; /* limit validation to major classes in Valid1GatherProc */
/* this section used for finer error reporting callback */
ValidErrorFunc errfunc;
@@ -167,15 +170,19 @@ typedef struct validstruct {
Boolean is_htg_in_sep; /* record has technique of htgs 0 through htgs 3 */
Boolean is_barcode_sep; /* record has technique barcode */
Boolean is_refseq_in_sep; /* record has seqid of type other (refseq) */
+ Boolean is_wp_in_sep; /* record is WP RefSeq protein */
Boolean is_gpipe_in_sep; /* record has seqid of type gpipe */
Boolean is_gps_in_sep; /* record has genomic product set */
Boolean is_small_genome_set; /* record has small genome set */
Boolean other_sets_in_sep; /* record has pop/phy/mut/eco/wgs set */
Boolean is_embl_ddbj_in_sep; /* record has embl or ddbj seqid */
+ Boolean is_embl_tpe_in_sep; /* record has embl or tpe seqid */
Boolean is_old_gb_in_sep; /* record has old style GenBank accession */
Boolean is_patent_in_sep; /* record has patent seqid */
Boolean is_insd_in_sep; /* record has genbank/embl/ddbj or tpg/tpe/tpd seqid */
+ Boolean is_pdb_in_sep; /* record has pdb seqid */
Boolean only_lcl_gnl_in_sep; /* record has seqid of only local or general */
+ Boolean has_gi_or_accn_ver; /* record has GI number or accession with non-zero version */
Boolean has_gnl_prot_sep; /* protein Bioseq has general seqid */
Boolean bsp_genomic_in_sep; /* biosource.genome == genomic */
Boolean is_smupd_in_sep; /* record in INSD internal processing */
@@ -184,10 +191,13 @@ typedef struct validstruct {
Boolean has_multi_int_genes; /* record has multi-interval genes */
Boolean has_seg_bioseqs; /* record has segmented Bioseqs */
Boolean far_fetch_failure; /* a far location or bioseq with no fetch function */
+ Boolean use_heartbeat; /* use heartbeat to indicate progress */
VoidPtr rrna_array; /* sorted feature index array of rRNA features */
VoidPtr trna_array; /* sorted feature index array of tRNA features */
Int4 numrrna; /* number of rRNA features */
Int4 numtrna; /* number of tRNA features */
+ Boolean is_geneious; /* lower severity for select messages */
+ ValNodePtr sisfp;
} ValidStruct, PNTR ValidStructPtr;
NLM_EXTERN Boolean ValidateSeqEntry PROTO((SeqEntryPtr sep, ValidStructPtr vsp));
@@ -296,6 +306,12 @@ NLM_EXTERN FloatHi WaterDataScaleIs (void);
NLM_EXTERN Boolean ParseStructuredVoucher (CharPtr subname, CharPtr PNTR inst, CharPtr PNTR id);
NLM_EXTERN Boolean VoucherInstitutionIsValid (CharPtr inst);
+NLM_EXTERN CharPtr RemoveBadInstitutionCollection (OrgModPtr mod);
+NLM_EXTERN CharPtr RemoveBadInstitutionCountry (OrgModPtr mod);
+
+NLM_EXTERN Boolean AltitudeIsValid (CharPtr name);
+NLM_EXTERN Boolean TypeMaterialIsValid (CharPtr name);
+
/* EC_number finite state machine persists to avoid expensive reload, should free on program exit */
NLM_EXTERN void ECNumberFSAFreeAll (void);
@@ -315,6 +331,21 @@ NLM_EXTERN Int4 IsQualValidForFeature (GBQualPtr gbqual, SeqFeatPtr sfp);
NLM_EXTERN CharPtr GetGBFeatKeyForFeature (SeqFeatPtr sfp);
NLM_EXTERN Boolean ShouldSuppressGBQual(Uint1 subtype, CharPtr qual_name);
NLM_EXTERN Boolean ShouldBeAGBQual (Uint1 subtype, Int2 qual, Boolean allowProductGBQual);
+NLM_EXTERN void Heartbeat(ValidStructPtr vsp, CharPtr msg);
+
+NLM_EXTERN void ConvertFailedCodingRegionsAndRNAsToMiscFeatures(SeqEntryPtr sep, LogInfoPtr lip);
+
+typedef enum { /* NOTE(review): presumably the values written through the Uint1Ptr outputs of CheckBioseqEndsForNAndGap - confirm */
+ eEndIsChar_No = 0,
+ eEndIsChar_Last = 1,
+ eEndIsChar_All = 2
+} EEndIsChar;
+
+NLM_EXTERN void CheckBioseqEndsForNAndGap (BioseqPtr bsp, Uint1Ptr begin_n, Uint1Ptr begin_gap, Uint1Ptr end_n, Uint1Ptr end_gap);
+
+NLM_EXTERN Boolean FixOrgModVoucher (OrgModPtr mod);
+
+NLM_EXTERN Boolean IsIdenticalPublication (PubdescPtr pdp1, PubdescPtr pdp2);
#ifdef __cplusplus
diff --git a/api/valid.msg b/api/valid.msg
index 6d38ab56..b65a9890 100644
--- a/api/valid.msg
+++ b/api/valid.msg
@@ -1,6 +1,8 @@
MODULE valid
+
$$ SEQ_INST, 1
+
$^ ExtNotAllowed, 1
# This is a comment
A Bioseq "extension" is used for special classes of Bioseq. This class of
@@ -252,6 +254,15 @@ Gap of unknown length should have standard length of 100.
$^ SeqGapProblem, 72
Inconsistent data in Seq-gap fields.
+$^ WGSMasterLacksStrucComm, 73
+WGS Master records require a Genome Assembly Data structured comment user object.
+
+$^ TSAMasterLacksStrucComm, 74
+TSA Master records require an Assembly Data structured comment user object.
+
+$^ AllNs, 75
+Sequence has only Ns.
+
$$ SEQ_DESCR, 2
@@ -357,8 +368,9 @@ $^ RefGeneTrackingWithoutStatus, 25
The RefGeneTracking user object does not have the required Status field set.
$^ UnwantedCompleteFlag, 26
-The Mol-info.completeness flag should not be set on a genomic sequence unless
-the title also says it is a complete sequence or complete genome.
+The Mol-info.completeness flag should not be set on a genomic sequence, unless
+the title also says it is a complete sequence or complete genome, nor should it
+be set on a plasmid, chromosome, or organelle.
$^ CollidingPublications, 27
Multiple publication descriptors with the same PMID or MUID apply to a Bioseq.
@@ -561,6 +573,52 @@ A taxonomy consult is required for the indicated organism.
$^ TaxonomyNucleomorphProblem, 89
Taxonomy lookup indicates that the nucleomorph flag should be set for this organism.
+$^ InconsistentMolTypeBiomol, 90
+The Bioseq instance molecule field is inconsistent with the Mol-info biomol field.
+
+$^ BadInstitutionCountry, 91
+The institution (or institution: collection) code should not have a <country> modifier.
+
+$^ AmbiguousSpecificHost, 92
+A BioSource descriptor or feature has an ambiguous specific host value that may
+require a taxonomy consult.
+
+$^ BadAltitude, 93
+The altitude must be reported as a number followed by a space and the letter m (for meters).
+
+$^ RefGeneTrackingOnNucProtSet, 94
+The RefGeneTracking user object should not be on a nuc-prot set.
+
+$^ InconsistentDates, 95
+There are two date descriptors that are inconsistent with each
+other. Please make them consistent.
+
+$^ MultipleTaxonIDs, 96
+There are multiple BioSources with multiple taxonIDs in this RefSeq record.
+
+$^ ScaffoldLacksBioProject, 97
+There is no BioProject database link for this scaffold record.
+
+$^ CompleteGenomeLacksBioProject, 98
+There is no BioProject database link for this complete genome record.
+
+$^ TaxonomyPlastidsProblem, 99
+Taxonomy lookup indicates that the plastids flag should be set for this organism.
+
+$^ OrganismIsUndefinedSpecies, 100
+All organism names ending with "sp" or "sp." require taxonomy consult,
+except "uncultured" ones and "Haemoproteus sp."
+
+$^ WrongBiomolForTechnique, 101
+TSA records are expected to make use of a very limited set of
+MolInfo.biomol values: transcribed-RNA, mRNA, rRNA, ncRNA.
+
+$^ WrongOrganismFor16SrRNA, 102
+16S ribosomal RNA is not present in eukaryotic ribosomes.
+
+$^ InconsistentWGSFlags, 103
+WGS indicators are used inconsistently in this record.
+
$$ GENERIC, 3
@@ -621,6 +679,19 @@ anyway date has already passed.
$^ MissingISOJTA, 16
The publication journal is missing an ISO journal title abbreviation.
+$^ MissingVolume, 17
+The publication volume is missing.
+
+$^ MissingVolumeEpub, 18
+The electronic publication volume is missing.
+
+$^ MissingPages, 19
+The publication pages are missing.
+
+$^ MissingPagesEpub, 20
+The electronic publication pages are missing.
+
+
$$ SEQ_PKG, 4
$^ NoCdRegionPtr, 1
@@ -717,6 +788,9 @@ $^ ImproperlyNestedSets, 29
A pop/phy/mut/eco/wgs set has an unexpected internal set other than nuc-prot,
seg-set, or parts set.
+$^ SeqSubmitWithWgsSet, 30
+Seq-submit file is a wgs-set instead of a batch submission.
+
$$ SEQ_FEAT, 5
@@ -1436,6 +1510,62 @@ The EC_number has been replaced.
$^ SplitEcNumber, 189
The EC_number has been split.
+$^ PeptideFeatureLacksCDS, 190
+The peptide feature cannot be assigned to a CDS parent, and thus cannot be
+mapped to the protein product.
+
+$^ EcNumberDataMissing, 191
+An EC_number qualifier data file is missing or unreadable.
+
+$^ CDSnotBetweenUTRs, 192
+The 5'UTR and 3'UTR features do not flank a CDS feature.
+
+$^ ShortExon, 193
+Internal coding region exons should be more than 15 bp long.
+
+$^ ExtraProteinFeature, 194
+Protein sequence has multiple protein features that are not signal peptides, mature peptides, transit peptides, or preproteins.
+
+$^ AssemblyGapAdjacentToNs, 195
+Assembly_gap features must cover the entire contiguous sequence gaps.
+
+$^ AssemblyGapCoversSequence, 196
+Assembly_gap features must not cover actual bases in the sequence.
+
+$^ FeatureBeginsOrEndsWithN, 197
+The feature starts or stops with an N.
+
+$^ FeatureIsMostlyNs, 198
+The feature contains more than 50% of Ns.
+
+$^ CDSonMinusStrandTranscribedRNA, 199
+Coding regions should be on the plus strand of transcribed RNA molecules.
+
+$^ MultipleGenCodes, 200
+The genetic codes are the same for all CDS features on one Bioseq.
+
+$^ InvalidFuzz, 201
+Incorrect use of Int-fuzz.lim.
+
+$^ BadComment, 202
+Comment is inconsistent with content of feature.
+
+$^ NonsenseIntron, 203
+3 base intron actually contains a stop codon.
+
+$^ InconsistentPseudogeneValue, 204
+Pseudogene qualifiers do not match between a CDS or mRNA and the parent gene.
+
+$^ MultiIntervalIntron, 205
+Introns should only have a single interval.
+
+$^ SeqLocTypeProblem, 206
+A sequence location component is not the expected type.
+
+$^ ColdShockProteinProblem, 207
+A misc_feature containing cspA should not overlap a cold-shock protein CDS.
+
+
$$ SEQ_ALIGN, 6
$^ SeqIdProblem, 1
@@ -1504,6 +1634,7 @@ Alignment is shorter than expected.
$^ UnexpectedAlignmentType 22
Only DenseSeg alignments are expected.
+
$$ SEQ_GRAPH, 7
$^ GraphMin, 1
@@ -1569,6 +1700,7 @@ Quality score values for unknown bases should not be above 0.
$^ GraphLocInvalid, 20
Location for quality score values extends beyond end of sequence.
+
$$ SEQ_ANNOT, 8
$^ AnnotIDs, 1
diff --git a/api/validerr.h b/api/validerr.h
index 53ae6670..7ce125e6 100644
--- a/api/validerr.h
+++ b/api/validerr.h
@@ -74,6 +74,9 @@
#define ERR_SEQ_INST_BadSegmentedSeq 1,70
#define ERR_SEQ_INST_SeqLitGapFuzzNot100 1,71
#define ERR_SEQ_INST_SeqGapProblem 1,72
+#define ERR_SEQ_INST_WGSMasterLacksStrucComm 1,73
+#define ERR_SEQ_INST_TSAMasterLacksStrucComm 1,74
+#define ERR_SEQ_INST_AllNs 1,75
#define ERR_SEQ_DESCR 2,0
#define ERR_SEQ_DESCR_BioSourceMissing 2,1
#define ERR_SEQ_DESCR_InvalidForType 2,2
@@ -164,6 +167,20 @@
#define ERR_SEQ_DESCR_TaxonomyIsSpeciesProblem 2,87
#define ERR_SEQ_DESCR_TaxonomyConsultRequired 2,88
#define ERR_SEQ_DESCR_TaxonomyNucleomorphProblem 2,89
+#define ERR_SEQ_DESCR_InconsistentMolTypeBiomol 2,90
+#define ERR_SEQ_DESCR_BadInstitutionCountry 2,91
+#define ERR_SEQ_DESCR_AmbiguousSpecificHost 2,92
+#define ERR_SEQ_DESCR_BadAltitude 2,93
+#define ERR_SEQ_DESCR_RefGeneTrackingOnNucProtSet 2,94
+#define ERR_SEQ_DESCR_InconsistentDates 2,95
+#define ERR_SEQ_DESCR_MultipleTaxonIDs 2,96
+#define ERR_SEQ_DESCR_ScaffoldLacksBioProject 2,97
+#define ERR_SEQ_DESCR_CompleteGenomeLacksBioProject 2,98
+#define ERR_SEQ_DESCR_TaxonomyPlastidsProblem 2,99
+#define ERR_SEQ_DESCR_OrganismIsUndefinedSpecies 2,100
+#define ERR_SEQ_DESCR_WrongBiomolForTechnique 2,101
+#define ERR_SEQ_DESCR_WrongOrganismFor16SrRNA 2,102
+#define ERR_SEQ_DESCR_InconsistentWGSFlags 2,103
#define ERR_GENERIC 3,0
#define ERR_GENERIC_NonAsciiAsn 3,1
#define ERR_GENERIC_Spell 3,2
@@ -181,6 +198,10 @@
#define ERR_GENERIC_UnexpectedPubStatusComment 3,14
#define ERR_GENERIC_PastReleaseDate 3,15
#define ERR_GENERIC_MissingISOJTA 3,16
+#define ERR_GENERIC_MissingVolume 3,17
+#define ERR_GENERIC_MissingVolumeEpub 3,18
+#define ERR_GENERIC_MissingPages 3,19
+#define ERR_GENERIC_MissingPagesEpub 3,20
#define ERR_SEQ_PKG 4,0
#define ERR_SEQ_PKG_NoCdRegionPtr 4,1
#define ERR_SEQ_PKG_NucProtProblem 4,2
@@ -211,6 +232,7 @@
#define ERR_SEQ_PKG_SingleItemSet 4,27
#define ERR_SEQ_PKG_MisplacedMolInfo 4,28
#define ERR_SEQ_PKG_ImproperlyNestedSets 4,29
+#define ERR_SEQ_PKG_SeqSubmitWithWgsSet 4,30
#define ERR_SEQ_FEAT 5,0
#define ERR_SEQ_FEAT_InvalidForType 5,1
#define ERR_SEQ_FEAT_PartialProblem 5,2
@@ -401,6 +423,24 @@
#define ERR_SEQ_FEAT_DeletedEcNumber 5,187
#define ERR_SEQ_FEAT_ReplacedEcNumber 5,188
#define ERR_SEQ_FEAT_SplitEcNumber 5,189
+#define ERR_SEQ_FEAT_PeptideFeatureLacksCDS 5,190
+#define ERR_SEQ_FEAT_EcNumberDataMissing 5,191
+#define ERR_SEQ_FEAT_CDSnotBetweenUTRs 5,192
+#define ERR_SEQ_FEAT_ShortExon 5,193
+#define ERR_SEQ_FEAT_ExtraProteinFeature 5,194
+#define ERR_SEQ_FEAT_AssemblyGapAdjacentToNs 5,195
+#define ERR_SEQ_FEAT_AssemblyGapCoversSequence 5,196
+#define ERR_SEQ_FEAT_FeatureBeginsOrEndsWithN 5,197
+#define ERR_SEQ_FEAT_FeatureIsMostlyNs 5,198
+#define ERR_SEQ_FEAT_CDSonMinusStrandTranscribedRNA 5,199
+#define ERR_SEQ_FEAT_MultipleGenCodes 5,200
+#define ERR_SEQ_FEAT_InvalidFuzz 5,201
+#define ERR_SEQ_FEAT_BadComment 5,202
+#define ERR_SEQ_FEAT_NonsenseIntron 5,203
+#define ERR_SEQ_FEAT_InconsistentPseudogeneValue 5,204
+#define ERR_SEQ_FEAT_MultiIntervalIntron 5,205
+#define ERR_SEQ_FEAT_SeqLocTypeProblem 5,206
+#define ERR_SEQ_FEAT_ColdShockProteinProblem 5,207
#define ERR_SEQ_ALIGN 6,0
#define ERR_SEQ_ALIGN_SeqIdProblem 6,1
#define ERR_SEQ_ALIGN_StrandRev 6,2
diff --git a/api/validrules.inc b/api/validrules.inc
index 4a1f736f..f8acbf2a 100644
--- a/api/validrules.inc
+++ b/api/validrules.inc
@@ -1,4 +1,4 @@
-/* $Id: validrules.inc,v 1.1 2012/01/20 20:54:43 kans Exp $
+/* $Id: validrules.inc,v 1.3 2013/05/30 18:48:05 kans Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -600,13 +600,29 @@ static const char* const s_Defaultvalidrules[] = {
" severity error } ,",
" {",
" field-name \"Assembly Method\" ,",
- " match-expression \".+ v\\. .+\" ,",
+ " match-expression \".+ v\\. .+\",",
" required TRUE } ,",
" {",
" field-name \"Assembly Name\" } ,",
" {",
" field-name \"Long Assembly Name\" } ,",
" {",
+ " field-name \"Genome Representation\" ,",
+ " match-expression \"^\\(Full\\|Partial\\)$\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Expected Final Version\" ,",
+ " match-expression \"^\\(Yes\\|No\\)$\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Reference-guided Assembly\" } ,",
+ " {",
+ " field-name \"Single-cell Amplification\" } ,",
+ " {",
+ " field-name \"Corresponding 16S rRNA\" ,",
+ " match-expression \"^.+\\.\\(0\\|1\\|2\\|3\\|4\\|5\\|6\\|7\\|8\\|9\\)+$\",",
+ " required FALSE } ,",
+ " {",
" field-name \"Genome Coverage\" ,",
" required TRUE } ,",
" {",
@@ -674,14 +690,26 @@ static const char* const s_Defaultvalidrules[] = {
" fields {",
" {",
" field-name \"Assembly Method\" ,",
- " required TRUE } ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Assembly Name\" ,",
+ " required FALSE } ,",
" {",
" field-name \"Coverage\" ,",
" required FALSE } ,",
" {",
" field-name \"Sequencing Technology\" ,",
" required TRUE } } ,",
- " allow-unlisted TRUE } ,",
+ " allow-unlisted TRUE ,",
+ " dependent-rules {",
+ " {",
+ " match-name \"Sequencing Technology\" ,",
+ " value-constraint \"\\(Sanger dideoxy sequencing\\|ABI PRISM\\|Sanger\\|Sanger sequencing\\)\",",
+ " invert-match TRUE ,",
+ " other-fields {",
+ " {",
+ " field-name \"Assembly Method\" ,",
+ " required TRUE } } } } } ,",
" {",
" prefix \"##International Barcode of Life (iBOL)Data-START##\" ,",
" fields {",
@@ -696,6 +724,55 @@ static const char* const s_Defaultvalidrules[] = {
" required FALSE } ,",
" {",
" field-name \"iBOL Release Status\" ,",
+ " required FALSE } ,",
+ " { field-name \"Tentative Name\" ,",
+ " required FALSE } } ,",
+ " allow-unlisted TRUE } ,",
+ " {",
+ " prefix \"##Genome-Annotation-Data-START##\" ,",
+ " fields {",
+ " {",
+ " field-name \"Annotation Provider\" ,",
+ " required TRUE } ,",
+ " {",
+ " field-name \"Annotation Status\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Annotation Version\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Annotation Date\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Annotation Pipeline\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Annotation Method\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Annotation Software Version\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Features Annotated\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Update Version\" ,",
+ " required FALSE } ,",
+ " {",
+ " field-name \"Input Data\" ,",
+ " required FALSE } ,",
+ " { ",
+ " field-name \"URL\" ,",
+ " required FALSE } } ,",
+ " require-order FALSE ,",
+ " allow-unlisted TRUE } ,",
+ " {",
+ " prefix \"##RefSeq-Attributes-START##\" ,",
+ " fields {",
+ " {",
+ " field-name \"Transcript_exon_combination_evidence\" ,",
" required FALSE } } ,",
- " allow-unlisted TRUE } }"
+ " allow-unlisted TRUE } ",
+ "}",
+ ""
};
diff --git a/api/wprint.c b/api/wprint.c
index 6c67c6e1..ecc23a91 100644
--- a/api/wprint.c
+++ b/api/wprint.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/15/95
*
-* $Revision: 6.75 $
+* $Revision: 6.78 $
*
* File Description:
*
@@ -422,7 +422,7 @@ NLM_EXTERN Boolean LIBCALL www_taxid(CharPtr orgname, Int4 id)
return TRUE;
}
-NLM_EXTERN Boolean LIBCALL www_featkey(CharPtr key, Int4 gi, Int2 entityID, Uint4 itemID)
+NLM_EXTERN Boolean LIBCALL www_featkey(CharPtr key, BIG_ID gi, Int2 entityID, Uint4 itemID)
{
Int2 l, ll;
CharPtr s;
@@ -432,7 +432,7 @@ NLM_EXTERN Boolean LIBCALL www_featkey(CharPtr key, Int4 gi, Int2 entityID, Uint
ll = StringLen("<a href=%sgi=%ld&id=%d&entity=%d>");
s = (CharPtr)MemNew(l+ ll + 3*7);
sprintf(s, "<a href=%sgi=%d&id=%ud&entity=%d>",
- link_ff, (Int4) gi, itemID, (Int4)entityID);
+ link_ff, (BIG_ID) gi, itemID, (Int4)entityID);
AddLink(s);
MemFree(s);
ff_AddString(key);
@@ -511,7 +511,7 @@ NLM_EXTERN Boolean LIBCALL www_extra_acc(CharPtr acc, Boolean ncbi)
NLM_EXTERN Boolean LIBCALL www_genpept_gi(CharPtr str)
{
Int2 l, ll;
- Int4 gi;
+ BIG_ID gi;
CharPtr s, prefix;
if(www) {
@@ -679,7 +679,7 @@ NLM_EXTERN Boolean LIBCALL www_protein_id(CharPtr str)
NLM_EXTERN Boolean LIBCALL www_db_xref(CharPtr str)
{
Int2 l, ll;
- Int4 gi;
+ BIG_ID gi;
CharPtr s, prefix, ss, p, pp;
Boolean nothing = TRUE;
Char id[10];
@@ -1275,7 +1275,7 @@ NLM_EXTERN Boolean LIBCALL www_db_xref(CharPtr str)
NLM_EXTERN Boolean LIBCALL www_note_gi(CharPtr str)
{
Int2 l, ll;
- Int4 gi;
+ BIG_ID gi;
CharPtr s, prefix, ss, p, pp;
Boolean nothing = TRUE;
@@ -2007,7 +2007,7 @@ static Boolean iscospa(Char c)
NLM_EXTERN void LIBCALL www_PrintComment (CharPtr string, Boolean identifier, Uint1 format)
{
Int2 lpref, l, ll;
- Int4 gi;
+ BIG_ID gi;
CharPtr s, prefix=NULL, p, pp, link=NULL, www_str, acc, ss;
Boolean isfirst = TRUE;