diff options
author | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 23:49:09 +0000 |
---|---|---|
committer | Aaron M. Ucko <ucko@debian.org> | 2005-03-23 23:49:09 +0000 |
commit | 5349ec8772bc373e4c2349a04e57d7952c006326 (patch) | |
tree | b733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /api | |
parent | 0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff) |
Load ncbi (6.1.20031028) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'api')
-rw-r--r-- | api/alignmgr2.c | 267 | ||||
-rw-r--r-- | api/alignmgr2.h | 75 | ||||
-rw-r--r-- | api/alignval.c | 10 | ||||
-rw-r--r-- | api/asn2ff3.c | 12 | ||||
-rw-r--r-- | api/asn2ffp.h | 22 | ||||
-rw-r--r-- | api/asn2gnbk.c | 2827 | ||||
-rw-r--r-- | api/asn2gnbk.h | 5 | ||||
-rw-r--r-- | api/edutil.c | 21 | ||||
-rw-r--r-- | api/explore.h | 8 | ||||
-rw-r--r-- | api/fdlKludge.h | 25 | ||||
-rw-r--r-- | api/ffprint.c | 10 | ||||
-rw-r--r-- | api/findrepl.c | 72 | ||||
-rw-r--r-- | api/findrepl.h | 14 | ||||
-rw-r--r-- | api/gbfeat.c | 10 | ||||
-rw-r--r-- | api/gbftdef.h | 20 | ||||
-rw-r--r-- | api/gbftglob.c | 416 | ||||
-rw-r--r-- | api/lsqfetch.c | 150 | ||||
-rw-r--r-- | api/lsqfetch.h | 15 | ||||
-rw-r--r-- | api/salpstat.c | 6 | ||||
-rw-r--r-- | api/seqmgr.c | 103 | ||||
-rw-r--r-- | api/seqmgr.h | 12 | ||||
-rw-r--r-- | api/seqport.c | 116 | ||||
-rw-r--r-- | api/sequtil.c | 164 | ||||
-rw-r--r-- | api/sqnutil1.c | 363 | ||||
-rw-r--r-- | api/sqnutil2.c | 174 | ||||
-rw-r--r-- | api/sqnutil3.c | 32 | ||||
-rw-r--r-- | api/sqnutils.h | 26 | ||||
-rw-r--r-- | api/subutil.c | 84 | ||||
-rw-r--r-- | api/subutil.h | 34 | ||||
-rw-r--r-- | api/tofasta.c | 230 | ||||
-rw-r--r-- | api/tomedlin.c | 173 | ||||
-rw-r--r-- | api/tomedlin.h | 10 | ||||
-rw-r--r-- | api/txalign.c | 286 | ||||
-rw-r--r-- | api/valid.c | 1072 | ||||
-rw-r--r-- | api/valid.msg | 35 | ||||
-rw-r--r-- | api/validerr.h | 10 |
36 files changed, 5567 insertions, 1342 deletions
diff --git a/api/alignmgr2.c b/api/alignmgr2.c index 6afaf900..096953e2 100644 --- a/api/alignmgr2.c +++ b/api/alignmgr2.c @@ -28,13 +28,28 @@ * * Version Creation Date: 10/01 * -* $Revision: 6.44 $ +* $Revision: 6.49 $ * * File Description: SeqAlign indexing, access, and manipulation functions * * Modifications: * -------------------------------------------------------------------------- * $Log: alignmgr2.c,v $ +* Revision 6.49 2003/10/20 17:54:34 kans +* AlnMgr2ComputeFreqMatrix protect against dereferencing NULL bsp +* +* Revision 6.48 2003/10/09 13:46:52 rsmith +* Add AlnMgr2GetFirstNForSipList. +* +* Revision 6.47 2003/05/15 18:53:10 rsmith +* in AlnMgr2GetSeqRangeForSipInStdSeg always return start & stop in coordinate order. Do not assume what minus strand will do or not. +* +* Revision 6.46 2003/04/24 20:28:48 rsmith +* made AlnMgr2GetNthStdSeg use 1 based numbering like the other Nth functions. +* +* Revision 6.45 2003/04/23 20:36:13 rsmith +* Added four functions in Section 11 to get information about Std-Seg alignments. +* * Revision 6.44 2003/03/31 20:17:11 todorov * Added AlnMgr2IndexSeqAlignEx * @@ -7159,6 +7174,29 @@ NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip) /*************************************************************************** * +* AlnMgr2GetFirstNForSipList returns the first row that one of a list of seqids occur on, +* or -1 if none of the seqids are in the alignment or if there is another +* error. +* Handy if sip comes from a BioSeq, where it can point to a linked list +* of SeqIds. +* +***************************************************************************/ +NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip) +{ + Int4 i; + if (sap == NULL || sap->saip == NULL) + return -1; + + for (; sip; sip = sip->next) { + i = AlnMgr2GetFirstNForSip(sap, sip); + if (i != -1) + return i; + } + return -1; +} + +/*************************************************************************** +* * AlnMgr2GetParent returns the top-level seqalign associated with a given * indexed alignment. It returns the actual pointer, not a copy. * @@ -8392,30 +8430,32 @@ NLM_EXTERN AMFreqPtr AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap, Int4 from, Int4 t { sip = AlnMgr2GetNthSeqIdPtr(sap, i+1); bsp = BioseqLockById(sip); - for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE) - { - counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp); - ctr = 0; - while (ctr < counter) - { - res = buf[ctr]; - if (isna) - { - if (res == 1 || res == 2) - afp->freq[res][j]++; - else if (res == 4) - afp->freq[3][j]++; - else if (res == 8) - afp->freq[4][j]++; - else - afp->freq[5][j]++; - } else - afp->freq[res][j]++; - j++; - ctr++; - } + if (bsp != NULL) { + for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE) + { + counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp); + ctr = 0; + while (ctr < counter) + { + res = buf[ctr]; + if (isna) + { + if (res == 1 || res == 2) + afp->freq[res][j]++; + else if (res == 4) + afp->freq[3][j]++; + else if (res == 8) + afp->freq[4][j]++; + else + afp->freq[5][j]++; + } else + afp->freq[res][j]++; + j++; + ctr++; + } + } + BioseqUnlock(bsp); } - BioseqUnlock(bsp); SeqIdFree(sip); } } @@ -9792,6 +9832,185 @@ NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr *stop = SeqLocStop(slp); } + +/*************************************************************************** +* +* AlnMgr2GetSeqRangeForSipInSAStdSeg returns the smallest and largest sequence +* coordinates in in a Std-Seg seqalign for a given Sequence Id. Also return the +* strand type. Either start, stop or strand can be NULL to only retrieve some of them. +* If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this +* alignment or the alignment is one big insert on that id. Returns true if the sip was found +* in the alignment with real coordinates, i.e. *start would not be -1. RANGE +* +***************************************************************************/ +NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand) +{ + Int4 c_start, c_stop; + Uint1 c_strand; + StdSegPtr ssp; + Boolean range_found = FALSE; + Boolean strands_inconsistent = FALSE; + + if (start) *start = -1; + if (stop) *stop = -1; + if (strand) *strand = Seq_strand_unknown; + + if (sap->segtype != SAS_STD) + return FALSE; + + ssp = (StdSegPtr)(sap->segs); + while (ssp) { + if (AlnMgr2GetSeqRangeForSipInStdSeg(ssp, sip, &c_start, &c_stop, &c_strand, NULL) && + c_start != -1) /* skip inserts on our bioseq */ + { + range_found = TRUE; + + if (start) { + if (*start == -1) { + *start = c_start; + } else { + *start = MIN(*start, c_start); + } + } + if (stop) { + *stop = MAX(*stop, c_stop); + } + if (strand && ! strands_inconsistent) { + /* if strands are different each time, ignore them. */ + if (*strand != Seq_strand_unknown && *strand != c_strand) { + *strand = Seq_strand_unknown; + strands_inconsistent = TRUE; + } else { + *strand = c_strand; + } + } + } + ssp = ssp->next; + } + return range_found; +} + + +/*************************************************************************** +* +* AlnMgr2GetSeqRangeForSipInStdSeg returns the start and stop sequence +* coordinates in a Std-Segment for a given Sequence Id. Also return the +* strand type. Either start, stop or strand can be NULL to only retrieve some of them. +* If start and stop are -1, the SeqID was not found in this segment. +* Returns true if the sip was found, even if it is a gap (start, stop = -1). RANGE +* +***************************************************************************/ +NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg( + StdSegPtr ssp, + SeqIdPtr sip, + Int4Ptr start, + Int4Ptr stop, + Uint1Ptr strand, + Uint1Ptr segType) /* AM_SEQ, AM_GAP, AM_INSERT */ +{ + SeqLocPtr loc; + Uint1 m_strand; + Int4 m_start, m_stop, m_swap; + Boolean s_present = FALSE; + Boolean m_present = FALSE; + Boolean found_id = FALSE; + + for ( loc = ssp->loc; + loc != NULL; + loc = loc->next ) { + /* One SeqLoc for each Sequence aligned by this segment. */ + /* find the one that matches the sip parameter. */ + if (SeqIdForSameBioseq(sip, SeqLocId(loc))) { + m_strand = SeqLocStrand(loc); + m_start = SeqLocStart(loc); + m_stop = SeqLocStop(loc); + /* Might have to reverse the order of start and stop on minus strands. + /* so that start is less than stop. */ + if (m_start > m_stop) { + m_swap = m_start; + m_start = m_stop; + m_stop = m_swap; + } + if (start) *start = m_start; + if (stop) *stop = m_stop; + if (strand) *strand = m_strand; + if (m_start != -1) + m_present = TRUE; + + /* found our sequence in this segment. */ + found_id = TRUE; + } else { /* a different sequence */ + if (SeqLocStart(loc) != -1) + s_present = TRUE; + } + } + + if (segType) { + if (m_present && s_present) + *segType = AM_SEQ; + else if (!m_present && s_present) + *segType = AM_INSERT; + else if (m_present && !s_present) + *segType = AM_GAP; + else + *segType = AM_GAP; /* start will be -1 */ + } + return found_id; +} + + +/*************************************************************************** +* +* AlnMgr2GetNthStdSeg returns the a pointer to the Nth segment of +* a standard segment alignment. Numbering starts with 1. +* returns NULL if not n segments or is not a std-seg aligment. +* Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg() +* +***************************************************************************/ +NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n) +{ + StdSegPtr ssp; + Int2 i; + + if (sap == NULL || sap->segtype != SAS_STD || n < 1) + return NULL; + + i = 1; + ssp = (StdSegPtr)(sap->segs); + while(ssp) + { + if (i == n) + return ssp; + ++i; + ssp = ssp->next; + } + + return NULL; +} + +/*************************************************************************** +* +* AlnMgr2GetNumStdSegs returns the number of segments in a standar-seg alignment. +* returns -1 if sap is null or not a standard-seg alignment. +* +***************************************************************************/ +NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap) +{ + Int4 seg_count = 0; + StdSegPtr ssp; + + if (sap == NULL || sap->segtype != SAS_STD) + return -1; + + ssp = (StdSegPtr)(sap->segs); + while(ssp) + { + ++seg_count; + ssp = ssp->next; + } + return seg_count; +} + static SeqLocPtr AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap) { Int4 longest; diff --git a/api/alignmgr2.h b/api/alignmgr2.h index c71dccf4..2bd1dd3a 100644 --- a/api/alignmgr2.h +++ b/api/alignmgr2.h @@ -28,13 +28,19 @@ * * Version Creation Date: 10/01 * -* $Revision: 6.19 $ +* $Revision: 6.21 $ * * File Description: SeqAlign indexing, access, and manipulation functions * * Modifications: * -------------------------------------------------------------------------- * $Log: alignmgr2.h,v $ +* Revision 6.21 2003/10/09 13:46:39 rsmith +* Add AlnMgr2GetFirstNForSipList. +* +* Revision 6.20 2003/04/23 20:37:06 rsmith +* Added four functions in section 11 to allow examination of Std-Seg alignments. +* * Revision 6.19 2003/03/31 20:17:11 todorov * Added AlnMgr2IndexSeqAlignEx * @@ -637,6 +643,17 @@ NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip); /*************************************************************************** * +* AlnMgr2GetFirstNForSipList returns the first row that one of a list of seqids occur on, +* or -1 if none of the seqids are in the alignment or if there is another +* error. +* Handy if sip comes from a BioSeq, where it can point to a linked list +* of SeqIds. +* +***************************************************************************/ +NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip); + +/*************************************************************************** +* * AlnMgr2GetParent returns the top-level seqalign associated with a given * indexed alignment. It returns the actual pointer, not a copy. * @@ -885,6 +902,62 @@ NLM_EXTERN SeqAlignPtr AlnMgr2FuseSet(SeqAlignPtr sap_head, Boolean returnall); NLM_EXTERN Int4 AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap, SeqIdPtr sip); NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap, Int4 n); NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop); + +/*************************************************************************** +* +* AlnMgr2GetSeqRangeForSipInSAStdSeg returns the smallest and largest sequence +* coordinates in in a Std-Seg seqalign for a given Sequence Id. Also return the +* strand type if it is the same on every segment, else set it to Seq_strand_unknown. +* Either start, stop or strand can be NULL to only retrieve some of them. +* If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this +* alignment or the alignment is one big insert on that id. Returns true if the sip was found +* in the alignment with real coordinates, i.e. *start would not be -1. RANGE +* +***************************************************************************/ +NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand); + +/*************************************************************************** +* +* AlnMgr2GetSeqRangeForSipInStdSeg returns the start and stop sequence +* coordinates in a Std-Segment for a given Sequence Id. Also return the +* strand type. Either start, stop, strand or segType can be NULL to only retrieve some of them. +* Returns false if the SeqID was not found in this segment, so no meaningful +* data was passed back in other arguments. +* Returns true if the sip was found, even if it is a gap (start, stop = -1). +* segType is set to AM_SEQ if the SeqID Sequence is not empty and one of +* the other sequences aligned with it is also not empty. To AM_GAP if +* the other sequences are all empty, and to AM_INSERT if the main sequence +* is empty. +* RANGE +* +***************************************************************************/ +NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg( + StdSegPtr ssp, + SeqIdPtr sip, + Int4Ptr start, + Int4Ptr stop, + Uint1Ptr strand, + Uint1Ptr segType); /* AM_SEQ, AM_GAP, AM_INSERT */ + +/*************************************************************************** +* +* AlnMgr2GetNthStdSeg returns the a pointer to the Nth segment of +* a standard segment alignment. +* returns NULL if not n segments or is not a std-seg aligment. +* Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg() +* +***************************************************************************/ +NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n); + +/*************************************************************************** +* +* AlnMgr2GetNumStdSegs returns the number of segments in a standar-seg alignment. +* returns -1 if sap is null or not a standard-seg alignment. +* the Std-Seg version of AlnMgr2GetNumSegs +* +***************************************************************************/ +NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap); + /*************************************************************************** * * The two mapping functions act a little differently for std-segs. The diff --git a/api/alignval.c b/api/alignval.c index 9b317c04..bf3a64b7 100644 --- a/api/alignval.c +++ b/api/alignval.c @@ -29,7 +29,7 @@ * * Version Creation Date: 6/3/99 * -* $Revision: 6.35 $ +* $Revision: 6.37 $ * * File Description: To validate sequence alignment. * @@ -443,7 +443,7 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq pos = valmsggetseqpos(salp, Intvalue, id); SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3)); sprintf(string1, "Segs"); - sprintf(string2, "Segment %ld (near alignment position %ld) contains only gaps. Each segment must contain at least one actual sequence -- look for columns with all gaps and delete them.", (long) Intvalue, (long) pos); + sprintf(string2, "Segment %ld (near alignment position %ld) in the context of %s contains only gaps. Each segment must contain at least one actual sequence -- look for columns with all gaps and delete them.", (long) Intvalue, (long) pos, buf3); break; case Err_Segs_Dim_One: @@ -1495,7 +1495,7 @@ static Boolean Is_Fasta_Seqalign (SeqAlignPtr salp) /*if no more positive start value is found after the initial -1 start value, then it's fasta like */ if(k==dsp->numseg-1&&gap) { - ValMessage (salp, Err_Fastalike, SEV_ERROR, siptemp, dsp->ids, 0); + ValMessage (salp, Err_Fastalike, SEV_WARNING, siptemp, dsp->ids, 0); return TRUE; } } @@ -1541,7 +1541,7 @@ static Boolean Is_Fasta_Seqalign (SeqAlignPtr salp) /*if no more segment is found for this sequence, then it's fasta like */ if(k==psp->numseg-1&&gap) { - ValMessage (salp, Err_Fastalike, SEV_ERROR, siptemp, psp->ids, 0); + ValMessage (salp, Err_Fastalike, SEV_WARNING, siptemp, psp->ids, 0); return TRUE; } @@ -1598,7 +1598,7 @@ static Boolean Is_Fasta_Seqalign (SeqAlignPtr salp) break; if(!ssptemp2->next&&gap) { - ValMessage (salp, Err_Fastalike, SEV_ERROR, sip, SipInSegs, 0); + ValMessage (salp, Err_Fastalike, SEV_WARNING, sip, SipInSegs, 0); ValNodeFree(FinishedSip); SeqIdSetFree(SipInSegs); return TRUE; diff --git a/api/asn2ff3.c b/api/asn2ff3.c index ec9a8880..706036d0 100644 --- a/api/asn2ff3.c +++ b/api/asn2ff3.c @@ -35,6 +35,15 @@ * Modifications: * -------------------------------------------------------------------------- * $Log: asn2ff3.c,v $ +* Revision 6.118 2003/07/22 16:18:27 kans +* added ZFIN as legal db_xref +* +* Revision 6.117 2003/06/10 18:44:10 kans +* added GeneDB to list of legal db_xrefs +* +* Revision 6.116 2003/05/29 20:25:19 kans +* added Interpro to list of legal dbxrefs +* * Revision 6.115 2002/11/30 20:18:27 kans * added GOA to list of legal db_xrefs * @@ -723,6 +732,7 @@ CharPtr dbtag[DBNUM] = { "FLYBASE", "GABI", "GDB", + "GeneDB", "GeneID", "GI", "GO", @@ -731,6 +741,7 @@ CharPtr dbtag[DBNUM] = { "IMGT/LIGM", "IMGT/HLA", "InterimID", + "Interpro", "ISFinder", "JCM", "LocusID", @@ -756,6 +767,7 @@ CharPtr dbtag[DBNUM] = { "UniSTS", "WorfDB", "WormBase", + "ZFIN", }; diff --git a/api/asn2ffp.h b/api/asn2ffp.h index 87a8b726..f05a9b89 100644 --- a/api/asn2ffp.h +++ b/api/asn2ffp.h @@ -29,7 +29,7 @@ * * Version Creation Date: 7/15/95 * -* $Revision: 6.30 $ +* $Revision: 6.33 $ * * File Description: * @@ -45,6 +45,15 @@ /************************************* * * $Log: asn2ffp.h,v $ + * Revision 6.33 2003/07/22 16:18:27 kans + * added ZFIN as legal db_xref + * + * Revision 6.32 2003/06/10 18:44:10 kans + * added GeneDB to list of legal db_xrefs + * + * Revision 6.31 2003/05/29 20:25:19 kans + * added Interpro to list of legal dbxrefs + * * Revision 6.30 2002/11/30 20:18:27 kans * added GOA to list of legal db_xrefs * @@ -192,6 +201,15 @@ /************************************* * * $Log: asn2ffp.h,v $ +* Revision 6.33 2003/07/22 16:18:27 kans +* added ZFIN as legal db_xref +* +* Revision 6.32 2003/06/10 18:44:10 kans +* added GeneDB to list of legal db_xrefs +* +* Revision 6.31 2003/05/29 20:25:19 kans +* added Interpro to list of legal dbxrefs +* * Revision 6.30 2002/11/30 20:18:27 kans * added GOA to list of legal db_xrefs * @@ -369,7 +387,7 @@ NLM_EXTERN Boolean asn2ff_flags[13]; #define ASN2FF_SHOW_ERROR_MSG asn2ff_flags[11] #define ASN2FF_SHOW_GB_STYLE asn2ff_flags[12] -#define DBNUM 55 +#define DBNUM 58 NLM_EXTERN CharPtr dbtag[DBNUM]; NLM_EXTERN void FlatSpliceOff PROTO((SeqEntryPtr the_set, ValNodePtr desc)); diff --git a/api/asn2gnbk.c b/api/asn2gnbk.c index 39e5c40d..deccca09 100644 --- a/api/asn2gnbk.c +++ b/api/asn2gnbk.c @@ -30,7 +30,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 6.660 $ +* $Revision: 6.729 $ * * File Description: New GenBank flatfile generator - work in progress * @@ -44,6 +44,7 @@ #include <objsset.h> #include <objsub.h> #include <objfdef.h> +#include <objpubme.h> #include <seqport.h> #include <sequtil.h> #include <sqnutils.h> @@ -98,6 +99,7 @@ typedef struct asn2gbflags { Boolean hideEmptySource; Boolean goQualsToNote; Boolean geneSynsToNote; + Boolean selenocysteineToNote; Boolean forGbRelease; } Asn2gbFlags, PNTR Asn2gbFlagsPtr; @@ -137,6 +139,8 @@ typedef struct int_asn2gb_job { Boolean newSourceOrg; ValNodePtr lockedBspList; Boolean relModeError; + Boolean skipProts; + Boolean skipMrnas; IndxPtr index; GBSeqPtr gbseq; StringItemPtr pool; @@ -195,6 +199,7 @@ typedef struct asn2gbwork { Boolean citSubsFirst; Boolean hideGeneFeats; Boolean newLocusLine; + Boolean showBaseCount; Boolean hideImpFeats; Boolean hideRemImpFeats; @@ -209,6 +214,8 @@ typedef struct asn2gbwork { Boolean onlyGeneRIFs; Boolean latestGeneRIFs; + Boolean showRefs; + Boolean isGPS; Boolean copyGpsCdsUp; Boolean copyGpsGeneDown; @@ -516,6 +523,7 @@ static Uint1 source_qual_order [] = { SCQUAL_tissue_type, SCQUAL_clone_lib, SCQUAL_dev_stage, + SCQUAL_ecotype, SCQUAL_frequency, SCQUAL_germline, @@ -565,7 +573,6 @@ static Uint1 source_desc_note_order [] = { SCQUAL_authority, SCQUAL_forma, SCQUAL_forma_specialis, - SCQUAL_ecotype, SCQUAL_synonym, SCQUAL_anamorph, SCQUAL_teleomorph, @@ -607,7 +614,6 @@ static Uint1 source_feat_note_order [] = { SCQUAL_authority, SCQUAL_forma, SCQUAL_forma_specialis, - SCQUAL_ecotype, SCQUAL_synonym, SCQUAL_anamorph, SCQUAL_teleomorph, @@ -805,6 +811,7 @@ typedef enum { FTQUAL_modelev, FTQUAL_note, FTQUAL_number, + FTQUAL_operon, FTQUAL_organism, FTQUAL_partial, FTQUAL_PCR_conditions, @@ -833,12 +840,14 @@ typedef enum { FTQUAL_rrna_its, FTQUAL_sec_str_type, FTQUAL_selenocysteine, + FTQUAL_selenocysteine_note, FTQUAL_seqfeat_note, FTQUAL_site, FTQUAL_site_type, FTQUAL_standard_name, FTQUAL_transcription, FTQUAL_transcript_id, + FTQUAL_transcript_id_note, /* !!! remove October 15, 2003 !!! */ FTQUAL_transl_except, FTQUAL_transl_table, FTQUAL_translation, @@ -859,6 +868,10 @@ static Uint1 feat_qual_order [] = { FTQUAL_locus_tag, FTQUAL_gene_syn_refseq, + FTQUAL_gene_allele, + + FTQUAL_operon, + FTQUAL_product, FTQUAL_prot_EC_number, @@ -881,6 +894,7 @@ static Uint1 feat_qual_order [] = { FTQUAL_number, FTQUAL_pseudo, + FTQUAL_selenocysteine, FTQUAL_codon_start, @@ -895,7 +909,6 @@ static Uint1 feat_qual_order [] = { FTQUAL_frequency, FTQUAL_EC_number, FTQUAL_gene_map, - FTQUAL_gene_allele, FTQUAL_allele, FTQUAL_map, FTQUAL_mod_base, @@ -944,6 +957,7 @@ pseudo after note - gi|6598562|gb|AC006419.3|AC006419 */ static Uint1 feat_note_order [] = { + FTQUAL_transcript_id_note, /* !!! remove October 15, 2003 !!! */ FTQUAL_gene_desc, FTQUAL_gene_syn, FTQUAL_trna_codons, @@ -958,7 +972,7 @@ static Uint1 feat_note_order [] = { FTQUAL_seqfeat_note, FTQUAL_exception_note, FTQUAL_region, - /* FTQUAL_selenocysteine, */ + FTQUAL_selenocysteine_note, FTQUAL_prot_names, FTQUAL_bond, FTQUAL_site, @@ -1023,6 +1037,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "model_evidence", Qual_class_model_ev }, { "note", Qual_class_note }, { "number", Qual_class_number }, + { "operon", Qual_class_quote }, { "organism", Qual_class_quote }, { "partial", Qual_class_boolean }, { "PCR_conditions", Qual_class_quote }, @@ -1050,6 +1065,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "rpt_unit", Qual_class_rpt_unit }, { "rrna_its", Qual_class_its }, { "sec_str_type", Qual_class_sec_str }, + { "selenocysteine", Qual_class_boolean }, { "selenocysteine", Qual_class_string }, { "seqfeat_note", Qual_class_string }, { "site", Qual_class_site }, @@ -1057,6 +1073,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = { { "standard_name", Qual_class_quote }, { "transcription", Qual_class_transcription }, { "transcript_id", Qual_class_seq_id }, + { "tscpt_id_note", Qual_class_seq_id }, /* !!! remove October 15, 2003 !!! */ { "transl_except", Qual_class_code_break }, { "transl_table", Qual_class_int }, { "translation", Qual_class_translation }, @@ -1098,7 +1115,7 @@ static Char doc_link [MAX_WWWBUF]; #define DEF_LINK_DOC "http://www.ncbi.nlm.nih.gov/genome/guide/build.html" static Char ev_link [MAX_WWWBUF]; -#define DEF_LINK_EV "http://www.ncbi.nlm.nih.gov/cgi-bin/Entrez/evv.cgi?" +#define DEF_LINK_EV "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?" static Char ec_link [MAX_WWWBUF]; #define DEF_LINK_EC "http://www.expasy.ch/cgi-bin/nicezyme.pl?" @@ -1106,108 +1123,119 @@ static Char ec_link [MAX_WWWBUF]; static Char link_tax [MAX_WWWBUF]; #define DEF_LINK_TAX "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?" -static Char link_ff[MAX_WWWBUF]; +static Char link_ff [MAX_WWWBUF]; #define DEF_LINK_FF "/cgi-bin/Entrez/getfeat?" -static Char link_muid[MAX_WWWBUF]; +static Char link_muid [MAX_WWWBUF]; #define DEF_LINK_MUID "/entrez/utils/qmap.cgi?" -static Char link_ace[MAX_WWWBUF]; +static Char link_ace [MAX_WWWBUF]; #define DEF_LINK_ACE "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?db=worm&c=gene&q=" -static Char link_code[MAX_WWWBUF]; +static Char link_code [MAX_WWWBUF]; #define DEF_LINK_CODE "http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?" -static Char link_fly[MAX_WWWBUF]; +static Char link_fly [MAX_WWWBUF]; #define DEF_LINK_FLY "http://flybase.bio.indiana.edu/.bin/fbidq.html?" -static Char link_fly_fban[MAX_WWWBUF]; +static Char link_fly_fban [MAX_WWWBUF]; #define DEF_LINK_FBAN "http://www.fruitfly.org/cgi-bin/annot/fban?" -static Char link_fly_fbgn[MAX_WWWBUF]; +static Char link_fly_fbgn [MAX_WWWBUF]; #define DEF_LINK_FBGN "http://flybase.bio.indiana.edu/.bin/fbidq.html?" -static Char link_cog[MAX_WWWBUF]; +static Char link_cog [MAX_WWWBUF]; #define DEF_LINK_COG "http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox?" -static Char link_sgd[MAX_WWWBUF]; +static Char link_sgd [MAX_WWWBUF]; #define DEF_LINK_SGD "/cgi-bin/Entrez/referer?http://genome-www4.stanford.edu/cgi-bin/SGD/locus.pl?locus=" -static Char link_gdb[MAX_WWWBUF]; +static Char link_gdb [MAX_WWWBUF]; #define DEF_LINK_GDB "http://gdbwww.gdb.org/gdb-bin/genera/genera/hgd/DBObject/GDB:" -static Char link_ck[MAX_WWWBUF]; +static Char link_ck [MAX_WWWBUF]; #define DEF_LINK_CK "http://flybane.berkeley.edu/cgi-bin/cDNA/CK_clone.pl?db=CK&dbid=" -static Char link_rice[MAX_WWWBUF]; +static Char link_rice [MAX_WWWBUF]; #define DEF_LINK_RICE "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object=" -static Char link_sp[MAX_WWWBUF]; +static Char link_sp [MAX_WWWBUF]; #define DEF_LINK_SP "/cgi-bin/Entrez/referer?http://expasy.hcuge.ch/cgi-bin/sprot-search-ac%3f" -static Char link_pdb[MAX_WWWBUF]; +static Char link_pdb [MAX_WWWBUF]; #define DEF_LINK_PDB "/cgi-bin/Entrez/referer?http://expasy.hcuge.ch/cgi-bin/get-pdb-entry%3f" -static Char link_UniSTS[MAX_WWWBUF]; +static Char link_UniSTS [MAX_WWWBUF]; #define DEF_LINK_UniSTS "http://www.ncbi.nlm.nih.gov/genome/sts/sts.cgi?uid=" -static Char link_dbSTS[MAX_WWWBUF]; +static Char link_dbSTS [MAX_WWWBUF]; #define DEF_LINK_dbSTS "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?" -static Char link_dbEST[MAX_WWWBUF]; +static Char link_dbEST [MAX_WWWBUF]; #define DEF_LINK_dbEST "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?" -static Char link_omim[MAX_WWWBUF]; +static Char link_omim [MAX_WWWBUF]; #define DEF_LINK_OMIM "http://www.ncbi.nlm.nih.gov/entrez/dispomim.cgi?id=" -static Char link_locus[MAX_WWWBUF]; +static Char link_locus [MAX_WWWBUF]; #define DEF_LINK_LOCUS "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l=" -static Char link_snp[MAX_WWWBUF]; +static Char link_snp [MAX_WWWBUF]; #define DEF_LINK_SNP "http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=" -static Char link_ratmap[MAX_WWWBUF]; +static Char link_ratmap [MAX_WWWBUF]; #define DEF_LINK_RATMAP "http://ratmap.gen.gu.se/action.lasso?-database=RATMAPfmPro&-layout=Detail&-response=/RM/Detail+Format.html&-search&-recid=" -static Char link_rgd[MAX_WWWBUF]; +static Char link_rgd [MAX_WWWBUF]; #define DEF_LINK_RGD "http://rgd.mcw.edu/query/query.cgi?id=" -static Char link_mgd[MAX_WWWBUF]; +static Char link_mgd [MAX_WWWBUF]; #define DEF_LINK_MGD "http://www.informatics.jax.org/searches/accession_report.cgi?id=MGI:" -static Char link_cdd[MAX_WWWBUF]; +static Char link_cdd [MAX_WWWBUF]; #define DEF_LINK_CDD "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid=" -static Char link_niaest[MAX_WWWBUF]; +static Char link_niaest [MAX_WWWBUF]; #define DEF_LINK_NIAEST "http://lgsun.grc.nia.nih.gov/cgi-bin/pro3?sname1=" -static Char link_worm_base[MAX_WWWBUF]; +static Char link_worm_base [MAX_WWWBUF]; #define DEF_LINK_WORM_BASE "http://www.wormbase.org/db/get?class=Sequence;name=" -static Char link_worfdb[MAX_WWWBUF]; +static Char link_worfdb [MAX_WWWBUF]; #define DEF_LINK_WORFDB "http://worfdb.dfci.harvard.edu/search.pl?form=1&search=" -static Char link_nextdb[MAX_WWWBUF]; +static Char link_nextdb [MAX_WWWBUF]; #define DEF_LINK_NEXTDB "http://nematode.lab.nig.ac.jp/cgi-bin/db/ShowGeneInfo.sh?celk=" -static Char link_imgt[MAX_WWWBUF]; +static Char link_imgt [MAX_WWWBUF]; #define DEF_LINK_IMGT "http://imgt.cines.fr:8104/cgi-bin/IMGTlect.jv?query=202+" -static Char link_ifo[MAX_WWWBUF]; +static Char link_ifo [MAX_WWWBUF]; #define DEF_LINK_IFO "http://www.ifo.or.jp/index_e.html" -static Char link_jcm[MAX_WWWBUF]; +static Char link_jcm [MAX_WWWBUF]; #define DEF_LINK_JCM "http://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM=" -static Char link_isfinder[MAX_WWWBUF]; +static Char link_isfinder [MAX_WWWBUF]; #define DEF_LINK_ISFINDER "http://www-is.biotoul.fr/scripts/is/is_spec.idc?name=" -static Char link_gabi[MAX_WWWBUF]; +static Char link_gabi [MAX_WWWBUF]; #define DEF_LINK_GABI "https://gabi.rzpd.de/cgi-bin-protected/GreenCards.pl.cgi?Mode=ShowBioObject&BioObjectName=" -static Char link_fantom[MAX_WWWBUF]; +static Char link_fantom [MAX_WWWBUF]; #define DEF_LINK_FANTOM "http://fantom.gsc.riken.go.jp/db/view/main.cgi?masterid=" +static Char link_interpro [MAX_WWWBUF]; +#define DEF_LINK_INTERPRO "http://www.ebi.ac.uk/interpro/ISearch?mode=ipr&query=" + +static Char link_genedb [MAX_WWWBUF]; +#define DEF_LINK_GENEDB "http://www.genedb.org/genedb/Dispatcher?formType=navBar&submit=Search+for&organism=All%3Apombe%3Acerevisiae%3Adicty%3Aasp%3Atryp%3Aleish%3Amalaria%3Astyphi%3Aglossina&desc=yes&ohmr=%2F&name=" + +static Char link_zfin [MAX_WWWBUF]; +#define DEF_LINK_ZFIN "http://zfin.org/cgi-bin/webdriver?MIval=aa-markerview.apg&OID=" + +static Char link_rebase [MAX_WWWBUF]; +#define DEF_LINK_REBASE "http://rebase.neb.com/rebase/enz/" /* utility functions */ @@ -1447,7 +1475,7 @@ static void FFAddPeriod (StringItemPtr sip) { FFRecycleString(ajp, riter); riter = prev; riter->next = NULL; - sip->curr = riter; + sip->curr = riter; break; } } @@ -2185,7 +2213,7 @@ static Int4 FFStringSearch ( return shift; } else { shift += MAX( (Int4)good_suffix[j], - (Int4)(j - last_occurance[FFCharAt(text,shift + j)])); + (Int4)(j - last_occurance[FFCharAt(text,shift + j)])); } } @@ -2205,27 +2233,27 @@ static Boolean IsWholeWordSubstr ( CharPtr subStr ) { - Boolean left, right; - Char ch; + Boolean left, right; + Char ch; - /* check on the left only if there is a character there */ - if (foundPos > 0) { - ch = FFCharAt(searchStr, foundPos - 1); - left = IS_WHITESP(ch) || ispunct(ch); - } else { - left = TRUE; - } + /* check on the left only if there is a character there */ + if (foundPos > 0) { + ch = FFCharAt(searchStr, foundPos - 1); + left = IS_WHITESP(ch) || ispunct(ch); + } else { + left = TRUE; + } - foundPos += StringLen(subStr); + foundPos += StringLen(subStr); if ( foundPos == FFLength(searchStr) ) { right = TRUE; } else { ch = FFCharAt(searchStr, foundPos); - right = IS_WHITESP(ch) || ispunct(ch); + right = IS_WHITESP(ch) || ispunct(ch); } - return left; /* see comment above */ + return left; /* see comment above */ /* return left && right; this is how it should be!*/ } @@ -2297,6 +2325,10 @@ static void InitWWW (IntAsn2gbJobPtr ajp) GetAppParam ("NCBI", "WWWENTREZ", "LINK_ISFINDER", DEF_LINK_ISFINDER, link_isfinder, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_GABI", DEF_LINK_GABI, link_gabi, MAX_WWWBUF); GetAppParam ("NCBI", "WWWENTREZ", "LINK_FANTOM", DEF_LINK_FANTOM, link_fantom, MAX_WWWBUF); + GetAppParam ("NCBI", "WWWENTREZ", "LINK_INTERPRO", DEF_LINK_INTERPRO, link_interpro, MAX_WWWBUF); + GetAppParam ("NCBI", "WWWENTREZ", "LINK_GENEDB", DEF_LINK_GENEDB, link_genedb, MAX_WWWBUF); + GetAppParam ("NCBI", "WWWENTREZ", "LINK_ZFIN", DEF_LINK_ZFIN, link_zfin, MAX_WWWBUF); + GetAppParam ("NCBI", "WWWENTREZ", "LINK_REBASE", DEF_LINK_REBASE, link_rebase, MAX_WWWBUF); } @@ -2525,6 +2557,19 @@ static void FF_www_db_xref_gdb( } } +static void FF_www_db_xref_rebase ( + StringItemPtr ffstring, + CharPtr db, + CharPtr identifier +) +{ + while (*identifier == ' ') + identifier++; + + FFAddTextToString(ffstring, NULL, db, ":", FALSE, FALSE, TILDE_IGNORE); + FFAddTextToString(ffstring, "<a href=", link_rebase, identifier, FALSE, FALSE, TILDE_IGNORE); + FFAddTextToString(ffstring, ".html>", identifier, "</a>", FALSE, FALSE, TILDE_IGNORE); +} static void Do_www_db_xref( @@ -2536,67 +2581,75 @@ static void Do_www_db_xref( if ( ffstring == NULL || db == NULL || identifier == NULL ) return; if ( StringCmp(db, "FLYBASE") == 0) { - FF_www_db_xref_fly(ffstring, db, identifier); + FF_www_db_xref_fly(ffstring, db, identifier); } else if ( StringCmp(db , "COG") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_cog); + FF_www_db_xref_std(ffstring, db, identifier, link_cog); } else if ( StringCmp(db , "UniSTS") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_UniSTS); + FF_www_db_xref_std(ffstring, db, identifier, link_UniSTS); } else if ( StringCmp(db , "LocusID") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_locus); + FF_www_db_xref_std(ffstring, db, identifier, link_locus); } else if ( StringCmp(db , "InterimID") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_locus); + FF_www_db_xref_std(ffstring, db, identifier, link_locus); } else if ( StringCmp(db , "MIM") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_omim); + FF_www_db_xref_std(ffstring, db, identifier, link_omim); } else if ( StringCmp(db , "SGD") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_sgd); + FF_www_db_xref_std(ffstring, db, identifier, link_sgd); } else if ( StringCmp(db , "IMGT/LIGM") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_imgt); + FF_www_db_xref_std(ffstring, db, identifier, link_imgt); } else if ( StringCmp(db , "CK") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_ck); + FF_www_db_xref_std(ffstring, db, identifier, link_ck); } else if ( StringCmp(db , "RiceGenes") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_rice); + FF_www_db_xref_std(ffstring, db, identifier, link_rice); } else if ( StringCmp(db , "dbSNP") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_snp); + FF_www_db_xref_std(ffstring, db, identifier, link_snp); } else if ( StringCmp(db , "RATMAP") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_ratmap); + FF_www_db_xref_std(ffstring, db, identifier, link_ratmap); } else if ( StringCmp(db , "RGD") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_rgd); + FF_www_db_xref_std(ffstring, db, identifier, link_rgd); } else if ( StringCmp(db , "MGD") == 0) { - FF_www_db_xref_mgd(ffstring, db, identifier); + FF_www_db_xref_mgd(ffstring, db, identifier); } else if ( StringCmp(db , "CDD") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_cdd); + FF_www_db_xref_std(ffstring, db, identifier, link_cdd); } else if ( StringCmp(db , "JCM") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_jcm); + FF_www_db_xref_std(ffstring, db, identifier, link_jcm); } else if ( StringCmp(db , "ISFinder") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_isfinder); + FF_www_db_xref_std(ffstring, db, identifier, link_isfinder); } else if ( StringCmp(db , "GABI") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_gabi); + FF_www_db_xref_std(ffstring, db, identifier, link_gabi); + } else if ( StringCmp(db , "ZFIN") == 0) { + FF_www_db_xref_std(ffstring, db, identifier, link_zfin); } else if ( StringCmp(db , "FANTOM_DB") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_fantom); + FF_www_db_xref_std(ffstring, db, identifier, link_fantom); + } else if ( StringCmp(db , "Interpro") == 0) { + FF_www_db_xref_std(ffstring, db, identifier, link_interpro); + } else if ( StringCmp(db , "GeneDB") == 0) { + FF_www_db_xref_std(ffstring, db, identifier, link_genedb); } else if ( StringCmp(db , "PID") == 0) { - FF_www_db_xref_pid(ffstring, db, identifier); + FF_www_db_xref_pid(ffstring, db, identifier); } else if ( StringCmp(db , "dbEST") == 0) { - FF_www_db_xref_dbEST(ffstring, db, identifier); + FF_www_db_xref_dbEST(ffstring, db, identifier); } else if ( StringCmp(db , "dbSTS") == 0) { - FF_www_db_xref_dbSTS(ffstring, db, identifier); + FF_www_db_xref_dbSTS(ffstring, db, identifier); } else if ( StringCmp(db , "niaEST") == 0) { - FF_www_db_xref_niaEST(ffstring, db, identifier); + FF_www_db_xref_niaEST(ffstring, db, identifier); } else if ( StringCmp(db , "WormBase") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_worm_base); + FF_www_db_xref_std(ffstring, db, identifier, link_worm_base); } else if ( StringCmp(db , "AceView/WormGenes") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_ace); + FF_www_db_xref_std(ffstring, db, identifier, link_ace); } else if ( StringCmp(db , "WorfDB") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_worfdb); + FF_www_db_xref_std(ffstring, db, identifier, link_worfdb); } else if ( StringCmp(db , "NextDB") == 0) { - FF_www_db_xref_std(ffstring, db, identifier, link_nextdb); + FF_www_db_xref_std(ffstring, db, identifier, link_nextdb); } else if ( StringCmp(db , "IFO") == 0) { - FF_www_db_xref_ifo(ffstring, db, identifier); + FF_www_db_xref_ifo(ffstring, db, identifier); } else if ( StringCmp(db , "GDB") == 0) { - FF_www_db_xref_gdb(ffstring, db, identifier); + FF_www_db_xref_gdb(ffstring, db, identifier); + } else if ( StringCmp(db , "REBASE") == 0) { + FF_www_db_xref_rebase(ffstring, db, identifier); } else { - /* default: no link just the text */ - FFAddTextToString(ffstring, db, ":", identifier, FALSE, FALSE, TILDE_IGNORE); + /* default: no link just the text */ + FFAddTextToString(ffstring, db, ":", identifier, FALSE, FALSE, TILDE_IGNORE); } } @@ -2968,7 +3021,7 @@ static void FFAddString_NoRedund ( if (StringNICmp (string, "tRNA-", 5) == 0) { str = string+5; - } + } while ( foundPos >= 0 && !wholeWord ) { foundPos = FFStringSearch(unique, str, foundPos); @@ -3190,6 +3243,7 @@ static CharPtr legalDbXrefs [] = { "FLYBASE", "GABI", "GDB", + "GeneDB", "GeneID", "GI", "GO", @@ -3224,10 +3278,12 @@ static CharPtr legalDbXrefs [] = { "UniSTS", "WorfDB", "WormBase", + "ZFIN", NULL }; static CharPtr legalRefSeqDbXrefs [] = { + "REBASE", NULL }; @@ -3523,6 +3579,7 @@ static CharPtr FormatOrganismBlock ( IntAsn2gbJobPtr ajp; Asn2gbSectPtr asp; BioSourcePtr biop = NULL; + Char ch; CharPtr common = NULL; DbtagPtr dbt; SeqMgrDescContext dcontext; @@ -3539,6 +3596,8 @@ static CharPtr FormatOrganismBlock ( CharPtr str; Int4 taxid = -1; CharPtr taxname = NULL; + CharPtr tmp; + CharPtr ptr; ValNodePtr vnp; StringItemPtr ffstring, temp; Char buf [16]; @@ -3631,8 +3690,20 @@ static CharPtr FormatOrganismBlock ( FFAddOneString(temp, "<a href=", FALSE, FALSE, TILDE_IGNORE); FFAddOneString(temp, link_tax, FALSE, FALSE, TILDE_IGNORE); FFAddOneString(temp, "name=", FALSE, FALSE, TILDE_IGNORE); - sprintf (buf, "%ld", (long) taxid); - FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE); + tmp = StringSave (taxname); + if (tmp != NULL) { + ptr = tmp; + ch = *ptr; + while (ch != '\0') { + if (IS_WHITESP (ch)) { + *ptr = '+'; + } + ptr++; + ch = *ptr; + } + FFAddOneString(temp, tmp, FALSE, FALSE, TILDE_IGNORE); + MemFree (tmp); + } FFAddOneString(temp, ">", FALSE, FALSE, TILDE_IGNORE); } FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE); @@ -5583,7 +5654,7 @@ static CharPtr FormatCitSub ( affil = GetAffil (afp); if (format == EMBL_FMT || format == EMBLPEPT_FMT) { if (StringNCmp(affil, " to the EMBL/GenBank/DDBJ databases.", 36) != 0) { - ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases\n"); + ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n"); } else { ValNodeCopyStr (&head, 0, " "); } @@ -5593,7 +5664,7 @@ static CharPtr FormatCitSub ( ValNodeCopyStr (&head, 0, affil); MemFree (affil); } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) { - ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases\n"); + ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n"); } } @@ -5929,6 +6000,21 @@ static void AddReferenceToGbseq ( MemFree (copy); } +static Boolean IsCitSub ( + PubdescPtr pdp, + CitSubPtr csp +) + +{ + ValNodePtr vnp; + + if (csp != NULL) return TRUE; + for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == PUB_Sub) return TRUE; + } + return FALSE; +} + static CharPtr FormatReferenceBlock ( Asn2gbFormatPtr afp, BaseBlockPtr bbp @@ -5958,12 +6044,15 @@ static CharPtr FormatReferenceBlock ( IntRefBlockPtr irp; size_t len; SeqLocPtr loc = NULL; + MedlineEntryPtr mep; Int4 muid = 0; Boolean needsPeriod = FALSE; SeqLocPtr nextslp; Boolean notFound; ObjMgrDataPtr omdp; PubdescPtr pdp = NULL; + PubdescPtr pdpcopy = NULL; + PubmedEntryPtr pep = NULL; Int4 pmid = 0; CharPtr prefix = NULL; RefBlockPtr rbp; @@ -6047,6 +6136,30 @@ static CharPtr FormatReferenceBlock ( return NULL; } + /* any justuids left at this point is RefSeq protein, and should be fetched */ + + irp = (IntRefBlockPtr) rbp; + if (irp->justuids) { + if (rbp->pmid != 0) { + pep = GetPubMedForUid (rbp->pmid); + } else if (rbp->muid != 0) { + pep = GetPubMedForUid (rbp->muid); + } + if (pep != NULL) { + mep = (MedlineEntryPtr) pep->medent; + if (mep != NULL && mep->cit != NULL) { + pdpcopy = AsnIoMemCopy ((Pointer) pdp, + (AsnReadFunc) PubdescAsnRead, + (AsnWriteFunc) PubdescAsnWrite); + cap = AsnIoMemCopy ((Pointer) mep->cit, + (AsnReadFunc) CitArtAsnRead, + (AsnWriteFunc) CitArtAsnWrite); + vnp = ValNodeAddPointer (&(pdpcopy->pub), PUB_Article, (Pointer) cap); + pdp = pdpcopy; + } + } + } + /* print serial number */ FFStartPrint(temp, afp->format, 0, 12, "REFERENCE", 12, 5, 5, "RN", TRUE); @@ -6304,7 +6417,9 @@ static CharPtr FormatReferenceBlock ( if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) { needsPeriod = FALSE; } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) { - needsPeriod = TRUE; + if (! IsCitSub (pdp, csp)) { + needsPeriod = TRUE; + } } FFAddOneString (temp, str, FALSE, FALSE, TILDE_IGNORE); @@ -6386,6 +6501,13 @@ static CharPtr FormatReferenceBlock ( FFRecycleString(ajp, ffstring); FFRecycleString(ajp, temp); + if (pep != NULL) { + PubmedEntryFree (pep); + } + if (pdpcopy != NULL) { + PubdescFree (pdpcopy); + } + return str; } @@ -6536,6 +6658,13 @@ static CharPtr FormatReferenceBlock ( FFRecycleString(ajp, ffstring); FFRecycleString(ajp, temp); + if (pep != NULL) { + PubmedEntryFree (pep); + } + if (pdpcopy != NULL) { + PubdescFree (pdpcopy); + } + return str; } @@ -6552,8 +6681,8 @@ static Boolean IsTildeEOL(CharPtr str) { for ( ptr = str; - IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.'; - ++ptr) continue; + IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.'; + ++ptr) continue; return *ptr == '/' ? FALSE : TRUE; } @@ -7455,7 +7584,6 @@ static CharPtr FlatLoc ( order [SEQID_GENBANK] = num++; order [SEQID_EMBL] = num++; order [SEQID_DDBJ] = num++; - order [SEQID_LOCAL] = num++; order [SEQID_OTHER] = num++; order [SEQID_TPG] = num++; order [SEQID_TPE] = num++; @@ -7469,6 +7597,7 @@ static CharPtr FlatLoc ( order [SEQID_PATENT] = num++; order [SEQID_GI] = num++;; order [SEQID_GENERAL] = num++; + order [SEQID_LOCAL] = num++; order [SEQID_GIIM] = num++; order_initialized = TRUE; } @@ -7521,6 +7650,114 @@ static CharPtr FlatLoc ( } + + +static void PromoteSeqId (SeqIdPtr sip, Pointer userdata) + +{ + SeqIdPtr bestid, newid, oldid; + + bestid = (SeqIdPtr) userdata; + + newid = SeqIdDup (bestid); + if (newid == NULL) return; + + oldid = ValNodeNew (NULL); + if (oldid == NULL) return; + + MemCopy (oldid, sip, sizeof (ValNode)); + oldid->next = NULL; + + sip->choice = newid->choice; + sip->data.ptrvalue = newid->data.ptrvalue; + + SeqIdFree (oldid); + ValNodeFree (newid); + + SeqIdStripLocus (sip); +} + +static SeqLocPtr SeqLocReMapEx (SeqIdPtr newid, SeqLocPtr seq_loc, SeqLocPtr location, Int4 offset, Boolean rev, Boolean masterStyle) + +{ + BioseqPtr bsp; + Boolean hasNulls; + IntFuzzPtr fuzz = NULL; + SeqLocPtr loc; + Boolean noLeft; + Boolean noRight; + Uint1 num = 1; + SeqEntryPtr scope; + SeqIdPtr sip; + SeqLocPtr slp = NULL; + SeqPntPtr spp; + SeqLocPtr tmp; + + if (newid == NULL || seq_loc == NULL || location == NULL) return NULL; + + if (masterStyle) { + + sip = SeqLocId (seq_loc); + if (sip == NULL) return NULL; + bsp = BioseqFind (sip); + if (bsp == NULL) { + scope = SeqEntrySetScope (NULL); + bsp = BioseqFind (sip); + SeqEntrySetScope (scope); + } + if (bsp == NULL) return NULL; + sip = SeqIdFindBest (bsp->id, 0); + + /* map location from parts to segmented bioseq */ + + if (location->choice == SEQLOC_PNT) { + spp = (SeqPntPtr) location->data.ptrvalue; + if (spp != NULL) { + fuzz = spp->fuzz; + } + } + + CheckSeqLocForPartial (location, &noLeft, &noRight); + hasNulls = LocationHasNullsBetween (location); + loc = SeqLocMerge (bsp, location, NULL, FALSE, TRUE, hasNulls); + if (loc == NULL) { + tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight); + loc = SeqLocMerge (bsp, tmp, NULL, FALSE, TRUE, hasNulls); + SeqLocFree (tmp); + } + if (loc == NULL) { + return NULL; + } + FreeAllFuzz (loc); + SetSeqLocPartial (loc, noLeft, noRight); + + if (loc->choice == SEQLOC_PNT && fuzz != NULL) { + spp = (SeqPntPtr) loc->data.ptrvalue; + if (spp != NULL && spp->fuzz == NULL) { + spp->fuzz = AsnIoMemCopy ((Pointer) fuzz, + (AsnReadFunc) IntFuzzAsnRead, + (AsnWriteFunc) IntFuzzAsnWrite); + } + } + + scope = SeqEntrySetScope (NULL); + slp = SeqLocReMap (newid, seq_loc, loc, offset, rev); + SeqEntrySetScope (scope); + + SeqLocFree (loc); + + VisitSeqIdsInSeqLoc (slp, (Pointer) sip, PromoteSeqId); + } else { + + scope = SeqEntrySetScope (NULL); + slp = SeqLocReMap (newid, seq_loc, location, offset, rev); + SeqEntrySetScope (scope); + } + + return slp; +} + + /******************************************************************************/ /* End FlatLoc functions. */ /******************************************************************************/ @@ -7836,7 +8073,7 @@ static CharPtr GetMolTypeQual ( } break; case MOLECULE_TYPE_PRE_MRNA : - return "pre-mRNA"; + return "pre-RNA"; case MOLECULE_TYPE_MRNA : return "mRNA"; case MOLECULE_TYPE_RRNA : @@ -8038,7 +8275,23 @@ static CharPtr FormatSourceFeatBlock ( qvp [SCQUAL_focus].ble = TRUE; } - qvp [SCQUAL_mol_type].str = GetMolTypeQual (bsp); + str = GetMolTypeQual (bsp); + if (str == NULL) { + switch (bsp->mol) { + case Seq_mol_dna : + str = "unassigned DNA"; + break; + case Seq_mol_rna : + str = "unassigned RNA"; + break; + case Seq_mol_aa : + break; + default : + str = "unassigned DNA"; + break; + } + } + qvp [SCQUAL_mol_type].str = str; SubSourceToQualArray (biop->subtype, qvp); @@ -8450,7 +8703,7 @@ typedef struct qualfeatur { Uint1 featurclass; } QualFeatur, PNTR QualFeaturPtr; -#define NUM_GB_QUALS 25 +#define NUM_GB_QUALS 26 static QualFeatur qualToFeature [NUM_GB_QUALS] = { { "allele", FTQUAL_allele }, @@ -8467,6 +8720,7 @@ static QualFeatur qualToFeature [NUM_GB_QUALS] = { { "map", FTQUAL_map }, { "mod_base", FTQUAL_mod_base }, { "number", FTQUAL_number }, + { "operon", FTQUAL_operon }, { "organism", FTQUAL_organism }, { "PCR_conditions", FTQUAL_PCR_conditions }, { "phenotype", FTQUAL_phenotype }, @@ -8581,6 +8835,7 @@ static CharPtr trnaList [] = { "tRNA-OTHER", "tRNA-Tyr", "tRNA-Glx", + "tRNA-TERM", NULL }; @@ -8908,441 +9163,525 @@ typedef struct valqualstruc { static ValQual legalGbqualList [] = { - {FEATDEF_GENE , FTQUAL_allele}, - {FEATDEF_GENE , FTQUAL_function}, - {FEATDEF_GENE , FTQUAL_label}, - {FEATDEF_GENE , FTQUAL_map}, - {FEATDEF_GENE , FTQUAL_phenotype}, - {FEATDEF_GENE , FTQUAL_product}, - {FEATDEF_GENE , FTQUAL_standard_name}, - {FEATDEF_GENE , FTQUAL_usedin}, - - {FEATDEF_CDS , FTQUAL_allele}, - {FEATDEF_CDS , FTQUAL_codon}, - {FEATDEF_CDS , FTQUAL_label}, - {FEATDEF_CDS , FTQUAL_map}, - {FEATDEF_CDS , FTQUAL_number}, - {FEATDEF_CDS , FTQUAL_standard_name}, - {FEATDEF_CDS , FTQUAL_usedin}, - - {FEATDEF_PROT , FTQUAL_product}, - - {FEATDEF_preRNA , FTQUAL_allele}, - {FEATDEF_preRNA , FTQUAL_function}, - {FEATDEF_preRNA , FTQUAL_label}, - {FEATDEF_preRNA , FTQUAL_map}, - {FEATDEF_preRNA , FTQUAL_product}, - {FEATDEF_preRNA , FTQUAL_standard_name}, - {FEATDEF_preRNA , FTQUAL_usedin}, - - {FEATDEF_mRNA , FTQUAL_allele}, - {FEATDEF_mRNA , FTQUAL_function}, - {FEATDEF_mRNA , FTQUAL_label}, - {FEATDEF_mRNA , FTQUAL_map}, - {FEATDEF_mRNA , FTQUAL_product}, - {FEATDEF_mRNA , FTQUAL_standard_name}, - {FEATDEF_mRNA , FTQUAL_usedin}, - - {FEATDEF_tRNA , FTQUAL_function}, - {FEATDEF_tRNA , FTQUAL_label}, - {FEATDEF_tRNA , FTQUAL_map}, - {FEATDEF_tRNA , FTQUAL_product}, - {FEATDEF_tRNA , FTQUAL_standard_name}, - {FEATDEF_tRNA , FTQUAL_usedin}, - - {FEATDEF_rRNA , FTQUAL_function}, - {FEATDEF_rRNA , FTQUAL_label}, - {FEATDEF_rRNA , FTQUAL_map}, - {FEATDEF_rRNA , FTQUAL_product}, - {FEATDEF_rRNA , FTQUAL_standard_name}, - {FEATDEF_rRNA , FTQUAL_usedin}, - - {FEATDEF_snRNA , FTQUAL_function}, - {FEATDEF_snRNA , FTQUAL_label}, - {FEATDEF_snRNA , FTQUAL_map}, - {FEATDEF_snRNA , FTQUAL_product}, - {FEATDEF_snRNA , FTQUAL_standard_name}, - {FEATDEF_snRNA , FTQUAL_usedin}, - - {FEATDEF_scRNA , FTQUAL_function}, - {FEATDEF_scRNA , FTQUAL_label}, - {FEATDEF_scRNA , FTQUAL_map}, - {FEATDEF_scRNA , FTQUAL_product}, - {FEATDEF_scRNA , FTQUAL_standard_name}, - {FEATDEF_scRNA , FTQUAL_usedin}, - - {FEATDEF_otherRNA , FTQUAL_function}, - {FEATDEF_otherRNA , FTQUAL_label}, - {FEATDEF_otherRNA , FTQUAL_map}, - {FEATDEF_otherRNA , FTQUAL_product}, - {FEATDEF_otherRNA , FTQUAL_standard_name}, - {FEATDEF_otherRNA , FTQUAL_usedin}, - - {FEATDEF_attenuator , FTQUAL_label}, - {FEATDEF_attenuator , FTQUAL_map}, - {FEATDEF_attenuator , FTQUAL_phenotype}, - {FEATDEF_attenuator , FTQUAL_usedin}, - - {FEATDEF_C_region , FTQUAL_label}, - {FEATDEF_C_region , FTQUAL_map}, - {FEATDEF_C_region , FTQUAL_product}, - {FEATDEF_C_region , FTQUAL_standard_name}, - {FEATDEF_C_region , FTQUAL_usedin}, - - {FEATDEF_CAAT_signal , FTQUAL_label}, - {FEATDEF_CAAT_signal , FTQUAL_map}, - {FEATDEF_CAAT_signal , FTQUAL_usedin}, - - {FEATDEF_Imp_CDS , FTQUAL_codon}, - {FEATDEF_Imp_CDS , FTQUAL_EC_number}, - {FEATDEF_Imp_CDS , FTQUAL_function}, - {FEATDEF_Imp_CDS , FTQUAL_label}, - {FEATDEF_Imp_CDS , FTQUAL_map}, - {FEATDEF_Imp_CDS , FTQUAL_number}, - {FEATDEF_Imp_CDS , FTQUAL_product}, - {FEATDEF_Imp_CDS , FTQUAL_standard_name}, - {FEATDEF_Imp_CDS , FTQUAL_usedin}, - - {FEATDEF_conflict , FTQUAL_label}, - {FEATDEF_conflict , FTQUAL_map}, - {FEATDEF_conflict , FTQUAL_replace}, - {FEATDEF_conflict , FTQUAL_usedin}, - - {FEATDEF_D_loop , FTQUAL_label}, - {FEATDEF_D_loop , FTQUAL_map}, - {FEATDEF_D_loop , FTQUAL_usedin}, - - {FEATDEF_D_segment , FTQUAL_label}, - {FEATDEF_D_segment , FTQUAL_map}, - {FEATDEF_D_segment , FTQUAL_product}, - {FEATDEF_D_segment , FTQUAL_standard_name}, - {FEATDEF_D_segment , FTQUAL_usedin}, - - {FEATDEF_enhancer , FTQUAL_label}, - {FEATDEF_enhancer , FTQUAL_map}, - {FEATDEF_enhancer , FTQUAL_standard_name}, - {FEATDEF_enhancer , FTQUAL_usedin}, - - {FEATDEF_exon , FTQUAL_allele}, - {FEATDEF_exon , FTQUAL_EC_number}, - {FEATDEF_exon , FTQUAL_function}, - {FEATDEF_exon , FTQUAL_label}, - {FEATDEF_exon , FTQUAL_map}, - {FEATDEF_exon , FTQUAL_number}, - {FEATDEF_exon , FTQUAL_product}, - {FEATDEF_exon , FTQUAL_standard_name}, - {FEATDEF_exon , FTQUAL_usedin}, - - {FEATDEF_GC_signal , FTQUAL_label}, - {FEATDEF_GC_signal , FTQUAL_map}, - {FEATDEF_GC_signal , FTQUAL_usedin}, - - {FEATDEF_iDNA , FTQUAL_function}, - {FEATDEF_iDNA , FTQUAL_label}, - {FEATDEF_iDNA , FTQUAL_map}, - {FEATDEF_iDNA , FTQUAL_number}, - {FEATDEF_iDNA , FTQUAL_standard_name}, - {FEATDEF_iDNA , FTQUAL_usedin}, - - {FEATDEF_intron , FTQUAL_allele}, - {FEATDEF_intron , FTQUAL_cons_splice}, - {FEATDEF_intron , FTQUAL_function}, - {FEATDEF_intron , FTQUAL_label}, - {FEATDEF_intron , FTQUAL_map}, - {FEATDEF_intron , FTQUAL_number}, - {FEATDEF_intron , FTQUAL_standard_name}, - {FEATDEF_intron , FTQUAL_usedin}, - - {FEATDEF_J_segment , FTQUAL_label}, - {FEATDEF_J_segment , FTQUAL_map}, - {FEATDEF_J_segment , FTQUAL_product}, - {FEATDEF_J_segment , FTQUAL_standard_name}, - {FEATDEF_J_segment , FTQUAL_usedin}, - - {FEATDEF_LTR , FTQUAL_function}, - {FEATDEF_LTR , FTQUAL_label}, - {FEATDEF_LTR , FTQUAL_map}, - {FEATDEF_LTR , FTQUAL_standard_name}, - {FEATDEF_LTR , FTQUAL_usedin}, - - {FEATDEF_mat_peptide , FTQUAL_EC_number}, - {FEATDEF_mat_peptide , FTQUAL_function}, - {FEATDEF_mat_peptide , FTQUAL_label}, - {FEATDEF_mat_peptide , FTQUAL_map}, - {FEATDEF_mat_peptide , FTQUAL_product}, - {FEATDEF_mat_peptide , FTQUAL_standard_name}, - {FEATDEF_mat_peptide , FTQUAL_usedin}, - - {FEATDEF_misc_binding , FTQUAL_bound_moiety}, - {FEATDEF_misc_binding , FTQUAL_function}, - {FEATDEF_misc_binding , FTQUAL_label}, - {FEATDEF_misc_binding , FTQUAL_map}, - {FEATDEF_misc_binding , FTQUAL_usedin}, - - {FEATDEF_misc_difference , FTQUAL_clone}, - {FEATDEF_misc_difference , FTQUAL_label}, - {FEATDEF_misc_difference , FTQUAL_map}, - {FEATDEF_misc_difference , FTQUAL_phenotype}, - {FEATDEF_misc_difference , FTQUAL_replace}, - {FEATDEF_misc_difference , FTQUAL_standard_name}, - {FEATDEF_misc_difference , FTQUAL_usedin}, - - {FEATDEF_misc_feature , FTQUAL_function}, - {FEATDEF_misc_feature , FTQUAL_label}, - {FEATDEF_misc_feature , FTQUAL_map}, - {FEATDEF_misc_feature , FTQUAL_number}, - {FEATDEF_misc_feature , FTQUAL_phenotype}, - {FEATDEF_misc_feature , FTQUAL_product}, - {FEATDEF_misc_feature , FTQUAL_standard_name}, - {FEATDEF_misc_feature , FTQUAL_usedin}, - - {FEATDEF_misc_recomb , FTQUAL_label}, - {FEATDEF_misc_recomb , FTQUAL_map}, - {FEATDEF_misc_recomb , FTQUAL_organism}, - {FEATDEF_misc_recomb , FTQUAL_standard_name}, - {FEATDEF_misc_recomb , FTQUAL_usedin}, - - {FEATDEF_misc_signal , FTQUAL_function}, - {FEATDEF_misc_signal , FTQUAL_label}, - {FEATDEF_misc_signal , FTQUAL_map}, - {FEATDEF_misc_signal , FTQUAL_phenotype}, - {FEATDEF_misc_signal , FTQUAL_standard_name}, - {FEATDEF_misc_signal , FTQUAL_usedin}, - - {FEATDEF_misc_structure , FTQUAL_function}, - {FEATDEF_misc_structure , FTQUAL_label}, - {FEATDEF_misc_structure , FTQUAL_map}, - {FEATDEF_misc_structure , FTQUAL_standard_name}, - {FEATDEF_misc_structure , FTQUAL_usedin}, - - {FEATDEF_modified_base , FTQUAL_frequency}, - {FEATDEF_modified_base , FTQUAL_label}, - {FEATDEF_modified_base , FTQUAL_map}, - {FEATDEF_modified_base , FTQUAL_mod_base}, - {FEATDEF_modified_base , FTQUAL_usedin}, - - {FEATDEF_N_region , FTQUAL_label}, - {FEATDEF_N_region , FTQUAL_map}, - {FEATDEF_N_region , FTQUAL_product}, - {FEATDEF_N_region , FTQUAL_standard_name}, - {FEATDEF_N_region , FTQUAL_usedin}, - - {FEATDEF_old_sequence , FTQUAL_label}, - {FEATDEF_old_sequence , FTQUAL_map}, - {FEATDEF_old_sequence , FTQUAL_replace}, - {FEATDEF_old_sequence , FTQUAL_usedin}, - - {FEATDEF_polyA_signal , FTQUAL_label}, - {FEATDEF_polyA_signal , FTQUAL_map}, - {FEATDEF_polyA_signal , FTQUAL_usedin}, - - {FEATDEF_polyA_site , FTQUAL_label}, - {FEATDEF_polyA_site , FTQUAL_map}, - {FEATDEF_polyA_site , FTQUAL_usedin}, - - {FEATDEF_prim_transcript , FTQUAL_allele}, - {FEATDEF_prim_transcript , FTQUAL_function}, - {FEATDEF_prim_transcript , FTQUAL_label}, - {FEATDEF_prim_transcript , FTQUAL_map}, - {FEATDEF_prim_transcript , FTQUAL_standard_name}, - {FEATDEF_prim_transcript , FTQUAL_usedin}, - - {FEATDEF_primer_bind , FTQUAL_label}, - {FEATDEF_primer_bind , FTQUAL_map}, - {FEATDEF_primer_bind , FTQUAL_PCR_conditions}, - {FEATDEF_primer_bind , FTQUAL_standard_name}, - {FEATDEF_primer_bind , FTQUAL_usedin}, - - {FEATDEF_promoter , FTQUAL_function}, - {FEATDEF_promoter , FTQUAL_label}, - {FEATDEF_promoter , FTQUAL_map}, - {FEATDEF_promoter , FTQUAL_phenotype}, - {FEATDEF_promoter , FTQUAL_standard_name}, - {FEATDEF_promoter , FTQUAL_usedin}, - - {FEATDEF_protein_bind , FTQUAL_bound_moiety}, - {FEATDEF_protein_bind , FTQUAL_function}, - {FEATDEF_protein_bind , FTQUAL_label}, - {FEATDEF_protein_bind , FTQUAL_map}, - {FEATDEF_protein_bind , FTQUAL_standard_name}, - {FEATDEF_protein_bind , FTQUAL_usedin}, - - {FEATDEF_RBS , FTQUAL_label}, - {FEATDEF_RBS , FTQUAL_map}, - {FEATDEF_RBS , FTQUAL_standard_name}, - {FEATDEF_RBS , FTQUAL_usedin}, - - {FEATDEF_repeat_region , FTQUAL_function}, - {FEATDEF_repeat_region , FTQUAL_insertion_seq}, - {FEATDEF_repeat_region , FTQUAL_label}, - {FEATDEF_repeat_region , FTQUAL_map}, - {FEATDEF_repeat_region , FTQUAL_rpt_family}, - {FEATDEF_repeat_region , FTQUAL_rpt_type}, - {FEATDEF_repeat_region , FTQUAL_rpt_unit}, - {FEATDEF_repeat_region , FTQUAL_standard_name}, - {FEATDEF_repeat_region , FTQUAL_transposon}, - {FEATDEF_repeat_region , FTQUAL_usedin}, - - {FEATDEF_repeat_unit , FTQUAL_function}, - {FEATDEF_repeat_unit , FTQUAL_label}, - {FEATDEF_repeat_unit , FTQUAL_map}, - {FEATDEF_repeat_unit , FTQUAL_rpt_family}, - {FEATDEF_repeat_unit , FTQUAL_rpt_type}, - {FEATDEF_repeat_unit , FTQUAL_usedin}, - - {FEATDEF_rep_origin , FTQUAL_direction}, - {FEATDEF_rep_origin , FTQUAL_label}, - {FEATDEF_rep_origin , FTQUAL_map}, - {FEATDEF_rep_origin , FTQUAL_standard_name}, - {FEATDEF_rep_origin , FTQUAL_usedin}, - - {FEATDEF_S_region , FTQUAL_label}, - {FEATDEF_S_region , FTQUAL_map}, - {FEATDEF_S_region , FTQUAL_product}, - {FEATDEF_S_region , FTQUAL_standard_name}, - {FEATDEF_S_region , FTQUAL_usedin}, - - {FEATDEF_satellite , FTQUAL_label}, - {FEATDEF_satellite , FTQUAL_map}, - {FEATDEF_satellite , FTQUAL_rpt_family}, - {FEATDEF_satellite , FTQUAL_rpt_type}, - {FEATDEF_satellite , FTQUAL_rpt_unit}, - {FEATDEF_satellite , FTQUAL_standard_name}, - {FEATDEF_satellite , FTQUAL_usedin}, - - {FEATDEF_sig_peptide , FTQUAL_function}, - {FEATDEF_sig_peptide , FTQUAL_label}, - {FEATDEF_sig_peptide , FTQUAL_map}, - {FEATDEF_sig_peptide , FTQUAL_product}, - {FEATDEF_sig_peptide , FTQUAL_standard_name}, - {FEATDEF_sig_peptide , FTQUAL_usedin}, - - {FEATDEF_stem_loop , FTQUAL_function}, - {FEATDEF_stem_loop , FTQUAL_label}, - {FEATDEF_stem_loop , FTQUAL_map}, - {FEATDEF_stem_loop , FTQUAL_standard_name}, - {FEATDEF_stem_loop , FTQUAL_usedin}, - - {FEATDEF_STS , FTQUAL_label}, - {FEATDEF_STS , FTQUAL_map}, - {FEATDEF_STS , FTQUAL_standard_name}, - {FEATDEF_STS , FTQUAL_usedin}, - - {FEATDEF_TATA_signal , FTQUAL_label}, - {FEATDEF_TATA_signal , FTQUAL_map}, - {FEATDEF_TATA_signal , FTQUAL_usedin}, - - {FEATDEF_terminator , FTQUAL_label}, - {FEATDEF_terminator , FTQUAL_map}, - {FEATDEF_terminator , FTQUAL_standard_name}, - {FEATDEF_terminator , FTQUAL_usedin}, - - {FEATDEF_transit_peptide , FTQUAL_function}, - {FEATDEF_transit_peptide , FTQUAL_label}, - {FEATDEF_transit_peptide , FTQUAL_map}, - {FEATDEF_transit_peptide , FTQUAL_product}, - {FEATDEF_transit_peptide , FTQUAL_standard_name}, - {FEATDEF_transit_peptide , FTQUAL_usedin}, - - {FEATDEF_unsure , FTQUAL_label}, - {FEATDEF_unsure , FTQUAL_map}, - {FEATDEF_unsure , FTQUAL_replace}, - {FEATDEF_unsure , FTQUAL_usedin}, - - {FEATDEF_V_region , FTQUAL_label}, - {FEATDEF_V_region , FTQUAL_map}, - {FEATDEF_V_region , FTQUAL_product}, - {FEATDEF_V_region , FTQUAL_standard_name}, - {FEATDEF_V_region , FTQUAL_usedin}, - - {FEATDEF_V_segment , FTQUAL_label}, - {FEATDEF_V_segment , FTQUAL_map}, - {FEATDEF_V_segment , FTQUAL_product}, - {FEATDEF_V_segment , FTQUAL_standard_name}, - {FEATDEF_V_segment , FTQUAL_usedin}, - - {FEATDEF_variation , FTQUAL_allele}, - {FEATDEF_variation , FTQUAL_frequency}, - {FEATDEF_variation , FTQUAL_label}, - {FEATDEF_variation , FTQUAL_map}, - {FEATDEF_variation , FTQUAL_phenotype}, - {FEATDEF_variation , FTQUAL_product}, - {FEATDEF_variation , FTQUAL_replace}, - {FEATDEF_variation , FTQUAL_standard_name}, - {FEATDEF_variation , FTQUAL_usedin}, - - {FEATDEF_3clip , FTQUAL_allele}, - {FEATDEF_3clip , FTQUAL_function}, - {FEATDEF_3clip , FTQUAL_label}, - {FEATDEF_3clip , FTQUAL_map}, - {FEATDEF_3clip , FTQUAL_standard_name}, - {FEATDEF_3clip , FTQUAL_usedin}, - - {FEATDEF_3UTR , FTQUAL_allele}, - {FEATDEF_3UTR , FTQUAL_function}, - {FEATDEF_3UTR , FTQUAL_label}, - {FEATDEF_3UTR , FTQUAL_map}, - {FEATDEF_3UTR , FTQUAL_standard_name}, - {FEATDEF_3UTR , FTQUAL_usedin}, - - {FEATDEF_5clip , FTQUAL_allele}, - {FEATDEF_5clip , FTQUAL_function}, - {FEATDEF_5clip , FTQUAL_label}, - {FEATDEF_5clip , FTQUAL_map}, - {FEATDEF_5clip , FTQUAL_standard_name}, - {FEATDEF_5clip , FTQUAL_usedin}, - - {FEATDEF_5UTR , FTQUAL_allele}, - {FEATDEF_5UTR , FTQUAL_function}, - {FEATDEF_5UTR , FTQUAL_label}, - {FEATDEF_5UTR , FTQUAL_map}, - {FEATDEF_5UTR , FTQUAL_standard_name}, - {FEATDEF_5UTR , FTQUAL_usedin}, - - {FEATDEF_10_signal , FTQUAL_label}, - {FEATDEF_10_signal , FTQUAL_map}, - {FEATDEF_10_signal , FTQUAL_standard_name}, - {FEATDEF_10_signal , FTQUAL_usedin}, - - {FEATDEF_35_signal , FTQUAL_label}, - {FEATDEF_35_signal , FTQUAL_map}, - {FEATDEF_35_signal , FTQUAL_standard_name}, - {FEATDEF_35_signal , FTQUAL_usedin}, - - {FEATDEF_REGION , FTQUAL_function}, - {FEATDEF_REGION , FTQUAL_label}, - {FEATDEF_REGION , FTQUAL_map}, - {FEATDEF_REGION , FTQUAL_number}, - {FEATDEF_REGION , FTQUAL_phenotype}, - {FEATDEF_REGION , FTQUAL_product}, - {FEATDEF_REGION , FTQUAL_standard_name}, - {FEATDEF_REGION , FTQUAL_usedin}, - - {FEATDEF_mat_peptide_aa , FTQUAL_label}, - {FEATDEF_mat_peptide_aa , FTQUAL_map}, - {FEATDEF_mat_peptide_aa , FTQUAL_product}, - {FEATDEF_mat_peptide_aa , FTQUAL_standard_name}, - {FEATDEF_mat_peptide_aa , FTQUAL_usedin}, - - {FEATDEF_sig_peptide_aa , FTQUAL_label}, - {FEATDEF_sig_peptide_aa , FTQUAL_map}, - {FEATDEF_sig_peptide_aa , FTQUAL_product}, - {FEATDEF_sig_peptide_aa , FTQUAL_standard_name}, - {FEATDEF_sig_peptide_aa , FTQUAL_usedin}, - - {FEATDEF_transit_peptide_aa , FTQUAL_label}, - {FEATDEF_transit_peptide_aa , FTQUAL_map}, - {FEATDEF_transit_peptide_aa , FTQUAL_product}, - {FEATDEF_transit_peptide_aa , FTQUAL_standard_name}, - {FEATDEF_transit_peptide_aa , FTQUAL_usedin}, - - {FEATDEF_snoRNA , FTQUAL_function}, - {FEATDEF_snoRNA , FTQUAL_label}, - {FEATDEF_snoRNA , FTQUAL_map}, - {FEATDEF_snoRNA , FTQUAL_product}, - {FEATDEF_snoRNA , FTQUAL_standard_name}, - {FEATDEF_snoRNA , FTQUAL_usedin} - + { FEATDEF_GENE , FTQUAL_allele }, + { FEATDEF_GENE , FTQUAL_function }, + { FEATDEF_GENE , FTQUAL_label }, + { FEATDEF_GENE , FTQUAL_map }, + { FEATDEF_GENE , FTQUAL_operon }, + { FEATDEF_GENE , FTQUAL_phenotype }, + { FEATDEF_GENE , FTQUAL_product }, + { FEATDEF_GENE , FTQUAL_standard_name }, + { FEATDEF_GENE , FTQUAL_usedin }, + + { FEATDEF_CDS , FTQUAL_allele }, + { FEATDEF_CDS , FTQUAL_codon }, + { FEATDEF_CDS , FTQUAL_label }, + { FEATDEF_CDS , FTQUAL_map }, + { FEATDEF_CDS , FTQUAL_number }, + { FEATDEF_CDS , FTQUAL_operon }, + { FEATDEF_CDS , FTQUAL_standard_name }, + { FEATDEF_CDS , FTQUAL_usedin }, + + { FEATDEF_PROT , FTQUAL_product }, + + { FEATDEF_preRNA , FTQUAL_allele }, + { FEATDEF_preRNA , FTQUAL_function }, + { FEATDEF_preRNA , FTQUAL_label }, + { FEATDEF_preRNA , FTQUAL_map }, + { FEATDEF_preRNA , FTQUAL_operon }, + { FEATDEF_preRNA , FTQUAL_product }, + { FEATDEF_preRNA , FTQUAL_standard_name }, + { FEATDEF_preRNA , FTQUAL_usedin }, + + { FEATDEF_mRNA , FTQUAL_allele }, + { FEATDEF_mRNA , FTQUAL_function }, + { FEATDEF_mRNA , FTQUAL_label }, + { FEATDEF_mRNA , FTQUAL_map }, + { FEATDEF_mRNA , FTQUAL_operon }, + { FEATDEF_mRNA , FTQUAL_product }, + { FEATDEF_mRNA , FTQUAL_standard_name }, + { FEATDEF_mRNA , FTQUAL_usedin }, + + { FEATDEF_tRNA , FTQUAL_allele }, + { FEATDEF_tRNA , FTQUAL_function }, + { FEATDEF_tRNA , FTQUAL_label }, + { FEATDEF_tRNA , FTQUAL_map }, + { FEATDEF_tRNA , FTQUAL_product }, + { FEATDEF_tRNA , FTQUAL_standard_name }, + { FEATDEF_tRNA , FTQUAL_usedin }, + + { FEATDEF_rRNA , FTQUAL_allele }, + { FEATDEF_rRNA , FTQUAL_function }, + { FEATDEF_rRNA , FTQUAL_label }, + { FEATDEF_rRNA , FTQUAL_map }, + { FEATDEF_rRNA , FTQUAL_product }, + { FEATDEF_rRNA , FTQUAL_standard_name }, + { FEATDEF_rRNA , FTQUAL_usedin }, + + { FEATDEF_snRNA , FTQUAL_allele }, + { FEATDEF_snRNA , FTQUAL_function }, + { FEATDEF_snRNA , FTQUAL_label }, + { FEATDEF_snRNA , FTQUAL_map }, + { FEATDEF_snRNA , FTQUAL_product }, + { FEATDEF_snRNA , FTQUAL_standard_name }, + { FEATDEF_snRNA , FTQUAL_usedin }, + + { FEATDEF_scRNA , FTQUAL_allele }, + { FEATDEF_scRNA , FTQUAL_function }, + { FEATDEF_scRNA , FTQUAL_label }, + { FEATDEF_scRNA , FTQUAL_map }, + { FEATDEF_scRNA , FTQUAL_product }, + { FEATDEF_scRNA , FTQUAL_standard_name }, + { FEATDEF_scRNA , FTQUAL_usedin }, + + { FEATDEF_otherRNA , FTQUAL_allele }, + { FEATDEF_otherRNA , FTQUAL_function }, + { FEATDEF_otherRNA , FTQUAL_label }, + { FEATDEF_otherRNA , FTQUAL_map }, + { FEATDEF_otherRNA , FTQUAL_operon }, + { FEATDEF_otherRNA , FTQUAL_product }, + { FEATDEF_otherRNA , FTQUAL_standard_name }, + { FEATDEF_otherRNA , FTQUAL_usedin }, + + { FEATDEF_attenuator , FTQUAL_allele }, + { FEATDEF_attenuator , FTQUAL_label }, + { FEATDEF_attenuator , FTQUAL_map }, + { FEATDEF_attenuator , FTQUAL_operon }, + { FEATDEF_attenuator , FTQUAL_phenotype }, + { FEATDEF_attenuator , FTQUAL_usedin }, + + { FEATDEF_C_region , FTQUAL_allele }, + { FEATDEF_C_region , FTQUAL_label }, + { FEATDEF_C_region , FTQUAL_map }, + { FEATDEF_C_region , FTQUAL_product }, + { FEATDEF_C_region , FTQUAL_standard_name }, + { FEATDEF_C_region , FTQUAL_usedin }, + + { FEATDEF_CAAT_signal , FTQUAL_allele }, + { FEATDEF_CAAT_signal , FTQUAL_label }, + { FEATDEF_CAAT_signal , FTQUAL_map }, + { FEATDEF_CAAT_signal , FTQUAL_usedin }, + + { FEATDEF_Imp_CDS , FTQUAL_codon }, + { FEATDEF_Imp_CDS , FTQUAL_EC_number }, + { FEATDEF_Imp_CDS , FTQUAL_function }, + { FEATDEF_Imp_CDS , FTQUAL_label }, + { FEATDEF_Imp_CDS , FTQUAL_map }, + { FEATDEF_Imp_CDS , FTQUAL_number }, + { FEATDEF_Imp_CDS , FTQUAL_operon }, + { FEATDEF_Imp_CDS , FTQUAL_product }, + { FEATDEF_Imp_CDS , FTQUAL_standard_name }, + { FEATDEF_Imp_CDS , FTQUAL_usedin }, + + { FEATDEF_conflict , FTQUAL_allele }, + { FEATDEF_conflict , FTQUAL_label }, + { FEATDEF_conflict , FTQUAL_map }, + { FEATDEF_conflict , FTQUAL_replace }, + { FEATDEF_conflict , FTQUAL_usedin }, + + { FEATDEF_D_loop , FTQUAL_allele }, + { FEATDEF_D_loop , FTQUAL_label }, + { FEATDEF_D_loop , FTQUAL_map }, + { FEATDEF_D_loop , FTQUAL_usedin }, + + { FEATDEF_D_segment , FTQUAL_allele }, + { FEATDEF_D_segment , FTQUAL_label }, + { FEATDEF_D_segment , FTQUAL_map }, + { FEATDEF_D_segment , FTQUAL_product }, + { FEATDEF_D_segment , FTQUAL_standard_name }, + { FEATDEF_D_segment , FTQUAL_usedin }, + + { FEATDEF_enhancer , FTQUAL_allele }, + { FEATDEF_enhancer , FTQUAL_label }, + { FEATDEF_enhancer , FTQUAL_map }, + { FEATDEF_enhancer , FTQUAL_standard_name }, + { FEATDEF_enhancer , FTQUAL_usedin }, + + { FEATDEF_exon , FTQUAL_allele }, + { FEATDEF_exon , FTQUAL_EC_number }, + { FEATDEF_exon , FTQUAL_function }, + { FEATDEF_exon , FTQUAL_label }, + { FEATDEF_exon , FTQUAL_map }, + { FEATDEF_exon , FTQUAL_number }, + { FEATDEF_exon , FTQUAL_product }, + { FEATDEF_exon , FTQUAL_standard_name }, + { FEATDEF_exon , FTQUAL_usedin }, + + { FEATDEF_GC_signal , FTQUAL_allele }, + { FEATDEF_GC_signal , FTQUAL_label }, + { FEATDEF_GC_signal , FTQUAL_map }, + { FEATDEF_GC_signal , FTQUAL_usedin }, + + { FEATDEF_iDNA , FTQUAL_allele }, + { FEATDEF_iDNA , FTQUAL_function }, + { FEATDEF_iDNA , FTQUAL_label }, + { FEATDEF_iDNA , FTQUAL_map }, + { FEATDEF_iDNA , FTQUAL_number }, + { FEATDEF_iDNA , FTQUAL_standard_name }, + { FEATDEF_iDNA , FTQUAL_usedin }, + + { FEATDEF_intron , FTQUAL_allele }, + { FEATDEF_intron , FTQUAL_cons_splice }, + { FEATDEF_intron , FTQUAL_function }, + { FEATDEF_intron , FTQUAL_label }, + { FEATDEF_intron , FTQUAL_map }, + { FEATDEF_intron , FTQUAL_number }, + { FEATDEF_intron , FTQUAL_standard_name }, + { FEATDEF_intron , FTQUAL_usedin }, + + { FEATDEF_J_segment , FTQUAL_allele }, + { FEATDEF_J_segment , FTQUAL_label }, + { FEATDEF_J_segment , FTQUAL_map }, + { FEATDEF_J_segment , FTQUAL_product }, + { FEATDEF_J_segment , FTQUAL_standard_name }, + { FEATDEF_J_segment , FTQUAL_usedin }, + + { FEATDEF_LTR , FTQUAL_allele }, + { FEATDEF_LTR , FTQUAL_function }, + { FEATDEF_LTR , FTQUAL_label }, + { FEATDEF_LTR , FTQUAL_map }, + { FEATDEF_LTR , FTQUAL_standard_name }, + { FEATDEF_LTR , FTQUAL_usedin }, + + { FEATDEF_mat_peptide , FTQUAL_allele }, + { FEATDEF_mat_peptide , FTQUAL_EC_number }, + { FEATDEF_mat_peptide , FTQUAL_function }, + { FEATDEF_mat_peptide , FTQUAL_label }, + { FEATDEF_mat_peptide , FTQUAL_map }, + { FEATDEF_mat_peptide , FTQUAL_product }, + { FEATDEF_mat_peptide , FTQUAL_standard_name }, + { FEATDEF_mat_peptide , FTQUAL_usedin }, + + { FEATDEF_misc_binding , FTQUAL_allele }, + { FEATDEF_misc_binding , FTQUAL_bound_moiety }, + { FEATDEF_misc_binding , FTQUAL_function }, + { FEATDEF_misc_binding , FTQUAL_label }, + { FEATDEF_misc_binding , FTQUAL_map }, + { FEATDEF_misc_binding , FTQUAL_usedin }, + + { FEATDEF_misc_difference , FTQUAL_allele }, + { FEATDEF_misc_difference , FTQUAL_clone }, + { FEATDEF_misc_difference , FTQUAL_label }, + { FEATDEF_misc_difference , FTQUAL_map }, + { FEATDEF_misc_difference , FTQUAL_phenotype }, + { FEATDEF_misc_difference , FTQUAL_replace }, + { FEATDEF_misc_difference , FTQUAL_standard_name }, + { FEATDEF_misc_difference , FTQUAL_usedin }, + + { FEATDEF_misc_feature , FTQUAL_allele }, + { FEATDEF_misc_feature , FTQUAL_function }, + { FEATDEF_misc_feature , FTQUAL_label }, + { FEATDEF_misc_feature , FTQUAL_map }, + { FEATDEF_misc_feature , FTQUAL_number }, + { FEATDEF_misc_feature , FTQUAL_phenotype }, + { FEATDEF_misc_feature , FTQUAL_product }, + { FEATDEF_misc_feature , FTQUAL_standard_name }, + { FEATDEF_misc_feature , FTQUAL_usedin }, + + { FEATDEF_misc_recomb , FTQUAL_allele }, + { FEATDEF_misc_recomb , FTQUAL_label }, + { FEATDEF_misc_recomb , FTQUAL_map }, + { FEATDEF_misc_recomb , FTQUAL_organism }, + { FEATDEF_misc_recomb , FTQUAL_standard_name }, + { FEATDEF_misc_recomb , FTQUAL_usedin }, + + { FEATDEF_misc_signal , FTQUAL_allele }, + { FEATDEF_misc_signal , FTQUAL_function }, + { FEATDEF_misc_signal , FTQUAL_label }, + { FEATDEF_misc_signal , FTQUAL_map }, + { FEATDEF_misc_signal , FTQUAL_operon }, + { FEATDEF_misc_signal , FTQUAL_phenotype }, + { FEATDEF_misc_signal , FTQUAL_standard_name }, + { FEATDEF_misc_signal , FTQUAL_usedin }, + + { FEATDEF_misc_structure , FTQUAL_allele }, + { FEATDEF_misc_structure , FTQUAL_function }, + { FEATDEF_misc_structure , FTQUAL_label }, + { FEATDEF_misc_structure , FTQUAL_map }, + { FEATDEF_misc_structure , FTQUAL_standard_name }, + { FEATDEF_misc_structure , FTQUAL_usedin }, + + { FEATDEF_modified_base , FTQUAL_allele }, + { FEATDEF_modified_base , FTQUAL_frequency }, + { FEATDEF_modified_base , FTQUAL_label }, + { FEATDEF_modified_base , FTQUAL_map }, + { FEATDEF_modified_base , FTQUAL_mod_base }, + { FEATDEF_modified_base , FTQUAL_usedin }, + + { FEATDEF_N_region , FTQUAL_allele }, + { FEATDEF_N_region , FTQUAL_label }, + { FEATDEF_N_region , FTQUAL_map }, + { FEATDEF_N_region , FTQUAL_product }, + { FEATDEF_N_region , FTQUAL_standard_name }, + { FEATDEF_N_region , FTQUAL_usedin }, + + { FEATDEF_old_sequence , FTQUAL_allele }, + { FEATDEF_old_sequence , FTQUAL_label }, + { FEATDEF_old_sequence , FTQUAL_map }, + { FEATDEF_old_sequence , FTQUAL_replace }, + { FEATDEF_old_sequence , FTQUAL_usedin }, + + { FEATDEF_polyA_signal , FTQUAL_allele }, + { FEATDEF_polyA_signal , FTQUAL_label }, + { FEATDEF_polyA_signal , FTQUAL_map }, + { FEATDEF_polyA_signal , FTQUAL_usedin }, + + { FEATDEF_polyA_site , FTQUAL_allele }, + { FEATDEF_polyA_site , FTQUAL_label }, + { FEATDEF_polyA_site , FTQUAL_map }, + { FEATDEF_polyA_site , FTQUAL_usedin }, + + { FEATDEF_prim_transcript , FTQUAL_allele }, + { FEATDEF_prim_transcript , FTQUAL_function }, + { FEATDEF_prim_transcript , FTQUAL_label }, + { FEATDEF_prim_transcript , FTQUAL_map }, + { FEATDEF_prim_transcript , FTQUAL_operon }, + { FEATDEF_prim_transcript , FTQUAL_standard_name }, + { FEATDEF_prim_transcript , FTQUAL_usedin }, + + { FEATDEF_primer_bind , FTQUAL_allele }, + { FEATDEF_primer_bind , FTQUAL_label }, + { FEATDEF_primer_bind , FTQUAL_map }, + { FEATDEF_primer_bind , FTQUAL_PCR_conditions }, + { FEATDEF_primer_bind , FTQUAL_standard_name }, + { FEATDEF_primer_bind , FTQUAL_usedin }, + + { FEATDEF_promoter , FTQUAL_allele }, + { FEATDEF_promoter , FTQUAL_function }, + { FEATDEF_promoter , FTQUAL_label }, + { FEATDEF_promoter , FTQUAL_map }, + { FEATDEF_promoter , FTQUAL_operon }, + { FEATDEF_promoter , FTQUAL_phenotype }, + { FEATDEF_promoter , FTQUAL_standard_name }, + { FEATDEF_promoter , FTQUAL_usedin }, + + { FEATDEF_protein_bind , FTQUAL_allele }, + { FEATDEF_protein_bind , FTQUAL_bound_moiety }, + { FEATDEF_protein_bind , FTQUAL_function }, + { FEATDEF_protein_bind , FTQUAL_label }, + { FEATDEF_protein_bind , FTQUAL_map }, + { FEATDEF_protein_bind , FTQUAL_standard_name }, + { FEATDEF_protein_bind , FTQUAL_usedin }, + + { FEATDEF_RBS , FTQUAL_allele }, + { FEATDEF_RBS , FTQUAL_label }, + { FEATDEF_RBS , FTQUAL_map }, + { FEATDEF_RBS , FTQUAL_standard_name }, + { FEATDEF_RBS , FTQUAL_usedin }, + + { FEATDEF_repeat_region , FTQUAL_allele }, + { FEATDEF_repeat_region , FTQUAL_function }, + { FEATDEF_repeat_region , FTQUAL_insertion_seq }, + { FEATDEF_repeat_region , FTQUAL_label }, + { FEATDEF_repeat_region , FTQUAL_map }, + { FEATDEF_repeat_region , FTQUAL_rpt_family }, + { FEATDEF_repeat_region , FTQUAL_rpt_type }, + { FEATDEF_repeat_region , FTQUAL_rpt_unit }, + { FEATDEF_repeat_region , FTQUAL_standard_name }, + { FEATDEF_repeat_region , FTQUAL_transposon }, + { FEATDEF_repeat_region , FTQUAL_usedin }, + + { FEATDEF_repeat_unit , FTQUAL_allele }, + { FEATDEF_repeat_unit , FTQUAL_function }, + { FEATDEF_repeat_unit , FTQUAL_label }, + { FEATDEF_repeat_unit , FTQUAL_map }, + { FEATDEF_repeat_unit , FTQUAL_rpt_family }, + { FEATDEF_repeat_unit , FTQUAL_rpt_type }, + { FEATDEF_repeat_unit , FTQUAL_usedin }, + + { FEATDEF_rep_origin , FTQUAL_allele }, + { FEATDEF_rep_origin , FTQUAL_direction }, + { FEATDEF_rep_origin , FTQUAL_label }, + { FEATDEF_rep_origin , FTQUAL_map }, + { FEATDEF_rep_origin , FTQUAL_standard_name }, + { FEATDEF_rep_origin , FTQUAL_usedin }, + + { FEATDEF_S_region , FTQUAL_allele }, + { FEATDEF_S_region , FTQUAL_label }, + { FEATDEF_S_region , FTQUAL_map }, + { FEATDEF_S_region , FTQUAL_product }, + { FEATDEF_S_region , FTQUAL_standard_name }, + { FEATDEF_S_region , FTQUAL_usedin }, + + { FEATDEF_satellite , FTQUAL_allele }, + { FEATDEF_satellite , FTQUAL_label }, + { FEATDEF_satellite , FTQUAL_map }, + { FEATDEF_satellite , FTQUAL_rpt_family }, + { FEATDEF_satellite , FTQUAL_rpt_type }, + { FEATDEF_satellite , FTQUAL_rpt_unit }, + { FEATDEF_satellite , FTQUAL_standard_name }, + { FEATDEF_satellite , FTQUAL_usedin }, + + { FEATDEF_sig_peptide , FTQUAL_allele }, + { FEATDEF_sig_peptide , FTQUAL_function }, + { FEATDEF_sig_peptide , FTQUAL_label }, + { FEATDEF_sig_peptide , FTQUAL_map }, + { FEATDEF_sig_peptide , FTQUAL_product }, + { FEATDEF_sig_peptide , FTQUAL_standard_name }, + { FEATDEF_sig_peptide , FTQUAL_usedin }, + + { FEATDEF_stem_loop , FTQUAL_allele }, + { FEATDEF_stem_loop , FTQUAL_function }, + { FEATDEF_stem_loop , FTQUAL_label }, + { FEATDEF_stem_loop , FTQUAL_map }, + { FEATDEF_stem_loop , FTQUAL_operon }, + { FEATDEF_stem_loop , FTQUAL_standard_name }, + { FEATDEF_stem_loop , FTQUAL_usedin }, + + { FEATDEF_STS , FTQUAL_allele }, + { FEATDEF_STS , FTQUAL_label }, + { FEATDEF_STS , FTQUAL_map }, + { FEATDEF_STS , FTQUAL_standard_name }, + { FEATDEF_STS , FTQUAL_usedin }, + + { FEATDEF_TATA_signal , FTQUAL_allele }, + { FEATDEF_TATA_signal , FTQUAL_label }, + { FEATDEF_TATA_signal , FTQUAL_map }, + { FEATDEF_TATA_signal , FTQUAL_usedin }, + + { FEATDEF_terminator , FTQUAL_allele }, + { FEATDEF_terminator , FTQUAL_label }, + { FEATDEF_terminator , FTQUAL_map }, + { FEATDEF_terminator , FTQUAL_operon }, + { FEATDEF_terminator , FTQUAL_standard_name }, + { FEATDEF_terminator , FTQUAL_usedin }, + + { FEATDEF_transit_peptide , FTQUAL_allele }, + { FEATDEF_transit_peptide , FTQUAL_function }, + { FEATDEF_transit_peptide , FTQUAL_label }, + { FEATDEF_transit_peptide , FTQUAL_map }, + { FEATDEF_transit_peptide , FTQUAL_product }, + { FEATDEF_transit_peptide , FTQUAL_standard_name }, + { FEATDEF_transit_peptide , FTQUAL_usedin }, + + { FEATDEF_unsure , FTQUAL_allele }, + { FEATDEF_unsure , FTQUAL_label }, + { FEATDEF_unsure , FTQUAL_map }, + { FEATDEF_unsure , FTQUAL_replace }, + { FEATDEF_unsure , FTQUAL_usedin }, + + { FEATDEF_V_region , FTQUAL_allele }, + { FEATDEF_V_region , FTQUAL_label }, + { FEATDEF_V_region , FTQUAL_map }, + { FEATDEF_V_region , FTQUAL_product }, + { FEATDEF_V_region , FTQUAL_standard_name }, + { FEATDEF_V_region , FTQUAL_usedin }, + + { FEATDEF_V_segment , FTQUAL_allele }, + { FEATDEF_V_segment , FTQUAL_label }, + { FEATDEF_V_segment , FTQUAL_map }, + { FEATDEF_V_segment , FTQUAL_product }, + { FEATDEF_V_segment , FTQUAL_standard_name }, + { FEATDEF_V_segment , FTQUAL_usedin }, + + { FEATDEF_variation , FTQUAL_allele }, + { FEATDEF_variation , FTQUAL_frequency }, + { FEATDEF_variation , FTQUAL_label }, + { FEATDEF_variation , FTQUAL_map }, + { FEATDEF_variation , FTQUAL_phenotype }, + { FEATDEF_variation , FTQUAL_product }, + { FEATDEF_variation , FTQUAL_replace }, + { FEATDEF_variation , FTQUAL_standard_name }, + { FEATDEF_variation , FTQUAL_usedin }, + + { FEATDEF_3clip , FTQUAL_allele }, + { FEATDEF_3clip , FTQUAL_function }, + { FEATDEF_3clip , FTQUAL_label }, + { FEATDEF_3clip , FTQUAL_map }, + { FEATDEF_3clip , FTQUAL_standard_name }, + { FEATDEF_3clip , FTQUAL_usedin }, + + { FEATDEF_3UTR , FTQUAL_allele }, + { FEATDEF_3UTR , FTQUAL_function }, + { FEATDEF_3UTR , FTQUAL_label }, + { FEATDEF_3UTR , FTQUAL_map }, + { FEATDEF_3UTR , FTQUAL_standard_name }, + { FEATDEF_3UTR , FTQUAL_usedin }, + + { FEATDEF_5clip , FTQUAL_allele }, + { FEATDEF_5clip , FTQUAL_function }, + { FEATDEF_5clip , FTQUAL_label }, + { FEATDEF_5clip , FTQUAL_map }, + { FEATDEF_5clip , FTQUAL_standard_name }, + { FEATDEF_5clip , FTQUAL_usedin }, + + { FEATDEF_5UTR , FTQUAL_allele }, + { FEATDEF_5UTR , FTQUAL_function }, + { FEATDEF_5UTR , FTQUAL_label }, + { FEATDEF_5UTR , FTQUAL_map }, + { FEATDEF_5UTR , FTQUAL_standard_name }, + { FEATDEF_5UTR , FTQUAL_usedin }, + + { FEATDEF_10_signal , FTQUAL_allele }, + { FEATDEF_10_signal , FTQUAL_label }, + { FEATDEF_10_signal , FTQUAL_map }, + { FEATDEF_10_signal , FTQUAL_operon }, + { FEATDEF_10_signal , FTQUAL_standard_name }, + { FEATDEF_10_signal , FTQUAL_usedin }, + + { FEATDEF_35_signal , FTQUAL_allele }, + { FEATDEF_35_signal , FTQUAL_label }, + { FEATDEF_35_signal , FTQUAL_map }, + { FEATDEF_35_signal , FTQUAL_operon }, + { FEATDEF_35_signal , FTQUAL_standard_name }, + { FEATDEF_35_signal , FTQUAL_usedin }, + + { FEATDEF_REGION , FTQUAL_function }, + { FEATDEF_REGION , FTQUAL_label }, + { FEATDEF_REGION , FTQUAL_map }, + { FEATDEF_REGION , FTQUAL_number }, + { FEATDEF_REGION , FTQUAL_phenotype }, + { FEATDEF_REGION , FTQUAL_product }, + { FEATDEF_REGION , FTQUAL_standard_name }, + { FEATDEF_REGION , FTQUAL_usedin }, + + { FEATDEF_mat_peptide_aa , FTQUAL_allele }, + { FEATDEF_mat_peptide_aa , FTQUAL_label }, + { FEATDEF_mat_peptide_aa , FTQUAL_map }, + { FEATDEF_mat_peptide_aa , FTQUAL_product }, + { FEATDEF_mat_peptide_aa , FTQUAL_standard_name }, + { FEATDEF_mat_peptide_aa , FTQUAL_usedin }, + + { FEATDEF_sig_peptide_aa , FTQUAL_allele }, + { FEATDEF_sig_peptide_aa , FTQUAL_label }, + { FEATDEF_sig_peptide_aa , FTQUAL_map }, + { FEATDEF_sig_peptide_aa , FTQUAL_product }, + { FEATDEF_sig_peptide_aa , FTQUAL_standard_name }, + { FEATDEF_sig_peptide_aa , FTQUAL_usedin }, + + { FEATDEF_transit_peptide_aa , FTQUAL_allele }, + { FEATDEF_transit_peptide_aa , FTQUAL_label }, + { FEATDEF_transit_peptide_aa , FTQUAL_map }, + { FEATDEF_transit_peptide_aa , FTQUAL_product }, + { FEATDEF_transit_peptide_aa , FTQUAL_standard_name }, + { FEATDEF_transit_peptide_aa , FTQUAL_usedin }, + + { FEATDEF_snoRNA , FTQUAL_allele }, + { FEATDEF_snoRNA , FTQUAL_function }, + { FEATDEF_snoRNA , FTQUAL_label }, + { FEATDEF_snoRNA , FTQUAL_map }, + { FEATDEF_snoRNA , FTQUAL_product }, + { FEATDEF_snoRNA , FTQUAL_standard_name }, + { FEATDEF_snoRNA , FTQUAL_usedin }, + + { FEATDEF_operon , FTQUAL_allele }, + { FEATDEF_operon , FTQUAL_function }, + { FEATDEF_operon , FTQUAL_label }, + { FEATDEF_operon , FTQUAL_map }, + { FEATDEF_operon , FTQUAL_operon }, + { FEATDEF_operon , FTQUAL_phenotype }, + { FEATDEF_operon , FTQUAL_standard_name }, + { FEATDEF_operon , FTQUAL_usedin }, + + { FEATDEF_oriT , FTQUAL_allele }, + { FEATDEF_oriT , FTQUAL_direction }, + { FEATDEF_oriT , FTQUAL_label }, + { FEATDEF_oriT , FTQUAL_map }, + { FEATDEF_oriT , FTQUAL_rpt_type }, + { FEATDEF_oriT , FTQUAL_rpt_type }, + { FEATDEF_oriT , FTQUAL_rpt_unit }, + { FEATDEF_oriT , FTQUAL_standard_name }, + { FEATDEF_oriT , FTQUAL_usedin } }; /* comparison of ValQual's -- first compare featdef then ftqual */ @@ -9406,7 +9745,27 @@ static CharPtr validConsSpliceString [] = { }; static CharPtr validExceptionString [] = { - "RNA editing", "reasons given in citation", NULL + "RNA editing", + "reasons given in citation", + NULL +}; + +static CharPtr validRefSeqExceptionString [] = { + "RNA editing", + "reasons given in citation", + "ribosomal slippage", + "ribosome slippage", + "trans splicing", + "trans-splicing", + "alternative processing", + "alternate processing", + "artificial frameshift", + "non-consensus splice site", + "nonconsensus splice site", + "rearrangement required for product", + "unclassified transcription discrepancy", + "unclassified translation discrepancy", + NULL }; static Boolean StringInStringList (CharPtr testString, CharPtr PNTR stringList) { @@ -9580,6 +9939,7 @@ static Boolean ValidateRptUnit ( ) { +#if 0 CharPtr str; Char tmp [255]; @@ -9607,6 +9967,7 @@ static Boolean ValidateRptUnit ( while (IS_DIGIT (*str)) str++; if (*str != '\0') return FALSE; /* mustn't be anything after the yyy */ } +#endif return TRUE; } @@ -10796,6 +11157,7 @@ static void AddLocusBlock ( ) { + size_t acclen; IntAsn2gbJobPtr ajp; Asn2gbSectPtr asp; BaseBlockPtr bbp; @@ -10822,9 +11184,11 @@ static void AddLocusBlock ( Int2 istrand; Boolean is_nm = FALSE; Boolean is_np = FALSE; + Boolean is_nz = FALSE; Boolean is_transgenic = FALSE; Char len [32]; Int4 length; + size_t loclen; Char locus [41]; MolInfoPtr mip; Char mol [30]; @@ -10887,6 +11251,8 @@ static void AddLocusBlock ( } else if (StringNCmp (tsip->accession, "NP_", 3) == 0 || StringNCmp (tsip->accession, "XP_", 3) == 0) { is_np = TRUE; + } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) { + is_nz = TRUE; } } break; @@ -10936,6 +11302,7 @@ static void AddLocusBlock ( } } if (nm != NULL) { + /* sfp = SeqMgrGetNextFeature (nm, NULL, SEQFEAT_GENE, 0, &fcontext); if (sfp != NULL) { StringNCpy_0 (gene, fcontext.label, sizeof (gene)); @@ -10946,6 +11313,7 @@ static void AddLocusBlock ( gene [0] = '\0'; } } + */ } /* more complicated code to get parent locus, if segmented, goes here */ @@ -10992,10 +11360,15 @@ static void AddLocusBlock ( case SEQID_DDBJ : tsip = (TextSeqIdPtr) sip->data.ptrvalue; if (tsip != NULL && tsip->accession != NULL) { - if (StringLen (tsip->accession) == 12) { + acclen = StringLen (tsip->accession); + if (acclen == 12) { if (StringCmp (tsip->accession + 6, "000000") == 0) { wgsmaster = TRUE; } + } else if (acclen == 13) { + if (StringCmp (tsip->accession + 6, "0000000") == 0) { + wgsmaster = TRUE; + } } } break; @@ -11060,7 +11433,7 @@ static void AddLocusBlock ( if (awp->newLocusLine) { - if (wgsmaster) { + if (wgsmaster && (! is_nz)) { sprintf (len, "%ld rc", (long) length); } else { sprintf (len, "%ld bp", (long) length); @@ -11333,7 +11706,10 @@ static void AddLocusBlock ( } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) { FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE); FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE); - FFAddNChar(ffstring, ' ', 15 - 5 - StringLen(locus), FALSE); + loclen = StringLen(locus); + if (14 - 5 - loclen > 0) { + FFAddNChar(ffstring, ' ', 14 - 5 - loclen, FALSE); + } if (awp->hup) { FFAddOneString (ffstring, " confidential; ", FALSE, FALSE, TILDE_IGNORE); } else { @@ -11428,9 +11804,9 @@ static void AddDeflineBlock ( Asn2gbSectPtr asp; BaseBlockPtr bbp; BioseqPtr bsp; - Char buf[1024]; + Char buf[4096]; /*CharPtr buf; - size_t buflen = 1024;*/ + size_t buflen = 4096;*/ SeqMgrDescContext dcontext; GBSeqPtr gbseq; ItemInfo ii; @@ -11506,6 +11882,7 @@ static void AddAccessionBlock ( ) { + size_t acclen; SeqIdPtr accn = NULL; IntAsn2gbJobPtr ajp; Asn2gbSectPtr asp; @@ -11554,9 +11931,13 @@ static void AddAccessionBlock ( accn = sip; tsip = (TextSeqIdPtr) sip->data.ptrvalue; if (tsip != NULL) { - if (StringLen (tsip->accession) == 12) { + acclen = StringLen (tsip->accession); + if (acclen == 12) { wgsaccn = tsip->accession; len = 12; + } else if (acclen == 13) { + wgsaccn = tsip->accession; + len = 13; } } break; @@ -11661,9 +12042,12 @@ static void AddAccessionBlock ( mip = (MolInfoPtr) sdp->data.ptrvalue; if (mip != NULL && mip->tech == MI_TECH_wgs) { StringNCpy_0 (buf, wgsaccn, sizeof (buf)); - if (StringCmp (buf + len - 6, "000000") != 0) { + acclen = StringLen (buf); + if (acclen == 12 && StringCmp (buf + len - 6, "000000") != 0) { StringCpy (buf + len - 6, "000000"); - } else if (StringCmp (buf + len - 8, "00000000") != 0) { + } else if (acclen == 13 && StringCmp (buf + len - 7, "0000000") != 0) { + StringCpy (buf + len - 7, "0000000"); + } else if (acclen == 15 && StringCmp (buf + len - 8, "00000000") != 0) { StringCpy (buf + len - 8, "00000000"); } else { buf [0] = '\0'; @@ -13253,7 +13637,9 @@ static void AddSourceBlock ( IntAsn2gbJobPtr ajp; BaseBlockPtr bbp; BioseqPtr bsp; + SeqFeatPtr cds; SeqMgrDescContext dcontext; + BioseqPtr dna; SeqMgrFeatContext fcontext; GBBlockPtr gbp; SeqDescrPtr sdp; @@ -13290,6 +13676,29 @@ static void AddSourceBlock ( bbp->entityID = fcontext.entityID; bbp->itemID = fcontext.itemID; bbp->itemtype = OBJ_SEQFEAT; + } else if (ISA_aa (bsp->mol)) { + + /* if protein with no sources, get sources applicable to DNA location of CDS */ + + cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext); + if (cds != NULL) { + sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext); + if (sfp != NULL) { + bbp->entityID = fcontext.entityID; + bbp->itemID = fcontext.itemID; + bbp->itemtype = OBJ_SEQFEAT; + } else { + dna = BioseqFindFromSeqLoc (cds->location); + if (dna != NULL) { + sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext); + if (sdp != NULL) { + bbp->entityID = dcontext.entityID; + bbp->itemID = dcontext.itemID; + bbp->itemtype = OBJ_SEQDESC; + } + } + } + } } } } @@ -13360,7 +13769,7 @@ static RefBlockPtr AddPub ( ) { - Char buf [121]; + Char buf [521]; /* increased for consortium in citsub */ CitArtPtr cap; CitBookPtr cbp; CitGenPtr cgp; @@ -13665,6 +14074,7 @@ static int LIBCALLBACK SortReferences ( rbp1 = rbp2; rbp2 = temp; } + /* if same uid, one with just uids goes last to be excised but remembered */ if ((rbp1->pmid != 0 && rbp2->pmid != 0) || (rbp1->muid != 0 && rbp2->muid != 0)) { @@ -13794,6 +14204,20 @@ static CharPtr GetAuthorsPlusConsortium ( return tmp; } +static Boolean HasNoPmidOrMuid ( + PubdescPtr pdp +) + +{ + ValNodePtr vnp; + + if (pdp == NULL) return TRUE; + for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) { + if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return FALSE; + } + return TRUE; +} + typedef struct cdspubs { Asn2gbWorkPtr awp; BioseqPtr target; @@ -13810,6 +14234,7 @@ static Boolean LIBCALLBACK GetRefsOnCDS ( Asn2gbWorkPtr awp; CdsPubsPtr cpp; IntRefBlockPtr irp; + Boolean okay; PubdescPtr pdp; RefBlockPtr rbp; @@ -13818,21 +14243,30 @@ static Boolean LIBCALLBACK GetRefsOnCDS ( awp = cpp->awp; if (awp == NULL) return TRUE; + okay = TRUE; pdp = (PubdescPtr) sfp->data.value.ptrvalue; - rbp = AddPub (awp, &(awp->pubhead), pdp); - if (rbp != NULL) { + if (awp->format == FTABLE_FMT) { + if (HasNoPmidOrMuid (pdp)) { + okay = FALSE; + } + } - rbp->entityID = context->entityID; - rbp->itemID = context->itemID; - rbp->itemtype = OBJ_SEQFEAT; + if (okay) { + rbp = AddPub (awp, &(awp->pubhead), pdp); + if (rbp != NULL) { - irp = (IntRefBlockPtr) rbp; - irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE); - alp = GetAuthListPtr (pdp, NULL); - if (alp != NULL) { - irp->authstr = GetAuthorsPlusConsortium (awp->format, alp); + rbp->entityID = context->entityID; + rbp->itemID = context->itemID; + rbp->itemtype = OBJ_SEQFEAT; + + irp = (IntRefBlockPtr) rbp; + irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE); + alp = GetAuthListPtr (pdp, NULL); + if (alp != NULL) { + irp->authstr = GetAuthorsPlusConsortium (awp->format, alp); + } + irp->index = 0; } - irp->index = 0; } return TRUE; @@ -13917,6 +14351,12 @@ static void GetRefsOnBioseq ( } } } + if (awp->format == FTABLE_FMT) { + pdp = (PubdescPtr) sdp->data.ptrvalue; + if (HasNoPmidOrMuid (pdp)) { + okay = FALSE; + } + } if (okay) { pdp = (PubdescPtr) sdp->data.ptrvalue; @@ -13939,8 +14379,6 @@ static void GetRefsOnBioseq ( sdp = SeqMgrGetNextDescriptor (target, sdp, Seq_descr_pub, &dcontext); } - SeqIdFree (sint.id); - /* if protein with no pubs, get pubs applicable to DNA location of CDS */ if (cdsloc != NULL) { @@ -13950,6 +14388,8 @@ static void GetRefsOnBioseq ( SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS); } + SeqIdFree (sint.id); + /* features are indexed on parent if segmented */ bsp = awp->parent; @@ -13976,6 +14416,12 @@ static void GetRefsOnBioseq ( takeIt = TRUE; } } + if (awp->format == FTABLE_FMT) { + pdp = (PubdescPtr) sdp->data.ptrvalue; + if (HasNoPmidOrMuid (pdp)) { + takeIt = FALSE; + } + } if (takeIt /* stop >= from && stop <= to */) { @@ -13999,7 +14445,7 @@ static void GetRefsOnBioseq ( right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END); strand = SeqLocStrand (ajp->ajp.slp); split = FALSE; - newloc = SeqLocReMap (sip, ajp->ajp.slp, irp->loc, 0, FALSE); + newloc = SeqLocReMapEx (sip, ajp->ajp.slp, irp->loc, 0, FALSE, ajp->masterStyle); /* newloc = SeqLocCopyRegion (sip, irp->loc, bsp, left, right, strand, &split); */ @@ -14089,6 +14535,7 @@ static Boolean AddReferenceBlock ( ValNodePtr head = NULL; Int2 i; IntRefBlockPtr irp; + Boolean is_aa; Boolean is_embl = FALSE; Boolean is_patent = FALSE; IntRefBlockPtr lastirp; @@ -14128,6 +14575,8 @@ static Boolean AddReferenceBlock ( } } + is_aa = (Boolean) ISA_aa (bsp->mol); + if (bsp->repr == Seq_repr_seg) { /* collect publication descriptors on local parts */ @@ -14254,9 +14703,14 @@ static Boolean AddReferenceBlock ( if (rbp != NULL) { irp = (IntRefBlockPtr) rbp; if (irp->justuids) { - /* do not allow justuids reference to appear by itself - S79174.1 */ - excise = TRUE; - /* justuids should still combine, even if no authors - S67070.1 */ + if (isRefSeq && is_aa) { + /* if allowing justuid in protein RefSeq, try to look up dynamically */ + excise = TRUE; /* Back to old behavior, do not fetch */ + } else { + /* do not allow justuids reference to appear by itself - S79174.1 */ + excise = TRUE; + /* justuids should still combine, even if no authors - S67070.1 */ + } } else if (is_embl && is_patent) { /* EMBL patent records do not need author or title - A29528.1 */ } else if (StringHasNoText (irp->authstr)) { @@ -14563,6 +15017,7 @@ static void AddWGSMasterCommentString ( ) { + size_t acclen; BioSourcePtr biop; Char buf [256]; SeqMgrDescContext dcontext; @@ -14618,10 +15073,14 @@ static void AddWGSMasterCommentString ( last = "?"; } ver [0] = '\0'; - if (StringLen (wgsname) == 12) { + acclen = StringLen (wgsname); + if (acclen == 12) { + StringCpy (ver, wgsname + 4); + ver [2] = '\0'; + } else if (acclen == 13) { StringCpy (ver, wgsname + 4); ver [2] = '\0'; - } else if (StringLen (wgsname) == 15) { + } else if (acclen == 15) { StringCpy (ver, wgsname + 7); ver [2] = '\0'; } @@ -14630,7 +15089,7 @@ static void AddWGSMasterCommentString ( FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); sprintf (buf, " This version of the project (%s) has the accession number %s,", ver, wgsname); - FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND); + FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND); if (StringCmp (first, last) != 0) { sprintf (buf, " and consists of sequences %s-%s.", first, last); @@ -14736,13 +15195,15 @@ static CharPtr GetStrForBankit ( return ptr; } -static CharPtr reftxt0 = "The reference sequence was derived from "; -static CharPtr reftxt1 = " This record is predicted by genome sequence analysis and is not yet supported by experimental evidence. "; -static CharPtr reftxt2 = " This record has not yet been subject to final NCBI review. "; -static CharPtr reftxt3 = " The mRNA record is supported by experimental evidence; however, the coding sequence is predicted. "; -static CharPtr reftxt4 = " This record has undergone preliminary review of the sequence, but has not yet been subject to final NCBI review. "; +static CharPtr reftxt0 = " The reference sequence was derived from "; +static CharPtr reftxt1 = " This record is predicted by genome sequence analysis and is not yet supported by experimental evidence."; +static CharPtr reftxt2 = " This record has not yet been subject to final NCBI review."; +static CharPtr reftxt3 = " The mRNA record is supported by experimental evidence; however, the coding sequence is predicted."; +static CharPtr reftxt4 = " This record has undergone preliminary review of the sequence, but has not yet been subject to final review."; static CharPtr reftxt5 = " This record has been curated by "; -static CharPtr reftxt6 = " This RefSeq record is provided to represent a collection of whole genome shotgun sequences. "; +static CharPtr reftxt6 = " This record is predicted by automated computational analysis."; +static CharPtr reftxt7 = " This record is provided to represent a collection of whole genome shotgun sequences."; +static CharPtr reftxt8 = " This record is derived from an annotated genomic sequence ("; static CharPtr GetStatusForRefTrack ( UserObjectPtr uop @@ -14762,7 +15223,7 @@ static CharPtr GetStatusForRefTrack ( urf = ufp; } } - if (urf == NULL || urf->choice != 11) return NULL; + /* if (urf == NULL || urf->choice != 11) return NULL; */ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { oip = ufp->label; if (StringCmp (oip->str, "Status") == 0) { @@ -14777,6 +15238,8 @@ static CharPtr GetStatusForRefTrack ( return "VALIDATED "; } else if (StringICmp (st, "Reviewed") == 0) { return "REVIEWED "; + } else if (StringICmp (st, "Model") == 0) { + return "MODEL "; } else if (StringICmp (st, "WGS") == 0) { return "WGS "; } @@ -14793,7 +15256,7 @@ static void AddStrForRefTrack ( ) { - CharPtr accn, curator = NULL, st; + CharPtr accn, curator = NULL, source = NULL, st; ObjectIdPtr oip; UserFieldPtr ufp, tmp, u, urf = NULL; Int2 i = 0; @@ -14822,14 +15285,21 @@ static void AddStrForRefTrack ( review = 4; } else if (StringICmp (st, "Reviewed") == 0) { review = 5; - } else if (StringICmp (st, "WGS") == 0) { + } else if (StringICmp (st, "Model") == 0) { review = 6; + } else if (StringICmp (st, "WGS") == 0) { + review = 7; } } else if (StringCmp (oip->str, "Collaborator") == 0) { st = (CharPtr) ufp->data.ptrvalue; if (! StringHasNoText (st)) { curator = st; } + } else if (StringCmp (oip->str, "GenomicSource") == 0) { + st = (CharPtr) ufp->data.ptrvalue; + if (! StringHasNoText (st)) { + source = st; + } } } if (urf != NULL && urf->choice == 11) { @@ -14842,74 +15312,120 @@ static void AddStrForRefTrack ( } } } - if ( GetWWW(ajp) ) { - FFAddTextToString(ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); - } else { - FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE); - } - FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE); - if (review == 1) { - FFAddOneString (ffstring, reftxt1, FALSE, FALSE, TILDE_IGNORE); - } else if (review == 2) { - FFAddOneString (ffstring, reftxt2, FALSE, FALSE, TILDE_IGNORE); - } else if (review == 3) { - FFAddOneString (ffstring, reftxt3, FALSE, FALSE, TILDE_IGNORE); - } else if (review == 4) { - FFAddOneString (ffstring, reftxt4, FALSE, FALSE, TILDE_IGNORE); - } else if (review == 5) { - if (curator == NULL) { - curator = "NCBI staff"; - } - FFAddOneString (ffstring, reftxt5, FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, ". ", FALSE, FALSE, TILDE_IGNORE); - } else if (review == 6) { - FFAddOneString (ffstring, reftxt6, FALSE, FALSE, TILDE_IGNORE); - } - if (i > 0) { - FFAddOneString (ffstring, reftxt0, FALSE, FALSE, TILDE_IGNORE); - - for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) { - is_accn = TRUE; - for (u = tmp->data.ptrvalue; u != NULL; u = u->next) { - oip = u->label; - if (StringCmp (oip->str, "accession") == 0) break; - if (StringCmp (oip->str, "name") == 0) { - is_accn = FALSE; - break; - } + } + if ( GetWWW(ajp) ) { + FFAddTextToString(ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } else { + FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE); + if (review == 1) { + FFAddOneString (ffstring, reftxt1, FALSE, FALSE, TILDE_IGNORE); + } else if (review == 2) { + FFAddOneString (ffstring, reftxt2, FALSE, FALSE, TILDE_IGNORE); + } else if (review == 3) { + FFAddOneString (ffstring, reftxt3, FALSE, FALSE, TILDE_IGNORE); + } else if (review == 4) { + FFAddOneString (ffstring, reftxt4, FALSE, FALSE, TILDE_IGNORE); + } else if (review == 5) { + if (curator == NULL) { + curator = "NCBI staff"; + } + FFAddOneString (ffstring, reftxt5, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE); + } else if (review == 6) { + FFAddOneString (ffstring, reftxt6, FALSE, FALSE, TILDE_IGNORE); + } else if (review == 7) { + FFAddOneString (ffstring, reftxt7, FALSE, FALSE, TILDE_IGNORE); + } + if (review != 5 && curator != NULL) { + FFAddOneString (ffstring, reftxt5, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE); + } + if (source != NULL) { + FFAddOneString (ffstring, reftxt8, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, source, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, ").", FALSE, FALSE, TILDE_IGNORE); + } + if (i > 0) { + FFAddOneString (ffstring, reftxt0, FALSE, FALSE, TILDE_IGNORE); + + for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) { + is_accn = TRUE; + for (u = tmp->data.ptrvalue; u != NULL; u = u->next) { + oip = u->label; + if (StringCmp (oip->str, "accession") == 0) break; + if (StringCmp (oip->str, "name") == 0) { + is_accn = FALSE; + break; } - if (u == NULL) continue; - accn = (CharPtr) u->data.ptrvalue; - if (StringHasNoText (accn)) continue; - if (is_accn && GetWWW(ajp) ) { - FFAddTextToString(ffstring, "<a href=", link_seq, NULL, FALSE, FALSE, TILDE_IGNORE); - FFAddTextToString(ffstring, "val=", accn, ">", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } + if (u == NULL) continue; + accn = (CharPtr) u->data.ptrvalue; + if (StringHasNoText (accn)) continue; + if (is_accn && GetWWW(ajp) ) { + FFAddTextToString(ffstring, "<a href=", link_seq, NULL, FALSE, FALSE, TILDE_IGNORE); + FFAddTextToString(ffstring, "val=", accn, ">", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } else { + FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE); + } + if (tmp->next != NULL) { + ufp = tmp->next; + if (ufp->next != NULL) { + FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE); } else { - FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, " and ", FALSE, FALSE, TILDE_IGNORE); } - if (tmp->next != NULL) { - ufp = tmp->next; - if (ufp->next != NULL) { - FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE); - } else { - FFAddOneString (ffstring, " and ", FALSE, FALSE, TILDE_IGNORE); + } + } + FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_EXPAND); + } +} + +static CharPtr GetGenomeBuildNumber ( + UserObjectPtr uop +) + +{ + ObjectIdPtr oip; + CharPtr str; + UserFieldPtr ufp; + + if (uop == NULL) return NULL; + if ((oip = uop->type) == NULL) return NULL; + if (StringCmp (oip->str, "GenomeBuild") != 0) return NULL; + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + oip = ufp->label; + if (StringCmp(oip->str, "NcbiAnnotation") == 0) { + if (ufp->choice == 1) { /* string */ + str = ufp->data.ptrvalue; + if (! StringHasNoText (str)) return str; + } + } else if (StringCmp (oip->str, "Annotation") == 0) { + if (ufp->choice == 1) { /* string */ + str = ufp->data.ptrvalue; + if (! StringHasNoText (str)) { + if (StringNICmp (str, "NCBI build ", 11) == 0) { + if (! StringHasNoText (str + 11)) { + return (str + 11); + } } } } - FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_EXPAND); } } + return NULL; } -static CharPtr reftxt11 = "This model reference sequence was predicted from NCBI contig"; -static CharPtr reftxt12 = "by automated computational analysis"; -static CharPtr reftxt13 = "using gene prediction method:"; +static CharPtr reftxt11 = "This record is predicted by automated computational analysis. This record is derived from an annotated genomic sequence"; +static CharPtr reftxt12 = "using gene prediction method:"; static void FindModelEvidenceUop ( UserObjectPtr uop, @@ -15113,7 +15629,10 @@ static Boolean GetGeneAndLocus ( return TRUE; } -static CharPtr reftxt21 = "GENOME ANNOTATION REFSEQ: NCBI contigs are derived from assembled genomic sequence data. They may include both draft and finished sequence."; +static CharPtr reftxt21 = "NCBI contigs are derived from assembled genomic sequence data."; + +static CharPtr reftxt22 = "Features on this sequence have been produced for build "; +static CharPtr reftxt23 = " of the NCBI's genome annotation"; static CharPtr nsAreGapsString = "The strings of n's in this record represent gaps between contigs, and the length of each string corresponds to the length of the gap."; @@ -15492,6 +16011,7 @@ static void AddCommentBlock ( ) { + size_t acclen; IntAsn2gbJobPtr ajp; BioseqPtr bsp; Char buf [128]; @@ -15508,15 +16028,19 @@ static void AddCommentBlock ( Boolean first = TRUE; GBBlockPtr gbp; CharPtr geneName = NULL; + CharPtr genomeBuildNumber = NULL; Int4 gi = 0; CommentBlockPtr gsdbcbp = NULL; Int4 gsdbid = 0; Boolean has_gaps = FALSE; + Boolean hasRefTrackStatus = FALSE; SeqHistPtr hist; + Boolean is_collab = FALSE; Boolean is_other = FALSE; Boolean is_tpa = FALSE; Boolean is_wgs = FALSE; SeqLitPtr litp; + ObjectIdPtr localID = NULL; Char locusID [32]; CharPtr method = NULL; MolInfoPtr mip; @@ -15545,31 +16069,137 @@ static void AddCommentBlock ( ffstring = FFGetString(ajp); if ( ffstring == NULL ) return; + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); + while (sdp != NULL) { + uop = (UserObjectPtr) sdp->data.ptrvalue; + if (uop != NULL) { + str = GetStatusForRefTrack (uop); + if (str != NULL) { + hasRefTrackStatus = TRUE; + } + if (genomeBuildNumber == NULL) { + genomeBuildNumber = GetGenomeBuildNumber (uop); + } + } + sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); + } + for (sip = bsp->id; sip != NULL; sip = sip->next) { if (sip->choice == SEQID_OTHER) { tsip = (TextSeqIdPtr) sip->data.ptrvalue; if (tsip != NULL) { is_other = TRUE; - if (StringNCmp(tsip->accession, "NT_", 3) == 0 || StringNCmp(tsip->accession, "NW_", 3) == 0) { + if (StringNCmp (tsip->accession, "NC_", 3) == 0) { + if (! StringHasNoText (genomeBuildNumber)) { + cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock)); + if (cbp != NULL) { - cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock)); - if (cbp != NULL) { + cbp->first = first; + first = FALSE; - cbp->first = first; - first = FALSE; + if (cbp->first) { + FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE); + } else { + FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE); + } - if (cbp->first) { - FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE); - } else { - FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE); + FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE); + + if ( GetWWW(ajp) ) { + FFAddTextToString (ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE); + if ( GetWWW(ajp) ) { + FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, ": ", FALSE, FALSE, TILDE_IGNORE); + + FFAddOneString (ffstring, reftxt22, FALSE, FALSE, TILDE_EXPAND); + FFAddOneString (ffstring, genomeBuildNumber, FALSE, FALSE, TILDE_EXPAND); + FFAddOneString (ffstring, reftxt23, FALSE, FALSE, TILDE_EXPAND); + + FFAddOneString (ffstring, " [see ", FALSE, FALSE, TILDE_EXPAND); + + if ( GetWWW(ajp) ) { + FFAddTextToString (ffstring, "<a href=", doc_link, ">", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, "documentation", FALSE, FALSE, TILDE_IGNORE); + if ( GetWWW(ajp) ) { + FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } + + FFAddOneString (ffstring, "].", FALSE, FALSE, TILDE_EXPAND); + + cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC"); + FFRecycleString(ajp, ffstring); + ffstring = FFGetString(ajp); } + } - FFAddOneString (ffstring, reftxt21, TRUE, FALSE, TILDE_EXPAND); + } else if (StringNCmp(tsip->accession, "NT_", 3) == 0 || StringNCmp(tsip->accession, "NW_", 3) == 0) { - cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC"); - FFRecycleString(ajp, ffstring); - ffstring = FFGetString(ajp); + if (! hasRefTrackStatus) { + cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock)); + if (cbp != NULL) { + + cbp->first = first; + first = FALSE; + + if (cbp->first) { + FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE); + } else { + FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE); + } + + FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE); + + if ( GetWWW(ajp) ) { + FFAddTextToString (ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE); + if ( GetWWW(ajp) ) { + FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, ": ", FALSE, FALSE, TILDE_IGNORE); + + if (! StringHasNoText (genomeBuildNumber)) { + FFAddOneString (ffstring, reftxt22, FALSE, FALSE, TILDE_EXPAND); + FFAddOneString (ffstring, genomeBuildNumber, FALSE, FALSE, TILDE_EXPAND); + FFAddOneString (ffstring, reftxt23, FALSE, FALSE, TILDE_EXPAND); + + FFAddOneString (ffstring, " [see ", FALSE, FALSE, TILDE_EXPAND); + + if ( GetWWW(ajp) ) { + FFAddTextToString (ffstring, "<a href=", doc_link, ">", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, "documentation", FALSE, FALSE, TILDE_IGNORE); + if ( GetWWW(ajp) ) { + FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } + + FFAddOneString (ffstring, "].", FALSE, FALSE, TILDE_EXPAND); + } else { + + FFAddOneString (ffstring, reftxt21, TRUE, FALSE, TILDE_EXPAND); + + FFAddOneString (ffstring, "~Also see:~ ", FALSE, FALSE, TILDE_EXPAND); + + if ( GetWWW(ajp) ) { + FFAddTextToString (ffstring, "<a href=", doc_link, ">", FALSE, FALSE, TILDE_IGNORE); + } + FFAddOneString (ffstring, "Documentation", FALSE, FALSE, TILDE_IGNORE); + if ( GetWWW(ajp) ) { + FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); + } + + FFAddOneString (ffstring, " of NCBI's Annotation Process~ ", FALSE, FALSE, TILDE_EXPAND); + } + + cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC"); + FFRecycleString(ajp, ffstring); + ffstring = FFGetString(ajp); + } } } else if (StringNCmp(tsip->accession, "XP_", 3) == 0 || @@ -15595,7 +16225,7 @@ static void AddCommentBlock ( FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE); } - FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, "MODEL ", FALSE, FALSE, TILDE_IGNORE); if ( GetWWW(ajp) ) { FFAddTextToString (ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE); @@ -15606,7 +16236,7 @@ static void AddCommentBlock ( } FFAddOneString (ffstring, ": ", FALSE, FALSE, TILDE_IGNORE); - FFAddTextToString (ffstring, NULL, reftxt11, " ", FALSE, FALSE, TILDE_IGNORE); + FFAddTextToString (ffstring, NULL, reftxt11, " (", FALSE, FALSE, TILDE_IGNORE); if ( GetWWW(ajp) ) { FFAddTextToString (ffstring, "<a href=", nt_link, name, FALSE, FALSE, TILDE_IGNORE); @@ -15617,12 +16247,11 @@ static void AddCommentBlock ( FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE); } - FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, reftxt12, FALSE, FALSE, TILDE_IGNORE); - + FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE); + if (method != NULL) { FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE); - FFAddOneString (ffstring, reftxt13, FALSE, FALSE, TILDE_IGNORE); + FFAddOneString (ffstring, reftxt12, FALSE, FALSE, TILDE_IGNORE); FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE); FFAddOneString (ffstring, method, FALSE, FALSE, TILDE_IGNORE); } @@ -15691,14 +16320,23 @@ static void AddCommentBlock ( } else if (sip->choice == SEQID_GENBANK || sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) { + is_collab = TRUE; + tsip = (TextSeqIdPtr) sip->data.ptrvalue; if (tsip != NULL && tsip->accession != NULL) { - if (StringLen (tsip->accession) == 12) { + acclen = StringLen (tsip->accession); + if (acclen == 12) { is_wgs = TRUE; if (StringCmp (tsip->accession + 6, "000000") == 0) { wgsaccn = tsip->accession; wgsname = tsip->name; /* master accession has 8 zeroes, name has project version plus 6 zeroes */ } + } else if (acclen == 13) { + is_wgs = TRUE; + if (StringCmp (tsip->accession + 6, "0000000") == 0) { + wgsaccn = tsip->accession; + wgsname = tsip->name; /* master accession has 9 zeroes, name has project version plus 7 zeroes */ + } } else if (ajp->newSourceOrg && StringLen (tsip->accession) == 6) { ch = tsip->accession [0]; if (ch == 'J' || ch == 'K' || ch == 'L' || ch == 'M') { @@ -15726,6 +16364,45 @@ static void AddCommentBlock ( } else if (sip->choice == SEQID_GI) { gi = (Int4) sip->data.intvalue; + + } else if (sip->choice == SEQID_LOCAL) { + localID = (ObjectIdPtr) sip->data.ptrvalue; + } + } + + if (localID != NULL) { + if (is_tpa || is_collab) { + if (awp->mode == SEQUIN_MODE || awp->mode == DUMP_MODE) { + buf [0] = '\0'; + if (! StringHasNoText (localID->str)) { + if (StringLen (localID->str) < 100) { + sprintf (buf, "LocalID: %s", localID->str); + } else { + sprintf (buf, "LocalID string too large"); + } + } else { + sprintf (buf, "LocalID: %ld", (long) localID->id); + } + + cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock)); + if (cbp != NULL) { + + cbp->first = first; + first = FALSE; + + if (cbp->first) { + FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE); + } else { + FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE); + } + + FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND); + + cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC"); + FFRecycleString(ajp, ffstring); + ffstring = FFGetString(ajp); + } + } } } @@ -15933,7 +16610,7 @@ static void AddCommentBlock ( } } - if (hist->replace_ids != NULL && hist->replace_date != NULL) { + if (hist->replace_ids != NULL && hist->replace_date != NULL && awp->mode != SEQUIN_MODE) { okay = TRUE; for (sip = hist->replace_ids; sip != NULL; sip = sip->next) { @@ -16713,7 +17390,7 @@ static void GetSourcesOnBioseq ( right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END); strand = SeqLocStrand (ajp->ajp.slp); split = FALSE; - newloc = SeqLocReMap (sip, ajp->ajp.slp, isp->loc, 0, FALSE); + newloc = SeqLocReMapEx (sip, ajp->ajp.slp, isp->loc, 0, FALSE, ajp->masterStyle); /* newloc = SeqLocCopyRegion (sip, isp->loc, bsp, left, right, strand, &split); */ @@ -17066,6 +17743,27 @@ static void AddSourceFeatBlock ( #endif } + str = GetMolTypeQual (bsp); + if (str == NULL) { + switch (bsp->mol) { + case Seq_mol_dna : + str = "unassigned DNA"; + break; + case Seq_mol_rna : + str = "unassigned RNA"; + break; + case Seq_mol_aa : + break; + default : + str = "unassigned DNA"; + break; + } + } + if (str != NULL) { + FFAddNewLine(ffstring); + FFAddTextToString (ffstring, "/mol_type=\"", str, "\"", FALSE, TRUE, TILDE_TO_SPACES); + } + str = FFEndPrint(ajp, ffstring, awp->format, 5, 21, 5, 21, "FT"); bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, SOURCEFEAT_BLOCK, sizeof (IntSrcBlock)); @@ -17303,7 +18001,7 @@ static void GetFeatsOnCdsProduct ( slp = SeqLocMerge (nbsp, location, NULL, FALSE, TRUE, FALSE); if (slp != NULL) { sip = SeqIdParse ("lcl|dummy"); - newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE); + newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle); SeqIdFree (sip); SeqLocFree (slp); if (newloc == NULL) { @@ -17532,7 +18230,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, TRUE, FALSE); if (slp == NULL) return TRUE; sip = SeqIdParse ("lcl|dummy"); - newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE); + newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle); SeqIdFree (sip); SeqLocFree (slp); if (newloc == NULL) return TRUE; @@ -17600,7 +18298,8 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( } else if (sfp->product != NULL) { sip = SeqLocIdForProduct (sfp->product); if (sip != NULL) { - if (sip->choice == SEQID_GI && sip->data.intvalue > 0) { + if ((sip->choice == SEQID_GI && sip->data.intvalue > 0) || + sip->choice == SEQID_LOCAL) { sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID); oldscope = SeqEntrySetScope (sep); prod = BioseqFind (sip); @@ -17622,7 +18321,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( } } } - } else { + } else if (sip->choice == SEQID_GI && sip->data.intvalue > 0) { /* RELEASE_MODE requires that /protein_id is an accession */ gi = sip->data.intvalue; if (GetAccnVerFromServer (gi, buf)) { @@ -17649,6 +18348,12 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq ( } } } + } else { + if (sfp->excpt && (! StringHasNoText (sfp->except_text))) { + if (StringStr (sfp->except_text, "rearrangement required for product") != NULL) { + okay = TRUE; + } + } } } else { okay = TRUE; @@ -17863,17 +18568,36 @@ static Boolean LIBCALLBACK GetFeatsOnSeg ( ) { - Asn2gbWorkPtr awp; - BioseqPtr bsp; - Uint2 entityID; - Int4 from; - SeqLocPtr loc; - SeqIdPtr sip; - Int4 to; + IntAsn2gbJobPtr ajp; + Asn2gbWorkPtr awp; + BioseqPtr bsp; + Uint2 entityID; + Int4 from; + Int4 left; + SeqLocPtr loc; + Int4 right; + SeqIdPtr sip; + Int4 to; if (slp == NULL || context == NULL) return FALSE; awp = (Asn2gbWorkPtr) context->userdata; + if (awp == NULL) return FALSE; + ajp = awp->ajp; + if (ajp == NULL) return FALSE; + + /* do not fetch outside of desired component */ + + if (ajp->ajp.slp != NULL) { + left = GetOffsetInBioseq (ajp->ajp.slp, awp->parent, SEQLOC_LEFT_END); + right = GetOffsetInBioseq (ajp->ajp.slp, awp->parent, SEQLOC_RIGHT_END); + + from = context->cumOffset; + to = from + context->to - context->from; + if (left > to) return TRUE; + if (right < from) return TRUE; + } + from = awp->from; to = awp->to; @@ -18684,6 +19408,7 @@ static void DoOneSection ( ) { + size_t acclen; IntAsn2gbJobPtr ajp; Asn2gbSectPtr asp; CharPtr bases = NULL; @@ -18785,10 +19510,15 @@ static void DoOneSection ( sip->choice == SEQID_DDBJ) { tsip = (TextSeqIdPtr) sip->data.ptrvalue; if (tsip != NULL && tsip->accession != NULL) { - if (StringLen (tsip->accession) == 12) { + acclen = StringLen (tsip->accession); + if (acclen == 12) { if (StringCmp (tsip->accession + 6, "000000") == 0) { wgsmaster = TRUE; } + } else if (acclen == 13) { + if (StringCmp (tsip->accession + 6, "0000000") == 0) { + wgsmaster = TRUE; + } } } } else if (sip->choice == SEQID_OTHER) { @@ -18818,11 +19548,20 @@ static void DoOneSection ( } } + for (sip = bsp->id; sip != NULL; sip = sip->next) { + if (sip->choice == SEQID_OTHER) { + isRefSeq = TRUE; + } + } + /* start exploring and populating paragraphs */ if (awp->format == FTABLE_FMT) { AddFeatHeaderBlock (awp); + if (awp->showRefs) { + AddReferenceBlock (awp, isRefSeq); + } AddFeatureBlock (awp); } else { @@ -18875,11 +19614,6 @@ static void DoOneSection ( /* !!! RELEASE_MODE should check return value of AddReferenceBlock !!! */ - for (sip = bsp->id; sip != NULL; sip = sip->next) { - if (sip->choice == SEQID_OTHER) { - isRefSeq = TRUE; - } - } hasRefs = AddReferenceBlock (awp, isRefSeq); if (! hasRefs) { if (ajp->flags.needAtLeastOneRef) { @@ -18916,7 +19650,9 @@ static void DoOneSection ( } if (ISA_na (bsp->mol) && ajp->gbseq == NULL) { - AddBasecountBlock (awp, bases); + if (awp->showBaseCount) { + AddBasecountBlock (awp, bases); + } } AddOriginBlock (awp); @@ -18941,7 +19677,9 @@ static void DoOneSection ( } if (ISA_na (bsp->mol) && ajp->gbseq == NULL) { - AddBasecountBlock (awp, bases); + if (awp->showBaseCount) { + AddBasecountBlock (awp, bases); + } } AddOriginBlock (awp); @@ -19125,6 +19863,7 @@ static void DoOneBioseq ( { IntAsn2gbJobPtr ajp; Asn2gbWorkPtr awp; + BioseqSetPtr bssp; SeqMgrSegmentContext context; Boolean contig = FALSE; Int4 from; @@ -19145,8 +19884,29 @@ static void DoOneBioseq ( if (ISA_na (bsp->mol)) { if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return; + + /* only do mRNA feature tables in GPS if targeted to a specific mRNA */ + + if (ajp->format == FTABLE_FMT && ajp->skipMrnas) { + if (bsp->idx.parenttype == OBJ_BIOSEQSET) { + bssp = (BioseqSetPtr) bsp->idx.parentptr; + if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) { + if (bsp->idx.parenttype == OBJ_BIOSEQSET) { + bssp = (BioseqSetPtr) bsp->idx.parentptr; + if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) { + return; + } + } + } + } + } + } else if (ISA_aa (bsp->mol)) { if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return; + + /* only do protein feature tables if targeted to a specific protein */ + + if (ajp->format == FTABLE_FMT && ajp->skipProts) return; } if (awp->style == SEGMENT_STYLE) { @@ -19376,7 +20136,7 @@ static CharPtr GetGOtext ( break; } } - if (StringHasNoText (textstr)) return NULL; + /* if (StringHasNoText (textstr)) return NULL; */ str = (CharPtr) MemNew (StringLen (textstr) + StringLen (goid) + StringLen (evidence) + 50); if (str == NULL) return NULL; @@ -19397,10 +20157,31 @@ static CharPtr GetGOtext ( StringCat (str, tmp); StringCat (str, "]"); } + TrimSpacesAroundString (str); return str; } +static Boolean DbxrefAlreadyInGeneXref ( + DbtagPtr dbt, + ValNodePtr dbxref +) + +{ + DbtagPtr gdbt; + ValNodePtr vnp; + + if (dbt == NULL) return FALSE; + + for (vnp = dbxref; vnp != NULL; vnp = vnp->next) { + gdbt = (DbtagPtr) vnp->data.ptrvalue; + if (gdbt == NULL) continue; + if (DbtagMatch (dbt, gdbt)) return TRUE; + } + + return FALSE; +} + /* FormatFeatureblockQuals should not be called directly, except from FormatFeatureBlock. It performs no input validation. (perhaps it should?) */ @@ -19460,6 +20241,7 @@ static void FormatFeatureBlockQuals ( ObjectIdPtr oip; Boolean okay; Boolean only_digits; + BioseqPtr pbsp; ValNodePtr ppr; CharPtr prefix; CharPtr protein_seq = NULL; @@ -19908,7 +20690,7 @@ static void FormatFeatureBlockQuals ( } /* in release_mode, must be of the form 123..4567 or a single-token label, - or (technically illegal but common) letters and semicolons */ + or (technically illegal but common) letters and semicolons - NO LONGER CHECKED */ while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) { if (! StringHasNoText (gbq->val)) { @@ -19927,18 +20709,22 @@ static void FormatFeatureBlockQuals ( ptr++; } if ((! ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) { - FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=", + TrimSpacesAroundString (str); + FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"", FALSE, TRUE, TILDE_IGNORE); FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES); + FFAddOneChar(ffstring, '\"', FALSE); FFAddOneChar(ffstring, '\n', FALSE); } str = ptr; } } else { if ((! ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) { - FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=", + TrimSpacesAroundString (str); + FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"", FALSE, TRUE, TILDE_IGNORE); FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES); + FFAddOneChar(ffstring, '\"', FALSE); FFAddOneChar(ffstring, '\n', FALSE); } } @@ -20081,7 +20867,7 @@ static void FormatFeatureBlockQuals ( if (ajp->ajp.slp != NULL) { sip = SeqIdParse ("lcl|dummy"); split = FALSE; - newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE); + newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle); SeqIdFree (sip); if (newloc != NULL) { @@ -20114,10 +20900,10 @@ static void FormatFeatureBlockQuals ( case Qual_class_anti_codon : slp = qvp [FTQUAL_anticodon].slp; newloc = NULL; - if (ajp->ajp.slp != NULL) { + if (slp != NULL && ajp->ajp.slp != NULL) { sip = SeqIdParse ("lcl|dummy"); split = FALSE; - newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE); + newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle); /* newloc = SeqLocCopyRegion (sip, slp, bsp, left, right, strand, &split); */ @@ -20211,6 +20997,11 @@ static void FormatFeatureBlockQuals ( } } } + if (okay && idx == FTQUAL_db_xref && qvp [FTQUAL_gene_xref].vnp != NULL) { + if (DbxrefAlreadyInGeneXref (dbt, qvp [FTQUAL_gene_xref].vnp)) { + okay = FALSE; + } + } if (okay) { if (! StringHasNoText (oip->str)) { @@ -20290,6 +21081,16 @@ static void FormatFeatureBlockQuals ( } } */ + } else if (dbt != NULL) { + pbsp = BioseqFind (sip); + if (pbsp != NULL && pbsp->id != NULL && pbsp->id->next == NULL) { + if (SeqIdWrite (sip, seqid, PRINTID_REPORT, sizeof (seqid)) != NULL) { + FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"", + FALSE, FALSE, TILDE_IGNORE); + FF_www_protein_id(ajp, ffstring, seqid); + FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE); + } + } } } } @@ -20497,7 +21298,7 @@ static void FormatFeatureBlockQuals ( case Qual_class_go : if (qvp [jdx].ufp != NULL) { for (entry = qvp [jdx].ufp; entry != NULL; entry = entry->next) { - if (entry == NULL || entry->choice != 11) break; + if (entry == NULL || entry->choice != 11) break; ufp = (UserFieldPtr) entry->data.ptrvalue; str = GetGOtext (ufp); if (! StringHasNoText (str)) { @@ -20806,10 +21607,73 @@ static void FormatFeatureBlockQuals ( } break; + case Qual_class_seq_id : + sip = qvp [jdx].sip; + if (sip != NULL) { + /* should always be found above for protein_id or transcript_id + prod = BioseqFind (sip); + */ + if (prod != NULL) { + choice = 0; + for (sip = prod->id; sip != NULL; sip = sip->next) { + if (sip->choice == SEQID_GENBANK || + sip->choice == SEQID_EMBL || + sip->choice == SEQID_DDBJ || + sip->choice == SEQID_OTHER || + sip->choice == SEQID_TPG || + sip->choice == SEQID_TPE || + sip->choice == SEQID_TPD) { + choice = sip->choice; + if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) { + FFAddTextToString(unique, prefix, "transcript found in: ", seqid, + FALSE, FALSE, TILDE_IGNORE); + } + } else if (sip->choice == SEQID_GI) { + if (choice == 0) { + sprintf (seqid, "%ld", (long) sip->data.intvalue); + FFAddTextToString(unique, prefix, "transcript found in: ", seqid, + FALSE, FALSE, TILDE_IGNORE); + } + } + } + } else { + if (sip->choice == SEQID_GI) { + gi = sip->data.intvalue; + if (GetAccnVerFromServer (gi, seqid)) { + if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) { + FFAddTextToString(unique, prefix, "transcript found in: ", seqid, + FALSE, FALSE, TILDE_IGNORE); + } + } else { + sip = GetSeqIdForGI(gi); + if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) { + if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) { + FFAddTextToString(unique, prefix, "transcript found in: ", seqid, + FALSE, FALSE, TILDE_IGNORE); + } + } else if (! ajp->flags.dropIllegalQuals) { + sprintf (seqid, "%ld", (long) gi); + FFAddTextToString(unique, prefix, "transcript found in: ", seqid, + FALSE, FALSE, TILDE_IGNORE); + } + } + } else if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) { + if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) { + FFAddTextToString(unique, prefix, "transcript found in: ", seqid, + FALSE, FALSE, TILDE_IGNORE); + } + } + } + prefix = "; "; + add_period = FALSE; + } + break; + default : break; } } + if ( !FFEmpty(unique) ) { notestr = FFToCharPtr(unique); TrimSpacesAroundString (notestr); @@ -20947,7 +21811,9 @@ static CharPtr FormatFeatureBlock ( SeqLocPtr newloc; Boolean noLeft; Boolean noRight; + SeqMgrFeatContext ocontext; SeqEntryPtr oldscope; + SeqFeatPtr operon = NULL; Uint2 partial; SeqMgrFeatContext pcontext; BioseqPtr prd; @@ -21135,7 +22001,7 @@ static CharPtr FormatFeatureBlock ( } } - FFStartPrint(ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", ifp->firstfeat); + FFStartPrint(ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", /* ifp->firstfeat */ FALSE); if (ajp->ajp.slp != NULL) { FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE); } else if ( GetWWW(ajp) /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) { @@ -21161,7 +22027,7 @@ static CharPtr FormatFeatureBlock ( right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END); strand = SeqLocStrand (ajp->ajp.slp); split = FALSE; - newloc = SeqLocReMap (sip, ajp->ajp.slp, location, 0, FALSE); + newloc = SeqLocReMapEx (sip, ajp->ajp.slp, location, 0, FALSE, ajp->masterStyle); /* newloc = SeqLocCopyRegion (sip, location, bsp, left, right, strand, &split); */ @@ -21216,8 +22082,12 @@ static CharPtr FormatFeatureBlock ( } } - /* a few features cannot show /partial in RELEASE_MODE - later no features will */ + /* hide unclassified /partial in RELEASE_MODE and ENTREZ_MODE */ + if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) { + qvp [FTQUAL_partial].ble = FALSE; + } + /* if (ajp->flags.checkQualSyntax) { switch (featdeftype) { case FEATDEF_conflict: @@ -21230,6 +22100,7 @@ static CharPtr FormatFeatureBlock ( break; } } + */ } if (ifp->mapToProt) { qvp [FTQUAL_partial].ble = FALSE; @@ -21271,6 +22142,14 @@ static CharPtr FormatFeatureBlock ( qvp [FTQUAL_gene_syn_refseq].vnp = qvp [FTQUAL_gene_syn].vnp; qvp [FTQUAL_gene_syn].vnp = NULL; } + operon = SeqMgrGetOverlappingOperon (locforgene, &ocontext); + if (operon != NULL) { + for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) { + if (StringCmp (gbq->qual, "operon") == 0) { + qvp [FTQUAL_operon].gbq = gbq; + } + } + } } else { @@ -21318,10 +22197,26 @@ static CharPtr FormatFeatureBlock ( gene_syn = vnp; } } + if (grp != NULL && fcontext.featdeftype != FEATDEF_variation) { + qvp [FTQUAL_gene_allele].str = grp->allele; /* now propagating /allele */ + } if (fcontext.seqfeattype != SEQFEAT_CDREGION && fcontext.seqfeattype != SEQFEAT_RNA) { qvp [FTQUAL_gene_xref].vnp = NULL; } + if (fcontext.featdeftype != FEATDEF_operon) { + grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL || (! SeqMgrGeneIsSuppressed (grp))) { + operon = SeqMgrGetOverlappingOperon (locforgene, &ocontext); + if (operon != NULL) { + for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) { + if (StringCmp (gbq->qual, "operon") == 0) { + qvp [FTQUAL_operon].gbq = gbq; + } + } + } + } + } /* specific fields set here */ @@ -21363,7 +22258,11 @@ static CharPtr FormatFeatureBlock ( residue = SeqMapTableConvert (smtp, residue); } if (residue == 'U') { - qvp [FTQUAL_selenocysteine].str = "selenocysteine"; + if (ajp->flags.selenocysteineToNote) { + qvp [FTQUAL_selenocysteine_note].str = "selenocysteine"; + } else { + qvp [FTQUAL_selenocysteine].ble = TRUE; + } } } } @@ -21504,6 +22403,43 @@ static CharPtr FormatFeatureBlock ( qvp [FTQUAL_transl_table].num = 0; } } + for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) { + seqcode = 0; + sctp = NULL; + cbaa = cbp->aa; + switch (cbaa.choice) { + case 1 : + seqcode = Seq_code_ncbieaa; + break; + case 2 : + seqcode = Seq_code_ncbi8aa; + break; + case 3 : + seqcode = Seq_code_ncbistdaa; + break; + default : + break; + } + if (seqcode != 0) { + sctp = SeqCodeTableFind (seqcode); + if (sctp != NULL) { + residue = cbaa.value.intvalue; + if (residue != 42) { + if (seqcode != Seq_code_ncbieaa) { + smtp = SeqMapTableFind (seqcode, Seq_code_ncbieaa); + residue = SeqMapTableConvert (smtp, residue); + } + if (residue == 'U') { + if (ajp->flags.selenocysteineToNote) { + qvp [FTQUAL_selenocysteine_note].str = "selenocysteine"; + } else { + qvp [FTQUAL_selenocysteine].ble = TRUE; + } + } + } + } + } + } } } break; @@ -21574,9 +22510,12 @@ static CharPtr FormatFeatureBlock ( if (rrp->type == 2) { sip = SeqLocIdForProduct (sfp->product); if (sip != NULL) { - /* for RefSeq records or GenBank not release_mode */ - if (is_other || (! ajp->flags.forGbRelease)) { + /* for RefSeq records or GenBank not release_mode or entrez_mode */ + if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) { qvp [FTQUAL_transcript_id].sip = sip; + } else { + /* otherwise now goes in note */ + qvp [FTQUAL_transcript_id_note].sip = sip; /* !!! remove October 15, 2003 !!! */ } prod = BioseqFind (sip); } @@ -21651,8 +22590,12 @@ static CharPtr FormatFeatureBlock ( } else { shift = 1; } - idx = aa - (64 + shift); - if (idx > 0 && idx < 25) { + if (aa != '*') { + idx = aa - (64 + shift); + } else { + idx = 25; + } + if (idx > 0 && idx < 26) { str = trnaList [idx]; qvp [FTQUAL_product].str = str; if (StringNICmp (str, "tRNA-", 5) == 0) { @@ -21830,14 +22773,22 @@ static CharPtr FormatFeatureBlock ( /* !!! if ajp->flags.dropIllegalQuals, check CDS list here as well !!! */ if (ajp->flags.dropIllegalQuals && - (! StringInStringList (qvp [FTQUAL_seqfeat_note].str, validExceptionString)) ) { + (! StringInStringList (qvp [FTQUAL_seqfeat_note].str, validExceptionString))) { qvp [FTQUAL_exception].str = NULL; } } - if (ajp->flags.dropIllegalQuals && - (! StringInStringList (qvp [FTQUAL_exception].str, validExceptionString))) { - qvp [FTQUAL_exception_note].str = qvp [FTQUAL_exception].str; - qvp [FTQUAL_exception].str = NULL; + if (ajp->flags.dropIllegalQuals) { + if (is_other) { + if (! StringInStringList (qvp [FTQUAL_exception].str, validRefSeqExceptionString)) { + qvp [FTQUAL_exception_note].str = qvp [FTQUAL_exception].str; + qvp [FTQUAL_exception].str = NULL; + } + } else { + if (! StringInStringList (qvp [FTQUAL_exception].str, validExceptionString)) { + qvp [FTQUAL_exception_note].str = qvp [FTQUAL_exception].str; + qvp [FTQUAL_exception].str = NULL; + } + } } } else { qvp [FTQUAL_exception_note].str = sfp->except_text; @@ -22041,7 +22992,13 @@ static CharPtr FormatFeatureBlock ( /* suppress selenocysteine note if already in comment */ if (StringStr (sfp->comment, "selenocysteine") != NULL) { - qvp [FTQUAL_selenocysteine].str = NULL; + qvp [FTQUAL_selenocysteine_note].str = NULL; + } + + /* if /allele inherited from gene, suppress allele gbqual on feature */ + + if (qvp [FTQUAL_gene_allele].str != NULL) { + qvp [FTQUAL_allele].gbq = NULL; } /* now print qualifiers from table */ @@ -22443,7 +23400,7 @@ static Boolean IsSepRefseq ( } typedef struct modeflags { - Boolean flags [24]; + Boolean flags [25]; } ModeFlags, PNTR ModeFlagsPtr; static ModeFlags flagTable [] = { @@ -22453,28 +23410,28 @@ static ModeFlags flagTable [] = { TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, - TRUE, TRUE, TRUE, TRUE}, + TRUE, TRUE, TRUE, TRUE, TRUE}, /* ENTREZ_MODE */ {FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, - TRUE, TRUE, TRUE, FALSE}, + TRUE, TRUE, TRUE, TRUE, FALSE}, /* SEQUIN_MODE */ {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, - FALSE, FALSE, FALSE, FALSE}, + FALSE, FALSE, FALSE, FALSE, FALSE}, /* DUMP_MODE */ {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, - FALSE, FALSE, FALSE, FALSE} + FALSE, FALSE, FALSE, FALSE, FALSE} }; static void SetFlagsFromMode ( @@ -22521,6 +23478,7 @@ static void SetFlagsFromMode ( ajp->flags.hideEmptySource = *(bp++); ajp->flags.goQualsToNote = *(bp++); ajp->flags.geneSynsToNote = *(bp++); + ajp->flags.selenocysteineToNote = *(bp++); ajp->flags.forGbRelease = *(bp++); /* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML */ @@ -22537,7 +23495,13 @@ static void SetFlagsFromMode ( */ sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID); - if (! IsSepRefseq (sep)) { + if (IsSepRefseq (sep)) { + + /* selenocysteine always a separate qualifier for RefSeq */ + + ajp->flags.selenocysteineToNote = FALSE; + + } else { /* collaboration unapproved Gene Ontology quals on their own line only for RefSeq */ @@ -22583,6 +23547,82 @@ static void CheckVersionWithGi (BioseqPtr bsp, Pointer userdata) } +typedef struct lookforids { + Boolean isGED; + Boolean isNTorNW; + Boolean isNC; + Boolean isTPA; + Boolean isNuc; + Boolean isProt; +} LookForIDs, PNTR LookForIDsPtr; + +static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata) + +{ + LookForIDsPtr lfip; + SeqIdPtr sip; + TextSeqIdPtr tsip; + + lfip = (LookForIDsPtr) userdata; + if (ISA_na (bsp->mol)) { + lfip->isNuc = TRUE; + } + if (ISA_aa (bsp->mol)) { + lfip->isProt = TRUE; + } + for (sip = bsp->id; sip != NULL; sip = sip->next) { + switch (sip->choice) { + case SEQID_GENBANK : + case SEQID_EMBL : + case SEQID_DDBJ : + lfip->isGED = TRUE; + break; + case SEQID_TPG : + case SEQID_TPE : + case SEQID_TPD : + lfip->isTPA = TRUE; + break; + case SEQID_OTHER : + tsip = (TextSeqIdPtr) sip->data.ptrvalue; + if (tsip != NULL) { + if (StringNCmp (tsip->accession, "NC_", 3) == 0) { + lfip->isNC = TRUE; + } else if (StringNCmp (tsip->accession, "NT_", 3) == 0) { + lfip->isNTorNW = TRUE; + } else if (StringNCmp (tsip->accession, "NW_", 3) == 0) { + lfip->isNTorNW = TRUE; + } + } + break; + default : + break; + } + } +} + +static void LookForGEDetc ( + SeqEntryPtr topsep, + BoolPtr isGED, + BoolPtr isNTorNW, + BoolPtr isNC, + BoolPtr isTPA, + BoolPtr isNuc, + BoolPtr isProt +) + +{ + LookForIDs lfi; + + MemSet ((Pointer) &lfi, 0, sizeof (LookForIDs)); + VisitBioseqsInSep (topsep, (Pointer) &lfi, LookForSeqIDs); + *isGED = lfi.isGED; + *isNTorNW = lfi.isNTorNW; + *isNC = lfi.isNC; + *isTPA = lfi.isTPA; + *isNuc = lfi.isNuc; + *isProt = lfi.isProt; +} + #define FEAT_FETCH_MASK (ONLY_NEAR_FEATURES | FAR_FEATURES_SUPPRESS | NEAR_FEATURES_SUPPRESS) #define HTML_XML_ASN_MASK (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE) #define GENE_RIF_MASK (HIDE_GENE_RIFS | ONLY_GENE_RIFS | LATEST_GENE_RIFS) @@ -22610,6 +23650,12 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( GBSeqPtr gbseq = NULL; Int4 i; IndxPtr index = NULL; + Boolean isGED; + Boolean isNTorNW; + Boolean isNC; + Boolean isNuc; + Boolean isProt; + Boolean isTPA; Int4 j; Int4 k; Boolean lockFarComp; @@ -22631,6 +23677,8 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( SubmitBlockPtr sbp; SeqEntryPtr sep; SeqIntPtr sintp; + Boolean skipMrnas = FALSE; + Boolean skipProts = FALSE; SeqSubmitPtr ssp; BioseqSetPtr topbssp; ValNodePtr vnp; @@ -22668,6 +23716,14 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( entityID = ObjMgrGetEntityIDForPointer (bsp); } else if (bssp != NULL) { entityID = ObjMgrGetEntityIDForPointer (bssp); + if (format == FTABLE_FMT) { + skipProts = TRUE; + skipMrnas = TRUE; + } + } + if ((Boolean) ((custom & SHOW_PROT_FTABLE) != 0)) { + skipProts = FALSE; + skipMrnas = FALSE; } if (entityID == 0) return NULL; @@ -22727,7 +23783,11 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( /* lock all bioseqs in advance, including remote genome components */ sep = GetTopSeqEntryForEntityID (entityID); - ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd); + if (ajp->ajp.slp != NULL && lockFarComp) { + ajp->lockedBspList = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, ajp->ajp.slp); + } else { + ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL); + } } lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0); @@ -22753,6 +23813,8 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( } ajp->relModeError = FALSE; + ajp->skipProts = skipProts; + ajp->skipMrnas = skipMrnas; MemSet ((Pointer) (&aw), 0, sizeof (Asn2gbWork)); aw.ajp = ajp; @@ -22767,9 +23829,40 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( aw.showconfeats = (Boolean) ((flags & SHOW_CONTIG_FEATURES) != 0); aw.showconsource = (Boolean) ((flags & SHOW_CONTIG_SOURCES) != 0); - aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES); - aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS); - aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS); + aw.format = format; + aw.mode = mode; + aw.style = style; + + sep = GetTopSeqEntryForEntityID (entityID); + + /* special types of records override feature fetching parameters */ + + aw.onlyNearFeats = FALSE; + aw.farFeatsSuppress = FALSE; + aw.nearFeatsSuppress = FALSE; + LookForGEDetc (sep, &isGED, &isNTorNW, &isNC, &isTPA, &isNuc, &isProt); + if (ajp->ajp.slp != NULL) { + /* specified location obeys fetching parameters, for now */ + aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES); + aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS); + aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS); + } else if (mode == ENTREZ_MODE) { + /* entrez_mode overrides settings to avoid far fetches */ + aw.onlyNearFeats = TRUE; + aw.showconfeats = TRUE; + } else if (isNTorNW || isTPA) { + aw.onlyNearFeats = TRUE; + } else if (isNC) { + if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) { + aw.onlyNearFeats = TRUE; + } else { + aw.nearFeatsSuppress = TRUE; + } + } else { + aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES); + aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS); + aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS); + } aw.hideImpFeats = (Boolean) ((custom & HIDE_IMP_FEATS) != 0); aw.hideRemImpFeats = (Boolean) ((custom & HIDE_REM_IMP_FEATS) != 0); @@ -22789,8 +23882,9 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( aw.onlyGeneRIFs = (Boolean) ((custom & GENE_RIF_MASK) == ONLY_GENE_RIFS); aw.latestGeneRIFs = (Boolean) ((custom & GENE_RIF_MASK) == LATEST_GENE_RIFS); + aw.showRefs = (Boolean) ((custom & SHOW_FTABLE_REFS) != 0); + aw.isGPS = FALSE; - sep = GetTopSeqEntryForEntityID (entityID); if (sep != NULL && IS_Bioseq_set (sep)) { topbssp = (BioseqSetPtr) sep->data.ptrvalue; if (topbssp != NULL && topbssp->_class == BioseqseqSet_class_gen_prod_set) { @@ -22808,17 +23902,18 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup ( */ aw.newLocusLine = TRUE; + aw.showBaseCount = FALSE; - if ((Boolean) (flags & DDBJ_VARIANT_FORMAT) != 0) { + if ((Boolean) ((flags & DDBJ_VARIANT_FORMAT) != 0)) { aw.citSubsFirst = TRUE; aw.hideGeneFeats = TRUE; aw.newLocusLine = FALSE; + aw.showBaseCount = TRUE; ajp->newSourceOrg = FALSE; } - - aw.format = format; - aw.mode = mode; - aw.style = style; + if (mode == SEQUIN_MODE || mode == DUMP_MODE) { + aw.showBaseCount = TRUE; + } aw.hup = FALSE; aw.ssp = NULL; @@ -22952,17 +24047,32 @@ static void PrintFtableIntervals ( ) { - Boolean partial5; - Boolean partial3; - SeqLocPtr slp; - Int4 start; - Int4 stop; - Char str [64]; - Char str1 [32]; - Char str2 [32]; + IntFuzzPtr ifp; + Boolean partial5; + Boolean partial3; + SeqLocPtr slp; + SeqPntPtr spp; + Int4 start; + Int4 stop; + Char str [64]; + Char str1 [32]; + Char str2 [32]; if (head == NULL || target == NULL || location == NULL || label == NULL) return; + if (location->choice == SEQLOC_PNT) { + spp = (SeqPntPtr) location->data.ptrvalue; + if (spp != NULL) { + ifp = spp->fuzz; + if (ifp != NULL && ifp->choice == 4 && ifp->a == 3) { + sprintf (str, "%ld^\t%ld\t%s\n", (long) (spp->point + 1), + (long) (spp->point + 2), label); + ValNodeCopyStr (head, 0, str); + return; + } + } + } + slp = SeqLocFindNext (location, NULL); if (slp == NULL) return; @@ -23074,7 +24184,7 @@ static void PrintFTUserFld ( break; } } - if (StringHasNoText (textstr)) break; + /* if (StringHasNoText (textstr)) break; */ str = (CharPtr) MemNew (StringLen (textstr) + StringLen (goid) + StringLen (evidence) + 40); if (str == NULL) return; @@ -23182,6 +24292,33 @@ static void PrintFTCodeBreak ( } } +static SeqIdPtr SeqIdFindForTable (SeqIdPtr sip) + +{ + Uint1 order [NUM_SEQID]; + + SeqIdBestRank (order, NUM_SEQID); + order [SEQID_LOCAL] = 20; + order [SEQID_GENBANK] = 5; + order [SEQID_EMBL] = 5; + order [SEQID_PIR] = 5; + order [SEQID_SWISSPROT] = 5; + order [SEQID_DDBJ] = 5; + order [SEQID_PRF] = 5; + order [SEQID_PDB] = 5; + order [SEQID_TPG] = 5; + order [SEQID_TPE] = 5; + order [SEQID_TPD] = 5; + order [SEQID_PATENT] = 10; + order [SEQID_OTHER] = 8; + order [SEQID_GENERAL] = 15; + order [SEQID_GIBBSQ] = 15; + order [SEQID_GIBBMT] = 15; + order [SEQID_GIIM] = 20; + order [SEQID_GI] = 20; + return SeqIdSelect (sip, order, NUM_SEQID); +} + /* #define MAKE_MRNA_GPS_FEAT */ static void PrintFtableLocAndQuals ( @@ -23193,28 +24330,39 @@ static void PrintFtableLocAndQuals ( ) { - CodeBreakPtr cbp; - CdRegionPtr crp; - DbtagPtr dbt; - GBQualPtr gbq; - ValNodePtr geneorprotdb; - GeneRefPtr grp; - CharPtr label; - ObjectIdPtr oip; - BioseqPtr prod; - SeqFeatPtr prot; - ProtRefPtr prp; - Boolean pseudo; - RnaRefPtr rrp; - SeqIdPtr sip; - SeqIdPtr sip2; - Char str [256]; - Char tmp [300]; - tRNAPtr trp; - ValNodePtr vnp; + Int2 bondidx; + BioseqSetPtr bssp; + CodeBreakPtr cbp; + BioseqPtr cdna; + SeqFeatPtr cds; + CdRegionPtr crp; + SeqMgrDescContext dcontext; + DbtagPtr dbt; + SeqMgrFeatContext fcontext; + GBQualPtr gbq; + ValNodePtr geneorprotdb; + GeneRefPtr grp; + Boolean is_gps_genomic = FALSE; + CharPtr label; + MolInfoPtr mip; + ObjectIdPtr oip; + BioseqPtr prod; + SeqFeatPtr prot; + ProtRefPtr prp; + Boolean pseudo; + RnaRefPtr rrp; + SeqDescrPtr sdp; + Int4 sec_str; + SeqIdPtr sip; + SeqIdPtr sip2; + Int2 siteidx; + Char str [256]; + Char tmp [300]; + tRNAPtr trp; + ValNodePtr vnp; #ifdef MAKE_MRNA_GPS_FEAT - CharPtr rnaid; - CharPtr rnaprod; + CharPtr rnaid; + CharPtr rnaprod; #endif if (head == NULL || target == NULL || sfp == NULL || context == NULL) return; @@ -23227,6 +24375,23 @@ static void PrintFtableLocAndQuals ( label = "???"; } + /* check if genomic sequence in genomic product set */ + + if (target->idx.parenttype == OBJ_BIOSEQSET) { + bssp = (BioseqSetPtr) target->idx.parentptr; + if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) { + sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_molinfo, &dcontext); + if (sdp != NULL) { + mip = (MolInfoPtr) sdp->data.ptrvalue; + if (mip != NULL && mip->biomol == MOLECULE_TYPE_GENOMIC) { +#ifndef MAKE_MRNA_GPS_FEAT + is_gps_genomic = TRUE; +#endif + } + } + } + } + PrintFtableIntervals (head, target, sfp->location, label); geneorprotdb = NULL; @@ -23335,7 +24500,8 @@ static void PrintFtableLocAndQuals ( } } if (prod != NULL) { - for (sip = prod->id; sip != NULL; sip = sip->next) { + sip = SeqIdFindForTable (prod->id); + if (sip != NULL) { if (sip->choice == SEQID_GENBANK || sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ || @@ -23380,6 +24546,39 @@ static void PrintFtableLocAndQuals ( } } } + if (is_gps_genomic) { + cds = SeqMgrGetCDSgivenProduct (prod, NULL); + if (cds != NULL) { + cdna = BioseqFindFromSeqLoc (cds->location); + if (cdna != NULL) { + sip = SeqIdFindWorst (cdna->id); + if (sip != NULL) { + if (sip->choice == SEQID_GENBANK || + sip->choice == SEQID_EMBL || + sip->choice == SEQID_DDBJ || + sip->choice == SEQID_OTHER || + sip->choice == SEQID_TPG || + sip->choice == SEQID_TPE || + sip->choice == SEQID_TPD) { + if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) { + sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } else if (sip->choice == SEQID_LOCAL && (! ajp->flags.suppressLocalID)) { + if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) { + sprintf (tmp, "\t\t\ttranscript_id\tlcl|%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } else if (sip->choice == SEQID_GENERAL) { + if (SeqIdWrite (sip, str, PRINTID_FASTA_GENERAL, sizeof (str)) != NULL) { + sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } + } + } + } + } } else if (sfp->product != NULL) { sip = SeqLocId (sfp->product); if (sip != NULL) { @@ -23441,7 +24640,8 @@ static void PrintFtableLocAndQuals ( } } if (prod != NULL) { - for (sip = prod->id; sip != NULL; sip = sip->next) { + sip = SeqIdFindForTable (prod->id); + if (sip != NULL) { if (sip->choice == SEQID_GENBANK || sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ || @@ -23465,6 +24665,39 @@ static void PrintFtableLocAndQuals ( } } } + if (is_gps_genomic) { + cds = SeqMgrGetNextFeature (prod, NULL, SEQFEAT_CDREGION, 0, &fcontext); + if (cds != NULL && SeqMgrGetNextFeature (prod, cds, SEQFEAT_CDREGION, 0, &fcontext) == NULL) { + prod = BioseqFindFromSeqLoc (cds->product); + if (prod != NULL) { + sip = SeqIdFindWorst (prod->id); + if (sip != NULL) { + if (sip->choice == SEQID_GENBANK || + sip->choice == SEQID_EMBL || + sip->choice == SEQID_DDBJ || + sip->choice == SEQID_OTHER || + sip->choice == SEQID_TPG || + sip->choice == SEQID_TPE || + sip->choice == SEQID_TPD) { + if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) { + sprintf (tmp, "\t\t\tprotein_id\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } else if (sip->choice == SEQID_LOCAL && (! ajp->flags.suppressLocalID)) { + if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) { + sprintf (tmp, "\t\t\tprotein_id\tlcl|%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } else if (sip->choice == SEQID_GENERAL) { + if (SeqIdWrite (sip, str, PRINTID_FASTA_GENERAL, sizeof (str)) != NULL) { + sprintf (tmp, "\t\t\tprotein_id\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } + } + } + } + } } else if (sfp->product != NULL) { sip = SeqLocId (sfp->product); if (sip != NULL) { @@ -23499,6 +24732,87 @@ static void PrintFtableLocAndQuals ( } } break; + case SEQFEAT_PROT : + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + if (prp != NULL) { + if (prp->name != NULL) { + for (vnp = prp->name; vnp != NULL; vnp = vnp->next) { + StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str)); + if (! StringHasNoText (str)) { + sprintf (tmp, "\t\t\tproduct\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } + } + if (prp->desc != NULL) { + StringNCpy_0 (str, prp->desc, sizeof (str)); + if (! StringHasNoText (str)) { + sprintf (tmp, "\t\t\tprot_desc\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } + for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) { + StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str)); + if (! StringHasNoText (str)) { + sprintf (tmp, "\t\t\tfunction\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } + for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) { + StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str)); + if (! StringHasNoText (str)) { + sprintf (tmp, "\t\t\tEC_number\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + } + } + StringNCpy_0 (str, sfp->comment, sizeof (str)); + if (! StringHasNoText (str)) { + sprintf (tmp, "\t\t\tprot_note\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + break; + case SEQFEAT_REGION : + StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str)); + if (! StringHasNoText (str)) { + sprintf (tmp, "\t\t\tregion\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + break; + case SEQFEAT_BOND : + bondidx = (Int2) sfp->data.value.intvalue; + if (bondidx == 255) { + bondidx = 5; + } + if (bondidx > 0 && bondidx < 6) { + sprintf (tmp, "\t\t\tbond_type\t%s\n", bondList [bondidx]); + ValNodeCopyStr (head, 0, tmp); + } + break; + case SEQFEAT_SITE : + siteidx = (Int2) sfp->data.value.intvalue; + if (siteidx == 255) { + siteidx = 26; + } + if (siteidx > 0 && siteidx < 27) { + sprintf (tmp, "\t\t\tsite_type\t%s\n", siteList [siteidx]); + ValNodeCopyStr (head, 0, tmp); + } + break; + case SEQFEAT_PSEC_STR : + sec_str = (Int2) sfp->data.value.intvalue; + if (sec_str > 0 && sec_str <= 3) { + sprintf (tmp, "\t\t\tsec_str_type\t%s\n", secStrText [sec_str]); + ValNodeCopyStr (head, 0, tmp); + } + break; + case SEQFEAT_HET : + StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str)); + if (! StringHasNoText (str)) { + sprintf (tmp, "\t\t\theterogen\t%s\n", str); + ValNodeCopyStr (head, 0, tmp); + } + break; default : break; } @@ -23655,6 +24969,7 @@ NLM_EXTERN CharPtr asn2gnbk_format ( ValNodePtr head; IntAsn2gbJobPtr iajp; Char id [42]; + IntRefBlockPtr irp; size_t max; SeqEntryPtr oldscope; QualValPtr qv; @@ -23662,6 +24977,7 @@ NLM_EXTERN CharPtr asn2gnbk_format ( SeqEntryPtr sep; SeqFeatPtr sfp; SeqIdPtr sip; + SeqIdPtr sip2; CharPtr str = NULL; BioseqPtr target; Char tmp [53]; @@ -23717,12 +25033,37 @@ NLM_EXTERN CharPtr asn2gnbk_format ( if (blocktype == FEATHEADER_BLOCK) { sip = SeqIdFindBest (target->id, 0); + if (sip != NULL && sip->choice == SEQID_GI) { + sip2 = GetSeqIdForGI (sip->data.intvalue); + if (sip2 != NULL) { + sip = sip2; + } + } SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1); if (! StringHasNoText (id)) { sprintf (tmp, ">Feature %s\n", id); str = StringSave (tmp); } + } else if (blocktype == REFERENCE_BLOCK) { + + irp = (IntRefBlockPtr) bbp; + if (irp->loc != NULL) { + if (irp->rb.pmid != 0 || irp->rb.muid != 0) { + head = NULL; + PrintFtableIntervals (&head, target, irp->loc, "REFERENCE"); + if (irp->rb.pmid != 0) { + sprintf (tmp, "\t\t\tpmid\t%ld\n", (long) irp->rb.pmid); + ValNodeCopyStr (&head, 0, tmp); + } else if (irp->rb.muid != 0) { + sprintf (tmp, "\t\t\tmuid\t%ld\n", (long) irp->rb.muid); + ValNodeCopyStr (&head, 0, tmp); + } + str = MergeValNodeStrings (head); + ValNodeFreeData (head); + } + } + } else if (blocktype == FEATURE_BLOCK) { sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext); @@ -23968,7 +25309,11 @@ NLM_EXTERN Boolean SeqEntryToGnbk ( if (lockFarComp || lockFarLocs || lockFarProd) { locks = locks ^ (LOCK_FAR_COMPONENTS | LOCK_FAR_LOCATIONS | LOCK_FAR_PRODUCTS); - bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd); + if (slp != NULL && lockFarComp) { + bsplist = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, slp); + } else { + bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL); + } } lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0); diff --git a/api/asn2gnbk.h b/api/asn2gnbk.h index 0a1c056e..1c2bfc61 100644 --- a/api/asn2gnbk.h +++ b/api/asn2gnbk.h @@ -29,7 +29,7 @@ * * Version Creation Date: 10/21/98 * -* $Revision: 6.52 $ +* $Revision: 6.53 $ * * File Description: New GenBank flatfile generator * @@ -142,6 +142,9 @@ typedef unsigned long CstType; #define ONLY_GENE_RIFS 2048 #define LATEST_GENE_RIFS 3072 +#define SHOW_PROT_FTABLE 4096 +#define SHOW_FTABLE_REFS 8192 + /* opaque pointer for special extensions */ struct XtraData; diff --git a/api/edutil.c b/api/edutil.c index b0227a9a..b6ea951e 100644 --- a/api/edutil.c +++ b/api/edutil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/4/94 * -* $Revision: 6.19 $ +* $Revision: 6.20 $ * * File Description: Sequence editing utilities * @@ -39,6 +39,9 @@ * ------- ---------- ----------------------------------------------------- * * $Log: edutil.c,v $ +* Revision 6.20 2003/06/03 20:25:34 kans +* SeqLocReplaceID works on bonds if both ends bonded to the same Seq-id +* * Revision 6.19 2003/02/10 22:57:45 kans * added BioseqCopyEx, which takes a BioseqPtr instead of a SeqIdPtr for the source * @@ -3490,6 +3493,7 @@ NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip) SeqLocPtr curr; PackSeqPntPtr pspp; SeqIntPtr target_sit; + SeqBondPtr sbp; SeqPntPtr spp; switch (slp->choice) { @@ -3519,10 +3523,23 @@ NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip) target_sit->id = SeqIdDup (new_sip); break; case SEQLOC_PNT : - spp = (SeqPntPtr)slp->data.ptrvalue; + spp = (SeqPntPtr) slp->data.ptrvalue; SeqIdFree(spp->id); spp->id = SeqIdDup(new_sip); break; + case SEQLOC_BOND : + sbp = (SeqBondPtr) slp->data.ptrvalue; + if (sbp == NULL || sbp->a == NULL || sbp->b == NULL) break; + /* only do this if both ends bonded to same Seq-id */ + if (SeqIdMatch (sbp->a->id, sbp->b->id)) { + spp = sbp->a; + SeqIdFree(spp->id); + spp->id = SeqIdDup(new_sip); + spp = sbp->b; + SeqIdFree(spp->id); + spp->id = SeqIdDup(new_sip); + } + break; default : break; } diff --git a/api/explore.h b/api/explore.h index a4923d10..5d8ed27d 100644 --- a/api/explore.h +++ b/api/explore.h @@ -29,7 +29,7 @@ * * Version Creation Date: 6/30/98 * -* $Revision: 6.43 $ +* $Revision: 6.45 $ * * File Description: Reengineered and optimized exploration functions * to be used for future code @@ -208,6 +208,11 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingGene ( SeqMgrFeatContext PNTR context ); +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingOperon ( + SeqLocPtr slp, + SeqMgrFeatContext PNTR context +); + /***************************************************************************** * * SeqMgrGetOverlappingXXX returns the overlapping mRNA/CDS/publication/biosource @@ -482,6 +487,7 @@ NLM_EXTERN Boolean LIBCALL SeqMgrGetBioseqContext ( #define LOCATION_SUBSET 2 /* SeqLocAinB must be satisfied, no boundary checking */ #define CHECK_INTERVALS 3 /* SeqLocAinB plus internal exon-intron boundaries must match */ #define INTERVAL_OVERLAP 4 /* at least one pair of intervals must overlap */ +#define COMMON_INTERVAL 5 /* at least one pair of intervals must match */ NLM_EXTERN VoidPtr LIBCALL SeqMgrBuildFeatureIndex ( BioseqPtr bsp, diff --git a/api/fdlKludge.h b/api/fdlKludge.h index 13d3c8bf..96783f80 100644 --- a/api/fdlKludge.h +++ b/api/fdlKludge.h @@ -28,13 +28,25 @@ * * Version Creation Date: 10/15/01 * -* $Revision: 6.8 $ +* $Revision: 6.12 $ * * File Description: * * Modifications: * -------------------------------------------------------------------------- * $Log: fdlKludge.h,v $ +* Revision 6.12 2003/06/11 20:15:45 jianye +* changed unigene linkout +* +* Revision 6.11 2003/06/02 20:02:23 jianye +* Added geo linkout +* +* Revision 6.10 2003/05/05 19:33:54 jianye +* Change url for structure linkout +* +* Revision 6.9 2003/04/21 21:49:37 jianye +* changed some url +* * Revision 6.8 2003/04/14 20:43:22 jianye * Adde geo url and modified structure linkout url * @@ -63,7 +75,6 @@ #include <objloc.h> -#define total_linkout 4 #define linkout_locuslink (1<<0) #define linkout_unigene (1<<1) @@ -72,12 +83,12 @@ /* url for linkout*/ #define URL_LocusLink "<a href=\"http://www.ncbi.nlm.nih.gov/LocusLink/list.cgi?Q=%d%s\"><img border=0 height=16 width=16 src=\"/blast/images/L.gif\" alt=\"LocusLink info\"></a>" -#define URL_Unigene "<a href=\"http://www.ncbi.nlm.nih.gov/UniGene/query.cgi?ORG=%s&TEXT=@gi(%d)\"><img border=0 height=16 width=16 src=\"/blast/images/U.gif\" alt=\"UniGene info\"></a>" - -#define URL_Structure "<a href=\"http://ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\"><img border=0 height=16 width=16 src=\"http://ncbi.nlm.nih.gov/Structure/cblast/str_link.gif\" alt=\"Related structures\"></a>" +#define URL_Unigene "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=unigene&cmd=search&term=%d[Nucleotide+UID]\"><img border=0 height=16 width=16 src=\"/blast/images/U.gif\" alt=\"UniGene info\"></a>" -#define URL_Structure_Overview "<a href=\"http://ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\">Related Structures</a>" +#define URL_Structure "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\"><img border=0 height=16 width=16 src=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/str_link.gif\" alt=\"Related structures\"></a>" -#define URL_Geo "<a href=\"http://boris.ncbi.nlm.nih.gov:2441/entrez/testers/edgar/query.fcgi?term=%d[gi]&db=geo\"><img border=0 height=16 width=16 src=\"/blast/images/G.gif\" alt=\"Geo\"></a>" +#define URL_Structure_Overview "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\">Related Structures</a>" +#define URL_Geo "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=geo&term=%d[gi]\"><img border=0 height=16 width=16 src=\"/blast/images/G.gif\" alt=\"Geo\"></a>" + #endif diff --git a/api/ffprint.c b/api/ffprint.c index dfe12693..b0fa78ce 100644 --- a/api/ffprint.c +++ b/api/ffprint.c @@ -29,13 +29,16 @@ * * Version Creation Date: 7/15/95 * -* $Revision: 6.7 $ +* $Revision: 6.8 $ * * File Description: * * Modifications: * -------------------------------------------------------------------------- * $Log: ffprint.c,v $ + * Revision 6.8 2003/07/15 14:35:56 dondosha + * Added #defines for substitutes to fprintf and fflush, needed for gzip compression of Web BLAST results + * * Revision 6.7 2002/08/26 22:06:57 kans * ff_RecalculateLinks (MS) to fix hotlink artifact * @@ -125,6 +128,11 @@ parameters combined into Asn2ffJobPtr structure #include <ffprint.h> #include <ncbithr.h> +int (*ff_fprintf)(FILE*, const char *, ...) = fprintf; +int (*ff_fflush)(FILE*) = fflush; + +#define fprintf ff_fprintf +#define fflush ff_fflush static TNlmTls ffprint_tls = NULL; diff --git a/api/findrepl.c b/api/findrepl.c index 43b5ca45..37e91158 100644 --- a/api/findrepl.c +++ b/api/findrepl.c @@ -44,6 +44,15 @@ * RCS Modification History: * ------------------------- * $Log: findrepl.c,v $ +* Revision 6.10 2003/07/31 20:54:54 kans +* FindReplaceString does not need do_replace argument +* +* Revision 6.9 2003/07/31 18:18:03 kans +* added FindReplaceString +* +* Revision 6.8 2003/05/11 21:12:50 kans +* FindReplAligns loops through StdSegPtr chain, also does ssp->ids within +* * Revision 6.7 2002/06/11 14:41:20 kans * added support for locus_tag * @@ -1214,9 +1223,13 @@ static void FindReplAligns ( } break; case SAS_STD : - ssp = (StdSegPtr) sap->segs; - for (slp = ssp->loc; slp != NULL; slp = slp->next) { - VisitSeqIdsInSeqLoc (slp, userdata, FindReplSeqId); + for (ssp = (StdSegPtr) sap->segs; ssp != NULL; ssp = ssp->next) { + for (sip = ssp->ids; sip != NULL; sip = sip->next) { + FindReplSeqId (sip, userdata); + } + for (slp = ssp->loc; slp != NULL; slp = slp->next) { + VisitSeqIdsInSeqLoc (slp, userdata, FindReplSeqId); + } } break; case SAS_DISC : @@ -1699,3 +1712,56 @@ NLM_EXTERN void FindReplaceInEntity ( } } +/*=======================================================================*/ +/* */ +/* FindReplaceString() - find/replace just one string. */ +/* */ +/*=======================================================================*/ + +NLM_EXTERN void FindReplaceString ( + CharPtr PNTR strp, + CharPtr find_string, + CharPtr replace_string, + Boolean case_counts, + Boolean whole_word +) + +{ + int ch; + FindStruct fs; + int j; + + if (strp == NULL || StringHasNoText (find_string)) return; + + MemSet ((Pointer) &fs, 0, sizeof (FindStruct)); + + fs.entityID = 0; + fs.find_string = find_string; + fs.replace_string = replace_string; + fs.case_counts = case_counts; + fs.whole_word = whole_word; + fs.do_replace = TRUE; + fs.select_item = FALSE; + fs.send_update = FALSE; + + fs.did_find = FALSE; + fs.did_replace = FALSE; + fs.dirty = FALSE; + + /* build Boyer-Moore displacement array in advance */ + + fs.subLen = StringLen (find_string); + + for (ch = 0; ch < 256; ch++) { + fs.d [ch] = fs.subLen; + } + for (j = 0; j < (int) (fs.subLen - 1); j++) { + ch = (int) (case_counts ? find_string [j] : TO_UPPER (find_string [j])); + if (ch >= 0 && ch <= 255) { + fs.d [ch] = fs.subLen - j - 1; + } + } + + FindReplString (strp, &fs); +} + diff --git a/api/findrepl.h b/api/findrepl.h index 84f16c72..20df8b12 100644 --- a/api/findrepl.h +++ b/api/findrepl.h @@ -44,6 +44,12 @@ * RCS Modification History: * ------------------------- * $Log: findrepl.h,v $ +* Revision 6.3 2003/07/31 20:54:54 kans +* FindReplaceString does not need do_replace argument +* +* Revision 6.2 2003/07/31 18:18:03 kans +* added FindReplaceString +* * Revision 6.1 2000/11/03 20:36:00 kans * FindReplaceInEntity replaces FindInEntity and FindInEntityX - complete redesign, no longer using AsnExpOptExplore because of the difficulty of replacing with a larger string (TF + JK) * @@ -112,6 +118,14 @@ NLM_EXTERN void FindReplaceInEntity ( Boolean do_seqid_local ); +NLM_EXTERN void FindReplaceString ( + CharPtr PNTR strp, + CharPtr find_string, + CharPtr replace_string, + Boolean case_counts, + Boolean whole_word +); + #ifdef __cplusplus extern "C" } diff --git a/api/gbfeat.c b/api/gbfeat.c index dd2fa347..55eeac51 100644 --- a/api/gbfeat.c +++ b/api/gbfeat.c @@ -3,9 +3,13 @@ * -- all routines for checking genbank feature table * -- all extern variables are in gbftglob.c * 10-11-93 -$Revision: 6.8 $ +$Revision: 6.9 $ * * $Log: gbfeat.c,v $ +* Revision 6.9 2003/10/09 15:35:51 bazhin +* Qualifier "rpt_unit" is removed from the list of ones to be splitted +* by commas. +* * Revision 6.8 2001/12/06 17:00:41 kans * TextSave takes size_t, not Int2, otherwise titin protein tries to allocate negative number * @@ -74,9 +78,9 @@ $Revision: 6.8 $ #include <gbfeat.h> #include <errdefn.h> -#define ParFlat_SPLIT_IGNORE 5 +#define ParFlat_SPLIT_IGNORE 4 CharPtr GBQual_names_split_ignore[ParFlat_SPLIT_IGNORE] = { -"citation", "EC_number", "rpt_type", "rpt_unit", "usedin"}; +"citation", "EC_number", "rpt_type", "usedin"}; /*------------------------- GBQualNameValid() ------------------------*/ /**************************************************************************** diff --git a/api/gbftdef.h b/api/gbftdef.h index fa06de1a..f9d5df3c 100644 --- a/api/gbftdef.h +++ b/api/gbftdef.h @@ -3,6 +3,15 @@ * -- GenBank Feature table define file * * $Log: gbftdef.h,v $ +* Revision 6.17 2003/10/07 13:50:36 kans +* added gap, operon, oriT features and ecotype, estimated_length and operon qualifiers +* +* Revision 6.16 2003/08/19 15:18:37 kans +* added GBQUAL_segment, increased ParFlat_TOTAL_GBQUAL and opt_qual array size +* +* Revision 6.15 2003/05/07 22:03:31 kans +* added GBQUAL_mol_type, raised opt_qual array to 51 elements +* * Revision 6.14 2003/02/22 21:20:05 kans * added GBQUAL_locus_tag, legal for now in gene features * @@ -175,13 +184,18 @@ #define GBQUAL_isolation_source 79 #define GBQUAL_serovar 80 #define GBQUAL_locus_tag 81 +#define GBQUAL_mol_type 82 +#define GBQUAL_segment 83 +#define GBQUAL_ecotype 84 +#define GBQUAL_estimated_length 85 +#define GBQUAL_operon 86 -#define ParFlat_TOTAL_GBQUAL 82 +#define ParFlat_TOTAL_GBQUAL 87 #define ParFlat_TOTAL_IntOr 3 #define ParFlat_TOTAL_LRB 3 #define ParFlat_TOTAL_Exp 2 #define ParFlat_TOTAL_Rpt 7 -#define ParFlat_TOTAL_GBFEAT 64 +#define ParFlat_TOTAL_GBFEAT 67 #define Class_pos_aa 1 #define Class_text 2 @@ -214,7 +228,7 @@ typedef struct sematic_gbfeature { Int2 mand_num; Int2 mand_qual[5]; Int2 opt_num; - Int2 opt_qual[50]; + Int2 opt_qual[55]; } SematicFeat, PNTR SematicFeatPtr; typedef struct gbfeat_name { diff --git a/api/gbftglob.c b/api/gbftglob.c index 894ed6b6..e9b26887 100644 --- a/api/gbftglob.c +++ b/api/gbftglob.c @@ -4,6 +4,31 @@ * -- all the defined variables in the gbfeat.h * * $Log: gbftglob.c,v $ +* Revision 6.37 2003/10/07 17:05:58 kans +* added allele and operon to many features +* +* Revision 6.36 2003/10/07 13:50:36 kans +* added gap, operon, oriT features and ecotype, estimated_length and operon qualifiers +* +* Revision 6.35 2003/10/06 16:19:45 kans +* rpt_unit went from Class_token to Class_text +* +* Revision 6.34 2003/10/03 15:16:14 bazhin +* Numeric value "opt_num" in STATIC__ParFlat_GBFeat array changed +* from 13 to 15 for feature "allele" to cover "usedin" and "locus_tag" +* qualifiers. +* +* Revision 6.33 2003/10/03 12:44:27 bazhin +* Numeric value "opt_num" in STATIC__ParFlat_GBFeat array changed +* from 13 to 14 for feature "precursor_RNA" to cover "locus_tag" +* qualifier. +* +* Revision 6.32 2003/08/19 15:19:04 kans +* added GBQUAL_segment, increased ParFlat_TOTAL_GBQUAL and opt_qual array size +* +* Revision 6.31 2003/05/07 22:03:31 kans +* added GBQUAL_mol_type, raised opt_qual array to 51 elements +* * Revision 6.30 2003/03/06 16:23:13 kans * when gene qualifier was removed as required on gene feature, it was not put back as optional qualifier * @@ -181,7 +206,7 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = { {"proviral", Class_none}, {"pseudo", Class_none}, {"rearranged", Class_none}, { "replace", Class_text}, {"rpt_family", Class_text}, {"rpt_type", Class_rpt}, - { "rpt_unit", Class_token}, { "sex", Class_text}, + { "rpt_unit", Class_text}, { "sex", Class_text}, {"sequenced_mol", Class_text}, { "serotype", Class_text}, {"specific_host", Class_text}, {"standard_name", Class_text}, {"strain", Class_text}, {"sub_clone", Class_text}, @@ -195,8 +220,9 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = { {"organelle", Class_text}, {"transcript_id", Class_text}, {"transgenic", Class_none}, {"environmental_sample", Class_none}, {"isolation_source", Class_text}, {"serovar", Class_text}, - {"locus_tag", Class_text}, - }; + {"locus_tag", Class_text}, {"mol_type", Class_text}, + {"segment", Class_text},{"ecotype", Class_text}, + {"estimated_length", Class_text}, {"operon", Class_text} }; NLM_EXTERN GbFeatNamePtr x_ParFlat_GBQual_names(void) { return STATIC__ParFlat_GBQual_names; @@ -214,70 +240,72 @@ CharPtr ParFlat_RptString[ParFlat_TOTAL_Rpt] = { "dispersed", "other"}; static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { - {"allele", 0, {-1, -1, -1, -1, -1}, 13, + {"allele", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_frequency, GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_phenotype, GBQUAL_product, GBQUAL_replace, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"attenuator", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"attenuator", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label, GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_phenotype, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"C_region", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"C_region", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"CAAT_signal", 0, {-1, -1, -1, -1, -1}, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"CAAT_signal", 0, {-1, -1, -1, -1, -1}, 11, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label, GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"CDS", 0, {-1, -1, -1, -1, -1}, 25, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"CDS", 0, {-1, -1, -1, -1, -1}, 26, {GBQUAL_allele, GBQUAL_citation, GBQUAL_codon, GBQUAL_codon_start, GBQUAL_db_xref, GBQUAL_EC_number,GBQUAL_evidence, GBQUAL_exception, GBQUAL_function, GBQUAL_gdb_xref, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_number, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_translation, GBQUAL_transl_except, GBQUAL_transl_table, GBQUAL_usedin, GBQUAL_protein_id, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1}}, - {"conflict", 1, {GBQUAL_citation, -1, -1, -1, -1}, 8, + GBQUAL_locus_tag, GBQUAL_operon, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"conflict", 1, {GBQUAL_citation, -1, -1, -1, -1}, 9, {GBQUAL_db_xref,GBQUAL_evidence, GBQUAL_map, GBQUAL_note, GBQUAL_gene, GBQUAL_usedin, - GBQUAL_replace, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_replace, GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"D-loop", 0, {-1, -1, -1, -1}, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"D-loop", 0, {-1, -1, -1, -1}, 11, {GBQUAL_evidence, GBQUAL_citation, GBQUAL_label, GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin, GBQUAL_db_xref, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"D_segment", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"D_segment", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_db_xref, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"enhancer", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"enhancer", 0, {-1, -1, -1, -1, -1}, 12, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label, GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"exon", 0, {-1, -1, -1, -1, -1}, 17, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_EC_number, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, @@ -285,28 +313,34 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"GC_signal", 0, {-1, -1, -1, -1, -1}, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"gap", 1, {GBQUAL_estimated_length, -1, -1, -1, -1}, 0, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"GC_signal", 0, {-1, -1, -1, -1, -1}, 11, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label, GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"gene", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"gene", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_pseudo, GBQUAL_phenotype, GBQUAL_usedin, GBQUAL_locus_tag, + GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"iDNA", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"iDNA", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_label, GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_number, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"intron", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_allele, GBQUAL_citation, GBQUAL_cons_splice, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, @@ -314,244 +348,259 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1,}}, - {"J_segment", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"J_segment", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"LTR", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"LTR", 0, {-1, -1, -1, -1, -1}, 12, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, - GBQUAL_gene, - GBQUAL_label, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, - GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_gene, GBQUAL_label, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, + GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 16, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 17, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_EC_number, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_pseudo, GBQUAL_product, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_binding", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"misc_binding", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 12, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_difference", 0, {-1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"misc_difference", 0, {-1, -1, -1, -1}, 15, {GBQUAL_phenotype, GBQUAL_citation, GBQUAL_clone, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_partial, GBQUAL_replace, GBQUAL_note, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1}}, - {"misc_feature", 0, {-1, -1, -1, -1, -1}, 16, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"misc_feature", 0, {-1, -1, -1, -1, -1}, 17, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_number, GBQUAL_phenotype, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, - GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_recomb", 0, {-1, -1, -1, -1, -1}, 12, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"misc_recomb", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_organism, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_signal", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"misc_signal", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_phenotype, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"misc_structure", 0, {-1, -1, -1, -1, -1}, 12, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"misc_structure", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"modified_base", 1, {GBQUAL_mod_base, -1, -1, -1, -1}, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"modified_base", 1, {GBQUAL_mod_base, -1, -1, -1, -1}, 11, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_frequency, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"mRNA", 0, {-1, -1, -1, -1, -1}, 16, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"mRNA", 0, {-1, -1, -1, -1, -1}, 17, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_transcript_id, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_transcript_id, GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"mutation", 0, {-1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_frequency, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_phenotype, GBQUAL_product, GBQUAL_replace, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"N_region", 0, {-1, -1, -1, -1, -1}, 12, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"old_sequence", 1, {GBQUAL_citation, -1, -1, -1}, 9, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"old_sequence", 1, {GBQUAL_citation, -1, -1, -1}, 10, {GBQUAL_db_xref, GBQUAL_gene, GBQUAL_evidence, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_replace, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"polyA_signal", 0, {-1, -1, -1, -1, -1}, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"operon", 1, {GBQUAL_operon, -1, -1, -1, -1}, 12, + {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, + GBQUAL_function, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, + GBQUAL_pseudo, GBQUAL_phenotype, GBQUAL_usedin, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"oriT", 0, {-1, -1, -1, -1, -1}, 16, + {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_direction, GBQUAL_evidence, + GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, + GBQUAL_rpt_family, GBQUAL_rpt_type, GBQUAL_rpt_unit, GBQUAL_standard_name, + GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"polyA_signal", 0, {-1, -1, -1, -1, -1}, 11, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"polyA_site", 0, {-1, -1, -1, -1, -1}, 9, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"polyA_site", 0, {-1, -1, -1, -1, -1}, 10, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"primer_bind", 0, {-1, -1, -1, -1, -1}, 12, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"primer_bind", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_PCR_conditions,GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"promoter", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"promoter", 0, {-1, -1, -1, -1, -1}, 16, {GBQUAL_citation,GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_phenotype, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"protein_bind", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 12, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"protein_bind", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 13, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"RBS", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"RBS", 0, {-1, -1, -1, -1, -1}, 12, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map,GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, - GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"repeat_region", 0, {-1, -1, -1, -1, -1}, 17, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"repeat_region", 0, {-1, -1, -1, -1, -1}, 18, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_insertion_seq, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_rpt_type, GBQUAL_rpt_family, GBQUAL_rpt_unit, GBQUAL_standard_name, GBQUAL_transposon, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -1, -1, -1, -1, -1, -1, -1, -1}}, - {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_rpt_family, GBQUAL_rpt_type, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"rep_origin", 0, {-1, -1, -1, -1, -1}, 12, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"rep_origin", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_citation, GBQUAL_direction, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"rRNA", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"rRNA", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"S_region", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"S_region", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"satellite", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"satellite", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_rpt_type, GBQUAL_rpt_family,GBQUAL_rpt_unit, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"scRNA", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"scRNA", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"sig_peptide", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"sig_peptide", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"snoRNA", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"snoRNA", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"snRNA", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"snRNA", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"source", 1, {GBQUAL_organism, -1, -1, -1, -1}, 50, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"source", 1, {GBQUAL_organism, -1, -1, -1, -1}, 53, {GBQUAL_cell_line, GBQUAL_cell_type, GBQUAL_chloroplast, GBQUAL_chromoplast, GBQUAL_chromosome, GBQUAL_citation, GBQUAL_clone, GBQUAL_clone_lib, GBQUAL_country, GBQUAL_cultivar, @@ -566,70 +615,75 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_tissue_type, GBQUAL_transposon, GBQUAL_usedin, GBQUAL_specimen_voucher, GBQUAL_variety, GBQUAL_virion, GBQUAL_organelle, GBQUAL_transgenic, GBQUAL_environmental_sample, - GBQUAL_isolation_source, GBQUAL_serovar}}, - {"stem_loop", 0, {-1, -1, -1, -1, -1}, 12, + GBQUAL_isolation_source, GBQUAL_serovar, GBQUAL_mol_type, GBQUAL_segment, + GBQUAL_ecotype, -1, -1}}, + {"stem_loop", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"STS", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"STS", 0, {-1, -1, -1, -1, -1}, 12, {GBQUAL_citation, GBQUAL_standard_name, GBQUAL_db_xref, GBQUAL_gene, GBQUAL_label, GBQUAL_usedin, GBQUAL_note, GBQUAL_partial, GBQUAL_map, - GBQUAL_evidence, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_evidence, GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 11, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"terminator", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"terminator", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map,GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 14, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 15, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note,GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"tRNA", 0, {-1, -1, -1, -1, -1}, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"tRNA", 0, {-1, -1, -1, -1, -1}, 16, {GBQUAL_anticodon, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"unsure", 0, {-1, -1, -1, -1}, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"unsure", 0, {-1, -1, -1, -1}, 11, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_usedin, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_replace, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"V_region", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"V_region", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"V_segment", 0, {-1, -1, -1, -1, -1}, 13, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"V_segment", 0, {-1, -1, -1, -1, -1}, 14, {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"variation", 0, {-1, -1, -1, -1}, 16, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_frequency, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, @@ -637,49 +691,49 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = { GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"3'clip", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"3'UTR", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"5'clip", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_function, GBQUAL_gene, GBQUAL_evidence, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, {"5'UTR", 0, {-1, -1, -1, -1, -1}, 13, {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"-10_signal", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"-10_signal", 0, {-1, -1, -1, -1, -1}, 12, {GBQUAL_citation,GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map,GBQUAL_note,GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - {"-35_signal", 0, {-1, -1, -1, -1, -1}, 11, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + {"-35_signal", 0, {-1, -1, -1, -1, -1}, 12, {GBQUAL_citation,GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_label, GBQUAL_map,GBQUAL_note,GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin, - GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}} + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -1, -1, -1, -1}} }; NLM_EXTERN SematicFeatPtr x_ParFlat_GBFeat(void) { diff --git a/api/lsqfetch.c b/api/lsqfetch.c index fb778990..68378ca3 100644 --- a/api/lsqfetch.c +++ b/api/lsqfetch.c @@ -37,6 +37,12 @@ * Date Name Description of modification * * $Log: lsqfetch.c,v $ +* Revision 6.16 2003/08/27 21:24:05 kans +* enable alt indexed fasta looks up previously registered function, changes settings for new path +* +* Revision 6.15 2003/08/27 19:27:43 kans +* added AltIndexedFastaLibFetch functions for chimpanzee genome project +* * Revision 6.14 2002/11/13 23:07:37 johnson * Changed make_lib such that it looks to see if it matches the *whole* seq-id * (defined by the next character being non-alphanumeric). @@ -81,6 +87,12 @@ * Revision changed to 6.0 * * $Log: lsqfetch.c,v $ +* Revision 6.16 2003/08/27 21:24:05 kans +* enable alt indexed fasta looks up previously registered function, changes settings for new path +* +* Revision 6.15 2003/08/27 19:27:43 kans +* added AltIndexedFastaLibFetch functions for chimpanzee genome project +* * Revision 6.14 2002/11/13 23:07:37 johnson * Changed make_lib such that it looks to see if it matches the *whole* seq-id * (defined by the next character being non-alphanumeric). @@ -1240,12 +1252,13 @@ static FastaIndexPtr ReadFastaIndex ( return fip; } -/* object manager registerable fetch function */ +/* human genome object manager registerable fetch function */ static CharPtr fastalibfetchproc = "IndexedFastaLibBioseqFetch"; typedef struct flibftch { CharPtr path; + CharPtr fastaname; FastaIndexPtr currentfip; } FastaLibFetchData, PNTR FastaLibFetchPtr; @@ -1352,10 +1365,145 @@ NLM_EXTERN void IndexedFastaLibFetchDisable (void) flfp = (FastaLibFetchPtr) ompp->procdata; if (flfp == NULL) return; MemFree (flfp->path); + /* MemFree (flfp->fastaname); */ + FreeFastaIndex (flfp->currentfip); + MemFree (flfp); +} + +/* chimpanzee genome object manager registerable fetch function */ + +static CharPtr altfastalibfetchproc = "AltIndexedFastaLibBioseqFetch"; + +static void ChangeLocalToGenbank (BioseqPtr bsp, Pointer userdata) + +{ + Char id [41], tmp [41]; + SeqIdPtr sip; + + for (sip = bsp->id; sip != NULL && sip->choice != SEQID_LOCAL; sip = sip->next) continue; + if (sip == NULL) return; + SeqIdWrite (sip, id, PRINTID_REPORT, sizeof (id)); + sprintf (tmp, "gb|%s", id); + sip = SeqIdParse (tmp); + bsp->id = SeqIdSetFree (bsp->id); + bsp->id = sip; + SeqMgrReplaceInBioseqIndex (bsp); +} + +static Int2 LIBCALLBACK AltIndexedFastaLibBioseqFetchFunc (Pointer data) + +{ + BioseqPtr bsp; + Pointer dataptr = NULL; + Uint2 datatype, entityID = 0; + Char file [FILENAME_MAX], path [PATH_MAX], id [41]; + FastaLibFetchPtr flfp; + FILE *fp; + Int4 offset; + OMProcControlPtr ompcp; + ObjMgrProcPtr ompp; + SeqEntryPtr sep = NULL; + SeqIdPtr sip; + + ompcp = (OMProcControlPtr) data; + if (ompcp == NULL) return OM_MSG_RET_ERROR; + ompp = ompcp->proc; + if (ompp == NULL) return OM_MSG_RET_ERROR; + flfp = (FastaLibFetchPtr) ompp->procdata; + if (flfp == NULL) return OM_MSG_RET_ERROR; + sip = (SeqIdPtr) ompcp->input_data; + if (sip == NULL) return OM_MSG_RET_ERROR; + + if (sip->choice == SEQID_GENBANK) { + + SeqIdWrite (sip, id, PRINTID_REPORT, sizeof (id)); + if (flfp->currentfip != NULL) { + offset = SearchFastaIndex (flfp->currentfip, id); + if (offset < 0) return OM_MSG_RET_ERROR; + sprintf (file, "%s.fsa", flfp->fastaname); + StringNCpy_0 (path, flfp->path, sizeof (path)); + FileBuildPath (path, NULL, file); + fp = FileOpen (path, "r"); + if (fp == NULL) return OM_MSG_RET_ERROR; + fseek (fp, offset, SEEK_SET); + dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, + FALSE, FALSE, TRUE, FALSE); + if (dataptr != NULL) { + sep = GetTopSeqEntryForEntityID (entityID); + } + FileClose (fp); + } + } + + if (sep == NULL) return OM_MSG_RET_ERROR; + VisitBioseqsInSep (sep, NULL, ChangeLocalToGenbank); + bsp = BioseqFindInSeqEntry (sip, sep); + ompcp->output_data = (Pointer) bsp; + ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep); + return OM_MSG_RET_DONE; +} + +NLM_EXTERN Boolean AltIndexedFastaLibFetchEnable (CharPtr path, CharPtr fastaname) + +{ + Char file [FILENAME_MAX]; + FastaLibFetchPtr flfp = NULL; + Boolean is_new = FALSE; + ObjMgrPtr omp; + ObjMgrProcPtr ompp; + Char str [PATH_MAX]; + + StringNCpy_0 (str, path, sizeof (str)); + TrimSpacesAroundString (str); + omp = ObjMgrGet (); + ompp = ObjMgrProcFind (omp, 0, altfastalibfetchproc, OMPROC_FETCH); + if (ompp != NULL) { + flfp = (FastaLibFetchPtr) ompp->procdata; + if (flfp != NULL) { + flfp->path = MemFree (flfp->path); + flfp->fastaname = MemFree (flfp->fastaname); + flfp->currentfip = FreeFastaIndex (flfp->currentfip); + } + } else { + flfp = (FastaLibFetchPtr) MemNew (sizeof (FastaLibFetchData)); + is_new = TRUE; + } + if (flfp != NULL) { + flfp->path = StringSave (str); + flfp->fastaname = StringSave (fastaname); + sprintf (file, "%s.idx", fastaname); + FileBuildPath (str, NULL, file); + flfp->currentfip = ReadFastaIndex (str); + } + if (is_new) { + ObjMgrProcLoad (OMPROC_FETCH, altfastalibfetchproc, altfastalibfetchproc, + OBJ_SEQID, 0, OBJ_BIOSEQ, 0, (Pointer) flfp, + AltIndexedFastaLibBioseqFetchFunc, PROC_PRIORITY_DEFAULT); + } + return TRUE; +} + +NLM_EXTERN void AltIndexedFastaLibFetchDisable (void) + +{ + FastaLibFetchPtr flfp; + ObjMgrPtr omp; + ObjMgrProcPtr ompp; + + omp = ObjMgrGet (); + ompp = ObjMgrProcFind (omp, 0, altfastalibfetchproc, OMPROC_FETCH); + if (ompp == NULL) return; + ObjMgrFreeUserData (0, ompp->procid, OMPROC_FETCH, 0); + flfp = (FastaLibFetchPtr) ompp->procdata; + if (flfp == NULL) return; + MemFree (flfp->path); + MemFree (flfp->fastaname); FreeFastaIndex (flfp->currentfip); MemFree (flfp); } +/* common function for creating indexes of fasta library files */ + NLM_EXTERN void CreateFastaIndex ( CharPtr file ) diff --git a/api/lsqfetch.h b/api/lsqfetch.h index a0e9daac..52a59363 100644 --- a/api/lsqfetch.h +++ b/api/lsqfetch.h @@ -29,7 +29,7 @@ * * Version Creation Date: 5/25/95 * -* $Revision: 6.2 $ +* $Revision: 6.3 $ * * File Description: Utilities for fetching local sequences * @@ -40,6 +40,9 @@ * * * $Log: lsqfetch.h,v $ +* Revision 6.3 2003/08/27 19:27:43 kans +* added AltIndexedFastaLibFetch functions for chimpanzee genome project +* * Revision 6.2 2001/03/12 23:19:33 kans * added IndexedFastaLib functions - currently uses genome contig naming conventions * @@ -246,10 +249,18 @@ NLM_EXTERN Boolean CheckDnaResidue PROTO((CharPtr seq_ptr, Int4 ck_len, Int4Ptr /**********************************************************************/ -/* indexed FASTA lib functions - currently uses genome contig naming convention */ +/* indexed FASTA lib functions - currently uses human genome contig naming convention */ NLM_EXTERN Boolean IndexedFastaLibFetchEnable (CharPtr path); NLM_EXTERN void IndexedFastaLibFetchDisable (void); + +/* alternative indexed FASTA lib functions - currently uses chimpanzee genome contig naming convention */ + +NLM_EXTERN Boolean AltIndexedFastaLibFetchEnable (CharPtr path, CharPtr fastaname); +NLM_EXTERN void AltIndexedFastaLibFetchDisable (void); + +/* common function for creating indexes of fasta library files */ + NLM_EXTERN void CreateFastaIndex (CharPtr file); diff --git a/api/salpstat.c b/api/salpstat.c index 68ca2aae..4f741ace 100644 --- a/api/salpstat.c +++ b/api/salpstat.c @@ -631,7 +631,7 @@ NLM_EXTERN Int4Ptr LIBCALL SeqAlignListGapList(SeqAlignPtr sap,Int4Ptr gap_num){ /* Utility subroutine for SeqAlignWindowStats */ -static void LIBCALL UpdateWindow(Int4 win_size,Int4 cur_pos,Uint1Ptr win_buf1,Uint1Ptr win_buf2,Uint1 res_1,Uint1 res_2,Uint1 code,Int4Ptr win_gap,Int4Ptr win_gapmismatch,Int4Ptr win_mismatch,Int4Ptr win_match,Boolean N_are_not_mismatches){ +static void LIBCALL UpdateWindowStats(Int4 win_size,Int4 cur_pos,Uint1Ptr win_buf1,Uint1Ptr win_buf2,Uint1 res_1,Uint1 res_2,Uint1 code,Int4Ptr win_gap,Int4Ptr win_gapmismatch,Int4Ptr win_mismatch,Int4Ptr win_match,Boolean N_are_not_mismatches){ Uint1 exit_char1,exit_char2; Int4 loc; if(win_size<=0) return; /* For case where user didn't care about window */ @@ -828,7 +828,7 @@ NLM_EXTERN Int4 LIBCALL SeqAlignWindowStats(SeqAlignPtr align, BioseqPtr bsp_1, res_1 = SeqPortGetResidue(spp_1); res_2= '-'; } - UpdateWindow(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches); + UpdateWindowStats(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches); CheckMinMax(win_size,cur_pos,win_gap, win_gapmismatch,win_mismatch,win_match, mmin_mismatch,mmax_mismatch, @@ -864,7 +864,7 @@ NLM_EXTERN Int4 LIBCALL SeqAlignWindowStats(SeqAlignPtr align, BioseqPtr bsp_1, } else ++mismatches; - UpdateWindow(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches); + UpdateWindowStats(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches); CheckMinMax(win_size,cur_pos,win_gap, win_gapmismatch,win_mismatch,win_match, mmin_mismatch,mmax_mismatch, diff --git a/api/seqmgr.c b/api/seqmgr.c index 65f24262..9994a256 100644 --- a/api/seqmgr.c +++ b/api/seqmgr.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/94 * -* $Revision: 6.203 $ +* $Revision: 6.210 $ * * File Description: Manager for Bioseqs and BioseqSets * @@ -39,6 +39,27 @@ * ------- ---------- ----------------------------------------------------- * * $Log: seqmgr.c,v $ +* Revision 6.210 2003/10/24 19:49:11 kans +* operon feature of equal range sorted before gene, mRNA, CDS +* +* Revision 6.209 2003/10/23 17:40:01 kans +* added SeqMgrGetOverlappingOperon and bspextra operonsByPos and numoperons fields +* +* Revision 6.208 2003/10/02 16:12:31 bollin +* added COMMON_INTERVAL overlap type to TestForOverlap +* +* Revision 6.207 2003/09/22 17:27:29 kans +* strip RNA- prefix, not just - on RNAs +* +* Revision 6.206 2003/09/22 16:13:20 kans +* LockFarComponentsEx takes new SeqLocPtr parameter +* +* Revision 6.205 2003/09/22 15:55:06 kans +* all rna context labels were searched for dash, now just trna +* +* Revision 6.204 2003/08/04 20:41:20 kans +* SeqMgrProcessNonIndexedBioseq needed to reset version to 0 each time through outer loop (EY) +* * Revision 6.203 2003/04/03 22:40:09 kans * feature index location problem now reports latest identifier in record to make it easier to find problem * @@ -3420,7 +3441,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it) Boolean indexed; TextSeqIdPtr tsip; SeqMgrPtr smp; - Int2 version = 0; + Int2 version; Boolean sort_now = TRUE; smp = SeqMgrReadLock(); @@ -3456,6 +3477,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it) if (bsp->id != NULL) { indexed = TRUE; + version = 0; for (sip = bsp->id; sip != NULL; sip = sip->next) { oldchoice = 0; @@ -4041,6 +4063,7 @@ static Boolean SeqMgrClearBioseqExtraData (ObjMgrDataPtr omdp) bspextra->CDSsByPos = MemFree (bspextra->CDSsByPos); bspextra->pubsByPos = MemFree (bspextra->pubsByPos); bspextra->orgsByPos = MemFree (bspextra->orgsByPos); + bspextra->operonsByPos = MemFree (bspextra->operonsByPos); /* free list of descriptor information */ @@ -4104,6 +4127,7 @@ static Boolean SeqMgrClearBioseqExtraData (ObjMgrDataPtr omdp) bspextra->numCDSs = 0; bspextra->numpubs = 0; bspextra->numorgs = 0; + bspextra->numoperons = 0; bspextra->numsegs = 0; bspextra->min = INT4_MAX; @@ -5467,9 +5491,9 @@ static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp, FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT); ptr = buf; if (sfp->data.choice == SEQFEAT_RNA) { - ptr = StringChr (buf, '-'); + ptr = StringStr (buf, "RNA-"); if (ptr != NULL) { - ptr++; + ptr += 4; } else { ptr = buf; } @@ -6163,10 +6187,19 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2) return -1; /* was 1 */ } else if (sp1->right < sp2->right) { return 1; /* was -1 */ + } + + /* given identical extremes, put operon features first */ + + if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) { + return -1; + } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) { + return 1; + } - /* given identical extremes, put gene features first */ + /* then gene features */ - } else if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) { + if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) { return -1; } else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) { return 1; @@ -6324,10 +6357,19 @@ static int LIBCALLBACK SortFeatItemListByRev (VoidPtr vp1, VoidPtr vp2) return -1; } else if (sp1->left > sp2->left) { return 1; + } + + /* given identical extremes, put operon features first */ + + if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) { + return -1; + } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) { + return 1; + } - /* given identical extremes, put gene features first */ + /* then gene features */ - } else if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) { + if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) { return -1; } else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) { return 1; @@ -6703,6 +6745,7 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats) bspextra->CDSsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numCDSs), 0, FEATDEF_CDS); bspextra->pubsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numpubs), 0, FEATDEF_PUB); bspextra->orgsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numorgs), 0, FEATDEF_BIOSRC); + bspextra->operonsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numoperons), 0, FEATDEF_operon); } if (dorevfeats) { @@ -7679,9 +7722,10 @@ static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp, } } - } else if (overlapType == INTERVAL_OVERLAP) { + } else if (overlapType == INTERVAL_OVERLAP || overlapType == COMMON_INTERVAL) { - /* requires overlap between at least one pair of intervals */ + /* requires overlap between at least one pair of intervals (INTERVAL_OVERLAP) */ + /* or one complete shared interval (COMMON_INTERVAL) */ if (feat->right >= left && feat->left <= right) { sfp = feat->sfp; @@ -7690,7 +7734,11 @@ static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp, while (a != NULL) { b = SeqLocFindNext (sfp->location, NULL); while (b != NULL) { - if (SeqLocCompare (a, b) != SLC_NO_MATCH) { + if ((overlapType == INTERVAL_OVERLAP + && SeqLocCompare (a, b) != SLC_NO_MATCH) + || (overlapType == COMMON_INTERVAL + && SeqLocCompare (a, b) == SLC_A_EQ_B)) + { diff = ABS (left - feat->left) + ABS (feat->right - right); return diff; } @@ -7825,6 +7873,9 @@ static SeqFeatPtr SeqMgrGetBestOverlappingFeat (SeqLocPtr slp, Uint2 subtype, array = bspextra->orgsByPos; num = bspextra->numorgs; break; + case FEATDEF_operon : + array = bspextra->operonsByPos; + num = bspextra->numoperons; default : break; } @@ -8058,6 +8109,12 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingSource (SeqLocPtr slp, SeqMgrF return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_BIOSRC, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL); } +NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingOperon (SeqLocPtr slp, SeqMgrFeatContext PNTR context) + +{ + return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_operon, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL); +} + /***************************************************************************** * * SeqMgrGetFeatureByLabel returns the a feature with the desired label @@ -9542,7 +9599,24 @@ static void LockAllProducts (SeqFeatPtr sfp, Pointer userdata) } } -NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products) +static void LockAllSublocs (SeqLocPtr loc, Pointer userdata) + +{ + SeqLocPtr slp = NULL; + ValNodePtr PNTR vnpp; + + if (loc == NULL) return; + vnpp = (ValNodePtr PNTR) userdata; + if (vnpp == NULL) return; + + while ((slp = SeqLocFindNext (loc, slp)) != NULL) { + if (slp != NULL && slp->choice != SEQLOC_NULL) { + LockAllSegments (slp, vnpp); + } + } +} + +NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, SeqLocPtr loc) { ValNodePtr bsplist = NULL; @@ -9559,6 +9633,9 @@ NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, if (products) { VisitFeaturesInSep (sep, (Pointer) &bsplist, LockAllProducts); } + if (loc != NULL) { + LockAllSublocs (sep, (Pointer) &bsplist); + } SeqEntrySetScope (oldsep); return bsplist; } @@ -9566,7 +9643,7 @@ NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, NLM_EXTERN ValNodePtr LockFarComponents (SeqEntryPtr sep) { - return LockFarComponentsEx (sep, TRUE, FALSE, FALSE); + return LockFarComponentsEx (sep, TRUE, FALSE, FALSE, NULL); } NLM_EXTERN ValNodePtr UnlockFarComponents (ValNodePtr bsplist) diff --git a/api/seqmgr.h b/api/seqmgr.h index 6a2d8b4e..86731995 100644 --- a/api/seqmgr.h +++ b/api/seqmgr.h @@ -29,7 +29,7 @@ * * Version Creation Date: 9/94 * -* $Revision: 6.50 $ +* $Revision: 6.52 $ * * File Description: Manager for Bioseqs and BioseqSets * @@ -40,6 +40,12 @@ * * * $Log: seqmgr.h,v $ +* Revision 6.52 2003/10/23 17:40:01 kans +* added SeqMgrGetOverlappingOperon and bspextra operonsByPos and numoperons fields +* +* Revision 6.51 2003/09/22 16:13:20 kans +* LockFarComponentsEx takes new SeqLocPtr parameter +* * Revision 6.50 2003/02/12 14:20:47 kans * added IsNonGappedLiteral, used to allow compressed deltas as (previously always raw) parts of segsets * @@ -931,6 +937,7 @@ typedef struct bioseqextra { SMFeatItemPtr PNTR CDSsByPos; /* subset of featsByPos array containing only CDS features */ SMFeatItemPtr PNTR pubsByPos; /* subset of featsByPos array containing only publication features */ SMFeatItemPtr PNTR orgsByPos; /* subset of featsByPos array containing only biosource features */ + SMFeatItemPtr PNTR operonsByPos; /* subset of featsByPos array containing only operon features */ BioseqPtr parentBioseq; /* segmented parent of this raw part all packaged together */ SMSeqIdxPtr segparthead; /* linked list to speed mapping from parts to segmented bioseq */ @@ -946,6 +953,7 @@ typedef struct bioseqextra { Int4 numCDSs; /* number of elements in CDSsByPos array */ Int4 numpubs; /* number of elements in pubsByPos array */ Int4 numorgs; /* number of elements in orgsByPos array */ + Int4 numoperons; /* number of elements in operonsByPos array */ Int4 numsegs; /* number of segments in partslist array */ @@ -1048,7 +1056,7 @@ NLM_EXTERN SeqAlignPtr LIBCALL SeqMgrFindSeqAlignByID PROTO((Uint2 entityID, Uin NLM_EXTERN ValNodePtr LockFarComponents (SeqEntryPtr sep); -NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products); +NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, SeqLocPtr loc); NLM_EXTERN ValNodePtr UnlockFarComponents (ValNodePtr bsplist); diff --git a/api/seqport.c b/api/seqport.c index e19eb81e..a42d70d3 100644 --- a/api/seqport.c +++ b/api/seqport.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/13/91 * -* $Revision: 6.79 $ +* $Revision: 6.82 $ * * File Description: Ports onto Bioseqs * @@ -39,6 +39,15 @@ * ------- ---------- ----------------------------------------------------- * * $Log: seqport.c,v $ +* Revision 6.82 2003/08/18 21:07:35 kans +* RevCompStr was stepping on str variable +* +* Revision 6.81 2003/08/18 20:09:46 kans +* SeqPortStreamLoc calls SeqPortStream recursively to local buffer, reverse complements if necessary, and passes appropriate subsequence to callback - speeds up far deltas that point to other far deltas +* +* Revision 6.80 2003/08/07 19:54:33 kans +* TransTableTranslateCommon turns on no_start only if CDS location is 5prime partial, not if product is missing the amino end +* * Revision 6.79 2002/11/11 18:02:40 kans * added SeqPortStream to efficiently stream through a sequence * @@ -2202,6 +2211,84 @@ static void SeqPortStreamLit ( BioseqFree (bsp); } +static void RevCompStr ( + CharPtr str +) + +{ + Char ch; + CharPtr complementBase = " TVGH CD M KN YSAABW R "; + Int2 i; + Uint1 letterToComp [256]; + Char lttr; + CharPtr nd; + CharPtr tmp; + + if (str == NULL) return; + + /* set up complementation lookup table */ + + for (i = 0; i < 256; i++) { + letterToComp [i] = '\0'; + } + for (ch = 'A', i = 1; ch <= 'Z'; ch++, i++) { + lttr = complementBase [i]; + if (lttr != ' ') { + letterToComp [(int) (Uint1) ch] = lttr; + } + } + for (ch = 'a', i = 1; ch <= 'z'; ch++, i++) { + lttr = complementBase [i]; + if (lttr != ' ') { + letterToComp [(int) (Uint1) ch] = lttr; + } + } + + /* reverse string */ + + nd = str; + while (*nd != '\0') { + nd++; + } + nd--; + + tmp = str; + while (nd > tmp) { + ch = *nd; + *nd = *tmp; + *tmp = ch; + nd--; + tmp++; + } + + /* complement string */ + + nd = str; + ch = *nd; + while (ch != '\0') { + *nd = letterToComp [(int) (Uint1) ch]; + nd++; + ch = *nd; + } +} + +static void LIBCALLBACK SaveLocStream ( + CharPtr sequence, + Pointer userdata +) + +{ + CharPtr tmp; + CharPtr PNTR tmpp; + + tmpp = (CharPtr PNTR) userdata; + tmp = *tmpp; + + tmp = StringMove (tmp, sequence); + + *tmpp = tmp; +} + static void SeqPortStreamLoc ( SeqLocPtr slp, Boolean expandGaps, @@ -2212,7 +2299,9 @@ static void SeqPortStreamLoc ( { BioseqPtr bsp; Int4 from; + CharPtr str; Uint1 strand; + CharPtr tmp; Int4 to; if (slp == NULL || proc == NULL) return; @@ -2224,7 +2313,28 @@ static void SeqPortStreamLoc ( to = SeqLocStop (slp); strand = SeqLocStrand (slp); - SeqPortStreamRaw (bsp, from, to, strand, expandGaps, userdata, proc); + str = str = MemNew (sizeof (Char) * (bsp->length + 10)); + if (str != NULL) { + tmp = str; + SeqPortStream (bsp, TRUE, (Pointer) &tmp, SaveLocStream); + + if (to > 0 && to < bsp->length) { + str [to + 1] = '\0'; + } + tmp = str; + if (from > 0 && from < bsp->length) { + tmp += from; + } + if (strand == Seq_strand_minus && ISA_na (bsp->mol)) { + RevCompStr (tmp); + } + + proc (tmp, userdata); + + MemFree (str); + } + + /* SeqPortStreamRaw (bsp, from, to, strand, expandGaps, userdata, proc); */ BioseqUnlock (bsp); } @@ -4906,7 +5016,7 @@ static ByteStorePtr TransTableTranslateCommon ( no_start = FALSE; part_loc = SeqLocPartialCheck (location); part_prod = SeqLocPartialCheck (product); - if ((part_loc & SLP_START) || (part_prod & SLP_START)) { + if ((part_loc & SLP_START) /* || (part_prod & SLP_START) */) { no_start = TRUE; } if (StringHasNoText (tbl->sncbieaa) || no_start || frame > 1) { diff --git a/api/sequtil.c b/api/sequtil.c index 722119e6..6b014789 100644 --- a/api/sequtil.c +++ b/api/sequtil.c @@ -29,13 +29,37 @@ * * Version Creation Date: 4/1/91 * -* $Revision: 6.131 $ +* $Revision: 6.139 $ * * File Description: Sequence Utilities for objseq and objsset * * Modifications: * -------------------------------------------------------------------------- * $Log: sequtil.c,v $ +* Revision 6.139 2003/10/24 14:36:12 kans +* added CH as GenBank CONN to WHICH_db_accession +* +* Revision 6.138 2003/09/09 20:08:18 kans +* SeqLocPartialCheck locks bioseq if seqloc_whole and far +* +* Revision 6.137 2003/09/02 15:11:50 kans +* WHICH_db_accession takes ZP_ with 8 digits as refseq_prot_predicted +* +* Revision 6.136 2003/08/11 13:45:18 kans +* added CG as ncbi gss +* +* Revision 6.135 2003/07/14 20:17:53 kans +* added CF as ncbi est to WHICH_db_accession +* +* Revision 6.134 2003/07/02 14:35:21 kans +* added CE as ncbi gss +* +* Revision 6.133 2003/05/20 22:15:24 yaschenk +* SeqIdSelect loops indefinitely on corrupted memory +* +* Revision 6.132 2003/04/30 16:40:41 kans +* added CD as GenBank EST +* * Revision 6.131 2003/03/25 13:32:22 kans * added CC as ncbi gss accession prefix * @@ -3063,10 +3087,12 @@ NLM_EXTERN SeqIdPtr SeqIdSelect (SeqIdPtr sip, Uint1Ptr order, Int2 num) else if (order[sip->choice] < order[bestid->choice]) bestid = sip; } - } - else + } else { ErrPostEx(SEV_ERROR, 0,0, "SeqIdSelect: choice [%d] out of range [%d]", (int)(sip->choice), (int)num); + if(sip->choice > NUM_SEQID) /*** something is really wrong ***/ + return NULL; + } } return bestid; @@ -5974,68 +6000,82 @@ NLM_EXTERN Uint2 SeqLocPartialCheck(SeqLocPtr head) break; case SEQLOC_WHOLE: found_molinfo = FALSE; - bsp = BioseqFind((SeqIdPtr)(slp->data.ptrvalue)); - if (bsp == NULL) break; - bcp = BioseqContextNew(bsp); - if (bcp == NULL) break; - vnp = NULL; - while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_molinfo, vnp, NULL)) != NULL) + locked = FALSE; + bsp = BioseqFindCore((SeqIdPtr)(slp->data.ptrvalue)); + if (bsp == NULL) { - found_molinfo = TRUE; - mip = (MolInfoPtr)(vnp->data.ptrvalue); - switch (mip->completeness) - { - case 3: /* no left */ - if (slp == first) - retval |= SLP_START; - else - retval |= SLP_INTERNAL; - break; - case 4: /* no right */ - if (slp == last) - retval |= SLP_STOP; - else - retval |= SLP_INTERNAL; - break; - case 2: /* partial */ - retval |= SLP_OTHER; - break; - case 5: /* no ends */ - retval |= SLP_START; - retval |= SLP_STOP; - break; - default: - break; - } + bsp = BioseqLockById((SeqIdPtr)(slp->data.ptrvalue)); + if (bsp != NULL) + locked = TRUE; } - if (! found_molinfo) - { - while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_modif, vnp, NULL)) != NULL) - { - for (vnp2 = (ValNodePtr)(vnp->data.ptrvalue); vnp2 != NULL; vnp2 = vnp2->next) + if (bsp == NULL) break; + bcp = BioseqContextNew(bsp); + if (bcp != NULL) { + vnp = NULL; + while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_molinfo, vnp, NULL)) != NULL) { - switch (vnp2->data.intvalue) + found_molinfo = TRUE; + mip = (MolInfoPtr)(vnp->data.ptrvalue); + switch (mip->completeness) { - case 16: /* no left */ + case 3: /* no left */ if (slp == first) retval |= SLP_START; else retval |= SLP_INTERNAL; break; - case 17: /* no right */ + case 4: /* no right */ if (slp == last) retval |= SLP_STOP; else retval |= SLP_INTERNAL; break; - case 10: /* partial */ + case 2: /* partial */ retval |= SLP_OTHER; break; + case 5: /* no ends */ + retval |= SLP_START; + retval |= SLP_STOP; + break; + default: + break; } } + if (! found_molinfo) + { + while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_modif, vnp, NULL)) != NULL) + { + for (vnp2 = (ValNodePtr)(vnp->data.ptrvalue); vnp2 != NULL; vnp2 = vnp2->next) + { + switch (vnp2->data.intvalue) + { + + case 16: /* no left */ + + if (slp == first) + + retval |= SLP_START; + + else + retval |= SLP_INTERNAL; + break; + case 17: /* no right */ + if (slp == last) + retval |= SLP_STOP; + else + retval |= SLP_INTERNAL; + break; + case 10: /* partial */ + retval |= SLP_OTHER; + break; + } + } + } + } + BioseqContextFree(bcp); } - } - BioseqContextFree(bcp); + if (locked) + BioseqUnlock (bsp); break; default: break; @@ -8723,7 +8763,9 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"BQ") == 0) || (StringICmp(temp,"BU") == 0) || (StringICmp(temp,"CA") == 0) || - (StringICmp(temp,"CB") == 0) ) { /* NCBI EST */ + (StringICmp(temp,"CB") == 0) || + (StringICmp(temp,"CD") == 0) || + (StringICmp(temp,"CF") == 0) ) { /* NCBI EST */ retcode = ACCN_NCBI_EST; } else if ((StringICmp(temp,"BV") == 0)) { /* NCBI STS */ retcode = ACCN_NCBI_STS; @@ -8734,7 +8776,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) retcode = ACCN_NCBI_DIRSUB; } else if ((StringICmp(temp,"AE") == 0)) { /* NCBI genome project data */ retcode = ACCN_NCBI_GENOME; - } else if ((StringICmp(temp,"AH") == 0)) { /* NCBI segmented set header Bioseq */ + } else if ((StringICmp(temp,"AH") == 0) || + (StringICmp(temp,"CH") == 0)) { /* NCBI segmented set header Bioseq */ retcode = ACCN_NCBI_SEGSET | ACCN_AMBIGOUS_MOL; /* A few segmented proteins are AH */ } else if ((StringICmp(temp,"AS") == 0)) { /* NCBI "other" */ @@ -8745,7 +8788,9 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) (StringICmp(temp,"AZ") == 0) || (StringICmp(temp,"BH") == 0) || (StringICmp(temp,"BZ") == 0) || - (StringICmp(temp,"CC") == 0) ) { /* NCBI GSS */ + (StringICmp(temp,"CC") == 0) || + (StringICmp(temp,"CE") == 0) || + (StringICmp(temp,"CG") == 0) ) { /* NCBI GSS */ retcode = ACCN_NCBI_GSS; } else if ((StringICmp(temp,"AR") == 0)) { /* NCBI patent */ retcode = ACCN_NCBI_PATENT; @@ -8845,6 +8890,27 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s) s++; } break; + case 11: /* New 11-character accession, two letters +"_"+ 8 digits */ + if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1))) + break; + if(*(s+2)!='_') + break; + temp[0] = *s; s++; + temp[1] = *s; s++; + temp[2] = NULLB; s++; + + if ((StringICmp(temp,"ZP") == 0)) { + retcode = ACCN_REFSEQ_PROT_PREDICTED; + } else + retval = FALSE; + while (*s) { + if (! IS_DIGIT(*s)) { + retval = FALSE; + break; + } + s++; + } + break; case 12: /* whole genome shotgun 12-character accession, four letters + 8 digits */ if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)) || !IS_ALPHA(*(s+2)) || !IS_ALPHA(*(s+3))) break; diff --git a/api/sqnutil1.c b/api/sqnutil1.c index c22f7a65..41e84d32 100644 --- a/api/sqnutil1.c +++ b/api/sqnutil1.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.271 $ +* $Revision: 6.283 $ * * File Description: * @@ -1643,12 +1643,9 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B Char ch; CdRegionPtr crp; Int2 ctr = 1; - DbtagPtr dbt; ValNodePtr descr; SeqFeatPtr first; GBQualPtr gbq; - SeqFeatPtr gene; - GeneRefPtr grp; Int4 i; Char id [64]; SeqEntryPtr last; @@ -1680,6 +1677,11 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B long int val; ValNodePtr vnp; SeqFeatXrefPtr xref; + /* + DbtagPtr dbt; + SeqFeatPtr gene; + GeneRefPtr grp; + */ if (sfp == NULL || bsp == NULL) return; @@ -1693,8 +1695,9 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B sfp = sfp->next; } - /* expand genes specified by qualifiers on other features (except repeat_region) */ + /* no longer expand genes specified by qualifiers on other features (except repeat_region) */ + /* sfp = first; while (sfp != NULL) { prev = &(sfp->xref); @@ -1720,7 +1723,6 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B gene->location = AsnIoMemCopy (sfp->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite); - /* copy dbxrefs from parent feature */ for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) { dbt = (DbtagPtr) vnp->data.ptrvalue; if (dbt == NULL) continue; @@ -1741,6 +1743,7 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B } sfp = sfp->next; } + */ /* expand mRNA features into cDNA product sequences */ @@ -2739,7 +2742,7 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp) if (aa == 0 && curraa != 0) { aa = curraa; trp->aa = curraa; - trp->aatype = Seq_code_ncbieaa; + trp->aatype = 2; } if (aa != 0 && aa == curraa) { if (justTrnaText) { @@ -2758,7 +2761,7 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp) aa = ParseTRnaString (sfp->comment, &justTrnaText, trpcodon, TRUE); if (aa == 0) return; trp->aa = aa; - trp->aatype = Seq_code_ncbieaa; + trp->aatype = 2; if (justTrnaText) { for (j = 0; j < 6; j++) { if (trp->codon [j] == 255) { @@ -3241,7 +3244,7 @@ static Boolean HandledGBQualOnImp (SeqFeatPtr sfp, GBQualPtr gbq) ptr++; ch = *ptr; } - return TRUE; + /* return TRUE; */ } return FALSE; } @@ -5468,6 +5471,27 @@ static void CopyProtXrefToProtFeat (ProtRefPtr prp, ProtRefPtr prx) } } +static Boolean InGpsGenomic (SeqFeatPtr sfp) + +{ + BioseqPtr bsp; + BioseqSetPtr bssp; + + if (sfp == NULL) return FALSE; + bsp = BioseqFindFromSeqLoc (sfp->location); + if (bsp == NULL) return FALSE; + if (bsp->idx.parenttype == OBJ_BIOSEQSET) { + bssp = (BioseqSetPtr) bsp->idx.parentptr; + while (bssp != NULL) { + if (bssp->_class == BioseqseqSet_class_nuc_prot) return FALSE; + if (bssp->_class == BioseqseqSet_class_gen_prod_set) return TRUE; + if (bssp->idx.parenttype != OBJ_BIOSEQSET) return FALSE; + bssp = (BioseqSetPtr) bssp->idx.parentptr; + } + } + return FALSE; +} + static void HandleXrefOnCDS (SeqFeatPtr sfp) { @@ -5479,6 +5503,7 @@ static void HandleXrefOnCDS (SeqFeatPtr sfp) SeqFeatXrefPtr xref; if (sfp != NULL && sfp->product != NULL) { + if (InGpsGenomic (sfp)) return; prot = GetBestProteinFeatureUnindexed (sfp->product); if (prot != NULL) { prp = (ProtRefPtr) prot->data.value.ptrvalue; @@ -5737,6 +5762,10 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP CleanDoubleQuoteList (grp->syn); grp->db = ValNodeSort (grp->db, SortDbxref); CleanupDuplicateDbxrefs (&(grp->db)); + /* now move grp->dbxref to sfp->dbxref */ + vnp = grp->db; + grp->db = NULL; + ValNodeLink ((&sfp->dbxref), vnp); if (grp->locus != NULL && grp->syn != NULL) { vnp = grp->syn; str = (CharPtr) vnp->data.ptrvalue; @@ -5772,6 +5801,10 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP CleanDoubleQuoteList (prp->activity); prp->db = ValNodeSort (prp->db, SortDbxref); CleanupDuplicateDbxrefs (&(prp->db)); + /* now move prp->dbxref to sfp->dbxref */ + vnp = prp->db; + prp->db = NULL; + ValNodeLink ((&sfp->dbxref), vnp); if (prp->processed != 3 && prp->processed != 4 && prp->name == NULL && sfp->comment != NULL) { if (StringICmp (sfp->comment, "putative") != 0) { @@ -6563,6 +6596,8 @@ static void BasicSeqEntryCleanupInternal (SeqEntryPtr sep, ValNodePtr PNTR publi OrgRefPtr orp; Boolean partial5; Boolean partial3; + Uint1 processed; + ProtRefPtr prp; ValNodePtr psp; RnaRefPtr rrp; Uint1 rrptype; @@ -6659,6 +6694,28 @@ static void BasicSeqEntryCleanupInternal (SeqEntryPtr sep, ValNodePtr PNTR publi sfp->data.value.ptrvalue = rrp; rrp->type = rrptype; sfp->idx.subtype = FindFeatDefType (sfp); + } else { + processed = 0; + if (StringCmp (ifp->key, "proprotein") == 0 || StringCmp (ifp->key, "preprotein") == 0) { + processed = 1; + } else if (StringCmp (ifp->key, "mat_peptide") == 0) { + processed = 2; + } else if (StringCmp (ifp->key, "sig_peptide") == 0) { + processed = 3; + } else if (StringCmp (ifp->key, "transit_peptide") == 0) { + processed = 4; + } + if (processed != 0 || StringCmp (ifp->key, "Protein") == 0) { + bsp = BioseqFind (SeqLocId (sfp->location)); + if (bsp != NULL && ISA_aa (bsp->mol)) { + sfp->data.value.ptrvalue = ImpFeatFree (ifp); + sfp->data.choice = SEQFEAT_PROT; + prp = ProtRefNew (); + sfp->data.value.ptrvalue = prp; + prp->processed = processed; + sfp->idx.subtype = FindFeatDefType (sfp); + } + } } } } @@ -7267,6 +7324,83 @@ NLM_EXTERN void ResynchMessengerRNAPartials (SeqEntryPtr sep) VisitFeaturesInSep (sep, NULL, ResynchMRNAPartials); } +NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata) + +{ + SeqFeatPtr bestprot; + BioseqPtr bsp; + MolInfoPtr mip; + Boolean partial5; + Boolean partial3; + ProtRefPtr prp; + SeqEntryPtr sep; + SeqIdPtr sip; + SeqLocPtr slp; + ValNodePtr vnp; + + if (sfp->data.choice != SEQFEAT_PROT) return; + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + if (prp == NULL) return; + if (prp->processed < 1 || prp->processed > 4) return; + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + sfp->partial = (Boolean) (sfp->partial || partial5 || partial3); + slp = SeqLocFindNext (sfp->location, NULL); + if (slp == NULL) return; + sip = SeqLocId (sfp->product); + if (sip == NULL) return; + bsp = BioseqFind (sip); + if (bsp != NULL && ISA_aa (bsp->mol) && bsp->repr == Seq_repr_raw) { + sep = SeqMgrGetSeqEntryForData (bsp); + if (sep == NULL) return; + bestprot = SeqMgrGetBestProteinFeature (bsp, NULL); + if (bestprot == NULL) { + bestprot = GetBestProteinFeatureUnindexed (sfp->product); + } + if (bestprot != NULL) { + bestprot->location = SeqLocFree (bestprot->location); + bestprot->location = CreateWholeInterval (sep); + SetSeqLocPartial (bestprot->location, partial5, partial3); + bestprot->partial = (partial5 || partial3); + } + vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL); + if (vnp == NULL) { + vnp = CreateNewDescriptor (sep, Seq_descr_molinfo); + if (vnp != NULL) { + mip = MolInfoNew (); + vnp->data.ptrvalue = (Pointer) mip; + if (mip != NULL) { + mip->biomol = 8; + mip->tech = 13; + } + } + } + if (vnp != NULL) { + mip = (MolInfoPtr) vnp->data.ptrvalue; + if (mip != NULL) { + if (partial5 && partial3) { + mip->completeness = 5; + } else if (partial5) { + mip->completeness = 3; + } else if (partial3) { + mip->completeness = 4; + /* + } else if (partial) { + mip->completeness = 2; + */ + } else { + mip->completeness = 0; + } + } + } + } +} + +NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep) + +{ + VisitFeaturesInSep (sep, NULL, ResynchPeptidePartials); +} + /* SeqIdStripLocus removes the SeqId.name field if accession is set */ NLM_EXTERN SeqIdPtr SeqIdStripLocus (SeqIdPtr sip) @@ -7479,9 +7613,12 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void) { Boolean dataFound; Char path [PATH_MAX]; + Char appPath[PATH_MAX]; CharPtr ptr; - ProgramPath (path, sizeof (path)); + ProgramPath (appPath, sizeof (path)); + StrCpy(path, appPath); + /* data a sibling of our application? */ ptr = StringRChr (path, DIRDELIMCHR); if (ptr != NULL) { ptr++; @@ -7489,6 +7626,7 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void) } dataFound = CheckDataPath (path, "data"); if (! (dataFound)) { + /* data an uncle of our application? */ if (ptr != NULL) { ptr--; *ptr = '\0'; @@ -7501,50 +7639,12 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void) } } #ifdef OS_UNIX_DARWIN - /* Mac OS X package has application in Programname.app/Contents/MacOS/Programname */ - if (! (dataFound)) { - if (ptr != NULL) { - /* check within Contents/Resources */ + if (! (dataFound) && IsApplicationPackage(appPath)) { + /* is data inside our application within Contents/Resources? */ + StrCpy(path, appPath); + FileBuildPath(path, "Contents", NULL); FileBuildPath (path, "Resources", NULL); dataFound = CheckDataPath (path, "data"); - /* did not change ptr, so if it failed just go up to next higher level */ - } - } - if (! (dataFound)) { - if (ptr != NULL) { - ptr--; - *ptr = '\0'; - ptr = StringRChr (path, DIRDELIMCHR); - if (ptr != NULL) { - ptr++; - *ptr = '\0'; - } - dataFound = CheckDataPath (path, "data"); - } - } - if (! (dataFound)) { - if (ptr != NULL) { - ptr--; - *ptr = '\0'; - ptr = StringRChr (path, DIRDELIMCHR); - if (ptr != NULL) { - ptr++; - *ptr = '\0'; - } - dataFound = CheckDataPath (path, "data"); - } - } - if (! (dataFound)) { - if (ptr != NULL) { - ptr--; - *ptr = '\0'; - ptr = StringRChr (path, DIRDELIMCHR); - if (ptr != NULL) { - ptr++; - *ptr = '\0'; - } - dataFound = CheckDataPath (path, "data"); - } } #endif if (dataFound) { @@ -7941,9 +8041,18 @@ NLM_EXTERN Uint2 FindFeatFromFeatDefType (Uint2 subtype) if (subtype >= FEATDEF_IMP && subtype <= FEATDEF_site_ref) { return SEQFEAT_IMP; } + if (subtype == FEATDEF_oriT) { + return SEQFEAT_IMP; + } if (subtype >= FEATDEF_preprotein && subtype <= FEATDEF_transit_peptide_aa) { return SEQFEAT_PROT; } + if (subtype == FEATDEF_operon) { + return SEQFEAT_IMP; + } + if (subtype == FEATDEF_gap) { + return SEQFEAT_IMP; + } } return 0; } @@ -8263,6 +8372,7 @@ NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqId SeqPntPtr spp; if (slp == NULL) return index; + while (slp != NULL) { switch (slp->choice) { case SEQLOC_NULL : @@ -8324,6 +8434,155 @@ NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqId } slp = slp->next; } + + return index; +} + +NLM_EXTERN Int4 VisitSeqIdsInSeqFeat (SeqFeatPtr sfp, Pointer userdata, VisitSeqIdFunc callback) + +{ + CodeBreakPtr cbp; + CdRegionPtr crp; + Int4 index = 0; + RnaRefPtr rrp; + tRNAPtr trp; + + if (sfp == NULL) return index; + + index += VisitSeqIdsInSeqLoc (sfp->location, userdata, callback); + index += VisitSeqIdsInSeqLoc (sfp->product, userdata, callback); + + switch (sfp->data.choice) { + case SEQFEAT_CDREGION : + crp = (CdRegionPtr) sfp->data.value.ptrvalue; + if (crp != NULL) { + for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) { + index += VisitSeqIdsInSeqLoc (cbp->loc, userdata, callback); + } + } + break; + case SEQFEAT_RNA : + rrp = (RnaRefPtr) sfp->data.value.ptrvalue; + if (rrp != NULL && rrp->ext.choice == 2) { + trp = (tRNAPtr) rrp->ext.value.ptrvalue; + if (trp != NULL && trp->anticodon != NULL) { + index += VisitSeqIdsInSeqLoc (trp->anticodon, userdata, callback); + } + } + break; + default : + break; + } + + return index; +} + +NLM_EXTERN Int4 VisitSeqIdsInSeqAlign (SeqAlignPtr sap, Pointer userdata, VisitSeqIdFunc callback) + +{ + DenseDiagPtr ddp; + DenseSegPtr dsp; + Int4 index = 0; + SeqIdPtr sip; + SeqLocPtr slp = NULL; + StdSegPtr ssp; + + if (sap == NULL) return index; + + if (sap->bounds != NULL) { + sip = SeqLocId (sap->bounds); + index += VisitSeqIdList (sip, userdata, callback); + } + + if (sap->segs == NULL) return index; + + switch (sap->segtype) { + case SAS_DENDIAG : + ddp = (DenseDiagPtr) sap->segs; + if (ddp != NULL) { + for (sip = ddp->id; sip != NULL; sip = sip->next) { + index += VisitSeqIdList (sip, userdata, callback); + } + } + break; + case SAS_DENSEG : + dsp = (DenseSegPtr) sap->segs; + if (dsp != NULL) { + for (sip = dsp->ids; sip != NULL; sip = sip->next) { + index += VisitSeqIdList (sip, userdata, callback); + } + } + break; + case SAS_STD : + ssp = (StdSegPtr) sap->segs; + for (slp = ssp->loc; slp != NULL; slp = slp->next) { + sip = SeqLocId (slp); + index += VisitSeqIdList (sip, userdata, callback); + } + break; + case SAS_DISC : + /* recursive */ + for (sap = (SeqAlignPtr) sap->segs; sap != NULL; sap = sap->next) { + index += VisitSeqIdsInSeqAlign (sap, userdata, callback); + } + break; + default : + break; + } + + return index; +} + +NLM_EXTERN Int4 VisitSeqIdsInSeqGraph (SeqGraphPtr sgp, Pointer userdata, VisitSeqIdFunc callback) + +{ + Int4 index = 0; + SeqIdPtr sip; + + if (sgp == NULL) return index; + + if (sgp->loc != NULL) { + sip = SeqLocId (sgp->loc); + index += VisitSeqIdList (sip, userdata, callback); + } + + return index; +} + +NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, VisitSeqIdFunc callback) + +{ + Int4 index = 0; + SeqAlignPtr sap; + SeqFeatPtr sfp; + SeqGraphPtr sgp; + + if (annot == NULL || annot->data == NULL) return index; + + switch (annot->type) { + + case 1 : + for (sfp = (SeqFeatPtr) annot->data; sfp != NULL; sfp = sfp->next) { + index += VisitSeqIdsInSeqFeat (sfp, userdata, callback); + } + break; + + case 2 : + for (sap = (SeqAlignPtr) annot->data; sap != NULL; sap = sap->next) { + index += VisitSeqIdsInSeqAlign (sap, userdata, callback); + } + break; + + case 3 : + for (sgp = (SeqGraphPtr) annot->data; sgp != NULL; sgp = sgp->next) { + index += VisitSeqIdsInSeqGraph (sgp, userdata, callback); + } + break; + + default : + break; + } + return index; } diff --git a/api/sqnutil2.c b/api/sqnutil2.c index f2e27242..a5817f57 100644 --- a/api/sqnutil2.c +++ b/api/sqnutil2.c @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.144 $ +* $Revision: 6.152 $ * * File Description: * @@ -3054,14 +3054,17 @@ static SimpleSeqPtr ByteStoreToSimpleSeq (ByteStorePtr bs, CharPtr seqid, CharPt #define strandStr field [STRAND_TAG] static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, - BoolPtr partial5P, BoolPtr partial3P, - CharPtr PNTR featP, CharPtr PNTR qualP, + BoolPtr partial5P, BoolPtr partial3P, BoolPtr ispointP, + BoolPtr isminusP, CharPtr PNTR featP, CharPtr PNTR qualP, CharPtr PNTR valP, Int4 offset) { Boolean badNumber; CharPtr field [NUM_FTABLE_COLUMNS]; Int2 i; + Boolean isminus = FALSE; + Boolean ispoint = FALSE; + size_t len; ValNodePtr parsed; Boolean partial5 = FALSE; Boolean partial3 = FALSE; @@ -3098,6 +3101,11 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, partial5 = TRUE; str++; } + len = StringLen (str); + if (len > 1 && str [len - 1] == '^') { + ispoint = TRUE; + str [len - 1] = '\0'; + } if (str != NULL && sscanf (str, "%ld", &val) == 1) { start = val; } else { @@ -3131,6 +3139,7 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, start = stop; stop = tmp; } + isminus = TRUE; } } } @@ -3139,6 +3148,8 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP, *stopP = stop + offset; *partial5P = partial5; *partial3P = partial3; + *ispointP = ispoint; + *isminusP = isminus; *featP = featType; *qualP = qualType; *valP = qualVal; @@ -3297,7 +3308,8 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c } } else if (StringICmp ("tRNA", str) != 0 && StringICmp ("transfer", str) != 0 && - StringICmp ("RNA", str) != 0) { + StringICmp ("RNA", str) != 0 && + StringICmp ("product", str) != 0) { if (cdP != NULL && StringLen (str) == 3) { StringCpy (codon, str); for (i = 0; i < 3; i++) { @@ -3323,7 +3335,8 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c if (curraa != 0) { } else if (StringICmp ("tRNA", str) != 0 && StringICmp ("transfer", str) != 0 && - StringICmp ("RNA", str) != 0) { + StringICmp ("RNA", str) != 0 && + StringICmp ("product", str) != 0) { if (cdP != NULL && StringLen (str) == 3) { StringCpy (codon, str); for (i = 0; i < 3; i++) { @@ -4127,6 +4140,12 @@ NLM_EXTERN void AddQualifierToFeature (SeqFeatPtr sfp, CharPtr qual, CharPtr val prp->desc = StringSaveNoNull (val); } else if (sfp->data.choice == SEQFEAT_CDREGION && StringCmp (qual, "prot_note") == 0) { bail = FALSE; + } else if (sfp->data.choice == SEQFEAT_PROT && StringCmp (qual, "prot_desc") == 0) { + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + if (prp != NULL) { + prp->desc = MemFree (prp->desc); + prp->desc = StringSaveNoNull (val); + } } else if (ifp != NULL && StringICmp (ifp->key, "variation") == 0 && ParseQualIntoSnpUserObject (sfp, qual, val)) { } else if (ifp != NULL && StringICmp (ifp->key, "STS") == 0 && ParseQualIntoStsUserObject (sfp, qual, val)) { } else if (ifp != NULL && StringICmp (ifp->key, "misc_feature") == 0 && ParseQualIntoCloneUserObject (sfp, qual, val)) { @@ -4313,6 +4332,20 @@ NLM_EXTERN void AddQualifierToFeature (SeqFeatPtr sfp, CharPtr qual, CharPtr val } } } + } else if (sfp->data.choice == SEQFEAT_PROT) { + if (qnum == GBQUAL_function || qnum == GBQUAL_EC_number || qnum == GBQUAL_product) { + prp = (ProtRefPtr) sfp->data.value.ptrvalue; + if (prp != NULL) { + if (qnum == GBQUAL_function) { + ValNodeCopyStr (&(prp->activity), 0, val); + } else if (qnum == GBQUAL_EC_number) { + ValNodeCopyStr (&(prp->ec), 0, val); + } else if (qnum == GBQUAL_product) { + ValNodeCopyStr (&(prp->name), 0, val); + } + return; + } + } } else if (sfp->data.choice == SEQFEAT_RNA) { if (qnum == GBQUAL_product) { rrp = (RnaRefPtr) sfp->data.value.ptrvalue; @@ -4485,16 +4518,94 @@ NLM_EXTERN SeqLocPtr AddIntervalToLocation (SeqLocPtr loc, SeqIdPtr sip, return rsult; } +static CharPtr TokenizeAtWhiteSpace (CharPtr str) + +{ + Char ch; + CharPtr ptr; + + if (str == NULL) return NULL; + ptr = str; + ch = *ptr; + + while (ch != '\0' && (IS_WHITESP (ch))) { + ptr++; + ch = *ptr; + } + while (ch != '\0' && (! IS_WHITESP (ch))) { + ptr++; + ch = *ptr; + } + if (ch != '\0') { + *ptr = '\0'; + ptr++; + } + + return ptr; +} + +static void ParseWhitespaceIntoTabs (CharPtr line) + +{ + Char ch; + size_t len; + CharPtr ptr; + CharPtr str; + CharPtr tmp; + + if (StringHasNoText (line)) return; + len = StringLen (line) + 10; + + str = MemNew (len); + if (str == NULL) return; + + ptr = line; + ch = *ptr; + if (IS_WHITESP (ch)) { + /* qualifier value line */ + StringCat (str, "\t\t\t"); + TrimSpacesAroundString (ptr); + tmp = TokenizeAtWhiteSpace (ptr); + StringCat (str, ptr); + StringCat (str, "\t"); + StringCat (str, tmp); + } else { + /* location and possible feature key line */ + TrimSpacesAroundString (ptr); + tmp = TokenizeAtWhiteSpace (ptr); + StringCat (str, ptr); + StringCat (str, "\t"); + ptr = tmp; + tmp = TokenizeAtWhiteSpace (ptr); + StringCat (str, ptr); + ptr = tmp; + if (! StringHasNoText (ptr)) { + tmp = TokenizeAtWhiteSpace (ptr); + StringCat (str, "\t"); + StringCat (str, ptr); + } + } + + /* replace original with tab-delimited table */ + StringCpy (line, str); + + MemFree (str); +} + static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname) { + Boolean allowWhitesp = TRUE; BioSourcePtr biop; CdRegionPtr crp; AnnotDescrPtr desc; CharPtr feat; + IntFuzzPtr fuzz; GeneRefPtr grp; Int2 idx; ImpFeatPtr ifp; + Boolean isminus; + Boolean ispoint; Int2 j; CharPtr label; Char line [2047]; @@ -4508,12 +4619,15 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname) PubdescPtr pdp; Int4 pos; SeqFeatPtr prev = NULL; + ProtRefPtr prp; CharPtr qual; Uint1 rnatype; RnaRefPtr rrp; SeqAnnotPtr sap = NULL; SeqFeatPtr sfp = NULL; SeqIdPtr sip; + SeqLocPtr slp; + SeqPntPtr spp; Int4 start; Int4 stop; SqnTagPtr stp; @@ -4543,7 +4657,11 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname) return sap; } - if (ParseFeatTableLine (line, &start, &stop, &partial5, &partial3, &feat, &qual, &val, offset)) { + if (allowWhitesp) { + ParseWhitespaceIntoTabs (line); + } + + if (ParseFeatTableLine (line, &start, &stop, &partial5, &partial3, &ispoint, &isminus, &feat, &qual, &val, offset)) { if (feat != NULL && start >= 0 && stop >= 0) { if (sap == NULL) { @@ -4620,6 +4738,23 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname) rrp->type = rnatype; } + } else if (StringCmp (feat, "Protein") == 0) { + + sfp->data.choice = SEQFEAT_PROT; + prp = ProtRefNew (); + if (prp != NULL) { + sfp->data.value.ptrvalue = (Pointer) prp; + } + + } else if (StringCmp (feat, "proprotein") == 0) { + + sfp->data.choice = SEQFEAT_PROT; + prp = ProtRefNew (); + if (prp != NULL) { + sfp->data.value.ptrvalue = (Pointer) prp; + prp->processed = 1; + } + } else if (StringCmp (feat, "source") == 0) { sfp->data.choice = SEQFEAT_BIOSRC; @@ -4672,7 +4807,30 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname) } } - sfp->location = AddIntervalToLocation (NULL, sip, start, stop, partial5, partial3); + if (ispoint) { + spp = SeqPntNew (); + if (spp != NULL) { + spp->point = start; + if (isminus) { + spp->strand = Seq_strand_minus; + } + spp->id = SeqIdDup (sip); + fuzz = IntFuzzNew (); + if (fuzz != NULL) { + fuzz->choice = 4; + fuzz->a = 3; + spp->fuzz = fuzz; + } + slp = ValNodeNew (NULL); + if (slp != NULL) { + slp->choice = SEQLOC_PNT; + slp->data.ptrvalue = (Pointer) spp; + sfp->location = slp; + } + } + } else { + sfp->location = AddIntervalToLocation (NULL, sip, start, stop, partial5, partial3); + } if (partial5 || partial3) { sfp->partial = TRUE; @@ -5250,7 +5408,7 @@ NLM_EXTERN Pointer ReadAsnFastaOrFlatFile (FILE *fp, Uint2Ptr datatypeptr, Uint2 Boolean inLetters; Boolean isProt = FALSE; Int4 j; - Char line [1023]; + Char line [4096]; Boolean mayBeAccessionList = TRUE; Boolean mayBePlainFasta = TRUE; SeqFeatPtr nextsfp; diff --git a/api/sqnutil3.c b/api/sqnutil3.c index 76bcdcc8..c1554dcd 100644 --- a/api/sqnutil3.c +++ b/api/sqnutil3.c @@ -29,7 +29,7 @@ * * Version Creation Date: 2/7/00 * -* $Revision: 6.23 $ +* $Revision: 6.28 $ * * File Description: * @@ -977,6 +977,7 @@ static FeatdefNameData featdefWithName [] = { { FEATDEF_D_segment , "D_segment" }, { FEATDEF_enhancer , "enhancer" }, { FEATDEF_exon , "exon" }, + { FEATDEF_gap , "gap" }, { FEATDEF_GC_signal , "GC_signal" }, { FEATDEF_GENE , "Gene" }, { FEATDEF_HET , "Het" }, @@ -1001,6 +1002,8 @@ static FeatdefNameData featdefWithName [] = { { FEATDEF_NUM , "Num" }, { FEATDEF_N_region , "N_region" }, { FEATDEF_old_sequence , "old_sequence" }, + { FEATDEF_operon , "operon" }, + { FEATDEF_oriT , "oriT" }, { FEATDEF_polyA_signal , "polyA_signal" }, { FEATDEF_polyA_site , "polyA_site" }, { FEATDEF_preRNA , "precursor_RNA" }, @@ -1159,11 +1162,14 @@ static CharPtr featurekeys [] = { "NonStdRes" , "Het" , "Src" , - "pro_peptide" , + "proprotein" , "mat_peptide" , "sig_peptide" , "transit_peptide", - "snoRNA" + "snoRNA", + "gap", + "operon", + "oriT" }; NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF) @@ -1527,3 +1533,23 @@ NLM_EXTERN void SegOrDeltaBioseqToRaw (BioseqPtr bsp) bsp->seq_data_type = Seq_code_iupacna; } + +static PubMedFetchFunc pmf_pubfetch = NULL; + +NLM_EXTERN void LIBCALL PubMedSetFetchFunc (PubMedFetchFunc func) + +{ + pmf_pubfetch = func; +} + +NLM_EXTERN PubmedEntryPtr LIBCALL GetPubMedForUid (Int4 uid) + +{ + PubMedFetchFunc func; + + if (uid < 1) return NULL; + func = pmf_pubfetch; + if (func == NULL) return NULL; + return func (uid); +} + diff --git a/api/sqnutils.h b/api/sqnutils.h index 20b6e762..e3a061e3 100644 --- a/api/sqnutils.h +++ b/api/sqnutils.h @@ -29,7 +29,7 @@ * * Version Creation Date: 9/2/97 * -* $Revision: 6.87 $ +* $Revision: 6.92 $ * * File Description: * @@ -47,6 +47,7 @@ #include <ncbi.h> #include <sequtil.h> +#include <objpubme.h> #undef NLM_EXTERN #ifdef NLM_IMPORT @@ -195,10 +196,15 @@ NLM_EXTERN void ResynchCodingRegionPartials (SeqEntryPtr sep); NLM_EXTERN void ResynchMessengerRNAPartials (SeqEntryPtr sep); +/* resynchronizes protein feature with product peptide bioseq */ + +NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep); + /* individual feature callbacks for above functions */ NLM_EXTERN void ResynchMRNAPartials (SeqFeatPtr sfp, Pointer userdata); NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata); +NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata); /* functions to parse [org=Drosophila melanogaster] and [gene=lacZ] from titles */ /* for example, passing "gene" to SqnTagFind returns "lacZ" */ @@ -493,11 +499,16 @@ NLM_EXTERN Int4 VisitSetsInSet (BioseqSetPtr bssp, Pointer userdata, VisitSetsFu typedef void (*VisitElementsFunc) (SeqEntryPtr sep, Pointer userdata); NLM_EXTERN Int4 VisitElementsInSep (SeqEntryPtr sep, Pointer userdata, VisitElementsFunc callback); -/* visits all SeqIds within a SeqLoc */ +/* visits all SeqIds within a SeqLoc, or within features, alignments, graphs, or annots */ typedef void (*VisitSeqIdFunc) (SeqIdPtr sip, Pointer userdata); NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqIdFunc callback); +NLM_EXTERN Int4 VisitSeqIdsInSeqFeat (SeqFeatPtr sfp, Pointer userdata, VisitSeqIdFunc callback); +NLM_EXTERN Int4 VisitSeqIdsInSeqAlign (SeqAlignPtr sap, Pointer userdata, VisitSeqIdFunc callback); +NLM_EXTERN Int4 VisitSeqIdsInSeqGraph (SeqGraphPtr sgp, Pointer userdata, VisitSeqIdFunc callback); +NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, VisitSeqIdFunc callback); + /* visits all sub UserFields - if the data type is 11, VisitUserFieldsInUfp recurses */ typedef void (*VisitUserFieldsFunc) (UserFieldPtr ufp, Pointer userdata); @@ -538,6 +549,17 @@ NLM_EXTERN Int4 VisitBioSourcesInSep (SeqEntryPtr sep, Pointer userdata, VisitBi typedef void (*ScanBioseqSetFunc) (SeqEntryPtr sep, Pointer userdata); NLM_EXTERN Int4 ScanBioseqSetRelease (CharPtr inputFile, Boolean binary, Boolean compressed, Pointer userdata, ScanBioseqSetFunc callback); +/* PubMed registered fetch functionality */ + +NLM_EXTERN PubmedEntryPtr LIBCALL GetPubMedForUid (Int4 uid); + +/* internal support type, registration function */ + +typedef PubmedEntryPtr (LIBCALLBACK * PubMedFetchFunc) (Int4 uid); + +NLM_EXTERN void LIBCALL PubMedSetFetchFunc (PubMedFetchFunc func); + + #ifdef __cplusplus } diff --git a/api/subutil.c b/api/subutil.c index ff5c748d..a4cbcaed 100644 --- a/api/subutil.c +++ b/api/subutil.c @@ -29,7 +29,7 @@ * * Version Creation Date: 11/3/93 * -* $Revision: 6.52 $ +* $Revision: 6.55 $ * * File Description: Utilities for creating ASN.1 submissions * @@ -40,6 +40,17 @@ * * * $Log: subutil.c,v $ +* Revision 6.55 2003/10/16 17:16:33 mjohnson +* +* Added ORG_* and IS_ORG_* defines for origins. Use these constants +* and macros instead of small integers. +* +* Revision 6.54 2003/10/08 16:46:44 kans +* fix in AddCompleteness (KT) +* +* Revision 6.53 2003/07/11 18:22:45 kans +* AddSourceToRefGeneTrackUserObject +* * Revision 6.52 2002/11/05 17:01:55 kans * refgene tracking user object uses comment as name if accession is empty * @@ -2570,7 +2581,7 @@ NLM_EXTERN Boolean AddGenomeToEntry ( bio = BioSourceNew(); } bio->genome = (Uint1)type; - bio->origin = 0; /* unknown */ + bio->origin = ORG_DEFAULT; /* unknown */ vnp->data.ptrvalue = (Pointer) bio; return TRUE; @@ -2732,19 +2743,19 @@ NLM_EXTERN void AddCompleteness(NCBISubPtr submission, SeqEntryPtr sep, SeqFeatP Boolean partial = FALSE; retval = SeqLocPartialCheck(sfp->location); - if (retval & SLP_START) { + if ((retval & SLP_START) && (retval & SLP_STOP)) { + AddCompleteToEntry(submission, sep, 5); /* no_ends */ + partial = TRUE; + } else if (retval & SLP_START) { AddCompleteToEntry(submission, sep, 3); /* no_left */ partial = TRUE; - } - if (retval & SLP_STOP) { + } else if (retval & SLP_STOP) { AddCompleteToEntry(submission, sep, 4); /* no_right */ partial = TRUE; - } - if (retval & (SLP_OTHER | SLP_INTERNAL)) { + } else if (retval & (SLP_OTHER | SLP_INTERNAL)) { AddCompleteToEntry(submission, sep, 2); /* partial */ partial = TRUE; - } - if (!partial && sfp->partial) { + } else if (!partial && sfp->partial) { AddCompleteToEntry(submission, sep, 2); /* partial */ } } @@ -4420,6 +4431,7 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c { UserFieldPtr curr; ObjectIdPtr oip; + UserFieldPtr prev = NULL; if (uop == NULL || collaborator == NULL) return; oip = uop->type; @@ -4430,6 +4442,7 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c if (oip != NULL && StringICmp (oip->str, "Collaborator") == 0) { break; } + prev = curr; } if (curr == NULL) { @@ -4439,10 +4452,13 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c curr->label = oip; curr->choice = 1; /* visible string */ - /* link status at beginning of list */ + /* link curator at end of list */ - curr->next = uop->data; - uop->data = curr; + if (prev != NULL) { + prev->next = curr; + } else { + uop->data = curr; + } } if (curr == NULL || curr->choice != 1) return; @@ -4454,6 +4470,50 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c curr->data.ptrvalue = (Pointer) StringSave (collaborator); } +NLM_EXTERN void AddSourceToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr genomicSource) + +{ + UserFieldPtr curr; + ObjectIdPtr oip; + UserFieldPtr prev = NULL; + + if (uop == NULL || genomicSource == NULL) return; + oip = uop->type; + if (oip == NULL || StringICmp (oip->str, "RefGeneTracking") != 0) return; + + for (curr = uop->data; curr != NULL; curr = curr->next) { + oip = curr->label; + if (oip != NULL && StringICmp (oip->str, "GenomicSource") == 0) { + break; + } + prev = curr; + } + + if (curr == NULL) { + curr = UserFieldNew (); + oip = ObjectIdNew (); + oip->str = StringSave ("GenomicSource"); + curr->label = oip; + curr->choice = 1; /* visible string */ + + /* link source at end of list */ + + if (prev != NULL) { + prev->next = curr; + } else { + uop->data = curr; + } + } + + if (curr == NULL || curr->choice != 1) return; + + /* replace any existing source indication */ + + curr->data.ptrvalue = MemFree (curr->data.ptrvalue); + + curr->data.ptrvalue = (Pointer) StringSave (genomicSource); +} + NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr field, CharPtr accn, Int4 gi, Boolean sequenceChange, diff --git a/api/subutil.h b/api/subutil.h index 480c5157..41645047 100644 --- a/api/subutil.h +++ b/api/subutil.h @@ -31,7 +31,7 @@ * * Version Creation Date: 11/3/93 * -* $Revision: 6.43 $ +* $Revision: 6.46 $ * * File Description: Utilities for creating ASN.1 submissions * @@ -42,6 +42,17 @@ * * * $Log: subutil.h,v $ +* Revision 6.46 2003/10/21 18:16:05 bazhin +* Replaced C++ comments with C ones. +* +* Revision 6.45 2003/10/16 17:16:33 mjohnson +* +* Added ORG_* and IS_ORG_* defines for origins. Use these constants +* and macros instead of small integers. +* +* Revision 6.44 2003/07/11 18:22:45 kans +* AddSourceToRefGeneTrackUserObject +* * Revision 6.43 2002/07/09 16:17:35 kans * AddAccessionToTpaAssemblyUserObject takes from and to parameters * @@ -949,6 +960,26 @@ NLM_EXTERN Boolean AddSubSourceToEntry ( #define ORGMOD_old_name 254 #define ORGMOD_other 255 +/* Defines for BioSrc.origin + */ +#define ORG_UNKNOWN 0 +#define ORG_NATURAL 1 +#define ORG_NATMUT 2 +#define ORG_MUT 3 +#define ORG_ARTIFICIAL 4 +#define ORG_SYNTHETIC 5 +#define ORG_OTHER 255 +#define ORG_DEFAULT ORG_UNKNOWN + +#define IS_ORG_UNKNOWN(S) ((S).origin == ORG_UNKNOWN) +#define IS_ORG_NATURAL(S) ((S).origin == ORG_NATURAL) +#define IS_ORG_NATMUT(S) ((S).origin == ORG_NATMUT) +#define IS_ORG_MUT(S) ((S).origin == ORG_MUT) +#define IS_ORG_ARTIFICIAL(S) ((S).origin == ORG_ARTIFICIAL) +#define IS_ORG_SYNTHETIC(S) ((S).origin == ORG_SYNTHETIC) +#define IS_ORG_OTHER(S) ((S).origin == ORG_OTHER) + + /********************************************* * OrgMod defines subclasses of organism names * (also see SubSource above for subclasses of source material) @@ -1557,6 +1588,7 @@ NLM_EXTERN Boolean AddPhrapGraphToSeqLit ( NLM_EXTERN UserObjectPtr CreateRefGeneTrackUserObject (void); NLM_EXTERN void AddStatusToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr status); NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr collaborator); +NLM_EXTERN void AddSourceToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr genomicSource); NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr field, CharPtr accn, Int4 gi, Boolean sequenceChange, diff --git a/api/tofasta.c b/api/tofasta.c index bba0ed88..c605b2a0 100644 --- a/api/tofasta.c +++ b/api/tofasta.c @@ -29,7 +29,7 @@ * * Version Creation Date: 7/12/91 * -* $Revision: 6.114 $ +* $Revision: 6.121 $ * * File Description: various sequence objects to fasta output * @@ -39,6 +39,27 @@ * ------- ---------- ----------------------------------------------------- * * $Log: tofasta.c,v $ +* Revision 6.121 2003/08/04 19:51:02 kans +* for complete chromosome title, if > 3 clones (by counting semicolons) then just display count, not full text of clones +* +* Revision 6.120 2003/07/25 16:15:25 kans +* FindProtDefLine of hypothetical protein only needs to look for locus_tag +* +* Revision 6.119 2003/07/25 15:34:07 kans +* protect FindProtDefLine against no parent CDS (e.g., SWISS-PROT segmented record P33072) +* +* Revision 6.118 2003/07/24 21:51:04 kans +* if hypothetical protein, find gene and add to name +* +* Revision 6.117 2003/07/23 20:37:02 kans +* if making htgs title, do not set iip values +* +* Revision 6.116 2003/07/22 18:31:44 kans +* Added support for EMBLBlockPtr keywords in addition to GBBlockPtr keywords in suppressing sequencing in progress message +* +* Revision 6.115 2003/05/02 16:22:24 kans +* added FindNRDefLine to make NR_ deflines on the fly +* * Revision 6.114 2003/03/25 17:00:53 kans * CreateDefLine htgs suffix only shows if delta seq with more than 0 gaps * @@ -2603,6 +2624,7 @@ static ValNodePtr IndexedGatherDescrOnBioseq (ItemInfoPtr iip, BioseqPtr bsp, Ui SeqDescrPtr sdp; sdp = SeqMgrGetNextDescriptor (bsp, NULL, choice, &dcontext); + if (sdp == NULL) return NULL; if (ISA_aa(bsp->mol) && !is_pdb(bsp)) { if (dcontext.level != 0) return NULL; } @@ -2836,6 +2858,89 @@ static CharPtr FindNMDefLine (BioseqPtr bsp) return str; } +static CharPtr FindNRDefLine (BioseqPtr bsp) + +{ + BioSourcePtr biop; + Char buf [512]; + Uint2 entityID; + CharPtr gene; + size_t len; + MolInfoPtr mip; + NMDef nd; + OrgRefPtr orp; + CharPtr rna = "miscRNA"; + SeqEntryPtr sep; + CharPtr str; + ValNodePtr vnp; + + MemSet ((Pointer) &nd, 0, sizeof (NMDef)); + entityID = ObjMgrGetEntityIDForPointer (bsp); + sep = GetBestTopParentForDataEx (entityID, bsp, TRUE); + + VisitFeaturesInSep (sep, (Pointer) &nd, FindNMFeats); + if (nd.numgenes < 1) return NULL; + + vnp = GatherDescrOnBioseq (NULL, bsp, Seq_descr_source, FALSE); + if (vnp == NULL) return NULL; + biop = (BioSourcePtr) vnp->data.ptrvalue; + orp = biop->org; + if (orp == NULL || StringHasNoText (orp->taxname)) return NULL; + + FeatDefLabel (nd.gene, buf, sizeof (buf) - 1, OM_LABEL_CONTENT); + gene = StringSaveNoNull (buf); + + vnp = GatherDescrOnBioseq (NULL, bsp, Seq_descr_molinfo,TRUE); + if (vnp != NULL) { + mip = (MolInfoPtr) vnp->data.ptrvalue; + if (mip != NULL) { + switch (mip->biomol) { + case MOLECULE_TYPE_PRE_MRNA : + rna = "precursorRNA"; + break; + case MOLECULE_TYPE_MRNA : + rna = "mRNA"; + break; + case MOLECULE_TYPE_RRNA : + rna = "rRNA"; + break; + case MOLECULE_TYPE_TRNA : + rna = "tRNA"; + break; + case MOLECULE_TYPE_SNRNA : + rna = "snRNA"; + break; + case MOLECULE_TYPE_SCRNA : + rna = "scRNA"; + break; + case MOLECULE_TYPE_CRNA : + rna = "cRNA"; + break; + case MOLECULE_TYPE_SNORNA : + rna = "snoRNA"; + break; + case MOLECULE_TYPE_TRANSCRIBED_RNA : + rna = "miscRNA"; + break; + default : + break; + } + } + } + + len = StringLen (orp->taxname) + StringLen (gene) + + StringLen (", ") + 30; + + str = (CharPtr) MemNew (len); + if (str != NULL) { + sprintf (str, "%s %s, %s", orp->taxname, gene, rna); + } + + MemFree (gene); + + return str; +} + static CharPtr FindProtDefLine(BioseqPtr bsp) { SeqFeatPtr sfp = NULL, f; @@ -2880,6 +2985,58 @@ static CharPtr FindProtDefLine(BioseqPtr bsp) (CharPtr) vnp->data.ptrvalue); s += StringLen((CharPtr)vnp->data.ptrvalue) + 2; } + /* if hypothetical protein, append locus_tag */ + if (StringICmp (title, "hypothetical protein") == 0) { + sfp = NULL; + if (indexed) { + sfp = SeqMgrGetCDSgivenProduct (bsp, NULL); + } else { + sfp = GatherProtCDS(bsp); + } + if (sfp != NULL) { + grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL) { + loc = sfp->location; + best_gene = NULL; + if (indexed) { + best_gene = SeqMgrGetOverlappingGene (loc, NULL); + } else { + vnp = GatherGenesForCDS(loc); + for (v=vnp; v; v=v->next) { + f = (SeqFeatPtr) v->data.ptrvalue; + diff_current = SeqLocAinB(loc, f->location); + if (! diff_current) { + best_gene = f; + break; + } else if (diff_current > 0) { + if ((diff_lowest == -1) || (diff_current<diff_lowest)) { + diff_lowest = diff_current; + best_gene = f; + } + } + } + ValNodeFree(vnp); + } + if (best_gene != NULL) { + grp = (GeneRefPtr) best_gene->data.value.ptrvalue; + } + } + } + if (grp != NULL) { + geneprod = NULL; + if (grp->locus_tag != NULL) { + geneprod = grp->locus_tag; + } + if (geneprod != NULL) { + s = (CharPtr) MemNew (StringLen (geneprod) + StringLen (title) + 20); + if (s != NULL) { + sprintf (s, "%s %s", title, geneprod); + MemFree (title); + title = s; + } + } + } + } } else if (prp->desc) { title = StringSave(prp->desc); } @@ -3084,6 +3241,27 @@ static Boolean StrainNotAtEndOfTaxname (CharPtr name, CharPtr strain) return FALSE; } +static Int2 GetNumClones (CharPtr str) + +{ + Char ch; + Int2 count; + + if (StringHasNoText (str)) return 0; + + count = 1; + ch = *str; + while (ch != '\0') { + if (ch == ';') { + count++; + } + str++; + ch = *str; + } + + return count; +} + static CharPtr UseOrgMods(BioseqPtr bsp, CharPtr suffix) { ItemInfoPtr iip = NULL; @@ -3096,6 +3274,7 @@ static CharPtr UseOrgMods(BioseqPtr bsp, CharPtr suffix) CharPtr name = NULL, chr = NULL, str = NULL, cln = NULL, map = NULL, def=NULL; Int2 deflen = 0; + Int2 numclones; if (bsp == NULL) { return NULL; @@ -3119,9 +3298,16 @@ static CharPtr UseOrgMods(BioseqPtr bsp, CharPtr suffix) } if (ssp->subtype == 3) { /* clone */ if (ssp->name != NULL) { - cln = (CharPtr) MemNew(StringLen(ssp->name) + 8); - deflen += StringLen(ssp->name) + 8; - sprintf(cln, " clone %s", ssp->name); + numclones = GetNumClones (ssp->name); + if (numclones > 3) { + cln = (CharPtr) MemNew (20); + sprintf (cln, ", %d clones,", (int) numclones); + deflen += StringLen (cln) + 2; + } else { + cln = (CharPtr) MemNew(StringLen(ssp->name) + 8); + deflen += StringLen(ssp->name) + 8; + sprintf(cln, " clone %s", ssp->name); + } } } if (ssp->subtype == 2) { /* map */ @@ -3446,9 +3632,11 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf, "WORKING DRAFT SEQUENCE", "*** SEQUENCING IN PROGRESS ***" }; Boolean htg_tech = FALSE, htgs_draft = FALSE, htgs_cancelled = FALSE, - is_nc = FALSE, is_nm = FALSE, is_tpa = FALSE; + is_nc = FALSE, is_nm = FALSE, is_nr = FALSE, is_tpa = FALSE; MolInfoPtr mip; GBBlockPtr gbp = NULL; + EMBLBlockPtr ebp = NULL; + ValNodePtr keywords = NULL; Boolean wgsmaster = FALSE; CharPtr suffix = NULL; SeqIdPtr sip; @@ -3474,6 +3662,8 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf, is_nc = TRUE; } else if (StringNICmp (tsip->accession, "NM_", 3) == 0) { is_nm = TRUE; + } else if (StringNICmp (tsip->accession, "NR_", 3) == 0) { + is_nr = TRUE; } } break; @@ -3519,6 +3709,16 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf, vnp=GatherDescrOnBioseq(iip, bsp, Seq_descr_genbank,TRUE); if (vnp != NULL) { gbp = (GBBlockPtr) vnp->data.ptrvalue; + if (gbp != NULL) { + keywords = gbp->keywords; + } + } + vnp=GatherDescrOnBioseq(iip, bsp, Seq_descr_embl,TRUE); + if (vnp != NULL) { + ebp = (EMBLBlockPtr) vnp->data.ptrvalue; + if (ebp != NULL) { + keywords = ebp->keywords; + } } } if (! ignoreTitle) @@ -3528,8 +3728,13 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf, title = StringSaveNoNull((CharPtr)vnp->data.ptrvalue); } if (tech == MI_TECH_htgs_0 || tech == MI_TECH_htgs_1 || tech == MI_TECH_htgs_2) { - MemFree(title); /* manufacture all HTG titles */ - title = NULL; + MemFree(title); /* manufacture all HTG titles */ + title = NULL; + if (iip != NULL) { + iip->entityID = 0; + iip->itemID = 0; + iip->itemtype = 0; + } if (title == NULL || *title == '\0') { title = UseOrgMods(bsp, NULL); organism = NULL; @@ -3578,6 +3783,13 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf, iip->itemID = 0; iip->itemtype = 0; } + } else if (is_nr && title == NULL) { + title = FindNRDefLine (bsp); + if (title != NULL && iip != NULL) { + iip->entityID = 0; + iip->itemID = 0; + iip->itemtype = 0; + } } /* some titles may have zero length */ if (title != NULL && *title != '\0') { @@ -3736,8 +3948,8 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf, i = 0; } } else { - if (gbp != NULL) { - for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) { + if (keywords != NULL) { + for (vnp = keywords; vnp != NULL; vnp = vnp->next) { if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_DRAFT") == 0) { htgs_draft = TRUE; } else if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_CANCELLED") == 0) { diff --git a/api/tomedlin.c b/api/tomedlin.c index 3a39dab6..0f779966 100644 --- a/api/tomedlin.c +++ b/api/tomedlin.c @@ -29,7 +29,7 @@ * * Version Creation Date: 10/15/91 * -* $Revision: 6.8 $ +* $Revision: 6.10 $ * * File Description: conversion to medlars format * @@ -40,6 +40,12 @@ * * * $Log: tomedlin.c,v $ +* Revision 6.10 2003/09/28 20:22:47 kans +* added PubmedEntryToXXXFile functions +* +* Revision 6.9 2003/09/26 18:57:51 kans +* MedlineEntryToDataFile calls MakeMLAuthString for structured author +* * Revision 6.8 2001/10/29 20:37:06 kans * MakeAuthorString for structured authors * @@ -187,15 +193,66 @@ static ColData table [2] = {{0, 6, 0, 'l', TRUE, TRUE, FALSE}, static Char *months[13] = {"", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; -NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp) +static CharPtr MakeMLAuthString ( + CharPtr name, + CharPtr initials, + CharPtr suffix +) + +{ + Char ch; + size_t len; + CharPtr ptr; + CharPtr str; + CharPtr tmp; + + if (name == NULL) return NULL; + + len = StringLen (name) + StringLen (initials) * 3 + StringLen (suffix); + str = MemNew (sizeof (Char) * (len + 4)); + if (str == NULL) return NULL; + + tmp = str; + + tmp = StringMove (tmp, name); + + ptr = initials; + if (! StringHasNoText (initials)) { + tmp = StringMove (tmp, " "); + ch = *ptr; + while (ch != '\0') { + if (ch == '-') { + *tmp = '-'; + tmp++; + } else if (ch != '.') { + *tmp = ch; + tmp++; + } + ptr++; + ch = *ptr; + } + *tmp = '\0'; + } + + if (! StringHasNoText (suffix)) { + tmp = StringMove (tmp, " "); + tmp = StringMove (tmp, suffix); + } + + return str; +} + +static Boolean MedlineEntryToDataFileEx (MedlineEntryPtr mep, Int4 pmid, FILE *fp) { CharPtr abstract; AffilPtr affil; + AuthorPtr ap; AuthListPtr authors = NULL; CitArtPtr cit; CitJourPtr citjour; Int2 count; + CharPtr curr; DatePtr date = NULL; ValNodePtr gene; Int2 i; @@ -204,9 +261,11 @@ NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp) size_t len; MedlineMeshPtr mesh; ValNodePtr names; + NameStdPtr nsp; CharPtr p; CharPtr pages = NULL; ParData para; + PersonIdPtr pid; CharPtr ptr; ValNodePtr qual; Boolean rsult; @@ -255,20 +314,59 @@ NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp) cit = mep->cit; if (cit != NULL) { authors = cit->authors; - if (authors != NULL && (authors->choice == 2 || authors->choice == 3)) { - names = authors->names; - count = 0; - while (names != NULL) { - if (count >= 20) { - rsult = (Boolean) (SendTextToFile (fp, buffer, ¶, table) && rsult); - ClearString (); - count = 0; + if (authors != NULL) { + if (authors->choice == 1) { + names = authors->names; + count = 0; + while (names != NULL) { + if (count >= 20) { + rsult = (Boolean) (SendTextToFile (fp, buffer, ¶, table) && rsult); + ClearString (); + count = 0; + } + curr = NULL; + ap = (AuthorPtr) names->data.ptrvalue; + if (ap != NULL) { + pid = ap->name; + if (pid != NULL) { + if (pid->choice == 2) { + nsp = (NameStdPtr) pid->data; + if (nsp != NULL) { + if (! StringHasNoText (nsp->names [0])) { + curr = MakeMLAuthString (nsp->names [0], nsp->names [4], nsp->names [5]); + } else if (! StringHasNoText (nsp->names [3])) { + curr = MakeMLAuthString (nsp->names [3], NULL, NULL); + } + } + } else if (pid->choice == 3 || pid->choice == 4) { + curr = MakeMLAuthString ((CharPtr) pid->data, NULL, NULL); + } + } + } + if (curr != NULL) { + AddString ("AU -\t"); + AddString (curr); + AddString ("\n"); + curr = MemFree (curr); + } + names = names->next; + count++; + } + } else if (authors->choice == 2 || authors->choice == 3) { + names = authors->names; + count = 0; + while (names != NULL) { + if (count >= 20) { + rsult = (Boolean) (SendTextToFile (fp, buffer, ¶, table) && rsult); + ClearString (); + count = 0; + } + AddString ("AU -\t"); + AddString (names->data.ptrvalue); + AddString ("\n"); + names = names->next; + count++; } - AddString ("AU -\t"); - AddString (names->data.ptrvalue); - AddString ("\n"); - names = names->next; - count++; } } rsult = (Boolean) (SendTextToFile (fp, buffer, ¶, table) && rsult); @@ -581,6 +679,23 @@ NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp) return rsult; } +NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp) + +{ + return MedlineEntryToDataFileEx (mep, 0, fp); +} + +NLM_EXTERN Boolean PubmedEntryToDataFile (PubmedEntryPtr pep, FILE *fp) + +{ + MedlineEntryPtr mep; + + if (pep == NULL || fp == NULL) return FALSE; + mep = (MedlineEntryPtr) pep->medent; + if (mep == NULL) return FALSE; + return MedlineEntryToDataFileEx (mep, pep->pmid, fp); +} + #ifdef VAR_ARGS static CharPtr CDECL StrngAppend (first, va_alist) CharPtr first; @@ -1036,7 +1151,7 @@ static ColData colFmt [3] = {{0, 0, 0, 'l', TRUE, TRUE, FALSE}, static ColData mshFmt [1] = {{0, 80, 0, 'l', FALSE, FALSE, TRUE}}; -static Boolean MedlineEntryToDocOrAbsFile (MedlineEntryPtr mep, FILE *fp, Boolean showMesh) +static Boolean MedlineEntryToDocOrAbsFile (MedlineEntryPtr mep, Int4 pmid, FILE *fp, Boolean showMesh) { size_t len; @@ -1122,13 +1237,35 @@ static Boolean MedlineEntryToDocOrAbsFile (MedlineEntryPtr mep, FILE *fp, Boolea NLM_EXTERN Boolean MedlineEntryToDocFile (MedlineEntryPtr mep, FILE *fp) { - return MedlineEntryToDocOrAbsFile (mep, fp, TRUE); + return MedlineEntryToDocOrAbsFile (mep, 0, fp, TRUE); } NLM_EXTERN Boolean MedlineEntryToAbsFile (MedlineEntryPtr mep, FILE *fp) { - return MedlineEntryToDocOrAbsFile (mep, fp, FALSE); + return MedlineEntryToDocOrAbsFile (mep, 0, fp, FALSE); +} + +NLM_EXTERN Boolean PubmedEntryToDocFile (PubmedEntryPtr pep, FILE *fp) + +{ + MedlineEntryPtr mep; + + if (pep == NULL || fp == NULL) return FALSE; + mep = (MedlineEntryPtr) pep->medent; + if (mep == NULL) return FALSE; + return MedlineEntryToDocOrAbsFile (mep, pep->pmid, fp, TRUE); +} + +NLM_EXTERN Boolean PubmedEntryToAbsFile (PubmedEntryPtr pep, FILE *fp) + +{ + MedlineEntryPtr mep; + + if (pep == NULL || fp == NULL) return FALSE; + mep = (MedlineEntryPtr) pep->medent; + if (mep == NULL) return FALSE; + return MedlineEntryToDocOrAbsFile (mep, pep->pmid, fp, FALSE); } #define IBM_MEDLINE_DIVSS '$' diff --git a/api/tomedlin.h b/api/tomedlin.h index 487bb1c1..1be1818f 100644 --- a/api/tomedlin.h +++ b/api/tomedlin.h @@ -29,7 +29,7 @@ * * Version Creation Date: 10/15/91 * -* $Revision: 6.1 $ +* $Revision: 6.2 $ * * File Description: conversion to medlars format * @@ -40,6 +40,9 @@ * * * $Log: tomedlin.h,v $ +* Revision 6.2 2003/09/28 20:22:47 kans +* added PubmedEntryToXXXFile functions +* * Revision 6.1 1997/12/22 18:51:23 grisha * update unit-record number for PmId to 969 * @@ -78,6 +81,7 @@ #include <objmedli.h> #include <objmdrs.h> +#include <objpubme.h> #define MEDLINE_BADCODE 0 /* BAD */ #define MEDLINE_EOF -1 /* END OF ENTRY */ @@ -163,6 +167,10 @@ NLM_EXTERN Boolean MedlarsEntryToDataFile PROTO((MedlarsEntryPtr mep, FILE *fp)) NLM_EXTERN Boolean MedlarsEntryToDocFile PROTO((MedlarsEntryPtr mep, FILE *fp)); NLM_EXTERN Boolean MedlarsEntryToAbsFile PROTO((MedlarsEntryPtr mep, FILE *fp)); +NLM_EXTERN Boolean PubmedEntryToDataFile PROTO((PubmedEntryPtr pep, FILE *fp)); +NLM_EXTERN Boolean PubmedEntryToDocFile PROTO((PubmedEntryPtr pep, FILE *fp)); +NLM_EXTERN Boolean PubmedEntryToAbsFile PROTO((PubmedEntryPtr pep, FILE *fp)); + NLM_EXTERN MedlinePtr ParseMedline PROTO((MedlineEntryPtr mep)); NLM_EXTERN MedlinePtr FreeMedline PROTO((MedlinePtr mPtr)); diff --git a/api/txalign.c b/api/txalign.c index 3253529f..127cc1a5 100644 --- a/api/txalign.c +++ b/api/txalign.c @@ -1,4 +1,4 @@ -/* $Id: txalign.c,v 6.72 2003/01/23 23:31:58 dondosha Exp $ +/* $Id: txalign.c,v 6.79 2003/09/26 20:54:10 dondosha Exp $ *************************************************************************** * * * COPYRIGHT NOTICE * @@ -27,13 +27,34 @@ * * File Name: txalign.c * -* $Revision: 6.72 $ +* $Revision: 6.79 $ * * File Description: Formating of text alignment for the BLAST output * * Modifications: * -------------------------------------------------------------------------- * $Log: txalign.c,v $ +* Revision 6.79 2003/09/26 20:54:10 dondosha +* Revert change in revision 6.77, as it turned trace.cgi links should have stayed as they were +* +* Revision 6.78 2003/08/20 21:29:13 dondosha +* Correction for OOF alignments with nucleotide coordinates starting at 1 +* +* Revision 6.77 2003/07/30 14:07:36 dondosha +* Changed hrefs to trace.cgi in accordance with the new taxonomy web interface +* +* Revision 6.76 2003/07/21 22:15:23 dondosha +* Added support for out-of-frame tblastn alignments +* +* Revision 6.75 2003/07/15 14:36:06 dondosha +* Added a #define for fprintf substitute, needed for gzip compression of Web BLAST results +* +* Revision 6.74 2003/06/11 20:15:35 jianye +* changed unigene linkout +* +* Revision 6.73 2003/06/02 20:02:15 jianye +* Added geo linkout +* * Revision 6.72 2003/01/23 23:31:58 dondosha * Added a global variable for the query number, needed in make_dumpgnl_links * @@ -511,6 +532,9 @@ int query_number_glb; /*Indicate if db contains sequence with gi*/ Boolean DbHasGi=FALSE; +int (*tx_fprintf)(FILE*, const char *, ...) = fprintf; +#define fprintf tx_fprintf + /* Used by the functions that format the one-line descriptions. */ @@ -643,6 +667,7 @@ static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){ /*add space in front of linkout*/ fprintf(fp, " "); bdlpTemp=bdlp; + while(bdlpTemp){ if(checkLinkoutType(bdlpTemp, linkout_locuslink)){ hasLinkout=TRUE; @@ -654,20 +679,10 @@ static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){ } bdlpTemp=bdlp; while(bdlpTemp){ - if(checkLinkoutType(bdlpTemp, linkout_unigene)){ - + if(checkLinkoutType(bdlpTemp, linkout_unigene)){ hasLinkout=TRUE; gi=GetGIForSeqId(bdlpTemp->seqid); - rnp=FDGetTaxNamesFromBioseq(bsp, bdlpTemp->taxid); - if(rnp&&rnp->sci_name){ - unigeneName=getNameInitials(rnp->sci_name); - if(unigeneName){ - fprintf(fp, URL_Unigene, unigeneName, gi); - } - MemFree(unigeneName); - } - - RDBTaxNamesFree(rnp); + fprintf(fp, URL_Unigene, gi); break; } bdlpTemp=bdlpTemp->next; @@ -682,6 +697,16 @@ static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){ } bdlpTemp=bdlpTemp->next; } + bdlpTemp=bdlp; + while(bdlpTemp){ + if(checkLinkoutType(bdlpTemp, linkout_geo)){ + gi=GetGIForSeqId(bdlpTemp->seqid); + fprintf(fp, URL_Geo, gi); + break; + } + bdlpTemp=bdlpTemp->next; + } + } BlastDefLineSetFree(bdlp); } @@ -722,21 +747,16 @@ static void addLinkoutForBioseq(BioseqPtr bsp, SeqIdPtr sip, SeqIdPtr firstSip, if(checkLinkoutType(actualBdlp, linkout_unigene)){ hasLinkout=TRUE; - rnp=FDGetTaxNamesFromBioseq(bsp, actualBdlp->taxid); - if(rnp&&rnp->sci_name){ - unigeneName=getNameInitials(rnp->sci_name); - if(unigeneName){ - fprintf(fp, URL_Unigene, unigeneName, gi); - } - MemFree(unigeneName); - } - - RDBTaxNamesFree(rnp); + fprintf(fp, URL_Unigene, gi); } if(checkLinkoutType(actualBdlp, linkout_structure)){ hasLinkout=TRUE; fprintf(fp, URL_Structure, RID_glb, firstGi, gi, CDD_RID_glb, "onepair", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none"); } + + if(checkLinkoutType(actualBdlp, linkout_geo)){ + fprintf(fp, URL_Geo, gi); + } } BlastDefLineSetFree(bdlp); } @@ -1722,7 +1742,7 @@ static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon); load = TRUE; } else if (!StringICmp(db_tag->db, "TI")) { - sprintf(HTML_buffer, "<a name = TI%ld></a><a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?val=%ld&cmd=retrieve&dopt=fasta\">", (long) oip->id, (long) oip->id); + sprintf(HTML_buffer, "<a name = TI%ld></a><a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id, (long) oip->id); html_len = StringLen(HTML_buffer); sprintf(docbuf+pos, HTML_buffer); @@ -2616,8 +2636,13 @@ static Boolean load_align_sum_for_StdSeg(StdSegPtr ssp, AlignSumPtr asp) return FALSE; if(asp->ooframe) { - master_is_translated = TRUE; - target_is_translated = FALSE; + if (SeqLocStrand(ssp->loc) != Seq_strand_unknown) { + master_is_translated = TRUE; + target_is_translated = FALSE; + } else { + master_is_translated = FALSE; + target_is_translated = TRUE; + } } else { /* Check for valid sequence. */ if (SeqLocLen(ssp->loc) == 3*SeqLocLen(ssp->loc->next)) @@ -2748,10 +2773,17 @@ static Boolean load_align_sum_for_StdSeg(StdSegPtr ssp, AlignSumPtr asp) } if(asp->ooframe) { - if(ssp->loc->next->choice != SEQLOC_EMPTY) - asp->totlen += SeqLocLen(ssp->loc->next); - else - asp->totlen += SeqLocLen(ssp->loc)/3; + if (master_is_translated) { + if(ssp->loc->next->choice != SEQLOC_EMPTY) + asp->totlen += SeqLocLen(ssp->loc->next); + else + asp->totlen += SeqLocLen(ssp->loc)/3; + } else { + if(ssp->loc->choice != SEQLOC_EMPTY) + asp->totlen += SeqLocLen(ssp->loc); + else + asp->totlen += SeqLocLen(ssp->loc->next)/3; + } } else { if (ssp->loc->choice != SEQLOC_EMPTY) { @@ -4589,7 +4621,7 @@ PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp #endif if(!StringICmp(blast_type, "fruitfly")) { - fprintf(stdout, "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0> - please follow this image for the map location of the sequence<P>\n"); + fprintf(outfp, "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0> - please follow this image for the map location of the sequence<P>\n"); } asn2ff_set_output(outfp, NULL); @@ -4884,7 +4916,7 @@ PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp } else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) { oip = db_tag->tag; if(oip->id != 0) { - fprintf(outfp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?val=%ld&cmd=retrieve&dopt=fasta\">", (long) oip->id); + fprintf(outfp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id); } } else { make_dumpgnl_links(txsp->id, blast_type, txsp->segs_str, db_name, txsp->is_na, outfp, txsp->buffer_id, FALSE); @@ -5417,7 +5449,7 @@ static CharPtr FSFPrintOneDefline(AlignStatOptionPtr asop, Boolean is_na, } else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) { oip = db_tag->tag; if(oip->id != 0) { - fprintf(asop->fp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?val=%ld&cmd=retrieve&dopt=fasta\">", (long) oip->id); + fprintf(asop->fp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id); } } else { /** * links to incomplete genomes */ @@ -5960,22 +5992,27 @@ NLM_EXTERN Uint4 GetTxAlignOptionValue (Uint1 tx_option, BoolPtr hide_feature, return option; } -Int4 OOFGetDNAStrand(StdSegPtr sseg) +/** The following function assumes that neither of the locations in + * the first link in StdSeg is empty. + * @param sseg Alignment segments [in] + * @param dna_strand The strand of the nucleotide sequence [out] + * @return TRUE for tblastn, FALSE for blastx. + */ +static Boolean OOFGetDNAStrand(StdSegPtr sseg, Int4Ptr dna_strand) { - Int4 dna_strand; - SeqIntPtr seq_int1; - SeqLocPtr slp1; + Uint1 strand; + Boolean reverse; - for(; sseg != NULL; sseg= sseg->next) { - slp1 = sseg->loc; - - if(slp1->choice == SEQLOC_INT) { - seq_int1 = (SeqIntPtr) slp1->data.ptrvalue; - return seq_int1->strand; - } + if ((strand = SeqLocStrand(sseg->loc)) != Seq_strand_unknown) { + *dna_strand = (Int4) strand; + reverse = FALSE; + } else { + *dna_strand = (Int4) SeqLocStrand(sseg->loc->next); + reverse = TRUE; } - return Seq_strand_unknown; + return reverse; } + static Int4 SetDNALineEnd(Int4 dna_index, Int4 dna_strand) { Int4 dna_line_end; @@ -6041,6 +6078,7 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, Int4 i, lines, k, shift_info = 0; Char c1, c2, c3; Int4 dna_strand, max_digits, num_pad; + Boolean reverse = FALSE; if(sap == NULL || sap->segtype != 3) /* Should be StdSeg here! */ return FALSE; @@ -6051,8 +6089,8 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, pro_index = 0; pro_line_end = 0; dna_line_end = 0; - - dna_strand = OOFGetDNAStrand((StdSegPtr) sap->segs); + + reverse = OOFGetDNAStrand((StdSegPtr) sap->segs, &dna_strand); /* Needed for printing nice alignment with normal spacing */ max_digits = GetMaxFROMDigits((StdSegPtr) sap->segs); @@ -6065,8 +6103,18 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, length_pro = 0; b_store = NULL; - slp1 = sseg->loc; - + if (reverse) { + slp2 = sseg->loc; + slp1 = sseg->loc->next; + sip2 = sseg->ids; /* Protein */ + sip1 = sseg->ids->next; /* DNA */ + } else { + slp1 = sseg->loc; + slp2 = sseg->loc->next; + sip1 = sseg->ids; /* DNA */ + sip2 = sseg->ids->next; /* Protein */ + } + if(slp1->choice == SEQLOC_INT) seq_int1 = (SeqIntPtr) slp1->data.ptrvalue; else if (slp1->choice == SEQLOC_EMPTY) @@ -6074,7 +6122,6 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, else return FALSE; /* Invalid SeqLoc */ - slp2 = sseg->loc->next; if(slp2->choice == SEQLOC_INT) seq_int2 = (SeqIntPtr) slp2->data.ptrvalue; @@ -6087,9 +6134,6 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, if(seq_int1 == NULL && seq_int2 == NULL) continue; - sip1 = sseg->ids; /* DNA */ - sip2 = sseg->ids->next; /* Protein */ - /* printf("shift_info = %d\n", shift_info); */ if(shift_info%3) @@ -6171,12 +6215,12 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, } if(line_index == 0) { - dna_line_start = dna_index; + dna_line_start = dna_index + 1; pro_line_start = pro_index + 1; } if (dna_line_start == 0) - dna_line_start = dna_index; + dna_line_start = dna_index + 1; if(pro_line_start == 0) pro_line_start = pro_index + 1; @@ -6293,37 +6337,64 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, /* ------- Printout of the alignment ------------- */ - fprintf(fp, "Query: %d", dna_line_start+1); + if (reverse) { + fprintf(fp, "Query: %d", pro_line_start); + num_pad = + max_digits - GetDigitsInINT(pro_line_start) + 1; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n", line2, pro_line_end); - num_pad = max_digits - GetDigitsInINT(dna_line_start+1) + 1; + num_pad = 8 + max_digits; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); - for(k=0; k < num_pad; k++) - fprintf(fp, " "); + fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start); - fprintf(fp, "%s %d\n", line1, dna_line_end+3); - - num_pad = 8 + max_digits; - - for(k=0; k < num_pad; k++) - fprintf(fp, " "); + num_pad = + max_digits - GetDigitsInINT(dna_line_start) + 1; - fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start); + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n\n", line1, dna_line_end+3); - num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1; + } else { + fprintf(fp, "Query: %d", dna_line_start); + num_pad = + max_digits - GetDigitsInINT(dna_line_start) + 1; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n", line1, dna_line_end+3); + + num_pad = 8 + max_digits; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); - for(k=0; k < num_pad; k++) - fprintf(fp, " "); + fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start); - fprintf(fp, "%s %d\n\n", line2, pro_line_end); + num_pad = + max_digits - GetDigitsInINT(pro_line_start) + 1; + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n\n", line2, pro_line_end); + } /* --------------------------------------------------- */ if(dna_line_end != 0) { if(dna_strand != Seq_strand_minus) - dna_line_start = dna_line_end+3; /*takes 3 bases*/ + dna_line_start = dna_line_end+4; /*takes 3 bases*/ else - dna_line_start = dna_line_end+1; /*takes 3 bases*/ + dna_line_start = dna_line_end+2; /*takes 3 bases*/ } if(pro_line_end != 0) pro_line_start = pro_line_end+1; @@ -6357,35 +6428,58 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask, /* ------- Printout of the alignment remainder ------- */ + if (reverse) { + fprintf(fp, "Query: %d", pro_line_start); - fprintf(fp, "Query: %d", dna_line_start+1); - - num_pad = max_digits - GetDigitsInINT(dna_line_start+1) + 1; - - for(k=0; k < num_pad; k++) - fprintf(fp, " "); - - fprintf(fp, "%s %d\n", line1, dna_line_end+3); - - num_pad = 8 + max_digits; - - for(k=0; k < num_pad; k++) - fprintf(fp, " "); - - fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start); - - num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1; - - for(k=0; k < num_pad; k++) - fprintf(fp, " "); - - fprintf(fp, "%s %d\n\n\n", line2, pro_line_end); + num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n", line2, pro_line_end); + + num_pad = 8 + max_digits; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start); + + num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n\n\n", line1, dna_line_end+3); + } else { + fprintf(fp, "Query: %d", dna_line_start); + num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n", line1, dna_line_end+3); + + num_pad = 8 + max_digits; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start); + + num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1; + + for(k=0; k < num_pad; k++) + fprintf(fp, " "); + + fprintf(fp, "%s %d\n\n\n", line2, pro_line_end); + } /* --------------------------------------------------- */ /* fprintf(fp, "\nQuery: %-5d %s %-5d\n " "%s\nSbjct: %-5d %s %-5d\n\n", - dna_line_start+1, line1, dna_line_end+3, line3, + dna_line_start, line1, dna_line_end+3, line3, pro_line_start, line2, pro_line_end); */ return TRUE; diff --git a/api/valid.c b/api/valid.c index 1b52746f..667a2876 100644 --- a/api/valid.c +++ b/api/valid.c @@ -29,7 +29,7 @@ * * Version Creation Date: 1/1/94 * -* $Revision: 6.364 $ +* $Revision: 6.403 $ * * File Description: Sequence editing utilities * @@ -39,6 +39,123 @@ * ------- ---------- ----------------------------------------------------- * * $Log: valid.c,v $ +* Revision 6.403 2003/10/24 21:31:00 kans +* added test for ERR_SEQ_FEAT_UTRdoesNotAbutCDS on mRNA +* +* Revision 6.402 2003/10/24 17:50:35 kans +* added ERR_SEQ_INST_SeqLitGapLength0 +* +* Revision 6.401 2003/10/24 04:41:50 kans +* ValidateImpFeat warns if repeat_region /rpt_unit has same length as sfp->location but does not have matching sequence +* +* Revision 6.400 2003/10/23 20:29:38 kans +* warn about allele gbqual when inheriting allele from gene +* +* Revision 6.399 2003/10/20 19:44:47 kans +* added * Terminator codon +* +* Revision 6.398 2003/10/20 16:53:05 kans +* suppress validator warning for synthetic sequences with molinfo other genetic when origin is artificial +* +* Revision 6.397 2003/10/17 21:12:27 kans +* added ERR_SEQ_FEAT_OnlyGeneXrefs test +* +* Revision 6.396 2003/10/10 22:38:39 kans +* added tests for BadTrnaCodon and BadTrnaAA +* +* Revision 6.395 2003/10/06 16:19:26 kans +* commented out check on rpt_unit content - now any text will be allowed +* +* Revision 6.394 2003/10/01 19:46:39 kans +* suppress partial not at end warning for CDD region +* +* Revision 6.393 2003/09/30 20:35:30 kans +* fixed IsSynthetic to look at div if origin was not set +* +* Revision 6.392 2003/09/23 12:33:37 kans +* Check DeltaLitOnly and allow test for terminal Ns +* +* Revision 6.391 2003/09/18 18:28:53 kans +* fixed IsMicroRNA - was using continue statements in while loop instead of for loop +* +* Revision 6.390 2003/09/11 15:24:35 kans +* duplicate feat severity warning check was only done for cds, not mrna +* +* Revision 6.389 2003/09/10 14:29:13 kans +* IsMicroRNA feature test for molinfo-biomol.other, do mrnatrans base comparison even if polyA test fails +* +* Revision 6.388 2003/09/09 20:09:21 kans +* lower severity for far product partial inconsistency and mrnatranscheck, also check for 95% polyA +* +* Revision 6.387 2003/08/13 21:45:30 kans +* added ERR_SEQ_DESCR_RefGeneTrackingWithoutStatus +* +* Revision 6.386 2003/08/11 15:08:08 kans +* REBASE is legal refseq dbxref +* +* Revision 6.385 2003/08/01 21:33:38 kans +* ERR_SEQ_INST_InternalNsInSeqLit dropped to warning, cutoff still at 80 +* +* Revision 6.384 2003/08/01 21:30:28 kans +* added CountAdjacentNsInSeqLit for htgs 1 and 2 +* +* Revision 6.383 2003/07/30 21:44:31 kans +* comment out archaic locations messages because TMSMART thinks it should not promote locations +* +* Revision 6.382 2003/07/29 15:59:11 kans +* use new ERR_SEQ_PKG_ArchaicFeatureLocation and ERR_SEQ_PKG_ArchaicFeatureProduct tokens, also do not complain if location or product is local and Bioseq has TMSMART or BankIt general +* +* Revision 6.381 2003/07/28 22:11:04 kans +* check for archaic feature locations and products +* +* Revision 6.380 2003/07/22 16:18:07 kans +* added Kerguelen Archipelago to country list +* +* Revision 6.379 2003/07/15 16:46:02 kans +* suppress BadDeltaSeq - HTGS 2 delta seq has no gaps and no graphs - if HTGS_ACTIVEFIN keyword present +* +* Revision 6.378 2003/07/07 15:35:01 kans +* ERR_SEQ_INST_TerminalNs is SEV_ERROR if 10 or more Ns at either end +* +* Revision 6.377 2003/07/02 19:36:47 kans +* added CheckCDSPartial to check cds->location partials against product molinfo +* +* Revision 6.376 2003/06/17 21:15:46 kans +* germline and rearranged are mutually exclusive - proviral and virion are because there is only one biop->genome +* +* Revision 6.375 2003/06/17 21:05:13 kans +* synthetic biosource should have molinfo biomol other +* +* Revision 6.374 2003/06/17 20:03:38 kans +* NT-036298 (dash instead of underscore) gives REJECT level BadSeqIdFormat error +* +* Revision 6.373 2003/06/02 21:42:11 kans +* allow 4 + 2 + 7 wgs master accessions +* +* Revision 6.372 2003/05/09 18:46:47 kans +* severity of first and last delta seq component is gap message lowered if not HTGS +* +* Revision 6.371 2003/05/02 19:19:28 kans +* added rearrangement exception to list that suppressed CdTransCheck +* +* Revision 6.370 2003/05/01 20:08:57 kans +* Serbia and Montenegro restored to list of countries, but Yugoslavia also remains +* +* Revision 6.369 2003/04/30 16:38:37 kans +* added CdsProductIdCheck +* +* Revision 6.368 2003/04/27 20:16:23 kans +* ribosomal slippage exception suppresses CDSmRNArange warning +* +* Revision 6.367 2003/04/24 19:51:47 kans +* rearrangement required okay for all records, not just refseq, at least for now +* +* Revision 6.366 2003/04/24 19:19:12 kans +* added support for new rearrangement required for product exception +* +* Revision 6.365 2003/04/23 16:56:16 kans +* ERR_GENERIC_BadPageNumbering dropped to SEV_WARNING +* * Revision 6.364 2003/04/21 16:39:22 kans * CheckRnaProductType was doing the wrong thing for rRNAs * @@ -1337,6 +1454,7 @@ NLM_EXTERN void SpellCheckSeqFeat (GatherContextPtr gcp); NLM_EXTERN void SpellCheckString (ValidStructPtr vsp, CharPtr str); NLM_EXTERN void SpliceCheck (ValidStructPtr vsp, SeqFeatPtr sfp); static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll); +static void CdsProductIdCheck (ValidStructPtr vsp, SeqFeatPtr sfp); static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSourcePtr biop); static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPtr pdp); static void ValidateSfpCit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp); @@ -1929,6 +2047,14 @@ static void LookForAnyPubAndOrg (SeqEntryPtr sep, BoolPtr no_pub, BoolPtr no_bio } } +typedef struct ftprob { + Uint4 num_misplaced_features; + Uint4 num_archaic_locations; + Uint4 num_archaic_products; + Uint4 num_gene_feats; + Uint4 num_gene_xrefs; +} FeatProb, PNTR FeatProbPtr; + static void CheckFeatPacking (BioseqPtr bsp, SeqFeatPtr sfp, Uint4Ptr num_misplaced_features) { SeqAnnotPtr sap; @@ -1967,22 +2093,101 @@ static void CheckFeatPacking (BioseqPtr bsp, SeqFeatPtr sfp, Uint4Ptr num_mispla } } +static Boolean IdIsArchaic (SeqIdPtr sip) + +{ + BioseqPtr bsp; + DbtagPtr dbt; + SeqIdPtr id; + + if (sip == NULL) return FALSE; + if (sip->choice != SEQID_LOCAL && sip->choice != SEQID_GENERAL) return FALSE; + bsp = BioseqFind (sip); + if (bsp == NULL) return FALSE; + for (id = bsp->id; id != NULL; id = id->next) { + switch (id->choice) { + case SEQID_GENERAL : + if (sip->choice == SEQID_LOCAL) { + dbt = (DbtagPtr) id->data.ptrvalue; + if (dbt != NULL) { + if (StringICmp (dbt->db, "TMSMART") != 0 && StringICmp (dbt->db, "BankIt") != 0) { + return TRUE; + } + } + } + break; + case SEQID_GI : + case SEQID_GENBANK : + case SEQID_EMBL : + case SEQID_PATENT : + case SEQID_OTHER : + case SEQID_DDBJ : + case SEQID_TPG : + case SEQID_TPE : + case SEQID_TPD : + return TRUE; + default : + break; + } + } + return FALSE; +} + +static void CheckFeatLocAndProd (SeqFeatPtr sfp, FeatProbPtr fpp) + +{ + SeqLocPtr slp; + + if (sfp == NULL || fpp == NULL) return; + if (sfp->product != NULL && IdIsArchaic (SeqLocId (sfp->product))) { + (fpp->num_archaic_products)++; + } + slp = SeqLocFindNext (sfp->location, NULL); + while (slp != NULL) { + if (IdIsArchaic (SeqLocId (slp))) { + (fpp->num_archaic_locations)++; + return; + } + slp = SeqLocFindNext (sfp->location, slp); + } +} + static Boolean LIBCALLBACK CountMisplacedFeatures (BioseqPtr bsp, SeqMgrBioseqContextPtr bcontext) { - Uint4Ptr num_misplaced_features; + FeatProbPtr fpp; SeqFeatPtr sfp; SeqMgrFeatContext fcontext; - num_misplaced_features = (Uint4Ptr) bcontext->userdata; + fpp = (FeatProbPtr) bcontext->userdata; sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); while (sfp != NULL) { - CheckFeatPacking (bsp, sfp, num_misplaced_features); + CheckFeatPacking (bsp, sfp, &(fpp->num_misplaced_features)); + CheckFeatLocAndProd (sfp, fpp); sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext); } return TRUE; } +static void CountGeneXrefs (SeqFeatPtr sfp, Pointer userdata) + +{ + FeatProbPtr fpp; + GeneRefPtr grp; + + if (sfp == NULL || userdata == NULL) return; + fpp = (FeatProbPtr) userdata; + + if (sfp->data.choice == SEQFEAT_GENE) { + (fpp->num_gene_feats)++; + } + + grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return; + + (fpp->num_gene_xrefs)++; +} + static Boolean IsNoncuratedRefSeq (BioseqPtr bsp, ErrSev *sev) { @@ -2020,7 +2225,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) Int2 errors[6], i; Boolean suppress_no_pubs = TRUE; Boolean suppress_no_biosrc = TRUE; - Uint4 num_misplaced_features = 0; + FeatProb featprob; GatherContextPtr gcp = NULL; GatherContext gc; SeqEntryPtr fsep; @@ -2040,6 +2245,8 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) for (i = 0; i < 6; i++) /* keep errors between clears */ errors[i] = 0; + MemSet ((Pointer) &featprob, 0, sizeof (FeatProb)); + if (vsp->useSeqMgrIndexes) { entityID = ObjMgrGetEntityIDForChoice (sep); @@ -2048,7 +2255,10 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) SeqMgrIndexFeatures (entityID, NULL); ErrSetMessageLevel (oldsev); } - SeqMgrExploreBioseqs (entityID, NULL, (Pointer) &num_misplaced_features, CountMisplacedFeatures, TRUE, TRUE, TRUE); + SeqMgrExploreBioseqs (entityID, NULL, (Pointer) &featprob, CountMisplacedFeatures, TRUE, TRUE, TRUE); + + topsep = GetTopSeqEntryForEntityID (entityID); + VisitFeaturesInSep (topsep, (Pointer) &featprob, CountGeneXrefs); } else { /* if not using indexing, still need feature->idx.subtype now */ @@ -2169,11 +2379,30 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp) } } - if (num_misplaced_features > 1) { - ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There are %d mispackaged features in this record.", (int) num_misplaced_features); - } else if (num_misplaced_features == 1) { - ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There is %d mispackaged feature in this record.", (int) num_misplaced_features); + if (featprob.num_misplaced_features > 1) { + ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There are %d mispackaged features in this record.", (int) featprob.num_misplaced_features); + } else if (featprob.num_misplaced_features == 1) { + ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There is %d mispackaged feature in this record.", (int) featprob.num_misplaced_features); } + + /* + if (featprob.num_archaic_locations > 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureLocation, "There are %d archaic feature locations in this record.", (int) featprob.num_archaic_locations); + } else if (featprob.num_archaic_locations == 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureLocation, "There is %d archaic feature location in this record.", (int) featprob.num_archaic_locations); + } + + if (featprob.num_archaic_products > 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureProduct, "There are %d archaic feature products in this record.", (int) featprob.num_archaic_products); + } else if (featprob.num_archaic_products == 1) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureProduct, "There is %d archaic feature product in this record.", (int) featprob.num_archaic_products); + } + */ + + if (featprob.num_gene_feats == 0 && featprob.num_gene_xrefs > 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_OnlyGeneXrefs, "There are %ld gene xrefs and no gene features in this record.", (long) featprob.num_gene_xrefs); + } + first = FALSE; } @@ -3034,6 +3263,89 @@ static void ValidateIDSetAgainstDb (GatherContextPtr gcp, ValidStructPtr vsp, Bi } } +typedef struct enrun { + Int4 ncount; + Int4 maxrun; +} RunOfNs, PNTR RunOfNsPtr; + +static void LIBCALLBACK CountAdjacentProc (CharPtr sequence, Pointer userdata) + +{ + Char ch; + RunOfNsPtr ronp; + CharPtr str; + + ronp = (RunOfNsPtr) userdata; + if (sequence == NULL || ronp == NULL) return; + + str = sequence; + ch = *str; + while (ch != '\0') { + if (ch == 'N') { + (ronp->ncount)++; + if (ronp->ncount > ronp->maxrun) { + ronp->maxrun = ronp->ncount; + } + } else { + ronp->ncount = 0; + } + str++; + ch = *str; + } +} + +static Int4 CountAdjacentNsInSeqLit (SeqLitPtr slitp, Boolean is_na) + +{ + BioseqPtr bsp; + RunOfNs ron; + + if (slitp == NULL || slitp->length < 1 || slitp->seq_data == NULL) return 0; + + bsp = BioseqNew (); + if (bsp == NULL) return 0; + + if (slitp->seq_data != NULL) { + bsp->repr = Seq_repr_raw; + } else { + bsp->repr = Seq_repr_virtual; + } + if (is_na) { + bsp->mol = Seq_mol_dna; + } else { + bsp->mol = Seq_mol_aa; + } + bsp->seq_data_type = slitp->seq_data_type; + bsp->seq_data = slitp->seq_data; + bsp->length = slitp->length; + bsp->id = SeqIdParse ("lcl|countseqlitns"); + + ron.ncount = 0; + ron.maxrun = 0; + + SeqPortStream (bsp, TRUE, (Pointer) &ron, CountAdjacentProc); + + bsp->seq_data = NULL; + + BioseqFree (bsp); + + return ron.maxrun; +} + +static Boolean DeltaLitOnly ( + BioseqPtr bsp +) + +{ + ValNodePtr vnp; + + if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE; + for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) { + if (vnp->choice == 1) return FALSE; + } + return TRUE; +} + static void ValidateBioseqInst (GatherContextPtr gcp) { Boolean retval = TRUE; @@ -3061,6 +3373,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp) Boolean litHasData; SeqMgrDescContext context; SeqFeatPtr cds; + GBBlockPtr gbp; GeneRefPtr grp; SeqFeatPtr gene; SeqMgrFeatContext genectxt; @@ -3075,6 +3388,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp) Boolean multitoken; Boolean hasGi = FALSE; SeqHistPtr hist; + Boolean isActiveFin = FALSE; Boolean isGenBankEMBLorDDBJ; Boolean isPatent = FALSE; Boolean isPDB = FALSE; @@ -3091,7 +3405,6 @@ static void ValidateBioseqInst (GatherContextPtr gcp) Int2 trailingX = 0; Int2 numletters, numdigits, numunderscores; Boolean letterAfterDigit, badIDchars; - GBBlockPtr gbp; EMBLBlockPtr ebp; SeqDescrPtr sdp; SeqMgrDescContext dcontext; @@ -3103,9 +3416,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp) ObjValNodePtr ovp; BioseqSetPtr bssp; UserObjectPtr uop; + UserFieldPtr ufp; ObjectIdPtr oip; + Boolean hasRefTrackStatus; Int2 accn_count = 0; Int2 gi_count = 0; + Int4 runsofn; /* set up data structures */ @@ -3192,6 +3508,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp) } else if (numletters == 2 && numdigits == 6 && ISA_aa (bsp->mol) && bsp->repr == Seq_repr_seg) { } else if (numletters == 4 && numdigits == 8 && ISA_na (bsp->mol) && (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) { + } else if (numletters == 4 && numdigits == 9 && ISA_na (bsp->mol) && + (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) { } else { ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession); } @@ -3260,7 +3578,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp) } } if (letterAfterDigit || badIDchars) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession); + ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession); } else if (isNZ && numletters == 4 && numdigits == 8 && numunderscores == 0) { } else if (numletters == 2 && numdigits == 6 && numunderscores == 1) { } else if (numletters == 2 && numdigits == 8 && numunderscores == 1) { @@ -3347,6 +3665,23 @@ static void ValidateBioseqInst (GatherContextPtr gcp) gcp->itemID = olditemid; gcp->thistype = olditemtype; } + } else if (oip != NULL && StringICmp (oip->str, "RefGeneTracking") == 0) { + hasRefTrackStatus = FALSE; + for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { + oip = ufp->label; + if (oip != NULL && StringCmp (oip->str, "Status") == 0) { + hasRefTrackStatus = TRUE; + } + } + if (! hasRefTrackStatus) { + olditemid = gcp->itemID; + olditemtype = gcp->thistype; + gcp->itemID = context.itemID; + gcp->thistype = OBJ_SEQDESC; + ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_RefGeneTrackingWithoutStatus, "RefGeneTracking object needs to have Status set"); + gcp->itemID = olditemid; + gcp->thistype = olditemtype; + } } } vnp = SeqMgrGetNextDescriptor (bsp, vnp, Seq_descr_user, &context); @@ -3727,6 +4062,39 @@ static void ValidateBioseqInst (GatherContextPtr gcp) mip = NULL; if (bsp->repr == Seq_repr_delta) { + vnp = NULL; + if (vsp->useSeqMgrIndexes) { + vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context); + } else { + bcp = BioseqContextNew (bsp); + vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_genbank, NULL, NULL); + BioseqContextFree (bcp); + } + if (vnp != NULL) { + gbp = (GBBlockPtr) vnp->data.ptrvalue; + if (gbp != NULL) { + for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) { + str = (CharPtr) vnp->data.ptrvalue; + if (StringICmp (str, "HTGS_ACTIVEFIN") == 0) { + isActiveFin = TRUE; + } + } + } + } + } + + if (bsp->repr == Seq_repr_delta) { + vnp = NULL; + if (vsp->useSeqMgrIndexes) { + vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context); + } else { + bcp = BioseqContextNew (bsp); + vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_molinfo, NULL, NULL); + BioseqContextFree (bcp); + } + if (vnp != NULL) { + mip = (MolInfoPtr) vnp->data.ptrvalue; + } len = 0; for (vnp = (ValNodePtr) (bsp->seq_ext); vnp != NULL; vnp = vnp->next) { if (vnp->data.ptrvalue == NULL) @@ -3778,6 +4146,16 @@ static void ValidateBioseqInst (GatherContextPtr gcp) default: break; } + if (mip != NULL) { + if (mip->tech == MI_TECH_htgs_1 || mip->tech == MI_TECH_htgs_2) { + runsofn = CountAdjacentNsInSeqLit (slitp, (Boolean) ISA_na (bsp->mol)); + if (runsofn > 80) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta chain", (long) runsofn); + } + } + } + } else if (slitp->length == 0) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_SeqLitGapLength0, "Gap of length 0 in delta chain"); } len += slitp->length; break; @@ -3792,31 +4170,28 @@ static void ValidateBioseqInst (GatherContextPtr gcp) } else if (bsp->length < len) { ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqDataLenWrong, "Bioseq.seq_data is larger [%ld] than given length [%ld]", (long) (len), (long) bsp->length); } - vnp = NULL; - if (vsp->useSeqMgrIndexes) { - vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context); - } else { - bcp = BioseqContextNew (bsp); - vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_molinfo, NULL, NULL); - BioseqContextFree (bcp); - } - if (vnp != NULL) { - mip = (MolInfoPtr) vnp->data.ptrvalue; - if (mip != NULL) { - is_gps = FALSE; - sep = vsp->sep; - if (sep != NULL && IS_Bioseq_set (sep)) { - bssp = (BioseqSetPtr) sep->data.ptrvalue; - if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) { - is_gps = TRUE; - } - } - if ((!isNTorNC) && (! is_gps) && mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 && - mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3 && mip->tech != MI_TECH_wgs && - mip->tech != MI_TECH_unknown && mip->tech != MI_TECH_standard) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "Delta seq technique should not be [%d]", (int) (mip->tech)); + if (mip != NULL) { + is_gps = FALSE; + sep = vsp->sep; + if (sep != NULL && IS_Bioseq_set (sep)) { + bssp = (BioseqSetPtr) sep->data.ptrvalue; + if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) { + is_gps = TRUE; } } + if ((!isNTorNC) && (! is_gps) && mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 && + mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3 && mip->tech != MI_TECH_wgs && + mip->tech != MI_TECH_unknown && mip->tech != MI_TECH_standard) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "Delta seq technique should not be [%d]", (int) (mip->tech)); + } + } + } + + sev = SEV_ERROR; + if (mip != NULL) { + if (mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 && + mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3) { + sev = SEV_WARNING; } } @@ -3825,7 +4200,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp) if (vnp != NULL && vnp->choice == 2) { slitp = (SeqLitPtr) vnp->data.ptrvalue; if (slitp != NULL && slitp->seq_data == NULL) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "First delta seq component is a gap"); + ValidErr (vsp, sev, ERR_SEQ_INST_BadDeltaSeq, "First delta seq component is a gap"); } } last_is_gap = FALSE; @@ -3856,13 +4231,15 @@ static void ValidateBioseqInst (GatherContextPtr gcp) if (vnp != NULL && vnp->choice == 2) { slitp = (SeqLitPtr) vnp->data.ptrvalue; if (slitp != NULL && slitp->seq_data == NULL) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "Last delta seq component is a gap"); + ValidErr (vsp, sev, ERR_SEQ_INST_BadDeltaSeq, "Last delta seq component is a gap"); } } if (num_gaps == 0 && mip != NULL) { if (/* mip->tech == MI_TECH_htgs_1 || */ mip->tech == MI_TECH_htgs_2) { if (VisitGraphsInSep (sep, NULL, NULL) == 0) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadDeltaSeq, "HTGS 2 delta seq has no gaps and no graphs"); + if (! isActiveFin) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadDeltaSeq, "HTGS 2 delta seq has no gaps and no graphs"); + } } } } @@ -3993,28 +4370,38 @@ static void ValidateBioseqInst (GatherContextPtr gcp) } } - if (ISA_na (bsp->mol) && bsp->repr == Seq_repr_raw && bsp->length > 5) { + if (ISA_na (bsp->mol) && (bsp->repr == Seq_repr_raw || (bsp->repr == Seq_repr_delta && DeltaLitOnly (bsp))) && bsp->length > 10) { /* check for N bases at start or stop of sequence */ sfp = (SeqFeatPtr) MemNew (sizeof (SeqFeat)); if (sfp == NULL) return; sfp->data.choice = SEQFEAT_COMMENT; - sfp->location = AddIntervalToLocation (NULL, bsp->id, 0, 2, FALSE, FALSE); + sfp->location = AddIntervalToLocation (NULL, bsp->id, 0, 9, FALSE, FALSE); str = GetSequenceByFeature (sfp); if (str != NULL) { if (str [0] == 'n' || str [0] == 'N') { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_TerminalNs, "N at beginning of sequence"); + if (StringICmp (str, "NNNNNNNNNN") == 0) { + sev = SEV_ERROR; + } else { + sev = SEV_WARNING; + } + ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at beginning of sequence"); } } MemFree (str); sfp->location = SeqLocFree (sfp->location); - sfp->location = AddIntervalToLocation (NULL, bsp->id, bsp->length - 3, bsp->length - 1, FALSE, FALSE); + sfp->location = AddIntervalToLocation (NULL, bsp->id, bsp->length - 10, bsp->length - 1, FALSE, FALSE); str = GetSequenceByFeature (sfp); len = StringLen (str); if (str != NULL && len > 0) { if (str [len - 1] == 'n' || str [len - 1] == 'N') { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_TerminalNs, "N at end of sequence"); + if (StringICmp (str, "NNNNNNNNNN") == 0) { + sev = SEV_ERROR; + } else { + sev = SEV_WARNING; + } + ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at end of sequence"); } } MemFree (str); @@ -4226,6 +4613,7 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPt ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Journal pages missing"); } if (! noPages) { + sev = SEV_WARNING; StringNCpy_0 (temp, imp->pages, sizeof (temp)); ptr = StringChr (temp, '-'); if (ptr != NULL) { @@ -4295,9 +4683,10 @@ static void ValidateSfpCit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr typedef struct bioseqvalid { ValidStructPtr vsp; - Boolean is_aa; /* bioseq is protein? */ - Boolean is_mrna; /* molinfo is mrna? */ - Boolean is_prerna; /* molinfo is precursor rna? */ + Boolean is_aa; /* bioseq is protein? */ + Boolean is_mrna; /* molinfo is mrna? */ + Boolean is_prerna; /* molinfo is precursor rna? */ + Boolean is_artificial; /* biosource origin is artificial? */ Boolean got_a_pub; int last_na_mol, last_na_mod, last_organelle, last_partialness, last_left_right, last_biomol, last_tech, last_completeness, num_full_length_src_feat, /* number full length src feats */ num_full_length_prot_ref; @@ -4729,6 +5118,7 @@ static CharPtr countrycodes[] = { "Juan de Nova Island", "Kazakhstan", "Kenya", + "Kerguelen Archipelago", "Kingman Reef", "Kiribati", "Kuwait", @@ -4811,6 +5201,7 @@ static CharPtr countrycodes[] = { "Sao Tome and Principe", "Saudi Arabia", "Senegal", + "Serbia and Montenegro", "Seychelles", "Sierra Leone", "Singapore", @@ -5028,11 +5419,13 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour CharPtr countryname; ValNodePtr db; DbtagPtr dbt; + Boolean germline = FALSE; Int2 i; Int4 id; OrgNamePtr onp; OrgModPtr omp; OrgRefPtr orp; + Boolean rearranged = FALSE; SubSourcePtr ssp; if (vsp->sourceQualTags == NULL) { @@ -5070,9 +5463,16 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadSubSource, "Unknown subsource subtype %d", (int) (ssp->subtype)); } else if (ssp->subtype == SUBSRC_other) { ValidateSourceQualTags (vsp, gcp, biop, ssp->name); + } else if (ssp->subtype == SUBSRC_germline) { + germline = TRUE; + } else if (ssp->subtype == SUBSRC_rearranged) { + rearranged = TRUE; } ssp = ssp->next; } + if (germline && rearranged) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadSubSource, "Germline and rearranged should not both be present"); + } if (chromcount > 1) { if (chromconf) { ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleChromosomes, "Multiple conflicting chromosome qualifiers"); @@ -5166,6 +5566,65 @@ static Boolean IsXr (ValNodePtr sdp) return FALSE; } +static Boolean IsSynthetic (BioseqPtr bsp) + +{ + BioSourcePtr biop; + SeqMgrDescContext dcontext; + OrgNamePtr onp; + OrgRefPtr orp; + SeqDescrPtr sdp; + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); + if (sdp == NULL) return FALSE; + biop = (BioSourcePtr) sdp->data.ptrvalue; + if (biop == NULL) return FALSE; + if (biop->origin == 5) return TRUE; + orp = biop->org; + if (orp == NULL) return FALSE; + onp = orp->orgname; + if (onp == NULL) return FALSE; + if (StringICmp (onp->div, "SYN") == 0) return TRUE; + return FALSE; +} + +static Boolean IsMicroRNA (BioseqPtr bsp) + +{ + SeqMgrFeatContext fcontext; + RnaRefPtr rrp; + SeqFeatPtr sfp; + CharPtr str; + + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_otherRNA, &fcontext); + while (sfp != NULL) { + if (sfp->data.choice == SEQFEAT_RNA) { + rrp = (RnaRefPtr) sfp->data.value.ptrvalue; + if (rrp != NULL && rrp->ext.choice == 1) { + str = (CharPtr) rrp->ext.value.ptrvalue; + if (StringStr (str, "microRNA") != NULL) return TRUE; + } + } + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_otherRNA, &fcontext); + } + return FALSE; +} + +static Boolean IsOtherDNA (BioseqPtr bsp) + +{ + SeqMgrDescContext dcontext; + MolInfoPtr mip; + SeqDescrPtr sdp; + + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext); + if (sdp == NULL) return FALSE; + mip = (MolInfoPtr) sdp->data.ptrvalue; + if (mip == NULL) return FALSE; + if (mip->biomol == 255) return TRUE; + return FALSE; +} + static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, ValidStructPtr vsp, Uint2 descitemid) { ValNodePtr vnp, vnp2; @@ -5371,6 +5830,12 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V } } } + if (biop != NULL && biop->origin == 5) { + bsp = bvsp->bsp; + if (! IsOtherDNA (bsp)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol other should be used if Biosource-location is synthetic"); + } + } /* ValidateBioSource (vsp, gcp, biop); */ this_org = biop->org; /* fall into Seq_descr_org */ @@ -5415,18 +5880,25 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V switch (mip->biomol) { case MOLECULE_TYPE_PEPTIDE: /* peptide */ if (!bvsp->is_aa) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "Nucleic acid with Molinfo-biomol = peptide"); + ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "Nucleic acid with Molinfo-biomol = peptide"); } break; case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL: - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol = other genetic"); + if (! bvsp->is_artificial) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol = other genetic"); + } break; case 0: /* unknown */ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol unknown used"); break; case 255: /* other */ if (! IsXr (vnp)) { - ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol other used"); + bsp = bvsp->bsp; + if (! IsSynthetic (bsp)) { + if (! IsMicroRNA (bsp)) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol other used"); + } + } } break; default: /* the rest are nucleic acid */ @@ -5776,6 +6248,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv int overlapPepSev; BioSourcePtr biop = NULL; OrgRefPtr orp = NULL; + Int4 fiveUTRright; + Int4 cdsRight; gcp = bvsp->gcp; vsp = bvsp->vsp; @@ -5858,9 +6332,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv } } } - if (GPSorNTorNC (vsp->sep, sfp->location)) { - severity = SEV_WARNING; - } + } + if (GPSorNTorNC (vsp->sep, sfp->location)) { + severity = SEV_WARNING; } if (FlybaseDbxrefs (last->dbxref) || FlybaseDbxrefs (sfp->dbxref)) { severity = SEV_ERROR; @@ -5960,6 +6434,52 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv sfp = SeqMgrGetNextFeatureByLabel (bsp, sfp, SEQFEAT_GENE, 0, &fcontext); } + if (bvsp->is_mrna) { + fiveUTRright = 0; + cdsRight = 0; + sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); + while (sfp != NULL) { + if (sfp->idx.subtype == FEATDEF_5UTR) { + fiveUTRright = fcontext.right; + } else if (sfp->idx.subtype == FEATDEF_CDS) { + cdsRight = fcontext.right; + if (fiveUTRright > 0) { + if (fiveUTRright + 1 != fcontext.left) { + if (gcp != NULL) { + gcp->itemID = fcontext.itemID; + gcp->thistype = OBJ_SEQFEAT; + } + vsp->descr = NULL; + vsp->sfp = sfp; + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "5'UTR does not abut CDS"); + vsp->sfp = NULL; + if (gcp != NULL) { + gcp->itemID = olditemid; + gcp->thistype = olditemtype; + } + } + } + } else if (sfp->idx.subtype == FEATDEF_3UTR) { + if (cdsRight > 0) { + if (cdsRight + 1 != fcontext.left) { + if (gcp != NULL) { + gcp->itemID = fcontext.itemID; + gcp->thistype = OBJ_SEQFEAT; + } + vsp->descr = NULL; + vsp->sfp = sfp; + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "CDS does not abut 3'UTR"); + if (gcp != NULL) { + gcp->itemID = olditemid; + gcp->thistype = olditemtype; + } + } + } + } + sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext); + } + } + SeqMgrExploreDescriptors (bsp, (Pointer) bvsp, ValidateSeqDescrIndexed, NULL); omdp = ObjMgrGetData (gcp->entityID); @@ -6019,6 +6539,7 @@ static Boolean ValidateBioseqContextGather (GatherContextPtr gcp) *****************************************************************************/ static void ValidateBioseqContext (GatherContextPtr gcp) { + size_t acclen; ValidStructPtr vsp; BioseqPtr bsp; GatherScope gs; @@ -6038,6 +6559,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp) Boolean is_gb = FALSE; ErrSev sev; TextSeqIdPtr tsip; + BioSourcePtr biop; vsp = (ValidStructPtr) (gcp->userdata); bsp = (BioseqPtr) (gcp->thisitem); @@ -6081,6 +6603,15 @@ static void ValidateBioseqContext (GatherContextPtr gcp) if (vnp != NULL) { mip = (MolInfoPtr) vnp->data.ptrvalue; } + vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context); + if (vnp != NULL) { + biop = (BioSourcePtr) vnp->data.ptrvalue; + if (biop != NULL) { + if (biop->origin == ORG_ARTIFICIAL) { + bvs.is_artificial = TRUE; + } + } + } } bvs.is_mrna = FALSE; @@ -6179,7 +6710,10 @@ static void ValidateBioseqContext (GatherContextPtr gcp) is_gb = TRUE; tsip = (TextSeqIdPtr) sip->data.ptrvalue; if (tsip != NULL && tsip->accession != NULL) { - if (StringLen (tsip->accession) == 12) { + acclen = StringLen (tsip->accession); + if (acclen == 12) { + is_wgs = TRUE; + } else if (acclen == 13) { is_wgs = TRUE; } } @@ -6321,6 +6855,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt GeneRefPtr grp; Int2 i; Int2 index; + Boolean just_nuc_letters; CharPtr key; Boolean multi_rpt_unit; Boolean no_white_space; @@ -6472,11 +7007,34 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt multi_rpt_unit = FALSE; } } + /* if (found) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Illegal value for qualifier %s", gbqual->qual); } else if ((!multi_rpt_unit) && StringLen (gbqual->val) > 48) { ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Illegal value for qualifier %s", gbqual->qual); } + */ + if (StringICmp (key,"repeat_region") == 0) { + if (! multi_rpt_unit) { + if (StringLen (gbqual->val) == SeqLocLen (sfp->location)) { + just_nuc_letters = TRUE; + for (ptr = gbqual->val, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) { + if (StringChr ("ACGTNacgtn", ch) == NULL) { + just_nuc_letters = FALSE; + } + } + if (just_nuc_letters) { + tmp = GetSequenceByFeature (sfp); + if (tmp != NULL) { + if (StringICmp (tmp, gbqual->val) != 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "repeat_region /rpt_unit and underlying sequence do not match"); + } + MemFree (tmp); + } + } + } + } + } } else if (val == GBQUAL_label) { no_white_space = TRUE; only_digits = TRUE; @@ -6641,21 +7199,54 @@ static Boolean PartialAtSpliceSite (SeqLocPtr head, Uint2 slpTag) static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, tRNAPtr trp) { - Uint1 aa; + Uint1 aa = 0; BioseqPtr bsp; Int2 code; CharPtr codes = NULL; Uint1 from; GeneticCodePtr gncp; + Uint2 idx; Int2 j; SeqEntryPtr sep; ErrSev sev = SEV_ERROR; + Uint1 shift; SeqMapTablePtr smtp; Uint1 taa; ValNodePtr vnp; if (vsp == NULL || gcp == NULL || sfp == NULL || trp == NULL) return; + + aa = 0; + if (trp->aatype == 2) { + aa = trp->aa; + } else { + from = 0; + switch (trp->aatype) { + case 0: + from = 0; + break; + case 1: + from = Seq_code_iupacaa; + break; + case 2: + from = Seq_code_ncbieaa; + break; + case 3: + from = Seq_code_ncbi8aa; + break; + case 4: + from = Seq_code_ncbistdaa; + break; + default: + break; + } + smtp = SeqMapTableFind (Seq_code_ncbieaa, from); + if (smtp != NULL) { + aa = SeqMapTableConvert (smtp, trp->aa); + } + } + for (j = 0; j < 6; j++) { if (trp->codon[j] < 64) { if (codes == NULL) { @@ -6677,35 +7268,6 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt if (codes == NULL) return; taa = codes[trp->codon[j]]; - aa = 0; - if (trp->aatype == 2) { - aa = trp->aa; - } else { - from = 0; - switch (trp->aatype) { - case 0: - from = 0; - break; - case 1: - from = Seq_code_iupacaa; - break; - case 2: - from = Seq_code_ncbieaa; - break; - case 3: - from = Seq_code_ncbi8aa; - break; - case 4: - from = Seq_code_ncbistdaa; - break; - default: - break; - } - smtp = SeqMapTableFind (Seq_code_ncbieaa, from); - if (smtp != NULL) { - aa = SeqMapTableConvert (smtp, trp->aa); - } - } if (aa > 0 && aa != 255) { if (taa != aa) { if (aa == 'U') { @@ -6714,6 +7276,28 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt ValidErr (vsp, sev, ERR_SEQ_FEAT_TrnaCodonWrong, "tRNA codon does not match genetic code"); } } + } else if (trp->codon [j] < 255) { + ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaCodon, "tRNA codon value %d is greater than maximum 63", (int) trp->codon [j]); + } + } + + if (aa > 0 && aa != 255) { + if (aa <= 74) { + shift = 0; + } else if (aa > 79) { + shift = 2; + } else { + shift = 1; + } + if (aa != '*') { + idx = aa - (64 + shift); + } else { + idx = 25; + } + if (idx > 0 && idx < 26) { + /* valid trna amino acid */ + } else { + ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid"); } } } @@ -6780,6 +7364,71 @@ static Boolean NucAndProtNotInNPS (BioseqPtr nuc, BioseqPtr prot) return FALSE; } +static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp) + +{ + BioseqPtr bsp; + SeqMgrDescContext context; + MolInfoPtr mip; + Boolean partial5; + Boolean partial3; + SeqDescrPtr sdp; + + if (vsp == NULL || sfp == NULL) return; + if (sfp->product == NULL) return; + if (!vsp->useSeqMgrIndexes) return; + bsp = BioseqFindFromSeqLoc (sfp->product); + if (bsp == NULL) return; + sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context); + if (sdp == NULL) return; + mip = (MolInfoPtr) sdp->data.ptrvalue; + if (mip == NULL) return; + CheckSeqLocForPartial (sfp->location, &partial5, &partial3); + switch (mip->completeness) { + case 0 : /* unknown */ + break; + case 1 : /* complete */ + if (partial5 || partial3) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is partial but protein is complete"); + } + break; + case 2 : /* partial */ + break; + case 3 : /* no-left */ + if (! partial5) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 5' complete but protein is NH2 partial"); + } + if (partial3) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 3' partial but protein is NH2 partial"); + } + break; + case 4 : /* no-right */ + if (! partial3) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 3' complete but protein is CO2 partial"); + } + if (partial5) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 5' partial but protein is CO2 partial"); + } + break; + case 5 : /* no-ends */ + if (partial5 && partial3) { + } else if (partial5) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 5' partial but protein has neither end"); + } else if (partial3) { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 3' partial but protein has neither end"); + } else { + ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is complete but protein has neither end"); + } + break; + case 6 : /* has-left */ + break; + case 7 : /* has-right */ + break; + default : + break; + } +} + static void CheckForCommonCDSProduct (ValidStructPtr vsp, SeqFeatPtr sfp) { BioseqPtr bsp; @@ -6988,7 +7637,10 @@ static void CheckForBadMRNAOverlap (ValidStructPtr vsp, SeqFeatPtr sfp) } mrna = SeqMgrGetOverlappingFeature (sfp->location, FEATDEF_mRNA, NULL, 0, NULL, LOCATION_SUBSET, &fcontext); if (mrna != NULL) { - ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA contains CDS but internal intron-exon boundaries do not match"); + if (StringISearch (sfp->except_text, "ribosomal slippage") == NULL && + StringISearch (sfp->except_text, "ribosome slippage") == NULL) { + ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA contains CDS but internal intron-exon boundaries do not match"); + } } else { ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA overlaps or contains CDS but does not completely contain intervals"); } @@ -7044,6 +7696,7 @@ static CharPtr legalDbXrefOnRefSeq [] = { "GenBank", "EMBL", "DDBJ", + "REBASE", NULL }; @@ -7112,6 +7765,7 @@ static CharPtr legal_exception_strings [] = { "artificial frameshift", "non-consensus splice site", "nonconsensus splice site", + "rearrangement required for product", NULL }; @@ -7279,6 +7933,24 @@ NLM_EXTERN Boolean IsNuclAcc (CharPtr name) return TRUE; } +static Boolean IsCddFeat ( + SeqFeatPtr sfp +) + +{ + DbtagPtr dbt; + ValNodePtr vnp; + + if (sfp == NULL || sfp->data.choice != SEQFEAT_REGION) return FALSE; + + for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) { + dbt = (DbtagPtr) vnp->data.ptrvalue; + if (dbt != NULL && StringCmp (dbt->db, "CDD") == 0) return TRUE; + } + + return FALSE; +} + NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) { Int2 type, i, j; @@ -7322,6 +7994,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) SeqMgrDescContext context; GeneRefPtr grpx; SeqFeatPtr sfpx; + SeqFeatPtr operon; Boolean redundantgenexref; SeqMgrFeatContext fcontext; CharPtr syn1, syn2, label = NULL; @@ -7329,6 +8002,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) SeqIdPtr sip; TextSeqIdPtr tsip; BioseqPtr protBsp; + ErrSev sev; vsp = (ValidStructPtr) (gcp->userdata); sfp = (SeqFeatPtr) (gcp->thisitem); @@ -7358,8 +8032,35 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "Gene of 'order' with otherwise complete location should have partial flag set"); } } - /* inconsistent combination of partial/complete product,location,partial flag */ - else if (((partials[0] == SLP_COMPLETE) && (sfp->product != NULL)) || (partials[1] == SLP_COMPLETE) || (!sfp->partial)) { + /* inconsistent combination of partial/complete product,location,partial flag - part 1 */ + else if (((partials[0] == SLP_COMPLETE) && (sfp->product != NULL))) { + sev = SEV_WARNING; + bsp = GetBioseqGivenSeqLoc (sfp->product, gcp->entityID); + /* if not local bioseq product, lower severity */ + if (bsp == NULL) { + sev = SEV_INFO; + } + tmp = StringMove (buf, "Inconsistent: "); + if (sfp->product != NULL) { + tmp = StringMove (tmp, "Product= "); + if (partials[0]) + tmp = StringMove (tmp, "partial, "); + else + tmp = StringMove (tmp, "complete, "); + } + tmp = StringMove (tmp, "Location= "); + if (partials[1]) + tmp = StringMove (tmp, "partial, "); + else + tmp = StringMove (tmp, "complete, "); + tmp = StringMove (tmp, "Feature.partial= "); + if (sfp->partial) + tmp = StringMove (tmp, "TRUE"); + else + tmp = StringMove (tmp, "FALSE"); + ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem, buf); + /* inconsistent combination of partial/complete product,location,partial flag - part 2 */ + } else if ((partials[1] == SLP_COMPLETE) || (!sfp->partial)) { tmp = StringMove (buf, "Inconsistent: "); if (sfp->product != NULL) { tmp = StringMove (tmp, "Product= "); @@ -7390,7 +8091,9 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) errtype = SLP_NOSTART; for (j = 0; j < 4; j++) { if (partials[i] & errtype) { - if (i == 1 && j < 2 && PartialAtSpliceSite (sfp->location, errtype)) { + if (i == 1 && j < 2 && IsCddFeat (sfp)) { + /* suppresses warning */ + } else if (i == 1 && j < 2 && PartialAtSpliceSite (sfp->location, errtype)) { ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem, "%s: %s (but is at consensus splice site)", parterr[i], parterrs[j]); @@ -7411,7 +8114,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } else { ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "%s: %s", parterr[i], parterrs[j]); - } + } } errtype <<= 1; } @@ -7513,6 +8216,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) CdTransCheck (vsp, sfp); SpliceCheck (vsp, sfp); } + CdsProductIdCheck (vsp, sfp); crp = (CdRegionPtr) (sfp->data.value.ptrvalue); if (crp != NULL) { for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) { @@ -7591,6 +8295,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) CheckForBadGeneOverlap (vsp, sfp); CheckForBadMRNAOverlap (vsp, sfp); CheckForCommonCDSProduct (vsp, sfp); + CheckCDSPartial (vsp, sfp); break; case 4: /* Prot-ref */ prp = (ProtRefPtr) (sfp->data.value.ptrvalue); @@ -7852,6 +8557,26 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } if (type != SEQFEAT_GENE) { grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL) { + sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext); + if (sfpx != NULL) { + grp = (GeneRefPtr) sfpx->data.value.ptrvalue; + } + } + if (grp != NULL && (! SeqMgrGeneIsSuppressed (grp))) { + if (! StringHasNoText (grp->allele)) { + for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) { + if (StringCmp (gbq->qual, "allele") == 0) { + if (StringICmp (gbq->val, grp->allele) == 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "Redundant allele qualifier (%s) on gene and feature", gbq->val); + } else { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "Mismatched allele qualifier on gene (%s) and feature (%s)", grp->allele, gbq->val); + } + } + } + } + } + grp = SeqMgrGetGeneXref (sfp); if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return; sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext); @@ -7883,8 +8608,22 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp) } ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryGeneXref, "Unnecessary gene cross-reference %s", label); } + } else { + operon = SeqMgrGetOverlappingOperon (sfp->location, &fcontext); + if (operon != NULL) { + if (SeqMgrGetDesiredFeature (sfp->idx.entityID, 0, 0, 0, sfp, &fcontext) == sfp) { + if (! StringHasNoText (fcontext.label)) { + for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) { + if (StringCmp (gbq->qual, "operon") == 0) { + if (StringICmp (gbq->val, fcontext.label) == 0) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "Operon is same as gene - %s", gbq->val); + } + } + } + } + } + } } - return; } /***************************************************************************** @@ -7903,11 +8642,16 @@ static CharPtr bypass_mrna_trans_check [] = { NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) { + BioseqPtr bsp; + Char ch; + Int4 counta, countnona; + GatherContextPtr gcp; Int2 i; Int4 mismatch, total; CharPtr mrseq, pdseq; Int4 mlen, plen; CharPtr ptr1, ptr2; + ErrSev sev; SeqFeat sf; SeqIdPtr sip; ValNode vn; @@ -7929,6 +8673,16 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) if (sip == NULL) return; + sev = SEV_ERROR; + gcp = vsp->gcp; + if (gcp != NULL) { + bsp = GetBioseqGivenSeqLoc (sfp->product, gcp->entityID); + /* if not local bioseq product, lower severity */ + if (bsp == NULL) { + sev = SEV_WARNING; + } + } + mrseq = GetSequenceByFeature (sfp); if (mrseq == NULL) return; @@ -7946,8 +8700,32 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) mlen = StringLen (mrseq); plen = StringLen (pdseq); if (mlen != plen) { - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] does not match product length [%ld]", (long) mlen, (long) plen); - } else if (mlen > 0 && StringICmp (mrseq, pdseq) != 0) { + if (mlen < plen) { + ptr1 = pdseq + mlen; + counta = 0; + countnona = 0; + ch = *ptr1; + while (ch != '\0') { + if (ch == 'A' || ch == 'a') { + counta++; + } else { + countnona++; + } + ptr1++; + ch = *ptr1; + } + if (counta < 19 * countnona) { + ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] less than product length [%ld], and tail < 95%s polyA", (long) mlen, (long) plen, "%"); + plen = mlen; /* even if it fails polyA test, allow base-by-base comparison on common length */ + } else { + ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] less than product length [%ld], but tail >= 95%s polyA", (long) mlen, (long) plen, "%"); + plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */ + } + } else { + ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] greater than product length [%ld]", (long) mlen, (long) plen); + } + } + if (mlen == plen && mlen > 0 && StringICmp (mrseq, pdseq) != 0) { mismatch = 0; total = 0; ptr1 = mrseq; @@ -7960,7 +8738,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp) ptr2++; total++; } - ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_TranscriptMismatches, + ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptMismatches, "There are %ld mismatches out of %ld bases between the transcript and product sequence", (long) mismatch, (long) total); } MemFree (pdseq); @@ -8035,6 +8813,7 @@ static CharPtr bypass_cds_trans_check [] = { "reasons given in citation", "artificial frameshift", "unclassified translation discrepancy", + "rearrangement required for product", NULL }; @@ -8628,6 +9407,115 @@ NLM_EXTERN void SpliceCheck (ValidStructPtr vsp, SeqFeatPtr sfp) /***************************************************************************** * +* CdsProductIdCheck (vsp, sfp) +* code taken from asn2gnbk.c - release mode expects CDS product Bioseqs +* +*****************************************************************************/ +static SeqIdPtr SeqLocIdForProduct ( + SeqLocPtr product +) + +{ + SeqIdPtr sip; + SeqLocPtr slp; + + /* in case product is a SEQLOC_EQUIV */ + + if (product == NULL) return NULL; + sip = SeqLocId (product); + if (sip != NULL) return sip; + slp = SeqLocFindNext (product, NULL); + while (slp != NULL) { + sip = SeqLocId (slp); + if (sip != NULL) return sip; + slp = SeqLocFindNext (product, slp); + } + return NULL; +} + +static Boolean GetAccnVerFromServer (Int4 gi, CharPtr buf) + +{ + AccnVerLookupFunc func; + SeqMgrPtr smp; + CharPtr str; + + if (buf == NULL) return FALSE; + *buf = '\0'; + smp = SeqMgrWriteLock (); + if (smp == NULL) return FALSE; + func = smp->accn_ver_lookup_func; + SeqMgrUnlock (); + if (func == NULL) return FALSE; + str = (*func) (gi); + if (str == NULL) return FALSE; + if (StringLen (str) < 40) { + StringCpy (buf, str); + } + MemFree (str); + return TRUE; +} + +static void CdsProductIdCheck (ValidStructPtr vsp, SeqFeatPtr sfp) + +{ + SeqFeatPtr gene; + GeneRefPtr grp; + Boolean juststop = FALSE; + Boolean okay = FALSE; + SeqEntryPtr oldscope; + Boolean partial5; + Boolean partial3; + Boolean pseudo = FALSE; + SeqEntryPtr sep; + + /* non-pseudo CDS must have /product */ + if (sfp->pseudo) { + pseudo = TRUE; + } + grp = SeqMgrGetGeneXref (sfp); + if (grp == NULL) { + sep = GetTopSeqEntryForEntityID (sfp->idx.entityID); + oldscope = SeqEntrySetScope (sep); + gene = SeqMgrGetOverlappingGene (sfp->location, NULL); + SeqEntrySetScope (oldscope); + if (gene != NULL) { + grp = (GeneRefPtr) gene->data.value.ptrvalue; + if (gene->pseudo) { + pseudo = TRUE; + } + } + } + if (grp != NULL && grp->pseudo) { + pseudo = TRUE; + } + if (sfp->location != NULL) { + if (CheckSeqLocForPartial (sfp->location, &partial5, &partial3)) { + if (partial5 && (! partial3)) { + if (SeqLocLen (sfp->location) <= 5) { + juststop = TRUE; + } + } + } + } + if (pseudo || juststop) { + okay = TRUE; + } else if (sfp->product != NULL) { + okay = TRUE; + } else { + if (sfp->excpt && (! StringHasNoText (sfp->except_text))) { + if (StringStr (sfp->except_text, "rearrangement required for product") != NULL) { + okay = TRUE; + } + } + } + if (! okay) { + ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_MissingCDSproduct, "Expected CDS product absent"); + } +} + +/***************************************************************************** +* * ValidateSeqLoc(vsp, slp, prefix) * *****************************************************************************/ diff --git a/api/valid.msg b/api/valid.msg index 4bb11461..a7923795 100644 --- a/api/valid.msg +++ b/api/valid.msg @@ -152,6 +152,13 @@ $^ UnexpectedIdentifierChange, 41 The set of sequence identifiers on a Bioseq are not consistent with the previous version of the record in the database. +$^ InternalNsInSeqLit, 42 +There are runs of many Ns inside the SeqLit component of a delta Bioseq. + +$^ SeqLitGapLength0, 43 +A SeqLit component of a delta Bioseq can specify a gap, but it should not be a gap +of 0 length. + $$ SEQ_DESCR, 2 $^ BioSourceMissing, 1 @@ -252,6 +259,9 @@ $^ UnnecessaryBioSourceFocus, 24 Focus should not be set on a BioSource descriptor in records where there is no BioSource feature. +$^ RefGeneTrackingWithoutStatus, 25 +The RefGeneTracking user object does not have the required Status field set. + $$ GENERIC, 3 $^ NonAsciiAsn, 1 @@ -318,6 +328,12 @@ however be referenced remotely. $^ InconsistentMolInfoBiomols, 12 Mol-info.biomol is inconsistent within a segset or parts set. +$^ ArchaicFeatureLocation, 13 +A feature location should refer to the accession or gi number, not a local or general ID. + +$^ ArchaicFeatureProduct, 14 +A feature product should refer to the accession or gi number, not a local or general ID. + $$ SEQ_FEAT, 5 $^ InvalidForType, 1 @@ -573,6 +589,25 @@ $^ RnaProductMismatch, 61 The RNA feature product type does not correspond to the RNA feature type. These need to be consistent. +$^ MissingCDSproduct, 62 +The CDS should have a product, but does not. Pseudo or short CDSs (less than 6 +amino acids), or those marked with a rearrangement required for product exception, +are exempt from needing a product. + +$^ BadTrnaCodon, 63 +The tRNA codon recognized is an illegal value. + +$^ BadTrnaAA, 64 +The tRNA encoded amino acid is an illegal value. + +$^ OnlyGeneXrefs, 65 +There are gene xrefs but no gene features. Records should normally have single-interval +gene features covering other biological features. Gene xrefs are used only to override +the inheritance by overlap. + +$^ UTRdoesNotAbutCDS, 66 +The 5'UTR and 3'UTR features should exactly abut the CDS feature. + $$ SEQ_ALIGN, 6 $^ SeqIdProblem, 1 diff --git a/api/validerr.h b/api/validerr.h index 5ac8939a..b4ced162 100644 --- a/api/validerr.h +++ b/api/validerr.h @@ -43,6 +43,8 @@ #define ERR_SEQ_INST_HistAssemblyMissing 1,39 #define ERR_SEQ_INST_TerminalNs 1,40 #define ERR_SEQ_INST_UnexpectedIdentifierChange 1,41 +#define ERR_SEQ_INST_InternalNsInSeqLit 1,42 +#define ERR_SEQ_INST_SeqLitGapLength0 1,43 #define ERR_SEQ_DESCR 2,0 #define ERR_SEQ_DESCR_BioSourceMissing 2,1 #define ERR_SEQ_DESCR_InvalidForType 2,2 @@ -68,6 +70,7 @@ #define ERR_SEQ_DESCR_ObsoleteSourceQual 2,22 #define ERR_SEQ_DESCR_StructuredSourceNote 2,23 #define ERR_SEQ_DESCR_UnnecessaryBioSourceFocus 2,24 +#define ERR_SEQ_DESCR_RefGeneTrackingWithoutStatus 2,25 #define ERR_GENERIC 3,0 #define ERR_GENERIC_NonAsciiAsn 3,1 #define ERR_GENERIC_Spell 3,2 @@ -88,6 +91,8 @@ #define ERR_SEQ_PKG_FeaturePackagingProblem 4,10 #define ERR_SEQ_PKG_GenomicProductPackagingProblem 4,11 #define ERR_SEQ_PKG_InconsistentMolInfoBiomols 4,12 +#define ERR_SEQ_PKG_ArchaicFeatureLocation 4,13 +#define ERR_SEQ_PKG_ArchaicFeatureProduct 4,14 #define ERR_SEQ_FEAT 5,0 #define ERR_SEQ_FEAT_InvalidForType 5,1 #define ERR_SEQ_FEAT_PartialProblem 5,2 @@ -150,6 +155,11 @@ #define ERR_SEQ_FEAT_FeatContentDup 5,59 #define ERR_SEQ_FEAT_BadProductSeqId 5,60 #define ERR_SEQ_FEAT_RnaProductMismatch 5,61 +#define ERR_SEQ_FEAT_MissingCDSproduct 5,62 +#define ERR_SEQ_FEAT_BadTrnaCodon 5,63 +#define ERR_SEQ_FEAT_BadTrnaAA 5,64 +#define ERR_SEQ_FEAT_OnlyGeneXrefs 5,65 +#define ERR_SEQ_FEAT_UTRdoesNotAbutCDS 5,66 #define ERR_SEQ_ALIGN 6,0 #define ERR_SEQ_ALIGN_SeqIdProblem 6,1 #define ERR_SEQ_ALIGN_StrandRev 6,2 |