summaryrefslogtreecommitdiff
path: root/api
diff options
context:
space:
mode:
authorAaron M. Ucko <ucko@debian.org>2005-03-23 23:49:09 +0000
committerAaron M. Ucko <ucko@debian.org>2005-03-23 23:49:09 +0000
commit5349ec8772bc373e4c2349a04e57d7952c006326 (patch)
treeb733fe1df8c0c2d2418b3ce82ebcbd3d0db12a1f /api
parent0eff2d00595b4adcf6f1c4e6bbbcf0f416c70310 (diff)
Load ncbi (6.1.20031028) into ncbi-tools6/branches/upstream/current.
Diffstat (limited to 'api')
-rw-r--r--api/alignmgr2.c267
-rw-r--r--api/alignmgr2.h75
-rw-r--r--api/alignval.c10
-rw-r--r--api/asn2ff3.c12
-rw-r--r--api/asn2ffp.h22
-rw-r--r--api/asn2gnbk.c2827
-rw-r--r--api/asn2gnbk.h5
-rw-r--r--api/edutil.c21
-rw-r--r--api/explore.h8
-rw-r--r--api/fdlKludge.h25
-rw-r--r--api/ffprint.c10
-rw-r--r--api/findrepl.c72
-rw-r--r--api/findrepl.h14
-rw-r--r--api/gbfeat.c10
-rw-r--r--api/gbftdef.h20
-rw-r--r--api/gbftglob.c416
-rw-r--r--api/lsqfetch.c150
-rw-r--r--api/lsqfetch.h15
-rw-r--r--api/salpstat.c6
-rw-r--r--api/seqmgr.c103
-rw-r--r--api/seqmgr.h12
-rw-r--r--api/seqport.c116
-rw-r--r--api/sequtil.c164
-rw-r--r--api/sqnutil1.c363
-rw-r--r--api/sqnutil2.c174
-rw-r--r--api/sqnutil3.c32
-rw-r--r--api/sqnutils.h26
-rw-r--r--api/subutil.c84
-rw-r--r--api/subutil.h34
-rw-r--r--api/tofasta.c230
-rw-r--r--api/tomedlin.c173
-rw-r--r--api/tomedlin.h10
-rw-r--r--api/txalign.c286
-rw-r--r--api/valid.c1072
-rw-r--r--api/valid.msg35
-rw-r--r--api/validerr.h10
36 files changed, 5567 insertions, 1342 deletions
diff --git a/api/alignmgr2.c b/api/alignmgr2.c
index 6afaf900..096953e2 100644
--- a/api/alignmgr2.c
+++ b/api/alignmgr2.c
@@ -28,13 +28,28 @@
*
* Version Creation Date: 10/01
*
-* $Revision: 6.44 $
+* $Revision: 6.49 $
*
* File Description: SeqAlign indexing, access, and manipulation functions
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: alignmgr2.c,v $
+* Revision 6.49 2003/10/20 17:54:34 kans
+* AlnMgr2ComputeFreqMatrix protect against dereferencing NULL bsp
+*
+* Revision 6.48 2003/10/09 13:46:52 rsmith
+* Add AlnMgr2GetFirstNForSipList.
+*
+* Revision 6.47 2003/05/15 18:53:10 rsmith
+* in AlnMgr2GetSeqRangeForSipInStdSeg always return start & stop in coordinate order. Do not assume what minus strand will do or not.
+*
+* Revision 6.46 2003/04/24 20:28:48 rsmith
+* made AlnMgr2GetNthStdSeg use 1 based numbering like the other Nth functions.
+*
+* Revision 6.45 2003/04/23 20:36:13 rsmith
+* Added four functions in Section 11 to get information about Std-Seg alignments.
+*
* Revision 6.44 2003/03/31 20:17:11 todorov
* Added AlnMgr2IndexSeqAlignEx
*
@@ -7159,6 +7174,29 @@ NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip)
/***************************************************************************
*
+* AlnMgr2GetFirstNForSipList returns the row of the first seqid in the list
+* that occurs in the alignment, or -1 if none of the seqids are in the
+* alignment or if there is another error.
+* Handy if sip comes from a BioSeq, where it can point to a linked list
+* of SeqIds.
+*
+***************************************************************************/
+NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip)
+{
+    SeqIdPtr  curr;
+    Int4      row;
+
+    /* nothing can be looked up without an alignment and its saip data */
+    if (sap == NULL || sap->saip == NULL)
+        return -1;
+
+    /* try each id of the chain in order; the first one that resolves wins */
+    curr = sip;
+    while (curr != NULL)
+    {
+        row = AlnMgr2GetFirstNForSip(sap, curr);
+        if (row != -1)
+            return row;
+        curr = curr->next;
+    }
+
+    /* no id of the chain was found (an empty chain also ends up here) */
+    return -1;
+}
+
+/***************************************************************************
+*
* AlnMgr2GetParent returns the top-level seqalign associated with a given
* indexed alignment. It returns the actual pointer, not a copy.
*
@@ -8392,30 +8430,32 @@ NLM_EXTERN AMFreqPtr AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap, Int4 from, Int4 t
{
sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
bsp = BioseqLockById(sip);
- for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE)
- {
- counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp);
- ctr = 0;
- while (ctr < counter)
- {
- res = buf[ctr];
- if (isna)
- {
- if (res == 1 || res == 2)
- afp->freq[res][j]++;
- else if (res == 4)
- afp->freq[3][j]++;
- else if (res == 8)
- afp->freq[4][j]++;
- else
- afp->freq[5][j]++;
- } else
- afp->freq[res][j]++;
- j++;
- ctr++;
- }
+ if (bsp != NULL) {
+ for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE)
+ {
+ counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp);
+ ctr = 0;
+ while (ctr < counter)
+ {
+ res = buf[ctr];
+ if (isna)
+ {
+ if (res == 1 || res == 2)
+ afp->freq[res][j]++;
+ else if (res == 4)
+ afp->freq[3][j]++;
+ else if (res == 8)
+ afp->freq[4][j]++;
+ else
+ afp->freq[5][j]++;
+ } else
+ afp->freq[res][j]++;
+ j++;
+ ctr++;
+ }
+ }
+ BioseqUnlock(bsp);
}
- BioseqUnlock(bsp);
SeqIdFree(sip);
}
}
@@ -9792,6 +9832,185 @@ NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr
*stop = SeqLocStop(slp);
}
+
+/***************************************************************************
+*
+* AlnMgr2GetSeqRangeForSipInSAStdSeg returns the smallest and largest sequence
+* coordinates in a Std-Seg seqalign for a given Sequence Id. Also return the
+* strand type. Either start, stop or strand can be NULL to only retrieve some of them.
+* If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this
+* alignment or the alignment is one big insert on that id. Returns true if the sip was found
+* in the alignment with real coordinates, i.e. *start would not be -1. RANGE
+*
+***************************************************************************/
+NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
+{
+    Int4       c_start, c_stop;
+    Uint1      c_strand;
+    StdSegPtr  ssp;
+    Boolean    range_found = FALSE;
+    Boolean    strands_inconsistent = FALSE;
+
+    /* Pre-load the outputs with their "nothing found" values so that every
+       early return below hands back a defined result. */
+    if (start) *start = -1;
+    if (stop) *stop = -1;
+    if (strand) *strand = Seq_strand_unknown;
+
+    /* A NULL alignment is rejected here rather than being dereferenced
+       by the segtype test. */
+    if (sap == NULL || sap->segtype != SAS_STD)
+        return FALSE;
+
+    for (ssp = (StdSegPtr)(sap->segs); ssp != NULL; ssp = ssp->next) {
+        /* c_start/c_stop/c_strand are only valid when the call returns TRUE */
+        if (!AlnMgr2GetSeqRangeForSipInStdSeg(ssp, sip, &c_start, &c_stop, &c_strand, NULL))
+            continue;   /* sip does not appear in this segment */
+        if (c_start == -1)
+            continue;   /* skip inserts on our bioseq */
+
+        range_found = TRUE;
+
+        if (start) {
+            /* -1 is the "unset" marker, so it must not take part in MIN */
+            if (*start == -1) {
+                *start = c_start;
+            } else {
+                *start = MIN(*start, c_start);
+            }
+        }
+        if (stop) {
+            /* the -1 initial value is below any real coordinate, MAX is safe */
+            *stop = MAX(*stop, c_stop);
+        }
+        if (strand && ! strands_inconsistent) {
+            /* if strands are different each time, ignore them. */
+            if (*strand != Seq_strand_unknown && *strand != c_strand) {
+                *strand = Seq_strand_unknown;
+                strands_inconsistent = TRUE;   /* latch: never overwritten again */
+            } else {
+                *strand = c_strand;
+            }
+        }
+    }
+    return range_found;
+}
+
+
+/***************************************************************************
+*
+* AlnMgr2GetSeqRangeForSipInStdSeg returns the start and stop sequence
+* coordinates in a Std-Segment for a given Sequence Id. Also return the
+* strand type. Either start, stop or strand can be NULL to only retrieve some of them.
+* Returns false if the SeqID was not found in this segment; start, stop and strand are then left unchanged.
+* Returns true if the sip was found, even if it is a gap (start, stop = -1). RANGE
+*
+***************************************************************************/
+NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg(
+    StdSegPtr ssp,
+    SeqIdPtr sip,
+    Int4Ptr start,
+    Int4Ptr stop,
+    Uint1Ptr strand,
+    Uint1Ptr segType) /* AM_SEQ, AM_GAP, AM_INSERT */
+{
+    SeqLocPtr  loc;
+    Uint1      m_strand;
+    Int4       m_start, m_stop, m_swap;
+    Boolean    s_present = FALSE;   /* some other sequence has real coordinates */
+    Boolean    m_present = FALSE;   /* the requested sequence has real coordinates */
+    Boolean    found_id = FALSE;
+
+    /* A NULL segment (e.g. the NULL that AlnMgr2GetNthStdSeg returns when
+       there is no Nth segment) is treated like a segment with no SeqLocs
+       instead of being dereferenced. */
+    for ( loc = (ssp != NULL) ? ssp->loc : NULL;
+          loc != NULL;
+          loc = loc->next ) {
+        /* One SeqLoc for each Sequence aligned by this segment. */
+        /* find the one that matches the sip parameter. */
+        if (SeqIdForSameBioseq(sip, SeqLocId(loc))) {
+            m_strand = SeqLocStrand(loc);
+            m_start = SeqLocStart(loc);
+            m_stop = SeqLocStop(loc);
+            /* Might have to reverse the order of start and stop on minus strands */
+            /* so that start is less than stop. */
+            if (m_start > m_stop) {
+                m_swap = m_start;
+                m_start = m_stop;
+                m_stop = m_swap;
+            }
+            /* outputs are written only on a match; if several SeqLocs match,
+               the last one in the list wins */
+            if (start) *start = m_start;
+            if (stop) *stop = m_stop;
+            if (strand) *strand = m_strand;
+            if (m_start != -1)
+                m_present = TRUE;
+
+            /* found our sequence in this segment. */
+            found_id = TRUE;
+        } else { /* a different sequence */
+            if (SeqLocStart(loc) != -1)
+                s_present = TRUE;
+        }
+    }
+
+    if (segType) {
+        if (m_present && s_present)
+            *segType = AM_SEQ;
+        else if (!m_present && s_present)
+            *segType = AM_INSERT;
+        else
+            *segType = AM_GAP; /* nothing else is present; if m_present is also false, start will be -1 */
+    }
+    return found_id;
+}
+
+
+/***************************************************************************
+*
+* AlnMgr2GetNthStdSeg returns a pointer to the Nth segment of
+* a standard segment alignment. Numbering starts with 1.
+* returns NULL if there are fewer than n segments or it is not a std-seg alignment.
+* Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg()
+*
+***************************************************************************/
+NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n)
+{
+    StdSegPtr  seg;
+    Int2       pos;
+
+    /* reject a missing alignment, a non std-seg alignment, or a segment
+       number below the first valid one (numbering is 1-based) */
+    if (sap == NULL || sap->segtype != SAS_STD || n < 1)
+        return NULL;
+
+    /* walk the segment chain, counting from 1, until position n is reached */
+    pos = 1;
+    for (seg = (StdSegPtr)(sap->segs); seg != NULL; seg = seg->next)
+    {
+        if (pos == n)
+            return seg;
+        ++pos;
+    }
+
+    /* the chain ended before the nth segment */
+    return NULL;
+}
+
+/***************************************************************************
+*
+* AlnMgr2GetNumStdSegs returns the number of segments in a standard-seg alignment.
+* returns -1 if sap is null or not a standard-seg alignment.
+*
+***************************************************************************/
+NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap)
+{
+    StdSegPtr  seg;
+    Int4       total;
+
+    /* -1 signals "no alignment" or "not a std-seg alignment" */
+    if (sap == NULL || sap->segtype != SAS_STD)
+        return -1;
+
+    /* count the links of the segment chain; an empty chain yields 0 */
+    total = 0;
+    for (seg = (StdSegPtr)(sap->segs); seg != NULL; seg = seg->next)
+    {
+        total++;
+    }
+    return total;
+}
+
static SeqLocPtr AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)
{
Int4 longest;
diff --git a/api/alignmgr2.h b/api/alignmgr2.h
index c71dccf4..2bd1dd3a 100644
--- a/api/alignmgr2.h
+++ b/api/alignmgr2.h
@@ -28,13 +28,19 @@
*
* Version Creation Date: 10/01
*
-* $Revision: 6.19 $
+* $Revision: 6.21 $
*
* File Description: SeqAlign indexing, access, and manipulation functions
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: alignmgr2.h,v $
+* Revision 6.21 2003/10/09 13:46:39 rsmith
+* Add AlnMgr2GetFirstNForSipList.
+*
+* Revision 6.20 2003/04/23 20:37:06 rsmith
+* Added four functions in section 11 to allow examination of Std-Seg alignments.
+*
* Revision 6.19 2003/03/31 20:17:11 todorov
* Added AlnMgr2IndexSeqAlignEx
*
@@ -637,6 +643,17 @@ NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip);
/***************************************************************************
*
+* AlnMgr2GetFirstNForSipList returns the row of the first seqid in the list
+* that occurs in the alignment, or -1 if none of the seqids are in the
+* alignment or if there is another error.
+* Handy if sip comes from a BioSeq, where it can point to a linked list
+* of SeqIds.
+*
+***************************************************************************/
+NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip);
+
+/***************************************************************************
+*
* AlnMgr2GetParent returns the top-level seqalign associated with a given
* indexed alignment. It returns the actual pointer, not a copy.
*
@@ -885,6 +902,62 @@ NLM_EXTERN SeqAlignPtr AlnMgr2FuseSet(SeqAlignPtr sap_head, Boolean returnall);
NLM_EXTERN Int4 AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap, SeqIdPtr sip);
NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap, Int4 n);
NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);
+
+/***************************************************************************
+*
+* AlnMgr2GetSeqRangeForSipInSAStdSeg returns the smallest and largest sequence
+* coordinates in a Std-Seg seqalign for a given Sequence Id. Also return the
+* strand type if it is the same on every segment, else set it to Seq_strand_unknown.
+* Either start, stop or strand can be NULL to only retrieve some of them.
+* If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this
+* alignment or the alignment is one big insert on that id. Returns true if the sip was found
+* in the alignment with real coordinates, i.e. *start would not be -1. RANGE
+*
+***************************************************************************/
+NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand);
+
+/***************************************************************************
+*
+* AlnMgr2GetSeqRangeForSipInStdSeg returns the start and stop sequence
+* coordinates in a Std-Segment for a given Sequence Id. Also return the
+* strand type. Either start, stop, strand or segType can be NULL to only retrieve some of them.
+* Returns false if the SeqID was not found in this segment, so no meaningful
+* data was passed back in other arguments.
+* Returns true if the sip was found, even if it is a gap (start, stop = -1).
+* segType is set to AM_SEQ if the SeqID Sequence is not empty and one of
+* the other sequences aligned with it is also not empty. To AM_GAP if
+* the other sequences are all empty, and to AM_INSERT if the main sequence
+* is empty.
+* RANGE
+*
+***************************************************************************/
+NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg(
+ StdSegPtr ssp,
+ SeqIdPtr sip,
+ Int4Ptr start,
+ Int4Ptr stop,
+ Uint1Ptr strand,
+ Uint1Ptr segType); /* AM_SEQ, AM_GAP, AM_INSERT */
+
+/***************************************************************************
+*
+* AlnMgr2GetNthStdSeg returns a pointer to the Nth segment of
+* a standard segment alignment. Numbering starts with 1.
+* returns NULL if there are fewer than n segments or it is not a std-seg alignment.
+* Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg()
+*
+***************************************************************************/
+NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n);
+
+/***************************************************************************
+*
+* AlnMgr2GetNumStdSegs returns the number of segments in a standard-seg alignment.
+* returns -1 if sap is null or not a standard-seg alignment.
+* the Std-Seg version of AlnMgr2GetNumSegs
+*
+***************************************************************************/
+NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap);
+
/***************************************************************************
*
* The two mapping functions act a little differently for std-segs. The
diff --git a/api/alignval.c b/api/alignval.c
index 9b317c04..bf3a64b7 100644
--- a/api/alignval.c
+++ b/api/alignval.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/3/99
*
-* $Revision: 6.35 $
+* $Revision: 6.37 $
*
* File Description: To validate sequence alignment.
*
@@ -443,7 +443,7 @@ static void ValMessage (SeqAlignPtr salp, Int1 MessageCode, ErrSev errlevel, Seq
pos = valmsggetseqpos(salp, Intvalue, id);
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
sprintf(string1, "Segs");
- sprintf(string2, "Segment %ld (near alignment position %ld) contains only gaps. Each segment must contain at least one actual sequence -- look for columns with all gaps and delete them.", (long) Intvalue, (long) pos);
+ sprintf(string2, "Segment %ld (near alignment position %ld) in the context of %s contains only gaps. Each segment must contain at least one actual sequence -- look for columns with all gaps and delete them.", (long) Intvalue, (long) pos, buf3);
break;
case Err_Segs_Dim_One:
@@ -1495,7 +1495,7 @@ static Boolean Is_Fasta_Seqalign (SeqAlignPtr salp)
/*if no more positive start value is found after the initial -1 start value, then it's fasta like */
if(k==dsp->numseg-1&&gap)
{
- ValMessage (salp, Err_Fastalike, SEV_ERROR, siptemp, dsp->ids, 0);
+ ValMessage (salp, Err_Fastalike, SEV_WARNING, siptemp, dsp->ids, 0);
return TRUE;
}
}
@@ -1541,7 +1541,7 @@ static Boolean Is_Fasta_Seqalign (SeqAlignPtr salp)
/*if no more segment is found for this sequence, then it's fasta like */
if(k==psp->numseg-1&&gap)
{
- ValMessage (salp, Err_Fastalike, SEV_ERROR, siptemp, psp->ids, 0);
+ ValMessage (salp, Err_Fastalike, SEV_WARNING, siptemp, psp->ids, 0);
return TRUE;
}
@@ -1598,7 +1598,7 @@ static Boolean Is_Fasta_Seqalign (SeqAlignPtr salp)
break;
if(!ssptemp2->next&&gap)
{
- ValMessage (salp, Err_Fastalike, SEV_ERROR, sip, SipInSegs, 0);
+ ValMessage (salp, Err_Fastalike, SEV_WARNING, sip, SipInSegs, 0);
ValNodeFree(FinishedSip);
SeqIdSetFree(SipInSegs);
return TRUE;
diff --git a/api/asn2ff3.c b/api/asn2ff3.c
index ec9a8880..706036d0 100644
--- a/api/asn2ff3.c
+++ b/api/asn2ff3.c
@@ -35,6 +35,15 @@
* Modifications:
* --------------------------------------------------------------------------
* $Log: asn2ff3.c,v $
+* Revision 6.118 2003/07/22 16:18:27 kans
+* added ZFIN as legal db_xref
+*
+* Revision 6.117 2003/06/10 18:44:10 kans
+* added GeneDB to list of legal db_xrefs
+*
+* Revision 6.116 2003/05/29 20:25:19 kans
+* added Interpro to list of legal dbxrefs
+*
* Revision 6.115 2002/11/30 20:18:27 kans
* added GOA to list of legal db_xrefs
*
@@ -723,6 +732,7 @@ CharPtr dbtag[DBNUM] = {
"FLYBASE",
"GABI",
"GDB",
+ "GeneDB",
"GeneID",
"GI",
"GO",
@@ -731,6 +741,7 @@ CharPtr dbtag[DBNUM] = {
"IMGT/LIGM",
"IMGT/HLA",
"InterimID",
+ "Interpro",
"ISFinder",
"JCM",
"LocusID",
@@ -756,6 +767,7 @@ CharPtr dbtag[DBNUM] = {
"UniSTS",
"WorfDB",
"WormBase",
+ "ZFIN",
};
diff --git a/api/asn2ffp.h b/api/asn2ffp.h
index 87a8b726..f05a9b89 100644
--- a/api/asn2ffp.h
+++ b/api/asn2ffp.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/15/95
*
-* $Revision: 6.30 $
+* $Revision: 6.33 $
*
* File Description:
*
@@ -45,6 +45,15 @@
/*************************************
*
* $Log: asn2ffp.h,v $
+ * Revision 6.33 2003/07/22 16:18:27 kans
+ * added ZFIN as legal db_xref
+ *
+ * Revision 6.32 2003/06/10 18:44:10 kans
+ * added GeneDB to list of legal db_xrefs
+ *
+ * Revision 6.31 2003/05/29 20:25:19 kans
+ * added Interpro to list of legal dbxrefs
+ *
* Revision 6.30 2002/11/30 20:18:27 kans
* added GOA to list of legal db_xrefs
*
@@ -192,6 +201,15 @@
/*************************************
*
* $Log: asn2ffp.h,v $
+* Revision 6.33 2003/07/22 16:18:27 kans
+* added ZFIN as legal db_xref
+*
+* Revision 6.32 2003/06/10 18:44:10 kans
+* added GeneDB to list of legal db_xrefs
+*
+* Revision 6.31 2003/05/29 20:25:19 kans
+* added Interpro to list of legal dbxrefs
+*
* Revision 6.30 2002/11/30 20:18:27 kans
* added GOA to list of legal db_xrefs
*
@@ -369,7 +387,7 @@ NLM_EXTERN Boolean asn2ff_flags[13];
#define ASN2FF_SHOW_ERROR_MSG asn2ff_flags[11]
#define ASN2FF_SHOW_GB_STYLE asn2ff_flags[12]
-#define DBNUM 55
+#define DBNUM 58
NLM_EXTERN CharPtr dbtag[DBNUM];
NLM_EXTERN void FlatSpliceOff PROTO((SeqEntryPtr the_set, ValNodePtr desc));
diff --git a/api/asn2gnbk.c b/api/asn2gnbk.c
index 39e5c40d..deccca09 100644
--- a/api/asn2gnbk.c
+++ b/api/asn2gnbk.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.660 $
+* $Revision: 6.729 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -44,6 +44,7 @@
#include <objsset.h>
#include <objsub.h>
#include <objfdef.h>
+#include <objpubme.h>
#include <seqport.h>
#include <sequtil.h>
#include <sqnutils.h>
@@ -98,6 +99,7 @@ typedef struct asn2gbflags {
Boolean hideEmptySource;
Boolean goQualsToNote;
Boolean geneSynsToNote;
+ Boolean selenocysteineToNote;
Boolean forGbRelease;
} Asn2gbFlags, PNTR Asn2gbFlagsPtr;
@@ -137,6 +139,8 @@ typedef struct int_asn2gb_job {
Boolean newSourceOrg;
ValNodePtr lockedBspList;
Boolean relModeError;
+ Boolean skipProts;
+ Boolean skipMrnas;
IndxPtr index;
GBSeqPtr gbseq;
StringItemPtr pool;
@@ -195,6 +199,7 @@ typedef struct asn2gbwork {
Boolean citSubsFirst;
Boolean hideGeneFeats;
Boolean newLocusLine;
+ Boolean showBaseCount;
Boolean hideImpFeats;
Boolean hideRemImpFeats;
@@ -209,6 +214,8 @@ typedef struct asn2gbwork {
Boolean onlyGeneRIFs;
Boolean latestGeneRIFs;
+ Boolean showRefs;
+
Boolean isGPS;
Boolean copyGpsCdsUp;
Boolean copyGpsGeneDown;
@@ -516,6 +523,7 @@ static Uint1 source_qual_order [] = {
SCQUAL_tissue_type,
SCQUAL_clone_lib,
SCQUAL_dev_stage,
+ SCQUAL_ecotype,
SCQUAL_frequency,
SCQUAL_germline,
@@ -565,7 +573,6 @@ static Uint1 source_desc_note_order [] = {
SCQUAL_authority,
SCQUAL_forma,
SCQUAL_forma_specialis,
- SCQUAL_ecotype,
SCQUAL_synonym,
SCQUAL_anamorph,
SCQUAL_teleomorph,
@@ -607,7 +614,6 @@ static Uint1 source_feat_note_order [] = {
SCQUAL_authority,
SCQUAL_forma,
SCQUAL_forma_specialis,
- SCQUAL_ecotype,
SCQUAL_synonym,
SCQUAL_anamorph,
SCQUAL_teleomorph,
@@ -805,6 +811,7 @@ typedef enum {
FTQUAL_modelev,
FTQUAL_note,
FTQUAL_number,
+ FTQUAL_operon,
FTQUAL_organism,
FTQUAL_partial,
FTQUAL_PCR_conditions,
@@ -833,12 +840,14 @@ typedef enum {
FTQUAL_rrna_its,
FTQUAL_sec_str_type,
FTQUAL_selenocysteine,
+ FTQUAL_selenocysteine_note,
FTQUAL_seqfeat_note,
FTQUAL_site,
FTQUAL_site_type,
FTQUAL_standard_name,
FTQUAL_transcription,
FTQUAL_transcript_id,
+ FTQUAL_transcript_id_note, /* !!! remove October 15, 2003 !!! */
FTQUAL_transl_except,
FTQUAL_transl_table,
FTQUAL_translation,
@@ -859,6 +868,10 @@ static Uint1 feat_qual_order [] = {
FTQUAL_locus_tag,
FTQUAL_gene_syn_refseq,
+ FTQUAL_gene_allele,
+
+ FTQUAL_operon,
+
FTQUAL_product,
FTQUAL_prot_EC_number,
@@ -881,6 +894,7 @@ static Uint1 feat_qual_order [] = {
FTQUAL_number,
FTQUAL_pseudo,
+ FTQUAL_selenocysteine,
FTQUAL_codon_start,
@@ -895,7 +909,6 @@ static Uint1 feat_qual_order [] = {
FTQUAL_frequency,
FTQUAL_EC_number,
FTQUAL_gene_map,
- FTQUAL_gene_allele,
FTQUAL_allele,
FTQUAL_map,
FTQUAL_mod_base,
@@ -944,6 +957,7 @@ pseudo after note - gi|6598562|gb|AC006419.3|AC006419
*/
static Uint1 feat_note_order [] = {
+ FTQUAL_transcript_id_note, /* !!! remove October 15, 2003 !!! */
FTQUAL_gene_desc,
FTQUAL_gene_syn,
FTQUAL_trna_codons,
@@ -958,7 +972,7 @@ static Uint1 feat_note_order [] = {
FTQUAL_seqfeat_note,
FTQUAL_exception_note,
FTQUAL_region,
- /* FTQUAL_selenocysteine, */
+ FTQUAL_selenocysteine_note,
FTQUAL_prot_names,
FTQUAL_bond,
FTQUAL_site,
@@ -1023,6 +1037,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "model_evidence", Qual_class_model_ev },
{ "note", Qual_class_note },
{ "number", Qual_class_number },
+ { "operon", Qual_class_quote },
{ "organism", Qual_class_quote },
{ "partial", Qual_class_boolean },
{ "PCR_conditions", Qual_class_quote },
@@ -1050,6 +1065,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "rpt_unit", Qual_class_rpt_unit },
{ "rrna_its", Qual_class_its },
{ "sec_str_type", Qual_class_sec_str },
+ { "selenocysteine", Qual_class_boolean },
{ "selenocysteine", Qual_class_string },
{ "seqfeat_note", Qual_class_string },
{ "site", Qual_class_site },
@@ -1057,6 +1073,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "standard_name", Qual_class_quote },
{ "transcription", Qual_class_transcription },
{ "transcript_id", Qual_class_seq_id },
+ { "tscpt_id_note", Qual_class_seq_id }, /* !!! remove October 15, 2003 !!! */
{ "transl_except", Qual_class_code_break },
{ "transl_table", Qual_class_int },
{ "translation", Qual_class_translation },
@@ -1098,7 +1115,7 @@ static Char doc_link [MAX_WWWBUF];
#define DEF_LINK_DOC "http://www.ncbi.nlm.nih.gov/genome/guide/build.html"
static Char ev_link [MAX_WWWBUF];
-#define DEF_LINK_EV "http://www.ncbi.nlm.nih.gov/cgi-bin/Entrez/evv.cgi?"
+#define DEF_LINK_EV "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?"
static Char ec_link [MAX_WWWBUF];
#define DEF_LINK_EC "http://www.expasy.ch/cgi-bin/nicezyme.pl?"
@@ -1106,108 +1123,119 @@ static Char ec_link [MAX_WWWBUF];
static Char link_tax [MAX_WWWBUF];
#define DEF_LINK_TAX "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?"
-static Char link_ff[MAX_WWWBUF];
+static Char link_ff [MAX_WWWBUF];
#define DEF_LINK_FF "/cgi-bin/Entrez/getfeat?"
-static Char link_muid[MAX_WWWBUF];
+static Char link_muid [MAX_WWWBUF];
#define DEF_LINK_MUID "/entrez/utils/qmap.cgi?"
-static Char link_ace[MAX_WWWBUF];
+static Char link_ace [MAX_WWWBUF];
#define DEF_LINK_ACE "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?db=worm&c=gene&q="
-static Char link_code[MAX_WWWBUF];
+static Char link_code [MAX_WWWBUF];
#define DEF_LINK_CODE "http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?"
-static Char link_fly[MAX_WWWBUF];
+static Char link_fly [MAX_WWWBUF];
#define DEF_LINK_FLY "http://flybase.bio.indiana.edu/.bin/fbidq.html?"
-static Char link_fly_fban[MAX_WWWBUF];
+static Char link_fly_fban [MAX_WWWBUF];
#define DEF_LINK_FBAN "http://www.fruitfly.org/cgi-bin/annot/fban?"
-static Char link_fly_fbgn[MAX_WWWBUF];
+static Char link_fly_fbgn [MAX_WWWBUF];
#define DEF_LINK_FBGN "http://flybase.bio.indiana.edu/.bin/fbidq.html?"
-static Char link_cog[MAX_WWWBUF];
+static Char link_cog [MAX_WWWBUF];
#define DEF_LINK_COG "http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox?"
-static Char link_sgd[MAX_WWWBUF];
+static Char link_sgd [MAX_WWWBUF];
#define DEF_LINK_SGD "/cgi-bin/Entrez/referer?http://genome-www4.stanford.edu/cgi-bin/SGD/locus.pl?locus="
-static Char link_gdb[MAX_WWWBUF];
+static Char link_gdb [MAX_WWWBUF];
#define DEF_LINK_GDB "http://gdbwww.gdb.org/gdb-bin/genera/genera/hgd/DBObject/GDB:"
-static Char link_ck[MAX_WWWBUF];
+static Char link_ck [MAX_WWWBUF];
#define DEF_LINK_CK "http://flybane.berkeley.edu/cgi-bin/cDNA/CK_clone.pl?db=CK&dbid="
-static Char link_rice[MAX_WWWBUF];
+static Char link_rice [MAX_WWWBUF];
#define DEF_LINK_RICE "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object="
-static Char link_sp[MAX_WWWBUF];
+static Char link_sp [MAX_WWWBUF];
#define DEF_LINK_SP "/cgi-bin/Entrez/referer?http://expasy.hcuge.ch/cgi-bin/sprot-search-ac%3f"
-static Char link_pdb[MAX_WWWBUF];
+static Char link_pdb [MAX_WWWBUF];
#define DEF_LINK_PDB "/cgi-bin/Entrez/referer?http://expasy.hcuge.ch/cgi-bin/get-pdb-entry%3f"
-static Char link_UniSTS[MAX_WWWBUF];
+static Char link_UniSTS [MAX_WWWBUF];
#define DEF_LINK_UniSTS "http://www.ncbi.nlm.nih.gov/genome/sts/sts.cgi?uid="
-static Char link_dbSTS[MAX_WWWBUF];
+static Char link_dbSTS [MAX_WWWBUF];
#define DEF_LINK_dbSTS "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?"
-static Char link_dbEST[MAX_WWWBUF];
+static Char link_dbEST [MAX_WWWBUF];
#define DEF_LINK_dbEST "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?"
-static Char link_omim[MAX_WWWBUF];
+static Char link_omim [MAX_WWWBUF];
#define DEF_LINK_OMIM "http://www.ncbi.nlm.nih.gov/entrez/dispomim.cgi?id="
-static Char link_locus[MAX_WWWBUF];
+static Char link_locus [MAX_WWWBUF];
#define DEF_LINK_LOCUS "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="
-static Char link_snp[MAX_WWWBUF];
+static Char link_snp [MAX_WWWBUF];
#define DEF_LINK_SNP "http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs="
-static Char link_ratmap[MAX_WWWBUF];
+static Char link_ratmap [MAX_WWWBUF];
#define DEF_LINK_RATMAP "http://ratmap.gen.gu.se/action.lasso?-database=RATMAPfmPro&-layout=Detail&-response=/RM/Detail+Format.html&-search&-recid="
-static Char link_rgd[MAX_WWWBUF];
+static Char link_rgd [MAX_WWWBUF];
#define DEF_LINK_RGD "http://rgd.mcw.edu/query/query.cgi?id="
-static Char link_mgd[MAX_WWWBUF];
+static Char link_mgd [MAX_WWWBUF];
#define DEF_LINK_MGD "http://www.informatics.jax.org/searches/accession_report.cgi?id=MGI:"
-static Char link_cdd[MAX_WWWBUF];
+static Char link_cdd [MAX_WWWBUF];
#define DEF_LINK_CDD "http://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid="
-static Char link_niaest[MAX_WWWBUF];
+static Char link_niaest [MAX_WWWBUF];
#define DEF_LINK_NIAEST "http://lgsun.grc.nia.nih.gov/cgi-bin/pro3?sname1="
-static Char link_worm_base[MAX_WWWBUF];
+static Char link_worm_base [MAX_WWWBUF];
#define DEF_LINK_WORM_BASE "http://www.wormbase.org/db/get?class=Sequence;name="
-static Char link_worfdb[MAX_WWWBUF];
+static Char link_worfdb [MAX_WWWBUF];
#define DEF_LINK_WORFDB "http://worfdb.dfci.harvard.edu/search.pl?form=1&search="
-static Char link_nextdb[MAX_WWWBUF];
+static Char link_nextdb [MAX_WWWBUF];
#define DEF_LINK_NEXTDB "http://nematode.lab.nig.ac.jp/cgi-bin/db/ShowGeneInfo.sh?celk="
-static Char link_imgt[MAX_WWWBUF];
+static Char link_imgt [MAX_WWWBUF];
#define DEF_LINK_IMGT "http://imgt.cines.fr:8104/cgi-bin/IMGTlect.jv?query=202+"
-static Char link_ifo[MAX_WWWBUF];
+static Char link_ifo [MAX_WWWBUF];
#define DEF_LINK_IFO "http://www.ifo.or.jp/index_e.html"
-static Char link_jcm[MAX_WWWBUF];
+static Char link_jcm [MAX_WWWBUF];
#define DEF_LINK_JCM "http://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM="
-static Char link_isfinder[MAX_WWWBUF];
+static Char link_isfinder [MAX_WWWBUF];
#define DEF_LINK_ISFINDER "http://www-is.biotoul.fr/scripts/is/is_spec.idc?name="
-static Char link_gabi[MAX_WWWBUF];
+static Char link_gabi [MAX_WWWBUF];
#define DEF_LINK_GABI "https://gabi.rzpd.de/cgi-bin-protected/GreenCards.pl.cgi?Mode=ShowBioObject&BioObjectName="
-static Char link_fantom[MAX_WWWBUF];
+static Char link_fantom [MAX_WWWBUF];
#define DEF_LINK_FANTOM "http://fantom.gsc.riken.go.jp/db/view/main.cgi?masterid="
+static Char link_interpro [MAX_WWWBUF];
+#define DEF_LINK_INTERPRO "http://www.ebi.ac.uk/interpro/ISearch?mode=ipr&query="
+
+static Char link_genedb [MAX_WWWBUF];
+#define DEF_LINK_GENEDB "http://www.genedb.org/genedb/Dispatcher?formType=navBar&submit=Search+for&organism=All%3Apombe%3Acerevisiae%3Adicty%3Aasp%3Atryp%3Aleish%3Amalaria%3Astyphi%3Aglossina&desc=yes&ohmr=%2F&name="
+
+static Char link_zfin [MAX_WWWBUF];
+#define DEF_LINK_ZFIN "http://zfin.org/cgi-bin/webdriver?MIval=aa-markerview.apg&OID="
+
+static Char link_rebase [MAX_WWWBUF];
+#define DEF_LINK_REBASE "http://rebase.neb.com/rebase/enz/"
/* utility functions */
@@ -1447,7 +1475,7 @@ static void FFAddPeriod (StringItemPtr sip) {
FFRecycleString(ajp, riter);
riter = prev;
riter->next = NULL;
- sip->curr = riter;
+ sip->curr = riter;
break;
}
}
@@ -2185,7 +2213,7 @@ static Int4 FFStringSearch (
return shift;
} else {
shift += MAX( (Int4)good_suffix[j],
- (Int4)(j - last_occurance[FFCharAt(text,shift + j)]));
+ (Int4)(j - last_occurance[FFCharAt(text,shift + j)]));
}
}
@@ -2205,27 +2233,27 @@ static Boolean IsWholeWordSubstr (
CharPtr subStr
)
{
- Boolean left, right;
- Char ch;
+ Boolean left, right;
+ Char ch;
- /* check on the left only if there is a character there */
- if (foundPos > 0) {
- ch = FFCharAt(searchStr, foundPos - 1);
- left = IS_WHITESP(ch) || ispunct(ch);
- } else {
- left = TRUE;
- }
+ /* check on the left only if there is a character there */
+ if (foundPos > 0) {
+ ch = FFCharAt(searchStr, foundPos - 1);
+ left = IS_WHITESP(ch) || ispunct(ch);
+ } else {
+ left = TRUE;
+ }
- foundPos += StringLen(subStr);
+ foundPos += StringLen(subStr);
if ( foundPos == FFLength(searchStr) ) {
right = TRUE;
} else {
ch = FFCharAt(searchStr, foundPos);
- right = IS_WHITESP(ch) || ispunct(ch);
+ right = IS_WHITESP(ch) || ispunct(ch);
}
- return left; /* see comment above */
+ return left; /* see comment above */
/* return left && right; this is how it should be!*/
}
@@ -2297,6 +2325,10 @@ static void InitWWW (IntAsn2gbJobPtr ajp)
GetAppParam ("NCBI", "WWWENTREZ", "LINK_ISFINDER", DEF_LINK_ISFINDER, link_isfinder, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_GABI", DEF_LINK_GABI, link_gabi, MAX_WWWBUF);
GetAppParam ("NCBI", "WWWENTREZ", "LINK_FANTOM", DEF_LINK_FANTOM, link_fantom, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_INTERPRO", DEF_LINK_INTERPRO, link_interpro, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_GENEDB", DEF_LINK_GENEDB, link_genedb, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_ZFIN", DEF_LINK_ZFIN, link_zfin, MAX_WWWBUF);
+ GetAppParam ("NCBI", "WWWENTREZ", "LINK_REBASE", DEF_LINK_REBASE, link_rebase, MAX_WWWBUF);
}
@@ -2525,6 +2557,19 @@ static void FF_www_db_xref_gdb(
}
}
+/*
+ * FF_www_db_xref_rebase
+ *
+ * Appends a REBASE db_xref to ffstring: first db followed by ":", then an
+ * HTML anchor built from link_rebase, the identifier and ".html", with the
+ * identifier repeated as the anchor text.  Leading space characters in
+ * identifier are skipped before it is used.  No NULL checks are made here.
+ */
+static void FF_www_db_xref_rebase (
+  StringItemPtr ffstring,
+  CharPtr db,
+  CharPtr identifier
+)
+{
+  CharPtr id = identifier;
+
+  /* step past any leading space characters */
+  for (; *id == ' '; id++) continue;
+
+  /* "<db>:" prefix, then the opening tag, then link text and closing tag */
+  FFAddTextToString(ffstring, NULL, db, ":", FALSE, FALSE, TILDE_IGNORE);
+  FFAddTextToString(ffstring, "<a href=", link_rebase, id, FALSE, FALSE, TILDE_IGNORE);
+  FFAddTextToString(ffstring, ".html>", id, "</a>", FALSE, FALSE, TILDE_IGNORE);
+}
static void Do_www_db_xref(
@@ -2536,67 +2581,75 @@ static void Do_www_db_xref(
if ( ffstring == NULL || db == NULL || identifier == NULL ) return;
if ( StringCmp(db, "FLYBASE") == 0) {
- FF_www_db_xref_fly(ffstring, db, identifier);
+ FF_www_db_xref_fly(ffstring, db, identifier);
} else if ( StringCmp(db , "COG") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_cog);
+ FF_www_db_xref_std(ffstring, db, identifier, link_cog);
} else if ( StringCmp(db , "UniSTS") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_UniSTS);
+ FF_www_db_xref_std(ffstring, db, identifier, link_UniSTS);
} else if ( StringCmp(db , "LocusID") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_locus);
+ FF_www_db_xref_std(ffstring, db, identifier, link_locus);
} else if ( StringCmp(db , "InterimID") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_locus);
+ FF_www_db_xref_std(ffstring, db, identifier, link_locus);
} else if ( StringCmp(db , "MIM") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_omim);
+ FF_www_db_xref_std(ffstring, db, identifier, link_omim);
} else if ( StringCmp(db , "SGD") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_sgd);
+ FF_www_db_xref_std(ffstring, db, identifier, link_sgd);
} else if ( StringCmp(db , "IMGT/LIGM") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_imgt);
+ FF_www_db_xref_std(ffstring, db, identifier, link_imgt);
} else if ( StringCmp(db , "CK") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_ck);
+ FF_www_db_xref_std(ffstring, db, identifier, link_ck);
} else if ( StringCmp(db , "RiceGenes") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_rice);
+ FF_www_db_xref_std(ffstring, db, identifier, link_rice);
} else if ( StringCmp(db , "dbSNP") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_snp);
+ FF_www_db_xref_std(ffstring, db, identifier, link_snp);
} else if ( StringCmp(db , "RATMAP") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_ratmap);
+ FF_www_db_xref_std(ffstring, db, identifier, link_ratmap);
} else if ( StringCmp(db , "RGD") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_rgd);
+ FF_www_db_xref_std(ffstring, db, identifier, link_rgd);
} else if ( StringCmp(db , "MGD") == 0) {
- FF_www_db_xref_mgd(ffstring, db, identifier);
+ FF_www_db_xref_mgd(ffstring, db, identifier);
} else if ( StringCmp(db , "CDD") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_cdd);
+ FF_www_db_xref_std(ffstring, db, identifier, link_cdd);
} else if ( StringCmp(db , "JCM") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_jcm);
+ FF_www_db_xref_std(ffstring, db, identifier, link_jcm);
} else if ( StringCmp(db , "ISFinder") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_isfinder);
+ FF_www_db_xref_std(ffstring, db, identifier, link_isfinder);
} else if ( StringCmp(db , "GABI") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_gabi);
+ FF_www_db_xref_std(ffstring, db, identifier, link_gabi);
+ } else if ( StringCmp(db , "ZFIN") == 0) {
+ FF_www_db_xref_std(ffstring, db, identifier, link_zfin);
} else if ( StringCmp(db , "FANTOM_DB") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_fantom);
+ FF_www_db_xref_std(ffstring, db, identifier, link_fantom);
+ } else if ( StringCmp(db , "Interpro") == 0) {
+ FF_www_db_xref_std(ffstring, db, identifier, link_interpro);
+ } else if ( StringCmp(db , "GeneDB") == 0) {
+ FF_www_db_xref_std(ffstring, db, identifier, link_genedb);
} else if ( StringCmp(db , "PID") == 0) {
- FF_www_db_xref_pid(ffstring, db, identifier);
+ FF_www_db_xref_pid(ffstring, db, identifier);
} else if ( StringCmp(db , "dbEST") == 0) {
- FF_www_db_xref_dbEST(ffstring, db, identifier);
+ FF_www_db_xref_dbEST(ffstring, db, identifier);
} else if ( StringCmp(db , "dbSTS") == 0) {
- FF_www_db_xref_dbSTS(ffstring, db, identifier);
+ FF_www_db_xref_dbSTS(ffstring, db, identifier);
} else if ( StringCmp(db , "niaEST") == 0) {
- FF_www_db_xref_niaEST(ffstring, db, identifier);
+ FF_www_db_xref_niaEST(ffstring, db, identifier);
} else if ( StringCmp(db , "WormBase") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_worm_base);
+ FF_www_db_xref_std(ffstring, db, identifier, link_worm_base);
} else if ( StringCmp(db , "AceView/WormGenes") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_ace);
+ FF_www_db_xref_std(ffstring, db, identifier, link_ace);
} else if ( StringCmp(db , "WorfDB") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_worfdb);
+ FF_www_db_xref_std(ffstring, db, identifier, link_worfdb);
} else if ( StringCmp(db , "NextDB") == 0) {
- FF_www_db_xref_std(ffstring, db, identifier, link_nextdb);
+ FF_www_db_xref_std(ffstring, db, identifier, link_nextdb);
} else if ( StringCmp(db , "IFO") == 0) {
- FF_www_db_xref_ifo(ffstring, db, identifier);
+ FF_www_db_xref_ifo(ffstring, db, identifier);
} else if ( StringCmp(db , "GDB") == 0) {
- FF_www_db_xref_gdb(ffstring, db, identifier);
+ FF_www_db_xref_gdb(ffstring, db, identifier);
+ } else if ( StringCmp(db , "REBASE") == 0) {
+ FF_www_db_xref_rebase(ffstring, db, identifier);
} else {
- /* default: no link just the text */
- FFAddTextToString(ffstring, db, ":", identifier, FALSE, FALSE, TILDE_IGNORE);
+ /* default: no link just the text */
+ FFAddTextToString(ffstring, db, ":", identifier, FALSE, FALSE, TILDE_IGNORE);
}
}
@@ -2968,7 +3021,7 @@ static void FFAddString_NoRedund (
if (StringNICmp (string, "tRNA-", 5) == 0) {
str = string+5;
- }
+ }
while ( foundPos >= 0 && !wholeWord ) {
foundPos = FFStringSearch(unique, str, foundPos);
@@ -3190,6 +3243,7 @@ static CharPtr legalDbXrefs [] = {
"FLYBASE",
"GABI",
"GDB",
+ "GeneDB",
"GeneID",
"GI",
"GO",
@@ -3224,10 +3278,12 @@ static CharPtr legalDbXrefs [] = {
"UniSTS",
"WorfDB",
"WormBase",
+ "ZFIN",
NULL
};
static CharPtr legalRefSeqDbXrefs [] = {
+ "REBASE",
NULL
};
@@ -3523,6 +3579,7 @@ static CharPtr FormatOrganismBlock (
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
BioSourcePtr biop = NULL;
+ Char ch;
CharPtr common = NULL;
DbtagPtr dbt;
SeqMgrDescContext dcontext;
@@ -3539,6 +3596,8 @@ static CharPtr FormatOrganismBlock (
CharPtr str;
Int4 taxid = -1;
CharPtr taxname = NULL;
+ CharPtr tmp;
+ CharPtr ptr;
ValNodePtr vnp;
StringItemPtr ffstring, temp;
Char buf [16];
@@ -3631,8 +3690,20 @@ static CharPtr FormatOrganismBlock (
FFAddOneString(temp, "<a href=", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(temp, link_tax, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(temp, "name=", FALSE, FALSE, TILDE_IGNORE);
- sprintf (buf, "%ld", (long) taxid);
- FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
+ tmp = StringSave (taxname);
+ if (tmp != NULL) {
+ ptr = tmp;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (IS_WHITESP (ch)) {
+ *ptr = '+';
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ FFAddOneString(temp, tmp, FALSE, FALSE, TILDE_IGNORE);
+ MemFree (tmp);
+ }
FFAddOneString(temp, ">", FALSE, FALSE, TILDE_IGNORE);
}
FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
@@ -5583,7 +5654,7 @@ static CharPtr FormatCitSub (
affil = GetAffil (afp);
if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
if (StringNCmp(affil, " to the EMBL/GenBank/DDBJ databases.", 36) != 0) {
- ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases\n");
+ ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n");
} else {
ValNodeCopyStr (&head, 0, " ");
}
@@ -5593,7 +5664,7 @@ static CharPtr FormatCitSub (
ValNodeCopyStr (&head, 0, affil);
MemFree (affil);
} else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
- ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases\n");
+ ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n");
}
}
@@ -5929,6 +6000,21 @@ static void AddReferenceToGbseq (
MemFree (copy);
}
+/*
+ * IsCitSub
+ *
+ * Returns TRUE when csp is non-NULL, or when any node on the pdp->pub
+ * chain has choice PUB_Sub.  Returns FALSE otherwise, including when both
+ * csp and pdp are NULL.
+ */
+static Boolean IsCitSub (
+  PubdescPtr pdp,
+  CitSubPtr csp
+)
+
+{
+  ValNodePtr vnp;
+
+  if (csp != NULL) return TRUE;
+  /* without a pubdesc there is no pub chain to scan */
+  if (pdp == NULL) return FALSE;
+  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice == PUB_Sub) return TRUE;
+  }
+  return FALSE;
+}
+
static CharPtr FormatReferenceBlock (
Asn2gbFormatPtr afp,
BaseBlockPtr bbp
@@ -5958,12 +6044,15 @@ static CharPtr FormatReferenceBlock (
IntRefBlockPtr irp;
size_t len;
SeqLocPtr loc = NULL;
+ MedlineEntryPtr mep;
Int4 muid = 0;
Boolean needsPeriod = FALSE;
SeqLocPtr nextslp;
Boolean notFound;
ObjMgrDataPtr omdp;
PubdescPtr pdp = NULL;
+ PubdescPtr pdpcopy = NULL;
+ PubmedEntryPtr pep = NULL;
Int4 pmid = 0;
CharPtr prefix = NULL;
RefBlockPtr rbp;
@@ -6047,6 +6136,30 @@ static CharPtr FormatReferenceBlock (
return NULL;
}
+ /* any justuids left at this point is RefSeq protein, and should be fetched */
+
+ irp = (IntRefBlockPtr) rbp;
+ if (irp->justuids) {
+ if (rbp->pmid != 0) {
+ pep = GetPubMedForUid (rbp->pmid);
+ } else if (rbp->muid != 0) {
+ pep = GetPubMedForUid (rbp->muid);
+ }
+ if (pep != NULL) {
+ mep = (MedlineEntryPtr) pep->medent;
+ if (mep != NULL && mep->cit != NULL) {
+ pdpcopy = AsnIoMemCopy ((Pointer) pdp,
+ (AsnReadFunc) PubdescAsnRead,
+ (AsnWriteFunc) PubdescAsnWrite);
+ cap = AsnIoMemCopy ((Pointer) mep->cit,
+ (AsnReadFunc) CitArtAsnRead,
+ (AsnWriteFunc) CitArtAsnWrite);
+ vnp = ValNodeAddPointer (&(pdpcopy->pub), PUB_Article, (Pointer) cap);
+ pdp = pdpcopy;
+ }
+ }
+ }
+
/* print serial number */
FFStartPrint(temp, afp->format, 0, 12, "REFERENCE", 12, 5, 5, "RN", TRUE);
@@ -6304,7 +6417,9 @@ static CharPtr FormatReferenceBlock (
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
needsPeriod = FALSE;
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
- needsPeriod = TRUE;
+ if (! IsCitSub (pdp, csp)) {
+ needsPeriod = TRUE;
+ }
}
FFAddOneString (temp, str, FALSE, FALSE, TILDE_IGNORE);
@@ -6386,6 +6501,13 @@ static CharPtr FormatReferenceBlock (
FFRecycleString(ajp, ffstring);
FFRecycleString(ajp, temp);
+ if (pep != NULL) {
+ PubmedEntryFree (pep);
+ }
+ if (pdpcopy != NULL) {
+ PubdescFree (pdpcopy);
+ }
+
return str;
}
@@ -6536,6 +6658,13 @@ static CharPtr FormatReferenceBlock (
FFRecycleString(ajp, ffstring);
FFRecycleString(ajp, temp);
+ if (pep != NULL) {
+ PubmedEntryFree (pep);
+ }
+ if (pdpcopy != NULL) {
+ PubdescFree (pdpcopy);
+ }
+
return str;
}
@@ -6552,8 +6681,8 @@ static Boolean IsTildeEOL(CharPtr str) {
for ( ptr = str;
- IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.';
- ++ptr) continue;
+ IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.';
+ ++ptr) continue;
return *ptr == '/' ? FALSE : TRUE;
}
@@ -7455,7 +7584,6 @@ static CharPtr FlatLoc (
order [SEQID_GENBANK] = num++;
order [SEQID_EMBL] = num++;
order [SEQID_DDBJ] = num++;
- order [SEQID_LOCAL] = num++;
order [SEQID_OTHER] = num++;
order [SEQID_TPG] = num++;
order [SEQID_TPE] = num++;
@@ -7469,6 +7597,7 @@ static CharPtr FlatLoc (
order [SEQID_PATENT] = num++;
order [SEQID_GI] = num++;;
order [SEQID_GENERAL] = num++;
+ order [SEQID_LOCAL] = num++;
order [SEQID_GIIM] = num++;
order_initialized = TRUE;
}
@@ -7521,6 +7650,114 @@ static CharPtr FlatLoc (
}
+
+
+/*
+ * PromoteSeqId
+ *
+ * Callback handed to VisitSeqIdsInSeqLoc.  userdata is the SeqId to
+ * promote.  The node sip is overwritten in place (choice and data pointer
+ * only; its next link is left alone) with a duplicate of that SeqId, the
+ * contents sip held before are freed, and SeqIdStripLocus is then applied
+ * to sip.  If either allocation fails, sip is left untouched.
+ */
+static void PromoteSeqId (SeqIdPtr sip, Pointer userdata)
+
+{
+  SeqIdPtr  bestid, newid, oldid;
+
+  bestid = (SeqIdPtr) userdata;
+  if (sip == NULL || bestid == NULL) return;
+
+  newid = SeqIdDup (bestid);
+  if (newid == NULL) return;
+
+  oldid = ValNodeNew (NULL);
+  if (oldid == NULL) {
+    /* do not leak the duplicate when the scratch node cannot be made */
+    SeqIdFree (newid);
+    return;
+  }
+
+  /* move the current contents of sip into a detached node so that they */
+  /* can be released without touching the rest of the chain */
+  MemCopy (oldid, sip, sizeof (ValNode));
+  oldid->next = NULL;
+
+  /* sip takes over the data owned by the duplicate */
+  sip->choice = newid->choice;
+  sip->data.ptrvalue = newid->data.ptrvalue;
+
+  /* old contents are freed as a SeqId; only the emptied shell of newid */
+  /* is released, since its data now belongs to sip */
+  SeqIdFree (oldid);
+  ValNodeFree (newid);
+
+  SeqIdStripLocus (sip);
+}
+
+/*
+ * SeqLocReMapEx
+ *
+ * Wrapper around SeqLocReMap (newid, seq_loc, location, offset, rev).
+ * Returns NULL if newid, seq_loc or location is NULL.
+ *
+ * masterStyle FALSE: calls SeqLocReMap on location directly, with the
+ * SeqEntry scope set to NULL for the duration of the call and restored
+ * afterwards.
+ *
+ * masterStyle TRUE: looks up the Bioseq for the id of seq_loc (retrying
+ * with the scope set to NULL), merges location onto that Bioseq with
+ * SeqLocMerge (falling back to TrimLocInSegment first if the merge fails),
+ * strips fuzz, re-applies the partial flags read from the input location,
+ * restores a copy of the original point fuzz for a single-point result,
+ * remaps the merged location, and finally replaces every SeqId in the
+ * result with the Bioseq's best id via PromoteSeqId.  Returns NULL if the
+ * Bioseq cannot be found or the merge fails.
+ *
+ * NOTE(review): the result appears to be newly allocated and owned by the
+ * caller - confirm against SeqLocReMap.
+ */
+static SeqLocPtr SeqLocReMapEx (SeqIdPtr newid, SeqLocPtr seq_loc, SeqLocPtr location, Int4 offset, Boolean rev, Boolean masterStyle)
+
+{
+  BioseqPtr    bsp;
+  Boolean      hasNulls;
+  IntFuzzPtr   fuzz = NULL;
+  SeqLocPtr    loc;
+  Boolean      noLeft;
+  Boolean      noRight;
+  SeqEntryPtr  scope;
+  SeqIdPtr     sip;
+  SeqLocPtr    slp = NULL;
+  SeqPntPtr    spp;
+  SeqLocPtr    tmp;
+
+  if (newid == NULL || seq_loc == NULL || location == NULL) return NULL;
+
+  if (masterStyle) {
+
+    sip = SeqLocId (seq_loc);
+    if (sip == NULL) return NULL;
+    bsp = BioseqFind (sip);
+    if (bsp == NULL) {
+      /* retry the lookup with the scope set to NULL, then put it back */
+      scope = SeqEntrySetScope (NULL);
+      bsp = BioseqFind (sip);
+      SeqEntrySetScope (scope);
+    }
+    if (bsp == NULL) return NULL;
+    sip = SeqIdFindBest (bsp->id, 0);
+
+    /* map location from parts to segmented bioseq */
+
+    /* remember the fuzz of a point location; FreeAllFuzz below drops it */
+    if (location->choice == SEQLOC_PNT) {
+      spp = (SeqPntPtr) location->data.ptrvalue;
+      if (spp != NULL) {
+        fuzz = spp->fuzz;
+      }
+    }
+
+    CheckSeqLocForPartial (location, &noLeft, &noRight);
+    hasNulls = LocationHasNullsBetween (location);
+    loc = SeqLocMerge (bsp, location, NULL, FALSE, TRUE, hasNulls);
+    if (loc == NULL) {
+      /* merge failed: trim the location to the segment and try once more */
+      tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
+      loc = SeqLocMerge (bsp, tmp, NULL, FALSE, TRUE, hasNulls);
+      SeqLocFree (tmp);
+    }
+    if (loc == NULL) {
+      return NULL;
+    }
+    FreeAllFuzz (loc);
+    SetSeqLocPartial (loc, noLeft, noRight);
+
+    /* give a single-point result its own copy of the original fuzz */
+    if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
+      spp = (SeqPntPtr) loc->data.ptrvalue;
+      if (spp != NULL && spp->fuzz == NULL) {
+        spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
+                                  (AsnReadFunc) IntFuzzAsnRead,
+                                  (AsnWriteFunc) IntFuzzAsnWrite);
+      }
+    }
+
+    scope = SeqEntrySetScope (NULL);
+    slp = SeqLocReMap (newid, seq_loc, loc, offset, rev);
+    SeqEntrySetScope (scope);
+
+    SeqLocFree (loc);
+
+    /* rewrite every id in the remapped location to the best id of bsp */
+    VisitSeqIdsInSeqLoc (slp, (Pointer) sip, PromoteSeqId);
+  } else {
+
+    scope = SeqEntrySetScope (NULL);
+    slp = SeqLocReMap (newid, seq_loc, location, offset, rev);
+    SeqEntrySetScope (scope);
+  }
+
+  return slp;
+}
+
+
/******************************************************************************/
/* End FlatLoc functions. */
/******************************************************************************/
@@ -7836,7 +8073,7 @@ static CharPtr GetMolTypeQual (
}
break;
case MOLECULE_TYPE_PRE_MRNA :
- return "pre-mRNA";
+ return "pre-RNA";
case MOLECULE_TYPE_MRNA :
return "mRNA";
case MOLECULE_TYPE_RRNA :
@@ -8038,7 +8275,23 @@ static CharPtr FormatSourceFeatBlock (
qvp [SCQUAL_focus].ble = TRUE;
}
- qvp [SCQUAL_mol_type].str = GetMolTypeQual (bsp);
+ str = GetMolTypeQual (bsp);
+ if (str == NULL) {
+ switch (bsp->mol) {
+ case Seq_mol_dna :
+ str = "unassigned DNA";
+ break;
+ case Seq_mol_rna :
+ str = "unassigned RNA";
+ break;
+ case Seq_mol_aa :
+ break;
+ default :
+ str = "unassigned DNA";
+ break;
+ }
+ }
+ qvp [SCQUAL_mol_type].str = str;
SubSourceToQualArray (biop->subtype, qvp);
@@ -8450,7 +8703,7 @@ typedef struct qualfeatur {
Uint1 featurclass;
} QualFeatur, PNTR QualFeaturPtr;
-#define NUM_GB_QUALS 25
+#define NUM_GB_QUALS 26
static QualFeatur qualToFeature [NUM_GB_QUALS] = {
{ "allele", FTQUAL_allele },
@@ -8467,6 +8720,7 @@ static QualFeatur qualToFeature [NUM_GB_QUALS] = {
{ "map", FTQUAL_map },
{ "mod_base", FTQUAL_mod_base },
{ "number", FTQUAL_number },
+ { "operon", FTQUAL_operon },
{ "organism", FTQUAL_organism },
{ "PCR_conditions", FTQUAL_PCR_conditions },
{ "phenotype", FTQUAL_phenotype },
@@ -8581,6 +8835,7 @@ static CharPtr trnaList [] = {
"tRNA-OTHER",
"tRNA-Tyr",
"tRNA-Glx",
+ "tRNA-TERM",
NULL
};
@@ -8908,441 +9163,525 @@ typedef struct valqualstruc {
static ValQual legalGbqualList [] = {
- {FEATDEF_GENE , FTQUAL_allele},
- {FEATDEF_GENE , FTQUAL_function},
- {FEATDEF_GENE , FTQUAL_label},
- {FEATDEF_GENE , FTQUAL_map},
- {FEATDEF_GENE , FTQUAL_phenotype},
- {FEATDEF_GENE , FTQUAL_product},
- {FEATDEF_GENE , FTQUAL_standard_name},
- {FEATDEF_GENE , FTQUAL_usedin},
-
- {FEATDEF_CDS , FTQUAL_allele},
- {FEATDEF_CDS , FTQUAL_codon},
- {FEATDEF_CDS , FTQUAL_label},
- {FEATDEF_CDS , FTQUAL_map},
- {FEATDEF_CDS , FTQUAL_number},
- {FEATDEF_CDS , FTQUAL_standard_name},
- {FEATDEF_CDS , FTQUAL_usedin},
-
- {FEATDEF_PROT , FTQUAL_product},
-
- {FEATDEF_preRNA , FTQUAL_allele},
- {FEATDEF_preRNA , FTQUAL_function},
- {FEATDEF_preRNA , FTQUAL_label},
- {FEATDEF_preRNA , FTQUAL_map},
- {FEATDEF_preRNA , FTQUAL_product},
- {FEATDEF_preRNA , FTQUAL_standard_name},
- {FEATDEF_preRNA , FTQUAL_usedin},
-
- {FEATDEF_mRNA , FTQUAL_allele},
- {FEATDEF_mRNA , FTQUAL_function},
- {FEATDEF_mRNA , FTQUAL_label},
- {FEATDEF_mRNA , FTQUAL_map},
- {FEATDEF_mRNA , FTQUAL_product},
- {FEATDEF_mRNA , FTQUAL_standard_name},
- {FEATDEF_mRNA , FTQUAL_usedin},
-
- {FEATDEF_tRNA , FTQUAL_function},
- {FEATDEF_tRNA , FTQUAL_label},
- {FEATDEF_tRNA , FTQUAL_map},
- {FEATDEF_tRNA , FTQUAL_product},
- {FEATDEF_tRNA , FTQUAL_standard_name},
- {FEATDEF_tRNA , FTQUAL_usedin},
-
- {FEATDEF_rRNA , FTQUAL_function},
- {FEATDEF_rRNA , FTQUAL_label},
- {FEATDEF_rRNA , FTQUAL_map},
- {FEATDEF_rRNA , FTQUAL_product},
- {FEATDEF_rRNA , FTQUAL_standard_name},
- {FEATDEF_rRNA , FTQUAL_usedin},
-
- {FEATDEF_snRNA , FTQUAL_function},
- {FEATDEF_snRNA , FTQUAL_label},
- {FEATDEF_snRNA , FTQUAL_map},
- {FEATDEF_snRNA , FTQUAL_product},
- {FEATDEF_snRNA , FTQUAL_standard_name},
- {FEATDEF_snRNA , FTQUAL_usedin},
-
- {FEATDEF_scRNA , FTQUAL_function},
- {FEATDEF_scRNA , FTQUAL_label},
- {FEATDEF_scRNA , FTQUAL_map},
- {FEATDEF_scRNA , FTQUAL_product},
- {FEATDEF_scRNA , FTQUAL_standard_name},
- {FEATDEF_scRNA , FTQUAL_usedin},
-
- {FEATDEF_otherRNA , FTQUAL_function},
- {FEATDEF_otherRNA , FTQUAL_label},
- {FEATDEF_otherRNA , FTQUAL_map},
- {FEATDEF_otherRNA , FTQUAL_product},
- {FEATDEF_otherRNA , FTQUAL_standard_name},
- {FEATDEF_otherRNA , FTQUAL_usedin},
-
- {FEATDEF_attenuator , FTQUAL_label},
- {FEATDEF_attenuator , FTQUAL_map},
- {FEATDEF_attenuator , FTQUAL_phenotype},
- {FEATDEF_attenuator , FTQUAL_usedin},
-
- {FEATDEF_C_region , FTQUAL_label},
- {FEATDEF_C_region , FTQUAL_map},
- {FEATDEF_C_region , FTQUAL_product},
- {FEATDEF_C_region , FTQUAL_standard_name},
- {FEATDEF_C_region , FTQUAL_usedin},
-
- {FEATDEF_CAAT_signal , FTQUAL_label},
- {FEATDEF_CAAT_signal , FTQUAL_map},
- {FEATDEF_CAAT_signal , FTQUAL_usedin},
-
- {FEATDEF_Imp_CDS , FTQUAL_codon},
- {FEATDEF_Imp_CDS , FTQUAL_EC_number},
- {FEATDEF_Imp_CDS , FTQUAL_function},
- {FEATDEF_Imp_CDS , FTQUAL_label},
- {FEATDEF_Imp_CDS , FTQUAL_map},
- {FEATDEF_Imp_CDS , FTQUAL_number},
- {FEATDEF_Imp_CDS , FTQUAL_product},
- {FEATDEF_Imp_CDS , FTQUAL_standard_name},
- {FEATDEF_Imp_CDS , FTQUAL_usedin},
-
- {FEATDEF_conflict , FTQUAL_label},
- {FEATDEF_conflict , FTQUAL_map},
- {FEATDEF_conflict , FTQUAL_replace},
- {FEATDEF_conflict , FTQUAL_usedin},
-
- {FEATDEF_D_loop , FTQUAL_label},
- {FEATDEF_D_loop , FTQUAL_map},
- {FEATDEF_D_loop , FTQUAL_usedin},
-
- {FEATDEF_D_segment , FTQUAL_label},
- {FEATDEF_D_segment , FTQUAL_map},
- {FEATDEF_D_segment , FTQUAL_product},
- {FEATDEF_D_segment , FTQUAL_standard_name},
- {FEATDEF_D_segment , FTQUAL_usedin},
-
- {FEATDEF_enhancer , FTQUAL_label},
- {FEATDEF_enhancer , FTQUAL_map},
- {FEATDEF_enhancer , FTQUAL_standard_name},
- {FEATDEF_enhancer , FTQUAL_usedin},
-
- {FEATDEF_exon , FTQUAL_allele},
- {FEATDEF_exon , FTQUAL_EC_number},
- {FEATDEF_exon , FTQUAL_function},
- {FEATDEF_exon , FTQUAL_label},
- {FEATDEF_exon , FTQUAL_map},
- {FEATDEF_exon , FTQUAL_number},
- {FEATDEF_exon , FTQUAL_product},
- {FEATDEF_exon , FTQUAL_standard_name},
- {FEATDEF_exon , FTQUAL_usedin},
-
- {FEATDEF_GC_signal , FTQUAL_label},
- {FEATDEF_GC_signal , FTQUAL_map},
- {FEATDEF_GC_signal , FTQUAL_usedin},
-
- {FEATDEF_iDNA , FTQUAL_function},
- {FEATDEF_iDNA , FTQUAL_label},
- {FEATDEF_iDNA , FTQUAL_map},
- {FEATDEF_iDNA , FTQUAL_number},
- {FEATDEF_iDNA , FTQUAL_standard_name},
- {FEATDEF_iDNA , FTQUAL_usedin},
-
- {FEATDEF_intron , FTQUAL_allele},
- {FEATDEF_intron , FTQUAL_cons_splice},
- {FEATDEF_intron , FTQUAL_function},
- {FEATDEF_intron , FTQUAL_label},
- {FEATDEF_intron , FTQUAL_map},
- {FEATDEF_intron , FTQUAL_number},
- {FEATDEF_intron , FTQUAL_standard_name},
- {FEATDEF_intron , FTQUAL_usedin},
-
- {FEATDEF_J_segment , FTQUAL_label},
- {FEATDEF_J_segment , FTQUAL_map},
- {FEATDEF_J_segment , FTQUAL_product},
- {FEATDEF_J_segment , FTQUAL_standard_name},
- {FEATDEF_J_segment , FTQUAL_usedin},
-
- {FEATDEF_LTR , FTQUAL_function},
- {FEATDEF_LTR , FTQUAL_label},
- {FEATDEF_LTR , FTQUAL_map},
- {FEATDEF_LTR , FTQUAL_standard_name},
- {FEATDEF_LTR , FTQUAL_usedin},
-
- {FEATDEF_mat_peptide , FTQUAL_EC_number},
- {FEATDEF_mat_peptide , FTQUAL_function},
- {FEATDEF_mat_peptide , FTQUAL_label},
- {FEATDEF_mat_peptide , FTQUAL_map},
- {FEATDEF_mat_peptide , FTQUAL_product},
- {FEATDEF_mat_peptide , FTQUAL_standard_name},
- {FEATDEF_mat_peptide , FTQUAL_usedin},
-
- {FEATDEF_misc_binding , FTQUAL_bound_moiety},
- {FEATDEF_misc_binding , FTQUAL_function},
- {FEATDEF_misc_binding , FTQUAL_label},
- {FEATDEF_misc_binding , FTQUAL_map},
- {FEATDEF_misc_binding , FTQUAL_usedin},
-
- {FEATDEF_misc_difference , FTQUAL_clone},
- {FEATDEF_misc_difference , FTQUAL_label},
- {FEATDEF_misc_difference , FTQUAL_map},
- {FEATDEF_misc_difference , FTQUAL_phenotype},
- {FEATDEF_misc_difference , FTQUAL_replace},
- {FEATDEF_misc_difference , FTQUAL_standard_name},
- {FEATDEF_misc_difference , FTQUAL_usedin},
-
- {FEATDEF_misc_feature , FTQUAL_function},
- {FEATDEF_misc_feature , FTQUAL_label},
- {FEATDEF_misc_feature , FTQUAL_map},
- {FEATDEF_misc_feature , FTQUAL_number},
- {FEATDEF_misc_feature , FTQUAL_phenotype},
- {FEATDEF_misc_feature , FTQUAL_product},
- {FEATDEF_misc_feature , FTQUAL_standard_name},
- {FEATDEF_misc_feature , FTQUAL_usedin},
-
- {FEATDEF_misc_recomb , FTQUAL_label},
- {FEATDEF_misc_recomb , FTQUAL_map},
- {FEATDEF_misc_recomb , FTQUAL_organism},
- {FEATDEF_misc_recomb , FTQUAL_standard_name},
- {FEATDEF_misc_recomb , FTQUAL_usedin},
-
- {FEATDEF_misc_signal , FTQUAL_function},
- {FEATDEF_misc_signal , FTQUAL_label},
- {FEATDEF_misc_signal , FTQUAL_map},
- {FEATDEF_misc_signal , FTQUAL_phenotype},
- {FEATDEF_misc_signal , FTQUAL_standard_name},
- {FEATDEF_misc_signal , FTQUAL_usedin},
-
- {FEATDEF_misc_structure , FTQUAL_function},
- {FEATDEF_misc_structure , FTQUAL_label},
- {FEATDEF_misc_structure , FTQUAL_map},
- {FEATDEF_misc_structure , FTQUAL_standard_name},
- {FEATDEF_misc_structure , FTQUAL_usedin},
-
- {FEATDEF_modified_base , FTQUAL_frequency},
- {FEATDEF_modified_base , FTQUAL_label},
- {FEATDEF_modified_base , FTQUAL_map},
- {FEATDEF_modified_base , FTQUAL_mod_base},
- {FEATDEF_modified_base , FTQUAL_usedin},
-
- {FEATDEF_N_region , FTQUAL_label},
- {FEATDEF_N_region , FTQUAL_map},
- {FEATDEF_N_region , FTQUAL_product},
- {FEATDEF_N_region , FTQUAL_standard_name},
- {FEATDEF_N_region , FTQUAL_usedin},
-
- {FEATDEF_old_sequence , FTQUAL_label},
- {FEATDEF_old_sequence , FTQUAL_map},
- {FEATDEF_old_sequence , FTQUAL_replace},
- {FEATDEF_old_sequence , FTQUAL_usedin},
-
- {FEATDEF_polyA_signal , FTQUAL_label},
- {FEATDEF_polyA_signal , FTQUAL_map},
- {FEATDEF_polyA_signal , FTQUAL_usedin},
-
- {FEATDEF_polyA_site , FTQUAL_label},
- {FEATDEF_polyA_site , FTQUAL_map},
- {FEATDEF_polyA_site , FTQUAL_usedin},
-
- {FEATDEF_prim_transcript , FTQUAL_allele},
- {FEATDEF_prim_transcript , FTQUAL_function},
- {FEATDEF_prim_transcript , FTQUAL_label},
- {FEATDEF_prim_transcript , FTQUAL_map},
- {FEATDEF_prim_transcript , FTQUAL_standard_name},
- {FEATDEF_prim_transcript , FTQUAL_usedin},
-
- {FEATDEF_primer_bind , FTQUAL_label},
- {FEATDEF_primer_bind , FTQUAL_map},
- {FEATDEF_primer_bind , FTQUAL_PCR_conditions},
- {FEATDEF_primer_bind , FTQUAL_standard_name},
- {FEATDEF_primer_bind , FTQUAL_usedin},
-
- {FEATDEF_promoter , FTQUAL_function},
- {FEATDEF_promoter , FTQUAL_label},
- {FEATDEF_promoter , FTQUAL_map},
- {FEATDEF_promoter , FTQUAL_phenotype},
- {FEATDEF_promoter , FTQUAL_standard_name},
- {FEATDEF_promoter , FTQUAL_usedin},
-
- {FEATDEF_protein_bind , FTQUAL_bound_moiety},
- {FEATDEF_protein_bind , FTQUAL_function},
- {FEATDEF_protein_bind , FTQUAL_label},
- {FEATDEF_protein_bind , FTQUAL_map},
- {FEATDEF_protein_bind , FTQUAL_standard_name},
- {FEATDEF_protein_bind , FTQUAL_usedin},
-
- {FEATDEF_RBS , FTQUAL_label},
- {FEATDEF_RBS , FTQUAL_map},
- {FEATDEF_RBS , FTQUAL_standard_name},
- {FEATDEF_RBS , FTQUAL_usedin},
-
- {FEATDEF_repeat_region , FTQUAL_function},
- {FEATDEF_repeat_region , FTQUAL_insertion_seq},
- {FEATDEF_repeat_region , FTQUAL_label},
- {FEATDEF_repeat_region , FTQUAL_map},
- {FEATDEF_repeat_region , FTQUAL_rpt_family},
- {FEATDEF_repeat_region , FTQUAL_rpt_type},
- {FEATDEF_repeat_region , FTQUAL_rpt_unit},
- {FEATDEF_repeat_region , FTQUAL_standard_name},
- {FEATDEF_repeat_region , FTQUAL_transposon},
- {FEATDEF_repeat_region , FTQUAL_usedin},
-
- {FEATDEF_repeat_unit , FTQUAL_function},
- {FEATDEF_repeat_unit , FTQUAL_label},
- {FEATDEF_repeat_unit , FTQUAL_map},
- {FEATDEF_repeat_unit , FTQUAL_rpt_family},
- {FEATDEF_repeat_unit , FTQUAL_rpt_type},
- {FEATDEF_repeat_unit , FTQUAL_usedin},
-
- {FEATDEF_rep_origin , FTQUAL_direction},
- {FEATDEF_rep_origin , FTQUAL_label},
- {FEATDEF_rep_origin , FTQUAL_map},
- {FEATDEF_rep_origin , FTQUAL_standard_name},
- {FEATDEF_rep_origin , FTQUAL_usedin},
-
- {FEATDEF_S_region , FTQUAL_label},
- {FEATDEF_S_region , FTQUAL_map},
- {FEATDEF_S_region , FTQUAL_product},
- {FEATDEF_S_region , FTQUAL_standard_name},
- {FEATDEF_S_region , FTQUAL_usedin},
-
- {FEATDEF_satellite , FTQUAL_label},
- {FEATDEF_satellite , FTQUAL_map},
- {FEATDEF_satellite , FTQUAL_rpt_family},
- {FEATDEF_satellite , FTQUAL_rpt_type},
- {FEATDEF_satellite , FTQUAL_rpt_unit},
- {FEATDEF_satellite , FTQUAL_standard_name},
- {FEATDEF_satellite , FTQUAL_usedin},
-
- {FEATDEF_sig_peptide , FTQUAL_function},
- {FEATDEF_sig_peptide , FTQUAL_label},
- {FEATDEF_sig_peptide , FTQUAL_map},
- {FEATDEF_sig_peptide , FTQUAL_product},
- {FEATDEF_sig_peptide , FTQUAL_standard_name},
- {FEATDEF_sig_peptide , FTQUAL_usedin},
-
- {FEATDEF_stem_loop , FTQUAL_function},
- {FEATDEF_stem_loop , FTQUAL_label},
- {FEATDEF_stem_loop , FTQUAL_map},
- {FEATDEF_stem_loop , FTQUAL_standard_name},
- {FEATDEF_stem_loop , FTQUAL_usedin},
-
- {FEATDEF_STS , FTQUAL_label},
- {FEATDEF_STS , FTQUAL_map},
- {FEATDEF_STS , FTQUAL_standard_name},
- {FEATDEF_STS , FTQUAL_usedin},
-
- {FEATDEF_TATA_signal , FTQUAL_label},
- {FEATDEF_TATA_signal , FTQUAL_map},
- {FEATDEF_TATA_signal , FTQUAL_usedin},
-
- {FEATDEF_terminator , FTQUAL_label},
- {FEATDEF_terminator , FTQUAL_map},
- {FEATDEF_terminator , FTQUAL_standard_name},
- {FEATDEF_terminator , FTQUAL_usedin},
-
- {FEATDEF_transit_peptide , FTQUAL_function},
- {FEATDEF_transit_peptide , FTQUAL_label},
- {FEATDEF_transit_peptide , FTQUAL_map},
- {FEATDEF_transit_peptide , FTQUAL_product},
- {FEATDEF_transit_peptide , FTQUAL_standard_name},
- {FEATDEF_transit_peptide , FTQUAL_usedin},
-
- {FEATDEF_unsure , FTQUAL_label},
- {FEATDEF_unsure , FTQUAL_map},
- {FEATDEF_unsure , FTQUAL_replace},
- {FEATDEF_unsure , FTQUAL_usedin},
-
- {FEATDEF_V_region , FTQUAL_label},
- {FEATDEF_V_region , FTQUAL_map},
- {FEATDEF_V_region , FTQUAL_product},
- {FEATDEF_V_region , FTQUAL_standard_name},
- {FEATDEF_V_region , FTQUAL_usedin},
-
- {FEATDEF_V_segment , FTQUAL_label},
- {FEATDEF_V_segment , FTQUAL_map},
- {FEATDEF_V_segment , FTQUAL_product},
- {FEATDEF_V_segment , FTQUAL_standard_name},
- {FEATDEF_V_segment , FTQUAL_usedin},
-
- {FEATDEF_variation , FTQUAL_allele},
- {FEATDEF_variation , FTQUAL_frequency},
- {FEATDEF_variation , FTQUAL_label},
- {FEATDEF_variation , FTQUAL_map},
- {FEATDEF_variation , FTQUAL_phenotype},
- {FEATDEF_variation , FTQUAL_product},
- {FEATDEF_variation , FTQUAL_replace},
- {FEATDEF_variation , FTQUAL_standard_name},
- {FEATDEF_variation , FTQUAL_usedin},
-
- {FEATDEF_3clip , FTQUAL_allele},
- {FEATDEF_3clip , FTQUAL_function},
- {FEATDEF_3clip , FTQUAL_label},
- {FEATDEF_3clip , FTQUAL_map},
- {FEATDEF_3clip , FTQUAL_standard_name},
- {FEATDEF_3clip , FTQUAL_usedin},
-
- {FEATDEF_3UTR , FTQUAL_allele},
- {FEATDEF_3UTR , FTQUAL_function},
- {FEATDEF_3UTR , FTQUAL_label},
- {FEATDEF_3UTR , FTQUAL_map},
- {FEATDEF_3UTR , FTQUAL_standard_name},
- {FEATDEF_3UTR , FTQUAL_usedin},
-
- {FEATDEF_5clip , FTQUAL_allele},
- {FEATDEF_5clip , FTQUAL_function},
- {FEATDEF_5clip , FTQUAL_label},
- {FEATDEF_5clip , FTQUAL_map},
- {FEATDEF_5clip , FTQUAL_standard_name},
- {FEATDEF_5clip , FTQUAL_usedin},
-
- {FEATDEF_5UTR , FTQUAL_allele},
- {FEATDEF_5UTR , FTQUAL_function},
- {FEATDEF_5UTR , FTQUAL_label},
- {FEATDEF_5UTR , FTQUAL_map},
- {FEATDEF_5UTR , FTQUAL_standard_name},
- {FEATDEF_5UTR , FTQUAL_usedin},
-
- {FEATDEF_10_signal , FTQUAL_label},
- {FEATDEF_10_signal , FTQUAL_map},
- {FEATDEF_10_signal , FTQUAL_standard_name},
- {FEATDEF_10_signal , FTQUAL_usedin},
-
- {FEATDEF_35_signal , FTQUAL_label},
- {FEATDEF_35_signal , FTQUAL_map},
- {FEATDEF_35_signal , FTQUAL_standard_name},
- {FEATDEF_35_signal , FTQUAL_usedin},
-
- {FEATDEF_REGION , FTQUAL_function},
- {FEATDEF_REGION , FTQUAL_label},
- {FEATDEF_REGION , FTQUAL_map},
- {FEATDEF_REGION , FTQUAL_number},
- {FEATDEF_REGION , FTQUAL_phenotype},
- {FEATDEF_REGION , FTQUAL_product},
- {FEATDEF_REGION , FTQUAL_standard_name},
- {FEATDEF_REGION , FTQUAL_usedin},
-
- {FEATDEF_mat_peptide_aa , FTQUAL_label},
- {FEATDEF_mat_peptide_aa , FTQUAL_map},
- {FEATDEF_mat_peptide_aa , FTQUAL_product},
- {FEATDEF_mat_peptide_aa , FTQUAL_standard_name},
- {FEATDEF_mat_peptide_aa , FTQUAL_usedin},
-
- {FEATDEF_sig_peptide_aa , FTQUAL_label},
- {FEATDEF_sig_peptide_aa , FTQUAL_map},
- {FEATDEF_sig_peptide_aa , FTQUAL_product},
- {FEATDEF_sig_peptide_aa , FTQUAL_standard_name},
- {FEATDEF_sig_peptide_aa , FTQUAL_usedin},
-
- {FEATDEF_transit_peptide_aa , FTQUAL_label},
- {FEATDEF_transit_peptide_aa , FTQUAL_map},
- {FEATDEF_transit_peptide_aa , FTQUAL_product},
- {FEATDEF_transit_peptide_aa , FTQUAL_standard_name},
- {FEATDEF_transit_peptide_aa , FTQUAL_usedin},
-
- {FEATDEF_snoRNA , FTQUAL_function},
- {FEATDEF_snoRNA , FTQUAL_label},
- {FEATDEF_snoRNA , FTQUAL_map},
- {FEATDEF_snoRNA , FTQUAL_product},
- {FEATDEF_snoRNA , FTQUAL_standard_name},
- {FEATDEF_snoRNA , FTQUAL_usedin}
-
+ { FEATDEF_GENE , FTQUAL_allele },
+ { FEATDEF_GENE , FTQUAL_function },
+ { FEATDEF_GENE , FTQUAL_label },
+ { FEATDEF_GENE , FTQUAL_map },
+ { FEATDEF_GENE , FTQUAL_operon },
+ { FEATDEF_GENE , FTQUAL_phenotype },
+ { FEATDEF_GENE , FTQUAL_product },
+ { FEATDEF_GENE , FTQUAL_standard_name },
+ { FEATDEF_GENE , FTQUAL_usedin },
+
+ { FEATDEF_CDS , FTQUAL_allele },
+ { FEATDEF_CDS , FTQUAL_codon },
+ { FEATDEF_CDS , FTQUAL_label },
+ { FEATDEF_CDS , FTQUAL_map },
+ { FEATDEF_CDS , FTQUAL_number },
+ { FEATDEF_CDS , FTQUAL_operon },
+ { FEATDEF_CDS , FTQUAL_standard_name },
+ { FEATDEF_CDS , FTQUAL_usedin },
+
+ { FEATDEF_PROT , FTQUAL_product },
+
+ { FEATDEF_preRNA , FTQUAL_allele },
+ { FEATDEF_preRNA , FTQUAL_function },
+ { FEATDEF_preRNA , FTQUAL_label },
+ { FEATDEF_preRNA , FTQUAL_map },
+ { FEATDEF_preRNA , FTQUAL_operon },
+ { FEATDEF_preRNA , FTQUAL_product },
+ { FEATDEF_preRNA , FTQUAL_standard_name },
+ { FEATDEF_preRNA , FTQUAL_usedin },
+
+ { FEATDEF_mRNA , FTQUAL_allele },
+ { FEATDEF_mRNA , FTQUAL_function },
+ { FEATDEF_mRNA , FTQUAL_label },
+ { FEATDEF_mRNA , FTQUAL_map },
+ { FEATDEF_mRNA , FTQUAL_operon },
+ { FEATDEF_mRNA , FTQUAL_product },
+ { FEATDEF_mRNA , FTQUAL_standard_name },
+ { FEATDEF_mRNA , FTQUAL_usedin },
+
+ { FEATDEF_tRNA , FTQUAL_allele },
+ { FEATDEF_tRNA , FTQUAL_function },
+ { FEATDEF_tRNA , FTQUAL_label },
+ { FEATDEF_tRNA , FTQUAL_map },
+ { FEATDEF_tRNA , FTQUAL_product },
+ { FEATDEF_tRNA , FTQUAL_standard_name },
+ { FEATDEF_tRNA , FTQUAL_usedin },
+
+ { FEATDEF_rRNA , FTQUAL_allele },
+ { FEATDEF_rRNA , FTQUAL_function },
+ { FEATDEF_rRNA , FTQUAL_label },
+ { FEATDEF_rRNA , FTQUAL_map },
+ { FEATDEF_rRNA , FTQUAL_product },
+ { FEATDEF_rRNA , FTQUAL_standard_name },
+ { FEATDEF_rRNA , FTQUAL_usedin },
+
+ { FEATDEF_snRNA , FTQUAL_allele },
+ { FEATDEF_snRNA , FTQUAL_function },
+ { FEATDEF_snRNA , FTQUAL_label },
+ { FEATDEF_snRNA , FTQUAL_map },
+ { FEATDEF_snRNA , FTQUAL_product },
+ { FEATDEF_snRNA , FTQUAL_standard_name },
+ { FEATDEF_snRNA , FTQUAL_usedin },
+
+ { FEATDEF_scRNA , FTQUAL_allele },
+ { FEATDEF_scRNA , FTQUAL_function },
+ { FEATDEF_scRNA , FTQUAL_label },
+ { FEATDEF_scRNA , FTQUAL_map },
+ { FEATDEF_scRNA , FTQUAL_product },
+ { FEATDEF_scRNA , FTQUAL_standard_name },
+ { FEATDEF_scRNA , FTQUAL_usedin },
+
+ { FEATDEF_otherRNA , FTQUAL_allele },
+ { FEATDEF_otherRNA , FTQUAL_function },
+ { FEATDEF_otherRNA , FTQUAL_label },
+ { FEATDEF_otherRNA , FTQUAL_map },
+ { FEATDEF_otherRNA , FTQUAL_operon },
+ { FEATDEF_otherRNA , FTQUAL_product },
+ { FEATDEF_otherRNA , FTQUAL_standard_name },
+ { FEATDEF_otherRNA , FTQUAL_usedin },
+
+ { FEATDEF_attenuator , FTQUAL_allele },
+ { FEATDEF_attenuator , FTQUAL_label },
+ { FEATDEF_attenuator , FTQUAL_map },
+ { FEATDEF_attenuator , FTQUAL_operon },
+ { FEATDEF_attenuator , FTQUAL_phenotype },
+ { FEATDEF_attenuator , FTQUAL_usedin },
+
+ { FEATDEF_C_region , FTQUAL_allele },
+ { FEATDEF_C_region , FTQUAL_label },
+ { FEATDEF_C_region , FTQUAL_map },
+ { FEATDEF_C_region , FTQUAL_product },
+ { FEATDEF_C_region , FTQUAL_standard_name },
+ { FEATDEF_C_region , FTQUAL_usedin },
+
+ { FEATDEF_CAAT_signal , FTQUAL_allele },
+ { FEATDEF_CAAT_signal , FTQUAL_label },
+ { FEATDEF_CAAT_signal , FTQUAL_map },
+ { FEATDEF_CAAT_signal , FTQUAL_usedin },
+
+ { FEATDEF_Imp_CDS , FTQUAL_codon },
+ { FEATDEF_Imp_CDS , FTQUAL_EC_number },
+ { FEATDEF_Imp_CDS , FTQUAL_function },
+ { FEATDEF_Imp_CDS , FTQUAL_label },
+ { FEATDEF_Imp_CDS , FTQUAL_map },
+ { FEATDEF_Imp_CDS , FTQUAL_number },
+ { FEATDEF_Imp_CDS , FTQUAL_operon },
+ { FEATDEF_Imp_CDS , FTQUAL_product },
+ { FEATDEF_Imp_CDS , FTQUAL_standard_name },
+ { FEATDEF_Imp_CDS , FTQUAL_usedin },
+
+ { FEATDEF_conflict , FTQUAL_allele },
+ { FEATDEF_conflict , FTQUAL_label },
+ { FEATDEF_conflict , FTQUAL_map },
+ { FEATDEF_conflict , FTQUAL_replace },
+ { FEATDEF_conflict , FTQUAL_usedin },
+
+ { FEATDEF_D_loop , FTQUAL_allele },
+ { FEATDEF_D_loop , FTQUAL_label },
+ { FEATDEF_D_loop , FTQUAL_map },
+ { FEATDEF_D_loop , FTQUAL_usedin },
+
+ { FEATDEF_D_segment , FTQUAL_allele },
+ { FEATDEF_D_segment , FTQUAL_label },
+ { FEATDEF_D_segment , FTQUAL_map },
+ { FEATDEF_D_segment , FTQUAL_product },
+ { FEATDEF_D_segment , FTQUAL_standard_name },
+ { FEATDEF_D_segment , FTQUAL_usedin },
+
+ { FEATDEF_enhancer , FTQUAL_allele },
+ { FEATDEF_enhancer , FTQUAL_label },
+ { FEATDEF_enhancer , FTQUAL_map },
+ { FEATDEF_enhancer , FTQUAL_standard_name },
+ { FEATDEF_enhancer , FTQUAL_usedin },
+
+ { FEATDEF_exon , FTQUAL_allele },
+ { FEATDEF_exon , FTQUAL_EC_number },
+ { FEATDEF_exon , FTQUAL_function },
+ { FEATDEF_exon , FTQUAL_label },
+ { FEATDEF_exon , FTQUAL_map },
+ { FEATDEF_exon , FTQUAL_number },
+ { FEATDEF_exon , FTQUAL_product },
+ { FEATDEF_exon , FTQUAL_standard_name },
+ { FEATDEF_exon , FTQUAL_usedin },
+
+ { FEATDEF_GC_signal , FTQUAL_allele },
+ { FEATDEF_GC_signal , FTQUAL_label },
+ { FEATDEF_GC_signal , FTQUAL_map },
+ { FEATDEF_GC_signal , FTQUAL_usedin },
+
+ { FEATDEF_iDNA , FTQUAL_allele },
+ { FEATDEF_iDNA , FTQUAL_function },
+ { FEATDEF_iDNA , FTQUAL_label },
+ { FEATDEF_iDNA , FTQUAL_map },
+ { FEATDEF_iDNA , FTQUAL_number },
+ { FEATDEF_iDNA , FTQUAL_standard_name },
+ { FEATDEF_iDNA , FTQUAL_usedin },
+
+ { FEATDEF_intron , FTQUAL_allele },
+ { FEATDEF_intron , FTQUAL_cons_splice },
+ { FEATDEF_intron , FTQUAL_function },
+ { FEATDEF_intron , FTQUAL_label },
+ { FEATDEF_intron , FTQUAL_map },
+ { FEATDEF_intron , FTQUAL_number },
+ { FEATDEF_intron , FTQUAL_standard_name },
+ { FEATDEF_intron , FTQUAL_usedin },
+
+ { FEATDEF_J_segment , FTQUAL_allele },
+ { FEATDEF_J_segment , FTQUAL_label },
+ { FEATDEF_J_segment , FTQUAL_map },
+ { FEATDEF_J_segment , FTQUAL_product },
+ { FEATDEF_J_segment , FTQUAL_standard_name },
+ { FEATDEF_J_segment , FTQUAL_usedin },
+
+ { FEATDEF_LTR , FTQUAL_allele },
+ { FEATDEF_LTR , FTQUAL_function },
+ { FEATDEF_LTR , FTQUAL_label },
+ { FEATDEF_LTR , FTQUAL_map },
+ { FEATDEF_LTR , FTQUAL_standard_name },
+ { FEATDEF_LTR , FTQUAL_usedin },
+
+ { FEATDEF_mat_peptide , FTQUAL_allele },
+ { FEATDEF_mat_peptide , FTQUAL_EC_number },
+ { FEATDEF_mat_peptide , FTQUAL_function },
+ { FEATDEF_mat_peptide , FTQUAL_label },
+ { FEATDEF_mat_peptide , FTQUAL_map },
+ { FEATDEF_mat_peptide , FTQUAL_product },
+ { FEATDEF_mat_peptide , FTQUAL_standard_name },
+ { FEATDEF_mat_peptide , FTQUAL_usedin },
+
+ { FEATDEF_misc_binding , FTQUAL_allele },
+ { FEATDEF_misc_binding , FTQUAL_bound_moiety },
+ { FEATDEF_misc_binding , FTQUAL_function },
+ { FEATDEF_misc_binding , FTQUAL_label },
+ { FEATDEF_misc_binding , FTQUAL_map },
+ { FEATDEF_misc_binding , FTQUAL_usedin },
+
+ { FEATDEF_misc_difference , FTQUAL_allele },
+ { FEATDEF_misc_difference , FTQUAL_clone },
+ { FEATDEF_misc_difference , FTQUAL_label },
+ { FEATDEF_misc_difference , FTQUAL_map },
+ { FEATDEF_misc_difference , FTQUAL_phenotype },
+ { FEATDEF_misc_difference , FTQUAL_replace },
+ { FEATDEF_misc_difference , FTQUAL_standard_name },
+ { FEATDEF_misc_difference , FTQUAL_usedin },
+
+ { FEATDEF_misc_feature , FTQUAL_allele },
+ { FEATDEF_misc_feature , FTQUAL_function },
+ { FEATDEF_misc_feature , FTQUAL_label },
+ { FEATDEF_misc_feature , FTQUAL_map },
+ { FEATDEF_misc_feature , FTQUAL_number },
+ { FEATDEF_misc_feature , FTQUAL_phenotype },
+ { FEATDEF_misc_feature , FTQUAL_product },
+ { FEATDEF_misc_feature , FTQUAL_standard_name },
+ { FEATDEF_misc_feature , FTQUAL_usedin },
+
+ { FEATDEF_misc_recomb , FTQUAL_allele },
+ { FEATDEF_misc_recomb , FTQUAL_label },
+ { FEATDEF_misc_recomb , FTQUAL_map },
+ { FEATDEF_misc_recomb , FTQUAL_organism },
+ { FEATDEF_misc_recomb , FTQUAL_standard_name },
+ { FEATDEF_misc_recomb , FTQUAL_usedin },
+
+ { FEATDEF_misc_signal , FTQUAL_allele },
+ { FEATDEF_misc_signal , FTQUAL_function },
+ { FEATDEF_misc_signal , FTQUAL_label },
+ { FEATDEF_misc_signal , FTQUAL_map },
+ { FEATDEF_misc_signal , FTQUAL_operon },
+ { FEATDEF_misc_signal , FTQUAL_phenotype },
+ { FEATDEF_misc_signal , FTQUAL_standard_name },
+ { FEATDEF_misc_signal , FTQUAL_usedin },
+
+ { FEATDEF_misc_structure , FTQUAL_allele },
+ { FEATDEF_misc_structure , FTQUAL_function },
+ { FEATDEF_misc_structure , FTQUAL_label },
+ { FEATDEF_misc_structure , FTQUAL_map },
+ { FEATDEF_misc_structure , FTQUAL_standard_name },
+ { FEATDEF_misc_structure , FTQUAL_usedin },
+
+ { FEATDEF_modified_base , FTQUAL_allele },
+ { FEATDEF_modified_base , FTQUAL_frequency },
+ { FEATDEF_modified_base , FTQUAL_label },
+ { FEATDEF_modified_base , FTQUAL_map },
+ { FEATDEF_modified_base , FTQUAL_mod_base },
+ { FEATDEF_modified_base , FTQUAL_usedin },
+
+ { FEATDEF_N_region , FTQUAL_allele },
+ { FEATDEF_N_region , FTQUAL_label },
+ { FEATDEF_N_region , FTQUAL_map },
+ { FEATDEF_N_region , FTQUAL_product },
+ { FEATDEF_N_region , FTQUAL_standard_name },
+ { FEATDEF_N_region , FTQUAL_usedin },
+
+ { FEATDEF_old_sequence , FTQUAL_allele },
+ { FEATDEF_old_sequence , FTQUAL_label },
+ { FEATDEF_old_sequence , FTQUAL_map },
+ { FEATDEF_old_sequence , FTQUAL_replace },
+ { FEATDEF_old_sequence , FTQUAL_usedin },
+
+ { FEATDEF_polyA_signal , FTQUAL_allele },
+ { FEATDEF_polyA_signal , FTQUAL_label },
+ { FEATDEF_polyA_signal , FTQUAL_map },
+ { FEATDEF_polyA_signal , FTQUAL_usedin },
+
+ { FEATDEF_polyA_site , FTQUAL_allele },
+ { FEATDEF_polyA_site , FTQUAL_label },
+ { FEATDEF_polyA_site , FTQUAL_map },
+ { FEATDEF_polyA_site , FTQUAL_usedin },
+
+ { FEATDEF_prim_transcript , FTQUAL_allele },
+ { FEATDEF_prim_transcript , FTQUAL_function },
+ { FEATDEF_prim_transcript , FTQUAL_label },
+ { FEATDEF_prim_transcript , FTQUAL_map },
+ { FEATDEF_prim_transcript , FTQUAL_operon },
+ { FEATDEF_prim_transcript , FTQUAL_standard_name },
+ { FEATDEF_prim_transcript , FTQUAL_usedin },
+
+ { FEATDEF_primer_bind , FTQUAL_allele },
+ { FEATDEF_primer_bind , FTQUAL_label },
+ { FEATDEF_primer_bind , FTQUAL_map },
+ { FEATDEF_primer_bind , FTQUAL_PCR_conditions },
+ { FEATDEF_primer_bind , FTQUAL_standard_name },
+ { FEATDEF_primer_bind , FTQUAL_usedin },
+
+ { FEATDEF_promoter , FTQUAL_allele },
+ { FEATDEF_promoter , FTQUAL_function },
+ { FEATDEF_promoter , FTQUAL_label },
+ { FEATDEF_promoter , FTQUAL_map },
+ { FEATDEF_promoter , FTQUAL_operon },
+ { FEATDEF_promoter , FTQUAL_phenotype },
+ { FEATDEF_promoter , FTQUAL_standard_name },
+ { FEATDEF_promoter , FTQUAL_usedin },
+
+ { FEATDEF_protein_bind , FTQUAL_allele },
+ { FEATDEF_protein_bind , FTQUAL_bound_moiety },
+ { FEATDEF_protein_bind , FTQUAL_function },
+ { FEATDEF_protein_bind , FTQUAL_label },
+ { FEATDEF_protein_bind , FTQUAL_map },
+ { FEATDEF_protein_bind , FTQUAL_standard_name },
+ { FEATDEF_protein_bind , FTQUAL_usedin },
+
+ { FEATDEF_RBS , FTQUAL_allele },
+ { FEATDEF_RBS , FTQUAL_label },
+ { FEATDEF_RBS , FTQUAL_map },
+ { FEATDEF_RBS , FTQUAL_standard_name },
+ { FEATDEF_RBS , FTQUAL_usedin },
+
+ { FEATDEF_repeat_region , FTQUAL_allele },
+ { FEATDEF_repeat_region , FTQUAL_function },
+ { FEATDEF_repeat_region , FTQUAL_insertion_seq },
+ { FEATDEF_repeat_region , FTQUAL_label },
+ { FEATDEF_repeat_region , FTQUAL_map },
+ { FEATDEF_repeat_region , FTQUAL_rpt_family },
+ { FEATDEF_repeat_region , FTQUAL_rpt_type },
+ { FEATDEF_repeat_region , FTQUAL_rpt_unit },
+ { FEATDEF_repeat_region , FTQUAL_standard_name },
+ { FEATDEF_repeat_region , FTQUAL_transposon },
+ { FEATDEF_repeat_region , FTQUAL_usedin },
+
+ { FEATDEF_repeat_unit , FTQUAL_allele },
+ { FEATDEF_repeat_unit , FTQUAL_function },
+ { FEATDEF_repeat_unit , FTQUAL_label },
+ { FEATDEF_repeat_unit , FTQUAL_map },
+ { FEATDEF_repeat_unit , FTQUAL_rpt_family },
+ { FEATDEF_repeat_unit , FTQUAL_rpt_type },
+ { FEATDEF_repeat_unit , FTQUAL_usedin },
+
+ { FEATDEF_rep_origin , FTQUAL_allele },
+ { FEATDEF_rep_origin , FTQUAL_direction },
+ { FEATDEF_rep_origin , FTQUAL_label },
+ { FEATDEF_rep_origin , FTQUAL_map },
+ { FEATDEF_rep_origin , FTQUAL_standard_name },
+ { FEATDEF_rep_origin , FTQUAL_usedin },
+
+ { FEATDEF_S_region , FTQUAL_allele },
+ { FEATDEF_S_region , FTQUAL_label },
+ { FEATDEF_S_region , FTQUAL_map },
+ { FEATDEF_S_region , FTQUAL_product },
+ { FEATDEF_S_region , FTQUAL_standard_name },
+ { FEATDEF_S_region , FTQUAL_usedin },
+
+ { FEATDEF_satellite , FTQUAL_allele },
+ { FEATDEF_satellite , FTQUAL_label },
+ { FEATDEF_satellite , FTQUAL_map },
+ { FEATDEF_satellite , FTQUAL_rpt_family },
+ { FEATDEF_satellite , FTQUAL_rpt_type },
+ { FEATDEF_satellite , FTQUAL_rpt_unit },
+ { FEATDEF_satellite , FTQUAL_standard_name },
+ { FEATDEF_satellite , FTQUAL_usedin },
+
+ { FEATDEF_sig_peptide , FTQUAL_allele },
+ { FEATDEF_sig_peptide , FTQUAL_function },
+ { FEATDEF_sig_peptide , FTQUAL_label },
+ { FEATDEF_sig_peptide , FTQUAL_map },
+ { FEATDEF_sig_peptide , FTQUAL_product },
+ { FEATDEF_sig_peptide , FTQUAL_standard_name },
+ { FEATDEF_sig_peptide , FTQUAL_usedin },
+
+ { FEATDEF_stem_loop , FTQUAL_allele },
+ { FEATDEF_stem_loop , FTQUAL_function },
+ { FEATDEF_stem_loop , FTQUAL_label },
+ { FEATDEF_stem_loop , FTQUAL_map },
+ { FEATDEF_stem_loop , FTQUAL_operon },
+ { FEATDEF_stem_loop , FTQUAL_standard_name },
+ { FEATDEF_stem_loop , FTQUAL_usedin },
+
+ { FEATDEF_STS , FTQUAL_allele },
+ { FEATDEF_STS , FTQUAL_label },
+ { FEATDEF_STS , FTQUAL_map },
+ { FEATDEF_STS , FTQUAL_standard_name },
+ { FEATDEF_STS , FTQUAL_usedin },
+
+ { FEATDEF_TATA_signal , FTQUAL_allele },
+ { FEATDEF_TATA_signal , FTQUAL_label },
+ { FEATDEF_TATA_signal , FTQUAL_map },
+ { FEATDEF_TATA_signal , FTQUAL_usedin },
+
+ { FEATDEF_terminator , FTQUAL_allele },
+ { FEATDEF_terminator , FTQUAL_label },
+ { FEATDEF_terminator , FTQUAL_map },
+ { FEATDEF_terminator , FTQUAL_operon },
+ { FEATDEF_terminator , FTQUAL_standard_name },
+ { FEATDEF_terminator , FTQUAL_usedin },
+
+ { FEATDEF_transit_peptide , FTQUAL_allele },
+ { FEATDEF_transit_peptide , FTQUAL_function },
+ { FEATDEF_transit_peptide , FTQUAL_label },
+ { FEATDEF_transit_peptide , FTQUAL_map },
+ { FEATDEF_transit_peptide , FTQUAL_product },
+ { FEATDEF_transit_peptide , FTQUAL_standard_name },
+ { FEATDEF_transit_peptide , FTQUAL_usedin },
+
+ { FEATDEF_unsure , FTQUAL_allele },
+ { FEATDEF_unsure , FTQUAL_label },
+ { FEATDEF_unsure , FTQUAL_map },
+ { FEATDEF_unsure , FTQUAL_replace },
+ { FEATDEF_unsure , FTQUAL_usedin },
+
+ { FEATDEF_V_region , FTQUAL_allele },
+ { FEATDEF_V_region , FTQUAL_label },
+ { FEATDEF_V_region , FTQUAL_map },
+ { FEATDEF_V_region , FTQUAL_product },
+ { FEATDEF_V_region , FTQUAL_standard_name },
+ { FEATDEF_V_region , FTQUAL_usedin },
+
+ { FEATDEF_V_segment , FTQUAL_allele },
+ { FEATDEF_V_segment , FTQUAL_label },
+ { FEATDEF_V_segment , FTQUAL_map },
+ { FEATDEF_V_segment , FTQUAL_product },
+ { FEATDEF_V_segment , FTQUAL_standard_name },
+ { FEATDEF_V_segment , FTQUAL_usedin },
+
+ { FEATDEF_variation , FTQUAL_allele },
+ { FEATDEF_variation , FTQUAL_frequency },
+ { FEATDEF_variation , FTQUAL_label },
+ { FEATDEF_variation , FTQUAL_map },
+ { FEATDEF_variation , FTQUAL_phenotype },
+ { FEATDEF_variation , FTQUAL_product },
+ { FEATDEF_variation , FTQUAL_replace },
+ { FEATDEF_variation , FTQUAL_standard_name },
+ { FEATDEF_variation , FTQUAL_usedin },
+
+ { FEATDEF_3clip , FTQUAL_allele },
+ { FEATDEF_3clip , FTQUAL_function },
+ { FEATDEF_3clip , FTQUAL_label },
+ { FEATDEF_3clip , FTQUAL_map },
+ { FEATDEF_3clip , FTQUAL_standard_name },
+ { FEATDEF_3clip , FTQUAL_usedin },
+
+ { FEATDEF_3UTR , FTQUAL_allele },
+ { FEATDEF_3UTR , FTQUAL_function },
+ { FEATDEF_3UTR , FTQUAL_label },
+ { FEATDEF_3UTR , FTQUAL_map },
+ { FEATDEF_3UTR , FTQUAL_standard_name },
+ { FEATDEF_3UTR , FTQUAL_usedin },
+
+ { FEATDEF_5clip , FTQUAL_allele },
+ { FEATDEF_5clip , FTQUAL_function },
+ { FEATDEF_5clip , FTQUAL_label },
+ { FEATDEF_5clip , FTQUAL_map },
+ { FEATDEF_5clip , FTQUAL_standard_name },
+ { FEATDEF_5clip , FTQUAL_usedin },
+
+ { FEATDEF_5UTR , FTQUAL_allele },
+ { FEATDEF_5UTR , FTQUAL_function },
+ { FEATDEF_5UTR , FTQUAL_label },
+ { FEATDEF_5UTR , FTQUAL_map },
+ { FEATDEF_5UTR , FTQUAL_standard_name },
+ { FEATDEF_5UTR , FTQUAL_usedin },
+
+ { FEATDEF_10_signal , FTQUAL_allele },
+ { FEATDEF_10_signal , FTQUAL_label },
+ { FEATDEF_10_signal , FTQUAL_map },
+ { FEATDEF_10_signal , FTQUAL_operon },
+ { FEATDEF_10_signal , FTQUAL_standard_name },
+ { FEATDEF_10_signal , FTQUAL_usedin },
+
+ { FEATDEF_35_signal , FTQUAL_allele },
+ { FEATDEF_35_signal , FTQUAL_label },
+ { FEATDEF_35_signal , FTQUAL_map },
+ { FEATDEF_35_signal , FTQUAL_operon },
+ { FEATDEF_35_signal , FTQUAL_standard_name },
+ { FEATDEF_35_signal , FTQUAL_usedin },
+
+ { FEATDEF_REGION , FTQUAL_function },
+ { FEATDEF_REGION , FTQUAL_label },
+ { FEATDEF_REGION , FTQUAL_map },
+ { FEATDEF_REGION , FTQUAL_number },
+ { FEATDEF_REGION , FTQUAL_phenotype },
+ { FEATDEF_REGION , FTQUAL_product },
+ { FEATDEF_REGION , FTQUAL_standard_name },
+ { FEATDEF_REGION , FTQUAL_usedin },
+
+ { FEATDEF_mat_peptide_aa , FTQUAL_allele },
+ { FEATDEF_mat_peptide_aa , FTQUAL_label },
+ { FEATDEF_mat_peptide_aa , FTQUAL_map },
+ { FEATDEF_mat_peptide_aa , FTQUAL_product },
+ { FEATDEF_mat_peptide_aa , FTQUAL_standard_name },
+ { FEATDEF_mat_peptide_aa , FTQUAL_usedin },
+
+ { FEATDEF_sig_peptide_aa , FTQUAL_allele },
+ { FEATDEF_sig_peptide_aa , FTQUAL_label },
+ { FEATDEF_sig_peptide_aa , FTQUAL_map },
+ { FEATDEF_sig_peptide_aa , FTQUAL_product },
+ { FEATDEF_sig_peptide_aa , FTQUAL_standard_name },
+ { FEATDEF_sig_peptide_aa , FTQUAL_usedin },
+
+ { FEATDEF_transit_peptide_aa , FTQUAL_allele },
+ { FEATDEF_transit_peptide_aa , FTQUAL_label },
+ { FEATDEF_transit_peptide_aa , FTQUAL_map },
+ { FEATDEF_transit_peptide_aa , FTQUAL_product },
+ { FEATDEF_transit_peptide_aa , FTQUAL_standard_name },
+ { FEATDEF_transit_peptide_aa , FTQUAL_usedin },
+
+ { FEATDEF_snoRNA , FTQUAL_allele },
+ { FEATDEF_snoRNA , FTQUAL_function },
+ { FEATDEF_snoRNA , FTQUAL_label },
+ { FEATDEF_snoRNA , FTQUAL_map },
+ { FEATDEF_snoRNA , FTQUAL_product },
+ { FEATDEF_snoRNA , FTQUAL_standard_name },
+ { FEATDEF_snoRNA , FTQUAL_usedin },
+
+ { FEATDEF_operon , FTQUAL_allele },
+ { FEATDEF_operon , FTQUAL_function },
+ { FEATDEF_operon , FTQUAL_label },
+ { FEATDEF_operon , FTQUAL_map },
+ { FEATDEF_operon , FTQUAL_operon },
+ { FEATDEF_operon , FTQUAL_phenotype },
+ { FEATDEF_operon , FTQUAL_standard_name },
+ { FEATDEF_operon , FTQUAL_usedin },
+
+ { FEATDEF_oriT , FTQUAL_allele },
+ { FEATDEF_oriT , FTQUAL_direction },
+ { FEATDEF_oriT , FTQUAL_label },
+ { FEATDEF_oriT , FTQUAL_map },
+ { FEATDEF_oriT , FTQUAL_rpt_family }, /* rpt_family goes before rpt_type, as in the repeat_region and satellite entries; rpt_type must appear only once */
+ { FEATDEF_oriT , FTQUAL_rpt_type },
+ { FEATDEF_oriT , FTQUAL_rpt_unit },
+ { FEATDEF_oriT , FTQUAL_standard_name },
+ { FEATDEF_oriT , FTQUAL_usedin }
};
/* comparison of ValQual's -- first compare featdef then ftqual */
@@ -9406,7 +9745,27 @@ static CharPtr validConsSpliceString [] = {
};
static CharPtr validExceptionString [] = { /* NULL-terminated list of exception text strings */
- "RNA editing", "reasons given in citation", NULL
+ "RNA editing",
+ "reasons given in citation",
+ NULL
+};
+
+static CharPtr validRefSeqExceptionString [] = { /* NULL-terminated; holds both validExceptionString entries plus further texts, several in two spellings (e.g. "trans splicing" and "trans-splicing") */
+ "RNA editing",
+ "reasons given in citation",
+ "ribosomal slippage",
+ "ribosome slippage",
+ "trans splicing",
+ "trans-splicing",
+ "alternative processing",
+ "alternate processing",
+ "artificial frameshift",
+ "non-consensus splice site",
+ "nonconsensus splice site",
+ "rearrangement required for product",
+ "unclassified transcription discrepancy",
+ "unclassified translation discrepancy",
+ NULL
};
static Boolean StringInStringList (CharPtr testString, CharPtr PNTR stringList) {
@@ -9580,6 +9939,7 @@ static Boolean ValidateRptUnit (
)
{
+#if 0
CharPtr str;
Char tmp [255];
@@ -9607,6 +9967,7 @@ static Boolean ValidateRptUnit (
while (IS_DIGIT (*str)) str++;
if (*str != '\0') return FALSE; /* mustn't be anything after the yyy */
}
+#endif
return TRUE;
}
@@ -10796,6 +11157,7 @@ static void AddLocusBlock (
)
{
+ size_t acclen;
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
BaseBlockPtr bbp;
@@ -10822,9 +11184,11 @@ static void AddLocusBlock (
Int2 istrand;
Boolean is_nm = FALSE;
Boolean is_np = FALSE;
+ Boolean is_nz = FALSE;
Boolean is_transgenic = FALSE;
Char len [32];
Int4 length;
+ size_t loclen;
Char locus [41];
MolInfoPtr mip;
Char mol [30];
@@ -10887,6 +11251,8 @@ static void AddLocusBlock (
} else if (StringNCmp (tsip->accession, "NP_", 3) == 0 ||
StringNCmp (tsip->accession, "XP_", 3) == 0) {
is_np = TRUE;
+ } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) {
+ is_nz = TRUE;
}
}
break;
@@ -10936,6 +11302,7 @@ static void AddLocusBlock (
}
}
if (nm != NULL) {
+ /*
sfp = SeqMgrGetNextFeature (nm, NULL, SEQFEAT_GENE, 0, &fcontext);
if (sfp != NULL) {
StringNCpy_0 (gene, fcontext.label, sizeof (gene));
@@ -10946,6 +11313,7 @@ static void AddLocusBlock (
gene [0] = '\0';
}
}
+ */
}
/* more complicated code to get parent locus, if segmented, goes here */
@@ -10992,10 +11360,15 @@ static void AddLocusBlock (
case SEQID_DDBJ :
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL && tsip->accession != NULL) {
- if (StringLen (tsip->accession) == 12) {
+ acclen = StringLen (tsip->accession);
+ if (acclen == 12) {
if (StringCmp (tsip->accession + 6, "000000") == 0) {
wgsmaster = TRUE;
}
+ } else if (acclen == 13) {
+ if (StringCmp (tsip->accession + 6, "0000000") == 0) {
+ wgsmaster = TRUE;
+ }
}
}
break;
@@ -11060,7 +11433,7 @@ static void AddLocusBlock (
if (awp->newLocusLine) {
- if (wgsmaster) {
+ if (wgsmaster && (! is_nz)) {
sprintf (len, "%ld rc", (long) length);
} else {
sprintf (len, "%ld bp", (long) length);
@@ -11333,7 +11706,10 @@ static void AddLocusBlock (
} else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
- FFAddNChar(ffstring, ' ', 15 - 5 - StringLen(locus), FALSE);
+ loclen = StringLen(locus); /* length of the locus text just added after the ID tag */
+ if (loclen < (size_t) (14 - 5)) { /* loclen is size_t, so 14 - 5 - loclen wraps instead of going negative; compare before subtracting */
+ FFAddNChar(ffstring, ' ', 14 - 5 - loclen, FALSE); /* fill with spaces up to 14 - 5 characters */
+ }
if (awp->hup) {
FFAddOneString (ffstring, " confidential; ", FALSE, FALSE, TILDE_IGNORE);
} else {
@@ -11428,9 +11804,9 @@ static void AddDeflineBlock (
Asn2gbSectPtr asp;
BaseBlockPtr bbp;
BioseqPtr bsp;
- Char buf[1024];
+ Char buf[4096];
/*CharPtr buf;
- size_t buflen = 1024;*/
+ size_t buflen = 4096;*/
SeqMgrDescContext dcontext;
GBSeqPtr gbseq;
ItemInfo ii;
@@ -11506,6 +11882,7 @@ static void AddAccessionBlock (
)
{
+ size_t acclen;
SeqIdPtr accn = NULL;
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
@@ -11554,9 +11931,13 @@ static void AddAccessionBlock (
accn = sip;
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL) {
- if (StringLen (tsip->accession) == 12) {
+ acclen = StringLen (tsip->accession);
+ if (acclen == 12) {
wgsaccn = tsip->accession;
len = 12;
+ } else if (acclen == 13) {
+ wgsaccn = tsip->accession;
+ len = 13;
}
}
break;
@@ -11661,9 +12042,12 @@ static void AddAccessionBlock (
mip = (MolInfoPtr) sdp->data.ptrvalue;
if (mip != NULL && mip->tech == MI_TECH_wgs) {
StringNCpy_0 (buf, wgsaccn, sizeof (buf));
- if (StringCmp (buf + len - 6, "000000") != 0) {
+ acclen = StringLen (buf);
+ if (acclen == 12 && StringCmp (buf + len - 6, "000000") != 0) {
StringCpy (buf + len - 6, "000000");
- } else if (StringCmp (buf + len - 8, "00000000") != 0) {
+ } else if (acclen == 13 && StringCmp (buf + len - 7, "0000000") != 0) {
+ StringCpy (buf + len - 7, "0000000");
+ } else if (acclen == 15 && StringCmp (buf + len - 8, "00000000") != 0) {
StringCpy (buf + len - 8, "00000000");
} else {
buf [0] = '\0';
@@ -13253,7 +13637,9 @@ static void AddSourceBlock (
IntAsn2gbJobPtr ajp;
BaseBlockPtr bbp;
BioseqPtr bsp;
+ SeqFeatPtr cds;
SeqMgrDescContext dcontext;
+ BioseqPtr dna;
SeqMgrFeatContext fcontext;
GBBlockPtr gbp;
SeqDescrPtr sdp;
@@ -13290,6 +13676,29 @@ static void AddSourceBlock (
bbp->entityID = fcontext.entityID;
bbp->itemID = fcontext.itemID;
bbp->itemtype = OBJ_SEQFEAT;
+ } else if (ISA_aa (bsp->mol)) {
+
+ /* if protein with no sources, get sources applicable to DNA location of CDS */
+
+ cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
+ if (cds != NULL) {
+ sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
+ if (sfp != NULL) {
+ bbp->entityID = fcontext.entityID;
+ bbp->itemID = fcontext.itemID;
+ bbp->itemtype = OBJ_SEQFEAT;
+ } else {
+ dna = BioseqFindFromSeqLoc (cds->location);
+ if (dna != NULL) {
+ sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
+ if (sdp != NULL) {
+ bbp->entityID = dcontext.entityID;
+ bbp->itemID = dcontext.itemID;
+ bbp->itemtype = OBJ_SEQDESC;
+ }
+ }
+ }
+ }
}
}
}
@@ -13360,7 +13769,7 @@ static RefBlockPtr AddPub (
)
{
- Char buf [121];
+ Char buf [521]; /* increased for consortium in citsub */
CitArtPtr cap;
CitBookPtr cbp;
CitGenPtr cgp;
@@ -13665,6 +14074,7 @@ static int LIBCALLBACK SortReferences (
rbp1 = rbp2;
rbp2 = temp;
}
+
/* if same uid, one with just uids goes last to be excised but remembered */
if ((rbp1->pmid != 0 && rbp2->pmid != 0) || (rbp1->muid != 0 && rbp2->muid != 0)) {
@@ -13794,6 +14204,20 @@ static CharPtr GetAuthorsPlusConsortium (
return tmp;
}
+/* Returns TRUE when pdp is NULL or when no node on the pdp->pub chain has */
+/* choice PUB_PMid or PUB_Muid; returns FALSE at the first such node. */
+static Boolean HasNoPmidOrMuid (
+ PubdescPtr pdp
+)
+
+{
+ ValNodePtr node;
+
+ if (pdp != NULL) {
+ node = pdp->pub;
+ while (node != NULL) {
+ if (node->choice == PUB_PMid) return FALSE;
+ if (node->choice == PUB_Muid) return FALSE;
+ node = node->next;
+ }
+ }
+ return TRUE;
+}
+
typedef struct cdspubs {
Asn2gbWorkPtr awp;
BioseqPtr target;
@@ -13810,6 +14234,7 @@ static Boolean LIBCALLBACK GetRefsOnCDS (
Asn2gbWorkPtr awp;
CdsPubsPtr cpp;
IntRefBlockPtr irp;
+ Boolean okay;
PubdescPtr pdp;
RefBlockPtr rbp;
@@ -13818,21 +14243,30 @@ static Boolean LIBCALLBACK GetRefsOnCDS (
awp = cpp->awp;
if (awp == NULL) return TRUE;
+ okay = TRUE;
pdp = (PubdescPtr) sfp->data.value.ptrvalue;
- rbp = AddPub (awp, &(awp->pubhead), pdp);
- if (rbp != NULL) {
+ if (awp->format == FTABLE_FMT) {
+ if (HasNoPmidOrMuid (pdp)) {
+ okay = FALSE;
+ }
+ }
- rbp->entityID = context->entityID;
- rbp->itemID = context->itemID;
- rbp->itemtype = OBJ_SEQFEAT;
+ if (okay) {
+ rbp = AddPub (awp, &(awp->pubhead), pdp);
+ if (rbp != NULL) {
- irp = (IntRefBlockPtr) rbp;
- irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE);
- alp = GetAuthListPtr (pdp, NULL);
- if (alp != NULL) {
- irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
+ rbp->entityID = context->entityID;
+ rbp->itemID = context->itemID;
+ rbp->itemtype = OBJ_SEQFEAT;
+
+ irp = (IntRefBlockPtr) rbp;
+ irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE);
+ alp = GetAuthListPtr (pdp, NULL);
+ if (alp != NULL) {
+ irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
+ }
+ irp->index = 0;
}
- irp->index = 0;
}
return TRUE;
@@ -13917,6 +14351,12 @@ static void GetRefsOnBioseq (
}
}
}
+ if (awp->format == FTABLE_FMT) {
+ pdp = (PubdescPtr) sdp->data.ptrvalue;
+ if (HasNoPmidOrMuid (pdp)) {
+ okay = FALSE;
+ }
+ }
if (okay) {
pdp = (PubdescPtr) sdp->data.ptrvalue;
@@ -13939,8 +14379,6 @@ static void GetRefsOnBioseq (
sdp = SeqMgrGetNextDescriptor (target, sdp, Seq_descr_pub, &dcontext);
}
- SeqIdFree (sint.id);
-
/* if protein with no pubs, get pubs applicable to DNA location of CDS */
if (cdsloc != NULL) {
@@ -13950,6 +14388,8 @@ static void GetRefsOnBioseq (
SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS);
}
+ SeqIdFree (sint.id);
+
/* features are indexed on parent if segmented */
bsp = awp->parent;
@@ -13976,6 +14416,12 @@ static void GetRefsOnBioseq (
takeIt = TRUE;
}
}
+ if (awp->format == FTABLE_FMT) {
+ pdp = (PubdescPtr) sdp->data.ptrvalue;
+ if (HasNoPmidOrMuid (pdp)) {
+ takeIt = FALSE;
+ }
+ }
if (takeIt /* stop >= from && stop <= to */) {
@@ -13999,7 +14445,7 @@ static void GetRefsOnBioseq (
right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);
strand = SeqLocStrand (ajp->ajp.slp);
split = FALSE;
- newloc = SeqLocReMap (sip, ajp->ajp.slp, irp->loc, 0, FALSE);
+ newloc = SeqLocReMapEx (sip, ajp->ajp.slp, irp->loc, 0, FALSE, ajp->masterStyle);
/*
newloc = SeqLocCopyRegion (sip, irp->loc, bsp, left, right, strand, &split);
*/
@@ -14089,6 +14535,7 @@ static Boolean AddReferenceBlock (
ValNodePtr head = NULL;
Int2 i;
IntRefBlockPtr irp;
+ Boolean is_aa;
Boolean is_embl = FALSE;
Boolean is_patent = FALSE;
IntRefBlockPtr lastirp;
@@ -14128,6 +14575,8 @@ static Boolean AddReferenceBlock (
}
}
+ is_aa = (Boolean) ISA_aa (bsp->mol);
+
if (bsp->repr == Seq_repr_seg) {
/* collect publication descriptors on local parts */
@@ -14254,9 +14703,14 @@ static Boolean AddReferenceBlock (
if (rbp != NULL) {
irp = (IntRefBlockPtr) rbp;
if (irp->justuids) {
- /* do not allow justuids reference to appear by itself - S79174.1 */
- excise = TRUE;
- /* justuids should still combine, even if no authors - S67070.1 */
+ if (isRefSeq && is_aa) {
+ /* if allowing justuid in protein RefSeq, try to look up dynamically */
+ excise = TRUE; /* Back to old behavior, do not fetch */
+ } else {
+ /* do not allow justuids reference to appear by itself - S79174.1 */
+ excise = TRUE;
+ /* justuids should still combine, even if no authors - S67070.1 */
+ }
} else if (is_embl && is_patent) {
/* EMBL patent records do not need author or title - A29528.1 */
} else if (StringHasNoText (irp->authstr)) {
@@ -14563,6 +15017,7 @@ static void AddWGSMasterCommentString (
)
{
+ size_t acclen;
BioSourcePtr biop;
Char buf [256];
SeqMgrDescContext dcontext;
@@ -14618,10 +15073,14 @@ static void AddWGSMasterCommentString (
last = "?";
}
ver [0] = '\0';
- if (StringLen (wgsname) == 12) {
+ acclen = StringLen (wgsname);
+ if (acclen == 12) {
+ StringCpy (ver, wgsname + 4);
+ ver [2] = '\0';
+ } else if (acclen == 13) {
StringCpy (ver, wgsname + 4);
ver [2] = '\0';
- } else if (StringLen (wgsname) == 15) {
+ } else if (acclen == 15) {
StringCpy (ver, wgsname + 7);
ver [2] = '\0';
}
@@ -14630,7 +15089,7 @@ static void AddWGSMasterCommentString (
FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
sprintf (buf, " This version of the project (%s) has the accession number %s,", ver, wgsname);
- FFAddOneString(ffstring, buf, TRUE, FALSE, TILDE_EXPAND);
+ FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
if (StringCmp (first, last) != 0) {
sprintf (buf, " and consists of sequences %s-%s.", first, last);
@@ -14736,13 +15195,15 @@ static CharPtr GetStrForBankit (
return ptr;
}
-static CharPtr reftxt0 = "The reference sequence was derived from ";
-static CharPtr reftxt1 = " This record is predicted by genome sequence analysis and is not yet supported by experimental evidence. ";
-static CharPtr reftxt2 = " This record has not yet been subject to final NCBI review. ";
-static CharPtr reftxt3 = " The mRNA record is supported by experimental evidence; however, the coding sequence is predicted. ";
-static CharPtr reftxt4 = " This record has undergone preliminary review of the sequence, but has not yet been subject to final NCBI review. ";
+static CharPtr reftxt0 = " The reference sequence was derived from "; /* text fragments for the REFSEQ comment; every added fragment begins with a single space */
+static CharPtr reftxt1 = " This record is predicted by genome sequence analysis and is not yet supported by experimental evidence.";
+static CharPtr reftxt2 = " This record has not yet been subject to final NCBI review.";
+static CharPtr reftxt3 = " The mRNA record is supported by experimental evidence; however, the coding sequence is predicted.";
+static CharPtr reftxt4 = " This record has undergone preliminary review of the sequence, but has not yet been subject to final review.";
static CharPtr reftxt5 = " This record has been curated by ";
-static CharPtr reftxt6 = " This RefSeq record is provided to represent a collection of whole genome shotgun sequences. ";
+static CharPtr reftxt6 = " This record is predicted by automated computational analysis."; /* NOTE(review): the numeric suffix presumably tracks the review code set in AddStrForRefTrack (6 = Model, 7 = WGS) -- confirm against the full function */
+static CharPtr reftxt7 = " This record is provided to represent a collection of whole genome shotgun sequences.";
+static CharPtr reftxt8 = " This record is derived from an annotated genomic sequence (";
static CharPtr GetStatusForRefTrack (
UserObjectPtr uop
@@ -14762,7 +15223,7 @@ static CharPtr GetStatusForRefTrack (
urf = ufp;
}
}
- if (urf == NULL || urf->choice != 11) return NULL;
+ /* if (urf == NULL || urf->choice != 11) return NULL; */
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
oip = ufp->label;
if (StringCmp (oip->str, "Status") == 0) {
@@ -14777,6 +15238,8 @@ static CharPtr GetStatusForRefTrack (
return "VALIDATED ";
} else if (StringICmp (st, "Reviewed") == 0) {
return "REVIEWED ";
+ } else if (StringICmp (st, "Model") == 0) {
+ return "MODEL ";
} else if (StringICmp (st, "WGS") == 0) {
return "WGS ";
}
@@ -14793,7 +15256,7 @@ static void AddStrForRefTrack (
)
{
- CharPtr accn, curator = NULL, st;
+ CharPtr accn, curator = NULL, source = NULL, st;
ObjectIdPtr oip;
UserFieldPtr ufp, tmp, u, urf = NULL;
Int2 i = 0;
@@ -14822,14 +15285,21 @@ static void AddStrForRefTrack (
review = 4;
} else if (StringICmp (st, "Reviewed") == 0) {
review = 5;
- } else if (StringICmp (st, "WGS") == 0) {
+ } else if (StringICmp (st, "Model") == 0) {
review = 6;
+ } else if (StringICmp (st, "WGS") == 0) {
+ review = 7;
}
} else if (StringCmp (oip->str, "Collaborator") == 0) {
st = (CharPtr) ufp->data.ptrvalue;
if (! StringHasNoText (st)) {
curator = st;
}
+ } else if (StringCmp (oip->str, "GenomicSource") == 0) {
+ st = (CharPtr) ufp->data.ptrvalue;
+ if (! StringHasNoText (st)) {
+ source = st;
+ }
}
}
if (urf != NULL && urf->choice == 11) {
@@ -14842,74 +15312,120 @@ static void AddStrForRefTrack (
}
}
}
- if ( GetWWW(ajp) ) {
- FFAddTextToString(ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
- } else {
- FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
- }
- FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
- if (review == 1) {
- FFAddOneString (ffstring, reftxt1, FALSE, FALSE, TILDE_IGNORE);
- } else if (review == 2) {
- FFAddOneString (ffstring, reftxt2, FALSE, FALSE, TILDE_IGNORE);
- } else if (review == 3) {
- FFAddOneString (ffstring, reftxt3, FALSE, FALSE, TILDE_IGNORE);
- } else if (review == 4) {
- FFAddOneString (ffstring, reftxt4, FALSE, FALSE, TILDE_IGNORE);
- } else if (review == 5) {
- if (curator == NULL) {
- curator = "NCBI staff";
- }
- FFAddOneString (ffstring, reftxt5, FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, ". ", FALSE, FALSE, TILDE_IGNORE);
- } else if (review == 6) {
- FFAddOneString (ffstring, reftxt6, FALSE, FALSE, TILDE_IGNORE);
- }
- if (i > 0) {
- FFAddOneString (ffstring, reftxt0, FALSE, FALSE, TILDE_IGNORE);
-
- for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) {
- is_accn = TRUE;
- for (u = tmp->data.ptrvalue; u != NULL; u = u->next) {
- oip = u->label;
- if (StringCmp (oip->str, "accession") == 0) break;
- if (StringCmp (oip->str, "name") == 0) {
- is_accn = FALSE;
- break;
- }
+ }
+ if ( GetWWW(ajp) ) {
+ FFAddTextToString(ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
+ if (review == 1) {
+ FFAddOneString (ffstring, reftxt1, FALSE, FALSE, TILDE_IGNORE);
+ } else if (review == 2) {
+ FFAddOneString (ffstring, reftxt2, FALSE, FALSE, TILDE_IGNORE);
+ } else if (review == 3) {
+ FFAddOneString (ffstring, reftxt3, FALSE, FALSE, TILDE_IGNORE);
+ } else if (review == 4) {
+ FFAddOneString (ffstring, reftxt4, FALSE, FALSE, TILDE_IGNORE);
+ } else if (review == 5) {
+ if (curator == NULL) {
+ curator = "NCBI staff";
+ }
+ FFAddOneString (ffstring, reftxt5, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
+ } else if (review == 6) {
+ FFAddOneString (ffstring, reftxt6, FALSE, FALSE, TILDE_IGNORE);
+ } else if (review == 7) {
+ FFAddOneString (ffstring, reftxt7, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (review != 5 && curator != NULL) {
+ FFAddOneString (ffstring, reftxt5, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, curator, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (source != NULL) {
+ FFAddOneString (ffstring, reftxt8, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, source, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, ").", FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (i > 0) {
+ FFAddOneString (ffstring, reftxt0, FALSE, FALSE, TILDE_IGNORE);
+
+ for (tmp = urf->data.ptrvalue; tmp != NULL; tmp = tmp->next) {
+ is_accn = TRUE;
+ for (u = tmp->data.ptrvalue; u != NULL; u = u->next) {
+ oip = u->label;
+ if (StringCmp (oip->str, "accession") == 0) break;
+ if (StringCmp (oip->str, "name") == 0) {
+ is_accn = FALSE;
+ break;
}
- if (u == NULL) continue;
- accn = (CharPtr) u->data.ptrvalue;
- if (StringHasNoText (accn)) continue;
- if (is_accn && GetWWW(ajp) ) {
- FFAddTextToString(ffstring, "<a href=", link_seq, NULL, FALSE, FALSE, TILDE_IGNORE);
- FFAddTextToString(ffstring, "val=", accn, ">", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (u == NULL) continue;
+ accn = (CharPtr) u->data.ptrvalue;
+ if (StringHasNoText (accn)) continue;
+ if (is_accn && GetWWW(ajp) ) {
+ FFAddTextToString(ffstring, "<a href=", link_seq, NULL, FALSE, FALSE, TILDE_IGNORE);
+ FFAddTextToString(ffstring, "val=", accn, ">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
+ }
+ if (tmp->next != NULL) {
+ ufp = tmp->next;
+ if (ufp->next != NULL) {
+ FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
} else {
- FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, " and ", FALSE, FALSE, TILDE_IGNORE);
}
- if (tmp->next != NULL) {
- ufp = tmp->next;
- if (ufp->next != NULL) {
- FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
- } else {
- FFAddOneString (ffstring, " and ", FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
+ FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_EXPAND);
+ }
+}
+
+static CharPtr GetGenomeBuildNumber ( /* returns the build text held in a "GenomeBuild" user object, or NULL if none is found */
+ UserObjectPtr uop /* user object to inspect; NULL, an untyped object, or any other type yields NULL */
+)
+
+{
+ ObjectIdPtr oip;
+ CharPtr str;
+ UserFieldPtr ufp;
+
+ if (uop == NULL) return NULL;
+ if ((oip = uop->type) == NULL) return NULL;
+ if (StringCmp (oip->str, "GenomeBuild") != 0) return NULL;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if ((oip = ufp->label) == NULL) continue; /* skip unlabeled fields rather than dereferencing a NULL label */
+ if (StringCmp(oip->str, "NcbiAnnotation") == 0) {
+ if (ufp->choice == 1) { /* string */
+ str = ufp->data.ptrvalue;
+ if (! StringHasNoText (str)) return str; /* returned as stored; no copy is made */
+ }
+ } else if (StringCmp (oip->str, "Annotation") == 0) {
+ if (ufp->choice == 1) { /* string */
+ str = ufp->data.ptrvalue;
+ if (! StringHasNoText (str)) {
+ if (StringNICmp (str, "NCBI build ", 11) == 0) { /* 11 is the length of the "NCBI build " prefix */
+ if (! StringHasNoText (str + 11)) {
+ return (str + 11); /* points just past the prefix, into the field's own string */
+ }
}
}
}
- FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_EXPAND);
}
}
+ return NULL;
}
-static CharPtr reftxt11 = "This model reference sequence was predicted from NCBI contig";
-static CharPtr reftxt12 = "by automated computational analysis";
-static CharPtr reftxt13 = "using gene prediction method:";
+static CharPtr reftxt11 = "This record is predicted by automated computational analysis. This record is derived from an annotated genomic sequence"; /* lead-in of the MODEL REFSEQ comment; AddCommentBlock appends " (" after it */
+static CharPtr reftxt12 = "using gene prediction method:"; /* emitted by AddCommentBlock only when method != NULL, followed by the method text */
static void FindModelEvidenceUop (
UserObjectPtr uop,
@@ -15113,7 +15629,10 @@ static Boolean GetGeneAndLocus (
return TRUE;
}
-static CharPtr reftxt21 = "GENOME ANNOTATION REFSEQ: NCBI contigs are derived from assembled genomic sequence data. They may include both draft and finished sequence.";
+static CharPtr reftxt21 = "NCBI contigs are derived from assembled genomic sequence data."; /* NT_/NW_ comment text used by AddCommentBlock when no genome build number is available */
+
+static CharPtr reftxt22 = "Features on this sequence have been produced for build "; /* AddCommentBlock writes the genome build number right after this */
+static CharPtr reftxt23 = " of the NCBI's genome annotation"; /* written right after the genome build number, before the " [see " documentation link */
static CharPtr nsAreGapsString = "The strings of n's in this record represent gaps between contigs, and the length of each string corresponds to the length of the gap.";
@@ -15492,6 +16011,7 @@ static void AddCommentBlock (
)
{
+ size_t acclen;
IntAsn2gbJobPtr ajp;
BioseqPtr bsp;
Char buf [128];
@@ -15508,15 +16028,19 @@ static void AddCommentBlock (
Boolean first = TRUE;
GBBlockPtr gbp;
CharPtr geneName = NULL;
+ CharPtr genomeBuildNumber = NULL;
Int4 gi = 0;
CommentBlockPtr gsdbcbp = NULL;
Int4 gsdbid = 0;
Boolean has_gaps = FALSE;
+ Boolean hasRefTrackStatus = FALSE;
SeqHistPtr hist;
+ Boolean is_collab = FALSE;
Boolean is_other = FALSE;
Boolean is_tpa = FALSE;
Boolean is_wgs = FALSE;
SeqLitPtr litp;
+ ObjectIdPtr localID = NULL;
Char locusID [32];
CharPtr method = NULL;
MolInfoPtr mip;
@@ -15545,31 +16069,137 @@ static void AddCommentBlock (
ffstring = FFGetString(ajp);
if ( ffstring == NULL ) return;
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ while (sdp != NULL) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL) {
+ str = GetStatusForRefTrack (uop);
+ if (str != NULL) {
+ hasRefTrackStatus = TRUE;
+ }
+ if (genomeBuildNumber == NULL) {
+ genomeBuildNumber = GetGenomeBuildNumber (uop);
+ }
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
+ }
+
for (sip = bsp->id; sip != NULL; sip = sip->next) {
if (sip->choice == SEQID_OTHER) {
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL) {
is_other = TRUE;
- if (StringNCmp(tsip->accession, "NT_", 3) == 0 || StringNCmp(tsip->accession, "NW_", 3) == 0) {
+ if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
+ if (! StringHasNoText (genomeBuildNumber)) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
- cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
- if (cbp != NULL) {
+ cbp->first = first;
+ first = FALSE;
- cbp->first = first;
- first = FALSE;
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
- if (cbp->first) {
- FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
- } else {
- FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE);
+
+ if ( GetWWW(ajp) ) {
+ FFAddTextToString (ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
+ if ( GetWWW(ajp) ) {
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, ": ", FALSE, FALSE, TILDE_IGNORE);
+
+ FFAddOneString (ffstring, reftxt22, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, genomeBuildNumber, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, reftxt23, FALSE, FALSE, TILDE_EXPAND);
+
+ FFAddOneString (ffstring, " [see ", FALSE, FALSE, TILDE_EXPAND);
+
+ if ( GetWWW(ajp) ) {
+ FFAddTextToString (ffstring, "<a href=", doc_link, ">", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, "documentation", FALSE, FALSE, TILDE_IGNORE);
+ if ( GetWWW(ajp) ) {
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ }
+
+ FFAddOneString (ffstring, "].", FALSE, FALSE, TILDE_EXPAND);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
}
+ }
- FFAddOneString (ffstring, reftxt21, TRUE, FALSE, TILDE_EXPAND);
+ } else if (StringNCmp(tsip->accession, "NT_", 3) == 0 || StringNCmp(tsip->accession, "NW_", 3) == 0) {
- cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
- FFRecycleString(ajp, ffstring);
- ffstring = FFGetString(ajp);
+ if (! hasRefTrackStatus) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->first = first;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE);
+
+ if ( GetWWW(ajp) ) {
+ FFAddTextToString (ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, "REFSEQ", FALSE, FALSE, TILDE_IGNORE);
+ if ( GetWWW(ajp) ) {
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, ": ", FALSE, FALSE, TILDE_IGNORE);
+
+ if (! StringHasNoText (genomeBuildNumber)) {
+ FFAddOneString (ffstring, reftxt22, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, genomeBuildNumber, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, reftxt23, FALSE, FALSE, TILDE_EXPAND);
+
+ FFAddOneString (ffstring, " [see ", FALSE, FALSE, TILDE_EXPAND);
+
+ if ( GetWWW(ajp) ) {
+ FFAddTextToString (ffstring, "<a href=", doc_link, ">", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, "documentation", FALSE, FALSE, TILDE_IGNORE);
+ if ( GetWWW(ajp) ) {
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ }
+
+ FFAddOneString (ffstring, "].", FALSE, FALSE, TILDE_EXPAND);
+ } else {
+
+ FFAddOneString (ffstring, reftxt21, TRUE, FALSE, TILDE_EXPAND);
+
+ FFAddOneString (ffstring, "~Also see:~ ", FALSE, FALSE, TILDE_EXPAND);
+
+ if ( GetWWW(ajp) ) {
+ FFAddTextToString (ffstring, "<a href=", doc_link, ">", FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString (ffstring, "Documentation", FALSE, FALSE, TILDE_IGNORE);
+ if ( GetWWW(ajp) ) {
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ }
+
+ FFAddOneString (ffstring, " of NCBI's Annotation Process~ ", FALSE, FALSE, TILDE_EXPAND);
+ }
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+ }
}
} else if (StringNCmp(tsip->accession, "XP_", 3) == 0 ||
@@ -15595,7 +16225,7 @@ static void AddCommentBlock (
FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
}
- FFAddOneString (ffstring, "GENOME ANNOTATION ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "MODEL ", FALSE, FALSE, TILDE_IGNORE);
if ( GetWWW(ajp) ) {
FFAddTextToString (ffstring, "<a href=", ref_link, ">", FALSE, FALSE, TILDE_IGNORE);
@@ -15606,7 +16236,7 @@ static void AddCommentBlock (
}
FFAddOneString (ffstring, ": ", FALSE, FALSE, TILDE_IGNORE);
- FFAddTextToString (ffstring, NULL, reftxt11, " ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddTextToString (ffstring, NULL, reftxt11, " (", FALSE, FALSE, TILDE_IGNORE);
if ( GetWWW(ajp) ) {
FFAddTextToString (ffstring, "<a href=", nt_link, name, FALSE, FALSE, TILDE_IGNORE);
@@ -15617,12 +16247,11 @@ static void AddCommentBlock (
FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
- FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, reftxt12, FALSE, FALSE, TILDE_IGNORE);
-
+ FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
+
if (method != NULL) {
FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, reftxt13, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, reftxt12, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE);
FFAddOneString (ffstring, method, FALSE, FALSE, TILDE_IGNORE);
}
@@ -15691,14 +16320,23 @@ static void AddCommentBlock (
} else if (sip->choice == SEQID_GENBANK || sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
+ is_collab = TRUE;
+
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL && tsip->accession != NULL) {
- if (StringLen (tsip->accession) == 12) {
+ acclen = StringLen (tsip->accession);
+ if (acclen == 12) {
is_wgs = TRUE;
if (StringCmp (tsip->accession + 6, "000000") == 0) {
wgsaccn = tsip->accession;
wgsname = tsip->name; /* master accession has 8 zeroes, name has project version plus 6 zeroes */
}
+ } else if (acclen == 13) {
+ is_wgs = TRUE;
+ if (StringCmp (tsip->accession + 6, "0000000") == 0) {
+ wgsaccn = tsip->accession;
+ wgsname = tsip->name; /* master accession has 9 zeroes, name has project version plus 7 zeroes */
+ }
} else if (ajp->newSourceOrg && StringLen (tsip->accession) == 6) {
ch = tsip->accession [0];
if (ch == 'J' || ch == 'K' || ch == 'L' || ch == 'M') {
@@ -15726,6 +16364,45 @@ static void AddCommentBlock (
} else if (sip->choice == SEQID_GI) {
gi = (Int4) sip->data.intvalue;
+
+ } else if (sip->choice == SEQID_LOCAL) {
+ localID = (ObjectIdPtr) sip->data.ptrvalue;
+ }
+ }
+
+ if (localID != NULL) {
+ if (is_tpa || is_collab) {
+ if (awp->mode == SEQUIN_MODE || awp->mode == DUMP_MODE) {
+ buf [0] = '\0';
+ if (! StringHasNoText (localID->str)) {
+ if (StringLen (localID->str) < 100) {
+ sprintf (buf, "LocalID: %s", localID->str);
+ } else {
+ sprintf (buf, "LocalID string too large");
+ }
+ } else {
+ sprintf (buf, "LocalID: %ld", (long) localID->id);
+ }
+
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->first = first;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12,5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+ }
+ }
}
}
@@ -15933,7 +16610,7 @@ static void AddCommentBlock (
}
}
- if (hist->replace_ids != NULL && hist->replace_date != NULL) {
+ if (hist->replace_ids != NULL && hist->replace_date != NULL && awp->mode != SEQUIN_MODE) {
okay = TRUE;
for (sip = hist->replace_ids; sip != NULL; sip = sip->next) {
@@ -16713,7 +17390,7 @@ static void GetSourcesOnBioseq (
right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);
strand = SeqLocStrand (ajp->ajp.slp);
split = FALSE;
- newloc = SeqLocReMap (sip, ajp->ajp.slp, isp->loc, 0, FALSE);
+ newloc = SeqLocReMapEx (sip, ajp->ajp.slp, isp->loc, 0, FALSE, ajp->masterStyle);
/*
newloc = SeqLocCopyRegion (sip, isp->loc, bsp, left, right, strand, &split);
*/
@@ -17066,6 +17743,27 @@ static void AddSourceFeatBlock (
#endif
}
+ str = GetMolTypeQual (bsp);
+ if (str == NULL) {
+ switch (bsp->mol) {
+ case Seq_mol_dna :
+ str = "unassigned DNA";
+ break;
+ case Seq_mol_rna :
+ str = "unassigned RNA";
+ break;
+ case Seq_mol_aa :
+ break;
+ default :
+ str = "unassigned DNA";
+ break;
+ }
+ }
+ if (str != NULL) {
+ FFAddNewLine(ffstring);
+ FFAddTextToString (ffstring, "/mol_type=\"", str, "\"", FALSE, TRUE, TILDE_TO_SPACES);
+ }
+
str = FFEndPrint(ajp, ffstring, awp->format, 5, 21, 5, 21, "FT");
bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, SOURCEFEAT_BLOCK, sizeof (IntSrcBlock));
@@ -17303,7 +18001,7 @@ static void GetFeatsOnCdsProduct (
slp = SeqLocMerge (nbsp, location, NULL, FALSE, TRUE, FALSE);
if (slp != NULL) {
sip = SeqIdParse ("lcl|dummy");
- newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE);
+ newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
SeqIdFree (sip);
SeqLocFree (slp);
if (newloc == NULL) {
@@ -17532,7 +18230,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, TRUE, FALSE);
if (slp == NULL) return TRUE;
sip = SeqIdParse ("lcl|dummy");
- newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE);
+ newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
SeqIdFree (sip);
SeqLocFree (slp);
if (newloc == NULL) return TRUE;
@@ -17600,7 +18298,8 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
} else if (sfp->product != NULL) {
sip = SeqLocIdForProduct (sfp->product);
if (sip != NULL) {
- if (sip->choice == SEQID_GI && sip->data.intvalue > 0) {
+ if ((sip->choice == SEQID_GI && sip->data.intvalue > 0) ||
+ sip->choice == SEQID_LOCAL) {
sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
oldscope = SeqEntrySetScope (sep);
prod = BioseqFind (sip);
@@ -17622,7 +18321,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
}
}
}
- } else {
+ } else if (sip->choice == SEQID_GI && sip->data.intvalue > 0) {
/* RELEASE_MODE requires that /protein_id is an accession */
gi = sip->data.intvalue;
if (GetAccnVerFromServer (gi, buf)) {
@@ -17649,6 +18348,12 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
}
}
}
+ } else {
+ if (sfp->excpt && (! StringHasNoText (sfp->except_text))) {
+ if (StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
+ okay = TRUE;
+ }
+ }
}
} else {
okay = TRUE;
@@ -17863,17 +18568,36 @@ static Boolean LIBCALLBACK GetFeatsOnSeg (
)
{
- Asn2gbWorkPtr awp;
- BioseqPtr bsp;
- Uint2 entityID;
- Int4 from;
- SeqLocPtr loc;
- SeqIdPtr sip;
- Int4 to;
+ IntAsn2gbJobPtr ajp;
+ Asn2gbWorkPtr awp;
+ BioseqPtr bsp;
+ Uint2 entityID;
+ Int4 from;
+ Int4 left;
+ SeqLocPtr loc;
+ Int4 right;
+ SeqIdPtr sip;
+ Int4 to;
if (slp == NULL || context == NULL) return FALSE;
awp = (Asn2gbWorkPtr) context->userdata;
+ if (awp == NULL) return FALSE;
+ ajp = awp->ajp;
+ if (ajp == NULL) return FALSE;
+
+ /* do not fetch outside of desired component */
+
+ if (ajp->ajp.slp != NULL) {
+ left = GetOffsetInBioseq (ajp->ajp.slp, awp->parent, SEQLOC_LEFT_END);
+ right = GetOffsetInBioseq (ajp->ajp.slp, awp->parent, SEQLOC_RIGHT_END);
+
+ from = context->cumOffset;
+ to = from + context->to - context->from;
+ if (left > to) return TRUE;
+ if (right < from) return TRUE;
+ }
+
from = awp->from;
to = awp->to;
@@ -18684,6 +19408,7 @@ static void DoOneSection (
)
{
+ size_t acclen;
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
CharPtr bases = NULL;
@@ -18785,10 +19510,15 @@ static void DoOneSection (
sip->choice == SEQID_DDBJ) {
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL && tsip->accession != NULL) {
- if (StringLen (tsip->accession) == 12) {
+ acclen = StringLen (tsip->accession);
+ if (acclen == 12) {
if (StringCmp (tsip->accession + 6, "000000") == 0) {
wgsmaster = TRUE;
}
+ } else if (acclen == 13) {
+ if (StringCmp (tsip->accession + 6, "0000000") == 0) {
+ wgsmaster = TRUE;
+ }
}
}
} else if (sip->choice == SEQID_OTHER) {
@@ -18818,11 +19548,20 @@ static void DoOneSection (
}
}
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_OTHER) {
+ isRefSeq = TRUE;
+ }
+ }
+
/* start exploring and populating paragraphs */
if (awp->format == FTABLE_FMT) {
AddFeatHeaderBlock (awp);
+ if (awp->showRefs) {
+ AddReferenceBlock (awp, isRefSeq);
+ }
AddFeatureBlock (awp);
} else {
@@ -18875,11 +19614,6 @@ static void DoOneSection (
/* !!! RELEASE_MODE should check return value of AddReferenceBlock !!! */
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
- if (sip->choice == SEQID_OTHER) {
- isRefSeq = TRUE;
- }
- }
hasRefs = AddReferenceBlock (awp, isRefSeq);
if (! hasRefs) {
if (ajp->flags.needAtLeastOneRef) {
@@ -18916,7 +19650,9 @@ static void DoOneSection (
}
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
- AddBasecountBlock (awp, bases);
+ if (awp->showBaseCount) {
+ AddBasecountBlock (awp, bases);
+ }
}
AddOriginBlock (awp);
@@ -18941,7 +19677,9 @@ static void DoOneSection (
}
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
- AddBasecountBlock (awp, bases);
+ if (awp->showBaseCount) {
+ AddBasecountBlock (awp, bases);
+ }
}
AddOriginBlock (awp);
@@ -19125,6 +19863,7 @@ static void DoOneBioseq (
{
IntAsn2gbJobPtr ajp;
Asn2gbWorkPtr awp;
+ BioseqSetPtr bssp;
SeqMgrSegmentContext context;
Boolean contig = FALSE;
Int4 from;
@@ -19145,8 +19884,29 @@ static void DoOneBioseq (
if (ISA_na (bsp->mol)) {
if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;
+
+ /* only do mRNA feature tables in GPS if targeted to a specific mRNA */
+
+ if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
+ if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
+ bssp = (BioseqSetPtr) bsp->idx.parentptr;
+ if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
+ if (bssp->idx.parenttype == OBJ_BIOSEQSET) { /* climb from the nuc-prot set to its own parent, not bsp's parent again */
+ bssp = (BioseqSetPtr) bssp->idx.parentptr;
+ if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ return; /* skip: this Bioseq sits in a nuc-prot set nested inside a gen-prod set */
+ }
+ }
+ }
+ }
+ }
+
} else if (ISA_aa (bsp->mol)) {
if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;
+
+ /* only do protein feature tables if targeted to a specific protein */
+
+ if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
}
if (awp->style == SEGMENT_STYLE) {
@@ -19376,7 +20136,7 @@ static CharPtr GetGOtext (
break;
}
}
- if (StringHasNoText (textstr)) return NULL;
+ /* if (StringHasNoText (textstr)) return NULL; */
str = (CharPtr) MemNew (StringLen (textstr) + StringLen (goid) + StringLen (evidence) + 50);
if (str == NULL) return NULL;
@@ -19397,10 +20157,31 @@ static CharPtr GetGOtext (
StringCat (str, tmp);
StringCat (str, "]");
}
+ TrimSpacesAroundString (str);
return str;
}
+static Boolean DbxrefAlreadyInGeneXref ( /* TRUE when dbt matches, per DbtagMatch, any Dbtag held in the dbxref list */
+ DbtagPtr dbt, /* candidate db_xref; NULL yields FALSE */
+ ValNodePtr dbxref /* list whose data.ptrvalue entries are read as DbtagPtr; an empty list yields FALSE */
+)
+
+{
+ DbtagPtr gdbt;
+ ValNodePtr vnp;
+
+ if (dbt == NULL) return FALSE;
+
+ for (vnp = dbxref; vnp != NULL; vnp = vnp->next) {
+ gdbt = (DbtagPtr) vnp->data.ptrvalue;
+ if (gdbt == NULL) continue; /* nodes without a Dbtag are ignored */
+ if (DbtagMatch (dbt, gdbt)) return TRUE; /* stop at the first match */
+ }
+
+ return FALSE;
+}
+
/* FormatFeatureblockQuals should not be called directly,
except from FormatFeatureBlock. It performs no input
validation. (perhaps it should?) */
@@ -19460,6 +20241,7 @@ static void FormatFeatureBlockQuals (
ObjectIdPtr oip;
Boolean okay;
Boolean only_digits;
+ BioseqPtr pbsp;
ValNodePtr ppr;
CharPtr prefix;
CharPtr protein_seq = NULL;
@@ -19908,7 +20690,7 @@ static void FormatFeatureBlockQuals (
}
/* in release_mode, must be of the form 123..4567 or a single-token label,
- or (technically illegal but common) letters and semicolons */
+ or (technically illegal but common) letters and semicolons - NO LONGER CHECKED */
while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
if (! StringHasNoText (gbq->val)) {
@@ -19927,18 +20709,22 @@ static void FormatFeatureBlockQuals (
ptr++;
}
if ((! ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) {
- FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
+ TrimSpacesAroundString (str);
+ FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"",
FALSE, TRUE, TILDE_IGNORE);
FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
+ FFAddOneChar(ffstring, '\"', FALSE);
FFAddOneChar(ffstring, '\n', FALSE);
}
str = ptr;
}
} else {
if ((! ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) {
- FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
+ TrimSpacesAroundString (str);
+ FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"",
FALSE, TRUE, TILDE_IGNORE);
FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
+ FFAddOneChar(ffstring, '\"', FALSE);
FFAddOneChar(ffstring, '\n', FALSE);
}
}
@@ -20081,7 +20867,7 @@ static void FormatFeatureBlockQuals (
if (ajp->ajp.slp != NULL) {
sip = SeqIdParse ("lcl|dummy");
split = FALSE;
- newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE);
+ newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
SeqIdFree (sip);
if (newloc != NULL) {
@@ -20114,10 +20900,10 @@ static void FormatFeatureBlockQuals (
case Qual_class_anti_codon :
slp = qvp [FTQUAL_anticodon].slp;
newloc = NULL;
- if (ajp->ajp.slp != NULL) {
+ if (slp != NULL && ajp->ajp.slp != NULL) {
sip = SeqIdParse ("lcl|dummy");
split = FALSE;
- newloc = SeqLocReMap (sip, ajp->ajp.slp, slp, 0, FALSE);
+ newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
/*
newloc = SeqLocCopyRegion (sip, slp, bsp, left, right, strand, &split);
*/
@@ -20211,6 +20997,11 @@ static void FormatFeatureBlockQuals (
}
}
}
+ if (okay && idx == FTQUAL_db_xref && qvp [FTQUAL_gene_xref].vnp != NULL) {
+ if (DbxrefAlreadyInGeneXref (dbt, qvp [FTQUAL_gene_xref].vnp)) {
+ okay = FALSE;
+ }
+ }
if (okay) {
if (! StringHasNoText (oip->str)) {
@@ -20290,6 +21081,16 @@ static void FormatFeatureBlockQuals (
}
}
*/
+ } else if (dbt != NULL) {
+ pbsp = BioseqFind (sip);
+ if (pbsp != NULL && pbsp->id != NULL && pbsp->id->next == NULL) {
+ if (SeqIdWrite (sip, seqid, PRINTID_REPORT, sizeof (seqid)) != NULL) {
+ FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
+ FALSE, FALSE, TILDE_IGNORE);
+ FF_www_protein_id(ajp, ffstring, seqid);
+ FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
}
}
}
@@ -20497,7 +21298,7 @@ static void FormatFeatureBlockQuals (
case Qual_class_go :
if (qvp [jdx].ufp != NULL) {
for (entry = qvp [jdx].ufp; entry != NULL; entry = entry->next) {
- if (entry == NULL || entry->choice != 11) break;
+ if (entry == NULL || entry->choice != 11) break;
ufp = (UserFieldPtr) entry->data.ptrvalue;
str = GetGOtext (ufp);
if (! StringHasNoText (str)) {
@@ -20806,10 +21607,73 @@ static void FormatFeatureBlockQuals (
}
break;
+ case Qual_class_seq_id :
+ sip = qvp [jdx].sip;
+ if (sip != NULL) {
+ /* should always be found above for protein_id or transcript_id
+ prod = BioseqFind (sip);
+ */
+ if (prod != NULL) {
+ choice = 0;
+ for (sip = prod->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GENBANK ||
+ sip->choice == SEQID_EMBL ||
+ sip->choice == SEQID_DDBJ ||
+ sip->choice == SEQID_OTHER ||
+ sip->choice == SEQID_TPG ||
+ sip->choice == SEQID_TPE ||
+ sip->choice == SEQID_TPD) {
+ choice = sip->choice;
+ if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
+ FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
+ FALSE, FALSE, TILDE_IGNORE);
+ }
+ } else if (sip->choice == SEQID_GI) {
+ if (choice == 0) {
+ sprintf (seqid, "%ld", (long) sip->data.intvalue);
+ FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
+ FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
+ }
+ } else {
+ if (sip->choice == SEQID_GI) {
+ gi = sip->data.intvalue;
+ if (GetAccnVerFromServer (gi, seqid)) {
+ if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
+ FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
+ FALSE, FALSE, TILDE_IGNORE);
+ }
+ } else {
+ sip = GetSeqIdForGI(gi);
+ if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
+ if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
+ FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
+ FALSE, FALSE, TILDE_IGNORE);
+ }
+ } else if (! ajp->flags.dropIllegalQuals) {
+ sprintf (seqid, "%ld", (long) gi);
+ FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
+ FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
+ } else if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
+ if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
+ FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
+ FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
+ }
+ prefix = "; ";
+ add_period = FALSE;
+ }
+ break;
+
default :
break;
}
}
+
if ( !FFEmpty(unique) ) {
notestr = FFToCharPtr(unique);
TrimSpacesAroundString (notestr);
@@ -20947,7 +21811,9 @@ static CharPtr FormatFeatureBlock (
SeqLocPtr newloc;
Boolean noLeft;
Boolean noRight;
+ SeqMgrFeatContext ocontext;
SeqEntryPtr oldscope;
+ SeqFeatPtr operon = NULL;
Uint2 partial;
SeqMgrFeatContext pcontext;
BioseqPtr prd;
@@ -21135,7 +22001,7 @@ static CharPtr FormatFeatureBlock (
}
}
- FFStartPrint(ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", ifp->firstfeat);
+ FFStartPrint(ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", /* ifp->firstfeat */ FALSE);
if (ajp->ajp.slp != NULL) {
FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
} else if ( GetWWW(ajp) /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) {
@@ -21161,7 +22027,7 @@ static CharPtr FormatFeatureBlock (
right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);
strand = SeqLocStrand (ajp->ajp.slp);
split = FALSE;
- newloc = SeqLocReMap (sip, ajp->ajp.slp, location, 0, FALSE);
+ newloc = SeqLocReMapEx (sip, ajp->ajp.slp, location, 0, FALSE, ajp->masterStyle);
/*
newloc = SeqLocCopyRegion (sip, location, bsp, left, right, strand, &split);
*/
@@ -21216,8 +22082,12 @@ static CharPtr FormatFeatureBlock (
}
}
- /* a few features cannot show /partial in RELEASE_MODE - later no features will */
+ /* hide unclassified /partial in RELEASE_MODE and ENTREZ_MODE */
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ qvp [FTQUAL_partial].ble = FALSE;
+ }
+ /*
if (ajp->flags.checkQualSyntax) {
switch (featdeftype) {
case FEATDEF_conflict:
@@ -21230,6 +22100,7 @@ static CharPtr FormatFeatureBlock (
break;
}
}
+ */
}
if (ifp->mapToProt) {
qvp [FTQUAL_partial].ble = FALSE;
@@ -21271,6 +22142,14 @@ static CharPtr FormatFeatureBlock (
qvp [FTQUAL_gene_syn_refseq].vnp = qvp [FTQUAL_gene_syn].vnp;
qvp [FTQUAL_gene_syn].vnp = NULL;
}
+ operon = SeqMgrGetOverlappingOperon (locforgene, &ocontext);
+ if (operon != NULL) {
+ for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringCmp (gbq->qual, "operon") == 0) {
+ qvp [FTQUAL_operon].gbq = gbq;
+ }
+ }
+ }
} else {
@@ -21318,10 +22197,26 @@ static CharPtr FormatFeatureBlock (
gene_syn = vnp;
}
}
+ if (grp != NULL && fcontext.featdeftype != FEATDEF_variation) {
+ qvp [FTQUAL_gene_allele].str = grp->allele; /* now propagating /allele */
+ }
if (fcontext.seqfeattype != SEQFEAT_CDREGION &&
fcontext.seqfeattype != SEQFEAT_RNA) {
qvp [FTQUAL_gene_xref].vnp = NULL;
}
+ if (fcontext.featdeftype != FEATDEF_operon) {
+ grp = SeqMgrGetGeneXref (sfp);
+ if (grp == NULL || (! SeqMgrGeneIsSuppressed (grp))) {
+ operon = SeqMgrGetOverlappingOperon (locforgene, &ocontext);
+ if (operon != NULL) {
+ for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringCmp (gbq->qual, "operon") == 0) {
+ qvp [FTQUAL_operon].gbq = gbq;
+ }
+ }
+ }
+ }
+ }
/* specific fields set here */
@@ -21363,7 +22258,11 @@ static CharPtr FormatFeatureBlock (
residue = SeqMapTableConvert (smtp, residue);
}
if (residue == 'U') {
- qvp [FTQUAL_selenocysteine].str = "selenocysteine";
+ if (ajp->flags.selenocysteineToNote) {
+ qvp [FTQUAL_selenocysteine_note].str = "selenocysteine";
+ } else {
+ qvp [FTQUAL_selenocysteine].ble = TRUE;
+ }
}
}
}
@@ -21504,6 +22403,43 @@ static CharPtr FormatFeatureBlock (
qvp [FTQUAL_transl_table].num = 0;
}
}
+ for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
+ seqcode = 0;
+ sctp = NULL;
+ cbaa = cbp->aa;
+ switch (cbaa.choice) {
+ case 1 :
+ seqcode = Seq_code_ncbieaa;
+ break;
+ case 2 :
+ seqcode = Seq_code_ncbi8aa;
+ break;
+ case 3 :
+ seqcode = Seq_code_ncbistdaa;
+ break;
+ default :
+ break;
+ }
+ if (seqcode != 0) {
+ sctp = SeqCodeTableFind (seqcode);
+ if (sctp != NULL) {
+ residue = cbaa.value.intvalue;
+ if (residue != 42) {
+ if (seqcode != Seq_code_ncbieaa) {
+ smtp = SeqMapTableFind (seqcode, Seq_code_ncbieaa);
+ residue = SeqMapTableConvert (smtp, residue);
+ }
+ if (residue == 'U') {
+ if (ajp->flags.selenocysteineToNote) {
+ qvp [FTQUAL_selenocysteine_note].str = "selenocysteine";
+ } else {
+ qvp [FTQUAL_selenocysteine].ble = TRUE;
+ }
+ }
+ }
+ }
+ }
+ }
}
}
break;
@@ -21574,9 +22510,12 @@ static CharPtr FormatFeatureBlock (
if (rrp->type == 2) {
sip = SeqLocIdForProduct (sfp->product);
if (sip != NULL) {
- /* for RefSeq records or GenBank not release_mode */
- if (is_other || (! ajp->flags.forGbRelease)) {
+ /* for RefSeq records or GenBank not release_mode or entrez_mode */
+ if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
qvp [FTQUAL_transcript_id].sip = sip;
+ } else {
+ /* otherwise now goes in note */
+ qvp [FTQUAL_transcript_id_note].sip = sip; /* !!! remove October 15, 2003 !!! */
}
prod = BioseqFind (sip);
}
@@ -21651,8 +22590,12 @@ static CharPtr FormatFeatureBlock (
} else {
shift = 1;
}
- idx = aa - (64 + shift);
- if (idx > 0 && idx < 25) {
+ if (aa != '*') {
+ idx = aa - (64 + shift);
+ } else {
+ idx = 25;
+ }
+ if (idx > 0 && idx < 26) {
str = trnaList [idx];
qvp [FTQUAL_product].str = str;
if (StringNICmp (str, "tRNA-", 5) == 0) {
@@ -21830,14 +22773,22 @@ static CharPtr FormatFeatureBlock (
/* !!! if ajp->flags.dropIllegalQuals, check CDS list here as well !!! */
if (ajp->flags.dropIllegalQuals &&
- (! StringInStringList (qvp [FTQUAL_seqfeat_note].str, validExceptionString)) ) {
+ (! StringInStringList (qvp [FTQUAL_seqfeat_note].str, validExceptionString))) {
qvp [FTQUAL_exception].str = NULL;
}
}
- if (ajp->flags.dropIllegalQuals &&
- (! StringInStringList (qvp [FTQUAL_exception].str, validExceptionString))) {
- qvp [FTQUAL_exception_note].str = qvp [FTQUAL_exception].str;
- qvp [FTQUAL_exception].str = NULL;
+ if (ajp->flags.dropIllegalQuals) {
+ if (is_other) {
+ if (! StringInStringList (qvp [FTQUAL_exception].str, validRefSeqExceptionString)) {
+ qvp [FTQUAL_exception_note].str = qvp [FTQUAL_exception].str;
+ qvp [FTQUAL_exception].str = NULL;
+ }
+ } else {
+ if (! StringInStringList (qvp [FTQUAL_exception].str, validExceptionString)) {
+ qvp [FTQUAL_exception_note].str = qvp [FTQUAL_exception].str;
+ qvp [FTQUAL_exception].str = NULL;
+ }
+ }
}
} else {
qvp [FTQUAL_exception_note].str = sfp->except_text;
@@ -22041,7 +22992,13 @@ static CharPtr FormatFeatureBlock (
/* suppress selenocysteine note if already in comment */
if (StringStr (sfp->comment, "selenocysteine") != NULL) {
- qvp [FTQUAL_selenocysteine].str = NULL;
+ qvp [FTQUAL_selenocysteine_note].str = NULL;
+ }
+
+ /* if /allele inherited from gene, suppress allele gbqual on feature */
+
+ if (qvp [FTQUAL_gene_allele].str != NULL) {
+ qvp [FTQUAL_allele].gbq = NULL;
}
/* now print qualifiers from table */
@@ -22443,7 +23400,7 @@ static Boolean IsSepRefseq (
}
typedef struct modeflags {
- Boolean flags [24];
+ Boolean flags [25];
} ModeFlags, PNTR ModeFlagsPtr;
static ModeFlags flagTable [] = {
@@ -22453,28 +23410,28 @@ static ModeFlags flagTable [] = {
TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE,
- TRUE, TRUE, TRUE, TRUE},
+ TRUE, TRUE, TRUE, TRUE, TRUE},
/* ENTREZ_MODE */
{FALSE, TRUE, TRUE, TRUE, TRUE,
FALSE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, TRUE,
- TRUE, TRUE, TRUE, FALSE},
+ TRUE, TRUE, TRUE, TRUE, FALSE},
/* SEQUIN_MODE */
{FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE},
+ FALSE, FALSE, FALSE, FALSE, FALSE},
/* DUMP_MODE */
{FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE}
+ FALSE, FALSE, FALSE, FALSE, FALSE}
};
static void SetFlagsFromMode (
@@ -22521,6 +23478,7 @@ static void SetFlagsFromMode (
ajp->flags.hideEmptySource = *(bp++);
ajp->flags.goQualsToNote = *(bp++);
ajp->flags.geneSynsToNote = *(bp++);
+ ajp->flags.selenocysteineToNote = *(bp++);
ajp->flags.forGbRelease = *(bp++);
/* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML */
@@ -22537,7 +23495,13 @@ static void SetFlagsFromMode (
*/
sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
- if (! IsSepRefseq (sep)) {
+ if (IsSepRefseq (sep)) {
+
+ /* selenocysteine always a separate qualifier for RefSeq */
+
+ ajp->flags.selenocysteineToNote = FALSE;
+
+ } else {
/* collaboration unapproved Gene Ontology quals on their own line only for RefSeq */
@@ -22583,6 +23547,82 @@ static void CheckVersionWithGi (BioseqPtr bsp, Pointer userdata)
}
+typedef struct lookforids { /* flags accumulated by LookForSeqIDs over every Bioseq it is called on */
+ Boolean isGED; /* a SEQID_GENBANK, SEQID_EMBL, or SEQID_DDBJ Seq-id was seen */
+ Boolean isNTorNW; /* a SEQID_OTHER accession starting with "NT_" or "NW_" was seen */
+ Boolean isNC; /* a SEQID_OTHER accession starting with "NC_" was seen */
+ Boolean isTPA; /* a SEQID_TPG, SEQID_TPE, or SEQID_TPD Seq-id was seen */
+ Boolean isNuc; /* ISA_na (bsp->mol) held for at least one Bioseq */
+ Boolean isProt; /* ISA_aa (bsp->mol) held for at least one Bioseq */
+} LookForIDs, PNTR LookForIDsPtr;
+
+static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
+/* Bioseq callback (passed to VisitBioseqsInSep by LookForGEDetc): records in the LookForIDs passed as userdata which molecule types and Seq-id classes occur */
+{
+ LookForIDsPtr lfip;
+ SeqIdPtr sip;
+ TextSeqIdPtr tsip;
+
+ lfip = (LookForIDsPtr) userdata; /* used without a NULL check; bsp is likewise dereferenced unchecked */
+ if (ISA_na (bsp->mol)) {
+ lfip->isNuc = TRUE;
+ }
+ if (ISA_aa (bsp->mol)) {
+ lfip->isProt = TRUE;
+ }
+ for (sip = bsp->id; sip != NULL; sip = sip->next) { /* flags are only ever set, never cleared, so results accumulate across calls */
+ switch (sip->choice) {
+ case SEQID_GENBANK :
+ case SEQID_EMBL :
+ case SEQID_DDBJ :
+ lfip->isGED = TRUE;
+ break;
+ case SEQID_TPG :
+ case SEQID_TPE :
+ case SEQID_TPD :
+ lfip->isTPA = TRUE;
+ break;
+ case SEQID_OTHER : /* classified by the first three characters of the accession */
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL) {
+ if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
+ lfip->isNC = TRUE;
+ } else if (StringNCmp (tsip->accession, "NT_", 3) == 0) {
+ lfip->isNTorNW = TRUE;
+ } else if (StringNCmp (tsip->accession, "NW_", 3) == 0) {
+ lfip->isNTorNW = TRUE;
+ }
+ } /* any other SEQID_OTHER accession prefix sets no flag */
+ break;
+ default :
+ break;
+ }
+ }
+}
+
+static void LookForGEDetc (
+ SeqEntryPtr topsep,
+ BoolPtr isGED,
+ BoolPtr isNTorNW,
+ BoolPtr isNC,
+ BoolPtr isTPA,
+ BoolPtr isNuc,
+ BoolPtr isProt
+)
+/* Runs LookForSeqIDs over the Bioseqs in topsep and returns the six resulting flags through the output pointers, all of which are written unconditionally and so must be non-NULL */
+{
+ LookForIDs lfi;
+
+ MemSet ((Pointer) &lfi, 0, sizeof (LookForIDs)); /* zero the flags first; the callback only ever sets them */
+ VisitBioseqsInSep (topsep, (Pointer) &lfi, LookForSeqIDs);
+ *isGED = lfi.isGED;
+ *isNTorNW = lfi.isNTorNW;
+ *isNC = lfi.isNC;
+ *isTPA = lfi.isTPA;
+ *isNuc = lfi.isNuc;
+ *isProt = lfi.isProt;
+}
+
#define FEAT_FETCH_MASK (ONLY_NEAR_FEATURES | FAR_FEATURES_SUPPRESS | NEAR_FEATURES_SUPPRESS)
#define HTML_XML_ASN_MASK (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE)
#define GENE_RIF_MASK (HIDE_GENE_RIFS | ONLY_GENE_RIFS | LATEST_GENE_RIFS)
@@ -22610,6 +23650,12 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
GBSeqPtr gbseq = NULL;
Int4 i;
IndxPtr index = NULL;
+ Boolean isGED;
+ Boolean isNTorNW;
+ Boolean isNC;
+ Boolean isNuc;
+ Boolean isProt;
+ Boolean isTPA;
Int4 j;
Int4 k;
Boolean lockFarComp;
@@ -22631,6 +23677,8 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
SubmitBlockPtr sbp;
SeqEntryPtr sep;
SeqIntPtr sintp;
+ Boolean skipMrnas = FALSE;
+ Boolean skipProts = FALSE;
SeqSubmitPtr ssp;
BioseqSetPtr topbssp;
ValNodePtr vnp;
@@ -22668,6 +23716,14 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
entityID = ObjMgrGetEntityIDForPointer (bsp);
} else if (bssp != NULL) {
entityID = ObjMgrGetEntityIDForPointer (bssp);
+ if (format == FTABLE_FMT) {
+ skipProts = TRUE;
+ skipMrnas = TRUE;
+ }
+ }
+ if ((Boolean) ((custom & SHOW_PROT_FTABLE) != 0)) {
+ skipProts = FALSE;
+ skipMrnas = FALSE;
}
if (entityID == 0) return NULL;
@@ -22727,7 +23783,11 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
/* lock all bioseqs in advance, including remote genome components */
sep = GetTopSeqEntryForEntityID (entityID);
- ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd);
+ if (ajp->ajp.slp != NULL && lockFarComp) {
+ ajp->lockedBspList = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, ajp->ajp.slp);
+ } else {
+ ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
+ }
}
lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
@@ -22753,6 +23813,8 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
}
ajp->relModeError = FALSE;
+ ajp->skipProts = skipProts;
+ ajp->skipMrnas = skipMrnas;
MemSet ((Pointer) (&aw), 0, sizeof (Asn2gbWork));
aw.ajp = ajp;
@@ -22767,9 +23829,40 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
aw.showconfeats = (Boolean) ((flags & SHOW_CONTIG_FEATURES) != 0);
aw.showconsource = (Boolean) ((flags & SHOW_CONTIG_SOURCES) != 0);
- aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
- aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
- aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
+ aw.format = format;
+ aw.mode = mode;
+ aw.style = style;
+
+ sep = GetTopSeqEntryForEntityID (entityID);
+
+ /* special types of records override feature fetching parameters */
+
+ aw.onlyNearFeats = FALSE;
+ aw.farFeatsSuppress = FALSE;
+ aw.nearFeatsSuppress = FALSE;
+ LookForGEDetc (sep, &isGED, &isNTorNW, &isNC, &isTPA, &isNuc, &isProt);
+ if (ajp->ajp.slp != NULL) {
+ /* specified location obeys fetching parameters, for now */
+ aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
+ aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
+ aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
+ } else if (mode == ENTREZ_MODE) {
+ /* entrez_mode overrides settings to avoid far fetches */
+ aw.onlyNearFeats = TRUE;
+ aw.showconfeats = TRUE;
+ } else if (isNTorNW || isTPA) {
+ aw.onlyNearFeats = TRUE;
+ } else if (isNC) {
+ if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
+ aw.onlyNearFeats = TRUE;
+ } else {
+ aw.nearFeatsSuppress = TRUE;
+ }
+ } else {
+ aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
+ aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
+ aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
+ }
aw.hideImpFeats = (Boolean) ((custom & HIDE_IMP_FEATS) != 0);
aw.hideRemImpFeats = (Boolean) ((custom & HIDE_REM_IMP_FEATS) != 0);
@@ -22789,8 +23882,9 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
aw.onlyGeneRIFs = (Boolean) ((custom & GENE_RIF_MASK) == ONLY_GENE_RIFS);
aw.latestGeneRIFs = (Boolean) ((custom & GENE_RIF_MASK) == LATEST_GENE_RIFS);
+ aw.showRefs = (Boolean) ((custom & SHOW_FTABLE_REFS) != 0);
+
aw.isGPS = FALSE;
- sep = GetTopSeqEntryForEntityID (entityID);
if (sep != NULL && IS_Bioseq_set (sep)) {
topbssp = (BioseqSetPtr) sep->data.ptrvalue;
if (topbssp != NULL && topbssp->_class == BioseqseqSet_class_gen_prod_set) {
@@ -22808,17 +23902,18 @@ NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
*/
aw.newLocusLine = TRUE;
+ aw.showBaseCount = FALSE;
- if ((Boolean) (flags & DDBJ_VARIANT_FORMAT) != 0) {
+ if ((Boolean) ((flags & DDBJ_VARIANT_FORMAT) != 0)) {
aw.citSubsFirst = TRUE;
aw.hideGeneFeats = TRUE;
aw.newLocusLine = FALSE;
+ aw.showBaseCount = TRUE;
ajp->newSourceOrg = FALSE;
}
-
- aw.format = format;
- aw.mode = mode;
- aw.style = style;
+ if (mode == SEQUIN_MODE || mode == DUMP_MODE) {
+ aw.showBaseCount = TRUE;
+ }
aw.hup = FALSE;
aw.ssp = NULL;
@@ -22952,17 +24047,32 @@ static void PrintFtableIntervals (
)
{
- Boolean partial5;
- Boolean partial3;
- SeqLocPtr slp;
- Int4 start;
- Int4 stop;
- Char str [64];
- Char str1 [32];
- Char str2 [32];
+ IntFuzzPtr ifp;
+ Boolean partial5;
+ Boolean partial3;
+ SeqLocPtr slp;
+ SeqPntPtr spp;
+ Int4 start;
+ Int4 stop;
+ Char str [64];
+ Char str1 [32];
+ Char str2 [32];
if (head == NULL || target == NULL || location == NULL || label == NULL) return;
+ if (location->choice == SEQLOC_PNT) {
+ spp = (SeqPntPtr) location->data.ptrvalue;
+ if (spp != NULL) {
+ ifp = spp->fuzz;
+ if (ifp != NULL && ifp->choice == 4 && ifp->a == 3) {
+ sprintf (str, "%ld^\t%ld\t%s\n", (long) (spp->point + 1),
+ (long) (spp->point + 2), label);
+ ValNodeCopyStr (head, 0, str);
+ return;
+ }
+ }
+ }
+
slp = SeqLocFindNext (location, NULL);
if (slp == NULL) return;
@@ -23074,7 +24184,7 @@ static void PrintFTUserFld (
break;
}
}
- if (StringHasNoText (textstr)) break;
+ /* if (StringHasNoText (textstr)) break; */
str = (CharPtr) MemNew (StringLen (textstr) + StringLen (goid) + StringLen (evidence) + 40);
if (str == NULL) return;
@@ -23182,6 +24292,33 @@ static void PrintFTCodeBreak (
}
}
+static SeqIdPtr SeqIdFindForTable (SeqIdPtr sip)
+/* Picks one Seq-id from the chain sip for feature table output: returns whatever SeqIdSelect chooses under the rank table built below */
+{
+ Uint1 order [NUM_SEQID];
+/* NOTE(review): presumably a lower rank means more preferred, as with SeqIdFindBest - SeqIdSelect is not shown here, confirm */
+ SeqIdBestRank (order, NUM_SEQID); /* start from the toolkit's default ranks, then override the entries below */
+ order [SEQID_LOCAL] = 20;
+ order [SEQID_GENBANK] = 5;
+ order [SEQID_EMBL] = 5;
+ order [SEQID_PIR] = 5;
+ order [SEQID_SWISSPROT] = 5;
+ order [SEQID_DDBJ] = 5;
+ order [SEQID_PRF] = 5;
+ order [SEQID_PDB] = 5;
+ order [SEQID_TPG] = 5;
+ order [SEQID_TPE] = 5;
+ order [SEQID_TPD] = 5;
+ order [SEQID_PATENT] = 10;
+ order [SEQID_OTHER] = 8;
+ order [SEQID_GENERAL] = 15;
+ order [SEQID_GIBBSQ] = 15;
+ order [SEQID_GIBBMT] = 15;
+ order [SEQID_GIIM] = 20;
+ order [SEQID_GI] = 20;
+ return SeqIdSelect (sip, order, NUM_SEQID);
+}
+
/* #define MAKE_MRNA_GPS_FEAT */
static void PrintFtableLocAndQuals (
@@ -23193,28 +24330,39 @@ static void PrintFtableLocAndQuals (
)
{
- CodeBreakPtr cbp;
- CdRegionPtr crp;
- DbtagPtr dbt;
- GBQualPtr gbq;
- ValNodePtr geneorprotdb;
- GeneRefPtr grp;
- CharPtr label;
- ObjectIdPtr oip;
- BioseqPtr prod;
- SeqFeatPtr prot;
- ProtRefPtr prp;
- Boolean pseudo;
- RnaRefPtr rrp;
- SeqIdPtr sip;
- SeqIdPtr sip2;
- Char str [256];
- Char tmp [300];
- tRNAPtr trp;
- ValNodePtr vnp;
+ Int2 bondidx;
+ BioseqSetPtr bssp;
+ CodeBreakPtr cbp;
+ BioseqPtr cdna;
+ SeqFeatPtr cds;
+ CdRegionPtr crp;
+ SeqMgrDescContext dcontext;
+ DbtagPtr dbt;
+ SeqMgrFeatContext fcontext;
+ GBQualPtr gbq;
+ ValNodePtr geneorprotdb;
+ GeneRefPtr grp;
+ Boolean is_gps_genomic = FALSE;
+ CharPtr label;
+ MolInfoPtr mip;
+ ObjectIdPtr oip;
+ BioseqPtr prod;
+ SeqFeatPtr prot;
+ ProtRefPtr prp;
+ Boolean pseudo;
+ RnaRefPtr rrp;
+ SeqDescrPtr sdp;
+ Int4 sec_str;
+ SeqIdPtr sip;
+ SeqIdPtr sip2;
+ Int2 siteidx;
+ Char str [256];
+ Char tmp [300];
+ tRNAPtr trp;
+ ValNodePtr vnp;
#ifdef MAKE_MRNA_GPS_FEAT
- CharPtr rnaid;
- CharPtr rnaprod;
+ CharPtr rnaid;
+ CharPtr rnaprod;
#endif
if (head == NULL || target == NULL || sfp == NULL || context == NULL) return;
@@ -23227,6 +24375,23 @@ static void PrintFtableLocAndQuals (
label = "???";
}
+ /* check if genomic sequence in genomic product set */
+
+ if (target->idx.parenttype == OBJ_BIOSEQSET) {
+ bssp = (BioseqSetPtr) target->idx.parentptr;
+ if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_molinfo, &dcontext);
+ if (sdp != NULL) {
+ mip = (MolInfoPtr) sdp->data.ptrvalue;
+ if (mip != NULL && mip->biomol == MOLECULE_TYPE_GENOMIC) {
+#ifndef MAKE_MRNA_GPS_FEAT
+ is_gps_genomic = TRUE;
+#endif
+ }
+ }
+ }
+ }
+
PrintFtableIntervals (head, target, sfp->location, label);
geneorprotdb = NULL;
@@ -23335,7 +24500,8 @@ static void PrintFtableLocAndQuals (
}
}
if (prod != NULL) {
- for (sip = prod->id; sip != NULL; sip = sip->next) {
+ sip = SeqIdFindForTable (prod->id);
+ if (sip != NULL) {
if (sip->choice == SEQID_GENBANK ||
sip->choice == SEQID_EMBL ||
sip->choice == SEQID_DDBJ ||
@@ -23380,6 +24546,39 @@ static void PrintFtableLocAndQuals (
}
}
}
+ if (is_gps_genomic) {
+ cds = SeqMgrGetCDSgivenProduct (prod, NULL);
+ if (cds != NULL) {
+ cdna = BioseqFindFromSeqLoc (cds->location);
+ if (cdna != NULL) {
+ sip = SeqIdFindWorst (cdna->id);
+ if (sip != NULL) {
+ if (sip->choice == SEQID_GENBANK ||
+ sip->choice == SEQID_EMBL ||
+ sip->choice == SEQID_DDBJ ||
+ sip->choice == SEQID_OTHER ||
+ sip->choice == SEQID_TPG ||
+ sip->choice == SEQID_TPE ||
+ sip->choice == SEQID_TPD) {
+ if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) {
+ sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ } else if (sip->choice == SEQID_LOCAL && (! ajp->flags.suppressLocalID)) {
+ if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) {
+ sprintf (tmp, "\t\t\ttranscript_id\tlcl|%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ } else if (sip->choice == SEQID_GENERAL) {
+ if (SeqIdWrite (sip, str, PRINTID_FASTA_GENERAL, sizeof (str)) != NULL) {
+ sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ }
+ }
+ }
+ }
+ }
} else if (sfp->product != NULL) {
sip = SeqLocId (sfp->product);
if (sip != NULL) {
@@ -23441,7 +24640,8 @@ static void PrintFtableLocAndQuals (
}
}
if (prod != NULL) {
- for (sip = prod->id; sip != NULL; sip = sip->next) {
+ sip = SeqIdFindForTable (prod->id);
+ if (sip != NULL) {
if (sip->choice == SEQID_GENBANK ||
sip->choice == SEQID_EMBL ||
sip->choice == SEQID_DDBJ ||
@@ -23465,6 +24665,39 @@ static void PrintFtableLocAndQuals (
}
}
}
+ if (is_gps_genomic) {
+ cds = SeqMgrGetNextFeature (prod, NULL, SEQFEAT_CDREGION, 0, &fcontext);
+ if (cds != NULL && SeqMgrGetNextFeature (prod, cds, SEQFEAT_CDREGION, 0, &fcontext) == NULL) {
+ prod = BioseqFindFromSeqLoc (cds->product);
+ if (prod != NULL) {
+ sip = SeqIdFindWorst (prod->id);
+ if (sip != NULL) {
+ if (sip->choice == SEQID_GENBANK ||
+ sip->choice == SEQID_EMBL ||
+ sip->choice == SEQID_DDBJ ||
+ sip->choice == SEQID_OTHER ||
+ sip->choice == SEQID_TPG ||
+ sip->choice == SEQID_TPE ||
+ sip->choice == SEQID_TPD) {
+ if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) {
+ sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ } else if (sip->choice == SEQID_LOCAL && (! ajp->flags.suppressLocalID)) {
+ if (SeqIdWrite (sip, str, PRINTID_TEXTID_ACC_VER, sizeof (str)) != NULL) {
+ sprintf (tmp, "\t\t\tprotein_id\tlcl|%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ } else if (sip->choice == SEQID_GENERAL) {
+ if (SeqIdWrite (sip, str, PRINTID_FASTA_GENERAL, sizeof (str)) != NULL) {
+ sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ }
+ }
+ }
+ }
+ }
} else if (sfp->product != NULL) {
sip = SeqLocId (sfp->product);
if (sip != NULL) {
@@ -23499,6 +24732,87 @@ static void PrintFtableLocAndQuals (
}
}
break;
+ case SEQFEAT_PROT :
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ if (prp != NULL) {
+ if (prp->name != NULL) {
+ for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+ StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
+ if (! StringHasNoText (str)) {
+ sprintf (tmp, "\t\t\tproduct\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ }
+ }
+ if (prp->desc != NULL) {
+ StringNCpy_0 (str, prp->desc, sizeof (str));
+ if (! StringHasNoText (str)) {
+ sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ }
+ for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
+ StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
+ if (! StringHasNoText (str)) {
+ sprintf (tmp, "\t\t\tfunction\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ }
+ for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
+ StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
+ if (! StringHasNoText (str)) {
+ sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ }
+ }
+ StringNCpy_0 (str, sfp->comment, sizeof (str));
+ if (! StringHasNoText (str)) {
+ sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ break;
+ case SEQFEAT_REGION :
+ StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
+ if (! StringHasNoText (str)) {
+ sprintf (tmp, "\t\t\tregion\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ break;
+ case SEQFEAT_BOND :
+ bondidx = (Int2) sfp->data.value.intvalue;
+ if (bondidx == 255) {
+ bondidx = 5;
+ }
+ if (bondidx > 0 && bondidx < 6) {
+ sprintf (tmp, "\t\t\tbond_type\t%s\n", bondList [bondidx]);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ break;
+ case SEQFEAT_SITE :
+ siteidx = (Int2) sfp->data.value.intvalue;
+ if (siteidx == 255) {
+ siteidx = 26;
+ }
+ if (siteidx > 0 && siteidx < 27) {
+ sprintf (tmp, "\t\t\tsite_type\t%s\n", siteList [siteidx]);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ break;
+ case SEQFEAT_PSEC_STR :
+ sec_str = (Int2) sfp->data.value.intvalue;
+ if (sec_str > 0 && sec_str <= 3) {
+ sprintf (tmp, "\t\t\tsec_str_type\t%s\n", secStrText [sec_str]);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ break;
+ case SEQFEAT_HET :
+ StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
+ if (! StringHasNoText (str)) {
+ sprintf (tmp, "\t\t\theterogen\t%s\n", str);
+ ValNodeCopyStr (head, 0, tmp);
+ }
+ break;
default :
break;
}
@@ -23655,6 +24969,7 @@ NLM_EXTERN CharPtr asn2gnbk_format (
ValNodePtr head;
IntAsn2gbJobPtr iajp;
Char id [42];
+ IntRefBlockPtr irp;
size_t max;
SeqEntryPtr oldscope;
QualValPtr qv;
@@ -23662,6 +24977,7 @@ NLM_EXTERN CharPtr asn2gnbk_format (
SeqEntryPtr sep;
SeqFeatPtr sfp;
SeqIdPtr sip;
+ SeqIdPtr sip2;
CharPtr str = NULL;
BioseqPtr target;
Char tmp [53];
@@ -23717,12 +25033,37 @@ NLM_EXTERN CharPtr asn2gnbk_format (
if (blocktype == FEATHEADER_BLOCK) {
sip = SeqIdFindBest (target->id, 0);
+ if (sip != NULL && sip->choice == SEQID_GI) {
+ sip2 = GetSeqIdForGI (sip->data.intvalue);
+ if (sip2 != NULL) {
+ sip = sip2;
+ }
+ }
SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
if (! StringHasNoText (id)) {
sprintf (tmp, ">Feature %s\n", id);
str = StringSave (tmp);
}
+ } else if (blocktype == REFERENCE_BLOCK) {
+
+ irp = (IntRefBlockPtr) bbp;
+ if (irp->loc != NULL) {
+ if (irp->rb.pmid != 0 || irp->rb.muid != 0) {
+ head = NULL;
+ PrintFtableIntervals (&head, target, irp->loc, "REFERENCE");
+ if (irp->rb.pmid != 0) {
+ sprintf (tmp, "\t\t\tpmid\t%ld\n", (long) irp->rb.pmid);
+ ValNodeCopyStr (&head, 0, tmp);
+ } else if (irp->rb.muid != 0) {
+ sprintf (tmp, "\t\t\tmuid\t%ld\n", (long) irp->rb.muid);
+ ValNodeCopyStr (&head, 0, tmp);
+ }
+ str = MergeValNodeStrings (head);
+ ValNodeFreeData (head);
+ }
+ }
+
} else if (blocktype == FEATURE_BLOCK) {
sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
@@ -23968,7 +25309,11 @@ NLM_EXTERN Boolean SeqEntryToGnbk (
if (lockFarComp || lockFarLocs || lockFarProd) {
locks = locks ^ (LOCK_FAR_COMPONENTS | LOCK_FAR_LOCATIONS | LOCK_FAR_PRODUCTS);
- bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd);
+ if (slp != NULL && lockFarComp) {
+ bsplist = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, slp);
+ } else {
+ bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
+ }
}
lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
diff --git a/api/asn2gnbk.h b/api/asn2gnbk.h
index 0a1c056e..1c2bfc61 100644
--- a/api/asn2gnbk.h
+++ b/api/asn2gnbk.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 6.52 $
+* $Revision: 6.53 $
*
* File Description: New GenBank flatfile generator
*
@@ -142,6 +142,9 @@ typedef unsigned long CstType;
#define ONLY_GENE_RIFS 2048
#define LATEST_GENE_RIFS 3072
+#define SHOW_PROT_FTABLE 4096 /* custom flag: asn2gnbk_setup clears skipProts and skipMrnas when this bit is set */
+#define SHOW_FTABLE_REFS 8192 /* custom flag: asn2gnbk_setup sets aw.showRefs when this bit is set */
+
/* opaque pointer for special extensions */
struct XtraData;
diff --git a/api/edutil.c b/api/edutil.c
index b0227a9a..b6ea951e 100644
--- a/api/edutil.c
+++ b/api/edutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/4/94
*
-* $Revision: 6.19 $
+* $Revision: 6.20 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,9 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: edutil.c,v $
+* Revision 6.20 2003/06/03 20:25:34 kans
+* SeqLocReplaceID works on bonds if both ends bonded to the same Seq-id
+*
* Revision 6.19 2003/02/10 22:57:45 kans
* added BioseqCopyEx, which takes a BioseqPtr instead of a SeqIdPtr for the source
*
@@ -3490,6 +3493,7 @@ NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip)
SeqLocPtr curr;
PackSeqPntPtr pspp;
SeqIntPtr target_sit;
+ SeqBondPtr sbp;
SeqPntPtr spp;
switch (slp->choice) {
@@ -3519,10 +3523,23 @@ NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip)
target_sit->id = SeqIdDup (new_sip);
break;
case SEQLOC_PNT :
- spp = (SeqPntPtr)slp->data.ptrvalue;
+ spp = (SeqPntPtr) slp->data.ptrvalue;
SeqIdFree(spp->id);
spp->id = SeqIdDup(new_sip);
break;
+ case SEQLOC_BOND :
+ sbp = (SeqBondPtr) slp->data.ptrvalue;
+ if (sbp == NULL || sbp->a == NULL || sbp->b == NULL) break;
+ /* only do this if both ends bonded to same Seq-id */
+ if (SeqIdMatch (sbp->a->id, sbp->b->id)) {
+ spp = sbp->a;
+ SeqIdFree(spp->id);
+ spp->id = SeqIdDup(new_sip);
+ spp = sbp->b;
+ SeqIdFree(spp->id);
+ spp->id = SeqIdDup(new_sip);
+ }
+ break;
default :
break;
}
diff --git a/api/explore.h b/api/explore.h
index a4923d10..5d8ed27d 100644
--- a/api/explore.h
+++ b/api/explore.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/30/98
*
-* $Revision: 6.43 $
+* $Revision: 6.45 $
*
* File Description: Reengineered and optimized exploration functions
* to be used for future code
@@ -208,6 +208,11 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingGene (
SeqMgrFeatContext PNTR context
);
+NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingOperon (
+ SeqLocPtr slp,
+ SeqMgrFeatContext PNTR context
+);
+
/*****************************************************************************
*
* SeqMgrGetOverlappingXXX returns the overlapping mRNA/CDS/publication/biosource
@@ -482,6 +487,7 @@ NLM_EXTERN Boolean LIBCALL SeqMgrGetBioseqContext (
#define LOCATION_SUBSET 2 /* SeqLocAinB must be satisfied, no boundary checking */
#define CHECK_INTERVALS 3 /* SeqLocAinB plus internal exon-intron boundaries must match */
#define INTERVAL_OVERLAP 4 /* at least one pair of intervals must overlap */
+#define COMMON_INTERVAL 5 /* at least one pair of intervals must match */
NLM_EXTERN VoidPtr LIBCALL SeqMgrBuildFeatureIndex (
BioseqPtr bsp,
diff --git a/api/fdlKludge.h b/api/fdlKludge.h
index 13d3c8bf..96783f80 100644
--- a/api/fdlKludge.h
+++ b/api/fdlKludge.h
@@ -28,13 +28,25 @@
*
* Version Creation Date: 10/15/01
*
-* $Revision: 6.8 $
+* $Revision: 6.12 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: fdlKludge.h,v $
+* Revision 6.12 2003/06/11 20:15:45 jianye
+* changed unigene linkout
+*
+* Revision 6.11 2003/06/02 20:02:23 jianye
+* Added geo linkout
+*
+* Revision 6.10 2003/05/05 19:33:54 jianye
+* Change url for structure linkout
+*
+* Revision 6.9 2003/04/21 21:49:37 jianye
+* changed some url
+*
* Revision 6.8 2003/04/14 20:43:22 jianye
* Adde geo url and modified structure linkout url
*
@@ -63,7 +75,6 @@
#include <objloc.h>
-#define total_linkout 4
#define linkout_locuslink (1<<0)
#define linkout_unigene (1<<1)
@@ -72,12 +83,12 @@
/* url for linkout*/
#define URL_LocusLink "<a href=\"http://www.ncbi.nlm.nih.gov/LocusLink/list.cgi?Q=%d%s\"><img border=0 height=16 width=16 src=\"/blast/images/L.gif\" alt=\"LocusLink info\"></a>"
-#define URL_Unigene "<a href=\"http://www.ncbi.nlm.nih.gov/UniGene/query.cgi?ORG=%s&TEXT=@gi(%d)\"><img border=0 height=16 width=16 src=\"/blast/images/U.gif\" alt=\"UniGene info\"></a>"
-
-#define URL_Structure "<a href=\"http://ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\"><img border=0 height=16 width=16 src=\"http://ncbi.nlm.nih.gov/Structure/cblast/str_link.gif\" alt=\"Related structures\"></a>"
+#define URL_Unigene "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=unigene&cmd=search&term=%d[Nucleotide+UID]\"><img border=0 height=16 width=16 src=\"/blast/images/U.gif\" alt=\"UniGene info\"></a>"
-#define URL_Structure_Overview "<a href=\"http://ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\">Related Structures</a>"
+#define URL_Structure "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\"><img border=0 height=16 width=16 src=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/str_link.gif\" alt=\"Related structures\"></a>"
-#define URL_Geo "<a href=\"http://boris.ncbi.nlm.nih.gov:2441/entrez/testers/edgar/query.fcgi?term=%d[gi]&db=geo\"><img border=0 height=16 width=16 src=\"/blast/images/G.gif\" alt=\"Geo\"></a>"
+#define URL_Structure_Overview "<a href=\"http://www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&blast_CD_RID=%s&blast_view=%s&hsp=0&taxname=%s&client=blast\">Related Structures</a>"
+#define URL_Geo "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=geo&term=%d[gi]\"><img border=0 height=16 width=16 src=\"/blast/images/G.gif\" alt=\"Geo\"></a>"
+
#endif
diff --git a/api/ffprint.c b/api/ffprint.c
index dfe12693..b0fa78ce 100644
--- a/api/ffprint.c
+++ b/api/ffprint.c
@@ -29,13 +29,16 @@
*
* Version Creation Date: 7/15/95
*
-* $Revision: 6.7 $
+* $Revision: 6.8 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: ffprint.c,v $
+ * Revision 6.8 2003/07/15 14:35:56 dondosha
+ * Added #defines for substitutes to fprintf and fflush, needed for gzip compression of Web BLAST results
+ *
* Revision 6.7 2002/08/26 22:06:57 kans
* ff_RecalculateLinks (MS) to fix hotlink artifact
*
@@ -125,6 +128,11 @@ parameters combined into Asn2ffJobPtr structure
#include <ffprint.h>
#include <ncbithr.h>
+int (*ff_fprintf)(FILE*, const char *, ...) = fprintf; /* replaceable output hook, defaults to fprintf; per the 6.8 log entry, added for gzip compression of Web BLAST results */
+int (*ff_fflush)(FILE*) = fflush; /* replaceable flush hook, defaults to fflush */
+/* from here on in this file, fprintf and fflush calls are routed through the two pointers above */
+#define fprintf ff_fprintf
+#define fflush ff_fflush
static TNlmTls ffprint_tls = NULL;
diff --git a/api/findrepl.c b/api/findrepl.c
index 43b5ca45..37e91158 100644
--- a/api/findrepl.c
+++ b/api/findrepl.c
@@ -44,6 +44,15 @@
* RCS Modification History:
* -------------------------
* $Log: findrepl.c,v $
+* Revision 6.10 2003/07/31 20:54:54 kans
+* FindReplaceString does not need do_replace argument
+*
+* Revision 6.9 2003/07/31 18:18:03 kans
+* added FindReplaceString
+*
+* Revision 6.8 2003/05/11 21:12:50 kans
+* FindReplAligns loops through StdSegPtr chain, also does ssp->ids within
+*
* Revision 6.7 2002/06/11 14:41:20 kans
* added support for locus_tag
*
@@ -1214,9 +1223,13 @@ static void FindReplAligns (
}
break;
case SAS_STD :
- ssp = (StdSegPtr) sap->segs;
- for (slp = ssp->loc; slp != NULL; slp = slp->next) {
- VisitSeqIdsInSeqLoc (slp, userdata, FindReplSeqId);
+ for (ssp = (StdSegPtr) sap->segs; ssp != NULL; ssp = ssp->next) {
+ for (sip = ssp->ids; sip != NULL; sip = sip->next) {
+ FindReplSeqId (sip, userdata);
+ }
+ for (slp = ssp->loc; slp != NULL; slp = slp->next) {
+ VisitSeqIdsInSeqLoc (slp, userdata, FindReplSeqId);
+ }
}
break;
case SAS_DISC :
@@ -1699,3 +1712,56 @@ NLM_EXTERN void FindReplaceInEntity (
}
}
+/*=======================================================================*/
+/* */
+/* FindReplaceString() - find/replace just one string. */
+/* Fills a local FindStruct and runs FindReplString on the one string. */
+/*=======================================================================*/
+
+NLM_EXTERN void FindReplaceString (
+ CharPtr PNTR strp, /* passed unchanged to FindReplString; a NULL strp makes the call a no-op */
+ CharPtr find_string, /* pattern to look for; the call is a no-op when StringHasNoText is true for it */
+ CharPtr replace_string, /* stored in fs.replace_string for FindReplString */
+ Boolean case_counts, /* stored in fs.case_counts; when FALSE the table below is keyed on TO_UPPER'd pattern characters */
+ Boolean whole_word /* stored in fs.whole_word; interpreted by FindReplString, not here */
+)
+
+{
+ int ch; /* index into fs.d, and the (possibly upper-cased) pattern character */
+ FindStruct fs; /* search state, local to this call */
+ int j; /* position within find_string */
+
+ if (strp == NULL || StringHasNoText (find_string)) return;
+
+ MemSet ((Pointer) &fs, 0, sizeof (FindStruct)); /* fields not assigned below stay zero */
+
+ fs.entityID = 0;
+ fs.find_string = find_string;
+ fs.replace_string = replace_string;
+ fs.case_counts = case_counts;
+ fs.whole_word = whole_word;
+ fs.do_replace = TRUE; /* hard-wired: this entry point has no do_replace argument */
+ fs.select_item = FALSE;
+ fs.send_update = FALSE;
+
+ fs.did_find = FALSE;
+ fs.did_replace = FALSE;
+ fs.dirty = FALSE;
+
+ /* build Boyer-Moore displacement array in advance */
+
+ fs.subLen = StringLen (find_string);
+
+ for (ch = 0; ch < 256; ch++) { /* default entry: the full pattern length */
+ fs.d [ch] = fs.subLen;
+ }
+ for (j = 0; j < (int) (fs.subLen - 1); j++) { /* every pattern character except the last */
+ ch = (int) (case_counts ? find_string [j] : TO_UPPER (find_string [j]));
+ if (ch >= 0 && ch <= 255) { /* keeps the index inside the 256 entries filled above */
+ fs.d [ch] = fs.subLen - j - 1; /* distance from position j to the last pattern character */
+ }
+ }
+
+ FindReplString (strp, &fs); /* the find/replace itself is done by FindReplString */
+}
+
diff --git a/api/findrepl.h b/api/findrepl.h
index 84f16c72..20df8b12 100644
--- a/api/findrepl.h
+++ b/api/findrepl.h
@@ -44,6 +44,12 @@
* RCS Modification History:
* -------------------------
* $Log: findrepl.h,v $
+* Revision 6.3 2003/07/31 20:54:54 kans
+* FindReplaceString does not need do_replace argument
+*
+* Revision 6.2 2003/07/31 18:18:03 kans
+* added FindReplaceString
+*
* Revision 6.1 2000/11/03 20:36:00 kans
* FindReplaceInEntity replaces FindInEntity and FindInEntityX - complete redesign, no longer using AsnExpOptExplore because of the difficulty of replacing with a larger string (TF + JK)
*
@@ -112,6 +118,14 @@ NLM_EXTERN void FindReplaceInEntity (
Boolean do_seqid_local
);
+NLM_EXTERN void FindReplaceString (
+ CharPtr PNTR strp, /* string to process; a NULL strp makes the call a no-op */
+ CharPtr find_string, /* text to find; a call with no text here is a no-op */
+ CharPtr replace_string, /* replacement text */
+ Boolean case_counts, /* copied to the search state's case_counts flag */
+ Boolean whole_word /* copied to the search state's whole_word flag */
+);
+
#ifdef __cplusplus
extern "C" }
diff --git a/api/gbfeat.c b/api/gbfeat.c
index dd2fa347..55eeac51 100644
--- a/api/gbfeat.c
+++ b/api/gbfeat.c
@@ -3,9 +3,13 @@
* -- all routines for checking genbank feature table
* -- all extern variables are in gbftglob.c
* 10-11-93
-$Revision: 6.8 $
+$Revision: 6.9 $
*
* $Log: gbfeat.c,v $
+* Revision 6.9 2003/10/09 15:35:51 bazhin
+* Qualifier "rpt_unit" is removed from the list of ones to be split
+* by commas.
+*
* Revision 6.8 2001/12/06 17:00:41 kans
* TextSave takes size_t, not Int2, otherwise titin protein tries to allocate negative number
*
@@ -74,9 +78,9 @@ $Revision: 6.8 $
#include <gbfeat.h>
#include <errdefn.h>
-#define ParFlat_SPLIT_IGNORE 5
+#define ParFlat_SPLIT_IGNORE 4
CharPtr GBQual_names_split_ignore[ParFlat_SPLIT_IGNORE] = {
-"citation", "EC_number", "rpt_type", "rpt_unit", "usedin"};
+"citation", "EC_number", "rpt_type", "usedin"};
/*------------------------- GBQualNameValid() ------------------------*/
/****************************************************************************
diff --git a/api/gbftdef.h b/api/gbftdef.h
index fa06de1a..f9d5df3c 100644
--- a/api/gbftdef.h
+++ b/api/gbftdef.h
@@ -3,6 +3,15 @@
* -- GenBank Feature table define file
*
* $Log: gbftdef.h,v $
+* Revision 6.17 2003/10/07 13:50:36 kans
+* added gap, operon, oriT features and ecotype, estimated_length and operon qualifiers
+*
+* Revision 6.16 2003/08/19 15:18:37 kans
+* added GBQUAL_segment, increased ParFlat_TOTAL_GBQUAL and opt_qual array size
+*
+* Revision 6.15 2003/05/07 22:03:31 kans
+* added GBQUAL_mol_type, raised opt_qual array to 51 elements
+*
* Revision 6.14 2003/02/22 21:20:05 kans
* added GBQUAL_locus_tag, legal for now in gene features
*
@@ -175,13 +184,18 @@
#define GBQUAL_isolation_source 79
#define GBQUAL_serovar 80
#define GBQUAL_locus_tag 81
+#define GBQUAL_mol_type 82
+#define GBQUAL_segment 83
+#define GBQUAL_ecotype 84
+#define GBQUAL_estimated_length 85
+#define GBQUAL_operon 86
-#define ParFlat_TOTAL_GBQUAL 82
+#define ParFlat_TOTAL_GBQUAL 87
#define ParFlat_TOTAL_IntOr 3
#define ParFlat_TOTAL_LRB 3
#define ParFlat_TOTAL_Exp 2
#define ParFlat_TOTAL_Rpt 7
-#define ParFlat_TOTAL_GBFEAT 64
+#define ParFlat_TOTAL_GBFEAT 67
#define Class_pos_aa 1
#define Class_text 2
@@ -214,7 +228,7 @@ typedef struct sematic_gbfeature {
Int2 mand_num;
Int2 mand_qual[5];
Int2 opt_num;
- Int2 opt_qual[50];
+ Int2 opt_qual[55];
} SematicFeat, PNTR SematicFeatPtr;
typedef struct gbfeat_name {
diff --git a/api/gbftglob.c b/api/gbftglob.c
index 894ed6b6..e9b26887 100644
--- a/api/gbftglob.c
+++ b/api/gbftglob.c
@@ -4,6 +4,31 @@
* -- all the defined variables in the gbfeat.h
*
* $Log: gbftglob.c,v $
+* Revision 6.37 2003/10/07 17:05:58 kans
+* added allele and operon to many features
+*
+* Revision 6.36 2003/10/07 13:50:36 kans
+* added gap, operon, oriT features and ecotype, estimated_length and operon qualifiers
+*
+* Revision 6.35 2003/10/06 16:19:45 kans
+* rpt_unit went from Class_token to Class_text
+*
+* Revision 6.34 2003/10/03 15:16:14 bazhin
+* Numeric value "opt_num" in STATIC__ParFlat_GBFeat array changed
+* from 13 to 15 for feature "allele" to cover "usedin" and "locus_tag"
+* qualifiers.
+*
+* Revision 6.33 2003/10/03 12:44:27 bazhin
+* Numeric value "opt_num" in STATIC__ParFlat_GBFeat array changed
+* from 13 to 14 for feature "precursor_RNA" to cover "locus_tag"
+* qualifier.
+*
+* Revision 6.32 2003/08/19 15:19:04 kans
+* added GBQUAL_segment, increased ParFlat_TOTAL_GBQUAL and opt_qual array size
+*
+* Revision 6.31 2003/05/07 22:03:31 kans
+* added GBQUAL_mol_type, raised opt_qual array to 51 elements
+*
* Revision 6.30 2003/03/06 16:23:13 kans
* when gene qualifier was removed as required on gene feature, it was not put back as optional qualifier
*
@@ -181,7 +206,7 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = {
{"proviral", Class_none}, {"pseudo", Class_none},
{"rearranged", Class_none}, { "replace", Class_text},
{"rpt_family", Class_text}, {"rpt_type", Class_rpt},
- { "rpt_unit", Class_token}, { "sex", Class_text},
+ { "rpt_unit", Class_text}, { "sex", Class_text},
{"sequenced_mol", Class_text}, { "serotype", Class_text},
{"specific_host", Class_text}, {"standard_name", Class_text},
{"strain", Class_text}, {"sub_clone", Class_text},
@@ -195,8 +220,9 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = {
{"organelle", Class_text}, {"transcript_id", Class_text},
{"transgenic", Class_none}, {"environmental_sample", Class_none},
{"isolation_source", Class_text}, {"serovar", Class_text},
- {"locus_tag", Class_text},
- };
+ {"locus_tag", Class_text}, {"mol_type", Class_text},
+ {"segment", Class_text},{"ecotype", Class_text},
+ {"estimated_length", Class_text}, {"operon", Class_text} };
NLM_EXTERN GbFeatNamePtr x_ParFlat_GBQual_names(void) {
return STATIC__ParFlat_GBQual_names;
@@ -214,70 +240,72 @@ CharPtr ParFlat_RptString[ParFlat_TOTAL_Rpt] = {
"dispersed", "other"};
static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
- {"allele", 0, {-1, -1, -1, -1, -1}, 13,
+ {"allele", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_frequency,
GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_phenotype, GBQUAL_product, GBQUAL_replace, GBQUAL_standard_name,
GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"attenuator", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"attenuator", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label,
GBQUAL_gene, GBQUAL_map,
GBQUAL_note, GBQUAL_partial, GBQUAL_phenotype, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"C_region", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"C_region", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"CAAT_signal", 0, {-1, -1, -1, -1, -1}, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"CAAT_signal", 0, {-1, -1, -1, -1, -1}, 11,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label,
GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"CDS", 0, {-1, -1, -1, -1, -1}, 25,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"CDS", 0, {-1, -1, -1, -1, -1}, 26,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_codon, GBQUAL_codon_start, GBQUAL_db_xref,
GBQUAL_EC_number,GBQUAL_evidence, GBQUAL_exception, GBQUAL_function,
GBQUAL_gdb_xref, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note,
GBQUAL_number, GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo,
GBQUAL_standard_name, GBQUAL_translation, GBQUAL_transl_except,
GBQUAL_transl_table, GBQUAL_usedin, GBQUAL_protein_id,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1}},
- {"conflict", 1, {GBQUAL_citation, -1, -1, -1, -1}, 8,
+ GBQUAL_locus_tag, GBQUAL_operon,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"conflict", 1, {GBQUAL_citation, -1, -1, -1, -1}, 9,
{GBQUAL_db_xref,GBQUAL_evidence, GBQUAL_map, GBQUAL_note, GBQUAL_gene, GBQUAL_usedin,
- GBQUAL_replace,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_replace, GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"D-loop", 0, {-1, -1, -1, -1}, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"D-loop", 0, {-1, -1, -1, -1}, 11,
{GBQUAL_evidence, GBQUAL_citation, GBQUAL_label, GBQUAL_gene,
GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin, GBQUAL_db_xref,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"D_segment", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"D_segment", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product,
GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin, GBQUAL_db_xref,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"enhancer", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"enhancer", 0, {-1, -1, -1, -1, -1}, 12,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label,
GBQUAL_gene, GBQUAL_map,
GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"exon", 0, {-1, -1, -1, -1, -1}, 17,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_EC_number,
GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label,
@@ -285,28 +313,34 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name,
GBQUAL_usedin,GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"GC_signal", 0, {-1, -1, -1, -1, -1}, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"gap", 1, {GBQUAL_estimated_length, -1, -1, -1, -1}, 0,
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"GC_signal", 0, {-1, -1, -1, -1, -1}, 11,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_label,
GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"gene", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"gene", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence,
GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_pseudo, GBQUAL_phenotype, GBQUAL_usedin, GBQUAL_locus_tag,
+ GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"iDNA", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"iDNA", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_label,
GBQUAL_gene, GBQUAL_map, GBQUAL_note, GBQUAL_number, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"intron", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_cons_splice, GBQUAL_db_xref,
GBQUAL_evidence, GBQUAL_function,
@@ -314,244 +348,259 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1,}},
- {"J_segment", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"J_segment", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product,
GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"LTR", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"LTR", 0, {-1, -1, -1, -1, -1}, 12,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
- GBQUAL_gene,
- GBQUAL_label, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name,
- GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_gene, GBQUAL_label, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name,
+ GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 16,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 17,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_EC_number,
GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label,
GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_pseudo,
GBQUAL_product, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_binding", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"misc_binding", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 12,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_difference", 0, {-1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"misc_difference", 0, {-1, -1, -1, -1}, 15,
{GBQUAL_phenotype, GBQUAL_citation, GBQUAL_clone, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_partial, GBQUAL_replace,
GBQUAL_note, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1}},
- {"misc_feature", 0, {-1, -1, -1, -1, -1}, 16,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"misc_feature", 0, {-1, -1, -1, -1, -1}, 17,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_number,
GBQUAL_phenotype, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name,
- GBQUAL_usedin, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_recomb", 0, {-1, -1, -1, -1, -1}, 12,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"misc_recomb", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_organism,
GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_signal", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"misc_signal", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_phenotype,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_structure", 0, {-1, -1, -1, -1, -1}, 12,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"misc_structure", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"modified_base", 1, {GBQUAL_mod_base, -1, -1, -1, -1}, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"modified_base", 1, {GBQUAL_mod_base, -1, -1, -1, -1}, 11,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_frequency,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"mRNA", 0, {-1, -1, -1, -1, -1}, 16,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"mRNA", 0, {-1, -1, -1, -1, -1}, 17,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_transcript_id, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_transcript_id, GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"mutation", 0, {-1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_frequency,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_phenotype,
GBQUAL_product, GBQUAL_replace, GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"N_region", 0, {-1, -1, -1, -1, -1}, 12,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_product,
GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"old_sequence", 1, {GBQUAL_citation, -1, -1, -1}, 9,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"old_sequence", 1, {GBQUAL_citation, -1, -1, -1}, 10,
{GBQUAL_db_xref, GBQUAL_gene, GBQUAL_evidence, GBQUAL_map, GBQUAL_note,
GBQUAL_partial, GBQUAL_replace, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"polyA_signal", 0, {-1, -1, -1, -1, -1}, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"operon", 1, {GBQUAL_operon, -1, -1, -1, -1}, 12,
+ {GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence,
+ GBQUAL_function, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
+ GBQUAL_pseudo, GBQUAL_phenotype, GBQUAL_usedin,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"oriT", 0, {-1, -1, -1, -1, -1}, 16,
+ {GBQUAL_citation, GBQUAL_db_xref, GBQUAL_direction, GBQUAL_evidence,
+ GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
+ GBQUAL_rpt_family, GBQUAL_rpt_type, GBQUAL_rpt_unit, GBQUAL_standard_name,
+ GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"polyA_signal", 0, {-1, -1, -1, -1, -1}, 11,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"polyA_site", 0, {-1, -1, -1, -1, -1}, 9,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"polyA_site", 0, {-1, -1, -1, -1, -1}, 10,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"primer_bind", 0, {-1, -1, -1, -1, -1}, 12,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"primer_bind", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name,
GBQUAL_PCR_conditions,GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"promoter", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"promoter", 0, {-1, -1, -1, -1, -1}, 16,
{GBQUAL_citation,GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_phenotype, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"protein_bind", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 12,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"protein_bind", 1, {GBQUAL_bound_moiety, -1, -1, -1, -1}, 13,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"RBS", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"RBS", 0, {-1, -1, -1, -1, -1}, 12,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map,GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name,
- GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_usedin, GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"repeat_region", 0, {-1, -1, -1, -1, -1}, 17,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"repeat_region", 0, {-1, -1, -1, -1, -1}, 18,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_insertion_seq, GBQUAL_label, GBQUAL_map, GBQUAL_note,
GBQUAL_partial, GBQUAL_rpt_type, GBQUAL_rpt_family, GBQUAL_rpt_unit,
GBQUAL_standard_name,
GBQUAL_transposon, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- -1, -1, -1, -1, -1, -1, -1, -1}},
- {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_rpt_family,
GBQUAL_rpt_type, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"rep_origin", 0, {-1, -1, -1, -1, -1}, 12,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"rep_origin", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_citation, GBQUAL_direction, GBQUAL_db_xref, GBQUAL_evidence,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"rRNA", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"rRNA", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product,
GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"S_region", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"S_region", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_product,
GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"satellite", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"satellite", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial, GBQUAL_rpt_type,
GBQUAL_rpt_family,GBQUAL_rpt_unit, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"scRNA", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"scRNA", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"sig_peptide", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"sig_peptide", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"snoRNA", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"snoRNA", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"snRNA", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"snRNA", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene,GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"source", 1, {GBQUAL_organism, -1, -1, -1, -1}, 50,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"source", 1, {GBQUAL_organism, -1, -1, -1, -1}, 53,
{GBQUAL_cell_line, GBQUAL_cell_type, GBQUAL_chloroplast,
GBQUAL_chromoplast, GBQUAL_chromosome,
GBQUAL_citation, GBQUAL_clone, GBQUAL_clone_lib, GBQUAL_country, GBQUAL_cultivar,
@@ -566,70 +615,75 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_tissue_type, GBQUAL_transposon, GBQUAL_usedin,
GBQUAL_specimen_voucher, GBQUAL_variety, GBQUAL_virion,
GBQUAL_organelle, GBQUAL_transgenic, GBQUAL_environmental_sample,
- GBQUAL_isolation_source, GBQUAL_serovar}},
- {"stem_loop", 0, {-1, -1, -1, -1, -1}, 12,
+ GBQUAL_isolation_source, GBQUAL_serovar, GBQUAL_mol_type, GBQUAL_segment,
+ GBQUAL_ecotype, -1, -1}},
+ {"stem_loop", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"STS", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"STS", 0, {-1, -1, -1, -1, -1}, 12,
{GBQUAL_citation, GBQUAL_standard_name, GBQUAL_db_xref, GBQUAL_gene,
GBQUAL_label, GBQUAL_usedin, GBQUAL_note, GBQUAL_partial, GBQUAL_map,
- GBQUAL_evidence, GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_evidence, GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 11,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map,
GBQUAL_note, GBQUAL_partial, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"terminator", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"terminator", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map,GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, GBQUAL_operon,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 14,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 15,
{GBQUAL_citation, GBQUAL_db_xref,
GBQUAL_evidence, GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map,
GBQUAL_note,GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"tRNA", 0, {-1, -1, -1, -1, -1}, 15,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"tRNA", 0, {-1, -1, -1, -1, -1}, 16,
{GBQUAL_anticodon, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence,
GBQUAL_function, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note,
GBQUAL_partial, GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name,
GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"unsure", 0, {-1, -1, -1, -1}, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"unsure", 0, {-1, -1, -1, -1}, 11,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene, GBQUAL_usedin,
GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_replace,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"V_region", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"V_region", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"V_segment", 0, {-1, -1, -1, -1, -1}, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"V_segment", 0, {-1, -1, -1, -1, -1}, 14,
{GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_product, GBQUAL_pseudo, GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_allele, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"variation", 0, {-1, -1, -1, -1}, 16,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence,
GBQUAL_frequency, GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note,
@@ -637,49 +691,49 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"3'clip", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"3'UTR", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map, GBQUAL_note, GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"5'clip", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_function, GBQUAL_gene,
GBQUAL_evidence, GBQUAL_label, GBQUAL_map,
GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
{"5'UTR", 0, {-1, -1, -1, -1, -1}, 13,
{GBQUAL_allele, GBQUAL_citation, GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_function,
GBQUAL_gene, GBQUAL_label, GBQUAL_map,
GBQUAL_note, GBQUAL_partial, GBQUAL_standard_name, GBQUAL_usedin,
GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"-10_signal", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"-10_signal", 0, {-1, -1, -1, -1, -1}, 12,
{GBQUAL_citation,GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map,GBQUAL_note,GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"-35_signal", 0, {-1, -1, -1, -1, -1}, 11,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"-35_signal", 0, {-1, -1, -1, -1, -1}, 12,
{GBQUAL_citation,GBQUAL_db_xref, GBQUAL_evidence, GBQUAL_gene,
GBQUAL_label, GBQUAL_map,GBQUAL_note,GBQUAL_partial,
GBQUAL_standard_name, GBQUAL_usedin,
- GBQUAL_locus_tag, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ GBQUAL_locus_tag, GBQUAL_operon, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}} /* 15 sentinels, matching the "-10_signal" row */
};
NLM_EXTERN SematicFeatPtr x_ParFlat_GBFeat(void) {
diff --git a/api/lsqfetch.c b/api/lsqfetch.c
index fb778990..68378ca3 100644
--- a/api/lsqfetch.c
+++ b/api/lsqfetch.c
@@ -37,6 +37,12 @@
* Date Name Description of modification
*
* $Log: lsqfetch.c,v $
+* Revision 6.16 2003/08/27 21:24:05 kans
+* enable alt indexed fasta looks up previously registered function, changes settings for new path
+*
+* Revision 6.15 2003/08/27 19:27:43 kans
+* added AltIndexedFastaLibFetch functions for chimpanzee genome project
+*
* Revision 6.14 2002/11/13 23:07:37 johnson
* Changed make_lib such that it looks to see if it matches the *whole* seq-id
* (defined by the next character being non-alphanumeric).
@@ -81,6 +87,12 @@
* Revision changed to 6.0
*
* $Log: lsqfetch.c,v $
+* Revision 6.16 2003/08/27 21:24:05 kans
+* enable alt indexed fasta looks up previously registered function, changes settings for new path
+*
+* Revision 6.15 2003/08/27 19:27:43 kans
+* added AltIndexedFastaLibFetch functions for chimpanzee genome project
+*
* Revision 6.14 2002/11/13 23:07:37 johnson
* Changed make_lib such that it looks to see if it matches the *whole* seq-id
* (defined by the next character being non-alphanumeric).
@@ -1240,12 +1252,13 @@ static FastaIndexPtr ReadFastaIndex (
return fip;
}
-/* object manager registerable fetch function */
+/* human genome object manager registerable fetch function */
static CharPtr fastalibfetchproc = "IndexedFastaLibBioseqFetch";
typedef struct flibftch { /* per-proc state shared by the indexed FASTA fetch functions */
CharPtr path; /* StringSave'd copy of the library directory path */
+ CharPtr fastaname; /* StringSave'd base name; the alt fetcher opens <fastaname>.idx and <fastaname>.fsa */
FastaIndexPtr currentfip; /* index returned by ReadFastaIndex, searched on each fetch */
} FastaLibFetchData, PNTR FastaLibFetchPtr;
@@ -1352,10 +1365,145 @@ NLM_EXTERN void IndexedFastaLibFetchDisable (void)
flfp = (FastaLibFetchPtr) ompp->procdata;
if (flfp == NULL) return;
MemFree (flfp->path);
+ /* MemFree (flfp->fastaname); */
+ FreeFastaIndex (flfp->currentfip);
+ MemFree (flfp);
+}
+
+/* chimpanzee genome object manager registerable fetch function */
+
+static CharPtr altfastalibfetchproc = "AltIndexedFastaLibBioseqFetch";
+
+/* VisitBioseqsInSep callback: finds the first SEQID_LOCAL id on the Bioseq,
+ * replaces the whole id chain with the id parsed from "gb|<local id>", and
+ * re-indexes the Bioseq. Bioseqs without a local id are left untouched.
+ * userdata is unused. */
+static void ChangeLocalToGenbank (BioseqPtr bsp, Pointer userdata)
+
+{
+ Char id [41], tmp [sizeof ("gb|") + 41];
+ SeqIdPtr sip;
+
+ if (bsp == NULL) return;
+ for (sip = bsp->id; sip != NULL && sip->choice != SEQID_LOCAL; sip = sip->next) continue;
+ if (sip == NULL) return;
+ SeqIdWrite (sip, id, PRINTID_REPORT, sizeof (id));
+ /* tmp holds the "gb|" prefix plus a full id buffer, so this cannot overrun */
+ /* (assumes SeqIdWrite NUL-terminates within sizeof (id) - confirm) */
+ sprintf (tmp, "gb|%s", id);
+ sip = SeqIdParse (tmp);
+ /* keep the existing ids rather than leaving the Bioseq with none */
+ if (sip == NULL) return;
+ bsp->id = SeqIdSetFree (bsp->id);
+ bsp->id = sip;
+ SeqMgrReplaceInBioseqIndex (bsp);
+}
+
+/* Object manager fetch callback for the alternative indexed FASTA library.
+ * data is an OMProcControlPtr; its input_data is the SeqId to fetch and its
+ * proc->procdata is the FastaLibFetchData stored when the proc was loaded.
+ * Only SEQID_GENBANK ids are served: the id is looked up in the current
+ * index, <path>/<fastaname>.fsa is opened and read from the indexed offset,
+ * local ids in the result are rewritten by ChangeLocalToGenbank, and the
+ * matching Bioseq is returned through output_data / output_entityID.
+ * Returns OM_MSG_RET_DONE once a SeqEntry was read, OM_MSG_RET_ERROR otherwise. */
+static Int2 LIBCALLBACK AltIndexedFastaLibBioseqFetchFunc (Pointer data)
+
+{
+ BioseqPtr bsp;
+ Pointer dataptr = NULL;
+ Uint2 datatype, entityID = 0;
+ Char file [FILENAME_MAX], path [PATH_MAX], id [41];
+ FastaLibFetchPtr flfp;
+ FILE *fp;
+ Int4 offset;
+ OMProcControlPtr ompcp;
+ ObjMgrProcPtr ompp;
+ SeqEntryPtr sep = NULL;
+ SeqIdPtr sip;
+
+ ompcp = (OMProcControlPtr) data;
+ if (ompcp == NULL) return OM_MSG_RET_ERROR;
+ ompp = ompcp->proc;
+ if (ompp == NULL) return OM_MSG_RET_ERROR;
+ flfp = (FastaLibFetchPtr) ompp->procdata;
+ if (flfp == NULL) return OM_MSG_RET_ERROR;
+ sip = (SeqIdPtr) ompcp->input_data;
+ if (sip == NULL) return OM_MSG_RET_ERROR;
+
+ /* nothing can be fetched without a genbank id, a loaded index and a file name */
+ if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
+ if (flfp->currentfip == NULL || flfp->fastaname == NULL) return OM_MSG_RET_ERROR;
+
+ SeqIdWrite (sip, id, PRINTID_REPORT, sizeof (id));
+ offset = SearchFastaIndex (flfp->currentfip, id);
+ if (offset < 0) return OM_MSG_RET_ERROR;
+
+ /* the precision caps the copied name so that name + ".fsa" + NUL fits in file */
+ sprintf (file, "%.*s.fsa", (int) (sizeof (file) - sizeof (".fsa")), flfp->fastaname);
+ StringNCpy_0 (path, flfp->path, sizeof (path));
+ FileBuildPath (path, NULL, file);
+ fp = FileOpen (path, "r");
+ if (fp == NULL) return OM_MSG_RET_ERROR;
+ /* only parse when the seek to the indexed record succeeded */
+ if (fseek (fp, offset, SEEK_SET) == 0) {
+   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID,
+                                     FALSE, FALSE, TRUE, FALSE);
+   if (dataptr != NULL) {
+     sep = GetTopSeqEntryForEntityID (entityID);
+   }
+ }
+ FileClose (fp);
+
+ if (sep == NULL) return OM_MSG_RET_ERROR;
+ VisitBioseqsInSep (sep, NULL, ChangeLocalToGenbank);
+ bsp = BioseqFindInSeqEntry (sip, sep);
+ ompcp->output_data = (Pointer) bsp;
+ ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
+ return OM_MSG_RET_DONE;
+}
+
+/* Registers the alternative indexed FASTA fetch proc, or re-targets an
+ * already registered one at a new library.
+ * path      - library directory (copied and trimmed of surrounding spaces)
+ * fastaname - base name; <path>/<fastaname>.idx is read as the index
+ * Returns FALSE when fastaname is NULL or the state record cannot be
+ * allocated, TRUE otherwise (even if the index file could not be read). */
+NLM_EXTERN Boolean AltIndexedFastaLibFetchEnable (CharPtr path, CharPtr fastaname)
+
+{
+ Char file [FILENAME_MAX];
+ FastaLibFetchPtr flfp = NULL;
+ Boolean is_new = FALSE;
+ ObjMgrPtr omp;
+ ObjMgrProcPtr ompp;
+ Char str [PATH_MAX];
+
+ if (fastaname == NULL) return FALSE;
+ StringNCpy_0 (str, path, sizeof (str));
+ TrimSpacesAroundString (str);
+ omp = ObjMgrGet ();
+ ompp = ObjMgrProcFind (omp, 0, altfastalibfetchproc, OMPROC_FETCH);
+ if (ompp != NULL) {
+   flfp = (FastaLibFetchPtr) ompp->procdata;
+   if (flfp != NULL) {
+     /* proc already registered: drop the previous library settings */
+     flfp->path = MemFree (flfp->path);
+     flfp->fastaname = MemFree (flfp->fastaname);
+     flfp->currentfip = FreeFastaIndex (flfp->currentfip);
+   } else {
+     /* registered proc without state: give it a fresh record */
+     flfp = (FastaLibFetchPtr) MemNew (sizeof (FastaLibFetchData));
+     ompp->procdata = (Pointer) flfp;
+   }
+ } else {
+   flfp = (FastaLibFetchPtr) MemNew (sizeof (FastaLibFetchData));
+   is_new = TRUE;
+ }
+ /* do not register, or report success for, a proc that has no state */
+ if (flfp == NULL) return FALSE;
+
+ /* save the directory before FileBuildPath appends the index file name to str */
+ flfp->path = StringSave (str);
+ flfp->fastaname = StringSave (fastaname);
+ /* the precision caps the copied name so that name + ".idx" + NUL fits in file */
+ sprintf (file, "%.*s.idx", (int) (sizeof (file) - sizeof (".idx")), fastaname);
+ FileBuildPath (str, NULL, file);
+ flfp->currentfip = ReadFastaIndex (str);
+
+ if (is_new) {
+   ObjMgrProcLoad (OMPROC_FETCH, altfastalibfetchproc, altfastalibfetchproc,
+                   OBJ_SEQID, 0, OBJ_BIOSEQ, 0, (Pointer) flfp,
+                   AltIndexedFastaLibBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
+ }
+ return TRUE;
+}
+
+/* Releases the state attached to the alternative indexed FASTA fetch proc:
+ * the saved path, the saved fasta name, the loaded index and the record
+ * itself. Does nothing if the proc was never registered or has no state. */
+NLM_EXTERN void AltIndexedFastaLibFetchDisable (void)
+
+{
+ FastaLibFetchPtr flfp;
+ ObjMgrPtr omp;
+ ObjMgrProcPtr ompp;
+
+ omp = ObjMgrGet ();
+ ompp = ObjMgrProcFind (omp, 0, altfastalibfetchproc, OMPROC_FETCH);
+ if (ompp == NULL) return;
+ ObjMgrFreeUserData (0, ompp->procid, OMPROC_FETCH, 0);
+ flfp = (FastaLibFetchPtr) ompp->procdata;
+ if (flfp == NULL) return;
+ /* detach before freeing so a later fetch or re-enable never sees freed memory */
+ ompp->procdata = NULL;
+ MemFree (flfp->path);
+ MemFree (flfp->fastaname);
FreeFastaIndex (flfp->currentfip);
MemFree (flfp);
}
+/* common function for creating indexes of fasta library files */
+
NLM_EXTERN void CreateFastaIndex (
CharPtr file
)
diff --git a/api/lsqfetch.h b/api/lsqfetch.h
index a0e9daac..52a59363 100644
--- a/api/lsqfetch.h
+++ b/api/lsqfetch.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 5/25/95
*
-* $Revision: 6.2 $
+* $Revision: 6.3 $
*
* File Description: Utilities for fetching local sequences
*
@@ -40,6 +40,9 @@
*
*
* $Log: lsqfetch.h,v $
+* Revision 6.3 2003/08/27 19:27:43 kans
+* added AltIndexedFastaLibFetch functions for chimpanzee genome project
+*
* Revision 6.2 2001/03/12 23:19:33 kans
* added IndexedFastaLib functions - currently uses genome contig naming conventions
*
@@ -246,10 +249,18 @@ NLM_EXTERN Boolean CheckDnaResidue PROTO((CharPtr seq_ptr, Int4 ck_len, Int4Ptr
/**********************************************************************/
-/* indexed FASTA lib functions - currently uses genome contig naming convention */
+/* indexed FASTA lib functions - currently uses human genome contig naming convention */
NLM_EXTERN Boolean IndexedFastaLibFetchEnable (CharPtr path);
NLM_EXTERN void IndexedFastaLibFetchDisable (void);
+
+/* alternative indexed FASTA lib functions - currently uses chimpanzee genome contig naming convention */
+
+NLM_EXTERN Boolean AltIndexedFastaLibFetchEnable (CharPtr path, CharPtr fastaname);
+NLM_EXTERN void AltIndexedFastaLibFetchDisable (void);
+
+/* common function for creating indexes of fasta library files */
+
NLM_EXTERN void CreateFastaIndex (CharPtr file);
diff --git a/api/salpstat.c b/api/salpstat.c
index 68ca2aae..4f741ace 100644
--- a/api/salpstat.c
+++ b/api/salpstat.c
@@ -631,7 +631,7 @@ NLM_EXTERN Int4Ptr LIBCALL SeqAlignListGapList(SeqAlignPtr sap,Int4Ptr gap_num){
/* Utility subroutine for SeqAlignWindowStats */
-static void LIBCALL UpdateWindow(Int4 win_size,Int4 cur_pos,Uint1Ptr win_buf1,Uint1Ptr win_buf2,Uint1 res_1,Uint1 res_2,Uint1 code,Int4Ptr win_gap,Int4Ptr win_gapmismatch,Int4Ptr win_mismatch,Int4Ptr win_match,Boolean N_are_not_mismatches){
+static void LIBCALL UpdateWindowStats(Int4 win_size,Int4 cur_pos,Uint1Ptr win_buf1,Uint1Ptr win_buf2,Uint1 res_1,Uint1 res_2,Uint1 code,Int4Ptr win_gap,Int4Ptr win_gapmismatch,Int4Ptr win_mismatch,Int4Ptr win_match,Boolean N_are_not_mismatches){
Uint1 exit_char1,exit_char2;
Int4 loc;
if(win_size<=0) return; /* For case where user didn't care about window */
@@ -828,7 +828,7 @@ NLM_EXTERN Int4 LIBCALL SeqAlignWindowStats(SeqAlignPtr align, BioseqPtr bsp_1,
res_1 = SeqPortGetResidue(spp_1);
res_2= '-';
}
- UpdateWindow(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches);
+ UpdateWindowStats(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches);
CheckMinMax(win_size,cur_pos,win_gap,
win_gapmismatch,win_mismatch,win_match,
mmin_mismatch,mmax_mismatch,
@@ -864,7 +864,7 @@ NLM_EXTERN Int4 LIBCALL SeqAlignWindowStats(SeqAlignPtr align, BioseqPtr bsp_1,
}
else
++mismatches;
- UpdateWindow(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches);
+ UpdateWindowStats(win_size,cur_pos,win_buf1,win_buf2,res_1,res_2,code,&win_gap,&win_gapmismatch,&win_mismatch,&win_match,N_are_not_mismatches);
CheckMinMax(win_size,cur_pos,win_gap,
win_gapmismatch,win_mismatch,win_match,
mmin_mismatch,mmax_mismatch,
diff --git a/api/seqmgr.c b/api/seqmgr.c
index 65f24262..9994a256 100644
--- a/api/seqmgr.c
+++ b/api/seqmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.203 $
+* $Revision: 6.210 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -39,6 +39,27 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqmgr.c,v $
+* Revision 6.210 2003/10/24 19:49:11 kans
+* operon feature of equal range sorted before gene, mRNA, CDS
+*
+* Revision 6.209 2003/10/23 17:40:01 kans
+* added SeqMgrGetOverlappingOperon and bspextra operonsByPos and numoperons fields
+*
+* Revision 6.208 2003/10/02 16:12:31 bollin
+* added COMMON_INTERVAL overlap type to TestForOverlap
+*
+* Revision 6.207 2003/09/22 17:27:29 kans
+* strip RNA- prefix, not just - on RNAs
+*
+* Revision 6.206 2003/09/22 16:13:20 kans
+* LockFarComponentsEx takes new SeqLocPtr parameter
+*
+* Revision 6.205 2003/09/22 15:55:06 kans
+* all rna context labels were searched for dash, now just trna
+*
+* Revision 6.204 2003/08/04 20:41:20 kans
+* SeqMgrProcessNonIndexedBioseq needed to reset version to 0 each time through outer loop (EY)
+*
* Revision 6.203 2003/04/03 22:40:09 kans
* feature index location problem now reports latest identifier in record to make it easier to find problem
*
@@ -3420,7 +3441,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
Boolean indexed;
TextSeqIdPtr tsip;
SeqMgrPtr smp;
- Int2 version = 0;
+ Int2 version;
Boolean sort_now = TRUE;
smp = SeqMgrReadLock();
@@ -3456,6 +3477,7 @@ static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
if (bsp->id != NULL)
{
indexed = TRUE;
+ version = 0;
for (sip = bsp->id; sip != NULL; sip = sip->next)
{
oldchoice = 0;
@@ -4041,6 +4063,7 @@ static Boolean SeqMgrClearBioseqExtraData (ObjMgrDataPtr omdp)
bspextra->CDSsByPos = MemFree (bspextra->CDSsByPos);
bspextra->pubsByPos = MemFree (bspextra->pubsByPos);
bspextra->orgsByPos = MemFree (bspextra->orgsByPos);
+ bspextra->operonsByPos = MemFree (bspextra->operonsByPos);
/* free list of descriptor information */
@@ -4104,6 +4127,7 @@ static Boolean SeqMgrClearBioseqExtraData (ObjMgrDataPtr omdp)
bspextra->numCDSs = 0;
bspextra->numpubs = 0;
bspextra->numorgs = 0;
+ bspextra->numoperons = 0;
bspextra->numsegs = 0;
bspextra->min = INT4_MAX;
@@ -5467,9 +5491,9 @@ static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
ptr = buf;
if (sfp->data.choice == SEQFEAT_RNA) {
- ptr = StringChr (buf, '-');
+ ptr = StringStr (buf, "RNA-");
if (ptr != NULL) {
- ptr++;
+ ptr += 4;
} else {
ptr = buf;
}
@@ -6163,10 +6187,19 @@ static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
return -1; /* was 1 */
} else if (sp1->right < sp2->right) {
return 1; /* was -1 */
+ }
+
+ /* given identical extremes, put operon features first */
+
+ if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) {
+ return -1;
+ } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) {
+ return 1;
+ }
- /* given identical extremes, put gene features first */
+ /* then gene features */
- } else if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
+ if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
return -1;
} else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) {
return 1;
@@ -6324,10 +6357,19 @@ static int LIBCALLBACK SortFeatItemListByRev (VoidPtr vp1, VoidPtr vp2)
return -1;
} else if (sp1->left > sp2->left) {
return 1;
+ }
+
+ /* given identical extremes, put operon features first */
+
+ if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) {
+ return -1;
+ } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) {
+ return 1;
+ }
- /* given identical extremes, put gene features first */
+ /* then gene features */
- } else if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
+ if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
return -1;
} else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) {
return 1;
@@ -6703,6 +6745,7 @@ static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats)
bspextra->CDSsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numCDSs), 0, FEATDEF_CDS);
bspextra->pubsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numpubs), 0, FEATDEF_PUB);
bspextra->orgsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numorgs), 0, FEATDEF_BIOSRC);
+ bspextra->operonsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numoperons), 0, FEATDEF_operon);
}
if (dorevfeats) {
@@ -7679,9 +7722,10 @@ static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp,
}
}
- } else if (overlapType == INTERVAL_OVERLAP) {
+ } else if (overlapType == INTERVAL_OVERLAP || overlapType == COMMON_INTERVAL) {
- /* requires overlap between at least one pair of intervals */
+ /* requires overlap between at least one pair of intervals (INTERVAL_OVERLAP) */
+ /* or one complete shared interval (COMMON_INTERVAL) */
if (feat->right >= left && feat->left <= right) {
sfp = feat->sfp;
@@ -7690,7 +7734,11 @@ static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp,
while (a != NULL) {
b = SeqLocFindNext (sfp->location, NULL);
while (b != NULL) {
- if (SeqLocCompare (a, b) != SLC_NO_MATCH) {
+ if ((overlapType == INTERVAL_OVERLAP
+ && SeqLocCompare (a, b) != SLC_NO_MATCH)
+ || (overlapType == COMMON_INTERVAL
+ && SeqLocCompare (a, b) == SLC_A_EQ_B))
+ {
diff = ABS (left - feat->left) + ABS (feat->right - right);
return diff;
}
@@ -7825,6 +7873,9 @@ static SeqFeatPtr SeqMgrGetBestOverlappingFeat (SeqLocPtr slp, Uint2 subtype,
array = bspextra->orgsByPos;
num = bspextra->numorgs;
break;
+ case FEATDEF_operon :
+ array = bspextra->operonsByPos;
+ num = bspextra->numoperons;
+ break; /* end the case explicitly instead of falling through into default */
default :
break;
}
@@ -8058,6 +8109,12 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingSource (SeqLocPtr slp, SeqMgrF
return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_BIOSRC, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
}
+NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingOperon (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
+/* forwards to SeqMgrGetBestOverlappingFeat for FEATDEF_operon with the CONTAINED_WITHIN overlap type, passing slp and context through */
+{
+ return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_operon, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL);
+}
+
/*****************************************************************************
*
* SeqMgrGetFeatureByLabel returns a feature with the desired label
@@ -9542,7 +9599,24 @@ static void LockAllProducts (SeqFeatPtr sfp, Pointer userdata)
}
}
-NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products)
+/* Steps through loc with SeqLocFindNext and passes every piece whose choice
+ * is not SEQLOC_NULL to LockAllSegments. userdata is the ValNodePtr PNTR
+ * that is handed on to LockAllSegments. */
+static void LockAllSublocs (SeqLocPtr loc, Pointer userdata)
+
+{
+ SeqLocPtr piece;
+ ValNodePtr PNTR listp = (ValNodePtr PNTR) userdata;
+
+ if (loc == NULL || listp == NULL) return;
+
+ for (piece = SeqLocFindNext (loc, NULL); piece != NULL; piece = SeqLocFindNext (loc, piece)) {
+   if (piece->choice == SEQLOC_NULL) continue;
+   LockAllSegments (piece, listp);
+ }
+}
+
+NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, SeqLocPtr loc)
{
ValNodePtr bsplist = NULL;
@@ -9559,6 +9633,9 @@ NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components,
if (products) {
VisitFeaturesInSep (sep, (Pointer) &bsplist, LockAllProducts);
}
+ if (loc != NULL) {
+ /* lock the pieces of the caller-supplied location, not the SeqEntry */
+ LockAllSublocs (loc, (Pointer) &bsplist);
+ }
SeqEntrySetScope (oldsep);
return bsplist;
}
@@ -9566,7 +9643,7 @@ NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components,
NLM_EXTERN ValNodePtr LockFarComponents (SeqEntryPtr sep)
{
- return LockFarComponentsEx (sep, TRUE, FALSE, FALSE);
+ return LockFarComponentsEx (sep, TRUE, FALSE, FALSE, NULL); /* components only; no locations, products or extra SeqLoc */
}
NLM_EXTERN ValNodePtr UnlockFarComponents (ValNodePtr bsplist)
diff --git a/api/seqmgr.h b/api/seqmgr.h
index 6a2d8b4e..86731995 100644
--- a/api/seqmgr.h
+++ b/api/seqmgr.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.50 $
+* $Revision: 6.52 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -40,6 +40,12 @@
*
*
* $Log: seqmgr.h,v $
+* Revision 6.52 2003/10/23 17:40:01 kans
+* added SeqMgrGetOverlappingOperon and bspextra operonsByPos and numoperons fields
+*
+* Revision 6.51 2003/09/22 16:13:20 kans
+* LockFarComponentsEx takes new SeqLocPtr parameter
+*
* Revision 6.50 2003/02/12 14:20:47 kans
* added IsNonGappedLiteral, used to allow compressed deltas as (previously always raw) parts of segsets
*
@@ -931,6 +937,7 @@ typedef struct bioseqextra {
SMFeatItemPtr PNTR CDSsByPos; /* subset of featsByPos array containing only CDS features */
SMFeatItemPtr PNTR pubsByPos; /* subset of featsByPos array containing only publication features */
SMFeatItemPtr PNTR orgsByPos; /* subset of featsByPos array containing only biosource features */
+ SMFeatItemPtr PNTR operonsByPos; /* subset of featsByPos array containing only operon features */
BioseqPtr parentBioseq; /* segmented parent of this raw part all packaged together */
SMSeqIdxPtr segparthead; /* linked list to speed mapping from parts to segmented bioseq */
@@ -946,6 +953,7 @@ typedef struct bioseqextra {
Int4 numCDSs; /* number of elements in CDSsByPos array */
Int4 numpubs; /* number of elements in pubsByPos array */
Int4 numorgs; /* number of elements in orgsByPos array */
+ Int4 numoperons; /* number of elements in operonsByPos array */
Int4 numsegs; /* number of segments in partslist array */
@@ -1048,7 +1056,7 @@ NLM_EXTERN SeqAlignPtr LIBCALL SeqMgrFindSeqAlignByID PROTO((Uint2 entityID, Uin
NLM_EXTERN ValNodePtr LockFarComponents (SeqEntryPtr sep);
-NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products);
+NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, SeqLocPtr loc);
NLM_EXTERN ValNodePtr UnlockFarComponents (ValNodePtr bsplist);
diff --git a/api/seqport.c b/api/seqport.c
index e19eb81e..a42d70d3 100644
--- a/api/seqport.c
+++ b/api/seqport.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.79 $
+* $Revision: 6.82 $
*
* File Description: Ports onto Bioseqs
*
@@ -39,6 +39,15 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: seqport.c,v $
+* Revision 6.82 2003/08/18 21:07:35 kans
+* RevCompStr was stepping on str variable
+*
+* Revision 6.81 2003/08/18 20:09:46 kans
+* SeqPortStreamLoc calls SeqPortStream recursively to local buffer, reverse complements if necessary, and passes appropriate subsequence to callback - speeds up far deltas that point to other far deltas
+*
+* Revision 6.80 2003/08/07 19:54:33 kans
+* TransTableTranslateCommon turns on no_start only if CDS location is 5prime partial, not if product is missing the amino end
+*
* Revision 6.79 2002/11/11 18:02:40 kans
* added SeqPortStream to efficiently stream through a sequence
*
@@ -2202,6 +2211,84 @@ static void SeqPortStreamLit (
BioseqFree (bsp);
}
+/* RevCompStr reverse-complements a nucleotide string in place.
+   Letters are complemented through a table indexed by alphabet
+   position; upper and lower case input both yield the upper case
+   complement.  Characters that have no complement in the table are
+   left unchanged, so the string is never shortened.  NULL and empty
+   strings are ignored */
+
+static void RevCompStr (
+  CharPtr str
+)
+
+{
+  Char     ch;
+  /* slot 0 is unused, slots 1..26 hold the complements of A..Z,
+     a blank meaning that the letter has no complement */
+  CharPtr  complementBase = " TVGH  CD  M KN   YSAABW R ";
+  Int2     i;
+  Uint1    letterToComp [256];
+  Char     lttr;
+  CharPtr  nd;
+  CharPtr  tmp;
+
+  /* empty string also returns here, so nd never steps before str */
+
+  if (str == NULL || *str == '\0') return;
+
+  /* set up complementation lookup table */
+
+  for (i = 0; i < 256; i++) {
+    letterToComp [i] = '\0';
+  }
+  for (ch = 'A', i = 1; ch <= 'Z'; ch++, i++) {
+    lttr = complementBase [i];
+    if (lttr != ' ') {
+      letterToComp [(int) (Uint1) ch] = (Uint1) lttr;
+    }
+  }
+  for (ch = 'a', i = 1; ch <= 'z'; ch++, i++) {
+    lttr = complementBase [i];
+    if (lttr != ' ') {
+      letterToComp [(int) (Uint1) ch] = (Uint1) lttr;
+    }
+  }
+
+  /* reverse string */
+
+  nd = str;
+  while (*nd != '\0') {
+    nd++;
+  }
+  nd--;
+
+  tmp = str;
+  while (nd > tmp) {
+    ch = *nd;
+    *nd = *tmp;
+    *tmp = ch;
+    nd--;
+    tmp++;
+  }
+
+  /* complement string, keeping any character without a table entry
+     rather than overwriting it with a terminator */
+
+  for (nd = str; *nd != '\0'; nd++) {
+    lttr = (Char) letterToComp [(int) (Uint1) *nd];
+    if (lttr != '\0') {
+      *nd = lttr;
+    }
+  }
+}
+
+/* SaveLocStream is a stream callback.  userdata points to a CharPtr
+   holding the current write position; that position and the sequence
+   chunk are passed to StringMove, and the position StringMove returns
+   is stored back for the next call */
+
+static void LIBCALLBACK SaveLocStream (
+  CharPtr sequence,
+  Pointer userdata
+)
+
+{
+  CharPtr PNTR  cursor;
+
+  cursor = (CharPtr PNTR) userdata;
+  *cursor = StringMove (*cursor, sequence);
+}
+
static void SeqPortStreamLoc (
SeqLocPtr slp,
Boolean expandGaps,
@@ -2212,7 +2299,9 @@ static void SeqPortStreamLoc (
{
BioseqPtr bsp;
Int4 from;
+ CharPtr str;
Uint1 strand;
+ CharPtr tmp;
Int4 to;
if (slp == NULL || proc == NULL) return;
@@ -2224,7 +2313,28 @@ static void SeqPortStreamLoc (
to = SeqLocStop (slp);
strand = SeqLocStrand (slp);
- SeqPortStreamRaw (bsp, from, to, strand, expandGaps, userdata, proc);
+  /* scratch buffer sized from the Bioseq length plus a little slack */
+  str = MemNew (sizeof (Char) * (bsp->length + 10));
+ if (str != NULL) {
+ tmp = str;
+ SeqPortStream (bsp, TRUE, (Pointer) &tmp, SaveLocStream);
+
+ if (to > 0 && to < bsp->length) {
+ str [to + 1] = '\0';
+ }
+ tmp = str;
+ if (from > 0 && from < bsp->length) {
+ tmp += from;
+ }
+ if (strand == Seq_strand_minus && ISA_na (bsp->mol)) {
+ RevCompStr (tmp);
+ }
+
+ proc (tmp, userdata);
+
+ MemFree (str);
+ }
+
+ /* SeqPortStreamRaw (bsp, from, to, strand, expandGaps, userdata, proc); */
BioseqUnlock (bsp);
}
@@ -4906,7 +5016,7 @@ static ByteStorePtr TransTableTranslateCommon (
no_start = FALSE;
part_loc = SeqLocPartialCheck (location);
part_prod = SeqLocPartialCheck (product);
- if ((part_loc & SLP_START) || (part_prod & SLP_START)) {
+ if ((part_loc & SLP_START) /* || (part_prod & SLP_START) */) {
no_start = TRUE;
}
if (StringHasNoText (tbl->sncbieaa) || no_start || frame > 1) {
diff --git a/api/sequtil.c b/api/sequtil.c
index 722119e6..6b014789 100644
--- a/api/sequtil.c
+++ b/api/sequtil.c
@@ -29,13 +29,37 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.131 $
+* $Revision: 6.139 $
*
* File Description: Sequence Utilities for objseq and objsset
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: sequtil.c,v $
+* Revision 6.139 2003/10/24 14:36:12 kans
+* added CH as GenBank CONN to WHICH_db_accession
+*
+* Revision 6.138 2003/09/09 20:08:18 kans
+* SeqLocPartialCheck locks bioseq if seqloc_whole and far
+*
+* Revision 6.137 2003/09/02 15:11:50 kans
+* WHICH_db_accession takes ZP_ with 8 digits as refseq_prot_predicted
+*
+* Revision 6.136 2003/08/11 13:45:18 kans
+* added CG as ncbi gss
+*
+* Revision 6.135 2003/07/14 20:17:53 kans
+* added CF as ncbi est to WHICH_db_accession
+*
+* Revision 6.134 2003/07/02 14:35:21 kans
+* added CE as ncbi gss
+*
+* Revision 6.133 2003/05/20 22:15:24 yaschenk
+* SeqIdSelect loops indefinitely on corrupted memory
+*
+* Revision 6.132 2003/04/30 16:40:41 kans
+* added CD as GenBank EST
+*
* Revision 6.131 2003/03/25 13:32:22 kans
* added CC as ncbi gss accession prefix
*
@@ -3063,10 +3087,12 @@ NLM_EXTERN SeqIdPtr SeqIdSelect (SeqIdPtr sip, Uint1Ptr order, Int2 num)
else if (order[sip->choice] < order[bestid->choice])
bestid = sip;
}
- }
- else
+ } else {
ErrPostEx(SEV_ERROR, 0,0, "SeqIdSelect: choice [%d] out of range [%d]",
(int)(sip->choice), (int)num);
+ if(sip->choice > NUM_SEQID) /*** something is really wrong ***/
+ return NULL;
+ }
}
return bestid;
@@ -5974,68 +6000,82 @@ NLM_EXTERN Uint2 SeqLocPartialCheck(SeqLocPtr head)
break;
case SEQLOC_WHOLE:
found_molinfo = FALSE;
- bsp = BioseqFind((SeqIdPtr)(slp->data.ptrvalue));
- if (bsp == NULL) break;
- bcp = BioseqContextNew(bsp);
- if (bcp == NULL) break;
- vnp = NULL;
- while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_molinfo, vnp, NULL)) != NULL)
+ locked = FALSE;
+ bsp = BioseqFindCore((SeqIdPtr)(slp->data.ptrvalue));
+ if (bsp == NULL)
{
- found_molinfo = TRUE;
- mip = (MolInfoPtr)(vnp->data.ptrvalue);
- switch (mip->completeness)
- {
- case 3: /* no left */
- if (slp == first)
- retval |= SLP_START;
- else
- retval |= SLP_INTERNAL;
- break;
- case 4: /* no right */
- if (slp == last)
- retval |= SLP_STOP;
- else
- retval |= SLP_INTERNAL;
- break;
- case 2: /* partial */
- retval |= SLP_OTHER;
- break;
- case 5: /* no ends */
- retval |= SLP_START;
- retval |= SLP_STOP;
- break;
- default:
- break;
- }
+ bsp = BioseqLockById((SeqIdPtr)(slp->data.ptrvalue));
+ if (bsp != NULL)
+ locked = TRUE;
}
- if (! found_molinfo)
- {
- while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_modif, vnp, NULL)) != NULL)
- {
- for (vnp2 = (ValNodePtr)(vnp->data.ptrvalue); vnp2 != NULL; vnp2 = vnp2->next)
+ if (bsp == NULL) break;
+ bcp = BioseqContextNew(bsp);
+ if (bcp != NULL) {
+ vnp = NULL;
+ while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_molinfo, vnp, NULL)) != NULL)
{
- switch (vnp2->data.intvalue)
+ found_molinfo = TRUE;
+ mip = (MolInfoPtr)(vnp->data.ptrvalue);
+ switch (mip->completeness)
{
- case 16: /* no left */
+ case 3: /* no left */
if (slp == first)
retval |= SLP_START;
else
retval |= SLP_INTERNAL;
break;
- case 17: /* no right */
+ case 4: /* no right */
if (slp == last)
retval |= SLP_STOP;
else
retval |= SLP_INTERNAL;
break;
- case 10: /* partial */
+ case 2: /* partial */
retval |= SLP_OTHER;
break;
+ case 5: /* no ends */
+ retval |= SLP_START;
+ retval |= SLP_STOP;
+ break;
+ default:
+ break;
}
}
+ if (! found_molinfo)
+ {
+ while ((vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_modif, vnp, NULL)) != NULL)
+ {
+ for (vnp2 = (ValNodePtr)(vnp->data.ptrvalue); vnp2 != NULL; vnp2 = vnp2->next)
+ {
+ switch (vnp2->data.intvalue)
+ {
+
+ case 16: /* no left */
+
+ if (slp == first)
+
+ retval |= SLP_START;
+
+ else
+ retval |= SLP_INTERNAL;
+ break;
+ case 17: /* no right */
+ if (slp == last)
+ retval |= SLP_STOP;
+ else
+ retval |= SLP_INTERNAL;
+ break;
+ case 10: /* partial */
+ retval |= SLP_OTHER;
+ break;
+ }
+ }
+ }
+ }
+ BioseqContextFree(bcp);
}
- }
- BioseqContextFree(bcp);
+ if (locked)
+ BioseqUnlock (bsp);
break;
default:
break;
@@ -8723,7 +8763,9 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"BQ") == 0) ||
(StringICmp(temp,"BU") == 0) ||
(StringICmp(temp,"CA") == 0) ||
- (StringICmp(temp,"CB") == 0) ) { /* NCBI EST */
+ (StringICmp(temp,"CB") == 0) ||
+ (StringICmp(temp,"CD") == 0) ||
+ (StringICmp(temp,"CF") == 0) ) { /* NCBI EST */
retcode = ACCN_NCBI_EST;
} else if ((StringICmp(temp,"BV") == 0)) { /* NCBI STS */
retcode = ACCN_NCBI_STS;
@@ -8734,7 +8776,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
retcode = ACCN_NCBI_DIRSUB;
} else if ((StringICmp(temp,"AE") == 0)) { /* NCBI genome project data */
retcode = ACCN_NCBI_GENOME;
- } else if ((StringICmp(temp,"AH") == 0)) { /* NCBI segmented set header Bioseq */
+ } else if ((StringICmp(temp,"AH") == 0) ||
+ (StringICmp(temp,"CH") == 0)) { /* NCBI segmented set header Bioseq */
retcode = ACCN_NCBI_SEGSET | ACCN_AMBIGOUS_MOL; /* A few segmented
proteins are AH */
} else if ((StringICmp(temp,"AS") == 0)) { /* NCBI "other" */
@@ -8745,7 +8788,9 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"AZ") == 0) ||
(StringICmp(temp,"BH") == 0) ||
(StringICmp(temp,"BZ") == 0) ||
- (StringICmp(temp,"CC") == 0) ) { /* NCBI GSS */
+ (StringICmp(temp,"CC") == 0) ||
+ (StringICmp(temp,"CE") == 0) ||
+ (StringICmp(temp,"CG") == 0) ) { /* NCBI GSS */
retcode = ACCN_NCBI_GSS;
} else if ((StringICmp(temp,"AR") == 0)) { /* NCBI patent */
retcode = ACCN_NCBI_PATENT;
@@ -8845,6 +8890,27 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
s++;
}
break;
+ case 11: /* New 11-character accession, two letters +"_"+ 8 digits */
+ if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)))
+ break;
+ if(*(s+2)!='_')
+ break;
+ temp[0] = *s; s++;
+ temp[1] = *s; s++;
+ temp[2] = NULLB; s++;
+
+ if ((StringICmp(temp,"ZP") == 0)) {
+ retcode = ACCN_REFSEQ_PROT_PREDICTED;
+ } else
+ retval = FALSE;
+ while (*s) {
+ if (! IS_DIGIT(*s)) {
+ retval = FALSE;
+ break;
+ }
+ s++;
+ }
+ break;
case 12: /* whole genome shotgun 12-character accession, four letters + 8 digits */
if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)) || !IS_ALPHA(*(s+2)) || !IS_ALPHA(*(s+3)))
break;
diff --git a/api/sqnutil1.c b/api/sqnutil1.c
index c22f7a65..41e84d32 100644
--- a/api/sqnutil1.c
+++ b/api/sqnutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.271 $
+* $Revision: 6.283 $
*
* File Description:
*
@@ -1643,12 +1643,9 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
Char ch;
CdRegionPtr crp;
Int2 ctr = 1;
- DbtagPtr dbt;
ValNodePtr descr;
SeqFeatPtr first;
GBQualPtr gbq;
- SeqFeatPtr gene;
- GeneRefPtr grp;
Int4 i;
Char id [64];
SeqEntryPtr last;
@@ -1680,6 +1677,11 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
long int val;
ValNodePtr vnp;
SeqFeatXrefPtr xref;
+ /*
+ DbtagPtr dbt;
+ SeqFeatPtr gene;
+ GeneRefPtr grp;
+ */
if (sfp == NULL || bsp == NULL) return;
@@ -1693,8 +1695,9 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
sfp = sfp->next;
}
- /* expand genes specified by qualifiers on other features (except repeat_region) */
+ /* no longer expand genes specified by qualifiers on other features (except repeat_region) */
+ /*
sfp = first;
while (sfp != NULL) {
prev = &(sfp->xref);
@@ -1720,7 +1723,6 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
gene->location = AsnIoMemCopy (sfp->location,
(AsnReadFunc) SeqLocAsnRead,
(AsnWriteFunc) SeqLocAsnWrite);
- /* copy dbxrefs from parent feature */
for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
dbt = (DbtagPtr) vnp->data.ptrvalue;
if (dbt == NULL) continue;
@@ -1741,6 +1743,7 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
}
sfp = sfp->next;
}
+ */
/* expand mRNA features into cDNA product sequences */
@@ -2739,7 +2742,7 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
if (aa == 0 && curraa != 0) {
aa = curraa;
trp->aa = curraa;
- trp->aatype = Seq_code_ncbieaa;
+ trp->aatype = 2;
}
if (aa != 0 && aa == curraa) {
if (justTrnaText) {
@@ -2758,7 +2761,7 @@ static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
aa = ParseTRnaString (sfp->comment, &justTrnaText, trpcodon, TRUE);
if (aa == 0) return;
trp->aa = aa;
- trp->aatype = Seq_code_ncbieaa;
+ trp->aatype = 2;
if (justTrnaText) {
for (j = 0; j < 6; j++) {
if (trp->codon [j] == 255) {
@@ -3241,7 +3244,7 @@ static Boolean HandledGBQualOnImp (SeqFeatPtr sfp, GBQualPtr gbq)
ptr++;
ch = *ptr;
}
- return TRUE;
+ /* return TRUE; */
}
return FALSE;
}
@@ -5468,6 +5471,27 @@ static void CopyProtXrefToProtFeat (ProtRefPtr prp, ProtRefPtr prx)
}
}
+/* InGpsGenomic finds the Bioseq for the feature location and climbs
+   its chain of parent BioseqSets.  It returns TRUE when a gen-prod-set
+   is reached, and FALSE when a nuc-prot set is reached first, when the
+   chain ends, or when no Bioseq or parent set is found */
+
+static Boolean InGpsGenomic (SeqFeatPtr sfp)
+
+{
+  BioseqSetPtr  parent;
+  BioseqPtr     target;
+
+  if (sfp == NULL) return FALSE;
+  target = BioseqFindFromSeqLoc (sfp->location);
+  if (target == NULL) return FALSE;
+  if (target->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
+
+  for (parent = (BioseqSetPtr) target->idx.parentptr;
+       parent != NULL;
+       parent = (BioseqSetPtr) parent->idx.parentptr) {
+    if (parent->_class == BioseqseqSet_class_nuc_prot) return FALSE;
+    if (parent->_class == BioseqseqSet_class_gen_prod_set) return TRUE;
+    /* stop climbing once the next parent is not a BioseqSet */
+    if (parent->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
+  }
+  return FALSE;
+}
+
static void HandleXrefOnCDS (SeqFeatPtr sfp)
{
@@ -5479,6 +5503,7 @@ static void HandleXrefOnCDS (SeqFeatPtr sfp)
SeqFeatXrefPtr xref;
if (sfp != NULL && sfp->product != NULL) {
+ if (InGpsGenomic (sfp)) return;
prot = GetBestProteinFeatureUnindexed (sfp->product);
if (prot != NULL) {
prp = (ProtRefPtr) prot->data.value.ptrvalue;
@@ -5737,6 +5762,10 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP
CleanDoubleQuoteList (grp->syn);
grp->db = ValNodeSort (grp->db, SortDbxref);
CleanupDuplicateDbxrefs (&(grp->db));
+ /* now move grp->dbxref to sfp->dbxref */
+ vnp = grp->db;
+ grp->db = NULL;
+ ValNodeLink ((&sfp->dbxref), vnp);
if (grp->locus != NULL && grp->syn != NULL) {
vnp = grp->syn;
str = (CharPtr) vnp->data.ptrvalue;
@@ -5772,6 +5801,10 @@ static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodeP
CleanDoubleQuoteList (prp->activity);
prp->db = ValNodeSort (prp->db, SortDbxref);
CleanupDuplicateDbxrefs (&(prp->db));
+ /* now move prp->dbxref to sfp->dbxref */
+ vnp = prp->db;
+ prp->db = NULL;
+ ValNodeLink ((&sfp->dbxref), vnp);
if (prp->processed != 3 && prp->processed != 4 &&
prp->name == NULL && sfp->comment != NULL) {
if (StringICmp (sfp->comment, "putative") != 0) {
@@ -6563,6 +6596,8 @@ static void BasicSeqEntryCleanupInternal (SeqEntryPtr sep, ValNodePtr PNTR publi
OrgRefPtr orp;
Boolean partial5;
Boolean partial3;
+ Uint1 processed;
+ ProtRefPtr prp;
ValNodePtr psp;
RnaRefPtr rrp;
Uint1 rrptype;
@@ -6659,6 +6694,28 @@ static void BasicSeqEntryCleanupInternal (SeqEntryPtr sep, ValNodePtr PNTR publi
sfp->data.value.ptrvalue = rrp;
rrp->type = rrptype;
sfp->idx.subtype = FindFeatDefType (sfp);
+ } else {
+ processed = 0;
+ if (StringCmp (ifp->key, "proprotein") == 0 || StringCmp (ifp->key, "preprotein") == 0) {
+ processed = 1;
+ } else if (StringCmp (ifp->key, "mat_peptide") == 0) {
+ processed = 2;
+ } else if (StringCmp (ifp->key, "sig_peptide") == 0) {
+ processed = 3;
+ } else if (StringCmp (ifp->key, "transit_peptide") == 0) {
+ processed = 4;
+ }
+ if (processed != 0 || StringCmp (ifp->key, "Protein") == 0) {
+ bsp = BioseqFind (SeqLocId (sfp->location));
+ if (bsp != NULL && ISA_aa (bsp->mol)) {
+ sfp->data.value.ptrvalue = ImpFeatFree (ifp);
+ sfp->data.choice = SEQFEAT_PROT;
+ prp = ProtRefNew ();
+ sfp->data.value.ptrvalue = prp;
+ prp->processed = processed;
+ sfp->idx.subtype = FindFeatDefType (sfp);
+ }
+ }
}
}
}
@@ -7267,6 +7324,83 @@ NLM_EXTERN void ResynchMessengerRNAPartials (SeqEntryPtr sep)
VisitFeaturesInSep (sep, NULL, ResynchMRNAPartials);
}
+/* ResynchPeptidePartials is a feature callback (ResynchProteinPartials
+   passes it to VisitFeaturesInSep).  It acts only on protein features
+   whose ProtRef processed value is 1 through 4.  For those it
+   1) ORs the 5' and 3' partial flags of the location into sfp->partial,
+   2) looks up the Bioseq named by sfp->product and, if it is a raw
+      amino acid Bioseq, resets the best protein feature on it to a
+      whole interval carrying the same partial flags, and
+   3) sets the MolInfo completeness on that Bioseq's SeqEntry, creating
+      the MolInfo descriptor if none exists.
+   userdata is not used */
+
+NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata)
+
+{
+ SeqFeatPtr bestprot;
+ BioseqPtr bsp;
+ MolInfoPtr mip;
+ Boolean partial5;
+ Boolean partial3;
+ ProtRefPtr prp;
+ SeqEntryPtr sep;
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+ ValNodePtr vnp;
+
+ /* NOTE(review): sfp is dereferenced without a NULL test - presumably
+    the visitor never passes NULL; confirm */
+ if (sfp->data.choice != SEQFEAT_PROT) return;
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ if (prp == NULL) return;
+ if (prp->processed < 1 || prp->processed > 4) return;
+ CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+ sfp->partial = (Boolean) (sfp->partial || partial5 || partial3);
+ /* NOTE(review): slp serves only as an "is there any location" test;
+    the Bioseq below is found from sfp->product, not from slp - confirm
+    that this is intended */
+ slp = SeqLocFindNext (sfp->location, NULL);
+ if (slp == NULL) return;
+ sip = SeqLocId (sfp->product);
+ if (sip == NULL) return;
+ bsp = BioseqFind (sip);
+ if (bsp != NULL && ISA_aa (bsp->mol) && bsp->repr == Seq_repr_raw) {
+ sep = SeqMgrGetSeqEntryForData (bsp);
+ if (sep == NULL) return;
+ /* try the indexed lookup first, then the unindexed one */
+ bestprot = SeqMgrGetBestProteinFeature (bsp, NULL);
+ if (bestprot == NULL) {
+ bestprot = GetBestProteinFeatureUnindexed (sfp->product);
+ }
+ if (bestprot != NULL) {
+ bestprot->location = SeqLocFree (bestprot->location);
+ bestprot->location = CreateWholeInterval (sep);
+ SetSeqLocPartial (bestprot->location, partial5, partial3);
+ bestprot->partial = (partial5 || partial3);
+ }
+ vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
+ if (vnp == NULL) {
+ vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
+ if (vnp != NULL) {
+ mip = MolInfoNew ();
+ vnp->data.ptrvalue = (Pointer) mip;
+ if (mip != NULL) {
+ /* NOTE(review): numeric MolInfo codes - presumably biomol
+    "peptide" and tech "concept-trans-a"; confirm against the
+    MolInfo definitions */
+ mip->biomol = 8;
+ mip->tech = 13;
+ }
+ }
+ }
+ if (vnp != NULL) {
+ mip = (MolInfoPtr) vnp->data.ptrvalue;
+ if (mip != NULL) {
+ /* completeness codes as read by SeqLocPartialCheck:
+    5 = no ends, 3 = no left, 4 = no right; 0 when not partial */
+ if (partial5 && partial3) {
+ mip->completeness = 5;
+ } else if (partial5) {
+ mip->completeness = 3;
+ } else if (partial3) {
+ mip->completeness = 4;
+ /*
+ } else if (partial) {
+ mip->completeness = 2;
+ */
+ } else {
+ mip->completeness = 0;
+ }
+ }
+ }
+ }
+}
+
+/* ResynchProteinPartials runs ResynchPeptidePartials, with NULL
+   userdata, over the features that VisitFeaturesInSep visits in sep */
+
+NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep)
+
+{
+ VisitFeaturesInSep (sep, NULL, ResynchPeptidePartials);
+}
+
/* SeqIdStripLocus removes the SeqId.name field if accession is set */
NLM_EXTERN SeqIdPtr SeqIdStripLocus (SeqIdPtr sip)
@@ -7479,9 +7613,12 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void)
{
Boolean dataFound;
Char path [PATH_MAX];
+ Char appPath[PATH_MAX];
CharPtr ptr;
- ProgramPath (path, sizeof (path));
+ ProgramPath (appPath, sizeof (path));
+ StrCpy(path, appPath);
+ /* data a sibling of our application? */
ptr = StringRChr (path, DIRDELIMCHR);
if (ptr != NULL) {
ptr++;
@@ -7489,6 +7626,7 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void)
}
dataFound = CheckDataPath (path, "data");
if (! (dataFound)) {
+ /* data an uncle of our application? */
if (ptr != NULL) {
ptr--;
*ptr = '\0';
@@ -7501,50 +7639,12 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void)
}
}
#ifdef OS_UNIX_DARWIN
- /* Mac OS X package has application in Programname.app/Contents/MacOS/Programname */
- if (! (dataFound)) {
- if (ptr != NULL) {
- /* check within Contents/Resources */
+ if (! (dataFound) && IsApplicationPackage(appPath)) {
+ /* is data inside our application within Contents/Resources? */
+ StrCpy(path, appPath);
+ FileBuildPath(path, "Contents", NULL);
FileBuildPath (path, "Resources", NULL);
dataFound = CheckDataPath (path, "data");
- /* did not change ptr, so if it failed just go up to next higher level */
- }
- }
- if (! (dataFound)) {
- if (ptr != NULL) {
- ptr--;
- *ptr = '\0';
- ptr = StringRChr (path, DIRDELIMCHR);
- if (ptr != NULL) {
- ptr++;
- *ptr = '\0';
- }
- dataFound = CheckDataPath (path, "data");
- }
- }
- if (! (dataFound)) {
- if (ptr != NULL) {
- ptr--;
- *ptr = '\0';
- ptr = StringRChr (path, DIRDELIMCHR);
- if (ptr != NULL) {
- ptr++;
- *ptr = '\0';
- }
- dataFound = CheckDataPath (path, "data");
- }
- }
- if (! (dataFound)) {
- if (ptr != NULL) {
- ptr--;
- *ptr = '\0';
- ptr = StringRChr (path, DIRDELIMCHR);
- if (ptr != NULL) {
- ptr++;
- *ptr = '\0';
- }
- dataFound = CheckDataPath (path, "data");
- }
}
#endif
if (dataFound) {
@@ -7941,9 +8041,18 @@ NLM_EXTERN Uint2 FindFeatFromFeatDefType (Uint2 subtype)
if (subtype >= FEATDEF_IMP && subtype <= FEATDEF_site_ref) {
return SEQFEAT_IMP;
}
+ if (subtype == FEATDEF_oriT) {
+ return SEQFEAT_IMP;
+ }
if (subtype >= FEATDEF_preprotein && subtype <= FEATDEF_transit_peptide_aa) {
return SEQFEAT_PROT;
}
+ if (subtype == FEATDEF_operon) {
+ return SEQFEAT_IMP;
+ }
+ if (subtype == FEATDEF_gap) {
+ return SEQFEAT_IMP;
+ }
}
return 0;
}
@@ -8263,6 +8372,7 @@ NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqId
SeqPntPtr spp;
if (slp == NULL) return index;
+
while (slp != NULL) {
switch (slp->choice) {
case SEQLOC_NULL :
@@ -8324,6 +8434,155 @@ NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqId
}
slp = slp->next;
}
+
+ return index;
+}
+
+/* VisitSeqIdsInSeqFeat runs VisitSeqIdsInSeqLoc over every location a
+   feature holds - its location, its product, the code-break locations
+   of a coding region, and the anticodon location of a tRNA - and
+   returns the sum of the values those calls return.  A NULL feature
+   returns 0 */
+
+NLM_EXTERN Int4 VisitSeqIdsInSeqFeat (SeqFeatPtr sfp, Pointer userdata, VisitSeqIdFunc callback)
+
+{
+  CodeBreakPtr  brk;
+  CdRegionPtr   cds;
+  Int4          count;
+  RnaRefPtr     rna;
+  tRNAPtr       trna;
+
+  if (sfp == NULL) return 0;
+
+  count = VisitSeqIdsInSeqLoc (sfp->location, userdata, callback);
+  count += VisitSeqIdsInSeqLoc (sfp->product, userdata, callback);
+
+  if (sfp->data.choice == SEQFEAT_CDREGION) {
+    cds = (CdRegionPtr) sfp->data.value.ptrvalue;
+    if (cds != NULL) {
+      brk = cds->code_break;
+      while (brk != NULL) {
+        count += VisitSeqIdsInSeqLoc (brk->loc, userdata, callback);
+        brk = brk->next;
+      }
+    }
+  } else if (sfp->data.choice == SEQFEAT_RNA) {
+    rna = (RnaRefPtr) sfp->data.value.ptrvalue;
+    /* ext.choice 2 is the case whose ext value is read as a tRNAPtr */
+    if (rna != NULL && rna->ext.choice == 2) {
+      trna = (tRNAPtr) rna->ext.value.ptrvalue;
+      if (trna != NULL && trna->anticodon != NULL) {
+        count += VisitSeqIdsInSeqLoc (trna->anticodon, userdata, callback);
+      }
+    }
+  }
+
+  return count;
+}
+
+/* VisitSeqIdsInSeqAlign passes the Seq-ids referenced by an alignment
+   to VisitSeqIdList and returns the accumulated result.  The id of
+   sap->bounds is handled first, then the ids held by the segments
+   according to sap->segtype.  SAS_DISC alignments recurse over the
+   chain of sub-alignments stored in segs.  A NULL alignment returns 0 */
+
+NLM_EXTERN Int4 VisitSeqIdsInSeqAlign (SeqAlignPtr sap, Pointer userdata, VisitSeqIdFunc callback)
+
+{
+ DenseDiagPtr ddp;
+ DenseSegPtr dsp;
+ Int4 index = 0;
+ SeqIdPtr sip;
+ SeqLocPtr slp = NULL;
+ StdSegPtr ssp;
+
+ if (sap == NULL) return index;
+
+ if (sap->bounds != NULL) {
+ sip = SeqLocId (sap->bounds);
+ index += VisitSeqIdList (sip, userdata, callback);
+ }
+
+ if (sap->segs == NULL) return index;
+
+ /* NOTE(review): in the DENDIAG and DENSEG cases VisitSeqIdList is
+    handed every node of the id chain in turn; if VisitSeqIdList
+    itself follows ->next, later ids are visited more than once -
+    confirm against its definition */
+ /* NOTE(review): for DENDIAG and STD only the first node in
+    sap->segs is examined - confirm whether chained segments should
+    also be walked */
+ switch (sap->segtype) {
+ case SAS_DENDIAG :
+ ddp = (DenseDiagPtr) sap->segs;
+ if (ddp != NULL) {
+ for (sip = ddp->id; sip != NULL; sip = sip->next) {
+ index += VisitSeqIdList (sip, userdata, callback);
+ }
+ }
+ break;
+ case SAS_DENSEG :
+ dsp = (DenseSegPtr) sap->segs;
+ if (dsp != NULL) {
+ for (sip = dsp->ids; sip != NULL; sip = sip->next) {
+ index += VisitSeqIdList (sip, userdata, callback);
+ }
+ }
+ break;
+ case SAS_STD :
+ /* segs was tested for NULL above, so ssp is not NULL here */
+ ssp = (StdSegPtr) sap->segs;
+ for (slp = ssp->loc; slp != NULL; slp = slp->next) {
+ sip = SeqLocId (slp);
+ index += VisitSeqIdList (sip, userdata, callback);
+ }
+ break;
+ case SAS_DISC :
+ /* recursive */
+ /* the parameter sap is reused as the loop cursor and is NULL
+    when the loop ends */
+ for (sap = (SeqAlignPtr) sap->segs; sap != NULL; sap = sap->next) {
+ index += VisitSeqIdsInSeqAlign (sap, userdata, callback);
+ }
+ break;
+ default :
+ break;
+ }
+
+ return index;
+}
+
+/* VisitSeqIdsInSeqGraph passes the Seq-id of the graph location to
+   VisitSeqIdList and returns its result.  A NULL graph, or a graph
+   without a location, returns 0 */
+
+NLM_EXTERN Int4 VisitSeqIdsInSeqGraph (SeqGraphPtr sgp, Pointer userdata, VisitSeqIdFunc callback)
+
+{
+  if (sgp == NULL || sgp->loc == NULL) return 0;
+
+  return VisitSeqIdList (SeqLocId (sgp->loc), userdata, callback);
+}
+
+/* VisitSeqIdsInSeqAnnot walks the chain stored in annot->data and sums
+   the results of the matching visitor: type 1 entries are read as
+   features, type 2 as alignments, type 3 as graphs.  Any other type,
+   a NULL annot, or NULL data contributes nothing and returns 0 */
+
+NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, VisitSeqIdFunc callback)
+
+{
+  SeqAlignPtr  align;
+  SeqFeatPtr   feat;
+  SeqGraphPtr  graph;
+  Int4         index = 0;
+
+  if (annot == NULL || annot->data == NULL) return index;
+
+  if (annot->type == 1) {
+    feat = (SeqFeatPtr) annot->data;
+    while (feat != NULL) {
+      index += VisitSeqIdsInSeqFeat (feat, userdata, callback);
+      feat = feat->next;
+    }
+  } else if (annot->type == 2) {
+    align = (SeqAlignPtr) annot->data;
+    while (align != NULL) {
+      index += VisitSeqIdsInSeqAlign (align, userdata, callback);
+      align = align->next;
+    }
+  } else if (annot->type == 3) {
+    graph = (SeqGraphPtr) annot->data;
+    while (graph != NULL) {
+      index += VisitSeqIdsInSeqGraph (graph, userdata, callback);
+      graph = graph->next;
+    }
+  }
+
return index;
}
diff --git a/api/sqnutil2.c b/api/sqnutil2.c
index f2e27242..a5817f57 100644
--- a/api/sqnutil2.c
+++ b/api/sqnutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.144 $
+* $Revision: 6.152 $
*
* File Description:
*
@@ -3054,14 +3054,17 @@ static SimpleSeqPtr ByteStoreToSimpleSeq (ByteStorePtr bs, CharPtr seqid, CharPt
#define strandStr field [STRAND_TAG]
static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
- BoolPtr partial5P, BoolPtr partial3P,
- CharPtr PNTR featP, CharPtr PNTR qualP,
+ BoolPtr partial5P, BoolPtr partial3P, BoolPtr ispointP,
+ BoolPtr isminusP, CharPtr PNTR featP, CharPtr PNTR qualP,
CharPtr PNTR valP, Int4 offset)
{
Boolean badNumber;
CharPtr field [NUM_FTABLE_COLUMNS];
Int2 i;
+ Boolean isminus = FALSE;
+ Boolean ispoint = FALSE;
+ size_t len;
ValNodePtr parsed;
Boolean partial5 = FALSE;
Boolean partial3 = FALSE;
@@ -3098,6 +3101,11 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
partial5 = TRUE;
str++;
}
+ len = StringLen (str);
+ if (len > 1 && str [len - 1] == '^') {
+ ispoint = TRUE;
+ str [len - 1] = '\0';
+ }
if (str != NULL && sscanf (str, "%ld", &val) == 1) {
start = val;
} else {
@@ -3131,6 +3139,7 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
start = stop;
stop = tmp;
}
+ isminus = TRUE;
}
}
}
@@ -3139,6 +3148,8 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
*stopP = stop + offset;
*partial5P = partial5;
*partial3P = partial3;
+ *ispointP = ispoint;
+ *isminusP = isminus;
*featP = featType;
*qualP = qualType;
*valP = qualVal;
@@ -3297,7 +3308,8 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c
}
} else if (StringICmp ("tRNA", str) != 0 &&
StringICmp ("transfer", str) != 0 &&
- StringICmp ("RNA", str) != 0) {
+ StringICmp ("RNA", str) != 0 &&
+ StringICmp ("product", str) != 0) {
if (cdP != NULL && StringLen (str) == 3) {
StringCpy (codon, str);
for (i = 0; i < 3; i++) {
@@ -3323,7 +3335,8 @@ NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr c
if (curraa != 0) {
} else if (StringICmp ("tRNA", str) != 0 &&
StringICmp ("transfer", str) != 0 &&
- StringICmp ("RNA", str) != 0) {
+ StringICmp ("RNA", str) != 0 &&
+ StringICmp ("product", str) != 0) {
if (cdP != NULL && StringLen (str) == 3) {
StringCpy (codon, str);
for (i = 0; i < 3; i++) {
@@ -4127,6 +4140,12 @@ NLM_EXTERN void AddQualifierToFeature (SeqFeatPtr sfp, CharPtr qual, CharPtr val
prp->desc = StringSaveNoNull (val);
} else if (sfp->data.choice == SEQFEAT_CDREGION && StringCmp (qual, "prot_note") == 0) {
bail = FALSE;
+ } else if (sfp->data.choice == SEQFEAT_PROT && StringCmp (qual, "prot_desc") == 0) {
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ if (prp != NULL) {
+ prp->desc = MemFree (prp->desc);
+ prp->desc = StringSaveNoNull (val);
+ }
} else if (ifp != NULL && StringICmp (ifp->key, "variation") == 0 && ParseQualIntoSnpUserObject (sfp, qual, val)) {
} else if (ifp != NULL && StringICmp (ifp->key, "STS") == 0 && ParseQualIntoStsUserObject (sfp, qual, val)) {
} else if (ifp != NULL && StringICmp (ifp->key, "misc_feature") == 0 && ParseQualIntoCloneUserObject (sfp, qual, val)) {
@@ -4313,6 +4332,20 @@ NLM_EXTERN void AddQualifierToFeature (SeqFeatPtr sfp, CharPtr qual, CharPtr val
}
}
}
+ } else if (sfp->data.choice == SEQFEAT_PROT) {
+ if (qnum == GBQUAL_function || qnum == GBQUAL_EC_number || qnum == GBQUAL_product) {
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ if (prp != NULL) {
+ if (qnum == GBQUAL_function) {
+ ValNodeCopyStr (&(prp->activity), 0, val);
+ } else if (qnum == GBQUAL_EC_number) {
+ ValNodeCopyStr (&(prp->ec), 0, val);
+ } else if (qnum == GBQUAL_product) {
+ ValNodeCopyStr (&(prp->name), 0, val);
+ }
+ return;
+ }
+ }
} else if (sfp->data.choice == SEQFEAT_RNA) {
if (qnum == GBQUAL_product) {
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
@@ -4485,16 +4518,94 @@ NLM_EXTERN SeqLocPtr AddIntervalToLocation (SeqLocPtr loc, SeqIdPtr sip,
return rsult;
}
+/* TokenizeAtWhiteSpace skips any leading white space and then the run
+   of non-white-space characters that follows.  If another character
+   comes after that run it is overwritten with a terminator and a
+   pointer just past it is returned; otherwise a pointer to the
+   terminator of the string is returned.  NULL input returns NULL */
+
+static CharPtr TokenizeAtWhiteSpace (CharPtr str)
+
+{
+  CharPtr  cursor;
+
+  if (str == NULL) return NULL;
+
+  cursor = str;
+  while (*cursor != '\0' && IS_WHITESP (*cursor)) {
+    cursor++;
+  }
+  while (*cursor != '\0' && (! IS_WHITESP (*cursor))) {
+    cursor++;
+  }
+
+  if (*cursor == '\0') return cursor;
+
+  *cursor = '\0';
+  return cursor + 1;
+}
+
+/* ParseWhitespaceIntoTabs rewrites one white-space separated feature
+   table line in place as a tab separated line.  A line that starts
+   with white space is treated as a qualifier line and becomes three
+   tabs, the first token, a tab, and the rest of the line.  Any other
+   line becomes first token, tab, second token and, if text remains,
+   a tab and the third token.  Lines with no text are left untouched */
+
+static void ParseWhitespaceIntoTabs (CharPtr line)
+
+{
+ Char ch;
+ size_t len;
+ CharPtr ptr;
+ CharPtr str;
+ CharPtr tmp;
+
+ if (StringHasNoText (line)) return;
+ len = StringLen (line) + 10;
+
+ /* NOTE(review): StringCat is used on str right away, which assumes
+    MemNew returns zero-filled memory - confirm */
+ str = MemNew (len);
+ if (str == NULL) return;
+
+ ptr = line;
+ ch = *ptr;
+ if (IS_WHITESP (ch)) {
+ /* qualifier value line */
+ StringCat (str, "\t\t\t");
+ TrimSpacesAroundString (ptr);
+ tmp = TokenizeAtWhiteSpace (ptr);
+ StringCat (str, ptr);
+ StringCat (str, "\t");
+ StringCat (str, tmp);
+ } else {
+ /* location and possible feature key line */
+ TrimSpacesAroundString (ptr);
+ tmp = TokenizeAtWhiteSpace (ptr);
+ StringCat (str, ptr);
+ StringCat (str, "\t");
+ ptr = tmp;
+ tmp = TokenizeAtWhiteSpace (ptr);
+ StringCat (str, ptr);
+ ptr = tmp;
+ if (! StringHasNoText (ptr)) {
+ /* only the third token is kept; text after it is dropped */
+ tmp = TokenizeAtWhiteSpace (ptr);
+ StringCat (str, "\t");
+ StringCat (str, ptr);
+ }
+ }
+
+ /* replace original with tab-delimited table */
+ /* NOTE(review): the qualifier form can be longer than the input
+    (three tabs may replace a single leading blank), so this copy
+    assumes the caller's buffer has spare room - confirm */
+ StringCpy (line, str);
+
+ MemFree (str);
+}
+
static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname)
{
+ Boolean allowWhitesp = TRUE;
BioSourcePtr biop;
CdRegionPtr crp;
AnnotDescrPtr desc;
CharPtr feat;
+ IntFuzzPtr fuzz;
GeneRefPtr grp;
Int2 idx;
ImpFeatPtr ifp;
+ Boolean isminus;
+ Boolean ispoint;
Int2 j;
CharPtr label;
Char line [2047];
@@ -4508,12 +4619,15 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname)
PubdescPtr pdp;
Int4 pos;
SeqFeatPtr prev = NULL;
+ ProtRefPtr prp;
CharPtr qual;
Uint1 rnatype;
RnaRefPtr rrp;
SeqAnnotPtr sap = NULL;
SeqFeatPtr sfp = NULL;
SeqIdPtr sip;
+ SeqLocPtr slp;
+ SeqPntPtr spp;
Int4 start;
Int4 stop;
SqnTagPtr stp;
@@ -4543,7 +4657,11 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname)
return sap;
}
- if (ParseFeatTableLine (line, &start, &stop, &partial5, &partial3, &feat, &qual, &val, offset)) {
+ if (allowWhitesp) {
+ ParseWhitespaceIntoTabs (line);
+ }
+
+ if (ParseFeatTableLine (line, &start, &stop, &partial5, &partial3, &ispoint, &isminus, &feat, &qual, &val, offset)) {
if (feat != NULL && start >= 0 && stop >= 0) {
if (sap == NULL) {
@@ -4620,6 +4738,23 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname)
rrp->type = rnatype;
}
+ } else if (StringCmp (feat, "Protein") == 0) {
+
+ sfp->data.choice = SEQFEAT_PROT;
+ prp = ProtRefNew ();
+ if (prp != NULL) {
+ sfp->data.value.ptrvalue = (Pointer) prp;
+ }
+
+ } else if (StringCmp (feat, "proprotein") == 0) {
+
+ sfp->data.choice = SEQFEAT_PROT;
+ prp = ProtRefNew ();
+ if (prp != NULL) {
+ sfp->data.value.ptrvalue = (Pointer) prp;
+ prp->processed = 1;
+ }
+
} else if (StringCmp (feat, "source") == 0) {
sfp->data.choice = SEQFEAT_BIOSRC;
@@ -4672,7 +4807,30 @@ static SeqAnnotPtr ReadFeatureTable (FILE *fp, CharPtr seqid, CharPtr annotname)
}
}
- sfp->location = AddIntervalToLocation (NULL, sip, start, stop, partial5, partial3);
+ if (ispoint) {
+ spp = SeqPntNew ();
+ if (spp != NULL) {
+ spp->point = start;
+ if (isminus) {
+ spp->strand = Seq_strand_minus;
+ }
+ spp->id = SeqIdDup (sip);
+ fuzz = IntFuzzNew ();
+ if (fuzz != NULL) {
+ fuzz->choice = 4;
+ fuzz->a = 3;
+ spp->fuzz = fuzz;
+ }
+ slp = ValNodeNew (NULL);
+ if (slp != NULL) {
+ slp->choice = SEQLOC_PNT;
+ slp->data.ptrvalue = (Pointer) spp;
+ sfp->location = slp;
+ }
+ }
+ } else {
+ sfp->location = AddIntervalToLocation (NULL, sip, start, stop, partial5, partial3);
+ }
if (partial5 || partial3) {
sfp->partial = TRUE;
@@ -5250,7 +5408,7 @@ NLM_EXTERN Pointer ReadAsnFastaOrFlatFile (FILE *fp, Uint2Ptr datatypeptr, Uint2
Boolean inLetters;
Boolean isProt = FALSE;
Int4 j;
- Char line [1023];
+ Char line [4096];
Boolean mayBeAccessionList = TRUE;
Boolean mayBePlainFasta = TRUE;
SeqFeatPtr nextsfp;
diff --git a/api/sqnutil3.c b/api/sqnutil3.c
index 76bcdcc8..c1554dcd 100644
--- a/api/sqnutil3.c
+++ b/api/sqnutil3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/7/00
*
-* $Revision: 6.23 $
+* $Revision: 6.28 $
*
* File Description:
*
@@ -977,6 +977,7 @@ static FeatdefNameData featdefWithName [] = {
{ FEATDEF_D_segment , "D_segment" },
{ FEATDEF_enhancer , "enhancer" },
{ FEATDEF_exon , "exon" },
+ { FEATDEF_gap , "gap" },
{ FEATDEF_GC_signal , "GC_signal" },
{ FEATDEF_GENE , "Gene" },
{ FEATDEF_HET , "Het" },
@@ -1001,6 +1002,8 @@ static FeatdefNameData featdefWithName [] = {
{ FEATDEF_NUM , "Num" },
{ FEATDEF_N_region , "N_region" },
{ FEATDEF_old_sequence , "old_sequence" },
+ { FEATDEF_operon , "operon" },
+ { FEATDEF_oriT , "oriT" },
{ FEATDEF_polyA_signal , "polyA_signal" },
{ FEATDEF_polyA_site , "polyA_site" },
{ FEATDEF_preRNA , "precursor_RNA" },
@@ -1159,11 +1162,14 @@ static CharPtr featurekeys [] = {
"NonStdRes" ,
"Het" ,
"Src" ,
- "pro_peptide" ,
+ "proprotein" ,
"mat_peptide" ,
"sig_peptide" ,
"transit_peptide",
- "snoRNA"
+ "snoRNA",
+ "gap",
+ "operon",
+ "oriT"
};
NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF)
@@ -1527,3 +1533,23 @@ NLM_EXTERN void SegOrDeltaBioseqToRaw (BioseqPtr bsp)
bsp->seq_data_type = Seq_code_iupacna;
}
+
+static PubMedFetchFunc pmf_pubfetch = NULL;
+
+NLM_EXTERN void LIBCALL PubMedSetFetchFunc (PubMedFetchFunc func)
+/* Registers the PubMed fetch callback by storing func in the file-static
+ * pmf_pubfetch slot, replacing whatever was registered before.  No
+ * validation is performed, so passing NULL simply clears the slot. */
+{
+ pmf_pubfetch = func;
+}
+
+/* GetPubMedForUid
+ *
+ * Dispatches a PubMed lookup to the callback held in pmf_pubfetch.
+ * Returns NULL without calling anything when uid is below 1 or when no
+ * callback is registered; otherwise returns whatever the callback returns.
+ */
+NLM_EXTERN PubmedEntryPtr LIBCALL GetPubMedForUid (Int4 uid)
+
+{
+  /* snapshot the registered callback once so the test and the call agree */
+  PubMedFetchFunc  fetch = pmf_pubfetch;
+
+  if (uid >= 1 && fetch != NULL) {
+    return fetch (uid);
+  }
+
+  return NULL;
+}
+
diff --git a/api/sqnutils.h b/api/sqnutils.h
index 20b6e762..e3a061e3 100644
--- a/api/sqnutils.h
+++ b/api/sqnutils.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.87 $
+* $Revision: 6.92 $
*
* File Description:
*
@@ -47,6 +47,7 @@
#include <ncbi.h>
#include <sequtil.h>
+#include <objpubme.h>
#undef NLM_EXTERN
#ifdef NLM_IMPORT
@@ -195,10 +196,15 @@ NLM_EXTERN void ResynchCodingRegionPartials (SeqEntryPtr sep);
NLM_EXTERN void ResynchMessengerRNAPartials (SeqEntryPtr sep);
+/* resynchronizes protein feature with product peptide bioseq */
+
+NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep);
+
/* individual feature callbacks for above functions */
NLM_EXTERN void ResynchMRNAPartials (SeqFeatPtr sfp, Pointer userdata);
NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata);
+NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata);
/* functions to parse [org=Drosophila melanogaster] and [gene=lacZ] from titles */
/* for example, passing "gene" to SqnTagFind returns "lacZ" */
@@ -493,11 +499,16 @@ NLM_EXTERN Int4 VisitSetsInSet (BioseqSetPtr bssp, Pointer userdata, VisitSetsFu
typedef void (*VisitElementsFunc) (SeqEntryPtr sep, Pointer userdata);
NLM_EXTERN Int4 VisitElementsInSep (SeqEntryPtr sep, Pointer userdata, VisitElementsFunc callback);
-/* visits all SeqIds within a SeqLoc */
+/* visits all SeqIds within a SeqLoc, or within features, alignments, graphs, or annots */
typedef void (*VisitSeqIdFunc) (SeqIdPtr sip, Pointer userdata);
NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqIdFunc callback);
+NLM_EXTERN Int4 VisitSeqIdsInSeqFeat (SeqFeatPtr sfp, Pointer userdata, VisitSeqIdFunc callback);
+NLM_EXTERN Int4 VisitSeqIdsInSeqAlign (SeqAlignPtr sap, Pointer userdata, VisitSeqIdFunc callback);
+NLM_EXTERN Int4 VisitSeqIdsInSeqGraph (SeqGraphPtr sgp, Pointer userdata, VisitSeqIdFunc callback);
+NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, VisitSeqIdFunc callback);
+
/* visits all sub UserFields - if the data type is 11, VisitUserFieldsInUfp recurses */
typedef void (*VisitUserFieldsFunc) (UserFieldPtr ufp, Pointer userdata);
@@ -538,6 +549,17 @@ NLM_EXTERN Int4 VisitBioSourcesInSep (SeqEntryPtr sep, Pointer userdata, VisitBi
typedef void (*ScanBioseqSetFunc) (SeqEntryPtr sep, Pointer userdata);
NLM_EXTERN Int4 ScanBioseqSetRelease (CharPtr inputFile, Boolean binary, Boolean compressed, Pointer userdata, ScanBioseqSetFunc callback);
+/* PubMed registered fetch functionality */
+
+NLM_EXTERN PubmedEntryPtr LIBCALL GetPubMedForUid (Int4 uid);
+
+/* internal support type, registration function */
+
+typedef PubmedEntryPtr (LIBCALLBACK * PubMedFetchFunc) (Int4 uid);
+
+NLM_EXTERN void LIBCALL PubMedSetFetchFunc (PubMedFetchFunc func);
+
+
#ifdef __cplusplus
}
diff --git a/api/subutil.c b/api/subutil.c
index ff5c748d..a4cbcaed 100644
--- a/api/subutil.c
+++ b/api/subutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.52 $
+* $Revision: 6.55 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -40,6 +40,17 @@
*
*
* $Log: subutil.c,v $
+* Revision 6.55 2003/10/16 17:16:33 mjohnson
+*
+* Added ORG_* and IS_ORG_* defines for origins. Use these constants
+* and macros instead of small integers.
+*
+* Revision 6.54 2003/10/08 16:46:44 kans
+* fix in AddCompleteness (KT)
+*
+* Revision 6.53 2003/07/11 18:22:45 kans
+* AddSourceToRefGeneTrackUserObject
+*
* Revision 6.52 2002/11/05 17:01:55 kans
* refgene tracking user object uses comment as name if accession is empty
*
@@ -2570,7 +2581,7 @@ NLM_EXTERN Boolean AddGenomeToEntry (
bio = BioSourceNew();
}
bio->genome = (Uint1)type;
- bio->origin = 0; /* unknown */
+ bio->origin = ORG_DEFAULT; /* unknown */
vnp->data.ptrvalue = (Pointer) bio;
return TRUE;
@@ -2732,19 +2743,19 @@ NLM_EXTERN void AddCompleteness(NCBISubPtr submission, SeqEntryPtr sep, SeqFeatP
Boolean partial = FALSE;
retval = SeqLocPartialCheck(sfp->location);
- if (retval & SLP_START) {
+ if ((retval & SLP_START) && (retval & SLP_STOP)) {
+ AddCompleteToEntry(submission, sep, 5); /* no_ends */
+ partial = TRUE;
+ } else if (retval & SLP_START) {
AddCompleteToEntry(submission, sep, 3); /* no_left */
partial = TRUE;
- }
- if (retval & SLP_STOP) {
+ } else if (retval & SLP_STOP) {
AddCompleteToEntry(submission, sep, 4); /* no_right */
partial = TRUE;
- }
- if (retval & (SLP_OTHER | SLP_INTERNAL)) {
+ } else if (retval & (SLP_OTHER | SLP_INTERNAL)) {
AddCompleteToEntry(submission, sep, 2); /* partial */
partial = TRUE;
- }
- if (!partial && sfp->partial) {
+ } else if (!partial && sfp->partial) {
AddCompleteToEntry(submission, sep, 2); /* partial */
}
}
@@ -4420,6 +4431,7 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c
{
UserFieldPtr curr;
ObjectIdPtr oip;
+ UserFieldPtr prev = NULL;
if (uop == NULL || collaborator == NULL) return;
oip = uop->type;
@@ -4430,6 +4442,7 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c
if (oip != NULL && StringICmp (oip->str, "Collaborator") == 0) {
break;
}
+ prev = curr;
}
if (curr == NULL) {
@@ -4439,10 +4452,13 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c
curr->label = oip;
curr->choice = 1; /* visible string */
- /* link status at beginning of list */
+ /* link curator at end of list */
- curr->next = uop->data;
- uop->data = curr;
+ if (prev != NULL) {
+ prev->next = curr;
+ } else {
+ uop->data = curr;
+ }
}
if (curr == NULL || curr->choice != 1) return;
@@ -4454,6 +4470,50 @@ NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr c
curr->data.ptrvalue = (Pointer) StringSave (collaborator);
}
+/* AddSourceToRefGeneTrackUserObject
+ *
+ * Sets the "GenomicSource" string field of a RefGeneTracking user object.
+ *
+ *   uop            user object to modify; ignored unless its type string
+ *                  compares equal (case-insensitively) to "RefGeneTracking"
+ *   genomicSource  text to store; a private copy is made with StringSave
+ *
+ * If a field labelled "GenomicSource" already exists its string value is
+ * replaced; otherwise a new field is created and linked at the END of the
+ * field list.  An existing field whose choice is not 1 (visible string) is
+ * left untouched.  Nothing is changed if an allocation fails.
+ */
+NLM_EXTERN void AddSourceToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr genomicSource)
+
+{
+  UserFieldPtr  curr;
+  ObjectIdPtr   oip;
+  UserFieldPtr  prev = NULL;
+
+  if (uop == NULL || genomicSource == NULL) return;
+  oip = uop->type;
+  if (oip == NULL || StringICmp (oip->str, "RefGeneTracking") != 0) return;
+
+  /* look for an existing field, remembering the tail for appending */
+
+  for (curr = uop->data; curr != NULL; curr = curr->next) {
+    oip = curr->label;
+    if (oip != NULL && StringICmp (oip->str, "GenomicSource") == 0) {
+      break;
+    }
+    prev = curr;
+  }
+
+  if (curr == NULL) {
+    curr = UserFieldNew ();
+    oip = ObjectIdNew ();
+    if (curr == NULL || oip == NULL) {
+      /* Bail out before either pointer is dereferenced.  Both objects are
+         freshly allocated and still empty, so plain MemFree is used.
+         NOTE(review): switch to UserFieldFree/ObjectIdFree if the
+         constructors ever allocate sub-objects. */
+      MemFree (curr);
+      MemFree (oip);
+      return;
+    }
+    oip->str = StringSave ("GenomicSource");
+    curr->label = oip;
+    curr->choice = 1; /* visible string */
+
+    /* link source at end of list */
+
+    if (prev != NULL) {
+      prev->next = curr;
+    } else {
+      uop->data = curr;
+    }
+  }
+
+  if (curr->choice != 1) return;
+
+  /* replace any existing source indication */
+
+  curr->data.ptrvalue = MemFree (curr->data.ptrvalue);
+
+  curr->data.ptrvalue = (Pointer) StringSave (genomicSource);
+}
+
NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr field,
CharPtr accn, Int4 gi,
Boolean sequenceChange,
diff --git a/api/subutil.h b/api/subutil.h
index 480c5157..41645047 100644
--- a/api/subutil.h
+++ b/api/subutil.h
@@ -31,7 +31,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.43 $
+* $Revision: 6.46 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -42,6 +42,17 @@
*
*
* $Log: subutil.h,v $
+* Revision 6.46 2003/10/21 18:16:05 bazhin
+* Replaced C++ comments with C ones.
+*
+* Revision 6.45 2003/10/16 17:16:33 mjohnson
+*
+* Added ORG_* and IS_ORG_* defines for origins. Use these constants
+* and macros instead of small integers.
+*
+* Revision 6.44 2003/07/11 18:22:45 kans
+* AddSourceToRefGeneTrackUserObject
+*
* Revision 6.43 2002/07/09 16:17:35 kans
* AddAccessionToTpaAssemblyUserObject takes from and to parameters
*
@@ -949,6 +960,26 @@ NLM_EXTERN Boolean AddSubSourceToEntry (
#define ORGMOD_old_name 254
#define ORGMOD_other 255
+/* Defines for BioSrc.origin
+ *
+ * ORG_* are the integer codes stored in the origin field; ORG_DEFAULT is
+ * an alias for ORG_UNKNOWN (0).  Prefer these names to bare integers.
+ */
+#define ORG_UNKNOWN 0
+#define ORG_NATURAL 1
+#define ORG_NATMUT 2
+#define ORG_MUT 3
+#define ORG_ARTIFICIAL 4
+#define ORG_SYNTHETIC 5
+#define ORG_OTHER 255
+#define ORG_DEFAULT ORG_UNKNOWN
+/* The IS_ORG_* predicates expand to (S).origin, so S must be the structure
+ * itself rather than a pointer to it: pass *ptr when holding a pointer.
+ * S is evaluated exactly once per macro. */
+#define IS_ORG_UNKNOWN(S) ((S).origin == ORG_UNKNOWN)
+#define IS_ORG_NATURAL(S) ((S).origin == ORG_NATURAL)
+#define IS_ORG_NATMUT(S) ((S).origin == ORG_NATMUT)
+#define IS_ORG_MUT(S) ((S).origin == ORG_MUT)
+#define IS_ORG_ARTIFICIAL(S) ((S).origin == ORG_ARTIFICIAL)
+#define IS_ORG_SYNTHETIC(S) ((S).origin == ORG_SYNTHETIC)
+#define IS_ORG_OTHER(S) ((S).origin == ORG_OTHER)
+
+
/*********************************************
* OrgMod defines subclasses of organism names
* (also see SubSource above for subclasses of source material)
@@ -1557,6 +1588,7 @@ NLM_EXTERN Boolean AddPhrapGraphToSeqLit (
NLM_EXTERN UserObjectPtr CreateRefGeneTrackUserObject (void);
NLM_EXTERN void AddStatusToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr status);
NLM_EXTERN void AddCuratorToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr collaborator);
+NLM_EXTERN void AddSourceToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr genomicSource);
NLM_EXTERN void AddAccessionToRefGeneTrackUserObject (UserObjectPtr uop, CharPtr field,
CharPtr accn, Int4 gi,
Boolean sequenceChange,
diff --git a/api/tofasta.c b/api/tofasta.c
index bba0ed88..c605b2a0 100644
--- a/api/tofasta.c
+++ b/api/tofasta.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.114 $
+* $Revision: 6.121 $
*
* File Description: various sequence objects to fasta output
*
@@ -39,6 +39,27 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: tofasta.c,v $
+* Revision 6.121 2003/08/04 19:51:02 kans
+* for complete chromosome title, if > 3 clones (by counting semicolons) then just display count, not full text of clones
+*
+* Revision 6.120 2003/07/25 16:15:25 kans
+* FindProtDefLine of hypothetical protein only needs to look for locus_tag
+*
+* Revision 6.119 2003/07/25 15:34:07 kans
+* protect FindProtDefLine against no parent CDS (e.g., SWISS-PROT segmented record P33072)
+*
+* Revision 6.118 2003/07/24 21:51:04 kans
+* if hypothetical protein, find gene and add to name
+*
+* Revision 6.117 2003/07/23 20:37:02 kans
+* if making htgs title, do not set iip values
+*
+* Revision 6.116 2003/07/22 18:31:44 kans
+* Added support for EMBLBlockPtr keywords in addition to GBBlockPtr keywords in suppressing sequencing in progress message
+*
+* Revision 6.115 2003/05/02 16:22:24 kans
+* added FindNRDefLine to make NR_ deflines on the fly
+*
* Revision 6.114 2003/03/25 17:00:53 kans
* CreateDefLine htgs suffix only shows if delta seq with more than 0 gaps
*
@@ -2603,6 +2624,7 @@ static ValNodePtr IndexedGatherDescrOnBioseq (ItemInfoPtr iip, BioseqPtr bsp, Ui
SeqDescrPtr sdp;
sdp = SeqMgrGetNextDescriptor (bsp, NULL, choice, &dcontext);
+ if (sdp == NULL) return NULL;
if (ISA_aa(bsp->mol) && !is_pdb(bsp)) {
if (dcontext.level != 0) return NULL;
}
@@ -2836,6 +2858,89 @@ static CharPtr FindNMDefLine (BioseqPtr bsp)
return str;
}
+/* FindNRDefLine
+ *
+ * Manufactures a definition line of the form
+ *
+ *     "<taxname> <gene label>, <rna type>"
+ *
+ * for the given bioseq.  The taxname comes from the source descriptor,
+ * the gene label from FeatDefLabel on nd.gene, and the RNA type from the
+ * molinfo biomol (defaulting to "miscRNA" when there is no molinfo or the
+ * biomol is not one of the listed cases).
+ *
+ * Returns a string allocated with MemNew, or NULL when no gene was
+ * counted, when the source descriptor, BioSource or taxname is missing,
+ * or when the allocation fails.
+ * NOTE(review): the result is presumably owned and freed by the caller -
+ * confirm against the call site.
+ * NOTE(review): nd.numgenes / nd.gene are assumed to be filled in by the
+ * FindNMFeats callback, which is defined elsewhere in this file.
+ */
+static CharPtr FindNRDefLine (BioseqPtr bsp)
+
+{
+  BioSourcePtr  biop;
+  Char          buf [512];
+  Uint2         entityID;
+  CharPtr       gene;
+  size_t        len;
+  MolInfoPtr    mip;
+  NMDef         nd;
+  OrgRefPtr     orp;
+  CharPtr       rna = "miscRNA";
+  SeqEntryPtr   sep;
+  CharPtr       str;
+  ValNodePtr    vnp;
+
+  MemSet ((Pointer) &nd, 0, sizeof (NMDef));
+  entityID = ObjMgrGetEntityIDForPointer (bsp);
+  sep = GetBestTopParentForDataEx (entityID, bsp, TRUE);
+
+  VisitFeaturesInSep (sep, (Pointer) &nd, FindNMFeats);
+  if (nd.numgenes < 1) return NULL;
+
+  vnp = GatherDescrOnBioseq (NULL, bsp, Seq_descr_source, FALSE);
+  if (vnp == NULL) return NULL;
+  biop = (BioSourcePtr) vnp->data.ptrvalue;
+  /* a descriptor node can carry an empty payload; mip below is guarded
+     the same way, so do not dereference biop blindly */
+  if (biop == NULL) return NULL;
+  orp = biop->org;
+  if (orp == NULL || StringHasNoText (orp->taxname)) return NULL;
+
+  FeatDefLabel (nd.gene, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
+  gene = StringSaveNoNull (buf);
+
+  vnp = GatherDescrOnBioseq (NULL, bsp, Seq_descr_molinfo,TRUE);
+  if (vnp != NULL) {
+    mip = (MolInfoPtr) vnp->data.ptrvalue;
+    if (mip != NULL) {
+      switch (mip->biomol) {
+        case MOLECULE_TYPE_PRE_MRNA :
+          rna = "precursorRNA";
+          break;
+        case MOLECULE_TYPE_MRNA :
+          rna = "mRNA";
+          break;
+        case MOLECULE_TYPE_RRNA :
+          rna = "rRNA";
+          break;
+        case MOLECULE_TYPE_TRNA :
+          rna = "tRNA";
+          break;
+        case MOLECULE_TYPE_SNRNA :
+          rna = "snRNA";
+          break;
+        case MOLECULE_TYPE_SCRNA :
+          rna = "scRNA";
+          break;
+        case MOLECULE_TYPE_CRNA :
+          rna = "cRNA";
+          break;
+        case MOLECULE_TYPE_SNORNA :
+          rna = "snoRNA";
+          break;
+        case MOLECULE_TYPE_TRANSCRIBED_RNA :
+          rna = "miscRNA";
+          break;
+        default :
+          break;
+      }
+    }
+  }
+
+  /* the 30 spare bytes cover the separators, the longest rna label and
+     the terminating NUL */
+  len = StringLen (orp->taxname) + StringLen (gene) +
+        StringLen (", ") + 30;
+
+  str = (CharPtr) MemNew (len);
+  if (str != NULL) {
+    /* gene may be NULL (empty label or failed copy); passing NULL to %s
+       is undefined, so substitute an empty string */
+    sprintf (str, "%s %s, %s", orp->taxname, gene != NULL ? gene : "", rna);
+  }
+
+  MemFree (gene);
+
+  return str;
+}
+
static CharPtr FindProtDefLine(BioseqPtr bsp)
{
SeqFeatPtr sfp = NULL, f;
@@ -2880,6 +2985,58 @@ static CharPtr FindProtDefLine(BioseqPtr bsp)
(CharPtr) vnp->data.ptrvalue);
s += StringLen((CharPtr)vnp->data.ptrvalue) + 2;
}
+ /* if hypothetical protein, append locus_tag */
+ if (StringICmp (title, "hypothetical protein") == 0) {
+ sfp = NULL;
+ if (indexed) {
+ sfp = SeqMgrGetCDSgivenProduct (bsp, NULL);
+ } else {
+ sfp = GatherProtCDS(bsp);
+ }
+ if (sfp != NULL) {
+ grp = SeqMgrGetGeneXref (sfp);
+ if (grp == NULL) {
+ loc = sfp->location;
+ best_gene = NULL;
+ if (indexed) {
+ best_gene = SeqMgrGetOverlappingGene (loc, NULL);
+ } else {
+ vnp = GatherGenesForCDS(loc);
+ for (v=vnp; v; v=v->next) {
+ f = (SeqFeatPtr) v->data.ptrvalue;
+ diff_current = SeqLocAinB(loc, f->location);
+ if (! diff_current) {
+ best_gene = f;
+ break;
+ } else if (diff_current > 0) {
+ if ((diff_lowest == -1) || (diff_current<diff_lowest)) {
+ diff_lowest = diff_current;
+ best_gene = f;
+ }
+ }
+ }
+ ValNodeFree(vnp);
+ }
+ if (best_gene != NULL) {
+ grp = (GeneRefPtr) best_gene->data.value.ptrvalue;
+ }
+ }
+ }
+ if (grp != NULL) {
+ geneprod = NULL;
+ if (grp->locus_tag != NULL) {
+ geneprod = grp->locus_tag;
+ }
+ if (geneprod != NULL) {
+ s = (CharPtr) MemNew (StringLen (geneprod) + StringLen (title) + 20);
+ if (s != NULL) {
+ sprintf (s, "%s %s", title, geneprod);
+ MemFree (title);
+ title = s;
+ }
+ }
+ }
+ }
} else if (prp->desc) {
title = StringSave(prp->desc);
}
@@ -3084,6 +3241,27 @@ static Boolean StrainNotAtEndOfTaxname (CharPtr name, CharPtr strain)
return FALSE;
}
+/* GetNumClones
+ *
+ * Estimates how many clone names a subsource string holds by counting
+ * semicolons: the result is 0 for a NULL/blank string, otherwise one more
+ * than the number of ';' characters found.
+ */
+static Int2 GetNumClones (CharPtr str)
+
+{
+  CharPtr  cursor;
+  Int2     total = 1;
+
+  if (StringHasNoText (str)) return 0;
+
+  /* each separator found means one further clone name follows */
+  for (cursor = str; *cursor != '\0'; cursor++) {
+    if (*cursor == ';') {
+      total++;
+    }
+  }
+
+  return total;
+}
+
static CharPtr UseOrgMods(BioseqPtr bsp, CharPtr suffix)
{
ItemInfoPtr iip = NULL;
@@ -3096,6 +3274,7 @@ static CharPtr UseOrgMods(BioseqPtr bsp, CharPtr suffix)
CharPtr name = NULL, chr = NULL, str = NULL,
cln = NULL, map = NULL, def=NULL;
Int2 deflen = 0;
+ Int2 numclones;
if (bsp == NULL) {
return NULL;
@@ -3119,9 +3298,16 @@ static CharPtr UseOrgMods(BioseqPtr bsp, CharPtr suffix)
}
if (ssp->subtype == 3) { /* clone */
if (ssp->name != NULL) {
- cln = (CharPtr) MemNew(StringLen(ssp->name) + 8);
- deflen += StringLen(ssp->name) + 8;
- sprintf(cln, " clone %s", ssp->name);
+ numclones = GetNumClones (ssp->name);
+ if (numclones > 3) {
+ cln = (CharPtr) MemNew (20);
+ sprintf (cln, ", %d clones,", (int) numclones);
+ deflen += StringLen (cln) + 2;
+ } else {
+ cln = (CharPtr) MemNew(StringLen(ssp->name) + 8);
+ deflen += StringLen(ssp->name) + 8;
+ sprintf(cln, " clone %s", ssp->name);
+ }
}
}
if (ssp->subtype == 2) { /* map */
@@ -3446,9 +3632,11 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf,
"WORKING DRAFT SEQUENCE",
"*** SEQUENCING IN PROGRESS ***" };
Boolean htg_tech = FALSE, htgs_draft = FALSE, htgs_cancelled = FALSE,
- is_nc = FALSE, is_nm = FALSE, is_tpa = FALSE;
+ is_nc = FALSE, is_nm = FALSE, is_nr = FALSE, is_tpa = FALSE;
MolInfoPtr mip;
GBBlockPtr gbp = NULL;
+ EMBLBlockPtr ebp = NULL;
+ ValNodePtr keywords = NULL;
Boolean wgsmaster = FALSE;
CharPtr suffix = NULL;
SeqIdPtr sip;
@@ -3474,6 +3662,8 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf,
is_nc = TRUE;
} else if (StringNICmp (tsip->accession, "NM_", 3) == 0) {
is_nm = TRUE;
+ } else if (StringNICmp (tsip->accession, "NR_", 3) == 0) {
+ is_nr = TRUE;
}
}
break;
@@ -3519,6 +3709,16 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf,
vnp=GatherDescrOnBioseq(iip, bsp, Seq_descr_genbank,TRUE);
if (vnp != NULL) {
gbp = (GBBlockPtr) vnp->data.ptrvalue;
+ if (gbp != NULL) {
+ keywords = gbp->keywords;
+ }
+ }
+ vnp=GatherDescrOnBioseq(iip, bsp, Seq_descr_embl,TRUE);
+ if (vnp != NULL) {
+ ebp = (EMBLBlockPtr) vnp->data.ptrvalue;
+ if (ebp != NULL) {
+ keywords = ebp->keywords;
+ }
}
}
if (! ignoreTitle)
@@ -3528,8 +3728,13 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf,
title = StringSaveNoNull((CharPtr)vnp->data.ptrvalue);
}
if (tech == MI_TECH_htgs_0 || tech == MI_TECH_htgs_1 || tech == MI_TECH_htgs_2) {
- MemFree(title); /* manufacture all HTG titles */
- title = NULL;
+ MemFree(title); /* manufacture all HTG titles */
+ title = NULL;
+ if (iip != NULL) {
+ iip->entityID = 0;
+ iip->itemID = 0;
+ iip->itemtype = 0;
+ }
if (title == NULL || *title == '\0') {
title = UseOrgMods(bsp, NULL);
organism = NULL;
@@ -3578,6 +3783,13 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf,
iip->itemID = 0;
iip->itemtype = 0;
}
+ } else if (is_nr && title == NULL) {
+ title = FindNRDefLine (bsp);
+ if (title != NULL && iip != NULL) {
+ iip->entityID = 0;
+ iip->itemID = 0;
+ iip->itemtype = 0;
+ }
}
/* some titles may have zero length */
if (title != NULL && *title != '\0') {
@@ -3736,8 +3948,8 @@ NLM_EXTERN Boolean CreateDefLineEx (ItemInfoPtr iip, BioseqPtr bsp, CharPtr buf,
i = 0;
}
} else {
- if (gbp != NULL) {
- for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
+ if (keywords != NULL) {
+ for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_DRAFT") == 0) {
htgs_draft = TRUE;
} else if (StringICmp ((CharPtr) vnp->data.ptrvalue, "HTGS_CANCELLED") == 0) {
diff --git a/api/tomedlin.c b/api/tomedlin.c
index 3a39dab6..0f779966 100644
--- a/api/tomedlin.c
+++ b/api/tomedlin.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/15/91
*
-* $Revision: 6.8 $
+* $Revision: 6.10 $
*
* File Description: conversion to medlars format
*
@@ -40,6 +40,12 @@
*
*
* $Log: tomedlin.c,v $
+* Revision 6.10 2003/09/28 20:22:47 kans
+* added PubmedEntryToXXXFile functions
+*
+* Revision 6.9 2003/09/26 18:57:51 kans
+* MedlineEntryToDataFile calls MakeMLAuthString for structured author
+*
* Revision 6.8 2001/10/29 20:37:06 kans
* MakeAuthorString for structured authors
*
@@ -187,15 +193,66 @@ static ColData table [2] = {{0, 6, 0, 'l', TRUE, TRUE, FALSE},
static Char *months[13] = {"", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
-NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp)
+/* MakeMLAuthString
+ *
+ * Builds a single author string laid out as
+ *
+ *     name [ " " initials-with-periods-removed ] [ " " suffix ]
+ *
+ * The initials and suffix parts are emitted only when the corresponding
+ * argument contains text.  Every character of initials other than '.'
+ * is copied through unchanged (hyphens included).
+ *
+ * Returns a MemNew-allocated string, or NULL if name is NULL or the
+ * allocation fails.
+ */
+static CharPtr MakeMLAuthString (
+  CharPtr name,
+  CharPtr initials,
+  CharPtr suffix
+)
+
+{
+  CharPtr  dst;
+  CharPtr  result;
+  CharPtr  src;
+  size_t   total;
+
+  if (name == NULL) return NULL;
+
+  total = StringLen (name) + StringLen (initials) * 3 + StringLen (suffix);
+  result = MemNew (sizeof (Char) * (total + 4));
+  if (result == NULL) return NULL;
+
+  dst = StringMove (result, name);
+
+  if (! StringHasNoText (initials)) {
+    dst = StringMove (dst, " ");
+    /* copy the initials, dropping only the periods */
+    for (src = initials; *src != '\0'; src++) {
+      if (*src != '.') {
+        *dst = *src;
+        dst++;
+      }
+    }
+    *dst = '\0';
+  }
+
+  if (! StringHasNoText (suffix)) {
+    dst = StringMove (dst, " ");
+    StringMove (dst, suffix);
+  }
+
+  return result;
+}
+
+static Boolean MedlineEntryToDataFileEx (MedlineEntryPtr mep, Int4 pmid, FILE *fp)
{
CharPtr abstract;
AffilPtr affil;
+ AuthorPtr ap;
AuthListPtr authors = NULL;
CitArtPtr cit;
CitJourPtr citjour;
Int2 count;
+ CharPtr curr;
DatePtr date = NULL;
ValNodePtr gene;
Int2 i;
@@ -204,9 +261,11 @@ NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp)
size_t len;
MedlineMeshPtr mesh;
ValNodePtr names;
+ NameStdPtr nsp;
CharPtr p;
CharPtr pages = NULL;
ParData para;
+ PersonIdPtr pid;
CharPtr ptr;
ValNodePtr qual;
Boolean rsult;
@@ -255,20 +314,59 @@ NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp)
cit = mep->cit;
if (cit != NULL) {
authors = cit->authors;
- if (authors != NULL && (authors->choice == 2 || authors->choice == 3)) {
- names = authors->names;
- count = 0;
- while (names != NULL) {
- if (count >= 20) {
- rsult = (Boolean) (SendTextToFile (fp, buffer, &para, table) && rsult);
- ClearString ();
- count = 0;
+ if (authors != NULL) {
+ if (authors->choice == 1) {
+ names = authors->names;
+ count = 0;
+ while (names != NULL) {
+ if (count >= 20) {
+ rsult = (Boolean) (SendTextToFile (fp, buffer, &para, table) && rsult);
+ ClearString ();
+ count = 0;
+ }
+ curr = NULL;
+ ap = (AuthorPtr) names->data.ptrvalue;
+ if (ap != NULL) {
+ pid = ap->name;
+ if (pid != NULL) {
+ if (pid->choice == 2) {
+ nsp = (NameStdPtr) pid->data;
+ if (nsp != NULL) {
+ if (! StringHasNoText (nsp->names [0])) {
+ curr = MakeMLAuthString (nsp->names [0], nsp->names [4], nsp->names [5]);
+ } else if (! StringHasNoText (nsp->names [3])) {
+ curr = MakeMLAuthString (nsp->names [3], NULL, NULL);
+ }
+ }
+ } else if (pid->choice == 3 || pid->choice == 4) {
+ curr = MakeMLAuthString ((CharPtr) pid->data, NULL, NULL);
+ }
+ }
+ }
+ if (curr != NULL) {
+ AddString ("AU -\t");
+ AddString (curr);
+ AddString ("\n");
+ curr = MemFree (curr);
+ }
+ names = names->next;
+ count++;
+ }
+ } else if (authors->choice == 2 || authors->choice == 3) {
+ names = authors->names;
+ count = 0;
+ while (names != NULL) {
+ if (count >= 20) {
+ rsult = (Boolean) (SendTextToFile (fp, buffer, &para, table) && rsult);
+ ClearString ();
+ count = 0;
+ }
+ AddString ("AU -\t");
+ AddString (names->data.ptrvalue);
+ AddString ("\n");
+ names = names->next;
+ count++;
}
- AddString ("AU -\t");
- AddString (names->data.ptrvalue);
- AddString ("\n");
- names = names->next;
- count++;
}
}
rsult = (Boolean) (SendTextToFile (fp, buffer, &para, table) && rsult);
@@ -581,6 +679,23 @@ NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp)
return rsult;
}
+NLM_EXTERN Boolean MedlineEntryToDataFile (MedlineEntryPtr mep, FILE *fp)
+/* Public entry point kept for existing callers: forwards mep and fp to
+ * MedlineEntryToDataFileEx with a pmid argument of 0 and returns its
+ * result.  NOTE(review): 0 presumably means "no PubMed ID available" -
+ * confirm against MedlineEntryToDataFileEx. */
+{
+ return MedlineEntryToDataFileEx (mep, 0, fp);
+}
+
+/* PubmedEntryToDataFile
+ *
+ * Writes the Medline entry carried by a PubmedEntry through
+ * MedlineEntryToDataFileEx, passing along pep->pmid.  Returns FALSE when
+ * pep or fp is NULL or when pep has no medent; otherwise returns the
+ * result of MedlineEntryToDataFileEx.
+ */
+NLM_EXTERN Boolean PubmedEntryToDataFile (PubmedEntryPtr pep, FILE *fp)
+
+{
+  MedlineEntryPtr  mep = NULL;
+
+  if (pep != NULL && fp != NULL) {
+    mep = (MedlineEntryPtr) pep->medent;
+  }
+  if (mep == NULL) return FALSE;
+
+  return MedlineEntryToDataFileEx (mep, pep->pmid, fp);
+}
+
#ifdef VAR_ARGS
static CharPtr CDECL StrngAppend (first, va_alist)
CharPtr first;
@@ -1036,7 +1151,7 @@ static ColData colFmt [3] = {{0, 0, 0, 'l', TRUE, TRUE, FALSE},
static ColData mshFmt [1] = {{0, 80, 0, 'l', FALSE, FALSE, TRUE}};
-static Boolean MedlineEntryToDocOrAbsFile (MedlineEntryPtr mep, FILE *fp, Boolean showMesh)
+static Boolean MedlineEntryToDocOrAbsFile (MedlineEntryPtr mep, Int4 pmid, FILE *fp, Boolean showMesh)
{
size_t len;
@@ -1122,13 +1237,35 @@ static Boolean MedlineEntryToDocOrAbsFile (MedlineEntryPtr mep, FILE *fp, Boolea
NLM_EXTERN Boolean MedlineEntryToDocFile (MedlineEntryPtr mep, FILE *fp)
{
- return MedlineEntryToDocOrAbsFile (mep, fp, TRUE);
+ return MedlineEntryToDocOrAbsFile (mep, 0, fp, TRUE);
}
NLM_EXTERN Boolean MedlineEntryToAbsFile (MedlineEntryPtr mep, FILE *fp)
{
- return MedlineEntryToDocOrAbsFile (mep, fp, FALSE);
+ return MedlineEntryToDocOrAbsFile (mep, 0, fp, FALSE);
+}
+
+/* PubmedEntryToDocFile
+ *
+ * Writes the Medline entry carried by a PubmedEntry through
+ * MedlineEntryToDocOrAbsFile with showMesh set to TRUE, passing along
+ * pep->pmid.  Returns FALSE when pep or fp is NULL or when pep has no
+ * medent; otherwise returns the result of MedlineEntryToDocOrAbsFile.
+ */
+NLM_EXTERN Boolean PubmedEntryToDocFile (PubmedEntryPtr pep, FILE *fp)
+
+{
+  MedlineEntryPtr  mep = NULL;
+
+  if (pep != NULL && fp != NULL) {
+    mep = (MedlineEntryPtr) pep->medent;
+  }
+  if (mep == NULL) return FALSE;
+
+  return MedlineEntryToDocOrAbsFile (mep, pep->pmid, fp, TRUE);
+}
+
+/* PubmedEntryToAbsFile
+ *
+ * Writes the Medline entry carried by a PubmedEntry through
+ * MedlineEntryToDocOrAbsFile with showMesh set to FALSE, passing along
+ * pep->pmid.  Returns FALSE when pep or fp is NULL or when pep has no
+ * medent; otherwise returns the result of MedlineEntryToDocOrAbsFile.
+ */
+NLM_EXTERN Boolean PubmedEntryToAbsFile (PubmedEntryPtr pep, FILE *fp)
+
+{
+  MedlineEntryPtr  mep = NULL;
+
+  if (pep != NULL && fp != NULL) {
+    mep = (MedlineEntryPtr) pep->medent;
+  }
+  if (mep == NULL) return FALSE;
+
+  return MedlineEntryToDocOrAbsFile (mep, pep->pmid, fp, FALSE);
+}
#define IBM_MEDLINE_DIVSS '$'
diff --git a/api/tomedlin.h b/api/tomedlin.h
index 487bb1c1..1be1818f 100644
--- a/api/tomedlin.h
+++ b/api/tomedlin.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/15/91
*
-* $Revision: 6.1 $
+* $Revision: 6.2 $
*
* File Description: conversion to medlars format
*
@@ -40,6 +40,9 @@
*
*
* $Log: tomedlin.h,v $
+* Revision 6.2 2003/09/28 20:22:47 kans
+* added PubmedEntryToXXXFile functions
+*
* Revision 6.1 1997/12/22 18:51:23 grisha
* update unit-record number for PmId to 969
*
@@ -78,6 +81,7 @@
#include <objmedli.h>
#include <objmdrs.h>
+#include <objpubme.h>
#define MEDLINE_BADCODE 0 /* BAD */
#define MEDLINE_EOF -1 /* END OF ENTRY */
@@ -163,6 +167,10 @@ NLM_EXTERN Boolean MedlarsEntryToDataFile PROTO((MedlarsEntryPtr mep, FILE *fp))
NLM_EXTERN Boolean MedlarsEntryToDocFile PROTO((MedlarsEntryPtr mep, FILE *fp));
NLM_EXTERN Boolean MedlarsEntryToAbsFile PROTO((MedlarsEntryPtr mep, FILE *fp));
+NLM_EXTERN Boolean PubmedEntryToDataFile PROTO((PubmedEntryPtr pep, FILE *fp));
+NLM_EXTERN Boolean PubmedEntryToDocFile PROTO((PubmedEntryPtr pep, FILE *fp));
+NLM_EXTERN Boolean PubmedEntryToAbsFile PROTO((PubmedEntryPtr pep, FILE *fp));
+
NLM_EXTERN MedlinePtr ParseMedline PROTO((MedlineEntryPtr mep));
NLM_EXTERN MedlinePtr FreeMedline PROTO((MedlinePtr mPtr));
diff --git a/api/txalign.c b/api/txalign.c
index 3253529f..127cc1a5 100644
--- a/api/txalign.c
+++ b/api/txalign.c
@@ -1,4 +1,4 @@
-/* $Id: txalign.c,v 6.72 2003/01/23 23:31:58 dondosha Exp $
+/* $Id: txalign.c,v 6.79 2003/09/26 20:54:10 dondosha Exp $
***************************************************************************
* *
* COPYRIGHT NOTICE *
@@ -27,13 +27,34 @@
*
* File Name: txalign.c
*
-* $Revision: 6.72 $
+* $Revision: 6.79 $
*
* File Description: Formating of text alignment for the BLAST output
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: txalign.c,v $
+* Revision 6.79 2003/09/26 20:54:10 dondosha
+* Revert change in revision 6.77, as it turned out trace.cgi links should have stayed as they were
+*
+* Revision 6.78 2003/08/20 21:29:13 dondosha
+* Correction for OOF alignments with nucleotide coordinates starting at 1
+*
+* Revision 6.77 2003/07/30 14:07:36 dondosha
+* Changed hrefs to trace.cgi in accordance with the new taxonomy web interface
+*
+* Revision 6.76 2003/07/21 22:15:23 dondosha
+* Added support for out-of-frame tblastn alignments
+*
+* Revision 6.75 2003/07/15 14:36:06 dondosha
+* Added a #define for fprintf substitute, needed for gzip compression of Web BLAST results
+*
+* Revision 6.74 2003/06/11 20:15:35 jianye
+* changed unigene linkout
+*
+* Revision 6.73 2003/06/02 20:02:15 jianye
+* Added geo linkout
+*
* Revision 6.72 2003/01/23 23:31:58 dondosha
* Added a global variable for the query number, needed in make_dumpgnl_links
*
@@ -511,6 +532,9 @@ int query_number_glb;
/*Indicate if db contains sequence with gi*/
Boolean DbHasGi=FALSE;
+int (*tx_fprintf)(FILE*, const char *, ...) = fprintf;
+#define fprintf tx_fprintf
+
/*
Used by the functions that format the one-line descriptions.
*/
@@ -643,6 +667,7 @@ static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){
/*add space in front of linkout*/
fprintf(fp, " ");
bdlpTemp=bdlp;
+
while(bdlpTemp){
if(checkLinkoutType(bdlpTemp, linkout_locuslink)){
hasLinkout=TRUE;
@@ -654,20 +679,10 @@ static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){
}
bdlpTemp=bdlp;
while(bdlpTemp){
- if(checkLinkoutType(bdlpTemp, linkout_unigene)){
-
+ if(checkLinkoutType(bdlpTemp, linkout_unigene)){
hasLinkout=TRUE;
gi=GetGIForSeqId(bdlpTemp->seqid);
- rnp=FDGetTaxNamesFromBioseq(bsp, bdlpTemp->taxid);
- if(rnp&&rnp->sci_name){
- unigeneName=getNameInitials(rnp->sci_name);
- if(unigeneName){
- fprintf(fp, URL_Unigene, unigeneName, gi);
- }
- MemFree(unigeneName);
- }
-
- RDBTaxNamesFree(rnp);
+ fprintf(fp, URL_Unigene, gi);
break;
}
bdlpTemp=bdlpTemp->next;
@@ -682,6 +697,16 @@ static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){
}
bdlpTemp=bdlpTemp->next;
}
+ bdlpTemp=bdlp;
+ while(bdlpTemp){
+ if(checkLinkoutType(bdlpTemp, linkout_geo)){
+ gi=GetGIForSeqId(bdlpTemp->seqid);
+ fprintf(fp, URL_Geo, gi);
+ break;
+ }
+ bdlpTemp=bdlpTemp->next;
+ }
+
}
BlastDefLineSetFree(bdlp);
}
@@ -722,21 +747,16 @@ static void addLinkoutForBioseq(BioseqPtr bsp, SeqIdPtr sip, SeqIdPtr firstSip,
if(checkLinkoutType(actualBdlp, linkout_unigene)){
hasLinkout=TRUE;
- rnp=FDGetTaxNamesFromBioseq(bsp, actualBdlp->taxid);
- if(rnp&&rnp->sci_name){
- unigeneName=getNameInitials(rnp->sci_name);
- if(unigeneName){
- fprintf(fp, URL_Unigene, unigeneName, gi);
- }
- MemFree(unigeneName);
- }
-
- RDBTaxNamesFree(rnp);
+ fprintf(fp, URL_Unigene, gi);
}
if(checkLinkoutType(actualBdlp, linkout_structure)){
hasLinkout=TRUE;
fprintf(fp, URL_Structure, RID_glb, firstGi, gi, CDD_RID_glb, "onepair", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none");
}
+
+ if(checkLinkoutType(actualBdlp, linkout_geo)){
+ fprintf(fp, URL_Geo, gi);
+ }
}
BlastDefLineSetFree(bdlp);
}
@@ -1722,7 +1742,7 @@ static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean
tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
load = TRUE;
} else if (!StringICmp(db_tag->db, "TI")) {
- sprintf(HTML_buffer, "<a name = TI%ld></a><a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?val=%ld&cmd=retrieve&dopt=fasta\">", (long) oip->id, (long) oip->id);
+ sprintf(HTML_buffer, "<a name = TI%ld></a><a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id, (long) oip->id);
html_len = StringLen(HTML_buffer);
sprintf(docbuf+pos, HTML_buffer);
@@ -2616,8 +2636,13 @@ static Boolean load_align_sum_for_StdSeg(StdSegPtr ssp, AlignSumPtr asp)
return FALSE;
if(asp->ooframe) {
- master_is_translated = TRUE;
- target_is_translated = FALSE;
+ if (SeqLocStrand(ssp->loc) != Seq_strand_unknown) {
+ master_is_translated = TRUE;
+ target_is_translated = FALSE;
+ } else {
+ master_is_translated = FALSE;
+ target_is_translated = TRUE;
+ }
} else {
/* Check for valid sequence. */
if (SeqLocLen(ssp->loc) == 3*SeqLocLen(ssp->loc->next))
@@ -2748,10 +2773,17 @@ static Boolean load_align_sum_for_StdSeg(StdSegPtr ssp, AlignSumPtr asp)
}
if(asp->ooframe) {
- if(ssp->loc->next->choice != SEQLOC_EMPTY)
- asp->totlen += SeqLocLen(ssp->loc->next);
- else
- asp->totlen += SeqLocLen(ssp->loc)/3;
+ if (master_is_translated) {
+ if(ssp->loc->next->choice != SEQLOC_EMPTY)
+ asp->totlen += SeqLocLen(ssp->loc->next);
+ else
+ asp->totlen += SeqLocLen(ssp->loc)/3;
+ } else {
+ if(ssp->loc->choice != SEQLOC_EMPTY)
+ asp->totlen += SeqLocLen(ssp->loc);
+ else
+ asp->totlen += SeqLocLen(ssp->loc->next)/3;
+ }
} else {
if (ssp->loc->choice != SEQLOC_EMPTY) {
@@ -4589,7 +4621,7 @@ PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp
#endif
if(!StringICmp(blast_type, "fruitfly")) {
- fprintf(stdout, "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0> - please follow this image for the map location of the sequence<P>\n");
+ fprintf(outfp, "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0> - please follow this image for the map location of the sequence<P>\n");
}
asn2ff_set_output(outfp, NULL);
@@ -4884,7 +4916,7 @@ PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp
} else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) {
oip = db_tag->tag;
if(oip->id != 0) {
- fprintf(outfp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?val=%ld&cmd=retrieve&dopt=fasta\">", (long) oip->id);
+ fprintf(outfp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id);
}
} else {
make_dumpgnl_links(txsp->id, blast_type, txsp->segs_str, db_name, txsp->is_na, outfp, txsp->buffer_id, FALSE);
@@ -5417,7 +5449,7 @@ static CharPtr FSFPrintOneDefline(AlignStatOptionPtr asop, Boolean is_na,
} else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) {
oip = db_tag->tag;
if(oip->id != 0) {
- fprintf(asop->fp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?val=%ld&cmd=retrieve&dopt=fasta\">", (long) oip->id);
+ fprintf(asop->fp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id);
}
} else {
/** * links to incomplete genomes */
@@ -5960,22 +5992,27 @@ NLM_EXTERN Uint4 GetTxAlignOptionValue (Uint1 tx_option, BoolPtr hide_feature,
return option;
}
-Int4 OOFGetDNAStrand(StdSegPtr sseg)
+/** Reports the strand of the nucleotide sequence in an out-of-frame alignment.
+ * Assumes that neither of the locations in the first link in StdSeg is empty.
+ * @param sseg Alignment segments [in]; only the first link is read, and it is dereferenced without a NULL check
+ * @param dna_strand The strand of the nucleotide sequence [out]
+ * @return TRUE for tblastn (strand was taken from the second location), FALSE for blastx.
+ */
+static Boolean OOFGetDNAStrand(StdSegPtr sseg, Int4Ptr dna_strand)
{
- Int4 dna_strand;
- SeqIntPtr seq_int1;
- SeqLocPtr slp1;
+ Uint1 strand; /* strand of the first location */
+ Boolean reverse; /* TRUE when the second location supplies the strand */
- for(; sseg != NULL; sseg= sseg->next) {
- slp1 = sseg->loc;
-
- if(slp1->choice == SEQLOC_INT) {
- seq_int1 = (SeqIntPtr) slp1->data.ptrvalue;
- return seq_int1->strand;
- }
+ if ((strand = SeqLocStrand(sseg->loc)) != Seq_strand_unknown) { /* first location has a known strand: use it */
+ *dna_strand = (Int4) strand;
+ reverse = FALSE;
+ } else { /* first location has no strand: fall back to the second location */
+ *dna_strand = (Int4) SeqLocStrand(sseg->loc->next);
+ reverse = TRUE;
}
- return Seq_strand_unknown;
+ return reverse;
}
+
static Int4 SetDNALineEnd(Int4 dna_index, Int4 dna_strand)
{
Int4 dna_line_end;
@@ -6041,6 +6078,7 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
Int4 i, lines, k, shift_info = 0;
Char c1, c2, c3;
Int4 dna_strand, max_digits, num_pad;
+ Boolean reverse = FALSE;
if(sap == NULL || sap->segtype != 3) /* Should be StdSeg here! */
return FALSE;
@@ -6051,8 +6089,8 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
pro_index = 0;
pro_line_end = 0;
dna_line_end = 0;
-
- dna_strand = OOFGetDNAStrand((StdSegPtr) sap->segs);
+
+ reverse = OOFGetDNAStrand((StdSegPtr) sap->segs, &dna_strand);
/* Needed for printing nice alignment with normal spacing */
max_digits = GetMaxFROMDigits((StdSegPtr) sap->segs);
@@ -6065,8 +6103,18 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
length_pro = 0;
b_store = NULL;
- slp1 = sseg->loc;
-
+ if (reverse) {
+ slp2 = sseg->loc;
+ slp1 = sseg->loc->next;
+ sip2 = sseg->ids; /* Protein */
+ sip1 = sseg->ids->next; /* DNA */
+ } else {
+ slp1 = sseg->loc;
+ slp2 = sseg->loc->next;
+ sip1 = sseg->ids; /* DNA */
+ sip2 = sseg->ids->next; /* Protein */
+ }
+
if(slp1->choice == SEQLOC_INT)
seq_int1 = (SeqIntPtr) slp1->data.ptrvalue;
else if (slp1->choice == SEQLOC_EMPTY)
@@ -6074,7 +6122,6 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
else
return FALSE; /* Invalid SeqLoc */
- slp2 = sseg->loc->next;
if(slp2->choice == SEQLOC_INT)
seq_int2 = (SeqIntPtr) slp2->data.ptrvalue;
@@ -6087,9 +6134,6 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
if(seq_int1 == NULL && seq_int2 == NULL)
continue;
- sip1 = sseg->ids; /* DNA */
- sip2 = sseg->ids->next; /* Protein */
-
/* printf("shift_info = %d\n", shift_info); */
if(shift_info%3)
@@ -6171,12 +6215,12 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
}
if(line_index == 0) {
- dna_line_start = dna_index;
+ dna_line_start = dna_index + 1;
pro_line_start = pro_index + 1;
}
if (dna_line_start == 0)
- dna_line_start = dna_index;
+ dna_line_start = dna_index + 1;
if(pro_line_start == 0)
pro_line_start = pro_index + 1;
@@ -6293,37 +6337,64 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
/* ------- Printout of the alignment ------------- */
- fprintf(fp, "Query: %d", dna_line_start+1);
+ if (reverse) {
+ fprintf(fp, "Query: %d", pro_line_start);
+ num_pad =
+ max_digits - GetDigitsInINT(pro_line_start) + 1;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n", line2, pro_line_end);
- num_pad = max_digits - GetDigitsInINT(dna_line_start+1) + 1;
+ num_pad = 8 + max_digits;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
- for(k=0; k < num_pad; k++)
- fprintf(fp, " ");
+ fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start);
- fprintf(fp, "%s %d\n", line1, dna_line_end+3);
-
- num_pad = 8 + max_digits;
-
- for(k=0; k < num_pad; k++)
- fprintf(fp, " ");
+ num_pad =
+ max_digits - GetDigitsInINT(dna_line_start) + 1;
- fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n\n", line1, dna_line_end+3);
- num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;
+ } else {
+ fprintf(fp, "Query: %d", dna_line_start);
+ num_pad =
+ max_digits - GetDigitsInINT(dna_line_start) + 1;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n", line1, dna_line_end+3);
+
+ num_pad = 8 + max_digits;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
- for(k=0; k < num_pad; k++)
- fprintf(fp, " ");
+ fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);
- fprintf(fp, "%s %d\n\n", line2, pro_line_end);
+ num_pad =
+ max_digits - GetDigitsInINT(pro_line_start) + 1;
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n\n", line2, pro_line_end);
+ }
/* --------------------------------------------------- */
if(dna_line_end != 0) {
if(dna_strand != Seq_strand_minus)
- dna_line_start = dna_line_end+3; /*takes 3 bases*/
+ dna_line_start = dna_line_end+4; /*takes 3 bases*/
else
- dna_line_start = dna_line_end+1; /*takes 3 bases*/
+ dna_line_start = dna_line_end+2; /*takes 3 bases*/
}
if(pro_line_end != 0)
pro_line_start = pro_line_end+1;
@@ -6357,35 +6428,58 @@ static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
/* ------- Printout of the alignment remainder ------- */
+ if (reverse) {
+ fprintf(fp, "Query: %d", pro_line_start);
- fprintf(fp, "Query: %d", dna_line_start+1);
-
- num_pad = max_digits - GetDigitsInINT(dna_line_start+1) + 1;
-
- for(k=0; k < num_pad; k++)
- fprintf(fp, " ");
-
- fprintf(fp, "%s %d\n", line1, dna_line_end+3);
-
- num_pad = 8 + max_digits;
-
- for(k=0; k < num_pad; k++)
- fprintf(fp, " ");
-
- fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);
-
- num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;
-
- for(k=0; k < num_pad; k++)
- fprintf(fp, " ");
-
- fprintf(fp, "%s %d\n\n\n", line2, pro_line_end);
+ num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n", line2, pro_line_end);
+
+ num_pad = 8 + max_digits;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start);
+
+ num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n\n\n", line1, dna_line_end+3);
+ } else {
+ fprintf(fp, "Query: %d", dna_line_start);
+ num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n", line1, dna_line_end+3);
+
+ num_pad = 8 + max_digits;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);
+
+ num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;
+
+ for(k=0; k < num_pad; k++)
+ fprintf(fp, " ");
+
+ fprintf(fp, "%s %d\n\n\n", line2, pro_line_end);
+ }
/* --------------------------------------------------- */
/* fprintf(fp, "\nQuery: %-5d %s %-5d\n "
"%s\nSbjct: %-5d %s %-5d\n\n",
- dna_line_start+1, line1, dna_line_end+3, line3,
+ dna_line_start, line1, dna_line_end+3, line3,
pro_line_start, line2, pro_line_end); */
return TRUE;
diff --git a/api/valid.c b/api/valid.c
index 1b52746f..667a2876 100644
--- a/api/valid.c
+++ b/api/valid.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.364 $
+* $Revision: 6.403 $
*
* File Description: Sequence editing utilities
*
@@ -39,6 +39,123 @@
* ------- ---------- -----------------------------------------------------
*
* $Log: valid.c,v $
+* Revision 6.403 2003/10/24 21:31:00 kans
+* added test for ERR_SEQ_FEAT_UTRdoesNotAbutCDS on mRNA
+*
+* Revision 6.402 2003/10/24 17:50:35 kans
+* added ERR_SEQ_INST_SeqLitGapLength0
+*
+* Revision 6.401 2003/10/24 04:41:50 kans
+* ValidateImpFeat warns if repeat_region /rpt_unit has same length as sfp->location but does not have matching sequence
+*
+* Revision 6.400 2003/10/23 20:29:38 kans
+* warn about allele gbqual when inheriting allele from gene
+*
+* Revision 6.399 2003/10/20 19:44:47 kans
+* added * Terminator codon
+*
+* Revision 6.398 2003/10/20 16:53:05 kans
+* suppress validator warning for synthetic sequences with molinfo other genetic when origin is artificial
+*
+* Revision 6.397 2003/10/17 21:12:27 kans
+* added ERR_SEQ_FEAT_OnlyGeneXrefs test
+*
+* Revision 6.396 2003/10/10 22:38:39 kans
+* added tests for BadTrnaCodon and BadTrnaAA
+*
+* Revision 6.395 2003/10/06 16:19:26 kans
+* commented out check on rpt_unit content - now any text will be allowed
+*
+* Revision 6.394 2003/10/01 19:46:39 kans
+* suppress partial not at end warning for CDD region
+*
+* Revision 6.393 2003/09/30 20:35:30 kans
+* fixed IsSynthetic to look at div if origin was not set
+*
+* Revision 6.392 2003/09/23 12:33:37 kans
+* Check DeltaLitOnly and allow test for terminal Ns
+*
+* Revision 6.391 2003/09/18 18:28:53 kans
+* fixed IsMicroRNA - was using continue statements in while loop instead of for loop
+*
+* Revision 6.390 2003/09/11 15:24:35 kans
+* duplicate feat severity warning check was only done for cds, not mrna
+*
+* Revision 6.389 2003/09/10 14:29:13 kans
+* IsMicroRNA feature test for molinfo-biomol.other, do mrnatrans base comparison even if polyA test fails
+*
+* Revision 6.388 2003/09/09 20:09:21 kans
+* lower severity for far product partial inconsistency and mrnatranscheck, also check for 95% polyA
+*
+* Revision 6.387 2003/08/13 21:45:30 kans
+* added ERR_SEQ_DESCR_RefGeneTrackingWithoutStatus
+*
+* Revision 6.386 2003/08/11 15:08:08 kans
+* REBASE is legal refseq dbxref
+*
+* Revision 6.385 2003/08/01 21:33:38 kans
+* ERR_SEQ_INST_InternalNsInSeqLit dropped to warning, cutoff still at 80
+*
+* Revision 6.384 2003/08/01 21:30:28 kans
+* added CountAdjacentNsInSeqLit for htgs 1 and 2
+*
+* Revision 6.383 2003/07/30 21:44:31 kans
+* comment out archaic locations messages because TMSMART thinks it should not promote locations
+*
+* Revision 6.382 2003/07/29 15:59:11 kans
+* use new ERR_SEQ_PKG_ArchaicFeatureLocation and ERR_SEQ_PKG_ArchaicFeatureProduct tokens, also do not complain if location or product is local and Bioseq has TMSMART or BankIt general
+*
+* Revision 6.381 2003/07/28 22:11:04 kans
+* check for archaic feature locations and products
+*
+* Revision 6.380 2003/07/22 16:18:07 kans
+* added Kerguelen Archipelago to country list
+*
+* Revision 6.379 2003/07/15 16:46:02 kans
+* suppress BadDeltaSeq - HTGS 2 delta seq has no gaps and no graphs - if HTGS_ACTIVEFIN keyword present
+*
+* Revision 6.378 2003/07/07 15:35:01 kans
+* ERR_SEQ_INST_TerminalNs is SEV_ERROR if 10 or more Ns at either end
+*
+* Revision 6.377 2003/07/02 19:36:47 kans
+* added CheckCDSPartial to check cds->location partials against product molinfo
+*
+* Revision 6.376 2003/06/17 21:15:46 kans
+* germline and rearranged are mutually exclusive - proviral and virion are because there is only one biop->genome
+*
+* Revision 6.375 2003/06/17 21:05:13 kans
+* synthetic biosource should have molinfo biomol other
+*
+* Revision 6.374 2003/06/17 20:03:38 kans
+* NT-036298 (dash instead of underscore) gives REJECT level BadSeqIdFormat error
+*
+* Revision 6.373 2003/06/02 21:42:11 kans
+* allow 4 + 2 + 7 wgs master accessions
+*
+* Revision 6.372 2003/05/09 18:46:47 kans
+* severity of first and last delta seq component is gap message lowered if not HTGS
+*
+* Revision 6.371 2003/05/02 19:19:28 kans
+* added rearrangement exception to list that suppressed CdTransCheck
+*
+* Revision 6.370 2003/05/01 20:08:57 kans
+* Serbia and Montenegro restored to list of countries, but Yugoslavia also remains
+*
+* Revision 6.369 2003/04/30 16:38:37 kans
+* added CdsProductIdCheck
+*
+* Revision 6.368 2003/04/27 20:16:23 kans
+* ribosomal slippage exception suppresses CDSmRNArange warning
+*
+* Revision 6.367 2003/04/24 19:51:47 kans
+* rearrangement required okay for all records, not just refseq, at least for now
+*
+* Revision 6.366 2003/04/24 19:19:12 kans
+* added support for new rearrangement required for product exception
+*
+* Revision 6.365 2003/04/23 16:56:16 kans
+* ERR_GENERIC_BadPageNumbering dropped to SEV_WARNING
+*
* Revision 6.364 2003/04/21 16:39:22 kans
* CheckRnaProductType was doing the wrong thing for rRNAs
*
@@ -1337,6 +1454,7 @@ NLM_EXTERN void SpellCheckSeqFeat (GatherContextPtr gcp);
NLM_EXTERN void SpellCheckString (ValidStructPtr vsp, CharPtr str);
NLM_EXTERN void SpliceCheck (ValidStructPtr vsp, SeqFeatPtr sfp);
static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll);
+static void CdsProductIdCheck (ValidStructPtr vsp, SeqFeatPtr sfp);
static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSourcePtr biop);
static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPtr pdp);
static void ValidateSfpCit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp);
@@ -1929,6 +2047,14 @@ static void LookForAnyPubAndOrg (SeqEntryPtr sep, BoolPtr no_pub, BoolPtr no_bio
}
}
+typedef struct ftprob { /* per-record feature problem counters, filled while exploring the record */
+ Uint4 num_misplaced_features; /* incremented through the pointer handed to CheckFeatPacking */
+ Uint4 num_archaic_locations; /* features with at least one archaic location id (CheckFeatLocAndProd) */
+ Uint4 num_archaic_products; /* features whose product id is archaic (CheckFeatLocAndProd) */
+ Uint4 num_gene_feats; /* features with data.choice == SEQFEAT_GENE (CountGeneXrefs) */
+ Uint4 num_gene_xrefs; /* features with a non-suppressed gene xref (CountGeneXrefs) */
+} FeatProb, PNTR FeatProbPtr;
+
static void CheckFeatPacking (BioseqPtr bsp, SeqFeatPtr sfp, Uint4Ptr num_misplaced_features)
{
SeqAnnotPtr sap;
@@ -1967,22 +2093,101 @@ static void CheckFeatPacking (BioseqPtr bsp, SeqFeatPtr sfp, Uint4Ptr num_mispla
}
}
+static Boolean IdIsArchaic (SeqIdPtr sip)
+/* Returns TRUE when sip is a local or general id whose Bioseq (found with BioseqFind) also carries a GI or accession-type id, or, for a local sip, a general id whose db is neither TMSMART nor BankIt; returns FALSE otherwise. */
+{
+ BioseqPtr bsp;
+ DbtagPtr dbt;
+ SeqIdPtr id;
+
+ if (sip == NULL) return FALSE;
+ if (sip->choice != SEQID_LOCAL && sip->choice != SEQID_GENERAL) return FALSE; /* only local and general references are examined */
+ bsp = BioseqFind (sip);
+ if (bsp == NULL) return FALSE; /* no Bioseq found for this id: nothing to compare against */
+ for (id = bsp->id; id != NULL; id = id->next) { /* look at every id the Bioseq carries */
+ switch (id->choice) {
+ case SEQID_GENERAL :
+ if (sip->choice == SEQID_LOCAL) { /* a general id on the Bioseq only counts when the reference itself is local */
+ dbt = (DbtagPtr) id->data.ptrvalue;
+ if (dbt != NULL) {
+ if (StringICmp (dbt->db, "TMSMART") != 0 && StringICmp (dbt->db, "BankIt") != 0) { /* TMSMART and BankIt general ids are exempt */
+ return TRUE;
+ }
+ }
+ }
+ break;
+ case SEQID_GI :
+ case SEQID_GENBANK :
+ case SEQID_EMBL :
+ case SEQID_PATENT :
+ case SEQID_OTHER :
+ case SEQID_DDBJ :
+ case SEQID_TPG :
+ case SEQID_TPE :
+ case SEQID_TPD :
+ return TRUE; /* the Bioseq also has one of the id types listed above */
+ default :
+ break;
+ }
+ }
+ return FALSE;
+}
+
+static void CheckFeatLocAndProd (SeqFeatPtr sfp, FeatProbPtr fpp)
+/* Increments fpp->num_archaic_products when the feature's product id is archaic, and fpp->num_archaic_locations when any part of its location has an archaic id (see IdIsArchaic). Does nothing if sfp or fpp is NULL. */
+{
+ SeqLocPtr slp;
+
+ if (sfp == NULL || fpp == NULL) return;
+ if (sfp->product != NULL && IdIsArchaic (SeqLocId (sfp->product))) {
+ (fpp->num_archaic_products)++;
+ }
+ slp = SeqLocFindNext (sfp->location, NULL); /* start iterating over the parts of the location */
+ while (slp != NULL) {
+ if (IdIsArchaic (SeqLocId (slp))) {
+ (fpp->num_archaic_locations)++;
+ return; /* count each feature at most once, however many parts are archaic */
+ }
+ slp = SeqLocFindNext (sfp->location, slp);
+ }
+}
+
static Boolean LIBCALLBACK CountMisplacedFeatures (BioseqPtr bsp, SeqMgrBioseqContextPtr bcontext)
{
- Uint4Ptr num_misplaced_features;
+ FeatProbPtr fpp; /* shared counters for the whole record */
SeqFeatPtr sfp;
SeqMgrFeatContext fcontext;
- num_misplaced_features = (Uint4Ptr) bcontext->userdata;
+ fpp = (FeatProbPtr) bcontext->userdata; /* ValidateSeqEntry passes &featprob as userdata to SeqMgrExploreBioseqs */
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
while (sfp != NULL) {
- CheckFeatPacking (bsp, sfp, num_misplaced_features);
+ CheckFeatPacking (bsp, sfp, &(fpp->num_misplaced_features)); /* packaging check still takes a bare counter pointer */
+ CheckFeatLocAndProd (sfp, fpp); /* also tally archaic location and product ids */
sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext);
}
return TRUE;
}
+static void CountGeneXrefs (SeqFeatPtr sfp, Pointer userdata)
+/* Per-feature callback (ValidateSeqEntry passes it to VisitFeaturesInSep): userdata is a FeatProbPtr; counts gene features and features that carry a non-suppressed gene xref. */
+{
+ FeatProbPtr fpp;
+ GeneRefPtr grp;
+
+ if (sfp == NULL || userdata == NULL) return;
+ fpp = (FeatProbPtr) userdata;
+
+ if (sfp->data.choice == SEQFEAT_GENE) {
+ (fpp->num_gene_feats)++;
+ }
+
+ grp = SeqMgrGetGeneXref (sfp);
+ if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return; /* no xref, or a suppressed one: not counted */
+
+ (fpp->num_gene_xrefs)++;
+}
+
static Boolean IsNoncuratedRefSeq (BioseqPtr bsp, ErrSev *sev)
{
@@ -2020,7 +2225,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
Int2 errors[6], i;
Boolean suppress_no_pubs = TRUE;
Boolean suppress_no_biosrc = TRUE;
- Uint4 num_misplaced_features = 0;
+ FeatProb featprob;
GatherContextPtr gcp = NULL;
GatherContext gc;
SeqEntryPtr fsep;
@@ -2040,6 +2245,8 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
for (i = 0; i < 6; i++) /* keep errors between clears */
errors[i] = 0;
+ MemSet ((Pointer) &featprob, 0, sizeof (FeatProb));
+
if (vsp->useSeqMgrIndexes) {
entityID = ObjMgrGetEntityIDForChoice (sep);
@@ -2048,7 +2255,10 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
SeqMgrIndexFeatures (entityID, NULL);
ErrSetMessageLevel (oldsev);
}
- SeqMgrExploreBioseqs (entityID, NULL, (Pointer) &num_misplaced_features, CountMisplacedFeatures, TRUE, TRUE, TRUE);
+ SeqMgrExploreBioseqs (entityID, NULL, (Pointer) &featprob, CountMisplacedFeatures, TRUE, TRUE, TRUE);
+
+ topsep = GetTopSeqEntryForEntityID (entityID);
+ VisitFeaturesInSep (topsep, (Pointer) &featprob, CountGeneXrefs);
} else {
/* if not using indexing, still need feature->idx.subtype now */
@@ -2169,11 +2379,30 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
}
}
- if (num_misplaced_features > 1) {
- ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There are %d mispackaged features in this record.", (int) num_misplaced_features);
- } else if (num_misplaced_features == 1) {
- ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There is %d mispackaged feature in this record.", (int) num_misplaced_features);
+ if (featprob.num_misplaced_features > 1) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There are %d mispackaged features in this record.", (int) featprob.num_misplaced_features);
+ } else if (featprob.num_misplaced_features == 1) {
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_PKG_FeaturePackagingProblem, "There is %d mispackaged feature in this record.", (int) featprob.num_misplaced_features);
}
+
+ /*
+ if (featprob.num_archaic_locations > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureLocation, "There are %d archaic feature locations in this record.", (int) featprob.num_archaic_locations);
+ } else if (featprob.num_archaic_locations == 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureLocation, "There is %d archaic feature location in this record.", (int) featprob.num_archaic_locations);
+ }
+
+ if (featprob.num_archaic_products > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureProduct, "There are %d archaic feature products in this record.", (int) featprob.num_archaic_products);
+ } else if (featprob.num_archaic_products == 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ArchaicFeatureProduct, "There is %d archaic feature product in this record.", (int) featprob.num_archaic_products);
+ }
+ */
+
+ if (featprob.num_gene_feats == 0 && featprob.num_gene_xrefs > 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_OnlyGeneXrefs, "There are %ld gene xrefs and no gene features in this record.", (long) featprob.num_gene_xrefs);
+ }
+
first = FALSE;
}
@@ -3034,6 +3263,89 @@ static void ValidateIDSetAgainstDb (GatherContextPtr gcp, ValidStructPtr vsp, Bi
}
}
+typedef struct enrun { /* state for CountAdjacentProc */
+ Int4 ncount; /* length of the run of 'N' currently being scanned; reset to 0 by any other character */
+ Int4 maxrun; /* longest run of 'N' seen so far */
+} RunOfNs, PNTR RunOfNsPtr;
+
+static void LIBCALLBACK CountAdjacentProc (CharPtr sequence, Pointer userdata)
+/* Callback handed to SeqPortStream: scans the NUL-terminated sequence text and updates the RunOfNs in userdata with the longest run of consecutive upper-case 'N' characters. ncount is not reset on entry, so a run continues across successive calls. */
+{
+ Char ch;
+ RunOfNsPtr ronp;
+ CharPtr str;
+
+ ronp = (RunOfNsPtr) userdata;
+ if (sequence == NULL || ronp == NULL) return;
+
+ str = sequence;
+ ch = *str;
+ while (ch != '\0') {
+ if (ch == 'N') { /* only upper-case N is counted */
+ (ronp->ncount)++;
+ if (ronp->ncount > ronp->maxrun) {
+ ronp->maxrun = ronp->ncount;
+ }
+ } else {
+ ronp->ncount = 0; /* any other character ends the current run */
+ }
+ str++;
+ ch = *str;
+ }
+}
+
+static Int4 CountAdjacentNsInSeqLit (SeqLitPtr slitp, Boolean is_na)
+/* Returns the longest run of Ns in a SeqLit by wrapping its data in a temporary Bioseq and streaming it through CountAdjacentProc. Returns 0 when slitp is NULL, has length < 1 or no seq_data, or when the temporary Bioseq cannot be created. is_na selects Seq_mol_dna versus Seq_mol_aa for the temporary Bioseq. */
+{
+ BioseqPtr bsp;
+ RunOfNs ron;
+
+ if (slitp == NULL || slitp->length < 1 || slitp->seq_data == NULL) return 0;
+
+ bsp = BioseqNew ();
+ if (bsp == NULL) return 0;
+
+ if (slitp->seq_data != NULL) { /* always true here: a NULL seq_data already returned 0 above */
+ bsp->repr = Seq_repr_raw;
+ } else {
+ bsp->repr = Seq_repr_virtual;
+ }
+ if (is_na) {
+ bsp->mol = Seq_mol_dna;
+ } else {
+ bsp->mol = Seq_mol_aa;
+ }
+ bsp->seq_data_type = slitp->seq_data_type;
+ bsp->seq_data = slitp->seq_data; /* borrowed from the SeqLit, not copied */
+ bsp->length = slitp->length;
+ bsp->id = SeqIdParse ("lcl|countseqlitns");
+
+ ron.ncount = 0;
+ ron.maxrun = 0;
+
+ SeqPortStream (bsp, TRUE, (Pointer) &ron, CountAdjacentProc);
+
+ bsp->seq_data = NULL; /* detach the borrowed data before freeing; presumably so BioseqFree does not release the SeqLit's buffer (confirm) */
+
+ BioseqFree (bsp);
+
+ return ron.maxrun;
+}
+
+static Boolean DeltaLitOnly (
+ BioseqPtr bsp
+)
+/* Returns TRUE when bsp is a delta Bioseq whose seq_ext chain contains no component with choice 1; returns FALSE for NULL or non-delta Bioseqs. NOTE(review): choice 1 is presumably a SeqLoc component (choice 2 is cast to SeqLitPtr in ValidateBioseqInst); confirm. */
+{
+ ValNodePtr vnp;
+
+ if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
+ for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == 1) return FALSE; /* one choice-1 component is enough to disqualify */
+ }
+ return TRUE;
+}
+
static void ValidateBioseqInst (GatherContextPtr gcp)
{
Boolean retval = TRUE;
@@ -3061,6 +3373,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Boolean litHasData;
SeqMgrDescContext context;
SeqFeatPtr cds;
+ GBBlockPtr gbp;
GeneRefPtr grp;
SeqFeatPtr gene;
SeqMgrFeatContext genectxt;
@@ -3075,6 +3388,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Boolean multitoken;
Boolean hasGi = FALSE;
SeqHistPtr hist;
+ Boolean isActiveFin = FALSE;
Boolean isGenBankEMBLorDDBJ;
Boolean isPatent = FALSE;
Boolean isPDB = FALSE;
@@ -3091,7 +3405,6 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Int2 trailingX = 0;
Int2 numletters, numdigits, numunderscores;
Boolean letterAfterDigit, badIDchars;
- GBBlockPtr gbp;
EMBLBlockPtr ebp;
SeqDescrPtr sdp;
SeqMgrDescContext dcontext;
@@ -3103,9 +3416,12 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
ObjValNodePtr ovp;
BioseqSetPtr bssp;
UserObjectPtr uop;
+ UserFieldPtr ufp;
ObjectIdPtr oip;
+ Boolean hasRefTrackStatus;
Int2 accn_count = 0;
Int2 gi_count = 0;
+ Int4 runsofn;
/* set up data structures */
@@ -3192,6 +3508,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
} else if (numletters == 2 && numdigits == 6 && ISA_aa (bsp->mol) && bsp->repr == Seq_repr_seg) {
} else if (numletters == 4 && numdigits == 8 && ISA_na (bsp->mol) &&
(sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) {
+ } else if (numletters == 4 && numdigits == 9 && ISA_na (bsp->mol) &&
+ (sip1->choice == SEQID_GENBANK || sip1->choice == SEQID_EMBL || sip1->choice == SEQID_DDBJ)) {
} else {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession);
}
@@ -3260,7 +3578,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
if (letterAfterDigit || badIDchars) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession);
+ ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_BadSeqIdFormat, "Bad accession %s", tsip->accession);
} else if (isNZ && numletters == 4 && numdigits == 8 && numunderscores == 0) {
} else if (numletters == 2 && numdigits == 6 && numunderscores == 1) {
} else if (numletters == 2 && numdigits == 8 && numunderscores == 1) {
@@ -3347,6 +3665,23 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
gcp->itemID = olditemid;
gcp->thistype = olditemtype;
}
+ } else if (oip != NULL && StringICmp (oip->str, "RefGeneTracking") == 0) {
+ hasRefTrackStatus = FALSE;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip != NULL && StringCmp (oip->str, "Status") == 0) {
+ hasRefTrackStatus = TRUE;
+ }
+ }
+ if (! hasRefTrackStatus) {
+ olditemid = gcp->itemID;
+ olditemtype = gcp->thistype;
+ gcp->itemID = context.itemID;
+ gcp->thistype = OBJ_SEQDESC;
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_RefGeneTrackingWithoutStatus, "RefGeneTracking object needs to have Status set");
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
}
}
vnp = SeqMgrGetNextDescriptor (bsp, vnp, Seq_descr_user, &context);
@@ -3727,6 +4062,39 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
mip = NULL;
if (bsp->repr == Seq_repr_delta) {
+ vnp = NULL;
+ if (vsp->useSeqMgrIndexes) {
+ vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context);
+ } else {
+ bcp = BioseqContextNew (bsp);
+ vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_genbank, NULL, NULL);
+ BioseqContextFree (bcp);
+ }
+ if (vnp != NULL) {
+ gbp = (GBBlockPtr) vnp->data.ptrvalue;
+ if (gbp != NULL) {
+ for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringICmp (str, "HTGS_ACTIVEFIN") == 0) {
+ isActiveFin = TRUE;
+ }
+ }
+ }
+ }
+ }
+
+ if (bsp->repr == Seq_repr_delta) {
+ vnp = NULL;
+ if (vsp->useSeqMgrIndexes) {
+ vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
+ } else {
+ bcp = BioseqContextNew (bsp);
+ vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_molinfo, NULL, NULL);
+ BioseqContextFree (bcp);
+ }
+ if (vnp != NULL) {
+ mip = (MolInfoPtr) vnp->data.ptrvalue;
+ }
len = 0;
for (vnp = (ValNodePtr) (bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
if (vnp->data.ptrvalue == NULL)
@@ -3778,6 +4146,16 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
default:
break;
}
+ if (mip != NULL) {
+ if (mip->tech == MI_TECH_htgs_1 || mip->tech == MI_TECH_htgs_2) {
+ runsofn = CountAdjacentNsInSeqLit (slitp, (Boolean) ISA_na (bsp->mol));
+ if (runsofn > 80) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_InternalNsInSeqLit, "Run of %ld Ns in delta chain", (long) runsofn);
+ }
+ }
+ }
+ } else if (slitp->length == 0) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_SeqLitGapLength0, "Gap of length 0 in delta chain");
}
len += slitp->length;
break;
@@ -3792,31 +4170,28 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
} else if (bsp->length < len) {
ValidErr (vsp, SEV_REJECT, ERR_SEQ_INST_SeqDataLenWrong, "Bioseq.seq_data is larger [%ld] than given length [%ld]", (long) (len), (long) bsp->length);
}
- vnp = NULL;
- if (vsp->useSeqMgrIndexes) {
- vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
- } else {
- bcp = BioseqContextNew (bsp);
- vnp = BioseqContextGetSeqDescr (bcp, Seq_descr_molinfo, NULL, NULL);
- BioseqContextFree (bcp);
- }
- if (vnp != NULL) {
- mip = (MolInfoPtr) vnp->data.ptrvalue;
- if (mip != NULL) {
- is_gps = FALSE;
- sep = vsp->sep;
- if (sep != NULL && IS_Bioseq_set (sep)) {
- bssp = (BioseqSetPtr) sep->data.ptrvalue;
- if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
- is_gps = TRUE;
- }
- }
- if ((!isNTorNC) && (! is_gps) && mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 &&
- mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3 && mip->tech != MI_TECH_wgs &&
- mip->tech != MI_TECH_unknown && mip->tech != MI_TECH_standard) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "Delta seq technique should not be [%d]", (int) (mip->tech));
+ if (mip != NULL) {
+ is_gps = FALSE;
+ sep = vsp->sep;
+ if (sep != NULL && IS_Bioseq_set (sep)) {
+ bssp = (BioseqSetPtr) sep->data.ptrvalue;
+ if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ is_gps = TRUE;
}
}
+ if ((!isNTorNC) && (! is_gps) && mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 &&
+ mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3 && mip->tech != MI_TECH_wgs &&
+ mip->tech != MI_TECH_unknown && mip->tech != MI_TECH_standard) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "Delta seq technique should not be [%d]", (int) (mip->tech));
+ }
+ }
+ }
+
+ sev = SEV_ERROR;
+ if (mip != NULL) {
+ if (mip->tech != MI_TECH_htgs_0 && mip->tech != MI_TECH_htgs_1 &&
+ mip->tech != MI_TECH_htgs_2 && mip->tech != MI_TECH_htgs_3) {
+ sev = SEV_WARNING;
}
}
@@ -3825,7 +4200,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (vnp != NULL && vnp->choice == 2) {
slitp = (SeqLitPtr) vnp->data.ptrvalue;
if (slitp != NULL && slitp->seq_data == NULL) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "First delta seq component is a gap");
+ ValidErr (vsp, sev, ERR_SEQ_INST_BadDeltaSeq, "First delta seq component is a gap");
}
}
last_is_gap = FALSE;
@@ -3856,13 +4231,15 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (vnp != NULL && vnp->choice == 2) {
slitp = (SeqLitPtr) vnp->data.ptrvalue;
if (slitp != NULL && slitp->seq_data == NULL) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadDeltaSeq, "Last delta seq component is a gap");
+ ValidErr (vsp, sev, ERR_SEQ_INST_BadDeltaSeq, "Last delta seq component is a gap");
}
}
if (num_gaps == 0 && mip != NULL) {
if (/* mip->tech == MI_TECH_htgs_1 || */ mip->tech == MI_TECH_htgs_2) {
if (VisitGraphsInSep (sep, NULL, NULL) == 0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadDeltaSeq, "HTGS 2 delta seq has no gaps and no graphs");
+ if (! isActiveFin) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadDeltaSeq, "HTGS 2 delta seq has no gaps and no graphs");
+ }
}
}
}
@@ -3993,28 +4370,38 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
}
- if (ISA_na (bsp->mol) && bsp->repr == Seq_repr_raw && bsp->length > 5) {
+ if (ISA_na (bsp->mol) && (bsp->repr == Seq_repr_raw || (bsp->repr == Seq_repr_delta && DeltaLitOnly (bsp))) && bsp->length > 10) {
/* check for N bases at start or stop of sequence */
sfp = (SeqFeatPtr) MemNew (sizeof (SeqFeat));
if (sfp == NULL) return;
sfp->data.choice = SEQFEAT_COMMENT;
- sfp->location = AddIntervalToLocation (NULL, bsp->id, 0, 2, FALSE, FALSE);
+ sfp->location = AddIntervalToLocation (NULL, bsp->id, 0, 9, FALSE, FALSE);
str = GetSequenceByFeature (sfp);
if (str != NULL) {
if (str [0] == 'n' || str [0] == 'N') {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_TerminalNs, "N at beginning of sequence");
+ if (StringICmp (str, "NNNNNNNNNN") == 0) {
+ sev = SEV_ERROR;
+ } else {
+ sev = SEV_WARNING;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at beginning of sequence");
}
}
MemFree (str);
sfp->location = SeqLocFree (sfp->location);
- sfp->location = AddIntervalToLocation (NULL, bsp->id, bsp->length - 3, bsp->length - 1, FALSE, FALSE);
+ sfp->location = AddIntervalToLocation (NULL, bsp->id, bsp->length - 10, bsp->length - 1, FALSE, FALSE);
str = GetSequenceByFeature (sfp);
len = StringLen (str);
if (str != NULL && len > 0) {
if (str [len - 1] == 'n' || str [len - 1] == 'N') {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_TerminalNs, "N at end of sequence");
+ if (StringICmp (str, "NNNNNNNNNN") == 0) {
+ sev = SEV_ERROR;
+ } else {
+ sev = SEV_WARNING;
+ }
+ ValidErr (vsp, sev, ERR_SEQ_INST_TerminalNs, "N at end of sequence");
}
}
MemFree (str);
@@ -4226,6 +4613,7 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPt
ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Journal pages missing");
}
if (! noPages) {
+ sev = SEV_WARNING;
StringNCpy_0 (temp, imp->pages, sizeof (temp));
ptr = StringChr (temp, '-');
if (ptr != NULL) {
@@ -4295,9 +4683,10 @@ static void ValidateSfpCit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr
typedef struct bioseqvalid
{
ValidStructPtr vsp;
- Boolean is_aa; /* bioseq is protein? */
- Boolean is_mrna; /* molinfo is mrna? */
- Boolean is_prerna; /* molinfo is precursor rna? */
+ Boolean is_aa; /* bioseq is protein? */
+ Boolean is_mrna; /* molinfo is mrna? */
+ Boolean is_prerna; /* molinfo is precursor rna? */
+ Boolean is_artificial; /* biosource origin is artificial? */
Boolean got_a_pub;
int last_na_mol, last_na_mod, last_organelle, last_partialness, last_left_right, last_biomol, last_tech, last_completeness, num_full_length_src_feat, /* number full length src feats */
num_full_length_prot_ref;
@@ -4729,6 +5118,7 @@ static CharPtr countrycodes[] = {
"Juan de Nova Island",
"Kazakhstan",
"Kenya",
+ "Kerguelen Archipelago",
"Kingman Reef",
"Kiribati",
"Kuwait",
@@ -4811,6 +5201,7 @@ static CharPtr countrycodes[] = {
"Sao Tome and Principe",
"Saudi Arabia",
"Senegal",
+ "Serbia and Montenegro",
"Seychelles",
"Sierra Leone",
"Singapore",
@@ -5028,11 +5419,13 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
CharPtr countryname;
ValNodePtr db;
DbtagPtr dbt;
+ Boolean germline = FALSE;
Int2 i;
Int4 id;
OrgNamePtr onp;
OrgModPtr omp;
OrgRefPtr orp;
+ Boolean rearranged = FALSE;
SubSourcePtr ssp;
if (vsp->sourceQualTags == NULL) {
@@ -5070,9 +5463,16 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadSubSource, "Unknown subsource subtype %d", (int) (ssp->subtype));
} else if (ssp->subtype == SUBSRC_other) {
ValidateSourceQualTags (vsp, gcp, biop, ssp->name);
+ } else if (ssp->subtype == SUBSRC_germline) {
+ germline = TRUE;
+ } else if (ssp->subtype == SUBSRC_rearranged) {
+ rearranged = TRUE;
}
ssp = ssp->next;
}
+ if (germline && rearranged) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadSubSource, "Germline and rearranged should not both be present");
+ }
if (chromcount > 1) {
if (chromconf) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleChromosomes, "Multiple conflicting chromosome qualifiers");
@@ -5166,6 +5566,65 @@ static Boolean IsXr (ValNodePtr sdp)
return FALSE;
}
+static Boolean IsSynthetic (BioseqPtr bsp)
+/* TRUE if the source descriptor found for bsp has origin 5, or its orgname division matches "SYN" under StringICmp; FALSE otherwise (including when any link in the chain is NULL) */
+{
+ BioSourcePtr biop;
+ SeqMgrDescContext dcontext;
+ OrgNamePtr onp;
+ OrgRefPtr orp;
+ SeqDescrPtr sdp;
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+ if (sdp == NULL) return FALSE;
+ biop = (BioSourcePtr) sdp->data.ptrvalue;
+ if (biop == NULL) return FALSE;
+ if (biop->origin == 5) return TRUE; /* presumably origin 5 means synthetic - TODO confirm against the BioSource spec */
+ orp = biop->org;
+ if (orp == NULL) return FALSE;
+ onp = orp->orgname;
+ if (onp == NULL) return FALSE;
+ if (StringICmp (onp->div, "SYN") == 0) return TRUE; /* fall back to the taxonomy division string */
+ return FALSE;
+}
+
+static Boolean IsMicroRNA (BioseqPtr bsp)
+/* TRUE if any FEATDEF_otherRNA feature on bsp is an RNA feature whose RnaRef ext (choice 1) string contains "microRNA" */
+{
+ SeqMgrFeatContext fcontext;
+ RnaRefPtr rrp;
+ SeqFeatPtr sfp;
+ CharPtr str;
+
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_otherRNA, &fcontext);
+ while (sfp != NULL) {
+ if (sfp->data.choice == SEQFEAT_RNA) {
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (rrp != NULL && rrp->ext.choice == 1) { /* ext.value is read as a string only for choice 1 */
+ str = (CharPtr) rrp->ext.value.ptrvalue;
+ if (StringStr (str, "microRNA") != NULL) return TRUE; /* substring match anywhere in the name */
+ }
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_otherRNA, &fcontext);
+ }
+ return FALSE;
+}
+
+static Boolean IsOtherDNA (BioseqPtr bsp)
+/* TRUE if the molinfo descriptor found for bsp has biomol 255; NOTE(review): despite the name, nothing here tests that the molecule is DNA */
+{
+ SeqMgrDescContext dcontext;
+ MolInfoPtr mip;
+ SeqDescrPtr sdp;
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
+ if (sdp == NULL) return FALSE;
+ mip = (MolInfoPtr) sdp->data.ptrvalue;
+ if (mip == NULL) return FALSE;
+ if (mip->biomol == 255) return TRUE; /* 255 is the biomol value handled as "other" elsewhere in this file */
+ return FALSE;
+}
+
static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, ValidStructPtr vsp, Uint2 descitemid)
{
ValNodePtr vnp, vnp2;
@@ -5371,6 +5830,12 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
}
}
}
+ if (biop != NULL && biop->origin == 5) {
+ bsp = bvsp->bsp;
+ if (! IsOtherDNA (bsp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol other should be used if Biosource-location is synthetic");
+ }
+ }
/* ValidateBioSource (vsp, gcp, biop); */
this_org = biop->org;
/* fall into Seq_descr_org */
@@ -5415,18 +5880,25 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
switch (mip->biomol) {
case MOLECULE_TYPE_PEPTIDE: /* peptide */
if (!bvsp->is_aa) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "Nucleic acid with Molinfo-biomol = peptide");
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "Nucleic acid with Molinfo-biomol = peptide");
}
break;
case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL:
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol = other genetic");
+ if (! bvsp->is_artificial) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol = other genetic");
+ }
break;
case 0: /* unknown */
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol unknown used");
break;
case 255: /* other */
if (! IsXr (vnp)) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol other used");
+ bsp = bvsp->bsp;
+ if (! IsSynthetic (bsp)) {
+ if (! IsMicroRNA (bsp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_InvalidForType, "Molinfo-biomol other used");
+ }
+ }
}
break;
default: /* the rest are nucleic acid */
@@ -5776,6 +6248,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
int overlapPepSev;
BioSourcePtr biop = NULL;
OrgRefPtr orp = NULL;
+ Int4 fiveUTRright;
+ Int4 cdsRight;
gcp = bvsp->gcp;
vsp = bvsp->vsp;
@@ -5858,9 +6332,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
}
}
}
- if (GPSorNTorNC (vsp->sep, sfp->location)) {
- severity = SEV_WARNING;
- }
+ }
+ if (GPSorNTorNC (vsp->sep, sfp->location)) {
+ severity = SEV_WARNING;
}
if (FlybaseDbxrefs (last->dbxref) || FlybaseDbxrefs (sfp->dbxref)) {
severity = SEV_ERROR;
@@ -5960,6 +6434,52 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
sfp = SeqMgrGetNextFeatureByLabel (bsp, sfp, SEQFEAT_GENE, 0, &fcontext);
}
+ if (bvsp->is_mrna) {
+ fiveUTRright = 0;
+ cdsRight = 0;
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+ while (sfp != NULL) {
+ if (sfp->idx.subtype == FEATDEF_5UTR) {
+ fiveUTRright = fcontext.right;
+ } else if (sfp->idx.subtype == FEATDEF_CDS) {
+ cdsRight = fcontext.right;
+ if (fiveUTRright > 0) {
+ if (fiveUTRright + 1 != fcontext.left) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "5'UTR does not abut CDS");
+ vsp->sfp = NULL;
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
+ }
+ }
+ } else if (sfp->idx.subtype == FEATDEF_3UTR) {
+ if (cdsRight > 0) {
+ if (cdsRight + 1 != fcontext.left) {
+ if (gcp != NULL) {
+ gcp->itemID = fcontext.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "CDS does not abut 3'UTR");
+ if (gcp != NULL) {
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
+ }
+ }
+ }
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext);
+ }
+ }
+
SeqMgrExploreDescriptors (bsp, (Pointer) bvsp, ValidateSeqDescrIndexed, NULL);
omdp = ObjMgrGetData (gcp->entityID);
@@ -6019,6 +6539,7 @@ static Boolean ValidateBioseqContextGather (GatherContextPtr gcp)
*****************************************************************************/
static void ValidateBioseqContext (GatherContextPtr gcp)
{
+ size_t acclen;
ValidStructPtr vsp;
BioseqPtr bsp;
GatherScope gs;
@@ -6038,6 +6559,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
Boolean is_gb = FALSE;
ErrSev sev;
TextSeqIdPtr tsip;
+ BioSourcePtr biop;
vsp = (ValidStructPtr) (gcp->userdata);
bsp = (BioseqPtr) (gcp->thisitem);
@@ -6081,6 +6603,15 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
if (vnp != NULL) {
mip = (MolInfoPtr) vnp->data.ptrvalue;
}
+ vnp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+ if (vnp != NULL) {
+ biop = (BioSourcePtr) vnp->data.ptrvalue;
+ if (biop != NULL) {
+ if (biop->origin == ORG_ARTIFICIAL) {
+ bvs.is_artificial = TRUE;
+ }
+ }
+ }
}
bvs.is_mrna = FALSE;
@@ -6179,7 +6710,10 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
is_gb = TRUE;
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
if (tsip != NULL && tsip->accession != NULL) {
- if (StringLen (tsip->accession) == 12) {
+ acclen = StringLen (tsip->accession);
+ if (acclen == 12) {
+ is_wgs = TRUE;
+ } else if (acclen == 13) {
is_wgs = TRUE;
}
}
@@ -6321,6 +6855,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
GeneRefPtr grp;
Int2 i;
Int2 index;
+ Boolean just_nuc_letters;
CharPtr key;
Boolean multi_rpt_unit;
Boolean no_white_space;
@@ -6472,11 +7007,34 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
multi_rpt_unit = FALSE;
}
}
+ /*
if (found) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Illegal value for qualifier %s", gbqual->qual);
} else if ((!multi_rpt_unit) && StringLen (gbqual->val) > 48) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "Illegal value for qualifier %s", gbqual->qual);
}
+ */
+ if (StringICmp (key,"repeat_region") == 0) {
+ if (! multi_rpt_unit) {
+ if (StringLen (gbqual->val) == SeqLocLen (sfp->location)) {
+ just_nuc_letters = TRUE;
+ for (ptr = gbqual->val, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
+ if (StringChr ("ACGTNacgtn", ch) == NULL) {
+ just_nuc_letters = FALSE;
+ }
+ }
+ if (just_nuc_letters) {
+ tmp = GetSequenceByFeature (sfp);
+ if (tmp != NULL) {
+ if (StringICmp (tmp, gbqual->val) != 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "repeat_region /rpt_unit and underlying sequence do not match");
+ }
+ MemFree (tmp);
+ }
+ }
+ }
+ }
+ }
} else if (val == GBQUAL_label) {
no_white_space = TRUE;
only_digits = TRUE;
@@ -6641,21 +7199,54 @@ static Boolean PartialAtSpliceSite (SeqLocPtr head, Uint2 slpTag)
static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, tRNAPtr trp)
{
- Uint1 aa;
+ Uint1 aa = 0;
BioseqPtr bsp;
Int2 code;
CharPtr codes = NULL;
Uint1 from;
GeneticCodePtr gncp;
+ Uint2 idx;
Int2 j;
SeqEntryPtr sep;
ErrSev sev = SEV_ERROR;
+ Uint1 shift;
SeqMapTablePtr smtp;
Uint1 taa;
ValNodePtr vnp;
if (vsp == NULL || gcp == NULL || sfp == NULL || trp == NULL)
return;
+
+ aa = 0;
+ if (trp->aatype == 2) {
+ aa = trp->aa;
+ } else {
+ from = 0;
+ switch (trp->aatype) {
+ case 0:
+ from = 0;
+ break;
+ case 1:
+ from = Seq_code_iupacaa;
+ break;
+ case 2:
+ from = Seq_code_ncbieaa;
+ break;
+ case 3:
+ from = Seq_code_ncbi8aa;
+ break;
+ case 4:
+ from = Seq_code_ncbistdaa;
+ break;
+ default:
+ break;
+ }
+ smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
+ if (smtp != NULL) {
+ aa = SeqMapTableConvert (smtp, trp->aa);
+ }
+ }
+
for (j = 0; j < 6; j++) {
if (trp->codon[j] < 64) {
if (codes == NULL) {
@@ -6677,35 +7268,6 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (codes == NULL)
return;
taa = codes[trp->codon[j]];
- aa = 0;
- if (trp->aatype == 2) {
- aa = trp->aa;
- } else {
- from = 0;
- switch (trp->aatype) {
- case 0:
- from = 0;
- break;
- case 1:
- from = Seq_code_iupacaa;
- break;
- case 2:
- from = Seq_code_ncbieaa;
- break;
- case 3:
- from = Seq_code_ncbi8aa;
- break;
- case 4:
- from = Seq_code_ncbistdaa;
- break;
- default:
- break;
- }
- smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
- if (smtp != NULL) {
- aa = SeqMapTableConvert (smtp, trp->aa);
- }
- }
if (aa > 0 && aa != 255) {
if (taa != aa) {
if (aa == 'U') {
@@ -6714,6 +7276,28 @@ static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
ValidErr (vsp, sev, ERR_SEQ_FEAT_TrnaCodonWrong, "tRNA codon does not match genetic code");
}
}
+ } else if (trp->codon [j] < 255) {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaCodon, "tRNA codon value %d is greater than maximum 63", (int) trp->codon [j]);
+ }
+ }
+
+ if (aa > 0 && aa != 255) {
+ if (aa <= 74) {
+ shift = 0;
+ } else if (aa > 79) {
+ shift = 2;
+ } else {
+ shift = 1;
+ }
+ if (aa != '*') {
+ idx = aa - (64 + shift);
+ } else {
+ idx = 25;
+ }
+ if (idx > 0 && idx < 26) {
+ /* valid trna amino acid */
+ } else {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_BadTrnaAA, "Invalid tRNA amino acid");
}
}
}
@@ -6780,6 +7364,71 @@ static Boolean NucAndProtNotInNPS (BioseqPtr nuc, BioseqPtr prot)
return FALSE;
}
+static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp)
+/* Cross-checks the 5'/3' partialness of the CDS location against MolInfo.completeness on the product Bioseq; each disagreement is reported as SEV_ERROR ERR_SEQ_FEAT_PartialProblem. Completeness 0, 2, 6, 7 and unlisted values are not checked. */
+{
+ BioseqPtr bsp;
+ SeqMgrDescContext context;
+ MolInfoPtr mip;
+ Boolean partial5;
+ Boolean partial3;
+ SeqDescrPtr sdp;
+
+ if (vsp == NULL || sfp == NULL) return;
+ if (sfp->product == NULL) return;
+ if (!vsp->useSeqMgrIndexes) return; /* presumably because the SeqMgr descriptor lookup below needs indexed data - confirm */
+ bsp = BioseqFindFromSeqLoc (sfp->product);
+ if (bsp == NULL) return;
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
+ if (sdp == NULL) return;
+ mip = (MolInfoPtr) sdp->data.ptrvalue;
+ if (mip == NULL) return;
+ CheckSeqLocForPartial (sfp->location, &partial5, &partial3); /* NOTE(review): partial5/partial3 have no initializer; relies on this call setting both - confirm */
+ switch (mip->completeness) {
+ case 0 : /* unknown */
+ break;
+ case 1 : /* complete */
+ if (partial5 || partial3) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is partial but protein is complete");
+ }
+ break;
+ case 2 : /* partial */
+ break;
+ case 3 : /* no-left */
+ if (! partial5) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 5' complete but protein is NH2 partial");
+ }
+ if (partial3) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 3' partial but protein is NH2 partial");
+ }
+ break;
+ case 4 : /* no-right */
+ if (! partial3) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 3' complete but protein is CO2 partial");
+ }
+ if (partial5) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 5' partial but protein is CO2 partial");
+ }
+ break;
+ case 5 : /* no-ends */
+ if (partial5 && partial3) { /* both ends partial agrees with no-ends: nothing to report */
+ } else if (partial5) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 5' partial but protein has neither end");
+ } else if (partial3) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is 3' partial but protein has neither end");
+ } else {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "CDS is complete but protein has neither end");
+ }
+ break;
+ case 6 : /* has-left */
+ break;
+ case 7 : /* has-right */
+ break;
+ default :
+ break;
+ }
+}
+
static void CheckForCommonCDSProduct (ValidStructPtr vsp, SeqFeatPtr sfp)
{
BioseqPtr bsp;
@@ -6988,7 +7637,10 @@ static void CheckForBadMRNAOverlap (ValidStructPtr vsp, SeqFeatPtr sfp)
}
mrna = SeqMgrGetOverlappingFeature (sfp->location, FEATDEF_mRNA, NULL, 0, NULL, LOCATION_SUBSET, &fcontext);
if (mrna != NULL) {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA contains CDS but internal intron-exon boundaries do not match");
+ if (StringISearch (sfp->except_text, "ribosomal slippage") == NULL &&
+ StringISearch (sfp->except_text, "ribosome slippage") == NULL) {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA contains CDS but internal intron-exon boundaries do not match");
+ }
} else {
ValidErr (vsp, sev, ERR_SEQ_FEAT_CDSmRNArange, "mRNA overlaps or contains CDS but does not completely contain intervals");
}
@@ -7044,6 +7696,7 @@ static CharPtr legalDbXrefOnRefSeq [] = {
"GenBank",
"EMBL",
"DDBJ",
+ "REBASE",
NULL
};
@@ -7112,6 +7765,7 @@ static CharPtr legal_exception_strings [] = {
"artificial frameshift",
"non-consensus splice site",
"nonconsensus splice site",
+ "rearrangement required for product",
NULL
};
@@ -7279,6 +7933,24 @@ NLM_EXTERN Boolean IsNuclAcc (CharPtr name)
return TRUE;
}
+static Boolean IsCddFeat (
+ SeqFeatPtr sfp
+)
+/* TRUE if sfp is a SEQFEAT_REGION feature with at least one dbxref whose db compares equal to "CDD" under StringCmp; FALSE for NULL or any other feature type */
+{
+ DbtagPtr dbt;
+ ValNodePtr vnp;
+
+ if (sfp == NULL || sfp->data.choice != SEQFEAT_REGION) return FALSE;
+
+ for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
+ dbt = (DbtagPtr) vnp->data.ptrvalue;
+ if (dbt != NULL && StringCmp (dbt->db, "CDD") == 0) return TRUE; /* first matching dbxref is enough */
+ }
+
+ return FALSE;
+}
+
NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
{
Int2 type, i, j;
@@ -7322,6 +7994,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
SeqMgrDescContext context;
GeneRefPtr grpx;
SeqFeatPtr sfpx;
+ SeqFeatPtr operon;
Boolean redundantgenexref;
SeqMgrFeatContext fcontext;
CharPtr syn1, syn2, label = NULL;
@@ -7329,6 +8002,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
SeqIdPtr sip;
TextSeqIdPtr tsip;
BioseqPtr protBsp;
+ ErrSev sev;
vsp = (ValidStructPtr) (gcp->userdata);
sfp = (SeqFeatPtr) (gcp->thisitem);
@@ -7358,8 +8032,35 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "Gene of 'order' with otherwise complete location should have partial flag set");
}
}
- /* inconsistent combination of partial/complete product,location,partial flag */
- else if (((partials[0] == SLP_COMPLETE) && (sfp->product != NULL)) || (partials[1] == SLP_COMPLETE) || (!sfp->partial)) {
+ /* inconsistent combination of partial/complete product,location,partial flag - part 1 */
+ else if (((partials[0] == SLP_COMPLETE) && (sfp->product != NULL))) {
+ sev = SEV_WARNING;
+ bsp = GetBioseqGivenSeqLoc (sfp->product, gcp->entityID);
+ /* if not local bioseq product, lower severity */
+ if (bsp == NULL) {
+ sev = SEV_INFO;
+ }
+ tmp = StringMove (buf, "Inconsistent: ");
+ if (sfp->product != NULL) {
+ tmp = StringMove (tmp, "Product= ");
+ if (partials[0])
+ tmp = StringMove (tmp, "partial, ");
+ else
+ tmp = StringMove (tmp, "complete, ");
+ }
+ tmp = StringMove (tmp, "Location= ");
+ if (partials[1])
+ tmp = StringMove (tmp, "partial, ");
+ else
+ tmp = StringMove (tmp, "complete, ");
+ tmp = StringMove (tmp, "Feature.partial= ");
+ if (sfp->partial)
+ tmp = StringMove (tmp, "TRUE");
+ else
+ tmp = StringMove (tmp, "FALSE");
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialProblem, buf);
+ /* inconsistent combination of partial/complete product,location,partial flag - part 2 */
+ } else if ((partials[1] == SLP_COMPLETE) || (!sfp->partial)) {
tmp = StringMove (buf, "Inconsistent: ");
if (sfp->product != NULL) {
tmp = StringMove (tmp, "Product= ");
@@ -7390,7 +8091,9 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
errtype = SLP_NOSTART;
for (j = 0; j < 4; j++) {
if (partials[i] & errtype) {
- if (i == 1 && j < 2 && PartialAtSpliceSite (sfp->location, errtype)) {
+ if (i == 1 && j < 2 && IsCddFeat (sfp)) {
+ /* suppresses warning */
+ } else if (i == 1 && j < 2 && PartialAtSpliceSite (sfp->location, errtype)) {
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
"%s: %s (but is at consensus splice site)",
parterr[i], parterrs[j]);
@@ -7411,7 +8114,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
"%s: %s", parterr[i], parterrs[j]);
- }
+ }
}
errtype <<= 1;
}
@@ -7513,6 +8216,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
CdTransCheck (vsp, sfp);
SpliceCheck (vsp, sfp);
}
+ CdsProductIdCheck (vsp, sfp);
crp = (CdRegionPtr) (sfp->data.value.ptrvalue);
if (crp != NULL) {
for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
@@ -7591,6 +8295,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
CheckForBadGeneOverlap (vsp, sfp);
CheckForBadMRNAOverlap (vsp, sfp);
CheckForCommonCDSProduct (vsp, sfp);
+ CheckCDSPartial (vsp, sfp);
break;
case 4: /* Prot-ref */
prp = (ProtRefPtr) (sfp->data.value.ptrvalue);
@@ -7852,6 +8557,26 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
if (type != SEQFEAT_GENE) {
grp = SeqMgrGetGeneXref (sfp);
+ if (grp == NULL) {
+ sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
+ if (sfpx != NULL) {
+ grp = (GeneRefPtr) sfpx->data.value.ptrvalue;
+ }
+ }
+ if (grp != NULL && (! SeqMgrGeneIsSuppressed (grp))) {
+ if (! StringHasNoText (grp->allele)) {
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringCmp (gbq->qual, "allele") == 0) {
+ if (StringICmp (gbq->val, grp->allele) == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "Redundant allele qualifier (%s) on gene and feature", gbq->val);
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "Mismatched allele qualifier on gene (%s) and feature (%s)", grp->allele, gbq->val);
+ }
+ }
+ }
+ }
+ }
+ grp = SeqMgrGetGeneXref (sfp);
if (grp == NULL || SeqMgrGeneIsSuppressed (grp))
return;
sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
@@ -7883,8 +8608,22 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryGeneXref, "Unnecessary gene cross-reference %s", label);
}
+ } else {
+ operon = SeqMgrGetOverlappingOperon (sfp->location, &fcontext);
+ if (operon != NULL) {
+ if (SeqMgrGetDesiredFeature (sfp->idx.entityID, 0, 0, 0, sfp, &fcontext) == sfp) {
+ if (! StringHasNoText (fcontext.label)) {
+ for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringCmp (gbq->qual, "operon") == 0) {
+ if (StringICmp (gbq->val, fcontext.label) == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "Operon is same as gene - %s", gbq->val);
+ }
+ }
+ }
+ }
+ }
+ }
}
- return;
}
/*****************************************************************************
@@ -7903,11 +8642,16 @@ static CharPtr bypass_mrna_trans_check [] = {
NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
{
+ BioseqPtr bsp;
+ Char ch;
+ Int4 counta, countnona;
+ GatherContextPtr gcp;
Int2 i;
Int4 mismatch, total;
CharPtr mrseq, pdseq;
Int4 mlen, plen;
CharPtr ptr1, ptr2;
+ ErrSev sev;
SeqFeat sf;
SeqIdPtr sip;
ValNode vn;
@@ -7929,6 +8673,16 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
if (sip == NULL)
return;
+ sev = SEV_ERROR;
+ gcp = vsp->gcp;
+ if (gcp != NULL) {
+ bsp = GetBioseqGivenSeqLoc (sfp->product, gcp->entityID);
+ /* if not local bioseq product, lower severity */
+ if (bsp == NULL) {
+ sev = SEV_WARNING;
+ }
+ }
+
mrseq = GetSequenceByFeature (sfp);
if (mrseq == NULL)
return;
@@ -7946,8 +8700,32 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
mlen = StringLen (mrseq);
plen = StringLen (pdseq);
if (mlen != plen) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] does not match product length [%ld]", (long) mlen, (long) plen);
- } else if (mlen > 0 && StringICmp (mrseq, pdseq) != 0) {
+ if (mlen < plen) {
+ ptr1 = pdseq + mlen;
+ counta = 0;
+ countnona = 0;
+ ch = *ptr1;
+ while (ch != '\0') {
+ if (ch == 'A' || ch == 'a') {
+ counta++;
+ } else {
+ countnona++;
+ }
+ ptr1++;
+ ch = *ptr1;
+ }
+ if (counta < 19 * countnona) {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] less than product length [%ld], and tail < 95%s polyA", (long) mlen, (long) plen, "%");
+ plen = mlen; /* even if it fails polyA test, allow base-by-base comparison on common length */
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] less than product length [%ld], but tail >= 95%s polyA", (long) mlen, (long) plen, "%");
+ plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */
+ }
+ } else {
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptLen, "Transcript length [%ld] greater than product length [%ld]", (long) mlen, (long) plen);
+ }
+ }
+ if (mlen == plen && mlen > 0 && StringICmp (mrseq, pdseq) != 0) {
mismatch = 0;
total = 0;
ptr1 = mrseq;
@@ -7960,7 +8738,7 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
ptr2++;
total++;
}
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_TranscriptMismatches,
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_TranscriptMismatches,
"There are %ld mismatches out of %ld bases between the transcript and product sequence", (long) mismatch, (long) total);
}
MemFree (pdseq);
@@ -8035,6 +8813,7 @@ static CharPtr bypass_cds_trans_check [] = {
"reasons given in citation",
"artificial frameshift",
"unclassified translation discrepancy",
+ "rearrangement required for product",
NULL
};
@@ -8628,6 +9407,115 @@ NLM_EXTERN void SpliceCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
/*****************************************************************************
*
+* CdsProductIdCheck (vsp, sfp)
+* code taken from asn2gnbk.c - release mode expects CDS product Bioseqs
+*
+*****************************************************************************/
+static SeqIdPtr SeqLocIdForProduct (
+ SeqLocPtr product
+)
+/* Returns SeqLocId (product) when that is non-NULL, otherwise the first non-NULL SeqLocId among the sub-locations walked with SeqLocFindNext; NULL if product is NULL or no id is found */
+{
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+
+ /* in case product is a SEQLOC_EQUIV */
+
+ if (product == NULL) return NULL;
+ sip = SeqLocId (product);
+ if (sip != NULL) return sip; /* whole location already yields an id */
+ slp = SeqLocFindNext (product, NULL);
+ while (slp != NULL) {
+ sip = SeqLocId (slp);
+ if (sip != NULL) return sip;
+ slp = SeqLocFindNext (product, slp);
+ }
+ return NULL;
+}
+
+static Boolean GetAccnVerFromServer (Int4 gi, CharPtr buf)
+/* Calls the SeqMgr's accn_ver_lookup_func for gi and copies the returned string into buf. Returns FALSE if buf is NULL, the SeqMgr or its lookup function is unavailable, or the lookup returns NULL; buf is set to "" before the lookup. */
+{
+ AccnVerLookupFunc func;
+ SeqMgrPtr smp;
+ CharPtr str;
+
+ if (buf == NULL) return FALSE;
+ *buf = '\0';
+ smp = SeqMgrWriteLock ();
+ if (smp == NULL) return FALSE;
+ func = smp->accn_ver_lookup_func;
+ SeqMgrUnlock (); /* lock is held only while reading the function pointer, not during the lookup call */
+ if (func == NULL) return FALSE;
+ str = (*func) (gi);
+ if (str == NULL) return FALSE;
+ if (StringLen (str) < 40) { /* NOTE(review): presumably callers pass a buffer of at least 40 chars - caller not visible here, confirm */
+ StringCpy (buf, str);
+ }
+ MemFree (str); /* the lookup result is freed here, so the callback's return value is treated as owned by this function */
+ return TRUE; /* NOTE(review): also TRUE when the string was too long to copy and buf is still empty */
+}
+
+static void CdsProductIdCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
+/* Warns with ERR_SEQ_FEAT_MissingCDSproduct when a CDS has no product, unless it is pseudo (feature, gene xref or overlapping gene), is 5'-partial-only with a location of length <= 5, or carries the exception text "rearrangement required for product". sfp is dereferenced without a NULL check. */
+{
+ SeqFeatPtr gene;
+ GeneRefPtr grp;
+ Boolean juststop = FALSE;
+ Boolean okay = FALSE;
+ SeqEntryPtr oldscope;
+ Boolean partial5;
+ Boolean partial3;
+ Boolean pseudo = FALSE;
+ SeqEntryPtr sep;
+
+ /* non-pseudo CDS must have /product */
+ if (sfp->pseudo) {
+ pseudo = TRUE;
+ }
+ grp = SeqMgrGetGeneXref (sfp);
+ if (grp == NULL) {
+ sep = GetTopSeqEntryForEntityID (sfp->idx.entityID);
+ oldscope = SeqEntrySetScope (sep); /* scope is switched to the top entry only for the overlap lookup and restored right after */
+ gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
+ SeqEntrySetScope (oldscope);
+ if (gene != NULL) {
+ grp = (GeneRefPtr) gene->data.value.ptrvalue;
+ if (gene->pseudo) {
+ pseudo = TRUE;
+ }
+ }
+ }
+ if (grp != NULL && grp->pseudo) {
+ pseudo = TRUE;
+ }
+ if (sfp->location != NULL) {
+ if (CheckSeqLocForPartial (sfp->location, &partial5, &partial3)) {
+ if (partial5 && (! partial3)) {
+ if (SeqLocLen (sfp->location) <= 5) { /* presumably a fragment holding little more than the stop codon - confirm */
+ juststop = TRUE;
+ }
+ }
+ }
+ }
+ if (pseudo || juststop) {
+ okay = TRUE;
+ } else if (sfp->product != NULL) {
+ okay = TRUE;
+ } else {
+ if (sfp->excpt && (! StringHasNoText (sfp->except_text))) {
+ if (StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
+ okay = TRUE;
+ }
+ }
+ }
+ if (! okay) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_MissingCDSproduct, "Expected CDS product absent");
+ }
+}
+
+/*****************************************************************************
+*
* ValidateSeqLoc(vsp, slp, prefix)
*
*****************************************************************************/
diff --git a/api/valid.msg b/api/valid.msg
index 4bb11461..a7923795 100644
--- a/api/valid.msg
+++ b/api/valid.msg
@@ -152,6 +152,13 @@ $^ UnexpectedIdentifierChange, 41
The set of sequence identifiers on a Bioseq are not consistent with the previous version
of the record in the database.
+$^ InternalNsInSeqLit, 42
+There are runs of many Ns inside the SeqLit component of a delta Bioseq.
+
+$^ SeqLitGapLength0, 43
+A SeqLit component of a delta Bioseq can specify a gap, but it should not be a gap
+of 0 length.
+
$$ SEQ_DESCR, 2
$^ BioSourceMissing, 1
@@ -252,6 +259,9 @@ $^ UnnecessaryBioSourceFocus, 24
Focus should not be set on a BioSource descriptor in records where there is no
BioSource feature.
+$^ RefGeneTrackingWithoutStatus, 25
+The RefGeneTracking user object does not have the required Status field set.
+
$$ GENERIC, 3
$^ NonAsciiAsn, 1
@@ -318,6 +328,12 @@ however be referenced remotely.
$^ InconsistentMolInfoBiomols, 12
Mol-info.biomol is inconsistent within a segset or parts set.
+$^ ArchaicFeatureLocation, 13
+A feature location should refer to the accession or gi number, not a local or general ID.
+
+$^ ArchaicFeatureProduct, 14
+A feature product should refer to the accession or gi number, not a local or general ID.
+
$$ SEQ_FEAT, 5
$^ InvalidForType, 1
@@ -573,6 +589,25 @@ $^ RnaProductMismatch, 61
The RNA feature product type does not correspond to the RNA feature type. These
need to be consistent.
+$^ MissingCDSproduct, 62
+The CDS should have a product, but does not. Pseudo CDSs, CDSs that are 5' partial only
+and no longer than 5 bases, and those marked with a "rearrangement required for product"
+exception are exempt from needing a product.
+
+$^ BadTrnaCodon, 63
+The tRNA codon recognized is an illegal value.
+
+$^ BadTrnaAA, 64
+The tRNA encoded amino acid is an illegal value.
+
+$^ OnlyGeneXrefs, 65
+There are gene xrefs but no gene features. Records should normally have single-interval
+gene features covering other biological features. Gene xrefs are used only to override
+the inheritance by overlap.
+
+$^ UTRdoesNotAbutCDS, 66
+The 5'UTR and 3'UTR features should exactly abut the CDS feature.
+
$$ SEQ_ALIGN, 6
$^ SeqIdProblem, 1
diff --git a/api/validerr.h b/api/validerr.h
index 5ac8939a..b4ced162 100644
--- a/api/validerr.h
+++ b/api/validerr.h
@@ -43,6 +43,8 @@
#define ERR_SEQ_INST_HistAssemblyMissing 1,39
#define ERR_SEQ_INST_TerminalNs 1,40
#define ERR_SEQ_INST_UnexpectedIdentifierChange 1,41
+#define ERR_SEQ_INST_InternalNsInSeqLit 1,42
+#define ERR_SEQ_INST_SeqLitGapLength0 1,43
#define ERR_SEQ_DESCR 2,0
#define ERR_SEQ_DESCR_BioSourceMissing 2,1
#define ERR_SEQ_DESCR_InvalidForType 2,2
@@ -68,6 +70,7 @@
#define ERR_SEQ_DESCR_ObsoleteSourceQual 2,22
#define ERR_SEQ_DESCR_StructuredSourceNote 2,23
#define ERR_SEQ_DESCR_UnnecessaryBioSourceFocus 2,24
+#define ERR_SEQ_DESCR_RefGeneTrackingWithoutStatus 2,25
#define ERR_GENERIC 3,0
#define ERR_GENERIC_NonAsciiAsn 3,1
#define ERR_GENERIC_Spell 3,2
@@ -88,6 +91,8 @@
#define ERR_SEQ_PKG_FeaturePackagingProblem 4,10
#define ERR_SEQ_PKG_GenomicProductPackagingProblem 4,11
#define ERR_SEQ_PKG_InconsistentMolInfoBiomols 4,12
+#define ERR_SEQ_PKG_ArchaicFeatureLocation 4,13
+#define ERR_SEQ_PKG_ArchaicFeatureProduct 4,14
#define ERR_SEQ_FEAT 5,0
#define ERR_SEQ_FEAT_InvalidForType 5,1
#define ERR_SEQ_FEAT_PartialProblem 5,2
@@ -150,6 +155,11 @@
#define ERR_SEQ_FEAT_FeatContentDup 5,59
#define ERR_SEQ_FEAT_BadProductSeqId 5,60
#define ERR_SEQ_FEAT_RnaProductMismatch 5,61
+#define ERR_SEQ_FEAT_MissingCDSproduct 5,62
+#define ERR_SEQ_FEAT_BadTrnaCodon 5,63
+#define ERR_SEQ_FEAT_BadTrnaAA 5,64
+#define ERR_SEQ_FEAT_OnlyGeneXrefs 5,65
+#define ERR_SEQ_FEAT_UTRdoesNotAbutCDS 5,66
#define ERR_SEQ_ALIGN 6,0
#define ERR_SEQ_ALIGN_SeqIdProblem 6,1
#define ERR_SEQ_ALIGN_StrandRev 6,2