summary | refs | log | tree | commit | diff
path: root/api
diff options
context:
space:
mode:
author Aaron M. Ucko <ucko@debian.org> 2011-07-13 20:30:10 -0400
committer Aaron M. Ucko <ucko@debian.org> 2011-07-13 20:30:10 -0400
commit 76eb365b53286f69a92cbbcc3007833b7ea05cfa (patch)
tree 43153eff618b89d81f1a273a84f190c20cbc823d /api
parent eb0fe75837f0b1ffd48822a16cf8cad5d056b911 (diff)
Imported Upstream version 6.1.20110713
Diffstat (limited to 'api')
-rw-r--r-- api/aceread.c 98
-rw-r--r-- api/aceread.h 9
-rw-r--r-- api/asn2gnb1.c 378
-rw-r--r-- api/asn2gnb2.c 205
-rw-r--r-- api/asn2gnb3.c 256
-rw-r--r-- api/asn2gnb4.c 714
-rw-r--r-- api/asn2gnb5.c 118
-rw-r--r-- api/asn2gnb6.c 247
-rw-r--r-- api/asn2gnbi.h 52
-rw-r--r-- api/ecnum_ambiguous.inc 1234
-rw-r--r-- api/ecnum_deleted.inc 40
-rw-r--r-- api/ecnum_replaced.inc 194
-rw-r--r-- api/ecnum_specific.inc 8785
-rw-r--r-- api/explore.h 18
-rw-r--r-- api/gather.c 7
-rw-r--r-- api/gbftdef.h 5
-rw-r--r-- api/gbftglob.c 194
-rwxr-xr-x api/macroapi.c 9702
-rw-r--r-- api/macroapi.h 108
-rw-r--r-- api/objmgr.c 5
-rw-r--r-- api/seqmgr.c 76
-rw-r--r-- api/seqport.c 46
-rw-r--r-- api/seqport.h 12
-rw-r--r-- api/sequtil.c 64
-rw-r--r-- api/sqnutil1.c 2041
-rw-r--r-- api/sqnutil2.c 2082
-rw-r--r-- api/sqnutil3.c 6864
-rwxr-xr-x api/sqnutil4.c 7882
-rw-r--r-- api/sqnutils.h 196
-rw-r--r-- api/subutil.c 234
-rw-r--r-- api/subutil.h 44
-rw-r--r-- api/tofasta.c 390
-rw-r--r-- api/tofasta.h 24
-rw-r--r-- api/utilpub.c 9
-rw-r--r-- api/utilpub.h 2
-rwxr-xr-x api/valapi.c 15
-rw-r--r-- api/valid.c 6073
-rw-r--r-- api/valid.h 99
-rw-r--r-- api/valid.msg 40
-rw-r--r-- api/validerr.h 12
40 files changed, 37092 insertions, 11482 deletions
diff --git a/api/aceread.c b/api/aceread.c
index 045acca8..efab0ea1 100644
--- a/api/aceread.c
+++ b/api/aceread.c
@@ -1,5 +1,5 @@
/*
- * $Id: aceread.c,v 1.19 2010/05/26 15:13:01 bollin Exp $
+ * $Id: aceread.c,v 1.23 2010/12/13 16:28:14 bollin Exp $
*
* ===========================================================================
*
@@ -154,7 +154,7 @@ static int s_IsGapChar (char ch, char *gap_chars)
/* The Trace Archive Gap String is a list of the number of nucleotides to skip before adding the next gap */
-extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars)
+extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char gap_char)
{
char * cp;
int num_gaps = 0, pos, gap_num = 0;
@@ -165,7 +165,7 @@ extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars)
/* first determine number of gaps */
cp = seq_str;
while (*cp != 0) {
- if (s_IsGapChar(*cp, gap_chars)) {
+ if (*cp == gap_char) {
num_gaps++;
}
cp++;
@@ -178,7 +178,7 @@ extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars)
cp = seq_str;
pos = 0;
while (*cp != 0) {
- if (s_IsGapChar(*cp, gap_chars)) {
+ if (*cp == gap_char) {
g->gap_offsets[gap_num] = pos;
gap_num++;
pos = 0;
@@ -191,18 +191,18 @@ extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars)
return g;
}
-extern void RemoveGapCharsFromSequenceString (char *seq_str, char *gap_chars)
+extern void RemoveGapCharsFromSequenceString (char *seq_str, char gap_char)
{
char *cp_src, *cp_dst;
- if (seq_str == NULL || gap_chars == NULL) {
+ if (seq_str == NULL) {
return;
}
cp_src = seq_str;
cp_dst = seq_str;
while (*cp_src != 0) {
- if (!s_IsGapChar(*cp_src, gap_chars)) {
+ if (*cp_src != gap_char) {
*cp_dst = *cp_src;
cp_dst++;
}
@@ -755,10 +755,66 @@ extern void ACEFileFree (TACEFilePtr afp)
static char s_IsSeqChar (char ch)
{
- if (ch == '*' || ch == '-' || isalpha (ch)) {
- return 1;
- } else {
- return 0;
+ switch (ch) {
+ case '*':
+ case '-':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+ return 1;
+ break;
+ default:
+ return 0;
+ break;
}
}
@@ -1228,13 +1284,13 @@ static void s_CalculateContigOffsets (TContigPtr contig)
}
-static int s_GetUngappedSeqLen (char *str, char *gap_chars)
+static int s_GetUngappedSeqLen (char *str, char gap_char)
{
int len = 0;
if (str == NULL) return 0;
while (*str != 0) {
- if (!s_IsGapChar (*str, gap_chars)) {
+ if (*str != gap_char) {
len++;
}
str++;
@@ -1250,6 +1306,8 @@ static char * s_AddToTagComment (char *orig, char *extra)
if (orig == NULL) {
tag = extra;
+ } else if (extra == NULL) {
+ tag = orig;
} else {
tag_len = strlen (orig) + strlen (extra) + 1;
tag = malloc (sizeof (char) * (tag_len + 1));
@@ -1400,10 +1458,10 @@ static TContigPtr s_ReadContig
}
/* record actual length of consensus seq */
- contig->consensus_seq_len = s_GetUngappedSeqLen (contig->consensus_seq, "*");
+ contig->consensus_seq_len = s_GetUngappedSeqLen (contig->consensus_seq, '*');
/* calculate gap info */
- contig->gaps = GapInfoFromSequenceString (contig->consensus_seq, "*");
+ contig->gaps = GapInfoFromSequenceString (contig->consensus_seq, '*');
/* read quality scores */
if (make_qual_scores) {
@@ -1453,8 +1511,8 @@ static TContigPtr s_ReadContig
return NULL;
}
s_AdjustContigReadForTerminalNs (contig->reads[read_num]);
- contig->reads[read_num]->read_len = s_GetUngappedSeqLen (contig->reads[read_num]->read_seq, "*");
- contig->reads[read_num]->gaps = GapInfoFromSequenceString (contig->reads[read_num]->read_seq, "*");
+ contig->reads[read_num]->read_len = s_GetUngappedSeqLen (contig->reads[read_num]->read_seq, '*');
+ contig->reads[read_num]->gaps = GapInfoFromSequenceString (contig->reads[read_num]->read_seq, '*');
read_num++;
report_read_num = read_num - 1;
} else if (linestring [0] == 'Q' && linestring[1] == 'A' && isspace (linestring[2])) {
@@ -1668,7 +1726,7 @@ static void s_WriteContig (FILE *fp, TContigPtr contig)
}
fprintf (fp, "\n");
for (i = 0; i < contig->num_reads; i++) {
- fprintf (fp, "RD %s %d\n", contig->reads[i]->read_id, strlen (contig->reads[i]->read_seq));
+ fprintf (fp, "RD %s %d\n", contig->reads[i]->read_id, (int) strlen (contig->reads[i]->read_seq));
s_WriteSeq (fp, contig->reads[i]->read_seq);
fprintf (fp, "\n");
}
@@ -2013,9 +2071,9 @@ ReadContigFromString
read->read_stop = read->read_len;
if (n_is_gap) {
/* adjust for gaps */
- read->gaps = GapInfoFromSequenceString (read->read_seq, "N");
+ read->gaps = GapInfoFromSequenceString (read->read_seq, 'N');
if (read->gaps->num_gaps > 0) {
- RemoveGapCharsFromSequenceString (read->read_seq, "N");
+ RemoveGapCharsFromSequenceString (read->read_seq, 'N');
read->read_stop -= read->gaps->num_gaps;
read->read_len -= read->gaps->num_gaps;
}
diff --git a/api/aceread.h b/api/aceread.h
index 982a304c..595cf31f 100644
--- a/api/aceread.h
+++ b/api/aceread.h
@@ -2,7 +2,7 @@
#define API_ACEREAD__H
/*
- * $Id: aceread.h,v 1.13 2010/03/03 18:46:08 bollin Exp $
+ * $Id: aceread.h,v 1.14 2010/08/30 12:39:23 bollin Exp $
*
* ===========================================================================
*
@@ -69,8 +69,8 @@ typedef struct gapinfo {
extern TGapInfoPtr GapInfoNew (void);
extern void GapInfoFree (TGapInfoPtr g);
-extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars);
-extern void RemoveGapCharsFromSequenceString (char *seq_str, char *gap_chars);
+extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char gap_char);
+extern void RemoveGapCharsFromSequenceString (char *seq_str, char gap_char);
extern int SeqPosFromTilingPos (int tiling_pos, TGapInfoPtr gap_info);
extern int TilingPosFromSeqPos (int seq_pos, TGapInfoPtr gap_info);
@@ -291,6 +291,9 @@ ProcessLargeACEFileForContigFastaAndQualScores
* ==========================================================================
*
* $Log: aceread.h,v $
+ * Revision 1.14 2010/08/30 12:39:23 bollin
+ * Performance improvements for aceread_tst
+ *
* Revision 1.13 2010/03/03 18:46:08 bollin
* use unsigned int to keep track of the number of contigs.
*
diff --git a/api/asn2gnb1.c b/api/asn2gnb1.c
index 2b03644b..45d3157a 100644
--- a/api/asn2gnb1.c
+++ b/api/asn2gnb1.c
@@ -31,7 +31,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.198 $
+* $Revision: 1.217 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -743,6 +743,7 @@ NLM_EXTERN void FFCatenateSubString (
Int4 max_i, min_i, i, len = 0;
StringItemPtr current;
Boolean in_url = FALSE, found_start = FALSE;
+ Boolean in_html_ampersand_escape = FALSE;
IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)dest->iajp;
Uint4 char_count = 0;
@@ -767,7 +768,7 @@ NLM_EXTERN void FFCatenateSubString (
* HTML specific processing:
* ---------------------------------------------------------------------*/
if ( GetWWW(ajp) ) {
- if ( ! in_url ) {
+ if ( ! in_url && ! in_html_ampersand_escape ) {
if ( current->buf[i] == '<' ) {
/* Watch out! */
if (FFIsStartOfLinkEx (current, i, &len)) {
@@ -781,6 +782,15 @@ NLM_EXTERN void FFCatenateSubString (
continue;
}
}
+ if( current->buf[i] == '&' )
+ {
+ FFAddOneChar(dest, '&', FALSE);
+ if( FFIsStartOfHTMLAmpersandEscape(current, i) ) {
+ in_html_ampersand_escape = TRUE;
+ }
+ ++char_count;
+ continue;
+ }
if (char_count == line_max) {
break;
}
@@ -805,8 +815,18 @@ NLM_EXTERN void FFCatenateSubString (
}
}
+ else if( in_html_ampersand_escape ) {
+ FFAddOneChar(dest, current->buf[i], FALSE);
+ if( current->buf[i] == ';' ) {
+ in_html_ampersand_escape = FALSE;
+ }
+ continue;
+ }
+
else /* in_url */ {
- if ( current->buf[i] == '&' ) {
+ if ( current->buf[i] == '&' &&
+ ! FFStartsWith(current, i, "&amp;", TRUE) )
+ {
/* encode ampersand for XHMLT */
FFAddOneString(dest, "&amp;", FALSE, FALSE, TILDE_IGNORE);
continue;
@@ -839,29 +859,47 @@ NLM_EXTERN void FFCatenateSubString (
}
}
-NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip) {
+NLM_EXTERN CharPtr FFToCharPtrEx (StringItemPtr sip, CharPtr pfx, CharPtr sfx)
+
+{
Int4 size = 0;
StringItemPtr iter;
CharPtr result, temp;
+ size_t pfx_len, sfx_len;
+
+ pfx_len = StringLen (pfx);
+ sfx_len = StringLen (sfx);
for ( iter = sip; iter != NULL; iter = iter->next ) {
size += iter->pos;
}
- result = (CharPtr)MemNew(size + 2);
+ result = (CharPtr)MemNew(size + pfx_len + sfx_len + 2);
temp = result;
+ if (pfx_len > 0) {
+ MemCpy( temp, pfx, pfx_len );
+ temp += pfx_len;
+ }
for ( iter = sip; iter != NULL; iter = iter->next ) {
MemCpy( temp, iter->buf, iter->pos );
temp += iter->pos;
}
+ if (sfx_len > 0) {
+ MemCpy( temp, sfx, sfx_len );
+ temp += sfx_len;
+ }
*temp = '\0';
return result;
}
+NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip)
+{
+ return FFToCharPtrEx (sip, NULL, NULL);
+}
/* word wrap functions */
@@ -877,6 +915,8 @@ static CharPtr url_anchor_strings [] = {
"<A HREF=\"HTTP://",
"<A HREF=\"HTTPS://",
"<ACRONYM TITLE=\"",
+ "<DIV ",
+ "</DIV>",
NULL
};
@@ -941,6 +981,24 @@ NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip) {
*ip = i;
}
+NLM_EXTERN void FFSkipHTMLAmpersandEscape (StringItemPtr PNTR iterp, Int4Ptr ip)
+{
+ StringItemPtr iter = *iterp;
+ Int4 i = *ip;
+
+ while ( (iter != NULL) && (iter->buf[i] != ';') ) {
+ ++i;
+
+ if ( i == iter->pos ) {
+ iter = iter->next;
+ i = 0;
+ }
+ }
+
+ *iterp = iter;
+ *ip = i;
+}
+
static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP)
{
@@ -987,6 +1045,48 @@ NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos)
return FFIsStartOfLinkEx (iter, pos, NULL);
}
+NLM_EXTERN Boolean FFIsStartOfHTMLAmpersandEscape (
+ StringItemPtr iter,
+ Int4 pos )
+{
+ Char ch;
+ Int4 i;
+ Int4 max_len = 20;
+
+ if ( iter == NULL || pos >= iter->pos ) return FALSE;
+ if ( iter->buf [pos] != '&' ) return FALSE;
+
+ /* skip the initial '&' */
+ pos++;
+ if (pos >= iter->pos) {
+ iter = iter->next;
+ pos = 0;
+ if (iter == NULL) return FALSE;
+ }
+
+ for (i = 0; i < max_len; i++) {
+ ch = iter->buf [pos];
+ if( isalnum(ch) || ch == '#' ) {
+ /* fine; these are chars expected in HTML ampersand char */
+ } else if( ch == ';' ) {
+ /* found end of HTML ampersand char */
+ return TRUE;
+ } else {
+ /* illegal char in HTML ampersand char */
+ return FALSE;
+ }
+
+ pos++;
+ if (pos >= iter->pos) {
+ iter = iter->next;
+ pos = 0;
+ if (iter == NULL) return FALSE;
+ }
+ }
+
+ return FALSE;
+}
+
/*
NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos) {
static CharPtr start_link = "<A HREF=";
@@ -1127,7 +1227,8 @@ NLM_EXTERN void FFAdvanceChar(
NLM_EXTERN void FFCalculateLineBreak (
StringItemPtr PNTR break_sip, Int4 PNTR break_pos,
- Int4 init_indent, Int4 visible
+ Int4 init_indent, Int4 visible,
+ Boolean is_html
)
{
StringItemPtr iter, prev;
@@ -1145,6 +1246,13 @@ NLM_EXTERN void FFCalculateLineBreak (
Int4 candidate_int_space = -1,
candidate_int_comma = -1,
candidate_int_dash = -1;
+ /* This is set when the line consists entirely of one huge word that
+ we actually ended up breaking in the middle */
+ /* This variable was introduced to cover problems with the corner case
+ of having a really long word such that it would be broken at exactly the
+ point where only its last letter ends up on the next line. */
+ /* e.g. AA000002 */
+ Boolean breaking_long_word = FALSE;
iter = *break_sip;
@@ -1161,10 +1269,15 @@ NLM_EXTERN void FFCalculateLineBreak (
done = TRUE;
break;
}
- if ( FFIsStartOfLink(iter, i) ) {
- FFSkipLink(&iter, &i);
- --i;
- continue;
+ if( is_html ) {
+ if ( FFIsStartOfLink(iter, i) ) {
+ FFSkipLink(&iter, &i);
+ --i;
+ continue;
+ }
+ if( FFIsStartOfHTMLAmpersandEscape(iter, i) ) {
+ FFSkipHTMLAmpersandEscape(&iter, &i);
+ }
}
--init_indent;
@@ -1210,10 +1323,15 @@ NLM_EXTERN void FFCalculateLineBreak (
candidate_int_dash = i;*/
}
- if ( FFIsStartOfLink(iter, i) ) {
- FFSkipLink(&iter, &i);
- --i;
- continue;
+ if( is_html ) {
+ if ( FFIsStartOfLink(iter, i) ) {
+ FFSkipLink(&iter, &i);
+ --i;
+ continue;
+ }
+ if( FFIsStartOfHTMLAmpersandEscape(iter, i) ) {
+ FFSkipHTMLAmpersandEscape(&iter, &i);
+ }
}
++copied;
@@ -1221,6 +1339,7 @@ NLM_EXTERN void FFCalculateLineBreak (
if ( (candidate_sip_space == NULL) && (candidate_int_space == -1) &&
(candidate_sip_comma == NULL) && (candidate_int_comma == -1) &&
(candidate_sip_dash == NULL) && (candidate_int_dash == -1) ) {
+ breaking_long_word = TRUE;
candidate_sip_space = iter;
candidate_int_space = i;
}
@@ -1251,9 +1370,9 @@ NLM_EXTERN void FFCalculateLineBreak (
*break_sip = candidate_sip_dash;
*break_pos = candidate_int_dash;
}
- if (! found_lb) {
- while (FFNextChar(*break_sip, *break_pos) == ' ') {
- FFAdvanceChar(break_sip, break_pos);
+ if ( ! found_lb && ! breaking_long_word ) {
+ while (FFNextChar(*break_sip, *break_pos) == ' ') {
+ FFAdvanceChar(break_sip, break_pos);
}
if (FFNextChar(*break_sip, *break_pos) == '\n') {
FFAdvanceChar(break_sip, break_pos);
@@ -1498,6 +1617,7 @@ NLM_EXTERN Boolean FFLineBreakSplitsHtmlLink(
} /*FFLineBreakSplitsHtmlLink*/
NLM_EXTERN void FFLineWrap (
+ IntAsn2gbJobPtr ajp,
StringItemPtr dest,
StringItemPtr src,
Int4 init_indent,
@@ -1514,6 +1634,7 @@ NLM_EXTERN void FFLineWrap (
Int4 i, line_prefix_len = 0;
StringItemPtr iter;
Boolean cont = FALSE;
+ Boolean is_html = GetWWW(ajp);
/* Note:
The value of the next two variables needs to persist between consecutive
@@ -1531,12 +1652,11 @@ NLM_EXTERN void FFLineWrap (
for ( iter = src; iter != NULL; iter = iter->next ) {
for ( i = 0; i < iter->pos; ) {
-
break_pos = i;
break_sip = iter;
- FFCalculateLineBreak(
- &break_sip, &break_pos, init_indent, line_max - line_prefix_len + 1);
+ FFCalculateLineBreak(&break_sip, &break_pos, init_indent,
+ line_max - line_prefix_len + 1, is_html);
linebreak_splits_link =
FFLineBreakSplitsHtmlLink(iter, i, break_sip, break_pos,
buf_split_link_open, &html_open_link_counter );
@@ -1647,8 +1767,8 @@ NLM_EXTERN void FFAddTextToString (
FFAddPeriod(ffstring);
}
}
-
-NLM_EXTERN CharPtr FFEndPrint (
+
+NLM_EXTERN CharPtr FFEndPrintEx (
IntAsn2gbJobPtr ajp,
StringItemPtr ffstring,
FmtType format,
@@ -1656,8 +1776,11 @@ NLM_EXTERN CharPtr FFEndPrint (
Int2 gb_cont_indent,
Int2 eb_init_indent,
Int2 eb_cont_indent,
- CharPtr eb_line_prefix
+ CharPtr eb_line_prefix,
+ CharPtr pfx,
+ CharPtr sfx
)
+
{
StringItemPtr temp = FFGetString(ajp);
CharPtr result;
@@ -1665,15 +1788,31 @@ NLM_EXTERN CharPtr FFEndPrint (
if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;
if (format == GENBANK_FMT || format == GENPEPT_FMT) {
- FFLineWrap(temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX, NULL);
} else {
- FFLineWrap(temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX, eb_line_prefix);
+ FFLineWrap(ajp, temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX, eb_line_prefix);
}
- result = FFToCharPtr(temp);
+ result = FFToCharPtrEx(temp, pfx, sfx);
FFRecycleString(ajp, temp);
return result;
}
+NLM_EXTERN CharPtr FFEndPrint (
+ IntAsn2gbJobPtr ajp,
+ StringItemPtr ffstring,
+ FmtType format,
+ Int2 gb_init_indent,
+ Int2 gb_cont_indent,
+ Int2 eb_init_indent,
+ Int2 eb_cont_indent,
+ CharPtr eb_line_prefix
+)
+
+{
+ return FFEndPrintEx (ajp, ffstring, format, gb_init_indent, gb_cont_indent,
+ eb_init_indent, eb_cont_indent, eb_line_prefix, NULL, NULL);
+}
+
NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring) {
Uint4 len = 0;
StringItemPtr current;
@@ -1764,14 +1903,14 @@ NLM_EXTERN Boolean FFEmpty(StringItemPtr ffstring) {
*
* The result is returned in the supplied vector.
*/
-static void ComputeLastOccurrence(const CharPtr pattern, Uint4 last_occurrence[])
+static void ComputeLastOccurrence(const CharPtr pattern, Int4 last_occurrence[])
{
- Uint4 i;
- Uint4 pat_len;
+ Int4 i;
+ Int4 pat_len;
/* Initilalize vector */
for ( i = 0; i < 256; ++i ) {
- last_occurrence[i] = 0;
+ last_occurrence[i] = -1;
}
/* compute right-most occurrence */
@@ -1781,10 +1920,10 @@ static void ComputeLastOccurrence(const CharPtr pattern, Uint4 last_occurrence[]
}
}
-static void ComputePrefix(const CharPtr pattern, Uint4 longest_prefix[])
+static void ComputePrefix(const CharPtr pattern, Int4 longest_prefix[])
{
- Uint4 pat_len = StringLen(pattern);
- Uint4 k, q;
+ Int4 pat_len = StringLen(pattern);
+ Int4 k, q;
longest_prefix[0] = 0;
@@ -1801,16 +1940,16 @@ static void ComputePrefix(const CharPtr pattern, Uint4 longest_prefix[])
}
-static void ComputeGoodSuffix(const CharPtr pattern, Uint4 good_suffix[])
+static void ComputeGoodSuffix(const CharPtr pattern, Int4 good_suffix[])
{
- Uint4 pat_len = StringLen(pattern);
- Uint4Ptr longest_prefix, reverse_longest_prefix;
+ Int4 pat_len = StringLen(pattern);
+ Int4Ptr longest_prefix, reverse_longest_prefix;
CharPtr reverse_pattern;
- Uint4 i, j;
+ Int4 i, j;
/* allocate memory */
- longest_prefix = MemNew(pat_len * sizeof(Uint4));
- reverse_longest_prefix = MemNew(pat_len * sizeof(Uint4));
+ longest_prefix = MemNew(pat_len * sizeof(Int4));
+ reverse_longest_prefix = MemNew(pat_len * sizeof(Int4));
reverse_pattern = MemNew((pat_len + 1) * sizeof(Char));
if ( longest_prefix == NULL ||
@@ -1824,18 +1963,18 @@ static void ComputeGoodSuffix(const CharPtr pattern, Uint4 good_suffix[])
/* compute reverse pattern */
for ( i = 0; i < pat_len; ++i ) {
- reverse_pattern[pat_len - i] = pattern[i];
+ reverse_pattern[pat_len - i - 1] = pattern[i];
}
ComputePrefix(pattern, longest_prefix);
ComputePrefix(reverse_pattern, reverse_longest_prefix);
- for ( j = 0; j < pat_len; ++j) {
+ for ( j = 0; j <= pat_len; ++j) {
good_suffix[j] = pat_len - longest_prefix[pat_len-1];
}
for ( i = 0; i < pat_len; ++i ) {
- j = pat_len - reverse_longest_prefix[i] - 1;
+ j = pat_len - reverse_longest_prefix[i];
if ( good_suffix[j] > i - reverse_longest_prefix[i] + 1) {
good_suffix[j] = i - reverse_longest_prefix[i] + 1;
}
@@ -1856,17 +1995,17 @@ NLM_EXTERN Int4 FFStringSearch (
const CharPtr pattern,
Uint4 position )
{
- Uint4 text_len = FFLength(text);
- Uint4 pat_len = StringLen(pattern);
- Uint4 last_occurrence[256];
- Uint4Ptr good_suffix;
- Uint4 shift;
+ Int4 text_len = FFLength(text);
+ Int4 pat_len = StringLen(pattern);
+ Int4 last_occurrence[256];
+ Int4Ptr good_suffix;
+ Int4 shift;
Int4 j;
if ( pat_len == 0 ) return 0;
if ( text_len == 0 || pat_len > text_len - position ) return -1;
- good_suffix = (Uint4Ptr)MemNew(pat_len * sizeof(Int4));
+ good_suffix = (Int4Ptr)MemNew((pat_len+1) * sizeof(Int4));
if ( good_suffix == NULL ) return -1;
ComputeLastOccurrence(pattern, last_occurrence);
@@ -1882,8 +2021,12 @@ NLM_EXTERN Int4 FFStringSearch (
MemFree (good_suffix);
return shift;
} else {
- shift += MAX( (Int4)good_suffix[(int) j],
+ if( last_occurrence[(int) FFCharAt(text,shift + j)] <= j ) {
+ shift += MAX( (Int4)good_suffix[(int) j+1],
(Int4)(j - last_occurrence[(int) FFCharAt(text,shift + j)]));
+ } else {
+ shift += (Int4)good_suffix[(int) j+1];
+ }
}
}
MemFree (good_suffix);
@@ -1891,6 +2034,47 @@ NLM_EXTERN Int4 FFStringSearch (
return -1;
}
+/* Returns true if the given text starts with "pattern".
+ You can also control whether this is done case insensitively */
+NLM_EXTERN Boolean FFStartsWith(
+ StringItemPtr text,
+ Int4 text_pos,
+ const CharPtr pattern,
+ Boolean case_insens
+)
+{
+ Int4 pattern_pos = 0;
+
+ if( NULL == text || NULL == pattern ) {
+ return FALSE;
+ }
+
+ /* every string starts with the empty string */
+ if( pattern[0] == '\0' ) {
+ return TRUE;
+ }
+
+ while( ( case_insens ?
+ toupper(pattern[pattern_pos]) == toupper(text->buf[text_pos]) :
+ pattern[pattern_pos] == text->buf[text_pos] ) )
+ {
+ /* advance pattern; if we reach the end,
+ * text starts with pattern */
+ ++pattern_pos;
+ if( pattern[pattern_pos] == '\0' ) {
+ return TRUE;
+ }
+
+ /* advance text, if we reach the end, text does NOT start
+ * with pattern */
+ FFAdvanceChar( &text, &text_pos );
+ if( NULL == text ) {
+ return FALSE;
+ }
+ }
+
+ return FALSE;
+}
/* */
/* IsWholeWordSubstr () -- Determines if a substring that is */
@@ -2881,7 +3065,8 @@ static void DoBioseqSetList (
bssp->_class == BioseqseqSet_class_phy_set ||
bssp->_class == BioseqseqSet_class_eco_set ||
bssp->_class == BioseqseqSet_class_wgs_set ||
- bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ bssp->_class == BioseqseqSet_class_gen_prod_set ||
+ bssp->_class == BioseqseqSet_class_small_genome_set) {
/* if popset within genbank set, for example, recurse */
@@ -2917,7 +3102,8 @@ static void DoOneBioseqSet (
bssp->_class == BioseqseqSet_class_phy_set ||
bssp->_class == BioseqseqSet_class_eco_set ||
bssp->_class == BioseqseqSet_class_wgs_set ||
- bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ bssp->_class == BioseqseqSet_class_gen_prod_set ||
+ bssp->_class == BioseqseqSet_class_small_genome_set) {
/* this is a pop/phy/mut/eco set, catenate separate reports */
@@ -3179,7 +3365,8 @@ static void CountBioseqSetList (
bssp->_class == BioseqseqSet_class_phy_set ||
bssp->_class == BioseqseqSet_class_eco_set ||
bssp->_class == BioseqseqSet_class_wgs_set ||
- bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ bssp->_class == BioseqseqSet_class_gen_prod_set ||
+ bssp->_class == BioseqseqSet_class_small_genome_set) {
CountBioseqSetList (bssp->seq_set, awp);
@@ -3211,7 +3398,8 @@ static void CountOneBioseqSet (
bssp->_class == BioseqseqSet_class_phy_set ||
bssp->_class == BioseqseqSet_class_eco_set ||
bssp->_class == BioseqseqSet_class_wgs_set ||
- bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ bssp->_class == BioseqseqSet_class_gen_prod_set ||
+ bssp->_class == BioseqseqSet_class_small_genome_set) {
CountBioseqSetList (bssp->seq_set, awp);
@@ -3640,7 +3828,8 @@ static void LookForGEDetc (
static void MakeGapFeatsBase (
BioseqPtr bsp,
Pointer userdata,
- Boolean isSP
+ Boolean isSP,
+ Boolean rev_comp
)
{
@@ -3702,7 +3891,11 @@ static void MakeGapFeatsBase (
sprintf (buf, "%ld", (long) litp->length);
AddQualifierToFeature (sfp, "estimated_length", buf);
}
- sfp->location = AddIntervalToLocation (NULL, sip, currpos, currpos + litp->length - 1, FALSE, FALSE);
+ if (rev_comp) {
+ sfp->location = AddIntervalToLocation (NULL, sip, currpos + litp->length - 1, currpos, FALSE, FALSE);
+ } else {
+ sfp->location = AddIntervalToLocation (NULL, sip, currpos, currpos + litp->length - 1, FALSE, FALSE);
+ }
} else if (isSP && litp->length == 0) {
if (fakebsp == NULL) {
/* to be freed with MemFree, not BioseqFree */
@@ -3730,7 +3923,11 @@ static void MakeGapFeatsBase (
sprintf (buf, "%ld", (long) litp->length);
AddQualifierToFeature (sfp, "estimated_length", buf);
}
- sfp->location = AddIntervalToLocation (NULL, sip, currpos - 1, currpos, FALSE, FALSE);
+ if (rev_comp) {
+ sfp->location = AddIntervalToLocation (NULL, sip, currpos, currpos - 1, FALSE, FALSE);
+ } else {
+ sfp->location = AddIntervalToLocation (NULL, sip, currpos - 1, currpos, FALSE, FALSE);
+ }
sfp->comment = StringSave ("Non-consecutive residues");
}
}
@@ -3745,7 +3942,16 @@ static void MakeSPGapFeats (
)
{
- MakeGapFeatsBase (bsp, userdata, TRUE);
+ MakeGapFeatsBase (bsp, userdata, TRUE, FALSE);
+}
+
+static void MakeRCGapFeats (
+ BioseqPtr bsp,
+ Pointer userdata
+)
+
+{
+ MakeGapFeatsBase (bsp, userdata, FALSE, TRUE);
}
static void MakeGapFeats (
@@ -3754,7 +3960,7 @@ static void MakeGapFeats (
)
{
- MakeGapFeatsBase (bsp, userdata, FALSE);
+ MakeGapFeatsBase (bsp, userdata, FALSE, FALSE);
}
static CharPtr gapstr1 = " gap ";
@@ -3893,6 +4099,20 @@ static void FindMultiIntervalGenes (
}
}
+static void FindSegmentedBioseqs (
+ BioseqPtr bsp,
+ Pointer userdata
+)
+
+{
+ BoolPtr segmentedBioseqsP;
+
+ if (bsp == NULL || bsp->repr != Seq_repr_seg) return;
+ segmentedBioseqsP = (BoolPtr) userdata;
+ if (segmentedBioseqsP == NULL) return;
+ *segmentedBioseqsP = TRUE;
+}
+
static CharPtr bad_html_strings [] = {
"<script", "<object", "<applet", "<embed", "<form", "javascript:", "vbscript:", NULL
};
@@ -4000,8 +4220,9 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
Asn2gbFreeFunc remotefree = NULL;
Asn2gbLockFunc remotelock = NULL;
ValNodePtr remotevnp = NULL;
- Asn2gbSectPtr PNTR sectionArray;
SubmitBlockPtr sbp;
+ Asn2gbSectPtr PNTR sectionArray;
+ Boolean segmentedBioseqs = FALSE;
SeqEntryPtr sep;
Boolean seqspans = FALSE;
SeqIntPtr sintp;
@@ -4013,6 +4234,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
BioseqSetPtr topbssp;
Pointer userdata = NULL;
ValNodePtr vnp;
+ Boolean was_slp = FALSE;
+ Boolean rev_comp = FALSE;
Boolean is_html = FALSE;
if (format == 0) {
@@ -4089,14 +4312,24 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
if (sintp != NULL &&
sintp->from == 0 &&
sintp->to == bsp->length - 1 &&
- sintp->strand == Seq_strand_plus) {
+ sintp->strand != Seq_strand_minus) {
slp = NULL;
SeqLocFree (loc);
loc = NULL;
+ } else if (sintp != NULL &&
+ sintp->from == 0 &&
+ sintp->to == bsp->length - 1 &&
+ sintp->strand == Seq_strand_minus) {
+ rev_comp = TRUE;
}
}
}
+ if (slp != NULL && (! rev_comp)) {
+ /* suppress gaps if using sub-location, but show gaps if location was whole or interval 0..length-1 on either strand */
+ was_slp = TRUE;
+ }
+
if (bsp != NULL) {
bssp = NULL;
entityID = ObjMgrGetEntityIDForPointer (bsp);
@@ -4139,24 +4372,26 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
VisitDescriptorsInSep (sep, (Pointer) &featpolicy, LookFarFeatFetchPolicy);
fargaps = NULL;
- if (format != FTABLE_FMT) {
+ if (format != FTABLE_FMT && (! was_slp)) {
if (isRefSeq && isNC && VisitFeaturesInSep (sep, NULL, NULL) == 0) {
if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
VisitBioseqsInSep (sep, (Pointer) &fargaps, MakeFarGapFeats);
}
+ if (fargaps != NULL && fargaps->choice == 1) {
+ fargaps = ValNodeFreeData (fargaps);
+ }
}
}
- if (fargaps != NULL && fargaps->choice == 1) {
- fargaps = ValNodeFreeData (fargaps);
- }
ajp->fargaps = fargaps;
gapvnp = NULL;
- if (fargaps == NULL && format != FTABLE_FMT) {
+ if (fargaps == NULL && format != FTABLE_FMT && (! was_slp)) {
if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || isSP || (isGeneral && (! isGED))) {
if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
if (isSP) {
VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeSPGapFeats);
+ } else if (rev_comp) {
+ VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeRCGapFeats);
} else {
VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeGapFeats);
}
@@ -4234,9 +4469,9 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
}
}
- /* if location specified, normal defaults to master style */
+ /* if location specified, other than full reverse complement, normal defaults to master style */
- if (ajp->ajp.slp != NULL && style == NORMAL_STYLE) {
+ if (ajp->ajp.slp != NULL && style == NORMAL_STYLE && (! rev_comp)) {
style = MASTER_STYLE;
}
@@ -4327,6 +4562,8 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
VisitFeaturesInSep (sep, (Pointer) &multiIntervalGenes, FindMultiIntervalGenes);
ajp->multiIntervalGenes = multiIntervalGenes;
+ VisitBioseqsInSep (sep, (Pointer) &segmentedBioseqs, FindSegmentedBioseqs);
+ ajp->segmentedBioseqs = segmentedBioseqs;
ajp->relModeError = FALSE;
ajp->skipProts = skipProts;
@@ -4470,6 +4707,7 @@ static Asn2gbJobPtr asn2gnbk_setup_ex (
aw.copyGpsGeneDown = (Boolean) ((flags & COPY_GPS_GENE_DOWN) != 0);
}
}
+ aw.isRefSeq = isRefSeq;
aw.showContigAndSeq = (Boolean) ((flags & SHOW_CONTIG_AND_SEQ) != 0);
/*
@@ -5537,6 +5775,12 @@ static void AddOneFtableQual (
if (StringHasNoText (qual)) return;
if (StringHasNoText (val)) return;
+ if (StringCmp (qual, "orig_protein_id") == 0) {
+ qual = "protein_id";
+ } else if (StringCmp (qual, "orig_transcript_id") == 0) {
+ qual = "transcript_id";
+ }
+
len = StringLen (qual) + StringLen (val) + 10;
tmp = (CharPtr) MemNew (sizeof (Char) * len);
if (tmp == NULL) return;
diff --git a/api/asn2gnb2.c b/api/asn2gnb2.c
index e435d2bc..3fbb0b60 100644
--- a/api/asn2gnb2.c
+++ b/api/asn2gnb2.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.147 $
+* $Revision: 1.161 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -63,7 +63,7 @@
#endif
#endif
-static CharPtr link_projid = "http://www.ncbi.nlm.nih.gov/sites/entrez?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=";
+static CharPtr link_projid = "http://www.ncbi.nlm.nih.gov/bioproject/";
static CharPtr link_srr = "http://www.ncbi.nlm.nih.gov/sites/entrez?db=sra&term=";
@@ -449,6 +449,7 @@ NLM_EXTERN void AddLocusBlock (
Char buf [1024];
Boolean cagemaster = FALSE;
SeqFeatPtr cds;
+ Char ch1, ch2, ch3;
Int4 currGi;
Char dataclass [10];
Char date [40];
@@ -494,6 +495,7 @@ NLM_EXTERN void AddLocusBlock (
OrgRefPtr orp;
BioseqPtr parent;
Int4 prevGi;
+ CharPtr ptr;
SeqDescrPtr sdp;
Char sect [128];
Char seg [32];
@@ -619,6 +621,27 @@ NLM_EXTERN void AddLocusBlock (
}
}
+ if (sip != NULL && sip->choice == SEQID_PDB) {
+ ptr = StringChr (locus, '_');
+ if (ptr != NULL) {
+ ch1 = ptr [1];
+ if (ch1 != '\0') {
+ ch2 = ptr [2];
+ if (ch2 != '\0') {
+ ch3 = ptr [3];
+ if (ch3 == '\0') {
+ if (ch1 == ch2) {
+ if (IS_UPPER (ch1)) {
+ ptr [1] = TO_LOWER (ch1);
+ ptr [2] = '\0';
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
if (is_np) {
sfp = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
if (sfp != NULL && fcontext.bsp != NULL) {
@@ -1763,6 +1786,7 @@ NLM_EXTERN void AddAccessionBlock (
BaseBlockPtr bbp;
BioseqPtr bsp;
Char buf [41];
+ Char ch1, ch2, ch3;
SeqMgrDescContext dcontext;
EMBLBlockPtr ebp;
ValNodePtr extra_access;
@@ -1777,6 +1801,7 @@ NLM_EXTERN void AddAccessionBlock (
SeqIdPtr lcl = NULL;
size_t len = 0;
MolInfoPtr mip;
+ CharPtr ptr;
SeqDescrPtr sdp;
ValNodePtr secondary_acc;
CharPtr separator = " ";
@@ -1875,6 +1900,27 @@ NLM_EXTERN void AddAccessionBlock (
SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
+ if (sip->choice == SEQID_PDB) {
+ ptr = StringChr (buf, '_');
+ if (ptr != NULL) {
+ ch1 = ptr [1];
+ if (ch1 != '\0') {
+ ch2 = ptr [2];
+ if (ch2 != '\0') {
+ ch3 = ptr [3];
+ if (ch3 == '\0') {
+ if (ch1 == ch2) {
+ if (IS_UPPER (ch1)) {
+ ptr [1] = TO_LOWER (ch1);
+ ptr [2] = '\0';
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
bbp = Asn2gbAddBlock (awp, ACCESSION_BLOCK, sizeof (BaseBlock));
if (bbp == NULL) return;
@@ -2051,22 +2097,24 @@ NLM_EXTERN void AddVersionBlock (
)
{
- SeqIdPtr accn = NULL;
- IntAsn2gbJobPtr ajp;
- Asn2gbSectPtr asp;
- BaseBlockPtr bbp;
- BioseqPtr bsp;
- Char buf [41];
- Uint1 format = PRINTID_TEXTID_ACC_VER;
- GBSeqPtr gbseq;
- Int4 gi = -1;
- SeqIdPtr gpp = NULL;
- IndxPtr index;
- CharPtr ptr;
- SeqIdPtr sip;
- Char tmp [41];
- Char version [64];
- StringItemPtr ffstring;
+ SeqIdPtr accn = NULL;
+ IntAsn2gbJobPtr ajp;
+ Asn2gbSectPtr asp;
+ BaseBlockPtr bbp;
+ BioseqPtr bsp;
+ Char buf [41];
+ Char ch1, ch2, ch3;
+ Uint1 format = PRINTID_TEXTID_ACC_VER;
+ GBSeqPtr gbseq;
+ Int4 gi = -1;
+ SeqIdPtr gpp = NULL;
+ IntAsn2gbSectPtr iasp;
+ IndxPtr index;
+ CharPtr ptr;
+ SeqIdPtr sip;
+ Char tmp [41];
+ Char version [64];
+ StringItemPtr ffstring;
if (awp == NULL) return;
ajp = awp->ajp;
@@ -2079,6 +2127,8 @@ NLM_EXTERN void AddVersionBlock (
ffstring = FFGetString(ajp);
if ( ffstring == NULL ) return;
+ iasp = (IntAsn2gbSectPtr) asp;
+
for (sip = bsp->id; sip != NULL; sip = sip->next) {
switch (sip->choice) {
case SEQID_GI :
@@ -2166,11 +2216,45 @@ NLM_EXTERN void AddVersionBlock (
return;
}
+ if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
+ sprintf (version, "%ld", (long) gi);
+ iasp->gi = StringSave (version);
+ }
+
if (accn != NULL) {
buf [0] = '\0';
SeqIdWrite (accn, buf, format, sizeof (buf) - 1);
+ if (accn->choice == SEQID_PDB) {
+ ptr = StringChr (buf, '_');
+ if (ptr != NULL) {
+ ch1 = ptr [1];
+ if (ch1 != '\0') {
+ ch2 = ptr [2];
+ if (ch2 != '\0') {
+ ch3 = ptr [3];
+ if (ch3 == '\0') {
+ if (ch1 == ch2) {
+ if (IS_UPPER (ch1)) {
+ ptr [1] = TO_LOWER (ch1);
+ ptr [2] = '\0';
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
+ iasp->acc = StringSave (buf);
+ ptr = StringChr (iasp->acc, '.');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ }
+
if (gi > 0) {
sprintf (version, "%s GI:%ld", buf, (long) gi);
} else {
@@ -3065,12 +3149,14 @@ static void AddSPBlock (
str = NULL;
if ( oip->str != NULL ) {
str = oip->str;
- if (StringNCmp (str, "GO:", 3) == 0) {
+ if (StringCmp (db->db, "GO") == 0 && StringNCmp (str, "GO:", 3) == 0) {
str += 3;
} else if (StringNCmp (str, "MGI:", 4) == 0) {
str += 4;
- } else if (StringNCmp (str, "HGNC:", 5) == 0) {
+ } else if (StringCmp (db->db, "HGNC") == 0 && StringNCmp (str, "HGNC:", 5) == 0) {
str += 5;
+ } else if (StringCmp (db->db, "DIP") == 0 && StringNCmp (str, "DIP:", 4) == 0) {
+ str += 4;
}
} else if ( oip->id > 0 ) {
sprintf (numbuf, "%d", oip->id);
@@ -3869,13 +3955,13 @@ typedef struct finstatdata {
} FinStatData, PNTR FinStatPtr;
static FinStatData finStatKywds [] = {
- {"Standard-Draft", "STANDARD_DRAFT"},
- {"High-quality-draft", "HIGH_QUALITY_DRAFT"},
- {"Improved-high-quality-draft", "IMPROVED_HIGH_QUALITY_DRAFT"},
- {"Annotation-grade", "ANNOTATION_GRADE"},
- {"Non-contiguous-finished", "NON_CONTIGUOUS_FINISHED"},
+ {"Standard-draft", "STANDARD_DRAFT"},
+ {"High-quality-draft", "HIGH_QUALITY_DRAFT"},
+ {"Improved-high-quality-draft", "IMPROVED_HIGH_QUALITY_DRAFT"},
+ {"Annotation-directed-improvement", "ANNOTATION_DIRECTED_IMPROVEMENT"},
+ {"Noncontiguous-finished", "NONCONTIGUOUS_FINISHED"},
/*
- {"Finished", "FINISHED"},
+ {"Finished", "FINISHED"},
*/
{NULL, NULL}
};
@@ -3937,6 +4023,7 @@ NLM_EXTERN void AddKeywordsBlock (
Boolean is_sts = FALSE;
Boolean is_env_sample = FALSE;
Boolean is_genome_assembly = FALSE;
+ Boolean is_unverified = FALSE;
ValNodePtr keywords;
CharPtr kwd;
MolInfoPtr mip;
@@ -4124,10 +4211,18 @@ NLM_EXTERN void AddKeywordsBlock (
finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue);
}
}
+ } else if (oip != NULL && StringICmp (oip->str, "Unverified") == 0) {
+ is_unverified = TRUE;
}
}
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
}
+ if (is_unverified) {
+ if (head != NULL) {
+ ValNodeCopyStr (&head, 0, "; ");
+ }
+ ValNodeCopyStr (&head, 0, "UNVERIFIED");
+ }
if (add_encode) {
if (head != NULL) {
ValNodeCopyStr (&head, 0, "; ");
@@ -4822,6 +4917,13 @@ static int LIBCALLBACK SortReferences (
status = DateMatch (irp1->date, irp2->date, TRUE);
if (status == 1 || status == -1) return status;
+ /* if dates incomparable, do other comparisons */
+ if ( status != 0 ) {
+ if( (NULL != irp1->date) && (NULL != irp2->date ) ) {
+ /* std date comes before str date */
+ return ( irp2->date->data[0] - irp1->date->data[0] );
+ }
+ }
/* if dates (e.g., years) match, try to distinguish by uids */
@@ -4870,14 +4972,6 @@ static int LIBCALLBACK SortReferences (
return -1;
}
- /* for publication features, sort in explore index order */
-
- if (irp1->index > irp2->index) {
- return 1;
- } else if (irp1->index < irp2->index) {
- return -1;
- }
-
/* next use author string */
if (irp1->authstr != NULL && irp2->authstr != NULL) {
@@ -4900,6 +4994,14 @@ static int LIBCALLBACK SortReferences (
}
}
+ /* for publication features, sort in explore index order - probably superset of itemID below */
+
+ if (irp1->index > irp2->index) {
+ return 1;
+ } else if (irp1->index < irp2->index) {
+ return -1;
+ }
+
/* last resort for equivalent publication descriptors, sort in itemID order */
if (rbp1->itemtype == OBJ_SEQDESC && rbp2->itemtype == OBJ_SEQDESC) {
@@ -5069,7 +5171,8 @@ static void GetRefsOnBioseq (
BioseqPtr bsp,
Int4 from,
Int4 to,
- SeqLocPtr cdsloc
+ SeqLocPtr cdsloc,
+ BioseqPtr cdsbsp
)
{
@@ -5094,6 +5197,7 @@ static void GetRefsOnBioseq (
SeqDescrPtr sdp;
SeqFeatPtr sfp;
SeqInt sint;
+ SeqIntPtr sintp;
SeqIdPtr sip;
Boolean split;
Int4 start;
@@ -5161,9 +5265,22 @@ static void GetRefsOnBioseq (
rbp->itemtype = OBJ_SEQDESC;
irp = (IntRefBlockPtr) rbp;
- irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
- irp->left = 0;
- irp->right = target->length - 1;
+ if (cdsloc != NULL && cdsbsp != NULL) {
+ sintp = SeqIntNew ();
+ sintp->from = 0;
+ sintp->to = cdsbsp->length - 1;
+ sintp->id = SeqIdDup (cdsbsp->id);
+ irp->loc = ValNodeAddPointer (NULL, SEQLOC_INT, (Pointer) sintp);
+ /*
+ irp->loc = SeqLocWholeNew (cdsbsp);
+ */
+ irp->left = 0;
+ irp->right = cdsbsp->length - 1;
+ } else {
+ irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
+ irp->left = from;
+ irp->right = to;
+ }
alp = GetAuthListPtr (pdp, NULL);
if (alp != NULL) {
irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
@@ -5178,7 +5295,7 @@ static void GetRefsOnBioseq (
if (cdsloc != NULL) {
cp.awp = awp;
- cp.target = target;
+ cp.target = cdsbsp;
cp.vnp = &vn;
SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS);
}
@@ -5201,8 +5318,8 @@ static void GetRefsOnBioseq (
irp = (IntRefBlockPtr) rbp;
irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
- irp->left = 0;
- irp->right = target->length - 1;
+ irp->left = from;
+ irp->right = to;
alp = GetAuthListPtr (pdp, NULL);
if (alp != NULL) {
irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
@@ -5334,7 +5451,7 @@ static Boolean LIBCALLBACK GetRefsOnSeg (
SeqEntrySetScope (oldscope);
if (bsp != NULL) {
- GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL);
+ GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL, NULL);
return TRUE;
}
@@ -5394,7 +5511,7 @@ NLM_EXTERN Boolean AddReferenceBlock (
/* collect publications on bioseq */
awp->pubhead = NULL;
- GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL);
+ GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL, NULL);
target = bsp;
for (sip = bsp->id; sip != NULL; sip = sip->next) {
@@ -5425,7 +5542,7 @@ NLM_EXTERN Boolean AddReferenceBlock (
if (cds != NULL) {
dna = BioseqFindFromSeqLoc (cds->location);
if (dna != NULL) {
- GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location);
+ GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location, bsp);
target = dna;
}
}
diff --git a/api/asn2gnb3.c b/api/asn2gnb3.c
index 5a12103b..1ff6eea4 100644
--- a/api/asn2gnb3.c
+++ b/api/asn2gnb3.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.129 $
+* $Revision: 1.142 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -84,15 +84,16 @@ static void AddHistCommentString (
CharPtr suffix,
DatePtr dp,
SeqIdPtr ids,
- Boolean is_na
+ Boolean is_na,
+ Boolean use_accn
)
{
Int2 count = 0;
- Char buf [256];
- Boolean first;
+ Char buf [256], id [42];
+ Boolean first, skip;
Int4 gi = 0;
- SeqIdPtr sip;
+ SeqIdPtr sip, sip2;
CharPtr strd;
if (dp == NULL || ids == NULL || prefix == NULL || suffix == NULL || ffstring == NULL) return;
@@ -131,21 +132,50 @@ static void AddHistCommentString (
FFAddOneString (ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
}
first = FALSE;
- if ( GetWWW(ajp) ) {
- FFAddOneString (ffstring, " gi:", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
- if (is_na) {
- FF_Add_NCBI_Base_URL (ffstring, link_seqn);
+ skip = FALSE;
+ if (use_accn) {
+ sip2 = GetSeqIdForGI (gi);
+ if (sip2 != NULL) {
+ SeqIdWrite (sip2, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);
+ if (StringDoesHaveText (id)) {
+ if ( GetWWW(ajp) ) {
+ FFAddOneString (ffstring, " ", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ if (is_na) {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqn);
+ } else {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqp);
+ }
+ sprintf (buf, "%ld", (long) gi);
+ FFAddTextToString (ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ } else {
+ sprintf (buf, " %s", id);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
+ }
+ skip = TRUE;
+ }
+ SeqIdFree (sip2);
+ }
+ }
+ if (! skip) {
+ if ( GetWWW(ajp) ) {
+ FFAddOneString (ffstring, " gi:", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ if (is_na) {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqn);
+ } else {
+ FF_Add_NCBI_Base_URL (ffstring, link_seqp);
+ }
+ sprintf (buf, "%ld", (long) gi);
+ FFAddTextToString (ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
} else {
- FF_Add_NCBI_Base_URL (ffstring, link_seqp);
+ sprintf (buf, " gi:%ld", (long) gi);
+ FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
}
- sprintf (buf, "%ld", (long) gi);
- FFAddTextToString (ffstring, /* "val=" */ NULL, buf, "\">", FALSE, FALSE, TILDE_IGNORE);
- FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
- FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
- } else {
- sprintf (buf, " gi:%ld", (long) gi);
- FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_EXPAND);
}
}
}
@@ -2043,6 +2073,7 @@ NLM_EXTERN void AddCommentBlock (
Boolean is_tpa = FALSE;
Boolean is_wgs = FALSE;
Boolean isRefSeqStandard = FALSE;
+ Boolean is_unverified = FALSE;
SeqLitPtr litp;
ObjectIdPtr localID = NULL;
Char locusID [32];
@@ -2066,6 +2097,7 @@ NLM_EXTERN void AddCommentBlock (
Char tmp [32];
TextSeqIdPtr tsip;
UserFieldPtr ufp;
+ Int4 unverified_itemID = 0;
UserObjectPtr uop;
CharPtr wgsaccn = NULL;
CharPtr wgsname = NULL;
@@ -2102,6 +2134,10 @@ NLM_EXTERN void AddCommentBlock (
}
oip = uop->type;
if (oip != NULL) {
+ if (StringICmp (oip->str, "Unverified") == 0) {
+ is_unverified = TRUE;
+ unverified_itemID = dcontext.itemID;
+ }
if (StringICmp (oip->str, "ENCODE") == 0) {
is_encode = TRUE;
encodeUop = uop;
@@ -2116,6 +2152,36 @@ NLM_EXTERN void AddCommentBlock (
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
}
+ if (is_unverified) {
+ cbp = (CommentBlockPtr) Asn2gbAddBlock (awp, COMMENT_BLOCK, sizeof (CommentBlock));
+ if (cbp != NULL) {
+
+ cbp->entityID = awp->entityID;
+ cbp->itemID = unverified_itemID;
+ cbp->itemtype = OBJ_SEQDESC;
+ cbp->first = first;
+ first = FALSE;
+
+ if (cbp->first) {
+ FFStartPrint (ffstring, awp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
+ } else {
+ FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
+ }
+
+ FFAddOneString (ffstring,
+ "GenBank staff is unable to verify sequence and/or annotation provided by the submitter.",
+ FALSE, FALSE, TILDE_IGNORE);
+
+ cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
+ FFRecycleString(ajp, ffstring);
+ ffstring = FFGetString(ajp);
+
+ if (awp->afp != NULL) {
+ DoImmediateFormat (awp->afp, (BaseBlockPtr) cbp);
+ }
+ }
+ }
+
/*
if (dblinkUop != NULL) {
str = GetDBLinkString (dblinkUop);
@@ -2153,7 +2219,7 @@ NLM_EXTERN void AddCommentBlock (
if (tsip != NULL) {
is_other = TRUE;
- if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
+ if (StringNCmp (tsip->accession, "NC_", 3) == 0 || StringNCmp (tsip->accession, "AC_", 3) == 0) {
if (hasRefTrackStatus) {
/* will print elsewhere */
} else if (! StringHasNoText (genomeBuildNumber)) {
@@ -2950,8 +3016,13 @@ NLM_EXTERN void AddCommentBlock (
FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
}
- AddHistCommentString (ajp, ffstring, "[WARNING] On", "this sequence was replaced by",
- hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol));
+ if (wgsaccn != NULL) {
+ AddHistCommentString (ajp, ffstring, "[WARNING] On", "this project was updated. The new version is",
+ hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol), TRUE);
+ } else {
+ AddHistCommentString (ajp, ffstring, "[WARNING] On", "this sequence was replaced by",
+ hist->replaced_by_date, hist->replaced_by_ids, ISA_na (bsp->mol), FALSE);
+ }
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
@@ -2990,7 +3061,7 @@ NLM_EXTERN void AddCommentBlock (
}
AddHistCommentString (ajp, ffstring, "On", "this sequence version replaced",
- hist->replace_date, hist->replace_ids, ISA_na (bsp->mol));
+ hist->replace_date, hist->replace_ids, ISA_na (bsp->mol), FALSE);
cbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "CC");
FFRecycleString(ajp, ffstring);
@@ -4189,6 +4260,7 @@ NLM_EXTERN void AddSourceFeatBlock (
BioseqPtr bsp;
SeqFeatPtr cds;
SeqMgrFeatContext context;
+ Int4 currGi = 0;
BioseqPtr dna;
SeqLocPtr duploc;
Boolean excise;
@@ -4199,9 +4271,12 @@ NLM_EXTERN void AddSourceFeatBlock (
IntSrcBlockPtr lastisp;
IntSrcBlockPtr descrIsp;
ValNodePtr next;
+ Char pfx [128], sfx [128];
ValNodePtr PNTR prev;
SeqInt sint;
+ SeqIdPtr sip;
SeqLocPtr slp;
+ Int4 source_count = 0;
CharPtr str;
BioseqPtr target;
ValNode vn;
@@ -4220,6 +4295,8 @@ NLM_EXTERN void AddSourceFeatBlock (
ffstring = FFGetString(ajp);
if ( ffstring == NULL ) return;
+ pfx [0] = '\0';
+ sfx [0] = '\0';
/* collect biosources on bioseq */
@@ -4272,6 +4349,18 @@ NLM_EXTERN void AddSourceFeatBlock (
vn.next = NULL;
FFStartPrint (ffstring, awp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE);
+
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GI) {
+ currGi = (Int4) sip->data.intvalue;
+ }
+ }
+
+ if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE &&
+ (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
+ sprintf (pfx, "<span id=\"feature_%ld_source_0\" class=\"feature\">", (long) currGi);
+ }
+
FFAddOneString(ffstring, "source", FALSE, FALSE, TILDE_IGNORE);
FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE);
@@ -4335,7 +4424,12 @@ NLM_EXTERN void AddSourceFeatBlock (
FFAddTextToString (ffstring, "/mol_type=\"", str, "\"", FALSE, TRUE, TILDE_TO_SPACES);
}
- str = FFEndPrint(ajp, ffstring, awp->format, 5, 21, 5, 21, "FT");
+ if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE &&
+ (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
+ sprintf (sfx, "</span>");
+ }
+
+ str = FFEndPrintEx (ajp, ffstring, awp->format, 5, 21, 5, 21, "FT", pfx, sfx);
bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, SOURCEFEAT_BLOCK, sizeof (IntSrcBlock));
if (bbp != NULL) {
@@ -4357,6 +4451,8 @@ NLM_EXTERN void AddSourceFeatBlock (
AddFeatureToGbseq (gbseq, gbfeat, str, NULL);
}
}
+
+ return;
}
if (head == NULL) return;
@@ -4488,6 +4584,13 @@ NLM_EXTERN void AddSourceFeatBlock (
}
FFRecycleString(ajp, ffstring);
+ for (vnp = head; vnp != NULL; vnp = vnp->next) {
+ isp = (IntSrcBlockPtr) vnp->data.ptrvalue;
+ if (isp == NULL) continue;
+ isp->source_count = source_count;
+ source_count++;
+ }
+
if (awp->afp != NULL) {
for (vnp = head; vnp != NULL; vnp = vnp->next) {
isp = (IntSrcBlockPtr) vnp->data.ptrvalue;
@@ -4514,6 +4617,92 @@ static Boolean IsCDD (
return FALSE;
}
+static void SetIfpFeatCount (
+ IntFeatBlockPtr ifp,
+ IntAsn2gbJobPtr ajp,
+ Asn2gbWorkPtr awp,
+ Boolean isProt
+)
+
+{
+ FeatBlockPtr fbp;
+ Uint1 featdeftype;
+ IntAsn2gbSectPtr iasp;
+ Boolean is_other = FALSE;
+
+ if (ifp == NULL || ajp == NULL || awp == NULL) return;
+ iasp = (IntAsn2gbSectPtr) awp->asp;
+ if (iasp == NULL) return;
+
+ fbp = (FeatBlockPtr) ifp;
+
+ featdeftype = fbp->featdeftype;
+
+ if (featdeftype == FEATDEF_COMMENT) {
+ featdeftype = FEATDEF_misc_feature;
+ }
+
+ if (! isProt) {
+ if (featdeftype == FEATDEF_REGION || featdeftype == FEATDEF_BOND || featdeftype == FEATDEF_SITE) {
+ featdeftype = FEATDEF_misc_feature;
+ }
+ }
+
+ if (ajp->format == GENPEPT_FMT && isProt) {
+ if (ifp->mapToPep) {
+ if (featdeftype >= FEATDEF_preprotein && featdeftype <= FEATDEF_transit_peptide_aa) {
+ featdeftype = FEATDEF_preprotein;
+ }
+ }
+ }
+
+ if (featdeftype == FEATDEF_Imp_CDS) {
+ featdeftype = FEATDEF_CDS;
+ }
+ if (featdeftype == FEATDEF_preRNA) {
+ featdeftype = FEATDEF_precursor_RNA;
+ }
+ if (featdeftype == FEATDEF_otherRNA) {
+ featdeftype = FEATDEF_misc_RNA;
+ }
+ if (featdeftype == FEATDEF_mat_peptide_aa) {
+ featdeftype = FEATDEF_mat_peptide;
+ }
+ if (featdeftype == FEATDEF_sig_peptide_aa) {
+ featdeftype = FEATDEF_sig_peptide;
+ }
+ if (featdeftype == FEATDEF_transit_peptide_aa) {
+ featdeftype = FEATDEF_transit_peptide;
+ }
+
+ if (ajp->refseqConventions || awp->isRefSeq) {
+ is_other = TRUE;
+ }
+
+ if (! isProt) {
+ if (featdeftype == FEATDEF_preprotein) {
+ if (! is_other) {
+ featdeftype = FEATDEF_misc_feature;
+ }
+ }
+ }
+
+ if (featdeftype == FEATDEF_CLONEREF) {
+ if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
+ featdeftype = FEATDEF_misc_feature;
+ }
+ }
+
+ if (featdeftype == FEATDEF_repeat_unit && (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE)) {
+ featdeftype = FEATDEF_repeat_region;
+ }
+
+ if (featdeftype < FEATDEF_MAX) {
+ ifp->feat_count = iasp->feat_counts [featdeftype];
+ (iasp->feat_counts [featdeftype])++;
+ }
+}
+
static void GetFeatsOnCdsProduct (
SeqFeatPtr cds,
BioseqPtr nbsp,
@@ -4635,6 +4824,7 @@ static void GetFeatsOnCdsProduct (
ifp->mapToPep = FALSE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, FALSE);
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
@@ -4802,6 +4992,7 @@ static void GetRemoteFeatsOnCdsProduct (
ifp->mapToPep = FALSE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, FALSE);
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
@@ -4932,6 +5123,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
if (awp->hideImpFeats && sfp->data.choice == SEQFEAT_IMP && fcontext->featdeftype != FEATDEF_operon) return TRUE;
if (awp->hideVariations && fcontext->featdeftype == FEATDEF_variation) return TRUE;
if (awp->hideRepeatRegions && fcontext->featdeftype == FEATDEF_repeat_region) return TRUE;
+ if (awp->hideRepeatRegions && fcontext->featdeftype == FEATDEF_mobile_element) return TRUE;
if (awp->hideGaps && fcontext->featdeftype == FEATDEF_gap) return TRUE;
if (ISA_aa (bsp->mol) && fcontext->featdeftype == FEATDEF_REGION &&
awp->hideCddFeats && IsCDD (sfp)) return TRUE;
@@ -5286,6 +5478,18 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
}
break;
+ case FEATDEF_mobile_element:
+ /* mobile_element requires FTQUAL_mobile_element_type */
+ gbq = sfp->qual;
+ while (gbq != NULL) {
+ if (StringICmp (gbq->qual, "mobile_element_type") == 0 && (StringDoesHaveText (gbq->val))) {
+ okay = TRUE;
+ break;
+ }
+ gbq = gbq->next;
+ }
+ break;
+
default:
if (fcontext->featdeftype >= FEATDEF_GENE && fcontext->featdeftype < FEATDEF_MAX) {
okay = TRUE;
@@ -5338,6 +5542,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
ifp->mapToPep = FALSE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, ISA_aa (bsp->mol));
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
@@ -5415,6 +5620,7 @@ static Boolean LIBCALLBACK GetFeatsOnBioseq (
ifp->mapToPep = FALSE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, FALSE);
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
@@ -5757,6 +5963,7 @@ NLM_EXTERN void AddFeatureBlock (
ifp->isCDS = TRUE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, FALSE);
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
@@ -5846,6 +6053,7 @@ NLM_EXTERN void AddFeatureBlock (
ifp->isCDS = TRUE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, TRUE);
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
@@ -5873,6 +6081,7 @@ NLM_EXTERN void AddFeatureBlock (
ifp->isCDS = TRUE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, TRUE);
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
@@ -5910,6 +6119,7 @@ NLM_EXTERN void AddFeatureBlock (
ifp->mapToPep = TRUE;
ifp->left = 0;
ifp->right = 0;
+ SetIfpFeatCount (ifp, ajp, awp, TRUE);
ifp->firstfeat = awp->firstfeat;
awp->firstfeat = FALSE;
diff --git a/api/asn2gnb4.c b/api/asn2gnb4.c
index 6491f6dc..e788b0bf 100644
--- a/api/asn2gnb4.c
+++ b/api/asn2gnb4.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.215 $
+* $Revision: 1.249 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -133,6 +133,7 @@ static FtQualType feat_qual_order [] = {
FTQUAL_ribosomal_slippage,
FTQUAL_trans_splicing,
FTQUAL_artificial_location,
+ FTQUAL_artificial_location_str,
FTQUAL_note,
FTQUAL_citation,
@@ -171,11 +172,14 @@ static FtQualType feat_qual_order [] = {
FTQUAL_rpt_unit_seq,
FTQUAL_satellite,
FTQUAL_mobile_element,
+ FTQUAL_mobile_element_type,
FTQUAL_usedin,
FTQUAL_illegal_qual,
FTQUAL_replace,
+ FTQUAL_delta_item,
+ FTQUAL_variation_set,
FTQUAL_transl_except,
FTQUAL_transl_table,
@@ -189,6 +193,7 @@ static FtQualType feat_qual_order [] = {
FTQUAL_transcript_id,
FTQUAL_db_xref,
FTQUAL_gene_xref,
+ FTQUAL_variation_id,
FTQUAL_mol_wt,
FTQUAL_translation,
FTQUAL_transcription,
@@ -261,6 +266,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "allele", Qual_class_quote },
{ "anticodon", Qual_class_anti_codon },
{ "artificial_location", Qual_class_boolean },
+ { "artificial_location", Qual_class_string },
{ "bond", Qual_class_bond },
{ "bond_type", Qual_class_bond },
{ "bound_moiety", Qual_class_quote },
@@ -274,6 +280,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "codon_start", Qual_class_int },
{ "cons_splice", Qual_class_consplice },
{ "db_xref", Qual_class_db_xref },
+ { "delta_item", Qual_class_delta_item },
{ "derived_from", Qual_class_seq_loc },
{ "direction", Qual_class_L_R_B },
{ "EC_number", Qual_class_EC_quote },
@@ -282,7 +289,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "evidence", Qual_class_evidence },
{ "exception", Qual_class_exception },
{ "exception_note", Qual_class_exception },
- { "experiment", Qual_class_quote },
+ { "experiment", Qual_class_experiment },
{ "experiment", Qual_class_string },
{ "product", Qual_class_valnode },
{ "figure", Qual_class_string },
@@ -314,6 +321,7 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "map", Qual_class_quote },
{ "maploc", Qual_class_string },
{ "mobile_element", Qual_class_mobile_element },
+ { "mobile_element_type", Qual_class_mobile_element },
{ "mod_base", Qual_class_noquote },
{ "model_evidence", Qual_class_model_ev },
{ "calculated_mol_wt", Qual_class_mol_wt },
@@ -381,6 +389,8 @@ static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
{ "trna_codons", Qual_class_trna_codons },
{ "UniProtKB_evidence", Qual_class_quote },
{ "usedin", Qual_class_usedin },
+ { "db_xref", Qual_class_variation_id },
+ { "variation_set", Qual_class_variation_set },
{ "xtra_products", Qual_class_xtraprds }
};
@@ -390,50 +400,51 @@ typedef struct qualfeatur {
FtQualType featurclass;
} QualFeatur, PNTR QualFeaturPtr;
-#define NUM_GB_QUALS 41
+#define NUM_GB_QUALS 42
static QualFeatur qualToFeature [NUM_GB_QUALS] = {
- { "allele", FTQUAL_allele },
- { "bound_moiety", FTQUAL_bound_moiety },
- { "clone", FTQUAL_clone },
- { "codon", FTQUAL_codon },
- { "compare", FTQUAL_compare },
- { "cons_splice", FTQUAL_cons_splice },
- { "cyt_map", FTQUAL_gene_cyt_map },
- { "direction", FTQUAL_direction },
- { "EC_number", FTQUAL_EC_number },
- { "estimated_length", FTQUAL_estimated_length },
- { "experiment", FTQUAL_experiment },
- { "frequency", FTQUAL_frequency },
- { "function", FTQUAL_function },
- { "gen_map", FTQUAL_gene_gen_map },
- { "inference", FTQUAL_inference },
- { "insertion_seq", FTQUAL_insertion_seq },
- { "label", FTQUAL_label },
- { "map", FTQUAL_map },
- { "mobile_element", FTQUAL_mobile_element },
- { "mod_base", FTQUAL_mod_base },
- { "ncRNA_class", FTQUAL_ncRNA_class },
- { "number", FTQUAL_number },
- { "old_locus_tag", FTQUAL_old_locus_tag },
- { "operon", FTQUAL_operon },
- { "organism", FTQUAL_organism },
- { "PCR_conditions", FTQUAL_PCR_conditions },
- { "phenotype", FTQUAL_phenotype },
- { "product", FTQUAL_product_quals },
- { "rad_map", FTQUAL_gene_rad_map },
- { "replace", FTQUAL_replace },
- { "rpt_family", FTQUAL_rpt_family },
- { "rpt_type", FTQUAL_rpt_type },
- { "rpt_unit", FTQUAL_rpt_unit },
- { "rpt_unit_range", FTQUAL_rpt_unit_range },
- { "rpt_unit_seq", FTQUAL_rpt_unit_seq },
- { "satellite", FTQUAL_satellite },
- { "standard_name", FTQUAL_standard_name },
- { "tag_peptide", FTQUAL_tag_peptide },
- { "transposon", FTQUAL_transposon },
- { "UniProtKB_evidence", FTQUAL_UniProtKB_evidence },
- { "usedin", FTQUAL_usedin }
+ { "allele", FTQUAL_allele },
+ { "bound_moiety", FTQUAL_bound_moiety },
+ { "clone", FTQUAL_clone },
+ { "codon", FTQUAL_codon },
+ { "compare", FTQUAL_compare },
+ { "cons_splice", FTQUAL_cons_splice },
+ { "cyt_map", FTQUAL_gene_cyt_map },
+ { "direction", FTQUAL_direction },
+ { "EC_number", FTQUAL_EC_number },
+ { "estimated_length", FTQUAL_estimated_length },
+ { "experiment", FTQUAL_experiment },
+ { "frequency", FTQUAL_frequency },
+ { "function", FTQUAL_function },
+ { "gen_map", FTQUAL_gene_gen_map },
+ { "inference", FTQUAL_inference },
+ { "insertion_seq", FTQUAL_insertion_seq },
+ { "label", FTQUAL_label },
+ { "map", FTQUAL_map },
+ { "mobile_element", FTQUAL_mobile_element },
+ { "mobile_element_type", FTQUAL_mobile_element_type },
+ { "mod_base", FTQUAL_mod_base },
+ { "ncRNA_class", FTQUAL_ncRNA_class },
+ { "number", FTQUAL_number },
+ { "old_locus_tag", FTQUAL_old_locus_tag },
+ { "operon", FTQUAL_operon },
+ { "organism", FTQUAL_organism },
+ { "PCR_conditions", FTQUAL_PCR_conditions },
+ { "phenotype", FTQUAL_phenotype },
+ { "product", FTQUAL_product_quals },
+ { "rad_map", FTQUAL_gene_rad_map },
+ { "replace", FTQUAL_replace },
+ { "rpt_family", FTQUAL_rpt_family },
+ { "rpt_type", FTQUAL_rpt_type },
+ { "rpt_unit", FTQUAL_rpt_unit },
+ { "rpt_unit_range", FTQUAL_rpt_unit_range },
+ { "rpt_unit_seq", FTQUAL_rpt_unit_seq },
+ { "satellite", FTQUAL_satellite },
+ { "standard_name", FTQUAL_standard_name },
+ { "tag_peptide", FTQUAL_tag_peptide },
+ { "transposon", FTQUAL_transposon },
+ { "UniProtKB_evidence", FTQUAL_UniProtKB_evidence },
+ { "usedin", FTQUAL_usedin }
};
static Int2 GbqualToFeaturIndex (
@@ -689,7 +700,7 @@ NLM_EXTERN Int2 MatchRef (
for (j = 0; j < numReferences; j++) {
rbp = rbpp [j];
if (rbp == NULL) continue;
- if (MatchCit (ppr, rbp)) return j + 1;
+ if (MatchCit (ppr, rbp)) return rbp->serial;
}
return 0;
}
@@ -1478,7 +1489,28 @@ static ValQual legalGbqualList [] = {
{ FEATDEF_tmRNA , FTQUAL_operon },
{ FEATDEF_tmRNA , FTQUAL_product },
{ FEATDEF_tmRNA , FTQUAL_standard_name },
- { FEATDEF_tmRNA , FTQUAL_tag_peptide }
+ { FEATDEF_tmRNA , FTQUAL_tag_peptide },
+
+ { FEATDEF_VARIATIONREF , FTQUAL_allele },
+ { FEATDEF_VARIATIONREF , FTQUAL_compare },
+ { FEATDEF_VARIATIONREF , FTQUAL_frequency },
+ { FEATDEF_VARIATIONREF , FTQUAL_label },
+ { FEATDEF_VARIATIONREF , FTQUAL_map },
+ { FEATDEF_VARIATIONREF , FTQUAL_old_locus_tag },
+ { FEATDEF_VARIATIONREF , FTQUAL_phenotype },
+ { FEATDEF_VARIATIONREF , FTQUAL_product },
+ { FEATDEF_VARIATIONREF , FTQUAL_replace },
+ { FEATDEF_VARIATIONREF , FTQUAL_standard_name },
+
+ { FEATDEF_mobile_element , FTQUAL_allele },
+ { FEATDEF_mobile_element , FTQUAL_function },
+ { FEATDEF_mobile_element , FTQUAL_label },
+ { FEATDEF_mobile_element , FTQUAL_map },
+ { FEATDEF_mobile_element , FTQUAL_mobile_element_type },
+ { FEATDEF_mobile_element , FTQUAL_old_locus_tag },
+ { FEATDEF_mobile_element , FTQUAL_rpt_family },
+ { FEATDEF_mobile_element , FTQUAL_rpt_type },
+ { FEATDEF_mobile_element , FTQUAL_standard_name }
};
/* comparison of ValQual's -- first compare featdef then ftqual */
@@ -2806,6 +2838,84 @@ static Boolean OnlyOneRealGeneral (SeqIdPtr sip)
return FALSE;
}
+static void AddExperimentWithPMIDlinks(
+ IntAsn2gbJobPtr ajp,
+ StringItemPtr ffstring,
+ CharPtr str
+)
+
+{
+ Char ch;
+ Boolean had_pmid;
+ CharPtr pmid;
+ CharPtr prefix = "PMID:";
+ CharPtr ptr;
+
+ if (! GetWWW (ajp)) {
+ FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE);
+ return;
+ }
+
+ if (CommentHasSuspiciousHtml (ajp, str)) {
+ FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE);
+ return;
+ }
+
+ while (StringDoesHaveText (str)) {
+ ptr = StringStr (str, prefix);
+ if (ptr == NULL) {
+ FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE);
+ return;
+ }
+ *ptr = '\0';
+ FFAddOneString (ffstring, str, FALSE, TRUE, TILDE_IGNORE);
+ ptr += StringLen (prefix);
+ pmid = ptr;
+ ch = *ptr;
+ while (ch == ' ') {
+ ptr++;
+ pmid = ptr;
+ ch = *ptr;
+ }
+ while (IS_DIGIT (ch)) {
+ ptr++;
+ ch = *ptr;
+ }
+ *ptr = '\0';
+
+ had_pmid = FALSE;
+ if (StringDoesHaveText (pmid)) {
+ FFAddOneString (ffstring, prefix, FALSE, TRUE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_Add_NCBI_Base_URL (ffstring, link_muid);
+ FFAddTextToString (ffstring, NULL, pmid, "\">", FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, pmid, FALSE, FALSE, TILDE_IGNORE);
+ FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
+ had_pmid = TRUE;
+ }
+
+ *ptr = ch;
+ str = ptr;
+
+ prefix = "PMID:";
+ ptr = str;
+ ch = *ptr;
+ if (had_pmid) {
+ if (ch == ',') {
+ ptr++;
+ ch = *ptr;
+ while (ch == ' ') {
+ ptr++;
+ ch = *ptr;
+ }
+ if (IS_DIGIT (ch)) {
+ prefix = ",";
+ }
+ }
+ }
+ }
+}
+
static void FormatFeatureBlockQuals (
StringItemPtr ffstring,
IntAsn2gbJobPtr ajp,
@@ -2847,6 +2957,7 @@ static void FormatFeatureBlockQuals (
Int4 gi;
Boolean hadProtDesc = FALSE;
DbtagPtr dbt;
+ DeltaItemPtr dip;
UserFieldPtr entry;
Int4 exp_ev;
GBQualPtr gbq;
@@ -2860,6 +2971,9 @@ static void FormatFeatureBlockQuals (
Boolean is_sc;
Int2 j;
FtQualType jdx;
+ Int2 k;
+ Int2 k_lower;
+ Int2 k_upper;
Int4 len;
Boolean link_is_na;
FloatHi molwt;
@@ -2885,10 +2999,11 @@ static void FormatFeatureBlockQuals (
Uint1 residue;
SeqCodeTablePtr sctp;
Int4 sec_str;
+ ValNodePtr seq_seq;
Uint1 seqcode;
Char seqid [50];
- SeqIntPtr sintp;
SeqIdPtr sip;
+ SeqLitPtr slitp;
SeqLocPtr slp;
Boolean split;
CharPtr start;
@@ -2898,7 +3013,9 @@ static void FormatFeatureBlockQuals (
tRNAPtr trna;
UserFieldPtr ufp;
UserObjectPtr uop;
- ValNodePtr vnp;
+ ValNodePtr vnp, vnp2, vnp3;
+ VariationInstPtr vip;
+ VariationRefPtr vrp;
StringItemPtr unique;
Boolean indexerVersion;
@@ -3114,13 +3231,8 @@ static void FormatFeatureBlockQuals (
if (str == NULL) continue;
if (ajp->flags.dropIllegalQuals) {
- tmp = str;
- while (*tmp != '\0' && *tmp == '\"')
- tmp++;
- for (; *tmp != '\0' && *tmp != '\"'; tmp++) {
- if (!IS_DIGIT(*tmp) && *tmp != '.' && *tmp != '-') {
- okay = FALSE;
- }
+ if (! ECNumberFormatOkay (str, ajp->flags.forGbRelease)) {
+ okay = FALSE;
}
}
if (!okay) continue;
@@ -3192,6 +3304,27 @@ static void FormatFeatureBlockQuals (
}
break;
+ case Qual_class_experiment :
+ gbq = qvp [idx].gbq;
+ if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
+ (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
+ if (lasttype == NULL) {
+ lasttype = gbq->qual;
+ }
+ while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
+ if (! StringHasNoText (gbq->val)) {
+ FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
+ FALSE, TRUE, TILDE_IGNORE);
+ if (!StringIsJustQuotes (gbq->val)) {
+ AddExperimentWithPMIDlinks(ajp, ffstring, gbq->val);
+ }
+ FFAddOneChar(ffstring, '\"', FALSE);
+ FFAddOneChar(ffstring, '\n', FALSE);
+ }
+ gbq = gbq->next;
+ }
+ break;
+
case Qual_class_noquote :
gbq = qvp [idx].gbq;
if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
@@ -3788,32 +3921,22 @@ static void FormatFeatureBlockQuals (
}
str = qvp [FTQUAL_trna_aa].str;
if (slp != NULL && StringDoesHaveText (str)) {
- if (ajp->mode == RELEASE_MODE) { /* !!! quarantined pending collab approval !!! */
- if (slp->choice == SEQLOC_INT) {
- sintp = (SeqIntPtr) slp->data.ptrvalue;
- if (sintp != NULL) {
- sprintf(numbuf, "%ld", (long) sintp->from + 1);
- FFAddTextToString (ffstring, "/anticodon=(pos:", numbuf, "..",
- FALSE, FALSE, TILDE_IGNORE);
- sprintf (numbuf, "%ld", (long) sintp->to + 1);
- FFAddTextToString (ffstring, NULL, numbuf, ",",
- FALSE, FALSE, TILDE_IGNORE);
- FFAddTextToString (ffstring, "aa:", str, ")",
- FALSE, FALSE, TILDE_IGNORE);
- FFAddOneChar (ffstring, '\n', FALSE);
- }
- }
- } else {
- tmp = FFFlatLoc (ajp, target, slp, ajp->masterStyle, FALSE);
- if (tmp != NULL) {
+ tmp = FFFlatLoc (ajp, target, slp, ajp->masterStyle, FALSE);
+ if (tmp != NULL) {
+ if (ajp->mode == RELEASE_MODE &&
+ (StringStr (tmp, "join") != NULL ||
+ StringStr (tmp, "order") != NULL ||
+ StringStr (tmp, "complement") != NULL)) {
+ /* !!! join in anticodon quarantined pending collab approval !!! */
+ } else {
FFAddTextToString (ffstring, "/anticodon=(pos:", tmp, ",",
FALSE, FALSE, TILDE_IGNORE);
FFAddTextToString(ffstring, "aa:", str, ")",
FALSE, FALSE, TILDE_IGNORE);
FFAddOneChar(ffstring, '\n', FALSE);
}
- MemFree (tmp);
}
+ MemFree (tmp);
}
if (newloc != NULL) {
SeqLocFree (newloc);
@@ -3875,10 +3998,21 @@ static void FormatFeatureBlockQuals (
sprintf (numbuf, "%d", (int) j);
FFAddOneString(ffstring, "/citation=[", FALSE, TRUE, TILDE_TO_SPACES);
pmid = 0;
- if (j <= asp->numReferences) {
- rbp = asp->referenceArray [j - 1];
- if (rbp != NULL) {
- pmid = rbp->pmid;
+ if( GetWWW (ajp) && asp->numReferences > 0 ) {
+ /* binary search for reference that matches serial number j */
+ k_lower = 0;
+ k_upper = (asp->numReferences - 1);
+ while( k_lower <= k_upper ) {
+ k = (k_upper + k_lower) / 2;
+ rbp = asp->referenceArray [k];
+ if( rbp->serial == j ) {
+ pmid = rbp->pmid;
+ break;
+ } else if( rbp->serial < j ) {
+ k_lower = (k+1);
+ } else { /* rbp->serial > j */
+ k_upper = (k-1);
+ }
}
}
if (pmid > 0 && GetWWW (ajp)) {
@@ -4011,6 +4145,103 @@ static void FormatFeatureBlockQuals (
}
break;
+ case Qual_class_variation_id :
+ dbt = qvp [idx].dbt;
+ if (dbt != NULL) {
+ buf [0] = '\0';
+ if (StringICmp (dbt->db, "dbSNP") == 0) {
+ oip = dbt->tag;
+ if (oip != NULL && StringDoesHaveText (oip->str)) {
+ str = oip->str;
+ if (StringNICmp (str, "rs", 2) == 0) {
+ FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
+ FF_www_db_xref(ajp, ffstring, dbt->db, str + 2, bsp);
+ FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ }
+ }
+ }
+ }
+ break;
+
+ case Qual_class_delta_item :
+ for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
+ dip = (DeltaItemPtr) vnp->data.ptrvalue;
+ if (dip == NULL) continue;
+ seq_seq = dip->Seq_seq;
+ if (seq_seq != NULL && seq_seq->choice == Seq_seq_literal) {
+ slitp = (SeqLitPtr) seq_seq->data.ptrvalue;
+ if (slitp != NULL) {
+ if (slitp->length > 0 && slitp->seq_data_type != Seq_code_gap && slitp->seq_data != NULL) {
+ str = (CharPtr) MemNew ((size_t) (slitp->length + 6));
+ if (str != NULL) {
+ SeqPortStreamLit (slitp, 0, (Pointer) str, NULL);
+ FFAddOneString(ffstring, "/replace=\"", FALSE, FALSE, TILDE_IGNORE);
+ if (StringDoesHaveText (str)) {
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (IS_UPPER (ch)) {
+ ch = TO_LOWER (ch);
+ *ptr = ch;
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ }
+ MemFree (str);
+ }
+ }
+ }
+ }
+ break;
+
+ case Qual_class_variation_set:
+ for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
+ vrp = (VariationRefPtr) vnp->data.ptrvalue;
+ if (vrp == NULL) continue;
+ vnp2 = vrp->data;
+ if (vnp2 == NULL) continue;
+ if (vnp2->choice != VarRefData_instance) continue;
+ vip = (VariationInstPtr) vnp2->data.ptrvalue;
+ if (vip == NULL) continue;
+ for (vnp3 = vip->delta; vnp3 != NULL; vnp3 = vnp3->next) {
+ dip = (DeltaItemPtr) vnp3->data.ptrvalue;
+ if (dip == NULL) continue;
+ seq_seq = dip->Seq_seq;
+ if (seq_seq != NULL && seq_seq->choice == Seq_seq_literal) {
+ slitp = (SeqLitPtr) seq_seq->data.ptrvalue;
+ if (slitp != NULL) {
+ if (slitp->length > 0 && slitp->seq_data_type != Seq_code_gap && slitp->seq_data != NULL) {
+ str = (CharPtr) MemNew ((size_t) (slitp->length + 6));
+ if (str != NULL) {
+ SeqPortStreamLit (slitp, 0, (Pointer) str, NULL);
+ FFAddOneString(ffstring, "/replace=\"", FALSE, FALSE, TILDE_IGNORE);
+ if (StringDoesHaveText (str)) {
+ ptr = str;
+ ch = *ptr;
+ while (ch != '\0') {
+ if (IS_UPPER (ch)) {
+ ch = TO_LOWER (ch);
+ *ptr = ch;
+ }
+ ptr++;
+ ch = *ptr;
+ }
+ FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
+ }
+ FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
+ }
+ MemFree (str);
+ }
+ }
+ }
+ }
+ }
+ break;
+
case Qual_class_nuc_id :
link_is_na = TRUE;
/* fall through */
@@ -4332,6 +4563,10 @@ static void FormatFeatureBlockQuals (
for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
str = (CharPtr) vnp->data.ptrvalue;
if (str != NULL) {
+ if (ajp->mode == SEQUIN_MODE) {
+ if (StringNICmp (str, "/orig_protein_id=", 17) == 0) continue;
+ if (StringNICmp (str, "/orig_transcript_id=", 20) == 0) continue;
+ }
FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_TO_SPACES);
FFAddNewLine(ffstring);
}
@@ -5025,6 +5260,7 @@ static void FF_asn2gb_www_featkey (
FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
prefix = "&";
}
+ /*
if ( is_aa ) {
FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, "report=gpwithparts", FALSE, FALSE, TILDE_IGNORE);
@@ -5032,6 +5268,7 @@ static void FF_asn2gb_www_featkey (
FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
FFAddOneString(ffstring, "report=gbwithparts", FALSE, FALSE, TILDE_IGNORE);
}
+ */
FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
}
@@ -5306,7 +5543,9 @@ static void ParseException (
Uint1 subtype,
BoolPtr riboSlipP,
BoolPtr transSpliceP,
- BoolPtr artLocP
+ BoolPtr artLocP,
+ BoolPtr hetPopP,
+ BoolPtr lowQualP
)
{
@@ -5321,6 +5560,8 @@ static void ParseException (
*riboSlipP = FALSE;
*transSpliceP = FALSE;
*artLocP = FALSE;
+ *hetPopP = FALSE;
+ *lowQualP = FALSE;
if (StringHasNoText (original)) return;
@@ -5393,12 +5634,18 @@ static void ParseException (
ValNodeCopyStr (&note, 0, tmp);
}
found = TRUE;
- } else if (StringICmp (tmp, "heterogeneous population sequenced") == 0 ||
- StringICmp (tmp, "low-quality sequence region") == 0) {
+ } else if (StringICmp (tmp, "heterogeneous population sequenced") == 0) {
if (subtype == FEATDEF_CDS ||
subtype == FEATDEF_mRNA) {
- *artLocP = TRUE;
- ValNodeCopyStr (&note, 0, tmp); /* also copy to note */
+ *hetPopP = TRUE;
+ } else {
+ ValNodeCopyStr (&note, 0, tmp);
+ }
+ found = TRUE;
+ } else if (StringICmp (tmp, "low-quality sequence region") == 0) {
+ if (subtype == FEATDEF_CDS ||
+ subtype == FEATDEF_mRNA) {
+ *lowQualP = TRUE;
} else {
ValNodeCopyStr (&note, 0, tmp);
}
@@ -5459,6 +5706,14 @@ static void ParseException (
MemFree (str);
}
+/*
+ * NULL-terminated list of optional category prefixes that may precede the
+ * text of an inference qualifier value.  ParseInference compares the start
+ * of each value against every entry (case-insensitively, over the entry's
+ * length) and skips past the last entry that matches before testing the
+ * remainder against legalInferencePrefixes.  The leading empty string has
+ * length 0, so a value with no category prefix is left unchanged.
+ */
+static CharPtr legalCategoryPrefixes [] = {
+ "",
+ "COORDINATES: ",
+ "DESCRIPTION: ",
+ "EXISTENCE: ",
+ NULL
+};
+
static CharPtr legalInferencePrefixes [] = {
"",
"similar to sequence",
@@ -5487,6 +5742,7 @@ static void ParseInference (
ValNodePtr good = NULL, bad = NULL;
GBQualPtr gbq;
size_t len;
+ CharPtr skip, val;
*good_inferenceP = NULL;
*bad_inferenceP = NULL;
@@ -5496,10 +5752,20 @@ static void ParseInference (
for (gbq = quals; gbq != NULL; gbq = gbq->next) {
if (StringICmp (gbq->qual, "inference") != 0) continue;
if (StringHasNoText (gbq->val)) continue;
+ val = gbq->val;
+ skip = NULL;
+ for (j = 0; legalCategoryPrefixes [j] != NULL; j++) {
+ len = StringLen (legalCategoryPrefixes [j]);
+ if (StringNICmp (val, legalCategoryPrefixes [j], len) != 0) continue;
+ skip = val + len;
+ }
+ if (skip != NULL) {
+ val = skip;
+ }
best = -1;
for (j = 0; legalInferencePrefixes [j] != NULL; j++) {
len = StringLen (legalInferencePrefixes [j]);
- if (StringNICmp (gbq->val, legalInferencePrefixes [j], len) != 0) continue;
+ if (StringNICmp (val, legalInferencePrefixes [j], len) != 0) continue;
best = j;
}
if (best >= 0 && legalInferencePrefixes [best] != NULL) {
@@ -5563,6 +5829,8 @@ static SeqFeatPtr GetOverlappingGeneInEntity (
SeqMgrFeatContextPtr fcontext,
SeqMgrFeatContextPtr gcontext,
SeqLocPtr locforgene,
+ Boolean is_ed,
+ Boolean is_oldgb,
IntAsn2gbJobPtr ajp
)
@@ -5616,10 +5884,10 @@ static SeqFeatPtr GetOverlappingGeneInEntity (
}
} else {
if (fcontext->bad_order || fcontext->mixed_strand) {
- gene = SeqMgrGetOverlappingFeature (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext);
+ gene = SeqMgrGetOverlappingFeatureEx (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext, TRUE);
} else if (ajp->multiIntervalGenes) {
- gene = SeqMgrGetOverlappingFeature (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext);
- if (gene == NULL) {
+ gene = SeqMgrGetOverlappingFeatureEx (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext, TRUE);
+ if (gene == NULL && (ajp->segmentedBioseqs || is_ed || is_oldgb)) {
gene = SeqMgrGetOverlappingGene (locforgene, gcontext);
}
} else {
@@ -5693,7 +5961,118 @@ static CharPtr SeqLoc2Str (
}
*/
+/*
+ * AddJsPush
+ *
+ * Renders the intervals of a feature location as text of the form
+ * "[[start, stop],[start, stop],...]" using 1-based coordinates on target.
+ *
+ *   target   - Bioseq on which interval offsets are computed
+ *   location - feature location to walk with SeqLocFindNext
+ *
+ * Returns a newly allocated string that the caller must free, or NULL when
+ * either argument is NULL or the location yields no component.
+ *
+ * A point location whose fuzz has choice 4 and value 3 is reported as the
+ * single pair [point + 1, point + 2].
+ * NOTE(review): choice 4 / value 3 presumably denotes a between-bases
+ * ("space to right") point - confirm against the IntFuzz definition.
+ */
+static CharPtr AddJsPush (
+  BioseqPtr target,
+  SeqLocPtr location
+)
+
+{
+  Char        buf [64];
+  Boolean     first = TRUE;
+  Int4        from, to;
+  IntFuzzPtr  fuzz;
+  ValNodePtr  head = NULL, tail = NULL;
+  CharPtr     merged;
+  SeqLocPtr   piece;
+  SeqPntPtr   pnt;
+
+  if (target == NULL || location == NULL) return NULL;
+
+  if (location->choice == SEQLOC_PNT) {
+    pnt = (SeqPntPtr) location->data.ptrvalue;
+    fuzz = (pnt != NULL) ? pnt->fuzz : NULL;
+    if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 3) {
+      sprintf (buf, "[[%ld, %ld]]", (long) (pnt->point + 1), (long) (pnt->point + 2));
+      return StringSave (buf);
+    }
+  }
+
+  piece = SeqLocFindNext (location, NULL);
+  if (piece == NULL) return NULL;
+
+  for (; piece != NULL; piece = SeqLocFindNext (location, piece)) {
+    from = GetOffsetInBioseq (piece, target, SEQLOC_START) + 1;
+    to = GetOffsetInBioseq (piece, target, SEQLOC_STOP) + 1;
+    /* the first component is always emitted; later components are dropped */
+    /* when either end comes back as 0 after the +1 adjustment */
+    if (first || (from != 0 && to != 0)) {
+      sprintf (buf, "[%ld, %ld]", (long) from, (long) to);
+      ValNodeCopyStrEx (&head, &tail, 0, buf);
+    }
+    first = FALSE;
+  }
+
+  /* presumably joins the pairs with "," and wraps the result in "[" ... "]" */
+  merged = ValNodeMergeStrsExEx (head, ",", "[", "]");
+  ValNodeFreeData (head);
+
+  return merged;
+}
+
+/*
+ * AddJsInterval
+ *
+ * Builds the text emitted ahead of a feature in HTML output: the optional
+ * prefix pfx followed by a <script> element that records the feature's
+ * intervals in the page-level JavaScript array oData.
+ *
+ *   iasp        - section state; supplies gi, acc and the per-type feat_key
+ *                 table, and carries the feat_js_prefix_added flag
+ *   pfx         - text placed before the <script> element (skipped if empty)
+ *   target      - Bioseq on which interval offsets are computed
+ *   featdeftype - feature definition type, used to index iasp->feat_key
+ *   location    - feature location whose intervals are pushed
+ *
+ * Returns a newly allocated string that the caller must free, or NULL when
+ * iasp, target or location is NULL, when featdeftype is not below
+ * FEATDEF_MAX, or when no key text is recorded for the feature type.
+ *
+ * Side effects: sets iasp->feat_js_prefix_added the first time the oData
+ * initialization is emitted, and may fill in
+ * iasp->feat_key [FEATDEF_misc_feature].
+ */
+static CharPtr AddJsInterval (
+  IntAsn2gbSectPtr iasp,
+  CharPtr pfx,
+  BioseqPtr target,
+  Uint1 featdeftype,
+  SeqLocPtr location
+)
+
+{
+  Char        buf [512];
+  ValNodePtr  head = NULL, tail = NULL;
+  CharPtr     ivls;
+  CharPtr     key = NULL;
+  CharPtr     tmp;
+
+  if (iasp == NULL || target == NULL || location == NULL) return NULL;
+  if (featdeftype >= FEATDEF_MAX) return NULL;
+
+  /* a feature type whose key was recorded as misc_feature is filed under */
+  /* the misc_feature slot, which is created here if not yet present */
+  if (StringICmp (iasp->feat_key [featdeftype], "misc_feature") == 0) {
+    featdeftype = FEATDEF_misc_feature;
+    if (iasp->feat_key [featdeftype] == NULL) {
+      iasp->feat_key [featdeftype] = StringSave ("misc_feature");
+    }
+  }
+
+  key = iasp->feat_key [featdeftype];
+  if (StringHasNoText (key)) return NULL;
+
+  if (StringDoesHaveText (pfx)) {
+    ValNodeCopyStrEx (&head, &tail, 0, pfx);
+  }
+
+  ValNodeCopyStrEx (&head, &tail, 0, "<script type=\"text/javascript\">");
+  if (! iasp->feat_js_prefix_added) {
+    /* emitted only once per section: creates oData and this record's entry */
+    /* NOTE(review): sprintf into buf [512] is unbounded; this assumes */
+    /* iasp->gi and iasp->acc are short strings - confirm their sizes */
+    sprintf (buf, "if (typeof(oData) == \"undefined\") oData = []; oData.push ({gi:%s,acc:\"%s\",features: {}});",
+             iasp->gi, iasp->acc);
+    ValNodeCopyStrEx (&head, &tail, 0, buf);
+
+    iasp->feat_js_prefix_added = TRUE;
+  }
+
+  ValNodeCopyStrEx (&head, &tail, 0, "if (!oData[oData.length - 1].features.");
+  ValNodeCopyStrEx (&head, &tail, 0, key);
+  ValNodeCopyStrEx (&head, &tail, 0, ") oData[oData.length - 1].features.");
+  ValNodeCopyStrEx (&head, &tail, 0, key);
+  ValNodeCopyStrEx (&head, &tail, 0, " = [];");
+  ValNodeCopyStrEx (&head, &tail, 0, "oData[oData.length - 1].features.");
+  ValNodeCopyStrEx (&head, &tail, 0, key);
+  ValNodeCopyStrEx (&head, &tail, 0, ".push(");
+
+  ivls = AddJsPush (target, location);
+  ValNodeCopyStrEx (&head, &tail, 0, ivls);
+  /* the list holds its own copy, so release the AddJsPush result here; */
+  /* it was previously leaked on every call */
+  MemFree (ivls);
+
+  ValNodeCopyStrEx (&head, &tail, 0, ");</script>");
+
+  tmp = ValNodeMergeStrs (head);
+  ValNodeFreeData (head);
+  return tmp;
+}
+
static CharPtr FormatFeatureBlockEx (
+ Asn2gbFormatPtr afp,
IntAsn2gbJobPtr ajp,
Asn2gbSectPtr asp,
BioseqPtr bsp,
@@ -5723,11 +6102,15 @@ static CharPtr FormatFeatureBlockEx (
Char ch;
Uint1 code = Seq_code_ncbieaa;
CdRegionPtr crp;
+ Int4 currGi = 0;
SeqMgrDescContext dcontext;
Boolean encode_prefix = FALSE;
CharPtr exception_note = NULL;
CharPtr exception_string = NULL;
+ Char fbuf [32];
Uint1 featdeftype;
+ CharPtr featid = NULL;
+ ObjectIdPtr fid = NULL;
Uint1 from;
GBQualPtr gbq;
GBFeaturePtr gbfeat = NULL;
@@ -5738,6 +6121,8 @@ static CharPtr FormatFeatureBlockEx (
ValNodePtr gene_syn = NULL;
ValNodePtr good_inference = NULL;
GeneRefPtr grp = NULL;
+ Boolean hetPop = FALSE;
+ IntAsn2gbSectPtr iasp;
IntCdsBlockPtr icp;
Uint2 idx;
ValNodePtr illegal = NULL;
@@ -5747,11 +6132,13 @@ static CharPtr FormatFeatureBlockEx (
Boolean is_ged = FALSE;
Boolean is_gps = FALSE;
Boolean is_journalscan = FALSE;
+ Boolean is_oldgb = FALSE;
Boolean is_other = FALSE;
Boolean is_misc_rna = FALSE;
Boolean isGap = FALSE;
Uint4 itemID;
CharPtr its_prod = NULL;
+ CharPtr js = NULL;
CharPtr key = NULL;
CharPtr lasttype = NULL;
Int4 left = -1;
@@ -5759,6 +6146,7 @@ static CharPtr FormatFeatureBlockEx (
SeqLocPtr location = NULL;
SeqLocPtr locforgene = NULL;
SeqLocPtr locformatpep = NULL;
+ Boolean lowQual = FALSE;
SeqMgrFeatContext mcontext;
MolInfoPtr mip;
SeqFeatPtr mrna;
@@ -5771,6 +6159,7 @@ static CharPtr FormatFeatureBlockEx (
SeqFeatPtr operon = NULL;
Uint2 partial;
SeqMgrFeatContext pcontext;
+ Char pfx [128], sfx [128];
BioseqPtr prd;
CharPtr precursor_comment = NULL;
BioseqPtr prod = NULL;
@@ -5805,9 +6194,13 @@ static CharPtr FormatFeatureBlockEx (
CharPtr tmp;
Boolean transSplice = FALSE;
tRNAPtr trna;
+ TextSeqIdPtr tsip;
UserFieldPtr ufp;
BioseqPtr unlockme = NULL;
UserObjectPtr uop;
+ VariationInstPtr vip;
+ VariationRefPtr vrp;
+ VarRefDataSetPtr vsp;
ValNodePtr vnp;
StringItemPtr ffstring;
/*
@@ -5832,7 +6225,10 @@ static CharPtr FormatFeatureBlockEx (
} else {
gbseq = NULL;
}
-
+
+ pfx [0] = '\0';
+ sfx [0] = '\0';
+
protein_pid_g [0] = '\0';
itemID = fcontext->itemID;
@@ -5927,7 +6323,11 @@ static CharPtr FormatFeatureBlockEx (
locforgene = sfp->location;
}
if (location == NULL) return NULL;
-
+
+ if (loc != NULL) {
+ NormalizeNullsBetween (loc);
+ }
+
sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
if (sep != NULL && IS_Bioseq_set (sep)) {
bssp = (BioseqSetPtr) sep->data.ptrvalue;
@@ -5948,20 +6348,21 @@ static CharPtr FormatFeatureBlockEx (
is_journalscan = TRUE;
break;
case SEQID_GENBANK :
+ case SEQID_TPG :
is_ged = TRUE;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL) {
+ if (StringLen (tsip->accession) == 6) {
+ is_oldgb = TRUE;
+ }
+ }
break;
case SEQID_EMBL :
+ case SEQID_TPE :
is_ged = TRUE;
is_ed = TRUE;
break;
case SEQID_DDBJ :
- is_ged = TRUE;
- is_ed = TRUE;
- break;
- case SEQID_TPG :
- is_ged = TRUE;
- break;
- case SEQID_TPE :
case SEQID_TPD :
is_ged = TRUE;
is_ed = TRUE;
@@ -6007,11 +6408,13 @@ static CharPtr FormatFeatureBlockEx (
key = "misc_feature";
}
}
+ /*
if (featdeftype == FEATDEF_VARIATIONREF) {
if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
key = "misc_feature";
}
}
+ */
/* deal with unmappable impfeats */
@@ -6027,7 +6430,27 @@ static CharPtr FormatFeatureBlockEx (
key = "repeat_region";
}
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GI) {
+ currGi = (Int4) sip->data.intvalue;
+ }
+ }
+
+ iasp = (IntAsn2gbSectPtr) asp;
+
+ if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && featdeftype < FEATDEF_MAX) {
+ if (iasp->feat_key [featdeftype] == NULL) {
+ iasp->feat_key [featdeftype] = StringSave (key);
+ }
+ }
+
+ if (afp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE &&
+ (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
+ sprintf (pfx, "<span id=\"feature_%ld_%s_%ld\" class=\"feature\">", (long) currGi, key, (long) ifp->feat_count);
+ }
+
FFStartPrint(ffstring, format, 5, 21, NULL, 0, 5, 21, "FT", /* ifp->firstfeat */ FALSE);
+
if (ajp->ajp.slp != NULL) {
FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
} else if ( GetWWW(ajp) && StringICmp (key, "gap") != 0 && bsp != NULL /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) {
@@ -6047,7 +6470,6 @@ static CharPtr FormatFeatureBlockEx (
if (imp == NULL || StringHasNoText (imp->loc)) {
-
if (ajp->ajp.slp != NULL) {
sip = SeqIdParse ("lcl|dummy");
left = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_LEFT_END);
@@ -6059,7 +6481,9 @@ static CharPtr FormatFeatureBlockEx (
newloc = SeqLocCopyRegion (sip, location, bsp, left, right, strand, &split);
*/
SeqIdFree (sip);
- if (newloc == NULL) return NULL;
+ if (newloc == NULL) {
+ return NULL;
+ }
/*
firstloc = SeqLoc2Str (newloc);
*/
@@ -6068,6 +6492,9 @@ static CharPtr FormatFeatureBlockEx (
secondloc = SeqLoc2Str (newloc);
*/
str = FFFlatLoc (ajp, target, newloc, ajp->masterStyle, isGap);
+ if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && featdeftype < FEATDEF_MAX) {
+ js = AddJsInterval (iasp, pfx, target, featdeftype, newloc);
+ }
SeqLocFree (newloc);
/*
thirdloc = SeqLoc2Str (ajp->ajp.slp);
@@ -6079,6 +6506,9 @@ static CharPtr FormatFeatureBlockEx (
*/
} else {
str = FFFlatLoc (ajp, target, location, ajp->masterStyle, isGap);
+ if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && featdeftype < FEATDEF_MAX) {
+ js = AddJsInterval (iasp, pfx, target, featdeftype, location);
+ }
/*
if (StringCmp (str, "?") == 0) {
firstloc = SeqLoc2Str (location);
@@ -6229,7 +6659,15 @@ static CharPtr FormatFeatureBlockEx (
/* if mat_peptide, grp is already be set based on parent CDS, otherwise check current feature */
if (grp == NULL) {
- grp = SeqMgrGetGeneXref (sfp);
+ grp = SeqMgrGetGeneXrefEx (sfp, &fid);
+ if (fid != NULL) {
+ if (StringDoesHaveText (fid->str)) {
+ featid = fid->str;
+ } else {
+ sprintf (fbuf, "%ld", (long) fid->id);
+ featid = fbuf;
+ }
+ }
}
/* if gene xref, then find referenced gene, take everything as if it overlapped */
@@ -6243,7 +6681,9 @@ static CharPtr FormatFeatureBlockEx (
}
bspx = BioseqFindFromSeqLoc (sfp->location);
if (bspx != NULL) {
- if (StringDoesHaveText (grp->locus_tag)) {
+ if (featid != NULL) {
+ gene = SeqMgrGetFeatureByFeatID (0, bspx, featid, NULL, &gcontext);
+ } else if (StringDoesHaveText (grp->locus_tag)) {
gene = SeqMgrGetGeneByLocusTag (bspx, grp->locus_tag, &gcontext);
} else if (StringDoesHaveText (grp->locus)) {
gene = SeqMgrGetFeatureByLabel (bspx, grp->locus, SEQFEAT_GENE, 0, &gcontext);
@@ -6268,9 +6708,9 @@ static CharPtr FormatFeatureBlockEx (
/* first look for gene that exactly matches mat_peptide DNA projection */
if (gene == NULL && grp == NULL && locformatpep != NULL) {
- gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locformatpep, ajp);
+ gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locformatpep, is_ed, is_oldgb, ajp);
if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) {
- gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locformatpep, ajp);
+ gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locformatpep, is_ed, is_oldgb, ajp);
}
if (gene != NULL) {
@@ -6297,9 +6737,9 @@ static CharPtr FormatFeatureBlockEx (
if (gene == NULL && grp == NULL) {
if (featdeftype != FEATDEF_primer_bind) {
- gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene, ajp);
+ gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene, is_ed, is_oldgb, ajp);
if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) {
- gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locforgene, ajp);
+ gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locforgene, is_ed, is_oldgb, ajp);
}
}
@@ -6333,7 +6773,9 @@ static CharPtr FormatFeatureBlockEx (
pseudo = TRUE;
}
- if (grp != NULL && (featdeftype != FEATDEF_repeat_region || is_ed || gene == NULL)) {
+ if (grp != NULL &&
+ ((featdeftype != FEATDEF_repeat_region && featdeftype != FEATDEF_mobile_element) ||
+ is_ed || gene == NULL)) {
if (! StringHasNoText (grp->locus)) {
qvp [FTQUAL_gene].str = grp->locus;
qvp [FTQUAL_locus_tag].str = grp->locus_tag;
@@ -6361,11 +6803,11 @@ static CharPtr FormatFeatureBlockEx (
}
if (grp != NULL &&
featdeftype != FEATDEF_variation &&
- (featdeftype != FEATDEF_repeat_region || is_ed)) {
+ ((featdeftype != FEATDEF_repeat_region && featdeftype != FEATDEF_mobile_element) || is_ed)) {
qvp [FTQUAL_gene_allele].str = grp->allele; /* now propagating /allele */
}
- if (gene != NULL && (featdeftype != FEATDEF_repeat_region || is_ed)) {
+ if (gene != NULL && ((featdeftype != FEATDEF_repeat_region && featdeftype != FEATDEF_mobile_element) || is_ed)) {
/* now propagate old_locus_tag to almost any underlying feature */
for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) {
if (StringHasNoText (gbq->val)) continue;
@@ -6977,7 +7419,9 @@ static CharPtr FormatFeatureBlockEx (
if (StringICmp (oip->str, "definition") == 0) {
str = (CharPtr) ufp->data.ptrvalue;
if (StringDoesHaveText (str)) {
- qvp [FTQUAL_cdd_definition].str = str;
+ if (StringICmp (str, (CharPtr) sfp->data.value.ptrvalue) != 0) {
+ qvp [FTQUAL_cdd_definition].str = str;
+ }
}
}
}
@@ -7018,6 +7462,26 @@ static CharPtr FormatFeatureBlockEx (
case SEQFEAT_HET :
qvp [FTQUAL_heterogen].str = (CharPtr) sfp->data.value.ptrvalue;
break;
+ case SEQFEAT_VARIATIONREF :
+ vrp = (VariationRefPtr) sfp->data.value.ptrvalue;
+ if (vrp != NULL) {
+ qvp [FTQUAL_variation_id].dbt = vrp->id;
+ vnp = vrp->data;
+ if (vnp != NULL) {
+ if (vnp->choice == VarRefData_instance) {
+ vip = (VariationInstPtr) vnp->data.ptrvalue;
+ if (vip != NULL) {
+ qvp [FTQUAL_delta_item].vnp = vip->delta;
+ }
+ } else if (vnp->choice == VarRefData_set) {
+ vsp = (VarRefDataSetPtr) vnp->data.ptrvalue;
+ if (vsp != NULL) {
+ qvp [FTQUAL_variation_set].vnp = vsp->variations;
+ }
+ }
+ }
+ }
+ break;
default :
break;
}
@@ -7038,7 +7502,7 @@ static CharPtr FormatFeatureBlockEx (
qvp [FTQUAL_go_function].ufp = NULL;
}
- if (featdeftype == FEATDEF_repeat_region) {
+ if (featdeftype == FEATDEF_repeat_region || featdeftype == FEATDEF_mobile_element) {
pseudo = FALSE;
}
@@ -7098,6 +7562,7 @@ static CharPtr FormatFeatureBlockEx (
case FEATDEF_misc_signal:
case FEATDEF_misc_structure:
case FEATDEF_modified_base:
+ case FEATDEF_mobile_element:
case FEATDEF_mutation:
case FEATDEF_old_sequence:
case FEATDEF_polyA_signal:
@@ -7146,13 +7611,21 @@ static CharPtr FormatFeatureBlockEx (
sfp->idx.subtype,
&riboSlippage,
&transSplice,
- &artLoc);
+ &artLoc,
+ &hetPop,
+ &lowQual);
qvp [FTQUAL_exception].str = exception_string;
qvp [FTQUAL_exception_note].str = exception_note;
qvp [FTQUAL_ribosomal_slippage].ble = riboSlippage;
qvp [FTQUAL_trans_splicing].ble = transSplice;
qvp [FTQUAL_artificial_location].ble = artLoc;
+ if (hetPop) {
+ qvp [FTQUAL_artificial_location_str].str = "heterogeneous population sequenced";
+ }
+ if (lowQual) {
+ qvp [FTQUAL_artificial_location_str].str = "low-quality sequence region";
+ }
/*
if (StringHasNoText (qvp [FTQUAL_exception].str)) {
@@ -7512,7 +7985,20 @@ static CharPtr FormatFeatureBlockEx (
BioseqUnlock (unlockme);
- str = FFEndPrint (ajp, ffstring, format, 21, 21, 21, 21, "FT");
+ if (afp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE &&
+ (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
+ sprintf (sfx, "</span>");
+ }
+
+ str = NULL;
+
+ if (js != NULL) {
+ str = FFEndPrintEx (ajp, ffstring, format, 21, 21, 21, 21, "FT", js, sfx);
+ } else {
+ str = FFEndPrintEx (ajp, ffstring, format, 21, 21, 21, 21, "FT", pfx, sfx);
+ }
+
+ MemFree (js);
/* optionally populate gbseq for XML-ized GenBank format */
@@ -7572,7 +8058,7 @@ NLM_EXTERN CharPtr FormatFeatureBlock (
/* otherwise do regular flatfile formatting */
- return FormatFeatureBlockEx (ajp, asp, bsp, target, sfp, &fcontext, qvp,
+ return FormatFeatureBlockEx (afp, ajp, asp, bsp, target, sfp, &fcontext, qvp,
format, (IntFeatBlockPtr) bbp, ISA_aa (bsp->mol), TRUE);
}
@@ -7742,7 +8228,7 @@ NLM_EXTERN void DoImmediateRemoteFeatureFormat (
oldscope = SeqEntrySetScope (sep);
if (ajp->format != FTABLE_FMT) {
- str = FormatFeatureBlockEx (ajp, asp, bsp, target, sfp, &fcontext, qvp,
+ str = FormatFeatureBlockEx (afp, ajp, asp, bsp, target, sfp, &fcontext, qvp,
ajp->format, (IntFeatBlockPtr) bbp, ISA_aa (bsp->mol), TRUE);
}
@@ -7801,7 +8287,7 @@ NLM_EXTERN CharPtr FormatFeatureQuals (
qvp = MemNew (sizeof (QualVal) * (max + 5));
if (qvp == NULL) return NULL;
- str = FormatFeatureBlockEx (ajp, NULL, NULL, NULL, sfp, &fcontext, qvp,
+ str = FormatFeatureBlockEx (NULL, ajp, NULL, NULL, NULL, sfp, &fcontext, qvp,
GENBANK_FMT, ifp, FALSE, FALSE);
MemFree (qvp);
diff --git a/api/asn2gnb5.c b/api/asn2gnb5.c
index 724eab36..d311080e 100644
--- a/api/asn2gnb5.c
+++ b/api/asn2gnb5.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.171 $
+* $Revision: 1.185 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -149,6 +149,7 @@ static UrlData Nlm_url_base [] = {
{"dictyBase", "http://dictybase.org/db/cgi-bin/gene_page.pl?dictybaseid="},
{"ECOCYC", "http://biocyc.org/ECOLI/new-image?type=GENE&object="},
{"EcoGene", "http://ecogene.org/geneInfo.php?eg_id="},
+ {"ENSEMBL", "http://www.ensembl.org/id/"},
{"ERIC", "http://www.ericbrc.org/genbank/dbxref/"},
{"FANTOM_DB", "http://fantom.gsc.riken.jp/db/annotate/main.cgi?masterid="},
{"FLYBASE", "http://flybase.bio.indiana.edu/.bin/fbidq.html?"},
@@ -165,6 +166,7 @@ static UrlData Nlm_url_base [] = {
{"HOMD", "http://www.homd.org/"},
{"HPRD", "http://www.hprd.org/protein/"},
{"HSSP", "http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-newId+-e+hssp-ID:"},
+ {"IKMC", "http://www.knockoutmouse.org/martsearch/project/"},
{"IMGT/GENE-DB", "http://imgt.cines.fr/cgi-bin/GENElect.jv?species=Homo+sapiens&query=2+"},
{"IMGT/LIGM", "http://imgt.cines.fr:8104/cgi-bin/IMGTlect.jv?query=202+"},
{"InterimID", "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
@@ -175,7 +177,7 @@ static UrlData Nlm_url_base [] = {
{"JCM", "http://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM="},
{"JGIDB", "http://genome.jgi-psf.org/cgi-bin/jgrs?id="},
{"LocusID", "http://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
- {"MaizeGDB", "http://www.maizegdb.org/supersearch.php?show=loc&pattern="},
+ {"MaizeGDB", "http://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?"},
{"MGI", "http://www.informatics.jax.org/searches/accession_report.cgi?id=MGI:"},
{"MIM", "http://www.ncbi.nlm.nih.gov/entrez/dispomim.cgi?id="},
{"miRBase", "http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc="},
@@ -198,7 +200,7 @@ static UrlData Nlm_url_base [] = {
{"RATMAP", "http://ratmap.gen.gu.se/ShowSingleLocus.htm?accno="},
{"REBASE", "http://rebase.neb.com/rebase/enz/"},
{"RFAM", "http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?"},
- {"RGD", "http://rgd.mcw.edu/query/query.cgi?id="},
+ {"RGD", "http://rgd.mcw.edu/generalSearch/RgdSearch.jsp?quickSearch=1&searchKeyword="},
{"RiceGenes", "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object="},
{"SEED", "http://www.theseed.org/linkin.cgi?id="},
{"SGD", "http://db.yeastgenome.org/cgi-bin/SGD/locus.pl?locus="},
@@ -262,9 +264,7 @@ static void FF_www_get_url (
{
CharPtr base = NULL, prefix = NULL, profix = NULL, ident = NULL, suffix = NULL, url = NULL, ptr, str;
Char ch, buf [128], id [20], taxname [128];
- /*
Boolean is_numeric;
- */
Int2 R;
if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return;
@@ -369,7 +369,6 @@ static void FF_www_get_url (
url = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR=";
}
-
} else if (StringCmp (db, "HOMD") == 0) {
if (StringStr (identifier, "tax_") != NULL ) {
@@ -391,6 +390,31 @@ static void FF_www_get_url (
}
}
+ } else if (StringCmp (db, "MaizeGDB") == 0) {
+
+ is_numeric = TRUE;
+ str = identifier;
+ ch = *str;
+ while (ch != '\0') {
+ if (! IS_DIGIT (ch)) {
+ is_numeric = FALSE;
+ }
+ str++;
+ ch = *str;
+ }
+
+ if (is_numeric) {
+ prefix = "id=";
+ } else {
+ prefix = "term=";
+ }
+
+ } else if (StringCmp (db, "miRBase") == 0) {
+
+ if (StringStr (identifier, "MIMAT") != NULL) {
+ url = "http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=";
+ }
+
} else if (StringCmp (db, "niaEST") == 0) {
suffix = "&val=1";
@@ -1538,7 +1562,7 @@ static CharPtr FormatCitJour (
if (dp->data [1] != 0) {
sprintf (year, " (%ld)", (long) (1900 + dp->data [1]));
}
- } else {
+ } else if (StringDoesHaveText (dp->str) && StringCmp (dp->str, "?") != 0) {
StringCpy (year, " (");
StringNCat (year, dp->str, 4);
StringCat (year, ")");
@@ -2427,6 +2451,7 @@ static CharPtr FormatCitPat (
static CharPtr FormatCitGen (
FmtType format,
Boolean dropBadCitGens,
+ Boolean is_ed,
Boolean noAffilOnUnpub,
CitGenPtr cgp
)
@@ -2452,7 +2477,7 @@ static CharPtr FormatCitGen (
/* !!! temporarily put date in unpublished citation for QA !!! */
- if (dropBadCitGens) {
+ if (dropBadCitGens && is_ed) {
year [0] = '\0';
dp = cgp->date;
if (dp != NULL) {
@@ -2560,7 +2585,7 @@ static CharPtr FormatCitGen (
}
if (! StringHasNoText (pages)) {
- if (format == GENBANK_FMT) {
+ if (format == GENBANK_FMT || format == GENPEPT_FMT) {
AddValNodeString (&head, ", ", pages, NULL);
} else if (format == EMBL_FMT) {
AddValNodeString (&head, ":", pages, NULL);
@@ -2635,6 +2660,7 @@ static CharPtr GetPubJournal (
FmtType format,
ModType mode,
Boolean dropBadCitGens,
+ Boolean is_ed,
Boolean noAffilOnUnpub,
Boolean citArtIsoJta,
PubdescPtr pdp,
@@ -2668,7 +2694,7 @@ static CharPtr GetPubJournal (
break; /* skip just serial number */
}
}
- journal = FormatCitGen (format, dropBadCitGens, noAffilOnUnpub, cgp);
+ journal = FormatCitGen (format, dropBadCitGens, is_ed, noAffilOnUnpub, cgp);
}
break;
case PUB_Sub :
@@ -3154,6 +3180,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
ImprintPtr imp;
IndxPtr index;
IntRefBlockPtr irp;
+ Boolean is_ed = FALSE;
size_t len;
SeqLocPtr loc = NULL;
MedlineEntryPtr mep;
@@ -3334,7 +3361,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
if (rbp->sites == 0) {
- FFLineWrap(ffstring, temp, 0, 5, ASN2FF_EMBL_MAX, "RN");
+ FFLineWrap(ajp, ffstring, temp, 0, 5, ASN2FF_EMBL_MAX, "RN");
FFRecycleString(ajp, temp);
temp = FFGetString(ajp);
FFStartPrint(temp, afp->format, 0, 0, NULL, 0, 5, 5, "RP", FALSE);
@@ -3345,15 +3372,15 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
FFAddOneString (temp, "(sites)", FALSE, FALSE, TILDE_TO_SPACES);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else {
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
}
} else if (rbp->sites == 3) {
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else {
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
}
} else {
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
@@ -3410,9 +3437,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFAddOneString (temp, ")", FALSE, FALSE, TILDE_TO_SPACES);
}
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else {
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
}
}
@@ -3461,9 +3488,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
}
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else {
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA");
}
}
MemFree (str);
@@ -3476,9 +3503,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFStartPrint (temp, afp->format, 2, 12, "CONSRTM", 12, 5, 5, "RG", FALSE);
FFAddTextToString (temp, NULL, consortium, suffix, FALSE, FALSE, TILDE_TO_SPACES);
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else {
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RG");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RG");
}
}
MemFree (consortium);
@@ -3509,7 +3536,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE);
FFAddTextToString (temp, prefix, str, suffix, FALSE, FALSE, TILDE_TO_SPACES);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
}
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE);
@@ -3520,7 +3547,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
} else {
FFAddOneChar (temp, ';', FALSE);
}
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RT");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RT");
}
if (gbseq != NULL) {
@@ -3551,14 +3578,17 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
sip->choice == SEQID_TPD) {
strict_isojta = TRUE;
}
+ if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
+ is_ed = TRUE;
+ }
}
if (! strict_isojta) {
citArtIsoJta = FALSE;
}
str = GetPubJournal (afp->format, ajp->mode, ajp->flags.dropBadCitGens,
- ajp->flags.noAffilOnUnpub, citArtIsoJta, pdp, csp,
- bsp->id, index, ajp);
+ is_ed, ajp->flags.noAffilOnUnpub, citArtIsoJta,
+ pdp, csp, bsp->id, index, ajp);
if (str == NULL) {
str = StringSave ("Unpublished");
}
@@ -3598,9 +3628,9 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
MemFree (str);
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else {
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RL");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RL");
}
if (gbseq != NULL) {
@@ -3643,11 +3673,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
FF_www_muid (ajp, temp, muid);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
sprintf (buf, "MEDLINE; %ld.", (long) muid);
FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
}
}
@@ -3658,11 +3688,11 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFStartPrint (temp, afp->format, 3, 12, "PUBMED", 12, 5, 5, "RX", FALSE);
if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
FF_www_muid (ajp, temp, pmid);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
sprintf (buf, "PUBMED; %ld.", (long) pmid);
FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
}
}
FFRecycleString(ajp, temp);
@@ -3684,7 +3714,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFStartPrint (temp, afp->format, 2, 12, "REMARK", 12, 5, 5, NULL, FALSE);
/* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */
AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
}
}
@@ -3747,7 +3777,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
} else {
FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
}
- FFLineWrap (ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap (ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
}
@@ -3771,7 +3801,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, pdp->comment, FALSE, TRUE, TILDE_EXPAND);
/* AddCommentWithURLlinks(ajp, temp, NULL, pdp->comment, NULL); */
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
if (gbseq != NULL) {
@@ -3809,7 +3839,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
/* gibbsq comment section (fields may be copied from degenerate pubdesc) */
@@ -3830,7 +3860,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
@@ -3845,7 +3875,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, "Polyadenylate residues occurring in the figure were omitted from the sequence.", TRUE, TRUE, TILDE_EXPAND);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
@@ -3865,7 +3895,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
@@ -3906,7 +3936,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (StringDoesHaveText (crp->exp)) {
FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
}
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
} else if (crp->type == 3) {
FFRecycleString(ajp, temp);
@@ -3933,7 +3963,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (StringDoesHaveText (crp->exp)) {
FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
}
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
} else if (crp->type == 4) {
FFRecycleString(ajp, temp);
@@ -3960,7 +3990,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
if (StringDoesHaveText (crp->exp)) {
FFAddTextToString (temp, " to:[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
}
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
}
@@ -3982,7 +4012,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
/* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */
AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
}
@@ -4007,7 +4037,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
remprefix = "; ";
FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
FFAddOneString (temp, pubstatnote, FALSE, FALSE, TILDE_EXPAND);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
@@ -4036,7 +4066,7 @@ NLM_EXTERN CharPtr FormatReferenceBlock (
FFAddOneString (temp, "DOI: ", FALSE, FALSE, TILDE_EXPAND);
FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND);
}
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
prefix = NULL;
}
}
diff --git a/api/asn2gnb6.c b/api/asn2gnb6.c
index a70c1f5c..62bd744a 100644
--- a/api/asn2gnb6.c
+++ b/api/asn2gnb6.c
@@ -30,7 +30,7 @@
*
* Version Creation Date: 10/21/98
*
-* $Revision: 1.227 $
+* $Revision: 1.257 $
*
* File Description: New GenBank flatfile generator - work in progress
*
@@ -477,6 +477,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"HMP",
"HOMD",
"HSSP",
+ "IKMC",
"IMGT/GENE-DB",
"IMGT/HLA",
"IMGT/LIGM",
@@ -517,6 +518,7 @@ NLM_EXTERN CharPtr legalDbXrefs [] = {
"SGN",
"SoyBase",
"SubtiList",
+ "TAIR",
"taxon",
"TIGRFAM",
"UniGene",
@@ -547,6 +549,7 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
"GRIN",
"HMP",
"HOMD",
+ "IKMC",
"IMGT/HLA",
"IMGT/LIGM",
"JCM",
@@ -562,6 +565,7 @@ NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
"BEEBASE",
+ "BioProject",
"CCDS",
"CGNC",
"CloneID",
@@ -573,7 +577,6 @@ NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
"PBR",
"REBASE",
"SK-FST",
- "TAIR",
"VBRC",
NULL
};
@@ -996,7 +999,7 @@ NLM_EXTERN CharPtr FormatSourceBlock (
/* If the organelle prefix is already on the */
/* name, don't add it. */
- if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0)
+ if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0)
organelle = "";
if (StringHasNoText (common)) {
@@ -1228,13 +1231,13 @@ NLM_EXTERN CharPtr FormatOrganismBlock (
} else {
FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
}
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
FFRecycleString(ajp, temp);
temp = FFGetString(ajp);
FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
- FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
+ FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
FFRecycleString(ajp, temp);
/* optionally populate gbseq for XML-ized GenBank format */
@@ -1258,14 +1261,14 @@ NLM_EXTERN CharPtr FormatOrganismBlock (
} else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC");
FFRecycleString(ajp, temp);
if ( !StringHasNoText(organelle) ) {
temp = FFGetString(ajp);
if ( temp != NULL ) {
FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OG", FALSE);
FFAddTextToString(temp, NULL, organelle, NULL, TRUE, FALSE, TILDE_TO_SPACES);
- FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG");
+ FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG");
FFRecycleString(ajp, temp);
}
}
@@ -1428,9 +1431,9 @@ static CharPtr StrucCommentFFEndPrint (
if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;
if (format == GENBANK_FMT || format == GENPEPT_FMT) {
- FFLineWrap (temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX - 12, NULL);
+ FFLineWrap (ajp, temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX - 12, NULL);
} else {
- FFLineWrap (temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX - 5, eb_line_prefix);
+ FFLineWrap (ajp, temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX - 5, eb_line_prefix);
}
result = FFToCharPtr (temp);
FFRecycleString (ajp, temp);
@@ -1573,6 +1576,10 @@ static CharPtr GetStrForStructuredComment (
FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
} else if (GetWWW (ajp) && StringCmp (field, "url") == 0) {
AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
+ } else if (GetWWW (ajp) && StringNICmp (str, "http://", 7) == 0) {
+ AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
+ } else if (GetWWW (ajp) && StringNICmp (str, "https://", 8) == 0) {
+ AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
} else {
FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
}
@@ -1830,7 +1837,7 @@ static void CatenateCommentInGbseq (
if (gbseq->comment == NULL) {
gbseq->comment = cpy;
} else {
- tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (str) + 10);
+ tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (cpy) + 10);
if (tmp == NULL) return;
StringCpy (tmp, gbseq->comment);
if (ajp->oldXmlPolicy) {
@@ -3696,15 +3703,20 @@ static CharPtr FullNameFromInstCode (CharPtr code)
#define s_atcc_base "http://www.atcc.org/SearchCatalogs/linkin?id="
#define s_bcrc_base "http://strain.bcrc.firdi.org.tw/BSAS/controller?event=SEARCH&bcrc_no="
-#define s_ccmp_base "http://ccmp.bigelow.org/SD/display.php?strain=CCMP"
+#define s_cbs_base "http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+"
+#define s_ccap_base "http://www.ccap.ac.uk/strain_info.php?Strain_No="
+#define s_ccmp_base "https://ccmp.bigelow.org/node/1/strain/CCMP"
#define s_ccug_base "http://www.ccug.se/default.cfm?page=search_record.cfm&db=mc&s_tests=1&ccugno="
+#define s_cori_base "http://ccr.coriell.org/Sections/Search/Sample_Detail.aspx?Ref="
#define s_dsmz_base "http://www.dsmz.de/microorganisms/search_no.php?q="
#define s_fsu_base "http://www.prz.uni-jena.de/data.php?fsu="
#define s_icmp_base "http://nzfungi.landcareresearch.co.nz/icmp/results_cultures.asp?ID=&icmpVAR="
+#define s_kctc_base "http://www.brc.re.kr/English/_SearchView.aspx?sn="
#define s_ku_base "http://collections.nhm.ku.edu/"
-#define s_pcc_base "http://www.pasteur.fr/recherche/banques/PCC/docs/pcc"
+#define s_pcc_base "http://www.crbip.pasteur.fr/fiches/fichecata.jsp?crbip=PCC+"
#define s_pcmb_base "http://www2.bishopmuseum.org/HBS/PCMB/results3.asp?searchterm3="
#define s_pdd_base "http://nzfungi.landcareresearch.co.nz/html/data_collections_details.asp?CID="
+#define s_sag_base "http://sagdb.uni-goettingen.de/detailedList.php?str_number="
#define s_tgrc_base "http://tgrc.ucdavis.edu/Data/Acc/AccDetail.aspx?AccessionNum="
#define s_uam_base "http://arctos.database.museum/guid/"
#define s_ypm_base "http://peabody.research.yale.edu/cgi-bin/Query.Ledger?"
@@ -3723,7 +3735,6 @@ static CharPtr FullNameFromInstCode (CharPtr code)
#define s_ypmorn_pfx "LE=orn&ID="
#define s_bcrc_sfx "&type_id=6&keyword=;;"
-#define s_pcc_sfx ".htm"
typedef struct vouch {
CharPtr sites;
@@ -3736,17 +3747,23 @@ typedef struct vouch {
static VouchData Nlm_spec_vouchers [] = {
{ "ATCC", s_atcc_base, FALSE, NULL, NULL },
{ "BCRC", s_bcrc_base, FALSE, NULL, s_bcrc_sfx },
+ { "CBS", s_cbs_base, FALSE, NULL, NULL },
+ { "CCAP", s_ccap_base, FALSE, NULL, NULL },
{ "CCMP", s_ccmp_base, FALSE, NULL, NULL },
{ "CCUG", s_ccug_base, FALSE, NULL, NULL },
+ { "Coriell", s_cori_base, FALSE, NULL, NULL },
{ "CRCM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "DGR:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "DGR:Ento", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "DGR:Fish", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "DGR:Herp", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "DGR:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
+ { "DMNS:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
+ { "DMNS:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "DSM", s_dsmz_base, FALSE, NULL, NULL },
{ "FSU<DEU>", s_fsu_base, FALSE, NULL, NULL },
{ "ICMP", s_icmp_base, FALSE, NULL, NULL },
+ { "KCTC", s_kctc_base, FALSE, NULL, NULL },
{ "KU:I", s_ku_base, FALSE, s_kui_pfx, NULL },
{ "KU:IT", s_ku_base, FALSE, s_kuit_pfx, NULL },
{ "KWP:Ento", s_uam_base, TRUE, s_colon_pfx, NULL },
@@ -3762,10 +3779,11 @@ static VouchData Nlm_spec_vouchers [] = {
{ "MVZ:Page", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "MVZObs:Herp", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "NBSB:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
- { "PCC", s_pcc_base, FALSE, NULL, s_pcc_sfx },
+ { "PCC", s_pcc_base, FALSE, NULL, NULL },
{ "PCMB", s_pcmb_base, FALSE, NULL, NULL },
{ "PDD", s_pdd_base, FALSE, NULL, NULL },
{ "PSU<USA-OR>:Mamm", s_uam_base, FALSE, s_psu_pfx, NULL },
+ { "SAG", s_sag_base, FALSE, NULL, NULL },
{ "TGRC", s_tgrc_base, FALSE, NULL, NULL },
{ "UAM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL },
{ "UAM:Bryo", s_uam_base, TRUE, s_colon_pfx, NULL },
@@ -4061,14 +4079,16 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
BioSourcePtr biop = NULL;
BioseqPtr bsp;
BioseqSetPtr bssp;
- Char buf [80];
+ Char buf [128], pfx [512], sfx [128];
CharPtr common = NULL;
+ Int4 currGi = 0;
DbtagPtr dbt;
SeqMgrDescContext dcontext;
SeqMgrFeatContext fcontext;
GBFeaturePtr gbfeat = NULL;
GBSeqPtr gbseq;
Int2 i;
+ IntAsn2gbSectPtr iasp;
Uint1 idx;
IntSrcBlockPtr isp;
Boolean is_desc = TRUE;
@@ -4121,6 +4141,9 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
qvp = afp->qvp;
if (qvp == NULL) return NULL;
+ pfx [0] = '\0';
+ sfx [0] = '\0';
+
if (ajp->gbseq) {
gbseq = &asp->gbseq;
} else {
@@ -4164,6 +4187,26 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
if ( ffstring == NULL ) return NULL;
FFStartPrint (ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE);
+
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GI) {
+ currGi = (Int4) sip->data.intvalue;
+ }
+ }
+
+ iasp = (IntAsn2gbSectPtr) asp;
+
+ if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE &&
+ (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
+ if (! iasp->feat_js_prefix_added) {
+ sprintf (pfx, "<span id=\"feature_%ld_source_%ld\" class=\"feature\"><script>if (typeof(oData) == \"undefined\") oData = []; oData.push ({gi:%s,acc:\"%s\",features: {}});</script>",
+ (long) currGi, (long) isp->source_count, iasp->gi, iasp->acc);
+ iasp->feat_js_prefix_added = TRUE;
+ } else {
+ sprintf (pfx, "<span id=\"feature_%ld_source_%ld\" class=\"feature\">", (long) currGi, (long) isp->source_count);
+ }
+ }
+
FFAddOneString (ffstring, "source", FALSE, FALSE, TILDE_IGNORE);
FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE);
@@ -4878,7 +4921,12 @@ NLM_EXTERN CharPtr FormatSourceFeatBlock (
/* and then deal with the various note types separately (not in order table) */
- str = FFEndPrint(ajp, ffstring, afp->format, 21, 21, 5, 21, "FT");
+ if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE &&
+ (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
+ sprintf (sfx, "</span>");
+ }
+
+ str = FFEndPrintEx (ajp, ffstring, afp->format, 21, 21, 5, 21, "FT", pfx, sfx);
/* optionally populate gbseq for XML-ized GenBank format */
@@ -5034,12 +5082,12 @@ static void PrintSeqLine (
sprintf (pos, "%9ld", (long) (start + 1));
FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
FFAddOneChar(ffstring, ' ', FALSE);
- if (ajp != NULL && ajp->seqspans) {
+ if (ajp != NULL && GetWWW (ajp)) {
sprintf (tmp, "<span class=\"ff_line\" id=\"gi_%ld_%ld\">", (long) gi, (long) (start + 1));
FFAddOneString(ffstring, tmp, FALSE, FALSE, TILDE_TO_SPACES);
}
FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
- if (ajp != NULL && ajp->seqspans) {
+ if (ajp != NULL && GetWWW (ajp)) {
FFAddOneString(ffstring, "</span>", FALSE, FALSE, TILDE_TO_SPACES);
}
FFAddOneChar(ffstring, '\n', FALSE);
@@ -5117,7 +5165,7 @@ static void PrintGenome (
Boolean first = TRUE;
SeqIdPtr freeid = NULL, sid = NULL, newid = NULL;
SeqLocPtr slp = NULL;
- Int4 from = 0, to = 0, start = 0, stop = 0, gi = 0;
+ Int4 start = 0, stop = 0, gi = 0;
BioseqPtr bsp = NULL;
Int2 p1 = 0, p2 = 0;
@@ -5125,12 +5173,11 @@ static void PrintGenome (
gibuf [0] = '\0';
vbuf [0] = '\0';
for (slp = slp_head; slp; slp = slp->next) {
- from = to = 0;
sid = SeqLocId (slp);
- if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE) {
- start = from = SeqLocStart (slp);
- stop = to = SeqLocStop (slp);
- } else if (slp->choice == SEQLOC_NULL){
+ if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT || slp->choice == SEQLOC_WHOLE) {
+ start = SeqLocStart (slp);
+ stop = SeqLocStop (slp);
+ } else if (slp->choice == SEQLOC_NULL) {
sprintf (vbuf, ",%s", "gap()");
FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
continue;
@@ -5230,6 +5277,82 @@ static void PrintGenome (
}
}
+/*
+ * RevCompDelta returns a newly allocated delta chain holding copies of the
+ * components of seq_ext in reverse order.
+ *
+ *   - choice 1 (SeqLoc): SEQLOC_NULL is copied as a new null location.
+ *     SEQLOC_INT is rebuilt with AddIntervalToLocation; from/to are swapped
+ *     when the source strand is not minus, and the 5'/3' partial flags are
+ *     passed in exchanged order.  NOTE(review): this presumably relies on
+ *     AddIntervalToLocation inferring the strand from from > to - confirm.
+ *     Any other SeqLoc type (or a NULL SeqLoc) is silently omitted.
+ *   - choice 2 (SeqLit): only literals without seq_data are copied (length
+ *     and seq_data_type; fuzz is deliberately not copied).
+ *
+ * Returns NULL when seq_ext is empty, or when a literal is missing or
+ * carries sequence data; in the latter case everything built so far is
+ * released before returning.  The caller owns the returned chain.
+ */
+static DeltaSeqPtr RevCompDelta (
+  DeltaSeqPtr seq_ext
+)
+
+{
+  DeltaSeqPtr  dsp;
+  ValNodePtr   head = NULL;
+  Int4         from, to, tmp;
+  SeqLocPtr    nslp, slp;
+  Boolean      partial5, partial3;
+  SeqIntPtr    sintp;
+  SeqLitPtr    slitp, slip;
+  ValNodePtr   vnp;
+
+  for (dsp = seq_ext; dsp != NULL; dsp = dsp->next) {
+    vnp = NULL;
+
+    if (dsp->choice == 1) {
+
+      slp = (SeqLocPtr) dsp->data.ptrvalue;
+      if (slp != NULL) {
+
+        if (slp->choice == SEQLOC_NULL) {
+
+          nslp = ValNodeAddPointer (NULL, SEQLOC_NULL, NULL);
+
+          vnp = ValNodeAddPointer (NULL, 1, nslp);
+
+        } else if (slp->choice == SEQLOC_INT) {
+
+          sintp = (SeqIntPtr) slp->data.ptrvalue;
+          if (sintp != NULL) {
+            CheckSeqLocForPartial (slp, &partial5, &partial3);
+            from = sintp->from;
+            to = sintp->to;
+            if (sintp->strand != Seq_strand_minus) {
+              tmp = from;
+              from = to;
+              to = tmp;
+            }
+            /* partial flags are exchanged because the interval is flipped */
+            nslp = AddIntervalToLocation (NULL, sintp->id, from, to, partial3, partial5);
+
+            vnp = ValNodeAddPointer (NULL, 1, nslp);
+
+          }
+        }
+      }
+
+    } else if (dsp->choice == 2) {
+
+      slitp = (SeqLitPtr) dsp->data.ptrvalue;
+      if (slitp != NULL && slitp->seq_data == NULL) {
+        slip = SeqLitNew ();
+        if (slip != NULL) {
+          slip->length = slitp->length;
+          /* not copying fuzz */
+          slip->seq_data_type = slitp->seq_data_type;
+          vnp = ValNodeAddPointer (NULL, 2, (Pointer) slip);
+        }
+      } else {
+
+        /* abandon the copy: free node by node with DeltaSeqFree so the */
+        /* SeqLoc / SeqLit copies hanging off the nodes are released too */
+        /* (ValNodeFree would free only the nodes and leak the payloads) */
+
+        while (head != NULL) {
+          vnp = head->next;
+          head->next = NULL;
+          DeltaSeqFree (head);
+          head = vnp;
+        }
+        return NULL;
+      }
+    }
+
+    /* save in new list in reverse order */
+
+    if (vnp != NULL) {
+      vnp->next = head;
+      head = vnp;
+    }
+  }
+
+  return head;
+}
+
NLM_EXTERN CharPtr FormatContigBlock (
Asn2gbFormatPtr afp,
BaseBlockPtr bbp
@@ -5239,13 +5362,19 @@ NLM_EXTERN CharPtr FormatContigBlock (
IntAsn2gbJobPtr ajp;
Asn2gbSectPtr asp;
BioseqPtr bsp;
+ DeltaSeqPtr delta_head = NULL;
DeltaSeqPtr dsp;
+ DeltaSeqPtr dspnext;
IntFuzzPtr fuzz;
GBSeqPtr gbseq;
Boolean is_na;
SeqLitPtr litp;
+ DeltaSeqPtr new_delta = NULL;
CharPtr prefix = NULL;
+ Boolean rev_comp = FALSE;
Boolean segWithParts = FALSE;
+ SeqIntPtr sintp;
+ SeqLocPtr slp;
SeqLocPtr slp_head = NULL;
CharPtr str;
Char tmp [16];
@@ -5267,6 +5396,18 @@ NLM_EXTERN CharPtr FormatContigBlock (
is_na = ISA_na (bsp->mol);
+ if (ajp->ajp.slp != NULL) {
+ slp = ajp->ajp.slp;
+ if (slp->choice == SEQLOC_INT) {
+ sintp = (SeqIntPtr) slp->data.ptrvalue;
+ if (sintp != NULL) {
+ if (sintp->from == 0 && sintp->to == bsp->length - 1 && sintp->strand == Seq_strand_minus) {
+ rev_comp = TRUE;
+ }
+ }
+ }
+ }
+
FFStartPrint (ffstring, afp->format, 0, 0, "CONTIG", 12, 5, 5, "CO", FALSE);
/*
if ( GetWWW(ajp) ) {
@@ -5292,7 +5433,14 @@ NLM_EXTERN CharPtr FormatContigBlock (
} else if (bsp->seq_ext_type == 4) {
- for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp=dsp->next) {
+ if (rev_comp) {
+ new_delta = RevCompDelta ((DeltaSeqPtr) bsp->seq_ext);
+ delta_head = new_delta;
+ } else {
+ delta_head = (DeltaSeqPtr) bsp->seq_ext;
+ }
+
+ for (dsp = delta_head; dsp != NULL; dsp = dsp->next) {
if (dsp->choice == 1) {
slp_head = (SeqLocPtr) dsp->data.ptrvalue;
@@ -5359,6 +5507,16 @@ NLM_EXTERN CharPtr FormatContigBlock (
StripAllSpaces (gbseq->contig);
}
+  /* release the temporary reversed chain obtained from RevCompDelta; each */
+  /* node is detached and freed on its own, so the successor must come from */
+  /* the saved dspnext - reading dsp->next after DeltaSeqFree would touch */
+  /* freed memory (and, having just been set to NULL, would end the loop */
+  /* after the first node, leaking the remainder of the chain) */
+
+  if (new_delta != NULL) {
+    dsp = new_delta;
+    while (dsp != NULL) {
+      dspnext = dsp->next;
+      dsp->next = NULL;
+      DeltaSeqFree (dsp);
+      dsp = dspnext;
+    }
+  }
+
return str;
}
@@ -5536,8 +5694,11 @@ static Int2 ProcessGapSpecialFormat (
Char gi_buf [16];
Boolean is_na;
Char pad;
+ Char rgn_buf [64];
SeqIdPtr sip;
+ SeqLocPtr slp;
Int2 startgapgap = 0, endgap = 0;
+ Int4 from, to;
is_na = ISA_na (bsp->mol);
if (is_na) {
@@ -5574,6 +5735,13 @@ static Int2 ProcessGapSpecialFormat (
sprintf(fmt_buf, "?fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY);
if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
StringCat (fmt_buf, "&report=gbwithparts");
+ if (ajp->ajp.slp != NULL) {
+ slp = ajp->ajp.slp;
+ from = SeqLocStart (slp) + 1;
+ to = SeqLocStop (slp) + 1;
+ sprintf (rgn_buf, "&from=%ld&to=%ld", (long) from, (long) to);
+ StringCat (fmt_buf, rgn_buf);
+ }
}
FFAddOneString (ffstring, " <a href=\"", FALSE, FALSE, TILDE_IGNORE);
if (is_na) {
@@ -5592,6 +5760,7 @@ static Int2 ProcessGapSpecialFormat (
FixGapAtStart (buf, ' ');
} else if (startgapgap > 0) {
FixGapAtStart (buf, pad);
+ startgapgap = 0;
}
endgap = GapAtEnd (buf);
@@ -5870,13 +6039,15 @@ NLM_EXTERN CharPtr FormatSlashBlock (
)
{
- IntAsn2gbJobPtr ajp;
- Asn2gbSectPtr asp;
- GBFeaturePtr currf, headf, nextf;
- GBReferencePtr currr, headr, nextr;
- GBSeqPtr gbseq, gbtmp;
- IndxPtr index;
- INSDSeq is;
+ IntAsn2gbJobPtr ajp;
+ Asn2gbSectPtr asp;
+ GBFeaturePtr currf, headf, nextf;
+ GBReferencePtr currr, headr, nextr;
+ Uint1 featdeftype;
+ GBSeqPtr gbseq, gbtmp;
+ IntAsn2gbSectPtr iasp;
+ IndxPtr index;
+ INSDSeq is;
/*
Int2 moltype, strandedness, topology;
*/
@@ -5887,6 +6058,8 @@ NLM_EXTERN CharPtr FormatSlashBlock (
asp = afp->asp;
if (asp == NULL) return NULL;
+ iasp = (IntAsn2gbSectPtr) asp;
+
/* sort and unique indexes */
index = ajp->index;
@@ -6017,7 +6190,15 @@ NLM_EXTERN CharPtr FormatSlashBlock (
GBSeqFree (gbtmp);
}
- /* slash always has string pre-allocated by add slash block function */
+ /* then clean up javascript components */
+
+ iasp->gi = MemFree (iasp->gi);
+ iasp->acc = MemFree (iasp->acc);
+ for (featdeftype = 0; featdeftype < FEATDEF_MAX; featdeftype++) {
+ iasp->feat_key [featdeftype] = MemFree (iasp->feat_key [featdeftype]);
+ }
+
+ /* slash has string pre-allocated by add slash block function */
return StringSaveNoNull (bbp->string);
}
diff --git a/api/asn2gnbi.h b/api/asn2gnbi.h
index a9e09d89..8c1512a2 100644
--- a/api/asn2gnbi.h
+++ b/api/asn2gnbi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/30/03
*
-* $Revision: 1.123 $
+* $Revision: 1.138 $
*
* File Description: New GenBank flatfile generator, internal header
*
@@ -107,6 +107,11 @@ typedef struct asn2gbflags {
typedef struct int_Asn2gbSect {
Asn2gbSect asp;
+ Int4 feat_counts [FEATDEF_MAX];
+ CharPtr gi;
+ CharPtr acc;
+ CharPtr feat_key [FEATDEF_MAX];
+ Boolean feat_js_prefix_added;
} IntAsn2gbSect, PNTR IntAsn2gbSectPtr;
/* string structure */
@@ -160,6 +165,7 @@ typedef struct int_asn2gb_job {
Boolean specialGapFormat;
Boolean hideGoTerms;
Boolean multiIntervalGenes;
+ Boolean segmentedBioseqs;
Boolean reindex;
Int4 seqGapCurrLen;
ValNodePtr gihead;
@@ -188,6 +194,7 @@ typedef union qualval {
RNAGenPtr rgp;
GeneNomenclaturePtr gnp;
PCRReactionSetPtr prp;
+ DbtagPtr dbt;
} QualVal, PNTR QualValPtr;
/* structure passed to individual paragraph format functions */
@@ -306,6 +313,8 @@ typedef struct asn2gbwork {
Boolean copyGpsCdsUp;
Boolean copyGpsGeneDown;
+ Boolean isRefSeq;
+
Boolean showContigAndSeq;
Char basename [SEQID_MAX_LEN];
@@ -384,6 +393,7 @@ typedef struct int_src_block {
ValNodePtr vnp;
Int4 left;
Int4 right;
+ Int4 source_count;
} IntSrcBlock, PNTR IntSrcBlockPtr;
/* internal feature block has fields on top of FeatBlock fields */
@@ -400,6 +410,7 @@ typedef struct int_feat_block {
Boolean firstfeat;
Int4 left;
Int4 right;
+ Int4 feat_count; /* unique in combination with feature type */
} IntFeatBlock, PNTR IntFeatBlockPtr;
/* internal cds block has fields on top of IntFeatBlock fields */
@@ -488,7 +499,11 @@ typedef enum {
Qual_class_voucher,
Qual_class_lat_lon,
Qual_class_mobile_element,
- Qual_class_tag_peptide
+ Qual_class_tag_peptide,
+ Qual_class_variation_id,
+ Qual_class_delta_item,
+ Qual_class_variation_set,
+ Qual_class_experiment
} QualType;
/* source 'feature' */
@@ -609,6 +624,7 @@ typedef enum {
FTQUAL_allele = 1,
FTQUAL_anticodon,
FTQUAL_artificial_location,
+ FTQUAL_artificial_location_str,
FTQUAL_bond,
FTQUAL_bond_type,
FTQUAL_bound_moiety,
@@ -622,6 +638,7 @@ typedef enum {
FTQUAL_codon_start,
FTQUAL_cons_splice,
FTQUAL_db_xref,
+ FTQUAL_delta_item,
FTQUAL_derived_from,
FTQUAL_direction,
FTQUAL_EC_number,
@@ -662,6 +679,7 @@ typedef enum {
FTQUAL_map,
FTQUAL_maploc,
FTQUAL_mobile_element,
+ FTQUAL_mobile_element_type,
FTQUAL_mod_base,
FTQUAL_modelev,
FTQUAL_mol_wt,
@@ -729,6 +747,8 @@ typedef enum {
FTQUAL_trna_codons_note,
FTQUAL_UniProtKB_evidence,
FTQUAL_usedin,
+ FTQUAL_variation_id,
+ FTQUAL_variation_set,
FTQUAL_xtra_prod_quals,
ASN2GNBK_TOTAL_FEATUR
} FtQualType;
@@ -840,12 +860,18 @@ NLM_EXTERN void FFCatenateSubString (
Uint4 line_max
);
NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip);
+NLM_EXTERN CharPtr FFToCharPtrEx (StringItemPtr sip, CharPtr pfx, CharPtr sfx);
NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip);
+NLM_EXTERN void FFSkipHTMLAmpersandEscape (StringItemPtr PNTR iterp, Int4Ptr ip);
NLM_EXTERN Boolean FFIsStartOfLink (
StringItemPtr iter,
Int4 pos );
+NLM_EXTERN Boolean FFIsStartOfHTMLAmpersandEscape (
+ StringItemPtr iter,
+ Int4 pos );
+
NLM_EXTERN void FFSavePosition(StringItemPtr ffstring, StringItemPtr PNTR bufptr, Int4 PNTR posptr);
NLM_EXTERN void FFTrim (
StringItemPtr ffstring,
@@ -863,9 +889,11 @@ NLM_EXTERN void FFAdvanceChar(
);
NLM_EXTERN void FFCalculateLineBreak (
StringItemPtr PNTR break_sip, Int4 PNTR break_pos,
- Int4 init_indent, Int4 visible
+ Int4 init_indent, Int4 visible,
+ Boolean is_html
);
NLM_EXTERN void FFLineWrap (
+ IntAsn2gbJobPtr ajp,
StringItemPtr dest,
StringItemPtr src,
Int4 init_indent,
@@ -904,6 +932,18 @@ NLM_EXTERN CharPtr FFEndPrint (
Int2 eb_cont_indent,
CharPtr eb_line_prefix
);
+NLM_EXTERN CharPtr FFEndPrintEx (
+ IntAsn2gbJobPtr ajp,
+ StringItemPtr ffstring,
+ FmtType format,
+ Int2 gb_init_indent,
+ Int2 gb_cont_indent,
+ Int2 eb_init_indent,
+ Int2 eb_cont_indent,
+ CharPtr eb_line_prefix,
+ CharPtr pfx,
+ CharPtr sfx
+);
NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring);
NLM_EXTERN Char FFCharAt(StringItemPtr ffstring, Uint4 pos);
NLM_EXTERN Char FFFindChar (
@@ -919,6 +959,12 @@ NLM_EXTERN Int4 FFStringSearch (
const CharPtr pattern,
Uint4 position
);
+NLM_EXTERN Boolean FFStartsWith(
+ StringItemPtr text,
+ Int4 text_pos,
+ const CharPtr pattern,
+ Boolean case_insens
+);
/*
* Scans the given buffer from a given scan position, for the next occurrence of
diff --git a/api/ecnum_ambiguous.inc b/api/ecnum_ambiguous.inc
index a9408e2f..3535c7f7 100644
--- a/api/ecnum_ambiguous.inc
+++ b/api/ecnum_ambiguous.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_ambiguous.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $
+/* $Id: ecnum_ambiguous.inc,v 1.3 2011/06/30 16:04:31 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,672 +31,568 @@
*/
static const char* const kECNum_ambiguous[] = {
- "1.-.-.-",
- "1.1.-.-",
- "1.1.1.-",
- "1.1.1.n",
- "1.1.2.-",
- "1.1.2.n",
- "1.1.3.-",
- "1.1.3.n",
- "1.1.4.-",
- "1.1.4.n",
- "1.1.5.-",
- "1.1.5.n",
- "1.1.98.-",
- "1.1.98.n",
- "1.1.99.-",
- "1.1.99.n",
- "1.1.n.n",
- "1.2.-.-",
- "1.2.1.-",
- "1.2.1.n",
- "1.2.2.-",
- "1.2.2.n",
- "1.2.3.-",
- "1.2.3.n",
- "1.2.4.-",
- "1.2.4.n",
- "1.2.7.-",
- "1.2.7.n",
- "1.2.99.-",
- "1.2.99.n",
- "1.2.n.n",
- "1.3.-.-",
- "1.3.1.-",
- "1.3.1.n",
- "1.3.2.-",
- "1.3.2.n",
- "1.3.3.-",
- "1.3.3.n",
- "1.3.5.-",
- "1.3.5.n",
- "1.3.7.-",
- "1.3.7.n",
- "1.3.99.-",
- "1.3.99.n",
- "1.3.n.n",
- "1.4.-.-",
- "1.4.1.-",
- "1.4.1.n",
- "1.4.2.-",
- "1.4.2.n",
- "1.4.3.-",
- "1.4.3.n",
- "1.4.4.-",
- "1.4.4.n",
- "1.4.5.-",
- "1.4.5.n",
- "1.4.7.-",
- "1.4.7.n",
- "1.4.99.-",
- "1.4.99.n",
- "1.4.n.n",
- "1.5.-.-",
- "1.5.1.-",
- "1.5.1.n",
- "1.5.3.-",
- "1.5.3.n",
- "1.5.4.-",
- "1.5.4.n",
- "1.5.5.-",
- "1.5.5.n",
- "1.5.7.-",
- "1.5.7.n",
- "1.5.8.-",
- "1.5.8.n",
- "1.5.99.-",
- "1.5.99.n",
- "1.5.n.n",
- "1.6.-.-",
- "1.6.1.-",
- "1.6.1.n",
- "1.6.2.-",
- "1.6.2.n",
- "1.6.3.-",
- "1.6.3.n",
- "1.6.4.-",
- "1.6.4.n",
- "1.6.5.-",
- "1.6.5.n",
- "1.6.6.-",
- "1.6.6.n",
- "1.6.7.-",
- "1.6.7.n",
- "1.6.8.-",
- "1.6.8.n",
- "1.6.99.-",
- "1.6.99.n",
- "1.6.n.n",
- "1.7.-.-",
- "1.7.1.-",
- "1.7.1.n",
- "1.7.2.-",
- "1.7.2.n",
- "1.7.3.-",
- "1.7.3.n",
- "1.7.5.-",
- "1.7.5.n",
- "1.7.7.-",
- "1.7.7.n",
- "1.7.99.-",
- "1.7.99.n",
- "1.7.n.n",
- "1.8.-.-",
- "1.8.1.-",
- "1.8.1.n",
- "1.8.2.-",
- "1.8.2.n",
- "1.8.3.-",
- "1.8.3.n",
- "1.8.4.-",
- "1.8.4.n",
- "1.8.5.-",
- "1.8.5.n",
- "1.8.6.-",
- "1.8.6.n",
- "1.8.7.-",
- "1.8.7.n",
- "1.8.98.-",
- "1.8.98.n",
- "1.8.99.-",
- "1.8.99.n",
- "1.8.n.n",
- "1.9.-.-",
- "1.9.3.-",
- "1.9.3.n",
- "1.9.6.-",
- "1.9.6.n",
- "1.9.99.-",
- "1.9.99.n",
- "1.9.n.n",
- "1.10.-.-",
- "1.10.1.-",
- "1.10.1.n",
- "1.10.2.-",
- "1.10.2.n",
- "1.10.3.-",
- "1.10.3.n",
- "1.10.99.-",
- "1.10.99.n",
- "1.10.n.n",
- "1.11.-.-",
- "1.11.1.-",
- "1.11.1.n",
- "1.11.n.n",
- "1.12.-.-",
- "1.12.1.-",
- "1.12.1.n",
- "1.12.2.-",
- "1.12.2.n",
- "1.12.5.-",
- "1.12.5.n",
- "1.12.7.-",
- "1.12.7.n",
- "1.12.98.-",
- "1.12.98.n",
- "1.12.99.-",
- "1.12.99.n",
- "1.12.n.n",
- "1.13.-.-",
- "1.13.1.-",
- "1.13.1.n",
- "1.13.11.-",
- "1.13.11.n",
- "1.13.12.-",
- "1.13.12.n",
- "1.13.99.-",
- "1.13.99.n",
- "1.13.n.n",
- "1.14.-.-",
- "1.14.1.-",
- "1.14.1.n",
- "1.14.2.-",
- "1.14.2.n",
- "1.14.3.-",
- "1.14.3.n",
- "1.14.11.-",
- "1.14.11.n",
- "1.14.12.-",
- "1.14.12.n",
- "1.14.13.-",
- "1.14.13.n",
- "1.14.14.-",
- "1.14.14.n",
- "1.14.15.-",
- "1.14.15.n",
- "1.14.16.-",
- "1.14.16.n",
- "1.14.17.-",
- "1.14.17.n",
- "1.14.18.-",
- "1.14.18.n",
- "1.14.19.-",
- "1.14.19.n",
- "1.14.20.-",
- "1.14.20.n",
- "1.14.21.-",
- "1.14.21.n",
- "1.14.99.-",
- "1.14.99.n",
- "1.14.n.n",
- "1.15.-.-",
- "1.15.1.-",
- "1.15.1.n",
- "1.15.n.n",
- "1.16.-.-",
- "1.16.1.-",
- "1.16.1.n",
- "1.16.3.-",
- "1.16.3.n",
- "1.16.8.-",
- "1.16.8.n",
- "1.16.n.n",
- "1.17.-.-",
- "1.17.1.-",
- "1.17.1.n",
- "1.17.3.-",
- "1.17.3.n",
- "1.17.4.-",
- "1.17.4.n",
- "1.17.5.-",
- "1.17.5.n",
- "1.17.7.-",
- "1.17.7.n",
- "1.17.99.-",
- "1.17.99.n",
- "1.17.n.n",
- "1.18.-.-",
- "1.18.1.-",
- "1.18.1.n",
- "1.18.2.-",
- "1.18.2.n",
- "1.18.3.-",
- "1.18.3.n",
- "1.18.6.-",
- "1.18.6.n",
- "1.18.96.-",
- "1.18.96.n",
- "1.18.99.-",
- "1.18.99.n",
- "1.18.n.n",
- "1.19.-.-",
- "1.19.6.-",
- "1.19.6.n",
- "1.19.n.n",
- "1.20.-.-",
- "1.20.1.-",
- "1.20.1.n",
- "1.20.4.-",
- "1.20.4.n",
- "1.20.98.-",
- "1.20.98.n",
- "1.20.99.-",
- "1.20.99.n",
- "1.20.n.n",
- "1.21.-.-",
- "1.21.3.-",
- "1.21.3.n",
- "1.21.4.-",
- "1.21.4.n",
- "1.21.99.-",
- "1.21.99.n",
- "1.21.n.n",
- "1.22.-.-",
- "1.22.1.-",
- "1.22.1.n",
- "1.22.n.n",
- "1.97.-.-",
- "1.97.1.-",
- "1.97.1.n",
- "1.97.n.n",
- "1.98.-.-",
- "1.98.1.-",
- "1.98.1.n",
- "1.98.n.n",
- "1.99.-.-",
- "1.99.1.-",
- "1.99.1.n",
- "1.99.2.-",
- "1.99.2.n",
- "1.99.n.n",
- "1.n.n.n",
- "2.-.-.-",
- "2.1.-.-",
- "2.1.1.-",
- "2.1.1.n",
- "2.1.2.-",
- "2.1.2.n",
- "2.1.3.-",
- "2.1.3.n",
- "2.1.4.-",
- "2.1.4.n",
- "2.1.n.n",
- "2.2.-.-",
- "2.2.1.-",
- "2.2.1.n",
- "2.2.n.n",
- "2.3.-.-",
- "2.3.1.-",
- "2.3.1.n",
- "2.3.2.-",
- "2.3.2.n",
- "2.3.3.-",
- "2.3.3.n",
- "2.3.n.n",
- "2.4.-.-",
- "2.4.1.-",
- "2.4.1.n",
- "2.4.2.-",
- "2.4.2.n",
- "2.4.99.-",
- "2.4.99.n",
- "2.4.n.n",
- "2.5.-.-",
- "2.5.1.-",
- "2.5.1.n",
- "2.5.n.n",
- "2.6.-.-",
- "2.6.1.-",
- "2.6.1.n",
- "2.6.2.-",
- "2.6.2.n",
- "2.6.3.-",
- "2.6.3.n",
- "2.6.99.-",
- "2.6.99.n",
- "2.6.n.n",
- "2.7.-.-",
- "2.7.1.-",
- "2.7.1.n",
- "2.7.2.-",
- "2.7.2.n",
- "2.7.3.-",
- "2.7.3.n",
- "2.7.4.-",
- "2.7.4.n",
- "2.7.5.-",
- "2.7.5.n",
- "2.7.6.-",
- "2.7.6.n",
- "2.7.7.-",
- "2.7.7.n",
- "2.7.8.-",
- "2.7.8.n",
- "2.7.9.-",
- "2.7.9.n",
- "2.7.10.-",
- "2.7.10.n",
- "2.7.11.-",
- "2.7.11.n",
- "2.7.12.-",
- "2.7.12.n",
- "2.7.13.-",
- "2.7.13.n",
- "2.7.99.-",
- "2.7.99.n",
- "2.7.n.n",
- "2.8.-.-",
- "2.8.1.-",
- "2.8.1.n",
- "2.8.2.-",
- "2.8.2.n",
- "2.8.3.-",
- "2.8.3.n",
- "2.8.4.-",
- "2.8.4.n",
- "2.8.n.n",
- "2.9.-.-",
- "2.9.1.-",
- "2.9.1.n",
- "2.9.n.n",
- "2.n.n.n",
- "3.-.-.-",
- "3.1.-.-",
- "3.1.1.-",
- "3.1.1.n",
- "3.1.2.-",
- "3.1.2.n",
- "3.1.3.-",
- "3.1.3.n",
- "3.1.4.-",
- "3.1.4.n",
- "3.1.5.-",
- "3.1.5.n",
- "3.1.6.-",
- "3.1.6.n",
- "3.1.7.-",
- "3.1.7.n",
- "3.1.8.-",
- "3.1.8.n",
- "3.1.11.-",
- "3.1.11.n",
- "3.1.13.-",
- "3.1.13.n",
- "3.1.14.-",
- "3.1.14.n",
- "3.1.15.-",
- "3.1.15.n",
- "3.1.16.-",
- "3.1.16.n",
- "3.1.21.-",
- "3.1.21.n",
- "3.1.22.-",
- "3.1.22.n",
- "3.1.23.-",
- "3.1.23.n",
- "3.1.24.-",
- "3.1.24.n",
- "3.1.25.-",
- "3.1.25.n",
- "3.1.26.-",
- "3.1.26.n",
- "3.1.27.-",
- "3.1.27.n",
- "3.1.30.-",
- "3.1.30.n",
- "3.1.31.-",
- "3.1.31.n",
- "3.1.n.n",
- "3.2.-.-",
- "3.2.1.-",
- "3.2.1.n",
- "3.2.2.-",
- "3.2.2.n",
- "3.2.3.-",
- "3.2.3.n",
- "3.2.n.n",
- "3.3.-.-",
- "3.3.1.-",
- "3.3.1.n",
- "3.3.2.-",
- "3.3.2.n",
- "3.3.n.n",
- "3.4.-.-",
- "3.4.1.-",
- "3.4.1.n",
- "3.4.2.-",
- "3.4.2.n",
- "3.4.3.-",
- "3.4.3.n",
- "3.4.4.-",
- "3.4.4.n",
- "3.4.11.-",
- "3.4.11.n",
- "3.4.12.-",
- "3.4.12.n",
- "3.4.13.-",
- "3.4.13.n",
- "3.4.14.-",
- "3.4.14.n",
- "3.4.15.-",
- "3.4.15.n",
- "3.4.16.-",
- "3.4.16.n",
- "3.4.17.-",
- "3.4.17.n",
- "3.4.18.-",
- "3.4.18.n",
- "3.4.19.-",
- "3.4.19.n",
- "3.4.21.-",
- "3.4.21.n",
- "3.4.22.-",
- "3.4.22.n",
- "3.4.23.-",
- "3.4.23.n",
- "3.4.24.-",
- "3.4.24.n",
- "3.4.25.-",
- "3.4.25.n",
- "3.4.99.-",
- "3.4.99.n",
- "3.4.n.n",
- "3.5.-.-",
- "3.5.1.-",
- "3.5.1.n",
- "3.5.2.-",
- "3.5.2.n",
- "3.5.3.-",
- "3.5.3.n",
- "3.5.4.-",
- "3.5.4.n",
- "3.5.5.-",
- "3.5.5.n",
- "3.5.99.-",
- "3.5.99.n",
- "3.5.n.n",
- "3.6.-.-",
- "3.6.1.-",
- "3.6.1.n",
- "3.6.2.-",
- "3.6.2.n",
- "3.6.3.-",
- "3.6.3.n",
- "3.6.4.-",
- "3.6.4.n",
- "3.6.5.-",
- "3.6.5.n",
- "3.6.n.n",
- "3.7.-.-",
- "3.7.1.-",
- "3.7.1.n",
- "3.7.n.n",
- "3.8.-.-",
- "3.8.1.-",
- "3.8.1.n",
- "3.8.2.-",
- "3.8.2.n",
- "3.8.n.n",
- "3.9.-.-",
- "3.9.1.-",
- "3.9.1.n",
- "3.9.n.n",
- "3.10.-.-",
- "3.10.1.-",
- "3.10.1.n",
- "3.10.n.n",
- "3.11.-.-",
- "3.11.1.-",
- "3.11.1.n",
- "3.11.n.n",
- "3.12.-.-",
- "3.12.1.-",
- "3.12.1.n",
- "3.12.n.n",
- "3.13.-.-",
- "3.13.1.-",
- "3.13.1.n",
- "3.13.n.n",
- "3.n.n.n",
- "4.-.-.-",
- "4.1.-.-",
- "4.1.1.-",
- "4.1.1.n",
- "4.1.2.-",
- "4.1.2.n",
- "4.1.3.-",
- "4.1.3.n",
- "4.1.99.-",
- "4.1.99.n",
- "4.1.n.n",
- "4.2.-.-",
- "4.2.1.-",
- "4.2.1.n",
- "4.2.2.-",
- "4.2.2.n",
- "4.2.3.-",
- "4.2.3.n",
- "4.2.99.-",
- "4.2.99.n",
- "4.2.n.n",
- "4.3.-.-",
- "4.3.1.-",
- "4.3.1.n",
- "4.3.2.-",
- "4.3.2.n",
- "4.3.3.-",
- "4.3.3.n",
- "4.3.99.-",
- "4.3.99.n",
- "4.3.n.n",
- "4.4.-.-",
- "4.4.1.-",
- "4.4.1.n",
- "4.4.n.n",
- "4.5.-.-",
- "4.5.1.-",
- "4.5.1.n",
- "4.5.n.n",
- "4.6.-.-",
- "4.6.1.-",
- "4.6.1.n",
- "4.6.n.n",
- "4.99.-.-",
- "4.99.1.-",
- "4.99.1.n",
- "4.99.n.n",
- "4.n.n.n",
- "5.-.-.-",
- "5.1.-.-",
- "5.1.1.-",
- "5.1.1.n",
- "5.1.2.-",
- "5.1.2.n",
- "5.1.3.-",
- "5.1.3.n",
- "5.1.99.-",
- "5.1.99.n",
- "5.1.n.n",
- "5.2.-.-",
- "5.2.1.-",
- "5.2.1.n",
- "5.2.n.n",
- "5.3.-.-",
- "5.3.1.-",
- "5.3.1.n",
- "5.3.2.-",
- "5.3.2.n",
- "5.3.3.-",
- "5.3.3.n",
- "5.3.4.-",
- "5.3.4.n",
- "5.3.99.-",
- "5.3.99.n",
- "5.3.n.n",
- "5.4.-.-",
- "5.4.1.-",
- "5.4.1.n",
- "5.4.2.-",
- "5.4.2.n",
- "5.4.3.-",
- "5.4.3.n",
- "5.4.4.-",
- "5.4.4.n",
- "5.4.99.-",
- "5.4.99.n",
- "5.4.n.n",
- "5.5.-.-",
- "5.5.1.-",
- "5.5.1.n",
- "5.5.n.n",
- "5.99.-.-",
- "5.99.1.-",
- "5.99.1.n",
- "5.99.n.n",
- "5.n.n.n",
- "6.-.-.-",
- "6.1.-.-",
- "6.1.1.-",
- "6.1.1.n",
- "6.1.n.n",
- "6.2.-.-",
- "6.2.1.-",
- "6.2.1.n",
- "6.2.n.n",
- "6.3.-.-",
- "6.3.1.-",
- "6.3.1.n",
- "6.3.2.-",
- "6.3.2.n",
- "6.3.3.-",
- "6.3.3.n",
- "6.3.4.-",
- "6.3.4.n",
- "6.3.5.-",
- "6.3.5.n",
- "6.3.n.n",
- "6.4.-.-",
- "6.4.1.-",
- "6.4.1.n",
- "6.4.n.n",
- "6.5.-.-",
- "6.5.1.-",
- "6.5.1.n",
- "6.5.n.n",
- "6.6.-.-",
- "6.6.1.-",
- "6.6.1.n",
- "6.6.n.n",
- "6.n.n.n"
+ "1.-.-.- Oxidoreductases",
+ "1.n.n.n Oxidoreductases",
+ "1.1.-.- Acting on the CH-OH group of donors",
+ "1.1.n.n Acting on the CH-OH group of donors",
+ "1.1.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.1.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.1.2.- With a cytochrome as acceptor",
+ "1.1.2.n With a cytochrome as acceptor",
+ "1.1.3.- With oxygen as acceptor",
+ "1.1.3.n With oxygen as acceptor",
+ "1.1.4.- With a disulfide as acceptor",
+ "1.1.4.n With a disulfide as acceptor",
+ "1.1.5.- With a quinone or similar compound as acceptor",
+ "1.1.5.n With a quinone or similar compound as acceptor",
+ "1.1.98.- With other, known, acceptors",
+ "1.1.98.n With other, known, acceptors",
+ "1.1.99.- With other acceptors",
+ "1.1.99.n With other acceptors",
+ "1.2.-.- Acting on the aldehyde or oxo group of donors",
+ "1.2.n.n Acting on the aldehyde or oxo group of donors",
+ "1.2.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.2.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.2.2.- With a cytochrome as acceptor",
+ "1.2.2.n With a cytochrome as acceptor",
+ "1.2.3.- With oxygen as acceptor",
+ "1.2.3.n With oxygen as acceptor",
+ "1.2.4.- With a disulfide as acceptor",
+ "1.2.4.n With a disulfide as acceptor",
+ "1.2.5.- With a quinone or similar compound as acceptor",
+ "1.2.5.n With a quinone or similar compound as acceptor",
+ "1.2.7.- With an iron-sulfur protein as acceptor",
+ "1.2.7.n With an iron-sulfur protein as acceptor",
+ "1.2.99.- With other acceptors",
+ "1.2.99.n With other acceptors",
+ "1.3.-.- Acting on the CH-CH group of donors",
+ "1.3.n.n Acting on the CH-CH group of donors",
+ "1.3.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.3.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.3.2.- With a cytochrome as acceptor",
+ "1.3.2.n With a cytochrome as acceptor",
+ "1.3.3.- With oxygen as acceptor",
+ "1.3.3.n With oxygen as acceptor",
+ "1.3.5.- With a quinone or related compound as acceptor",
+ "1.3.5.n With a quinone or related compound as acceptor",
+ "1.3.7.- With an iron-sulfur protein as acceptor",
+ "1.3.7.n With an iron-sulfur protein as acceptor",
+ "1.3.99.- With other acceptors",
+ "1.3.99.n With other acceptors",
+ "1.4.-.- Acting on the CH-NH(2) group of donors",
+ "1.4.n.n Acting on the CH-NH(2) group of donors",
+ "1.4.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.4.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.4.2.- With a cytochrome as acceptor",
+ "1.4.2.n With a cytochrome as acceptor",
+ "1.4.3.- With oxygen as acceptor",
+ "1.4.3.n With oxygen as acceptor",
+ "1.4.4.- With a disulfide as acceptor",
+ "1.4.4.n With a disulfide as acceptor",
+ "1.4.7.- With an iron-sulfur protein as acceptor",
+ "1.4.7.n With an iron-sulfur protein as acceptor",
+ "1.4.99.- With other acceptors",
+ "1.4.99.n With other acceptors",
+ "1.5.-.- Acting on the CH-NH group of donors",
+ "1.5.n.n Acting on the CH-NH group of donors",
+ "1.5.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.5.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.5.3.- With oxygen as acceptor",
+ "1.5.3.n With oxygen as acceptor",
+ "1.5.4.- With a disulfide as acceptor",
+ "1.5.4.n With a disulfide as acceptor",
+ "1.5.5.- With a quinone or similar compound as acceptor",
+ "1.5.5.n With a quinone or similar compound as acceptor",
+ "1.5.7.- With an iron-sulfur protein as acceptor",
+ "1.5.7.n With an iron-sulfur protein as acceptor",
+ "1.5.8.- With a flavin as acceptor",
+ "1.5.8.n With a flavin as acceptor",
+ "1.5.99.- With other acceptors",
+ "1.5.99.n With other acceptors",
+ "1.6.-.- Acting on NADH or NADPH",
+ "1.6.n.n Acting on NADH or NADPH",
+ "1.6.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.6.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.6.2.- With a heme protein as acceptor",
+ "1.6.2.n With a heme protein as acceptor",
+ "1.6.3.- With a oxygen as acceptor",
+ "1.6.3.n With a oxygen as acceptor",
+ "1.6.5.- With a quinone or similar compound as acceptor",
+ "1.6.5.n With a quinone or similar compound as acceptor",
+ "1.6.6.- With a nitrogenous group as acceptor",
+ "1.6.6.n With a nitrogenous group as acceptor",
+ "1.6.99.- With other acceptors",
+ "1.6.99.n With other acceptors",
+ "1.7.-.- Acting on other nitrogenous compounds as donors",
+ "1.7.n.n Acting on other nitrogenous compounds as donors",
+ "1.7.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.7.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.7.2.- With a cytochrome as acceptor",
+ "1.7.2.n With a cytochrome as acceptor",
+ "1.7.3.- With oxygen as acceptor",
+ "1.7.3.n With oxygen as acceptor",
+ "1.7.7.- With an iron-sulfur protein as acceptor",
+ "1.7.7.n With an iron-sulfur protein as acceptor",
+ "1.7.99.- With other acceptors",
+ "1.7.99.n With other acceptors",
+ "1.8.-.- Acting on a sulfur group of donors",
+ "1.8.n.n Acting on a sulfur group of donors",
+ "1.8.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.8.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.8.2.- With a cytochrome as acceptor",
+ "1.8.2.n With a cytochrome as acceptor",
+ "1.8.3.- With oxygen as acceptor",
+ "1.8.3.n With oxygen as acceptor",
+ "1.8.4.- With a disulfide as acceptor",
+ "1.8.4.n With a disulfide as acceptor",
+ "1.8.5.- With a quinone or similar compound as acceptor",
+ "1.8.5.n With a quinone or similar compound as acceptor",
+ "1.8.7.- With an iron-sulfur protein as acceptor",
+ "1.8.7.n With an iron-sulfur protein as acceptor",
+ "1.8.98.- With other, known, acceptors",
+ "1.8.98.n With other, known, acceptors",
+ "1.8.99.- With other acceptors",
+ "1.8.99.n With other acceptors",
+ "1.9.-.- Acting on a heme group of donors",
+ "1.9.n.n Acting on a heme group of donors",
+ "1.9.3.- With oxygen as acceptor",
+ "1.9.3.n With oxygen as acceptor",
+ "1.9.6.- With a nitrogenous group as acceptor",
+ "1.9.6.n With a nitrogenous group as acceptor",
+ "1.9.99.- With other acceptors",
+ "1.9.99.n With other acceptors",
+ "1.10.-.- Acting on diphenols and related substances as donors",
+ "1.10.n.n Acting on diphenols and related substances as donors",
+ "1.10.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.10.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.10.2.- With a cytochrome as acceptor",
+ "1.10.2.n With a cytochrome as acceptor",
+ "1.10.3.- With oxygen as acceptor",
+ "1.10.3.n With oxygen as acceptor",
+ "1.11.-.- Acting on a peroxide as acceptor",
+ "1.11.n.n Acting on a peroxide as acceptor",
+ "1.11.1.- Peroxidases",
+ "1.11.1.n Peroxidases",
+ "1.11.2.- With H(2)O(2) as acceptor, one oxygen atom of which is incorporated into the product",
+ "1.11.2.n With H(2)O(2) as acceptor, one oxygen atom of which is incorporated into the product",
+ "1.12.-.- Acting on hydrogen as donor",
+ "1.12.n.n Acting on hydrogen as donor",
+ "1.12.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.12.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.12.2.- With a cytochrome as acceptor",
+ "1.12.2.n With a cytochrome as acceptor",
+ "1.12.5.- With a quinone or similar compound as acceptor",
+ "1.12.5.n With a quinone or similar compound as acceptor",
+ "1.12.7.- With an iron-sulfur protein as acceptor",
+ "1.12.7.n With an iron-sulfur protein as acceptor",
+ "1.13.-.- Acting on single donors with incorporation of molecular oxygen",
+ "1.13.n.n Acting on single donors with incorporation of molecular oxygen",
+ "1.14.-.- Acting on paired donors, with incorporation or reduction of molecular oxygen",
+ "1.14.n.n Acting on paired donors, with incorporation or reduction of molecular oxygen",
+ "1.15.-.- Acting on superoxide as acceptor",
+ "1.15.n.n Acting on superoxide as acceptor",
+ "1.15.1.- Acting on superoxide as acceptor",
+ "1.15.1.n Acting on superoxide as acceptor",
+ "1.16.-.- Oxidizing metal ions",
+ "1.16.n.n Oxidizing metal ions",
+ "1.16.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.16.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.16.3.- With oxygen as acceptor",
+ "1.16.3.n With oxygen as acceptor",
+ "1.16.8.- With flavin as acceptor",
+ "1.16.8.n With flavin as acceptor",
+ "1.17.-.- Acting on CH or CH(2) groups",
+ "1.17.n.n Acting on CH or CH(2) groups",
+ "1.17.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.17.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.17.2.- With a cytochrome as acceptor",
+ "1.17.2.n With a cytochrome as acceptor",
+ "1.17.3.- With oxygen as acceptor",
+ "1.17.3.n With oxygen as acceptor",
+ "1.17.4.- With a disulfide as acceptor",
+ "1.17.4.n With a disulfide as acceptor",
+ "1.17.5.- With a quinone or similar compound as acceptor",
+ "1.17.5.n With a quinone or similar compound as acceptor",
+ "1.17.7.- With an iron-sulfur protein as acceptor",
+ "1.17.7.n With an iron-sulfur protein as acceptor",
+ "1.18.-.- Acting on iron-sulfur proteins as donors",
+ "1.18.n.n Acting on iron-sulfur proteins as donors",
+ "1.18.1.- With NAD(+) or NADP(+) as acceptor",
+ "1.18.1.n With NAD(+) or NADP(+) as acceptor",
+ "1.18.6.- With dinitrogen as acceptor",
+ "1.18.6.n With dinitrogen as acceptor",
+ "1.19.-.- Acting on reduced flavodoxin as donor",
+ "1.19.n.n Acting on reduced flavodoxin as donor",
+ "1.19.6.- With dinitrogen as acceptor",
+ "1.19.6.n With dinitrogen as acceptor",
+ "1.20.-.- Acting on phosphorus or arsenic in donors",
+ "1.20.n.n Acting on phosphorus or arsenic in donors",
+ "1.20.1.- Acting on phosphorus or arsenic in donors, with NAD(P)(+) as acceptor",
+ "1.20.1.n Acting on phosphorus or arsenic in donors, with NAD(P)(+) as acceptor",
+ "1.20.4.- Acting on phosphorus or arsenic in donors, with disulfide as acceptor",
+ "1.20.4.n Acting on phosphorus or arsenic in donors, with disulfide as acceptor",
+ "1.21.-.- Acting on x-H and y-H to form an x-y bond",
+ "1.21.n.n Acting on x-H and y-H to form an x-y bond",
+ "1.21.3.- With oxygen as acceptor",
+ "1.21.3.n With oxygen as acceptor",
+ "1.21.4.- With a disulfide as acceptor",
+ "1.21.4.n With a disulfide as acceptor",
+ "1.97.-.- Other oxidoreductases",
+ "1.97.n.n Other oxidoreductases",
+ "1.97.1.- Sole sub-subclass for oxidoreductases that do not belong in the other subclasses",
+ "1.97.1.n Sole sub-subclass for oxidoreductases that do not belong in the other subclasses",
+ "2.-.-.- Transferases",
+ "2.n.n.n Transferases",
+ "2.1.-.- Transferring one-carbon groups",
+ "2.1.n.n Transferring one-carbon groups",
+ "2.1.1.- Methyltransferases",
+ "2.1.1.n Methyltransferases",
+ "2.1.2.- Hydroxymethyl-, formyl- and related transferases",
+ "2.1.2.n Hydroxymethyl-, formyl- and related transferases",
+ "2.1.3.- Carboxyl- and carbamoyltransferases",
+ "2.1.3.n Carboxyl- and carbamoyltransferases",
+ "2.1.4.- Amidinotransferases",
+ "2.1.4.n Amidinotransferases",
+ "2.2.-.- Transferring aldehyde or ketone residues",
+ "2.2.n.n Transferring aldehyde or ketone residues",
+ "2.2.1.- Transketolases and transaldolases",
+ "2.2.1.n Transketolases and transaldolases",
+ "2.3.-.- Acyltransferases",
+ "2.3.n.n Acyltransferases",
+ "2.3.1.- Transferring groups other than amino-acyl groups",
+ "2.3.1.n Transferring groups other than amino-acyl groups",
+ "2.3.2.- Aminoacyltransferases",
+ "2.3.2.n Aminoacyltransferases",
+ "2.3.3.- Acyl groups converted into alkyl on transfer",
+ "2.3.3.n Acyl groups converted into alkyl on transfer",
+ "2.4.-.- Glycosyltransferases",
+ "2.4.n.n Glycosyltransferases",
+ "2.4.1.- Hexosyltransferases",
+ "2.4.1.n Hexosyltransferases",
+ "2.4.2.- Pentosyltransferases",
+ "2.4.2.n Pentosyltransferases",
+ "2.4.99.- Transferring other glycosyl groups",
+ "2.4.99.n Transferring other glycosyl groups",
+ "2.5.-.- Transferring alkyl or aryl groups, other than methyl groups",
+ "2.5.n.n Transferring alkyl or aryl groups, other than methyl groups",
+ "2.5.1.- Transferring alkyl or aryl groups, other than methyl groups",
+ "2.5.1.n Transferring alkyl or aryl groups, other than methyl groups",
+ "2.6.-.- Transferring nitrogenous groups",
+ "2.6.n.n Transferring nitrogenous groups",
+ "2.6.1.- Transaminases (aminotransferases)",
+ "2.6.1.n Transaminases (aminotransferases)",
+ "2.6.3.- Oximinotransferases",
+ "2.6.3.n Oximinotransferases",
+ "2.6.99.- Transferring other nitrogenous groups",
+ "2.6.99.n Transferring other nitrogenous groups",
+ "2.7.-.- Transferring phosphorous-containing groups",
+ "2.7.n.n Transferring phosphorous-containing groups",
+ "2.7.1.- Phosphotransferases with an alcohol group as acceptor",
+ "2.7.1.n Phosphotransferases with an alcohol group as acceptor",
+ "2.7.2.- Phosphotransferases with a carboxyl group as acceptor",
+ "2.7.2.n Phosphotransferases with a carboxyl group as acceptor",
+ "2.7.3.- Phosphotransferases with a nitrogenous group as acceptor",
+ "2.7.3.n Phosphotransferases with a nitrogenous group as acceptor",
+ "2.7.4.- Phosphotransferases with a phosphate group as acceptor",
+ "2.7.4.n Phosphotransferases with a phosphate group as acceptor",
+ "2.7.6.- Diphosphotransferases",
+ "2.7.6.n Diphosphotransferases",
+ "2.7.7.- Nucleotidyltransferases",
+ "2.7.7.n Nucleotidyltransferases",
+ "2.7.8.- Transferases for other substituted phosphate groups",
+ "2.7.8.n Transferases for other substituted phosphate groups",
+ "2.7.9.- Phosphotransferases with paired acceptors",
+ "2.7.9.n Phosphotransferases with paired acceptors",
+ "2.7.10.- Protein-tyrosine kinases",
+ "2.7.10.n Protein-tyrosine kinases",
+ "2.7.11.- Protein-serine/threonine kinases",
+ "2.7.11.n Protein-serine/threonine kinases",
+ "2.7.12.- Dual-specificity kinases (those acting on Ser/Thr and Tyr residues)",
+ "2.7.12.n Dual-specificity kinases (those acting on Ser/Thr and Tyr residues)",
+ "2.7.13.- Protein-histidine kinases",
+ "2.7.13.n Protein-histidine kinases",
+ "2.7.99.- Other protein kinases",
+ "2.7.99.n Other protein kinases",
+ "2.8.-.- Transferring sulfur-containing groups",
+ "2.8.n.n Transferring sulfur-containing groups",
+ "2.8.1.- Sulfurtransferases",
+ "2.8.1.n Sulfurtransferases",
+ "2.8.2.- Sulfotransferases",
+ "2.8.2.n Sulfotransferases",
+ "2.8.3.- CoA-transferases",
+ "2.8.3.n CoA-transferases",
+ "2.8.4.- Transferring alkylthio groups",
+ "2.8.4.n Transferring alkylthio groups",
+ "2.9.-.- Transferring selenium-containing groups",
+ "2.9.n.n Transferring selenium-containing groups",
+ "2.9.1.- Selenotransferases",
+ "2.9.1.n Selenotransferases",
+ "3.-.-.- Hydrolases",
+ "3.n.n.n Hydrolases",
+ "3.1.-.- Acting on ester bonds",
+ "3.1.n.n Acting on ester bonds",
+ "3.1.1.- Carboxylic ester hydrolases",
+ "3.1.1.n Carboxylic ester hydrolases",
+ "3.1.2.- Thiolester hydrolases",
+ "3.1.2.n Thiolester hydrolases",
+ "3.1.3.- Phosphoric monoester hydrolases",
+ "3.1.3.n Phosphoric monoester hydrolases",
+ "3.1.4.- Phosphoric diester hydrolases",
+ "3.1.4.n Phosphoric diester hydrolases",
+ "3.1.5.- Triphosphoric monoester hydrolases",
+ "3.1.5.n Triphosphoric monoester hydrolases",
+ "3.1.6.- Sulfuric ester hydrolases",
+ "3.1.6.n Sulfuric ester hydrolases",
+ "3.1.7.- Diphosphoric monoester hydrolases",
+ "3.1.7.n Diphosphoric monoester hydrolases",
+ "3.1.8.- Phosphoric triester hydrolases",
+ "3.1.8.n Phosphoric triester hydrolases",
+ "3.1.11.- Exodeoxyribonucleases producing 5'-phosphomonoesters",
+ "3.1.11.n Exodeoxyribonucleases producing 5'-phosphomonoesters",
+ "3.1.13.- Exoribonucleases producing 5'-phosphomonoesters",
+ "3.1.13.n Exoribonucleases producing 5'-phosphomonoesters",
+ "3.1.14.- Exoribonucleases producing 3'-phosphomonoesters",
+ "3.1.14.n Exoribonucleases producing 3'-phosphomonoesters",
+ "3.1.15.- Exonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
+ "3.1.15.n Exonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
+ "3.1.16.- Exonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
+ "3.1.16.n Exonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
+ "3.1.21.- Endodeoxyribonucleases producing 5'-phosphomonoesters",
+ "3.1.21.n Endodeoxyribonucleases producing 5'-phosphomonoesters",
+ "3.1.22.- Endodeoxyribonucleases producing other than 5'-phosphomonoesters",
+ "3.1.22.n Endodeoxyribonucleases producing other than 5'-phosphomonoesters",
+ "3.1.25.- Site-specific endodeoxyribonucleases specific for altered bases",
+ "3.1.25.n Site-specific endodeoxyribonucleases specific for altered bases",
+ "3.1.26.- Endoribonucleases producing 5'-phosphomonoesters",
+ "3.1.26.n Endoribonucleases producing 5'-phosphomonoesters",
+ "3.1.27.- Endoribonucleases producing other than 5'-phosphomonoesters",
+ "3.1.27.n Endoribonucleases producing other than 5'-phosphomonoesters",
+ "3.1.30.- Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
+ "3.1.30.n Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 5'-phosphomonoesters",
+ "3.1.31.- Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
+ "3.1.31.n Endoribonucleases active with either ribo- or deoxyribonucleic acid and producing 3'-phosphomonoesters",
+ "3.2.-.- Glycosylases",
+ "3.2.n.n Glycosylases",
+ "3.2.1.- Glycosidases, i.e. enzymes hydrolyzing O- and S-glycosyl compounds",
+ "3.2.1.n Glycosidases, i.e. enzymes hydrolyzing O- and S-glycosyl compounds",
+ "3.2.2.- Hydrolyzing N-glycosyl compounds",
+ "3.2.2.n Hydrolyzing N-glycosyl compounds",
+ "3.3.-.- Acting on ether bonds",
+ "3.3.n.n Acting on ether bonds",
+ "3.3.1.- Thioether and trialkylsulfonium hydrolases",
+ "3.3.1.n Thioether and trialkylsulfonium hydrolases",
+ "3.3.2.- Ether hydrolases",
+ "3.3.2.n Ether hydrolases",
+ "3.4.-.- Acting on peptide bonds (peptide hydrolases)",
+ "3.4.n.n Acting on peptide bonds (peptide hydrolases)",
+ "3.4.11.- Aminopeptidases",
+ "3.4.11.n Aminopeptidases",
+ "3.4.13.- Dipeptidases",
+ "3.4.13.n Dipeptidases",
+ "3.4.14.- Dipeptidyl-peptidases and tripeptidyl-peptidases",
+ "3.4.14.n Dipeptidyl-peptidases and tripeptidyl-peptidases",
+ "3.4.15.- Peptidyl-dipeptidases",
+ "3.4.15.n Peptidyl-dipeptidases",
+ "3.4.16.- Serine-type carboxypeptidases",
+ "3.4.16.n Serine-type carboxypeptidases",
+ "3.4.17.- Metallocarboxypeptidases",
+ "3.4.17.n Metallocarboxypeptidases",
+ "3.4.18.- Cysteine-type carboxypeptidases",
+ "3.4.18.n Cysteine-type carboxypeptidases",
+ "3.4.19.- Omega peptidases",
+ "3.4.19.n Omega peptidases",
+ "3.4.21.- Serine endopeptidases",
+ "3.4.21.n Serine endopeptidases",
+ "3.4.22.- Cysteine endopeptidases",
+ "3.4.22.n Cysteine endopeptidases",
+ "3.4.23.- Aspartic endopeptidases",
+ "3.4.23.n Aspartic endopeptidases",
+ "3.4.24.- Metalloendopeptidases",
+ "3.4.24.n Metalloendopeptidases",
+ "3.4.25.- Threonine endopeptidases",
+ "3.4.25.n Threonine endopeptidases",
+ "3.4.99.- Endopeptidases of unknown catalytic mechanism",
+ "3.4.99.n Endopeptidases of unknown catalytic mechanism",
+ "3.5.-.- Acting on carbon-nitrogen bonds, other than peptide bonds",
+ "3.5.n.n Acting on carbon-nitrogen bonds, other than peptide bonds",
+ "3.5.1.- In linear amides",
+ "3.5.1.n In linear amides",
+ "3.5.2.- In cyclic amides",
+ "3.5.2.n In cyclic amides",
+ "3.5.3.- In linear amidines",
+ "3.5.3.n In linear amidines",
+ "3.5.4.- In cyclic amidines",
+ "3.5.4.n In cyclic amidines",
+ "3.5.5.- In nitriles",
+ "3.5.5.n In nitriles",
+ "3.5.99.- In other compounds",
+ "3.5.99.n In other compounds",
+ "3.6.-.- Acting on acid anhydrides",
+ "3.6.n.n Acting on acid anhydrides",
+ "3.6.1.- In phosphorous-containing anhydrides",
+ "3.6.1.n In phosphorous-containing anhydrides",
+ "3.6.2.- In sulfonyl-containing anhydrides",
+ "3.6.2.n In sulfonyl-containing anhydrides",
+ "3.6.3.- Acting on acid anhydrides; catalyzing transmembrane movement of substances",
+ "3.6.3.n Acting on acid anhydrides; catalyzing transmembrane movement of substances",
+ "3.6.4.- Acting on acid anhydrides; involved in cellular and subcellular movement",
+ "3.6.4.n Acting on acid anhydrides; involved in cellular and subcellular movement",
+ "3.6.5.- Acting on GTP; involved in cellular and subcellular movement",
+ "3.6.5.n Acting on GTP; involved in cellular and subcellular movement",
+ "3.7.-.- Acting on carbon-carbon bonds",
+ "3.7.n.n Acting on carbon-carbon bonds",
+ "3.7.1.- In ketonic substances",
+ "3.7.1.n In ketonic substances",
+ "3.8.-.- Acting on halide bonds",
+ "3.8.n.n Acting on halide bonds",
+ "3.8.1.- In C-halide compounds",
+ "3.8.1.n In C-halide compounds",
+ "3.9.-.- Acting on phosphorus-nitrogen bonds",
+ "3.9.n.n Acting on phosphorus-nitrogen bonds",
+ "3.9.1.- Acting on phosphorus-nitrogen bonds",
+ "3.9.1.n Acting on phosphorus-nitrogen bonds",
+ "3.10.-.- Acting on sulfur-nitrogen bonds",
+ "3.10.n.n Acting on sulfur-nitrogen bonds",
+ "3.10.1.- Acting on sulfur-nitrogen bonds",
+ "3.10.1.n Acting on sulfur-nitrogen bonds",
+ "3.11.-.- Acting on carbon-phosphorus bonds",
+ "3.11.n.n Acting on carbon-phosphorus bonds",
+ "3.11.1.- Acting on carbon-phosphorus bonds",
+ "3.11.1.n Acting on carbon-phosphorus bonds",
+ "3.12.-.- Acting on sulfur-sulfur bonds",
+ "3.12.n.n Acting on sulfur-sulfur bonds",
+ "3.12.1.- Acting on sulfur-sulfur bonds",
+ "3.12.1.n Acting on sulfur-sulfur bonds",
+ "3.13.-.- Acting on carbon-sulfur bonds",
+ "3.13.n.n Acting on carbon-sulfur bonds",
+ "3.13.1.- Acting on carbon-sulfur bonds",
+ "3.13.1.n Acting on carbon-sulfur bonds",
+ "4.-.-.- Lyases",
+ "4.n.n.n Lyases",
+ "4.1.-.- Carbon-carbon lyases",
+ "4.1.n.n Carbon-carbon lyases",
+ "4.1.1.- Carboxy-lyases",
+ "4.1.1.n Carboxy-lyases",
+ "4.1.2.- Aldehyde-lyases",
+ "4.1.2.n Aldehyde-lyases",
+ "4.1.3.- Oxo-acid-lyases",
+ "4.1.3.n Oxo-acid-lyases",
+ "4.1.99.- Other carbon-carbon lyases",
+ "4.1.99.n Other carbon-carbon lyases",
+ "4.2.-.- Carbon-oxygen lyases",
+ "4.2.n.n Carbon-oxygen lyases",
+ "4.2.1.- Hydro-lyases",
+ "4.2.1.n Hydro-lyases",
+ "4.2.2.- Acting on polysaccharides",
+ "4.2.2.n Acting on polysaccharides",
+ "4.2.3.- Acting on phosphates",
+ "4.2.3.n Acting on phosphates",
+ "4.2.99.- Other carbon-oxygen lyases",
+ "4.2.99.n Other carbon-oxygen lyases",
+ "4.3.-.- Carbon-nitrogen lyases",
+ "4.3.n.n Carbon-nitrogen lyases",
+ "4.3.1.- Ammonia-lyases",
+ "4.3.1.n Ammonia-lyases",
+ "4.3.2.- Lyases acting on amides, amidines, etc",
+ "4.3.2.n Lyases acting on amides, amidines, etc",
+ "4.3.3.- Amine-lyases",
+ "4.3.3.n Amine-lyases",
+ "4.4.-.- Carbon-sulfur lyases",
+ "4.4.n.n Carbon-sulfur lyases",
+ "4.4.1.- Carbon-sulfur lyases",
+ "4.4.1.n Carbon-sulfur lyases",
+ "4.5.-.- Carbon-halide lyases",
+ "4.5.n.n Carbon-halide lyases",
+ "4.5.1.- Carbon-halide lyases",
+ "4.5.1.n Carbon-halide lyases",
+ "4.6.-.- Phosphorus-oxygen lyases",
+ "4.6.n.n Phosphorus-oxygen lyases",
+ "4.6.1.- Phosphorus-oxygen lyases",
+ "4.6.1.n Phosphorus-oxygen lyases",
+ "4.99.-.- Other lyases",
+ "4.99.n.n Other lyases",
+ "4.99.1.- Sole sub-subclass for lyases that do not belong in the other subclasses",
+ "4.99.1.n Sole sub-subclass for lyases that do not belong in the other subclasses",
+ "5.-.-.- Isomerases",
+ "5.n.n.n Isomerases",
+ "5.1.-.- Racemases and epimerases",
+ "5.1.n.n Racemases and epimerases",
+ "5.1.1.- Acting on amino acids and derivatives",
+ "5.1.1.n Acting on amino acids and derivatives",
+ "5.1.2.- Acting on hydroxy acids and derivatives",
+ "5.1.2.n Acting on hydroxy acids and derivatives",
+ "5.1.3.- Acting on carbohydrates and derivatives",
+ "5.1.3.n Acting on carbohydrates and derivatives",
+ "5.1.99.- Acting on other compounds",
+ "5.1.99.n Acting on other compounds",
+ "5.2.-.- Cis-trans-isomerases",
+ "5.2.n.n Cis-trans-isomerases",
+ "5.2.1.- Cis-trans Isomerases",
+ "5.2.1.n Cis-trans Isomerases",
+ "5.3.-.- Intramolecular oxidoreductases",
+ "5.3.n.n Intramolecular oxidoreductases",
+ "5.3.1.- Interconverting aldoses and ketoses, and related compounds",
+ "5.3.1.n Interconverting aldoses and ketoses, and related compounds",
+ "5.3.2.- Interconverting keto- and enol- groups",
+ "5.3.2.n Interconverting keto- and enol- groups",
+ "5.3.3.- Transposing C==C bonds",
+ "5.3.3.n Transposing C==C bonds",
+ "5.3.4.- Transposing S-S bonds",
+ "5.3.4.n Transposing S-S bonds",
+ "5.3.99.- Other intramolecular oxidoreductases",
+ "5.3.99.n Other intramolecular oxidoreductases",
+ "5.4.-.- Intramolecular transferases (mutases)",
+ "5.4.n.n Intramolecular transferases (mutases)",
+ "5.4.1.- Transferring acyl groups",
+ "5.4.1.n Transferring acyl groups",
+ "5.4.2.- Phosphotransferases (phosphomutases)",
+ "5.4.2.n Phosphotransferases (phosphomutases)",
+ "5.4.3.- Transferring amino groups",
+ "5.4.3.n Transferring amino groups",
+ "5.4.4.- Transferring hydroxy groups",
+ "5.4.4.n Transferring hydroxy groups",
+ "5.4.99.- Transferring other groups",
+ "5.4.99.n Transferring other groups",
+ "5.5.-.- Intramolecular lyases",
+ "5.5.n.n Intramolecular lyases",
+ "5.5.1.- Intramolecular lyases",
+ "5.5.1.n Intramolecular lyases",
+ "5.99.-.- Other isomerases",
+ "5.99.n.n Other isomerases",
+ "5.99.1.- Sole sub-subclass for isomerases that do not belong in the other subclasses",
+ "5.99.1.n Sole sub-subclass for isomerases that do not belong in the other subclasses",
+ "6.-.-.- Ligases",
+ "6.n.n.n Ligases",
+ "6.1.-.- Forming carbon-oxygen bonds",
+ "6.1.n.n Forming carbon-oxygen bonds",
+ "6.1.1.- Ligases forming aminoacyl-tRNA and related compounds",
+ "6.1.1.n Ligases forming aminoacyl-tRNA and related compounds",
+ "6.2.-.- Forming carbon-sulfur bonds",
+ "6.2.n.n Forming carbon-sulfur bonds",
+ "6.2.1.- Acid--thiol ligases",
+ "6.2.1.n Acid--thiol ligases",
+ "6.3.-.- Forming carbon-nitrogen bonds",
+ "6.3.n.n Forming carbon-nitrogen bonds",
+ "6.3.1.- Acid--ammonia (or amide) ligases (amide synthases)",
+ "6.3.1.n Acid--ammonia (or amide) ligases (amide synthases)",
+ "6.3.2.- Acid--D-amino-acid ligases (peptide synthases)",
+ "6.3.2.n Acid--D-amino-acid ligases (peptide synthases)",
+ "6.3.3.- Cyclo-ligases",
+ "6.3.3.n Cyclo-ligases",
+ "6.3.4.- Other carbon--nitrogen ligases",
+ "6.3.4.n Other carbon--nitrogen ligases",
+ "6.3.5.- Carbon--nitrogen ligases with glutamine as amido-N-donor",
+ "6.3.5.n Carbon--nitrogen ligases with glutamine as amido-N-donor",
+ "6.4.-.- Forming carbon-carbon bonds",
+ "6.4.n.n Forming carbon-carbon bonds",
+ "6.4.1.- Ligases that form carbon-carbon bonds",
+ "6.4.1.n Ligases that form carbon-carbon bonds",
+ "6.5.-.- Forming phosphoric ester bonds",
+ "6.5.n.n Forming phosphoric ester bonds",
+ "6.5.1.- Ligases that form phosphoric-ester bonds",
+ "6.5.1.n Ligases that form phosphoric-ester bonds",
+ "6.6.-.- Forming nitrogen-metal bonds",
+ "6.6.n.n Forming nitrogen-metal bonds",
+ "6.6.1.- Forming coordination complexes",
+ "6.6.1.n Forming coordination complexes"
};
diff --git a/api/ecnum_deleted.inc b/api/ecnum_deleted.inc
index 4a6b6a9e..56fca97e 100644
--- a/api/ecnum_deleted.inc
+++ b/api/ecnum_deleted.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_deleted.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $
+/* $Id: ecnum_deleted.inc,v 1.3 2011/06/30 16:04:31 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -32,39 +32,31 @@
static const char* const kECNum_deleted[] = {
"1.1.1.74",
- "1.1.1.155",
- "1.1.1.249",
- "1.1.3.25",
+ "1.1.1.293",
"1.1.3.31",
"1.1.5.1",
"1.2.1.6",
+ "1.2.2.2",
"1.2.3.10",
- "1.2.7.9",
"1.3.1.23",
"1.3.1.55",
"1.3.1.59",
"1.3.1.61",
- "1.4.3.6",
"1.4.3.18",
"1.5.3.3",
"1.6.2.3",
"1.6.5.1",
"1.7.1.8",
"1.7.99.2",
- "1.7.99.5",
"1.8.1.1",
"1.12.99.2",
- "1.12.99.5",
"1.13.1.7",
"1.13.11.7",
"1.13.11.42",
"1.13.12.10",
- "1.13.12.11",
"1.14.1.9",
"1.14.1.11",
- "1.14.11.5",
"1.14.13.65",
- "1.14.14.4",
"1.14.99.18",
"1.99.1.3",
"1.99.1.4",
@@ -73,9 +65,6 @@ static const char* const kECNum_deleted[] = {
"1.99.1.10",
"1.99.1.12",
"2.1.1.30",
- "2.1.1.73",
- "2.1.1.92",
- "2.1.1.93",
"2.1.1.138",
"2.1.3.4",
"2.3.1.70",
@@ -83,27 +72,16 @@ static const char* const kECNum_deleted[] = {
"2.3.1.124",
"2.4.1.6",
"2.4.1.75",
- "2.4.1.112",
- "2.4.1.112",
"2.4.1.154",
- "2.4.1.233",
"2.4.1.235",
- "2.5.1.64",
"2.6.1.20",
- "2.6.1.61",
"2.6.1.69",
"2.7.1.9",
"2.7.1.57",
- "2.7.1.70",
- "2.7.1.97",
"2.7.1.98",
- "2.7.1.120",
"2.7.7.20",
- "2.7.7.29",
- "2.8.2.12",
"2.8.3.4",
"3.1.1.9",
- "3.1.1.16",
"3.1.2.9",
"3.1.3.61",
"3.1.4.24",
@@ -172,20 +150,14 @@ static const char* const kECNum_deleted[] = {
"3.4.99.39",
"3.4.99.40",
"3.4.99.42",
- "3.5.1.80",
- "3.6.3.13",
- "3.6.3.45",
"3.13.1.2",
"4.1.1.13",
"4.1.2.3",
"4.1.2.6",
+ "4.1.99.15",
"4.2.1.23",
- "4.2.1.71",
- "4.2.1.86",
"4.2.99.5",
- "4.3.1.5",
"4.3.1.11",
- "4.3.1.21",
"4.4.1.12",
"5.2.1.11",
"5.3.1.2",
@@ -193,7 +165,5 @@ static const char* const kECNum_deleted[] = {
"5.3.1.18",
"5.3.99.1",
"5.4.3.1",
- "6.1.1.8",
- "6.2.1.29",
- "6.3.2.15"
+ "6.1.1.8"
};
diff --git a/api/ecnum_replaced.inc b/api/ecnum_replaced.inc
index 3aea7c17..f511ad44 100644
--- a/api/ecnum_replaced.inc
+++ b/api/ecnum_replaced.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_replaced.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $
+/* $Id: ecnum_replaced.inc,v 1.4 2011/06/30 16:04:31 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,23 +31,29 @@
*/
static const char* const kECNum_replaced[] = {
- "1.1.1.5 1.1.1.303",
+ "1.1.1.5 1.1.1.303 1.1.1.304",
"1.1.1.68 1.5.1.20",
"1.1.1.70 1.2.1.3",
"1.1.1.89 1.1.1.86",
"1.1.1.109 1.3.1.28",
"1.1.1.139 1.1.1.21",
+ "1.1.1.155 1.1.1.87",
"1.1.1.171 1.5.1.20",
"1.1.1.180 1.1.1.131",
- "1.1.1.182 1.1.1.228",
+ "1.1.1.182 1.1.1.198 1.1.1.227 1.1.1.228",
"1.1.1.204 1.17.1.4",
"1.1.1.242 1.3.1.69",
+ "1.1.1.249 2.5.1.46",
"1.1.1.253 1.5.1.33",
+ "1.1.1.n1 1.1.1.305",
+ "1.1.1.n2 1.1.1.300",
+ "1.1.1.n10 1.1.1.301",
"1.1.2.1 1.1.5.3",
"1.1.3.1 1.1.3.15",
"1.1.3.2 1.13.12.4",
"1.1.3.22 1.17.3.2",
"1.1.3.24 1.3.3.12",
+ "1.1.3.25 1.1.99.18",
"1.1.3.26 1.21.3.2",
"1.1.3.32 1.14.21.1",
"1.1.3.33 1.14.21.2",
@@ -55,20 +61,27 @@ static const char* const kECNum_replaced[] = {
"1.1.3.35 1.14.21.4",
"1.1.3.36 1.14.21.5",
"1.1.99.5 1.1.5.3",
+ "1.1.99.8 1.1.2.7 1.1.2.8",
"1.1.99.15 1.5.1.20",
"1.1.99.16 1.1.5.4",
"1.1.99.17 1.1.5.2",
"1.1.99.19 1.17.99.4",
- "1.2.1.1 1.1.1.284",
+ "1.1.99.23 1.1.2.6",
+ "1.1.99.25 1.1.5.8",
+ "1.1.99.34 1.1.98.2",
+ "1.2.1.1 1.1.1.284 4.4.1.22",
"1.2.1.14 1.1.1.205",
"1.2.1.34 1.1.1.131",
"1.2.1.35 1.1.1.203",
"1.2.1.37 1.17.1.4",
"1.2.1.55 1.1.1.279",
"1.2.1.56 1.1.1.280",
+ "1.2.1.66 1.1.1.306",
+ "1.2.1.n1 1.2.1.77",
"1.2.3.2 1.17.3.2",
"1.2.3.12 1.14.13.82",
"1.2.4.3 1.2.4.4",
+ "1.2.7.9 1.2.7.3",
"1.2.99.1 1.17.99.4",
"1.3.1.50 1.1.1.252",
"1.3.2.1 1.3.99.2",
@@ -77,6 +90,7 @@ static const char* const kECNum_replaced[] = {
"1.3.99.9 1.21.99.1",
"1.3.99.11 1.3.5.2",
"1.4.1.6 1.21.4.1",
+ "1.4.3.6 1.4.3.21 1.4.3.22",
"1.4.3.9 1.4.3.4",
"1.4.3.17 1.3.3.10",
"1.4.4.1 1.21.4.1",
@@ -86,6 +100,17 @@ static const char* const kECNum_replaced[] = {
"1.5.1.35 1.2.1.19",
"1.5.3.8 1.3.3.8",
"1.5.3.9 1.21.3.3",
+ "1.5.3.11 1.5.3.13 1.5.3.14 1.5.3.15 1.5.3.16 1.5.3.17",
+ "1.5.3.n1 1.5.3.16",
+ "1.5.3.n2 1.5.3.16",
+ "1.5.3.n3 1.5.3.13 1.5.3.16",
+ "1.5.3.n4 1.5.3.13",
+ "1.5.3.n5 1.5.3.16",
+ "1.5.3.n6 1.5.3.14 1.5.3.15",
+ "1.5.3.n7 1.5.3.14 1.5.3.15",
+ "1.5.3.n8 1.5.3.14 1.5.3.15",
+ "1.5.3.n9 1.5.3.14 1.5.3.15",
+ "1.5.3.n10 1.5.3.13",
"1.5.99.7 1.5.8.2",
"1.5.99.10 1.5.8.1",
"1.6.2.1 1.6.99.3",
@@ -125,7 +150,8 @@ static const char* const kECNum_replaced[] = {
"1.6.99.12 1.16.1.6",
"1.6.99.13 1.16.1.7",
"1.7.99.3 1.7.2.1",
- "1.8.4.5 1.8.4.13",
+ "1.7.99.5 1.5.1.20",
+ "1.8.4.5 1.8.4.13 1.8.4.14",
"1.8.4.6 1.8.4.11",
"1.8.6.1 2.5.1.18",
"1.8.99.4 1.8.4.8",
@@ -138,6 +164,7 @@ static const char* const kECNum_replaced[] = {
"1.12.99.1 1.12.98.1",
"1.12.99.3 1.12.5.1",
"1.12.99.4 1.12.98.2",
+ "1.12.99.5 1.13.11.47",
"1.13.1.1 1.13.11.1",
"1.13.1.2 1.13.11.2",
"1.13.1.3 1.13.11.3",
@@ -152,12 +179,13 @@ static const char* const kECNum_replaced[] = {
"1.13.1.13 1.13.11.12",
"1.13.11.21 1.14.99.36",
"1.13.11.32 1.13.12.16",
+ "1.13.12.11 1.14.13.8",
"1.13.99.2 1.14.12.10",
"1.13.99.4 1.14.12.9",
"1.13.99.5 1.13.11.47",
"1.14.1.1 1.14.14.1",
"1.14.1.2 1.14.13.9",
- "1.14.1.3 1.14.99.7",
+ "1.14.1.3 1.14.99.7 5.4.99.7",
"1.14.1.4 1.14.99.2",
"1.14.1.5 1.14.13.5",
"1.14.1.6 1.14.15.4",
@@ -167,12 +195,18 @@ static const char* const kECNum_replaced[] = {
"1.14.2.1 1.14.17.1",
"1.14.2.2 1.13.11.27",
"1.14.3.1 1.14.16.1",
+ "1.14.11.5 1.14.11.6",
"1.14.12.2 1.14.13.35",
"1.14.12.6 1.14.13.66",
+ "1.14.12.n1 1.14.12.21",
"1.14.13.45 1.14.18.2",
"1.14.14.2 1.14.14.1",
+ "1.14.14.4 1.14.15.7",
"1.14.14.6 1.14.13.111",
"1.14.17.2 1.14.18.1",
+ "1.14.19.n1 1.14.19.4",
+ "1.14.19.n2 1.14.19.5",
+ "1.14.19.n3 1.14.19.6",
"1.14.99.5 1.14.19.1",
"1.14.99.6 1.14.19.2",
"1.14.99.8 1.14.14.1",
@@ -180,6 +214,7 @@ static const char* const kECNum_replaced[] = {
"1.14.99.16 1.14.13.72",
"1.14.99.17 1.14.16.5",
"1.14.99.25 1.14.19.3",
+ "1.14.99.n1 1.14.99.41",
"1.17.1.6 1.17.99.5",
"1.17.4.3 1.17.7.1",
"1.18.2.1 1.18.6.1",
@@ -196,7 +231,7 @@ static const char* const kECNum_replaced[] = {
"1.99.1.7 1.14.15.4",
"1.99.1.9 1.14.99.9",
"1.99.1.11 1.14.99.10",
- "1.99.1.13 1.14.99.7",
+ "1.99.1.13 1.14.99.7 5.4.99.7",
"1.99.1.14 1.13.11.27",
"1.99.2.1 1.13.11.12",
"1.99.2.2 1.13.11.1",
@@ -204,18 +239,27 @@ static const char* const kECNum_replaced[] = {
"1.99.2.4 1.13.11.4",
"1.99.2.5 1.13.11.5",
"1.99.2.6 1.13.99.1",
- "2.1.1.23 2.1.1.126",
- "2.1.1.24 2.1.1.100",
- "2.1.1.58 2.6.1.5",
+ "2.1.1.23 2.1.1.124 2.1.1.125 2.1.1.126",
+ "2.1.1.24 2.1.1.77 2.1.1.80 2.1.1.100",
+ "2.1.1.29 2.1.1.202 2.1.1.203 2.1.1.204",
+ "2.1.1.48 2.1.1.181 2.1.1.182 2.1.1.183 2.1.1.184",
+ "2.1.1.51 2.1.1.187 2.1.1.188",
+ "2.1.1.52 2.1.1.171 2.1.1.172 2.1.1.173 2.1.1.174",
+ "2.1.1.58 2.1.1.57",
+ "2.1.1.73 2.1.1.37",
"2.1.1.81 2.1.1.49",
+ "2.1.1.92 2.1.1.69",
+ "2.1.1.93 2.1.1.70",
"2.1.1.134 2.1.1.129",
"2.1.1.135 1.16.1.8",
"2.1.2.6 2.1.2.5",
"2.1.2.12 2.1.1.74",
+ "2.1.2.n1 2.1.2.13",
"2.3.1.55 2.3.1.82",
+ "2.3.1.n1 2.3.1.191",
"2.4.1.3 2.4.1.25",
"2.4.1.42 2.4.1.17",
- "2.4.1.51 2.4.1.145",
+ "2.4.1.51 2.4.1.101 2.4.1.143 2.4.1.144 2.4.1.145",
"2.4.1.55 2.7.8.14",
"2.4.1.59 2.4.1.17",
"2.4.1.61 2.4.1.17",
@@ -228,35 +272,51 @@ static const char* const kECNum_replaced[] = {
"2.4.1.98 2.4.1.90",
"2.4.1.107 2.4.1.17",
"2.4.1.108 2.4.1.17",
+ "2.4.1.112 2.4.1.186",
"2.4.1.124 2.4.1.87",
"2.4.1.151 2.4.1.87",
"2.4.1.169 2.4.2.39",
"2.4.1.200 4.2.2.17",
"2.4.1.204 2.4.2.40",
+ "2.4.1.233 2.4.1.115",
+ "2.4.1.n1 2.4.1.245",
+ "2.4.1.n3 2.4.1.250",
"2.4.2.13 2.5.1.6",
+ "2.4.2.n1 2.4.2.43",
"2.5.1.8 2.5.1.75",
+ "2.5.1.11 2.5.1.84 2.5.1.85",
"2.5.1.12 2.5.1.18",
"2.5.1.13 2.5.1.18",
"2.5.1.14 2.5.1.18",
+ "2.5.1.33 2.5.1.82 2.5.1.83",
"2.5.1.37 4.4.1.20",
"2.5.1.40 4.2.3.9",
+ "2.5.1.64 2.2.1.9 4.2.99.20",
+ "2.5.1.n1 2.2.1.9",
+ "2.5.1.n2 2.5.1.81",
+ "2.5.1.n3 2.5.1.73",
"2.6.1.10 2.6.1.21",
"2.6.1.25 2.6.1.24",
"2.6.1.53 1.4.1.13",
+ "2.6.1.61 2.6.1.40",
+ "2.6.1.n1 2.6.1.87",
"2.6.2.1 2.1.4.1",
"2.7.1.37 2.7.11.1",
"2.7.1.38 2.7.11.19",
+ "2.7.1.70 2.7.11.1",
"2.7.1.75 2.7.1.21",
"2.7.1.96 2.7.1.86",
+ "2.7.1.97 2.7.11.14",
"2.7.1.99 2.7.11.2",
"2.7.1.104 2.7.99.1",
"2.7.1.109 2.7.11.31",
"2.7.1.110 2.7.11.3",
"2.7.1.111 2.7.11.27",
- "2.7.1.112 2.7.10.1",
+ "2.7.1.112 2.7.10.1 2.7.10.2",
"2.7.1.115 2.7.11.4",
"2.7.1.116 2.7.11.5",
"2.7.1.117 2.7.11.18",
+ "2.7.1.120 2.7.11.17",
"2.7.1.123 2.7.11.17",
"2.7.1.124 2.7.11.6",
"2.7.1.125 2.7.11.14",
@@ -271,6 +331,9 @@ static const char* const kECNum_replaced[] = {
"2.7.1.141 2.7.11.23",
"2.7.1.152 2.7.4.21",
"2.7.1.155 2.7.4.24",
+ "2.7.1.n2 2.7.1.161",
+ "2.7.1.n3 2.7.1.164",
+ "2.7.1.n6 2.7.1.163",
"2.7.2.5 6.3.4.16",
"2.7.2.9 6.3.5.5",
"2.7.3.11 2.7.13.1",
@@ -285,16 +348,28 @@ static const char* const kECNum_replaced[] = {
"2.7.5.7 5.4.2.8",
"2.7.7.16 3.1.27.5",
"2.7.7.17 3.1.27.1",
+ "2.7.7.21 2.7.7.72",
+ "2.7.7.25 2.7.7.72",
"2.7.7.26 3.1.27.3",
+ "2.7.7.29 2.7.7.28",
+ "2.7.7.n2 2.7.7.67",
+ "2.7.7.n3 2.7.7.73",
"2.7.8.16 2.7.8.2",
+ "2.7.8.n1 2.7.8.30",
+ "2.8.2.12 2.8.2.8",
+ "2.9.1.n1 2.9.1.2",
"3.1.1.12 3.1.1.1",
+ "3.1.1.16 3.1.1.24 5.3.3.4",
"3.1.1.18 3.1.1.17",
"3.1.1.62 3.5.1.47",
"3.1.1.69 3.5.1.89",
+ "3.1.1.n1 3.5.1.103",
"3.1.2.8 3.1.2.6",
"3.1.2.24 3.13.1.3",
- "3.1.3.30 3.1.3.7",
+ "3.1.2.n1 3.1.2.28",
+ "3.1.3.30 3.1.3.31",
"3.1.3.65 3.1.3.64",
+ "3.1.3.n3 3.1.3.78",
"3.1.4.5 3.1.21.1",
"3.1.4.6 3.1.22.1",
"3.1.4.7 3.1.31.1",
@@ -314,6 +389,7 @@ static const char* const kECNum_replaced[] = {
"3.1.4.31 3.1.11.4",
"3.1.4.36 3.1.4.43",
"3.1.4.47 4.6.1.14",
+ "3.1.4.n1 3.1.4.53",
"3.1.22.3 3.1.21.7",
"3.1.23.1 3.1.21.4",
"3.1.23.2 3.1.21.4",
@@ -379,6 +455,7 @@ static const char* const kECNum_replaced[] = {
"3.1.24.3 3.1.21.5",
"3.1.24.4 3.1.21.5",
"3.1.25.2 4.2.99.18",
+ "3.1.26.n1 3.1.26.12",
"3.2.1.12 3.2.1.54",
"3.2.1.13 3.2.1.54",
"3.2.1.29 3.2.1.52",
@@ -393,7 +470,7 @@ static const char* const kECNum_replaced[] = {
"3.2.2.18 3.5.1.52",
"3.2.3.1 3.2.1.147",
"3.3.1.3 4.4.1.21",
- "3.3.2.3 3.3.2.9",
+ "3.3.2.3 3.3.2.9 3.3.2.10",
"3.4.1.1 3.4.11.1",
"3.4.1.2 3.4.11.2",
"3.4.1.3 3.4.11.4",
@@ -401,12 +478,12 @@ static const char* const kECNum_replaced[] = {
"3.4.2.1 3.4.17.1",
"3.4.2.2 3.4.17.2",
"3.4.2.3 3.4.17.4",
- "3.4.3.1 3.4.13.18",
- "3.4.3.2 3.4.13.18",
+ "3.4.3.1 3.4.13.18 3.4.13.19",
+ "3.4.3.2 3.4.13.18 3.4.13.19",
"3.4.3.3 3.4.13.3",
"3.4.3.4 3.4.13.5",
"3.4.3.5 3.4.11.2",
- "3.4.3.6 3.4.13.18",
+ "3.4.3.6 3.4.13.18 3.4.13.19",
"3.4.3.7 3.4.13.9",
"3.4.4.1 3.4.23.1",
"3.4.4.2 3.4.23.2",
@@ -414,7 +491,7 @@ static const char* const kECNum_replaced[] = {
"3.4.4.4 3.4.21.4",
"3.4.4.5 3.4.21.1",
"3.4.4.6 3.4.21.1",
- "3.4.4.7 3.4.21.36",
+ "3.4.4.7 3.4.21.36 3.4.21.37",
"3.4.4.8 3.4.21.9",
"3.4.4.9 3.4.14.1",
"3.4.4.10 3.4.22.2",
@@ -423,17 +500,18 @@ static const char* const kECNum_replaced[] = {
"3.4.4.13 3.4.21.5",
"3.4.4.14 3.4.21.7",
"3.4.4.15 3.4.23.15",
- "3.4.4.16 3.4.21.62",
- "3.4.4.17 3.4.23.20",
+ "3.4.4.16 3.4.21.62 3.4.21.63 3.4.21.64 3.4.21.65 3.4.21.66 3.4.21.67",
+ "3.4.4.17 3.4.21.103 3.4.23.20 3.4.23.21 3.4.23.22 3.4.23.23 3.4.23.24 3.4.23.25 3.4.23.26 3.4.23.28 3.4.23.29 3.4.23.30",
"3.4.4.18 3.4.22.10",
"3.4.4.19 3.4.24.3",
"3.4.4.20 3.4.22.8",
"3.4.4.21 3.4.21.34",
"3.4.4.22 3.4.23.3",
"3.4.4.23 3.4.23.5",
- "3.4.4.24 3.4.22.32",
+ "3.4.4.24 3.4.22.32 3.4.22.33",
"3.4.11.8 3.4.19.3",
- "3.4.12.1 3.4.16.5",
+ "3.4.11.n1 3.4.11.24",
+ "3.4.12.1 3.4.16.5 3.4.16.6",
"3.4.12.2 3.4.17.1",
"3.4.12.3 3.4.17.2",
"3.4.12.4 3.4.16.2",
@@ -443,48 +521,48 @@ static const char* const kECNum_replaced[] = {
"3.4.12.8 3.4.17.4",
"3.4.12.10 3.4.19.9",
"3.4.12.11 3.4.17.6",
- "3.4.12.12 3.4.16.5",
- "3.4.13.1 3.4.13.18",
- "3.4.13.2 3.4.13.18",
+ "3.4.12.12 3.4.16.5 3.4.16.6",
+ "3.4.13.1 3.4.13.18 3.4.13.19",
+ "3.4.13.2 3.4.13.18 3.4.13.19",
"3.4.13.6 3.4.11.2",
- "3.4.13.8 3.4.13.18",
+ "3.4.13.8 3.4.13.18 3.4.13.19",
"3.4.13.10 3.4.19.5",
- "3.4.13.11 3.4.13.18",
+ "3.4.13.11 3.4.13.18 3.4.13.19",
"3.4.13.13 3.4.13.3",
- "3.4.13.15 3.4.13.18",
+ "3.4.13.15 3.4.13.18 3.4.13.19",
"3.4.14.3 3.4.19.1",
- "3.4.14.8 3.4.14.9",
+ "3.4.14.8 3.4.14.9 3.4.14.10",
"3.4.15.2 3.4.19.2",
"3.4.15.3 3.4.15.5",
- "3.4.16.1 3.4.16.5",
- "3.4.16.3 3.4.16.5",
+ "3.4.16.1 3.4.16.5 3.4.16.6",
+ "3.4.16.3 3.4.16.5 3.4.16.6",
"3.4.17.7 3.5.1.28",
"3.4.17.9 3.4.17.4",
"3.4.19.8 3.4.17.21",
"3.4.19.10 3.5.1.28",
- "3.4.21.8 3.4.21.35",
- "3.4.21.11 3.4.21.36",
- "3.4.21.13 3.4.16.5",
- "3.4.21.14 3.4.21.62",
+ "3.4.21.8 3.4.21.34 3.4.21.35",
+ "3.4.21.11 3.4.21.36 3.4.21.37",
+ "3.4.21.13 3.4.16.5 3.4.16.6",
+ "3.4.21.14 3.4.21.62 3.4.21.63 3.4.21.64 3.4.21.65 3.4.21.67",
"3.4.21.15 3.4.21.63",
"3.4.21.28 3.4.21.74",
"3.4.21.29 3.4.21.74",
"3.4.21.30 3.4.21.74",
- "3.4.21.31 3.4.21.68",
+ "3.4.21.31 3.4.21.68 3.4.21.73",
"3.4.21.44 3.4.21.43",
"3.4.21.87 3.4.23.49",
- "3.4.22.4 3.4.22.32",
+ "3.4.22.4 3.4.22.32 3.4.22.33",
"3.4.22.5 3.4.22.33",
"3.4.22.9 3.4.21.48",
"3.4.22.11 3.4.24.56",
"3.4.22.12 3.4.19.9",
- "3.4.22.17 3.4.22.52",
+ "3.4.22.17 3.4.22.52 3.4.22.53",
"3.4.22.18 3.4.21.26",
"3.4.22.19 3.4.24.15",
"3.4.22.21 3.4.25.1",
"3.4.22.22 3.4.24.37",
"3.4.22.23 3.4.21.61",
- "3.4.23.6 3.4.23.18",
+ "3.4.23.6 3.4.21.103 3.4.23.18 3.4.23.19 3.4.23.20 3.4.23.21 3.4.23.22 3.4.23.23 3.4.23.24 3.4.23.25 3.4.23.26 3.4.23.28 3.4.23.30",
"3.4.23.7 3.4.23.20",
"3.4.23.8 3.4.23.25",
"3.4.23.9 3.4.23.21",
@@ -492,8 +570,8 @@ static const char* const kECNum_replaced[] = {
"3.4.23.27 3.4.21.103",
"3.4.23.33 3.4.21.101",
"3.4.23.37 3.4.21.100",
- "3.4.24.4 3.4.24.25",
- "3.4.24.5 3.4.25.1",
+ "3.4.24.4 3.4.24.25 3.4.24.26 3.4.24.27 3.4.24.28 3.4.24.29 3.4.24.30 3.4.24.31 3.4.24.32 3.4.24.39 3.4.24.40",
+ "3.4.24.5 3.4.22.52 3.4.22.53 3.4.25.1",
"3.4.24.8 3.4.24.3",
"3.4.99.1 3.4.23.28",
"3.4.99.4 3.4.23.12",
@@ -504,7 +582,7 @@ static const char* const kECNum_replaced[] = {
"3.4.99.19 3.4.23.15",
"3.4.99.22 3.4.24.29",
"3.4.99.25 3.4.23.21",
- "3.4.99.26 3.4.21.73",
+ "3.4.99.26 3.4.21.68 3.4.21.73",
"3.4.99.28 3.4.21.60",
"3.4.99.30 3.4.24.20",
"3.4.99.31 3.4.24.15",
@@ -520,6 +598,9 @@ static const char* const kECNum_replaced[] = {
"3.5.1.34 3.4.13.5",
"3.5.1.37 3.5.1.26",
"3.5.1.45 6.3.4.6",
+ "3.5.1.80 3.5.1.25",
+ "3.5.1.n1 3.5.1.108",
+ "3.5.1.n2 3.5.1.99",
"3.5.2.8 3.5.2.6",
"3.5.5.3 4.2.1.104",
"3.6.1.4 3.6.1.3",
@@ -536,6 +617,9 @@ static const char* const kECNum_replaced[] = {
"3.6.1.49 3.6.5.4",
"3.6.1.50 3.6.5.5",
"3.6.1.51 3.6.5.6",
+ "3.6.1.n5 3.6.1.54",
+ "3.6.3.13 3.6.3.1",
+ "3.6.3.45 3.6.3.44",
"3.8.1.4 1.97.1.10",
"3.8.2.1 3.1.8.2",
"4.1.1.10 4.1.1.12",
@@ -546,7 +630,9 @@ static const char* const kECNum_replaced[] = {
"4.1.2.15 2.5.1.54",
"4.1.2.16 2.5.1.55",
"4.1.2.31 4.1.3.16",
- "4.1.2.39 4.1.2.37",
+ "4.1.2.37 4.1.2.46 4.1.2.47",
+ "4.1.2.39 4.1.2.46 4.1.2.47",
+ "4.1.2.n1 4.1.2.44",
"4.1.3.2 2.3.3.9",
"4.1.3.5 2.3.3.10",
"4.1.3.7 2.3.3.1",
@@ -581,16 +667,22 @@ static const char* const kECNum_replaced[] = {
"4.2.1.29 4.99.1.6",
"4.2.1.37 3.3.2.4",
"4.2.1.38 4.3.1.20",
- "4.2.1.63 3.3.2.9",
- "4.2.1.64 3.3.2.9",
+ "4.2.1.63 3.3.2.9 3.3.2.10",
+ "4.2.1.64 3.3.2.9 3.3.2.10",
+ "4.2.1.71 4.2.1.27",
"4.2.1.72 4.1.1.78",
+ "4.2.1.86 4.2.1.98",
"4.2.1.102 4.2.1.100",
- "4.2.2.4 4.2.2.20",
+ "4.2.2.4 4.2.2.20 4.2.2.21",
+ "4.2.3.n1 4.2.3.38",
+ "4.2.3.n3 4.2.3.56",
+ "4.2.3.n5 4.2.3.52",
+ "4.2.3.n9 4.2.3.44",
"4.2.99.1 4.2.2.1",
"4.2.99.2 4.2.3.1",
"4.2.99.3 4.2.2.2",
"4.2.99.4 4.2.2.3",
- "4.2.99.6 4.2.2.5",
+ "4.2.99.6 4.2.2.5 4.2.2.20 4.2.2.21",
"4.2.99.7 4.2.3.2",
"4.2.99.8 2.5.1.47",
"4.2.99.9 2.5.1.48",
@@ -602,7 +694,10 @@ static const char* const kECNum_replaced[] = {
"4.2.99.16 2.5.1.53",
"4.2.99.17 2.5.1.51",
"4.2.99.19 4.4.1.23",
+ "4.2.99.n1 4.2.99.20",
+ "4.3.1.5 4.3.1.23 4.3.1.24 4.3.1.25",
"4.3.1.8 2.5.1.61",
+ "4.3.1.21 4.3.1.9",
"4.3.99.1 4.2.1.104",
"4.4.1.7 2.5.1.18",
"4.4.1.18 1.8.3.5",
@@ -618,7 +713,12 @@ static const char* const kECNum_replaced[] = {
"5.3.1.19 2.6.1.16",
"5.4.99.6 5.4.4.2",
"5.4.99.10 5.4.99.11",
+ "6.1.1.n1 6.3.1.13",
+ "6.1.1.n2 6.1.1.27",
"6.2.1.21 6.2.1.30",
+ "6.2.1.29 6.2.1.7",
"6.3.1.3 6.3.4.13",
+ "6.3.2.15 6.3.2.10",
+ "6.3.2.22 6.3.1.14",
"6.3.5.8 2.6.1.85"
};
diff --git a/api/ecnum_specific.inc b/api/ecnum_specific.inc
index b7f0d4f1..03f8d197 100644
--- a/api/ecnum_specific.inc
+++ b/api/ecnum_specific.inc
@@ -1,4 +1,4 @@
-/* $Id: ecnum_specific.inc,v 1.1 2010/07/09 16:36:07 ucko Exp $
+/* $Id: ecnum_specific.inc,v 1.3 2011/06/30 16:04:31 kazimird Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
@@ -31,4262 +31,4529 @@
*/
static const char* const kECNum_specific[] = {
- "1.1.1.1",
- "1.1.1.2",
- "1.1.1.3",
- "1.1.1.4",
- "1.1.1.6",
- "1.1.1.7",
- "1.1.1.8",
- "1.1.1.9",
- "1.1.1.10",
- "1.1.1.11",
- "1.1.1.12",
- "1.1.1.13",
- "1.1.1.14",
- "1.1.1.15",
- "1.1.1.16",
- "1.1.1.17",
- "1.1.1.18",
- "1.1.1.19",
- "1.1.1.20",
- "1.1.1.21",
- "1.1.1.22",
- "1.1.1.23",
- "1.1.1.24",
- "1.1.1.25",
- "1.1.1.26",
- "1.1.1.27",
- "1.1.1.28",
- "1.1.1.29",
- "1.1.1.30",
- "1.1.1.31",
- "1.1.1.32",
- "1.1.1.33",
- "1.1.1.34",
- "1.1.1.35",
- "1.1.1.36",
- "1.1.1.37",
- "1.1.1.38",
- "1.1.1.39",
- "1.1.1.40",
- "1.1.1.41",
- "1.1.1.42",
- "1.1.1.43",
- "1.1.1.44",
- "1.1.1.45",
- "1.1.1.46",
- "1.1.1.47",
- "1.1.1.48",
- "1.1.1.49",
- "1.1.1.50",
- "1.1.1.51",
- "1.1.1.52",
- "1.1.1.53",
- "1.1.1.54",
- "1.1.1.55",
- "1.1.1.56",
- "1.1.1.57",
- "1.1.1.58",
- "1.1.1.59",
- "1.1.1.60",
- "1.1.1.61",
- "1.1.1.62",
- "1.1.1.63",
- "1.1.1.64",
- "1.1.1.65",
- "1.1.1.66",
- "1.1.1.67",
- "1.1.1.69",
- "1.1.1.71",
- "1.1.1.72",
- "1.1.1.73",
- "1.1.1.75",
- "1.1.1.76",
- "1.1.1.77",
- "1.1.1.78",
- "1.1.1.79",
- "1.1.1.80",
- "1.1.1.81",
- "1.1.1.82",
- "1.1.1.83",
- "1.1.1.84",
- "1.1.1.85",
- "1.1.1.86",
- "1.1.1.87",
- "1.1.1.88",
- "1.1.1.90",
- "1.1.1.91",
- "1.1.1.92",
- "1.1.1.93",
- "1.1.1.94",
- "1.1.1.95",
- "1.1.1.96",
- "1.1.1.97",
- "1.1.1.98",
- "1.1.1.99",
- "1.1.1.100",
- "1.1.1.101",
- "1.1.1.102",
- "1.1.1.103",
- "1.1.1.104",
- "1.1.1.105",
- "1.1.1.106",
- "1.1.1.107",
- "1.1.1.108",
- "1.1.1.110",
- "1.1.1.111",
- "1.1.1.112",
- "1.1.1.113",
- "1.1.1.114",
- "1.1.1.115",
- "1.1.1.116",
- "1.1.1.117",
- "1.1.1.118",
- "1.1.1.119",
- "1.1.1.120",
- "1.1.1.121",
- "1.1.1.122",
- "1.1.1.123",
- "1.1.1.124",
- "1.1.1.125",
- "1.1.1.126",
- "1.1.1.127",
- "1.1.1.128",
- "1.1.1.129",
- "1.1.1.130",
- "1.1.1.131",
- "1.1.1.132",
- "1.1.1.133",
- "1.1.1.134",
- "1.1.1.135",
- "1.1.1.136",
- "1.1.1.137",
- "1.1.1.138",
- "1.1.1.140",
- "1.1.1.141",
- "1.1.1.142",
- "1.1.1.143",
- "1.1.1.144",
- "1.1.1.145",
- "1.1.1.146",
- "1.1.1.147",
- "1.1.1.148",
- "1.1.1.149",
- "1.1.1.150",
- "1.1.1.151",
- "1.1.1.152",
- "1.1.1.153",
- "1.1.1.154",
- "1.1.1.156",
- "1.1.1.157",
- "1.1.1.158",
- "1.1.1.159",
- "1.1.1.160",
- "1.1.1.161",
- "1.1.1.162",
- "1.1.1.163",
- "1.1.1.164",
- "1.1.1.165",
- "1.1.1.166",
- "1.1.1.167",
- "1.1.1.168",
- "1.1.1.169",
- "1.1.1.170",
- "1.1.1.172",
- "1.1.1.173",
- "1.1.1.174",
- "1.1.1.175",
- "1.1.1.176",
- "1.1.1.177",
- "1.1.1.178",
- "1.1.1.179",
- "1.1.1.181",
- "1.1.1.183",
- "1.1.1.184",
- "1.1.1.185",
- "1.1.1.186",
- "1.1.1.187",
- "1.1.1.188",
- "1.1.1.189",
- "1.1.1.190",
- "1.1.1.191",
- "1.1.1.192",
- "1.1.1.193",
- "1.1.1.194",
- "1.1.1.195",
- "1.1.1.196",
- "1.1.1.197",
- "1.1.1.198",
- "1.1.1.199",
- "1.1.1.200",
- "1.1.1.201",
- "1.1.1.202",
- "1.1.1.203",
- "1.1.1.205",
- "1.1.1.206",
- "1.1.1.207",
- "1.1.1.208",
- "1.1.1.209",
- "1.1.1.210",
- "1.1.1.211",
- "1.1.1.212",
- "1.1.1.213",
- "1.1.1.214",
- "1.1.1.215",
- "1.1.1.216",
- "1.1.1.217",
- "1.1.1.218",
- "1.1.1.219",
- "1.1.1.220",
- "1.1.1.221",
- "1.1.1.222",
- "1.1.1.223",
- "1.1.1.224",
- "1.1.1.225",
- "1.1.1.226",
- "1.1.1.227",
- "1.1.1.228",
- "1.1.1.229",
- "1.1.1.230",
- "1.1.1.231",
- "1.1.1.232",
- "1.1.1.233",
- "1.1.1.234",
- "1.1.1.235",
- "1.1.1.236",
- "1.1.1.237",
- "1.1.1.238",
- "1.1.1.239",
- "1.1.1.240",
- "1.1.1.241",
- "1.1.1.243",
- "1.1.1.244",
- "1.1.1.245",
- "1.1.1.246",
- "1.1.1.247",
- "1.1.1.248",
- "1.1.1.250",
- "1.1.1.251",
- "1.1.1.252",
- "1.1.1.254",
- "1.1.1.255",
- "1.1.1.256",
- "1.1.1.257",
- "1.1.1.258",
- "1.1.1.259",
- "1.1.1.260",
- "1.1.1.261",
- "1.1.1.262",
- "1.1.1.263",
- "1.1.1.264",
- "1.1.1.265",
- "1.1.1.266",
- "1.1.1.267",
- "1.1.1.268",
- "1.1.1.269",
- "1.1.1.270",
- "1.1.1.271",
- "1.1.1.272",
- "1.1.1.273",
- "1.1.1.274",
- "1.1.1.275",
- "1.1.1.276",
- "1.1.1.277",
- "1.1.1.278",
- "1.1.1.279",
- "1.1.1.280",
- "1.1.1.281",
- "1.1.1.282",
- "1.1.1.283",
- "1.1.1.284",
- "1.1.1.285",
- "1.1.1.286",
- "1.1.1.287",
- "1.1.1.288",
- "1.1.1.289",
- "1.1.1.290",
- "1.1.1.291",
- "1.1.1.292",
- "1.1.1.294",
- "1.1.1.295",
- "1.1.1.296",
- "1.1.1.297",
- "1.1.1.298",
- "1.1.1.299",
- "1.1.1.300",
- "1.1.1.301",
- "1.1.1.302",
- "1.1.1.303",
- "1.1.1.304",
- "1.1.2.2",
- "1.1.2.3",
- "1.1.2.4",
- "1.1.2.5",
- "1.1.3.3",
- "1.1.3.4",
- "1.1.3.5",
- "1.1.3.6",
- "1.1.3.7",
- "1.1.3.8",
- "1.1.3.9",
- "1.1.3.10",
- "1.1.3.11",
- "1.1.3.12",
- "1.1.3.13",
- "1.1.3.14",
- "1.1.3.15",
- "1.1.3.16",
- "1.1.3.17",
- "1.1.3.18",
- "1.1.3.19",
- "1.1.3.20",
- "1.1.3.21",
- "1.1.3.23",
- "1.1.3.27",
- "1.1.3.28",
- "1.1.3.29",
- "1.1.3.30",
- "1.1.3.37",
- "1.1.3.38",
- "1.1.3.39",
- "1.1.3.40",
- "1.1.3.41",
- "1.1.4.1",
- "1.1.4.2",
- "1.1.5.2",
- "1.1.5.3",
- "1.1.5.4",
- "1.1.5.5",
- "1.1.5.6",
- "1.1.5.7",
- "1.1.99.1",
- "1.1.99.2",
- "1.1.99.3",
- "1.1.99.4",
- "1.1.99.6",
- "1.1.99.7",
- "1.1.99.8",
- "1.1.99.9",
- "1.1.99.10",
- "1.1.99.11",
- "1.1.99.12",
- "1.1.99.13",
- "1.1.99.14",
- "1.1.99.18",
- "1.1.99.20",
- "1.1.99.21",
- "1.1.99.22",
- "1.1.99.23",
- "1.1.99.24",
- "1.1.99.25",
- "1.1.99.26",
- "1.1.99.27",
- "1.1.99.28",
- "1.1.99.29",
- "1.1.99.30",
- "1.1.99.31",
- "1.1.99.32",
- "1.1.99.33",
- "1.2.1.2",
- "1.2.1.3",
- "1.2.1.4",
- "1.2.1.5",
- "1.2.1.7",
- "1.2.1.8",
- "1.2.1.9",
- "1.2.1.10",
- "1.2.1.11",
- "1.2.1.12",
- "1.2.1.13",
- "1.2.1.15",
- "1.2.1.16",
- "1.2.1.17",
- "1.2.1.18",
- "1.2.1.19",
- "1.2.1.20",
- "1.2.1.21",
- "1.2.1.22",
- "1.2.1.23",
- "1.2.1.24",
- "1.2.1.25",
- "1.2.1.26",
- "1.2.1.27",
- "1.2.1.28",
- "1.2.1.29",
- "1.2.1.30",
- "1.2.1.31",
- "1.2.1.32",
- "1.2.1.33",
- "1.2.1.36",
- "1.2.1.38",
- "1.2.1.39",
- "1.2.1.40",
- "1.2.1.41",
- "1.2.1.42",
- "1.2.1.43",
- "1.2.1.44",
- "1.2.1.45",
- "1.2.1.46",
- "1.2.1.47",
- "1.2.1.48",
- "1.2.1.49",
- "1.2.1.50",
- "1.2.1.51",
- "1.2.1.52",
- "1.2.1.53",
- "1.2.1.54",
- "1.2.1.57",
- "1.2.1.58",
- "1.2.1.59",
- "1.2.1.60",
- "1.2.1.61",
- "1.2.1.62",
- "1.2.1.63",
- "1.2.1.64",
- "1.2.1.65",
- "1.2.1.66",
- "1.2.1.67",
- "1.2.1.68",
- "1.2.1.69",
- "1.2.1.70",
- "1.2.1.71",
- "1.2.1.72",
- "1.2.1.73",
- "1.2.1.74",
- "1.2.1.75",
- "1.2.1.76",
- "1.2.1.77",
- "1.2.1.78",
- "1.2.2.1",
- "1.2.2.2",
- "1.2.2.3",
- "1.2.2.4",
- "1.2.3.1",
- "1.2.3.3",
- "1.2.3.4",
- "1.2.3.5",
- "1.2.3.6",
- "1.2.3.7",
- "1.2.3.8",
- "1.2.3.9",
- "1.2.3.11",
- "1.2.3.13",
- "1.2.3.14",
- "1.2.4.1",
- "1.2.4.2",
- "1.2.4.4",
- "1.2.7.1",
- "1.2.7.2",
- "1.2.7.3",
- "1.2.7.4",
- "1.2.7.5",
- "1.2.7.6",
- "1.2.7.7",
- "1.2.7.8",
- "1.2.99.2",
- "1.2.99.3",
- "1.2.99.4",
- "1.2.99.5",
- "1.2.99.6",
- "1.2.99.7",
- "1.3.1.1",
- "1.3.1.2",
- "1.3.1.3",
- "1.3.1.4",
- "1.3.1.5",
- "1.3.1.6",
- "1.3.1.7",
- "1.3.1.8",
- "1.3.1.9",
- "1.3.1.10",
- "1.3.1.11",
- "1.3.1.12",
- "1.3.1.13",
- "1.3.1.14",
- "1.3.1.15",
- "1.3.1.16",
- "1.3.1.17",
- "1.3.1.18",
- "1.3.1.19",
- "1.3.1.20",
- "1.3.1.21",
- "1.3.1.22",
- "1.3.1.24",
- "1.3.1.25",
- "1.3.1.26",
- "1.3.1.27",
- "1.3.1.28",
- "1.3.1.29",
- "1.3.1.30",
- "1.3.1.31",
- "1.3.1.32",
- "1.3.1.33",
- "1.3.1.34",
- "1.3.1.35",
- "1.3.1.36",
- "1.3.1.37",
- "1.3.1.38",
- "1.3.1.39",
- "1.3.1.40",
- "1.3.1.41",
- "1.3.1.42",
- "1.3.1.43",
- "1.3.1.44",
- "1.3.1.45",
- "1.3.1.46",
- "1.3.1.47",
- "1.3.1.48",
- "1.3.1.49",
- "1.3.1.51",
- "1.3.1.52",
- "1.3.1.53",
- "1.3.1.54",
- "1.3.1.56",
- "1.3.1.57",
- "1.3.1.58",
- "1.3.1.60",
- "1.3.1.62",
- "1.3.1.63",
- "1.3.1.64",
- "1.3.1.65",
- "1.3.1.66",
- "1.3.1.67",
- "1.3.1.68",
- "1.3.1.69",
- "1.3.1.70",
- "1.3.1.71",
- "1.3.1.72",
- "1.3.1.73",
- "1.3.1.74",
- "1.3.1.75",
- "1.3.1.76",
- "1.3.1.77",
- "1.3.1.78",
- "1.3.1.79",
- "1.3.1.80",
- "1.3.1.81",
- "1.3.1.82",
- "1.3.1.83",
- "1.3.1.84",
- "1.3.2.3",
- "1.3.3.1",
- "1.3.3.3",
- "1.3.3.4",
- "1.3.3.5",
- "1.3.3.6",
- "1.3.3.7",
- "1.3.3.8",
- "1.3.3.9",
- "1.3.3.10",
- "1.3.3.11",
- "1.3.3.12",
- "1.3.5.1",
- "1.3.5.2",
- "1.3.7.1",
- "1.3.7.2",
- "1.3.7.3",
- "1.3.7.4",
- "1.3.7.5",
- "1.3.7.6",
- "1.3.99.1",
- "1.3.99.2",
- "1.3.99.3",
- "1.3.99.4",
- "1.3.99.5",
- "1.3.99.6",
- "1.3.99.7",
- "1.3.99.8",
- "1.3.99.10",
- "1.3.99.12",
- "1.3.99.13",
- "1.3.99.14",
- "1.3.99.15",
- "1.3.99.16",
- "1.3.99.17",
- "1.3.99.18",
- "1.3.99.19",
- "1.3.99.20",
- "1.3.99.21",
- "1.3.99.22",
- "1.3.99.23",
- "1.3.99.24",
- "1.3.99.25",
- "1.4.1.1",
- "1.4.1.2",
- "1.4.1.3",
- "1.4.1.4",
- "1.4.1.5",
- "1.4.1.7",
- "1.4.1.8",
- "1.4.1.9",
- "1.4.1.10",
- "1.4.1.11",
- "1.4.1.12",
- "1.4.1.13",
- "1.4.1.14",
- "1.4.1.15",
- "1.4.1.16",
- "1.4.1.17",
- "1.4.1.18",
- "1.4.1.19",
- "1.4.1.20",
- "1.4.1.21",
- "1.4.2.1",
- "1.4.3.1",
- "1.4.3.2",
- "1.4.3.3",
- "1.4.3.4",
- "1.4.3.5",
- "1.4.3.7",
- "1.4.3.8",
- "1.4.3.10",
- "1.4.3.11",
- "1.4.3.12",
- "1.4.3.13",
- "1.4.3.14",
- "1.4.3.15",
- "1.4.3.16",
- "1.4.3.19",
- "1.4.3.20",
- "1.4.3.21",
- "1.4.3.22",
- "1.4.3.23",
- "1.4.4.2",
- "1.4.5.1",
- "1.4.7.1",
- "1.4.99.1",
- "1.4.99.2",
- "1.4.99.3",
- "1.4.99.4",
- "1.4.99.5",
- "1.5.1.1",
- "1.5.1.2",
- "1.5.1.3",
- "1.5.1.5",
- "1.5.1.6",
- "1.5.1.7",
- "1.5.1.8",
- "1.5.1.9",
- "1.5.1.10",
- "1.5.1.11",
- "1.5.1.12",
- "1.5.1.15",
- "1.5.1.16",
- "1.5.1.17",
- "1.5.1.18",
- "1.5.1.19",
- "1.5.1.20",
- "1.5.1.21",
- "1.5.1.22",
- "1.5.1.23",
- "1.5.1.24",
- "1.5.1.25",
- "1.5.1.26",
- "1.5.1.27",
- "1.5.1.28",
- "1.5.1.29",
- "1.5.1.30",
- "1.5.1.31",
- "1.5.1.32",
- "1.5.1.33",
- "1.5.1.34",
- "1.5.3.1",
- "1.5.3.2",
- "1.5.3.4",
- "1.5.3.5",
- "1.5.3.6",
- "1.5.3.7",
- "1.5.3.10",
- "1.5.3.11",
- "1.5.3.12",
- "1.5.3.13",
- "1.5.3.14",
- "1.5.3.15",
- "1.5.3.16",
- "1.5.3.17",
- "1.5.4.1",
- "1.5.5.1",
- "1.5.7.1",
- "1.5.8.1",
- "1.5.8.2",
- "1.5.99.1",
- "1.5.99.2",
- "1.5.99.3",
- "1.5.99.4",
- "1.5.99.5",
- "1.5.99.6",
- "1.5.99.8",
- "1.5.99.9",
- "1.5.99.11",
- "1.5.99.12",
- "1.5.99.13",
- "1.6.1.1",
- "1.6.1.2",
- "1.6.2.2",
- "1.6.2.4",
- "1.6.2.5",
- "1.6.2.6",
- "1.6.3.1",
- "1.6.5.2",
- "1.6.5.3",
- "1.6.5.4",
- "1.6.5.5",
- "1.6.5.6",
- "1.6.5.7",
- "1.6.6.9",
- "1.6.99.1",
- "1.6.99.3",
- "1.6.99.5",
- "1.6.99.6",
- "1.7.1.1",
- "1.7.1.2",
- "1.7.1.3",
- "1.7.1.4",
- "1.7.1.5",
- "1.7.1.6",
- "1.7.1.7",
- "1.7.1.9",
- "1.7.1.10",
- "1.7.1.11",
- "1.7.1.12",
- "1.7.1.13",
- "1.7.2.1",
- "1.7.2.2",
- "1.7.2.3",
- "1.7.3.1",
- "1.7.3.2",
- "1.7.3.3",
- "1.7.3.4",
- "1.7.3.5",
- "1.7.5.1",
- "1.7.7.1",
- "1.7.7.2",
- "1.7.99.1",
- "1.7.99.4",
- "1.7.99.6",
- "1.7.99.7",
- "1.7.99.8",
- "1.8.1.2",
- "1.8.1.3",
- "1.8.1.4",
- "1.8.1.5",
- "1.8.1.6",
- "1.8.1.7",
- "1.8.1.8",
- "1.8.1.9",
- "1.8.1.10",
- "1.8.1.11",
- "1.8.1.12",
- "1.8.1.13",
- "1.8.1.14",
- "1.8.1.15",
- "1.8.1.16",
- "1.8.2.1",
- "1.8.2.2",
- "1.8.3.1",
- "1.8.3.2",
- "1.8.3.3",
- "1.8.3.4",
- "1.8.3.5",
- "1.8.4.1",
- "1.8.4.2",
- "1.8.4.3",
- "1.8.4.4",
- "1.8.4.7",
- "1.8.4.8",
- "1.8.4.9",
- "1.8.4.10",
- "1.8.4.11",
- "1.8.4.12",
- "1.8.4.13",
- "1.8.4.14",
- "1.8.5.1",
- "1.8.5.2",
- "1.8.7.1",
- "1.8.98.1",
- "1.8.98.2",
- "1.8.99.1",
- "1.8.99.2",
- "1.8.99.3",
- "1.9.3.1",
- "1.9.6.1",
- "1.9.99.1",
- "1.10.1.1",
- "1.10.2.1",
- "1.10.2.2",
- "1.10.3.1",
- "1.10.3.2",
- "1.10.3.3",
- "1.10.3.4",
- "1.10.3.5",
- "1.10.3.6",
- "1.10.99.1",
- "1.10.99.2",
- "1.10.99.3",
- "1.11.1.1",
- "1.11.1.2",
- "1.11.1.3",
- "1.11.1.5",
- "1.11.1.6",
- "1.11.1.7",
- "1.11.1.8",
- "1.11.1.9",
- "1.11.1.10",
- "1.11.1.11",
- "1.11.1.12",
- "1.11.1.13",
- "1.11.1.14",
- "1.11.1.15",
- "1.11.1.16",
- "1.11.1.17",
- "1.12.1.2",
- "1.12.1.3",
- "1.12.2.1",
- "1.12.5.1",
- "1.12.7.2",
- "1.12.98.1",
- "1.12.98.2",
- "1.12.98.3",
- "1.12.99.6",
- "1.13.11.1",
- "1.13.11.2",
- "1.13.11.3",
- "1.13.11.4",
- "1.13.11.5",
- "1.13.11.6",
- "1.13.11.8",
- "1.13.11.9",
- "1.13.11.10",
- "1.13.11.11",
- "1.13.11.12",
- "1.13.11.13",
- "1.13.11.14",
- "1.13.11.15",
- "1.13.11.16",
- "1.13.11.17",
- "1.13.11.18",
- "1.13.11.19",
- "1.13.11.20",
- "1.13.11.22",
- "1.13.11.23",
- "1.13.11.24",
- "1.13.11.25",
- "1.13.11.26",
- "1.13.11.27",
- "1.13.11.28",
- "1.13.11.29",
- "1.13.11.30",
- "1.13.11.31",
- "1.13.11.33",
- "1.13.11.34",
- "1.13.11.35",
- "1.13.11.36",
- "1.13.11.37",
- "1.13.11.38",
- "1.13.11.39",
- "1.13.11.40",
- "1.13.11.41",
- "1.13.11.43",
- "1.13.11.44",
- "1.13.11.45",
- "1.13.11.46",
- "1.13.11.47",
- "1.13.11.48",
- "1.13.11.49",
- "1.13.11.50",
- "1.13.11.51",
- "1.13.11.52",
- "1.13.11.53",
- "1.13.11.54",
- "1.13.11.55",
- "1.13.11.56",
- "1.13.12.1",
- "1.13.12.2",
- "1.13.12.3",
- "1.13.12.4",
- "1.13.12.5",
- "1.13.12.6",
- "1.13.12.7",
- "1.13.12.8",
- "1.13.12.9",
- "1.13.12.12",
- "1.13.12.13",
- "1.13.12.14",
- "1.13.12.15",
- "1.13.12.16",
- "1.13.12.17",
- "1.13.99.1",
- "1.13.99.3",
- "1.14.11.1",
- "1.14.11.2",
- "1.14.11.3",
- "1.14.11.4",
- "1.14.11.6",
- "1.14.11.7",
- "1.14.11.8",
- "1.14.11.9",
- "1.14.11.10",
- "1.14.11.11",
- "1.14.11.12",
- "1.14.11.13",
- "1.14.11.14",
- "1.14.11.15",
- "1.14.11.16",
- "1.14.11.17",
- "1.14.11.18",
- "1.14.11.19",
- "1.14.11.20",
- "1.14.11.21",
- "1.14.11.22",
- "1.14.11.23",
- "1.14.11.24",
- "1.14.11.25",
- "1.14.11.26",
- "1.14.11.27",
- "1.14.11.28",
- "1.14.12.1",
- "1.14.12.3",
- "1.14.12.4",
- "1.14.12.5",
- "1.14.12.7",
- "1.14.12.8",
- "1.14.12.9",
- "1.14.12.10",
- "1.14.12.11",
- "1.14.12.12",
- "1.14.12.13",
- "1.14.12.14",
- "1.14.12.15",
- "1.14.12.16",
- "1.14.12.17",
- "1.14.12.18",
- "1.14.12.19",
- "1.14.12.20",
- "1.14.12.21",
- "1.14.13.1",
- "1.14.13.2",
- "1.14.13.3",
- "1.14.13.4",
- "1.14.13.5",
- "1.14.13.6",
- "1.14.13.7",
- "1.14.13.8",
- "1.14.13.9",
- "1.14.13.10",
- "1.14.13.11",
- "1.14.13.12",
- "1.14.13.13",
- "1.14.13.14",
- "1.14.13.15",
- "1.14.13.16",
- "1.14.13.17",
- "1.14.13.18",
- "1.14.13.19",
- "1.14.13.20",
- "1.14.13.21",
- "1.14.13.22",
- "1.14.13.23",
- "1.14.13.24",
- "1.14.13.25",
- "1.14.13.26",
- "1.14.13.27",
- "1.14.13.28",
- "1.14.13.29",
- "1.14.13.30",
- "1.14.13.31",
- "1.14.13.32",
- "1.14.13.33",
- "1.14.13.34",
- "1.14.13.35",
- "1.14.13.36",
- "1.14.13.37",
- "1.14.13.38",
- "1.14.13.39",
- "1.14.13.40",
- "1.14.13.41",
- "1.14.13.42",
- "1.14.13.43",
- "1.14.13.44",
- "1.14.13.46",
- "1.14.13.47",
- "1.14.13.48",
- "1.14.13.49",
- "1.14.13.50",
- "1.14.13.51",
- "1.14.13.52",
- "1.14.13.53",
- "1.14.13.54",
- "1.14.13.55",
- "1.14.13.56",
- "1.14.13.57",
- "1.14.13.58",
- "1.14.13.59",
- "1.14.13.60",
- "1.14.13.61",
- "1.14.13.62",
- "1.14.13.63",
- "1.14.13.64",
- "1.14.13.66",
- "1.14.13.67",
- "1.14.13.68",
- "1.14.13.69",
- "1.14.13.70",
- "1.14.13.71",
- "1.14.13.72",
- "1.14.13.73",
- "1.14.13.74",
- "1.14.13.75",
- "1.14.13.76",
- "1.14.13.77",
- "1.14.13.78",
- "1.14.13.79",
- "1.14.13.80",
- "1.14.13.81",
- "1.14.13.82",
- "1.14.13.83",
- "1.14.13.84",
- "1.14.13.85",
- "1.14.13.86",
- "1.14.13.87",
- "1.14.13.88",
- "1.14.13.89",
- "1.14.13.90",
- "1.14.13.91",
- "1.14.13.92",
- "1.14.13.93",
- "1.14.13.94",
- "1.14.13.95",
- "1.14.13.96",
- "1.14.13.97",
- "1.14.13.98",
- "1.14.13.99",
- "1.14.13.100",
- "1.14.13.101",
- "1.14.13.102",
- "1.14.13.103",
- "1.14.13.104",
- "1.14.13.105",
- "1.14.13.106",
- "1.14.13.107",
- "1.14.13.108",
- "1.14.13.109",
- "1.14.13.110",
- "1.14.13.111",
- "1.14.13.112",
- "1.14.13.113",
- "1.14.14.1",
- "1.14.14.3",
- "1.14.14.5",
- "1.14.14.7",
- "1.14.15.1",
- "1.14.15.2",
- "1.14.15.3",
- "1.14.15.4",
- "1.14.15.5",
- "1.14.15.6",
- "1.14.15.7",
- "1.14.15.8",
- "1.14.16.1",
- "1.14.16.2",
- "1.14.16.3",
- "1.14.16.4",
- "1.14.16.5",
- "1.14.16.6",
- "1.14.17.1",
- "1.14.17.3",
- "1.14.17.4",
- "1.14.18.1",
- "1.14.18.2",
- "1.14.19.1",
- "1.14.19.2",
- "1.14.19.3",
- "1.14.19.4",
- "1.14.19.5",
- "1.14.19.6",
- "1.14.20.1",
- "1.14.21.1",
- "1.14.21.2",
- "1.14.21.3",
- "1.14.21.4",
- "1.14.21.5",
- "1.14.21.6",
- "1.14.21.7",
- "1.14.99.1",
- "1.14.99.2",
- "1.14.99.3",
- "1.14.99.4",
- "1.14.99.7",
- "1.14.99.9",
- "1.14.99.10",
- "1.14.99.11",
- "1.14.99.12",
- "1.14.99.14",
- "1.14.99.15",
- "1.14.99.19",
- "1.14.99.20",
- "1.14.99.21",
- "1.14.99.22",
- "1.14.99.23",
- "1.14.99.24",
- "1.14.99.26",
- "1.14.99.27",
- "1.14.99.28",
- "1.14.99.29",
- "1.14.99.30",
- "1.14.99.31",
- "1.14.99.32",
- "1.14.99.33",
- "1.14.99.34",
- "1.14.99.35",
- "1.14.99.36",
- "1.14.99.37",
- "1.14.99.38",
- "1.14.99.39",
- "1.14.99.40",
- "1.15.1.1",
- "1.15.1.2",
- "1.16.1.1",
- "1.16.1.2",
- "1.16.1.3",
- "1.16.1.4",
- "1.16.1.5",
- "1.16.1.6",
- "1.16.1.7",
- "1.16.1.8",
- "1.16.3.1",
- "1.16.8.1",
- "1.17.1.1",
- "1.17.1.2",
- "1.17.1.3",
- "1.17.1.4",
- "1.17.1.5",
- "1.17.3.1",
- "1.17.3.2",
- "1.17.3.3",
- "1.17.4.1",
- "1.17.4.2",
- "1.17.5.1",
- "1.17.7.1",
- "1.17.99.1",
- "1.17.99.2",
- "1.17.99.3",
- "1.17.99.4",
- "1.17.99.5",
- "1.18.1.1",
- "1.18.1.2",
- "1.18.1.3",
- "1.18.1.4",
- "1.18.6.1",
- "1.19.6.1",
- "1.20.1.1",
- "1.20.4.1",
- "1.20.4.2",
- "1.20.4.3",
- "1.20.98.1",
- "1.20.99.1",
- "1.21.3.1",
- "1.21.3.2",
- "1.21.3.3",
- "1.21.3.4",
- "1.21.3.5",
- "1.21.3.6",
- "1.21.4.1",
- "1.21.4.2",
- "1.21.4.3",
- "1.21.4.4",
- "1.21.99.1",
- "1.22.1.1",
- "1.97.1.1",
- "1.97.1.2",
- "1.97.1.3",
- "1.97.1.4",
- "1.97.1.8",
- "1.97.1.9",
- "1.97.1.10",
- "1.97.1.11",
- "2.1.1.1",
- "2.1.1.2",
- "2.1.1.3",
- "2.1.1.4",
- "2.1.1.5",
- "2.1.1.6",
- "2.1.1.7",
- "2.1.1.8",
- "2.1.1.9",
- "2.1.1.10",
- "2.1.1.11",
- "2.1.1.12",
- "2.1.1.13",
- "2.1.1.14",
- "2.1.1.15",
- "2.1.1.16",
- "2.1.1.17",
- "2.1.1.18",
- "2.1.1.19",
- "2.1.1.20",
- "2.1.1.21",
- "2.1.1.22",
- "2.1.1.25",
- "2.1.1.26",
- "2.1.1.27",
- "2.1.1.28",
- "2.1.1.29",
- "2.1.1.31",
- "2.1.1.32",
- "2.1.1.33",
- "2.1.1.34",
- "2.1.1.35",
- "2.1.1.36",
- "2.1.1.37",
- "2.1.1.38",
- "2.1.1.39",
- "2.1.1.40",
- "2.1.1.41",
- "2.1.1.42",
- "2.1.1.43",
- "2.1.1.44",
- "2.1.1.45",
- "2.1.1.46",
- "2.1.1.47",
- "2.1.1.48",
- "2.1.1.49",
- "2.1.1.50",
- "2.1.1.51",
- "2.1.1.52",
- "2.1.1.53",
- "2.1.1.54",
- "2.1.1.55",
- "2.1.1.56",
- "2.1.1.57",
- "2.1.1.59",
- "2.1.1.60",
- "2.1.1.61",
- "2.1.1.62",
- "2.1.1.63",
- "2.1.1.64",
- "2.1.1.65",
- "2.1.1.66",
- "2.1.1.67",
- "2.1.1.68",
- "2.1.1.69",
- "2.1.1.70",
- "2.1.1.71",
- "2.1.1.72",
- "2.1.1.74",
- "2.1.1.75",
- "2.1.1.76",
- "2.1.1.77",
- "2.1.1.78",
- "2.1.1.79",
- "2.1.1.80",
- "2.1.1.82",
- "2.1.1.83",
- "2.1.1.84",
- "2.1.1.85",
- "2.1.1.86",
- "2.1.1.87",
- "2.1.1.88",
- "2.1.1.89",
- "2.1.1.90",
- "2.1.1.91",
- "2.1.1.94",
- "2.1.1.95",
- "2.1.1.96",
- "2.1.1.97",
- "2.1.1.98",
- "2.1.1.99",
- "2.1.1.100",
- "2.1.1.101",
- "2.1.1.102",
- "2.1.1.103",
- "2.1.1.104",
- "2.1.1.105",
- "2.1.1.106",
- "2.1.1.107",
- "2.1.1.108",
- "2.1.1.109",
- "2.1.1.110",
- "2.1.1.111",
- "2.1.1.112",
- "2.1.1.113",
- "2.1.1.114",
- "2.1.1.115",
- "2.1.1.116",
- "2.1.1.117",
- "2.1.1.118",
- "2.1.1.119",
- "2.1.1.120",
- "2.1.1.121",
- "2.1.1.122",
- "2.1.1.123",
- "2.1.1.124",
- "2.1.1.125",
- "2.1.1.126",
- "2.1.1.127",
- "2.1.1.128",
- "2.1.1.129",
- "2.1.1.130",
- "2.1.1.131",
- "2.1.1.132",
- "2.1.1.133",
- "2.1.1.136",
- "2.1.1.137",
- "2.1.1.139",
- "2.1.1.140",
- "2.1.1.141",
- "2.1.1.142",
- "2.1.1.143",
- "2.1.1.144",
- "2.1.1.145",
- "2.1.1.146",
- "2.1.1.147",
- "2.1.1.148",
- "2.1.1.149",
- "2.1.1.150",
- "2.1.1.151",
- "2.1.1.152",
- "2.1.1.153",
- "2.1.1.154",
- "2.1.1.155",
- "2.1.1.156",
- "2.1.1.157",
- "2.1.1.158",
- "2.1.1.159",
- "2.1.1.160",
- "2.1.1.161",
- "2.1.1.162",
- "2.1.1.163",
- "2.1.1.164",
- "2.1.1.165",
- "2.1.2.1",
- "2.1.2.2",
- "2.1.2.3",
- "2.1.2.4",
- "2.1.2.5",
- "2.1.2.7",
- "2.1.2.8",
- "2.1.2.9",
- "2.1.2.10",
- "2.1.2.11",
- "2.1.3.1",
- "2.1.3.2",
- "2.1.3.3",
- "2.1.3.5",
- "2.1.3.6",
- "2.1.3.7",
- "2.1.3.8",
- "2.1.3.9",
- "2.1.3.10",
- "2.1.3.11",
- "2.1.4.1",
- "2.1.4.2",
- "2.2.1.1",
- "2.2.1.2",
- "2.2.1.3",
- "2.2.1.4",
- "2.2.1.5",
- "2.2.1.6",
- "2.2.1.7",
- "2.2.1.8",
- "2.2.1.9",
- "2.3.1.1",
- "2.3.1.2",
- "2.3.1.3",
- "2.3.1.4",
- "2.3.1.5",
- "2.3.1.6",
- "2.3.1.7",
- "2.3.1.8",
- "2.3.1.9",
- "2.3.1.10",
- "2.3.1.11",
- "2.3.1.12",
- "2.3.1.13",
- "2.3.1.14",
- "2.3.1.15",
- "2.3.1.16",
- "2.3.1.17",
- "2.3.1.18",
- "2.3.1.19",
- "2.3.1.20",
- "2.3.1.21",
- "2.3.1.22",
- "2.3.1.23",
- "2.3.1.24",
- "2.3.1.25",
- "2.3.1.26",
- "2.3.1.27",
- "2.3.1.28",
- "2.3.1.29",
- "2.3.1.30",
- "2.3.1.31",
- "2.3.1.32",
- "2.3.1.33",
- "2.3.1.34",
- "2.3.1.35",
- "2.3.1.36",
- "2.3.1.37",
- "2.3.1.38",
- "2.3.1.39",
- "2.3.1.40",
- "2.3.1.41",
- "2.3.1.42",
- "2.3.1.43",
- "2.3.1.44",
- "2.3.1.45",
- "2.3.1.46",
- "2.3.1.47",
- "2.3.1.48",
- "2.3.1.49",
- "2.3.1.50",
- "2.3.1.51",
- "2.3.1.52",
- "2.3.1.53",
- "2.3.1.54",
- "2.3.1.56",
- "2.3.1.57",
- "2.3.1.58",
- "2.3.1.59",
- "2.3.1.60",
- "2.3.1.61",
- "2.3.1.62",
- "2.3.1.63",
- "2.3.1.64",
- "2.3.1.65",
- "2.3.1.66",
- "2.3.1.67",
- "2.3.1.68",
- "2.3.1.69",
- "2.3.1.71",
- "2.3.1.72",
- "2.3.1.73",
- "2.3.1.74",
- "2.3.1.75",
- "2.3.1.76",
- "2.3.1.77",
- "2.3.1.78",
- "2.3.1.79",
- "2.3.1.80",
- "2.3.1.81",
- "2.3.1.82",
- "2.3.1.83",
- "2.3.1.84",
- "2.3.1.85",
- "2.3.1.86",
- "2.3.1.87",
- "2.3.1.88",
- "2.3.1.89",
- "2.3.1.90",
- "2.3.1.91",
- "2.3.1.92",
- "2.3.1.93",
- "2.3.1.94",
- "2.3.1.95",
- "2.3.1.96",
- "2.3.1.97",
- "2.3.1.98",
- "2.3.1.99",
- "2.3.1.100",
- "2.3.1.101",
- "2.3.1.102",
- "2.3.1.103",
- "2.3.1.104",
- "2.3.1.105",
- "2.3.1.106",
- "2.3.1.107",
- "2.3.1.108",
- "2.3.1.109",
- "2.3.1.110",
- "2.3.1.111",
- "2.3.1.112",
- "2.3.1.113",
- "2.3.1.114",
- "2.3.1.115",
- "2.3.1.116",
- "2.3.1.117",
- "2.3.1.118",
- "2.3.1.119",
- "2.3.1.121",
- "2.3.1.122",
- "2.3.1.123",
- "2.3.1.125",
- "2.3.1.126",
- "2.3.1.127",
- "2.3.1.128",
- "2.3.1.129",
- "2.3.1.130",
- "2.3.1.131",
- "2.3.1.132",
- "2.3.1.133",
- "2.3.1.134",
- "2.3.1.135",
- "2.3.1.136",
- "2.3.1.137",
- "2.3.1.138",
- "2.3.1.139",
- "2.3.1.140",
- "2.3.1.141",
- "2.3.1.142",
- "2.3.1.143",
- "2.3.1.144",
- "2.3.1.145",
- "2.3.1.146",
- "2.3.1.147",
- "2.3.1.148",
- "2.3.1.149",
- "2.3.1.150",
- "2.3.1.151",
- "2.3.1.152",
- "2.3.1.153",
- "2.3.1.154",
- "2.3.1.155",
- "2.3.1.156",
- "2.3.1.157",
- "2.3.1.158",
- "2.3.1.159",
- "2.3.1.160",
- "2.3.1.161",
- "2.3.1.162",
- "2.3.1.163",
- "2.3.1.164",
- "2.3.1.165",
- "2.3.1.166",
- "2.3.1.167",
- "2.3.1.168",
- "2.3.1.169",
- "2.3.1.170",
- "2.3.1.171",
- "2.3.1.172",
- "2.3.1.173",
- "2.3.1.174",
- "2.3.1.175",
- "2.3.1.176",
- "2.3.1.177",
- "2.3.1.178",
- "2.3.1.179",
- "2.3.1.180",
- "2.3.1.181",
- "2.3.1.182",
- "2.3.1.183",
- "2.3.1.184",
- "2.3.1.185",
- "2.3.1.186",
- "2.3.1.187",
- "2.3.1.188",
- "2.3.1.189",
- "2.3.1.190",
- "2.3.2.1",
- "2.3.2.2",
- "2.3.2.3",
- "2.3.2.4",
- "2.3.2.5",
- "2.3.2.6",
- "2.3.2.7",
- "2.3.2.8",
- "2.3.2.9",
- "2.3.2.10",
- "2.3.2.11",
- "2.3.2.12",
- "2.3.2.13",
- "2.3.2.14",
- "2.3.2.15",
- "2.3.3.1",
- "2.3.3.2",
- "2.3.3.3",
- "2.3.3.4",
- "2.3.3.5",
- "2.3.3.6",
- "2.3.3.7",
- "2.3.3.8",
- "2.3.3.9",
- "2.3.3.10",
- "2.3.3.11",
- "2.3.3.12",
- "2.3.3.13",
- "2.3.3.14",
- "2.3.3.15",
- "2.4.1.1",
- "2.4.1.2",
- "2.4.1.4",
- "2.4.1.5",
- "2.4.1.7",
- "2.4.1.8",
- "2.4.1.9",
- "2.4.1.10",
- "2.4.1.11",
- "2.4.1.12",
- "2.4.1.13",
- "2.4.1.14",
- "2.4.1.15",
- "2.4.1.16",
- "2.4.1.17",
- "2.4.1.18",
- "2.4.1.19",
- "2.4.1.20",
- "2.4.1.21",
- "2.4.1.22",
- "2.4.1.23",
- "2.4.1.24",
- "2.4.1.25",
- "2.4.1.26",
- "2.4.1.27",
- "2.4.1.28",
- "2.4.1.29",
- "2.4.1.30",
- "2.4.1.31",
- "2.4.1.32",
- "2.4.1.33",
- "2.4.1.34",
- "2.4.1.35",
- "2.4.1.36",
- "2.4.1.37",
- "2.4.1.38",
- "2.4.1.39",
- "2.4.1.40",
- "2.4.1.41",
- "2.4.1.43",
- "2.4.1.44",
- "2.4.1.45",
- "2.4.1.46",
- "2.4.1.47",
- "2.4.1.48",
- "2.4.1.49",
- "2.4.1.50",
- "2.4.1.52",
- "2.4.1.53",
- "2.4.1.54",
- "2.4.1.56",
- "2.4.1.57",
- "2.4.1.58",
- "2.4.1.60",
- "2.4.1.62",
- "2.4.1.63",
- "2.4.1.64",
- "2.4.1.65",
- "2.4.1.66",
- "2.4.1.67",
- "2.4.1.68",
- "2.4.1.69",
- "2.4.1.70",
- "2.4.1.71",
- "2.4.1.73",
- "2.4.1.74",
- "2.4.1.78",
- "2.4.1.79",
- "2.4.1.80",
- "2.4.1.81",
- "2.4.1.82",
- "2.4.1.83",
- "2.4.1.85",
- "2.4.1.86",
- "2.4.1.87",
- "2.4.1.88",
- "2.4.1.90",
- "2.4.1.91",
- "2.4.1.92",
- "2.4.1.94",
- "2.4.1.95",
- "2.4.1.96",
- "2.4.1.97",
- "2.4.1.99",
- "2.4.1.100",
- "2.4.1.101",
- "2.4.1.102",
- "2.4.1.103",
- "2.4.1.104",
- "2.4.1.105",
- "2.4.1.106",
- "2.4.1.109",
- "2.4.1.110",
- "2.4.1.111",
- "2.4.1.113",
- "2.4.1.114",
- "2.4.1.115",
- "2.4.1.116",
- "2.4.1.117",
- "2.4.1.118",
- "2.4.1.119",
- "2.4.1.120",
- "2.4.1.121",
- "2.4.1.122",
- "2.4.1.123",
- "2.4.1.125",
- "2.4.1.126",
- "2.4.1.127",
- "2.4.1.128",
- "2.4.1.129",
- "2.4.1.130",
- "2.4.1.131",
- "2.4.1.132",
- "2.4.1.133",
- "2.4.1.134",
- "2.4.1.135",
- "2.4.1.136",
- "2.4.1.137",
- "2.4.1.138",
- "2.4.1.139",
- "2.4.1.140",
- "2.4.1.141",
- "2.4.1.142",
- "2.4.1.143",
- "2.4.1.144",
- "2.4.1.145",
- "2.4.1.146",
- "2.4.1.147",
- "2.4.1.148",
- "2.4.1.149",
- "2.4.1.150",
- "2.4.1.152",
- "2.4.1.153",
- "2.4.1.155",
- "2.4.1.156",
- "2.4.1.157",
- "2.4.1.158",
- "2.4.1.159",
- "2.4.1.160",
- "2.4.1.161",
- "2.4.1.162",
- "2.4.1.163",
- "2.4.1.164",
- "2.4.1.165",
- "2.4.1.166",
- "2.4.1.167",
- "2.4.1.168",
- "2.4.1.170",
- "2.4.1.171",
- "2.4.1.172",
- "2.4.1.173",
- "2.4.1.174",
- "2.4.1.175",
- "2.4.1.176",
- "2.4.1.177",
- "2.4.1.178",
- "2.4.1.179",
- "2.4.1.180",
- "2.4.1.181",
- "2.4.1.182",
- "2.4.1.183",
- "2.4.1.184",
- "2.4.1.185",
- "2.4.1.186",
- "2.4.1.187",
- "2.4.1.188",
- "2.4.1.189",
- "2.4.1.190",
- "2.4.1.191",
- "2.4.1.192",
- "2.4.1.193",
- "2.4.1.194",
- "2.4.1.195",
- "2.4.1.196",
- "2.4.1.197",
- "2.4.1.198",
- "2.4.1.199",
- "2.4.1.201",
- "2.4.1.202",
- "2.4.1.203",
- "2.4.1.205",
- "2.4.1.206",
- "2.4.1.207",
- "2.4.1.208",
- "2.4.1.209",
- "2.4.1.210",
- "2.4.1.211",
- "2.4.1.212",
- "2.4.1.213",
- "2.4.1.214",
- "2.4.1.215",
- "2.4.1.216",
- "2.4.1.217",
- "2.4.1.218",
- "2.4.1.219",
- "2.4.1.220",
- "2.4.1.221",
- "2.4.1.222",
- "2.4.1.223",
- "2.4.1.224",
- "2.4.1.225",
- "2.4.1.226",
- "2.4.1.227",
- "2.4.1.228",
- "2.4.1.229",
- "2.4.1.230",
- "2.4.1.231",
- "2.4.1.232",
- "2.4.1.234",
- "2.4.1.236",
- "2.4.1.237",
- "2.4.1.238",
- "2.4.1.239",
- "2.4.1.240",
- "2.4.1.241",
- "2.4.1.242",
- "2.4.1.243",
- "2.4.1.244",
- "2.4.1.245",
- "2.4.1.246",
- "2.4.1.247",
- "2.4.1.248",
- "2.4.1.249",
- "2.4.1.250",
- "2.4.2.1",
- "2.4.2.2",
- "2.4.2.3",
- "2.4.2.4",
- "2.4.2.5",
- "2.4.2.6",
- "2.4.2.7",
- "2.4.2.8",
- "2.4.2.9",
- "2.4.2.10",
- "2.4.2.11",
- "2.4.2.12",
- "2.4.2.14",
- "2.4.2.15",
- "2.4.2.16",
- "2.4.2.17",
- "2.4.2.18",
- "2.4.2.19",
- "2.4.2.20",
- "2.4.2.21",
- "2.4.2.22",
- "2.4.2.23",
- "2.4.2.24",
- "2.4.2.25",
- "2.4.2.26",
- "2.4.2.27",
- "2.4.2.28",
- "2.4.2.29",
- "2.4.2.30",
- "2.4.2.31",
- "2.4.2.32",
- "2.4.2.33",
- "2.4.2.34",
- "2.4.2.35",
- "2.4.2.36",
- "2.4.2.37",
- "2.4.2.38",
- "2.4.2.39",
- "2.4.2.40",
- "2.4.2.41",
- "2.4.2.42",
- "2.4.99.1",
- "2.4.99.2",
- "2.4.99.3",
- "2.4.99.4",
- "2.4.99.5",
- "2.4.99.6",
- "2.4.99.7",
- "2.4.99.8",
- "2.4.99.9",
- "2.4.99.10",
- "2.4.99.11",
- "2.5.1.1",
- "2.5.1.2",
- "2.5.1.3",
- "2.5.1.4",
- "2.5.1.5",
- "2.5.1.6",
- "2.5.1.7",
- "2.5.1.9",
- "2.5.1.10",
- "2.5.1.11",
- "2.5.1.15",
- "2.5.1.16",
- "2.5.1.17",
- "2.5.1.18",
- "2.5.1.19",
- "2.5.1.20",
- "2.5.1.21",
- "2.5.1.22",
- "2.5.1.23",
- "2.5.1.24",
- "2.5.1.25",
- "2.5.1.26",
- "2.5.1.27",
- "2.5.1.28",
- "2.5.1.29",
- "2.5.1.30",
- "2.5.1.31",
- "2.5.1.32",
- "2.5.1.33",
- "2.5.1.34",
- "2.5.1.35",
- "2.5.1.36",
- "2.5.1.38",
- "2.5.1.39",
- "2.5.1.41",
- "2.5.1.42",
- "2.5.1.43",
- "2.5.1.44",
- "2.5.1.45",
- "2.5.1.46",
- "2.5.1.47",
- "2.5.1.48",
- "2.5.1.49",
- "2.5.1.50",
- "2.5.1.51",
- "2.5.1.52",
- "2.5.1.53",
- "2.5.1.54",
- "2.5.1.55",
- "2.5.1.56",
- "2.5.1.57",
- "2.5.1.58",
- "2.5.1.59",
- "2.5.1.60",
- "2.5.1.61",
- "2.5.1.62",
- "2.5.1.63",
- "2.5.1.65",
- "2.5.1.66",
- "2.5.1.67",
- "2.5.1.68",
- "2.5.1.69",
- "2.5.1.70",
- "2.5.1.71",
- "2.5.1.72",
- "2.5.1.73",
- "2.5.1.74",
- "2.5.1.75",
- "2.5.1.76",
- "2.5.1.77",
- "2.5.1.78",
- "2.5.1.79",
- "2.5.1.80",
- "2.6.1.1",
- "2.6.1.2",
- "2.6.1.3",
- "2.6.1.4",
- "2.6.1.5",
- "2.6.1.6",
- "2.6.1.7",
- "2.6.1.8",
- "2.6.1.9",
- "2.6.1.11",
- "2.6.1.12",
- "2.6.1.13",
- "2.6.1.14",
- "2.6.1.15",
- "2.6.1.16",
- "2.6.1.17",
- "2.6.1.18",
- "2.6.1.19",
- "2.6.1.21",
- "2.6.1.22",
- "2.6.1.23",
- "2.6.1.24",
- "2.6.1.26",
- "2.6.1.27",
- "2.6.1.28",
- "2.6.1.29",
- "2.6.1.30",
- "2.6.1.31",
- "2.6.1.32",
- "2.6.1.33",
- "2.6.1.34",
- "2.6.1.35",
- "2.6.1.36",
- "2.6.1.37",
- "2.6.1.38",
- "2.6.1.39",
- "2.6.1.40",
- "2.6.1.41",
- "2.6.1.42",
- "2.6.1.43",
- "2.6.1.44",
- "2.6.1.45",
- "2.6.1.46",
- "2.6.1.47",
- "2.6.1.48",
- "2.6.1.49",
- "2.6.1.50",
- "2.6.1.51",
- "2.6.1.52",
- "2.6.1.54",
- "2.6.1.55",
- "2.6.1.56",
- "2.6.1.57",
- "2.6.1.58",
- "2.6.1.59",
- "2.6.1.60",
- "2.6.1.62",
- "2.6.1.63",
- "2.6.1.64",
- "2.6.1.65",
- "2.6.1.66",
- "2.6.1.67",
- "2.6.1.68",
- "2.6.1.70",
- "2.6.1.71",
- "2.6.1.72",
- "2.6.1.73",
- "2.6.1.74",
- "2.6.1.75",
- "2.6.1.76",
- "2.6.1.77",
- "2.6.1.78",
- "2.6.1.79",
- "2.6.1.80",
- "2.6.1.81",
- "2.6.1.82",
- "2.6.1.83",
- "2.6.1.84",
- "2.6.1.85",
- "2.6.1.86",
- "2.6.3.1",
- "2.6.99.1",
- "2.6.99.2",
- "2.7.1.1",
- "2.7.1.2",
- "2.7.1.3",
- "2.7.1.4",
- "2.7.1.5",
- "2.7.1.6",
- "2.7.1.7",
- "2.7.1.8",
- "2.7.1.10",
- "2.7.1.11",
- "2.7.1.12",
- "2.7.1.13",
- "2.7.1.14",
- "2.7.1.15",
- "2.7.1.16",
- "2.7.1.17",
- "2.7.1.18",
- "2.7.1.19",
- "2.7.1.20",
- "2.7.1.21",
- "2.7.1.22",
- "2.7.1.23",
- "2.7.1.24",
- "2.7.1.25",
- "2.7.1.26",
- "2.7.1.27",
- "2.7.1.28",
- "2.7.1.29",
- "2.7.1.30",
- "2.7.1.31",
- "2.7.1.32",
- "2.7.1.33",
- "2.7.1.34",
- "2.7.1.35",
- "2.7.1.36",
- "2.7.1.39",
- "2.7.1.40",
- "2.7.1.41",
- "2.7.1.42",
- "2.7.1.43",
- "2.7.1.44",
- "2.7.1.45",
- "2.7.1.46",
- "2.7.1.47",
- "2.7.1.48",
- "2.7.1.49",
- "2.7.1.50",
- "2.7.1.51",
- "2.7.1.52",
- "2.7.1.53",
- "2.7.1.54",
- "2.7.1.55",
- "2.7.1.56",
- "2.7.1.58",
- "2.7.1.59",
- "2.7.1.60",
- "2.7.1.61",
- "2.7.1.62",
- "2.7.1.63",
- "2.7.1.64",
- "2.7.1.65",
- "2.7.1.66",
- "2.7.1.67",
- "2.7.1.68",
- "2.7.1.69",
- "2.7.1.71",
- "2.7.1.72",
- "2.7.1.73",
- "2.7.1.74",
- "2.7.1.76",
- "2.7.1.77",
- "2.7.1.78",
- "2.7.1.79",
- "2.7.1.80",
- "2.7.1.81",
- "2.7.1.82",
- "2.7.1.83",
- "2.7.1.84",
- "2.7.1.85",
- "2.7.1.86",
- "2.7.1.87",
- "2.7.1.88",
- "2.7.1.89",
- "2.7.1.90",
- "2.7.1.91",
- "2.7.1.92",
- "2.7.1.93",
- "2.7.1.94",
- "2.7.1.95",
- "2.7.1.100",
- "2.7.1.101",
- "2.7.1.102",
- "2.7.1.103",
- "2.7.1.105",
- "2.7.1.106",
- "2.7.1.107",
- "2.7.1.108",
- "2.7.1.113",
- "2.7.1.114",
- "2.7.1.118",
- "2.7.1.119",
- "2.7.1.121",
- "2.7.1.122",
- "2.7.1.127",
- "2.7.1.130",
- "2.7.1.134",
- "2.7.1.136",
- "2.7.1.137",
- "2.7.1.138",
- "2.7.1.140",
- "2.7.1.142",
- "2.7.1.143",
- "2.7.1.144",
- "2.7.1.145",
- "2.7.1.146",
- "2.7.1.147",
- "2.7.1.148",
- "2.7.1.149",
- "2.7.1.150",
- "2.7.1.151",
- "2.7.1.153",
- "2.7.1.154",
- "2.7.1.156",
- "2.7.1.157",
- "2.7.1.158",
- "2.7.1.159",
- "2.7.1.160",
- "2.7.1.161",
- "2.7.1.162",
- "2.7.1.163",
- "2.7.1.164",
- "2.7.1.165",
- "2.7.2.1",
- "2.7.2.2",
- "2.7.2.3",
- "2.7.2.4",
- "2.7.2.6",
- "2.7.2.7",
- "2.7.2.8",
- "2.7.2.10",
- "2.7.2.11",
- "2.7.2.12",
- "2.7.2.13",
- "2.7.2.14",
- "2.7.2.15",
- "2.7.3.1",
- "2.7.3.2",
- "2.7.3.3",
- "2.7.3.4",
- "2.7.3.5",
- "2.7.3.6",
- "2.7.3.7",
- "2.7.3.8",
- "2.7.3.9",
- "2.7.3.10",
- "2.7.4.1",
- "2.7.4.2",
- "2.7.4.3",
- "2.7.4.4",
- "2.7.4.6",
- "2.7.4.7",
- "2.7.4.8",
- "2.7.4.9",
- "2.7.4.10",
- "2.7.4.11",
- "2.7.4.12",
- "2.7.4.13",
- "2.7.4.14",
- "2.7.4.15",
- "2.7.4.16",
- "2.7.4.17",
- "2.7.4.18",
- "2.7.4.19",
- "2.7.4.20",
- "2.7.4.21",
- "2.7.4.22",
- "2.7.4.23",
- "2.7.4.24",
- "2.7.6.1",
- "2.7.6.2",
- "2.7.6.3",
- "2.7.6.4",
- "2.7.6.5",
- "2.7.7.1",
- "2.7.7.2",
- "2.7.7.3",
- "2.7.7.4",
- "2.7.7.5",
- "2.7.7.6",
- "2.7.7.7",
- "2.7.7.8",
- "2.7.7.9",
- "2.7.7.10",
- "2.7.7.11",
- "2.7.7.12",
- "2.7.7.13",
- "2.7.7.14",
- "2.7.7.15",
- "2.7.7.18",
- "2.7.7.19",
- "2.7.7.21",
- "2.7.7.22",
- "2.7.7.23",
- "2.7.7.24",
- "2.7.7.25",
- "2.7.7.27",
- "2.7.7.28",
- "2.7.7.30",
- "2.7.7.31",
- "2.7.7.32",
- "2.7.7.33",
- "2.7.7.34",
- "2.7.7.35",
- "2.7.7.36",
- "2.7.7.37",
- "2.7.7.38",
- "2.7.7.39",
- "2.7.7.40",
- "2.7.7.41",
- "2.7.7.42",
- "2.7.7.43",
- "2.7.7.44",
- "2.7.7.45",
- "2.7.7.46",
- "2.7.7.47",
- "2.7.7.48",
- "2.7.7.49",
- "2.7.7.50",
- "2.7.7.51",
- "2.7.7.52",
- "2.7.7.53",
- "2.7.7.54",
- "2.7.7.55",
- "2.7.7.56",
- "2.7.7.57",
- "2.7.7.58",
- "2.7.7.59",
- "2.7.7.60",
- "2.7.7.61",
- "2.7.7.62",
- "2.7.7.63",
- "2.7.7.64",
- "2.7.7.65",
- "2.7.7.66",
- "2.7.7.67",
- "2.7.7.68",
- "2.7.8.1",
- "2.7.8.2",
- "2.7.8.3",
- "2.7.8.4",
- "2.7.8.5",
- "2.7.8.6",
- "2.7.8.7",
- "2.7.8.8",
- "2.7.8.9",
- "2.7.8.10",
- "2.7.8.11",
- "2.7.8.12",
- "2.7.8.13",
- "2.7.8.14",
- "2.7.8.15",
- "2.7.8.17",
- "2.7.8.18",
- "2.7.8.19",
- "2.7.8.20",
- "2.7.8.21",
- "2.7.8.22",
- "2.7.8.23",
- "2.7.8.24",
- "2.7.8.25",
- "2.7.8.26",
- "2.7.8.27",
- "2.7.8.28",
- "2.7.9.1",
- "2.7.9.2",
- "2.7.9.3",
- "2.7.9.4",
- "2.7.9.5",
- "2.7.10.1",
- "2.7.10.2",
- "2.7.11.1",
- "2.7.11.2",
- "2.7.11.3",
- "2.7.11.4",
- "2.7.11.5",
- "2.7.11.6",
- "2.7.11.7",
- "2.7.11.8",
- "2.7.11.9",
- "2.7.11.10",
- "2.7.11.11",
- "2.7.11.12",
- "2.7.11.13",
- "2.7.11.14",
- "2.7.11.15",
- "2.7.11.16",
- "2.7.11.17",
- "2.7.11.18",
- "2.7.11.19",
- "2.7.11.20",
- "2.7.11.21",
- "2.7.11.22",
- "2.7.11.23",
- "2.7.11.24",
- "2.7.11.25",
- "2.7.11.26",
- "2.7.11.27",
- "2.7.11.28",
- "2.7.11.29",
- "2.7.11.30",
- "2.7.11.31",
- "2.7.12.1",
- "2.7.12.2",
- "2.7.13.1",
- "2.7.13.2",
- "2.7.13.3",
- "2.7.99.1",
- "2.8.1.1",
- "2.8.1.2",
- "2.8.1.3",
- "2.8.1.4",
- "2.8.1.5",
- "2.8.1.6",
- "2.8.1.7",
- "2.8.1.8",
- "2.8.2.1",
- "2.8.2.2",
- "2.8.2.3",
- "2.8.2.4",
- "2.8.2.5",
- "2.8.2.6",
- "2.8.2.7",
- "2.8.2.8",
- "2.8.2.9",
- "2.8.2.10",
- "2.8.2.11",
- "2.8.2.13",
- "2.8.2.14",
- "2.8.2.15",
- "2.8.2.16",
- "2.8.2.17",
- "2.8.2.18",
- "2.8.2.19",
- "2.8.2.20",
- "2.8.2.21",
- "2.8.2.22",
- "2.8.2.23",
- "2.8.2.24",
- "2.8.2.25",
- "2.8.2.26",
- "2.8.2.27",
- "2.8.2.28",
- "2.8.2.29",
- "2.8.2.30",
- "2.8.2.31",
- "2.8.2.32",
- "2.8.2.33",
- "2.8.2.34",
- "2.8.3.1",
- "2.8.3.2",
- "2.8.3.3",
- "2.8.3.5",
- "2.8.3.6",
- "2.8.3.7",
- "2.8.3.8",
- "2.8.3.9",
- "2.8.3.10",
- "2.8.3.11",
- "2.8.3.12",
- "2.8.3.13",
- "2.8.3.14",
- "2.8.3.15",
- "2.8.3.16",
- "2.8.3.17",
- "2.8.4.1",
- "2.8.4.2",
- "2.9.1.1",
- "2.9.1.2",
- "3.1.1.1",
- "3.1.1.2",
- "3.1.1.3",
- "3.1.1.4",
- "3.1.1.5",
- "3.1.1.6",
- "3.1.1.7",
- "3.1.1.8",
- "3.1.1.10",
- "3.1.1.11",
- "3.1.1.13",
- "3.1.1.14",
- "3.1.1.15",
- "3.1.1.17",
- "3.1.1.19",
- "3.1.1.20",
- "3.1.1.21",
- "3.1.1.22",
- "3.1.1.23",
- "3.1.1.24",
- "3.1.1.25",
- "3.1.1.26",
- "3.1.1.27",
- "3.1.1.28",
- "3.1.1.29",
- "3.1.1.30",
- "3.1.1.31",
- "3.1.1.32",
- "3.1.1.33",
- "3.1.1.34",
- "3.1.1.35",
- "3.1.1.36",
- "3.1.1.37",
- "3.1.1.38",
- "3.1.1.39",
- "3.1.1.40",
- "3.1.1.41",
- "3.1.1.42",
- "3.1.1.43",
- "3.1.1.44",
- "3.1.1.45",
- "3.1.1.46",
- "3.1.1.47",
- "3.1.1.48",
- "3.1.1.49",
- "3.1.1.50",
- "3.1.1.51",
- "3.1.1.52",
- "3.1.1.53",
- "3.1.1.54",
- "3.1.1.55",
- "3.1.1.56",
- "3.1.1.57",
- "3.1.1.58",
- "3.1.1.59",
- "3.1.1.60",
- "3.1.1.61",
- "3.1.1.63",
- "3.1.1.64",
- "3.1.1.65",
- "3.1.1.66",
- "3.1.1.67",
- "3.1.1.68",
- "3.1.1.70",
- "3.1.1.71",
- "3.1.1.72",
- "3.1.1.73",
- "3.1.1.74",
- "3.1.1.75",
- "3.1.1.76",
- "3.1.1.77",
- "3.1.1.78",
- "3.1.1.79",
- "3.1.1.80",
- "3.1.1.81",
- "3.1.1.82",
- "3.1.1.83",
- "3.1.1.84",
- "3.1.2.1",
- "3.1.2.2",
- "3.1.2.3",
- "3.1.2.4",
- "3.1.2.5",
- "3.1.2.6",
- "3.1.2.7",
- "3.1.2.10",
- "3.1.2.11",
- "3.1.2.12",
- "3.1.2.13",
- "3.1.2.14",
- "3.1.2.15",
- "3.1.2.16",
- "3.1.2.17",
- "3.1.2.18",
- "3.1.2.19",
- "3.1.2.20",
- "3.1.2.21",
- "3.1.2.22",
- "3.1.2.23",
- "3.1.2.25",
- "3.1.2.26",
- "3.1.2.27",
- "3.1.3.1",
- "3.1.3.2",
- "3.1.3.3",
- "3.1.3.4",
- "3.1.3.5",
- "3.1.3.6",
- "3.1.3.7",
- "3.1.3.8",
- "3.1.3.9",
- "3.1.3.10",
- "3.1.3.11",
- "3.1.3.12",
- "3.1.3.13",
- "3.1.3.14",
- "3.1.3.15",
- "3.1.3.16",
- "3.1.3.17",
- "3.1.3.18",
- "3.1.3.19",
- "3.1.3.20",
- "3.1.3.21",
- "3.1.3.22",
- "3.1.3.23",
- "3.1.3.24",
- "3.1.3.25",
- "3.1.3.26",
- "3.1.3.27",
- "3.1.3.28",
- "3.1.3.29",
- "3.1.3.31",
- "3.1.3.32",
- "3.1.3.33",
- "3.1.3.34",
- "3.1.3.35",
- "3.1.3.36",
- "3.1.3.37",
- "3.1.3.38",
- "3.1.3.39",
- "3.1.3.40",
- "3.1.3.41",
- "3.1.3.42",
- "3.1.3.43",
- "3.1.3.44",
- "3.1.3.45",
- "3.1.3.46",
- "3.1.3.47",
- "3.1.3.48",
- "3.1.3.49",
- "3.1.3.50",
- "3.1.3.51",
- "3.1.3.52",
- "3.1.3.53",
- "3.1.3.54",
- "3.1.3.55",
- "3.1.3.56",
- "3.1.3.57",
- "3.1.3.58",
- "3.1.3.59",
- "3.1.3.60",
- "3.1.3.62",
- "3.1.3.63",
- "3.1.3.64",
- "3.1.3.66",
- "3.1.3.67",
- "3.1.3.68",
- "3.1.3.69",
- "3.1.3.70",
- "3.1.3.71",
- "3.1.3.72",
- "3.1.3.73",
- "3.1.3.74",
- "3.1.3.75",
- "3.1.3.76",
- "3.1.3.77",
- "3.1.3.78",
- "3.1.3.79",
- "3.1.3.80",
- "3.1.4.1",
- "3.1.4.2",
- "3.1.4.3",
- "3.1.4.4",
- "3.1.4.11",
- "3.1.4.12",
- "3.1.4.13",
- "3.1.4.14",
- "3.1.4.15",
- "3.1.4.16",
- "3.1.4.17",
- "3.1.4.35",
- "3.1.4.37",
- "3.1.4.38",
- "3.1.4.39",
- "3.1.4.40",
- "3.1.4.41",
- "3.1.4.42",
- "3.1.4.43",
- "3.1.4.44",
- "3.1.4.45",
- "3.1.4.46",
- "3.1.4.48",
- "3.1.4.49",
- "3.1.4.50",
- "3.1.4.51",
- "3.1.4.52",
- "3.1.4.53",
- "3.1.5.1",
- "3.1.6.1",
- "3.1.6.2",
- "3.1.6.3",
- "3.1.6.4",
- "3.1.6.6",
- "3.1.6.7",
- "3.1.6.8",
- "3.1.6.9",
- "3.1.6.10",
- "3.1.6.11",
- "3.1.6.12",
- "3.1.6.13",
- "3.1.6.14",
- "3.1.6.15",
- "3.1.6.16",
- "3.1.6.17",
- "3.1.6.18",
- "3.1.7.1",
- "3.1.7.2",
- "3.1.7.3",
- "3.1.7.4",
- "3.1.7.5",
- "3.1.8.1",
- "3.1.8.2",
- "3.1.11.1",
- "3.1.11.2",
- "3.1.11.3",
- "3.1.11.4",
- "3.1.11.5",
- "3.1.11.6",
- "3.1.13.1",
- "3.1.13.2",
- "3.1.13.3",
- "3.1.13.4",
- "3.1.13.5",
- "3.1.14.1",
- "3.1.15.1",
- "3.1.16.1",
- "3.1.21.1",
- "3.1.21.2",
- "3.1.21.3",
- "3.1.21.4",
- "3.1.21.5",
- "3.1.21.6",
- "3.1.21.7",
- "3.1.22.1",
- "3.1.22.2",
- "3.1.22.4",
- "3.1.22.5",
- "3.1.25.1",
- "3.1.26.1",
- "3.1.26.2",
- "3.1.26.3",
- "3.1.26.4",
- "3.1.26.5",
- "3.1.26.6",
- "3.1.26.7",
- "3.1.26.8",
- "3.1.26.9",
- "3.1.26.10",
- "3.1.26.11",
- "3.1.26.12",
- "3.1.26.13",
- "3.1.27.1",
- "3.1.27.2",
- "3.1.27.3",
- "3.1.27.4",
- "3.1.27.5",
- "3.1.27.6",
- "3.1.27.7",
- "3.1.27.8",
- "3.1.27.9",
- "3.1.27.10",
- "3.1.30.1",
- "3.1.30.2",
- "3.1.31.1",
- "3.2.1.1",
- "3.2.1.2",
- "3.2.1.3",
- "3.2.1.4",
- "3.2.1.6",
- "3.2.1.7",
- "3.2.1.8",
- "3.2.1.10",
- "3.2.1.11",
- "3.2.1.14",
- "3.2.1.15",
- "3.2.1.17",
- "3.2.1.18",
- "3.2.1.20",
- "3.2.1.21",
- "3.2.1.22",
- "3.2.1.23",
- "3.2.1.24",
- "3.2.1.25",
- "3.2.1.26",
- "3.2.1.28",
- "3.2.1.31",
- "3.2.1.32",
- "3.2.1.33",
- "3.2.1.35",
- "3.2.1.36",
- "3.2.1.37",
- "3.2.1.38",
- "3.2.1.39",
- "3.2.1.40",
- "3.2.1.41",
- "3.2.1.42",
- "3.2.1.43",
- "3.2.1.44",
- "3.2.1.45",
- "3.2.1.46",
- "3.2.1.47",
- "3.2.1.48",
- "3.2.1.49",
- "3.2.1.50",
- "3.2.1.51",
- "3.2.1.52",
- "3.2.1.53",
- "3.2.1.54",
- "3.2.1.55",
- "3.2.1.56",
- "3.2.1.57",
- "3.2.1.58",
- "3.2.1.59",
- "3.2.1.60",
- "3.2.1.61",
- "3.2.1.62",
- "3.2.1.63",
- "3.2.1.64",
- "3.2.1.65",
- "3.2.1.66",
- "3.2.1.67",
- "3.2.1.68",
- "3.2.1.70",
- "3.2.1.71",
- "3.2.1.72",
- "3.2.1.73",
- "3.2.1.74",
- "3.2.1.75",
- "3.2.1.76",
- "3.2.1.77",
- "3.2.1.78",
- "3.2.1.80",
- "3.2.1.81",
- "3.2.1.82",
- "3.2.1.83",
- "3.2.1.84",
- "3.2.1.85",
- "3.2.1.86",
- "3.2.1.87",
- "3.2.1.88",
- "3.2.1.89",
- "3.2.1.91",
- "3.2.1.92",
- "3.2.1.93",
- "3.2.1.94",
- "3.2.1.95",
- "3.2.1.96",
- "3.2.1.97",
- "3.2.1.98",
- "3.2.1.99",
- "3.2.1.100",
- "3.2.1.101",
- "3.2.1.102",
- "3.2.1.103",
- "3.2.1.104",
- "3.2.1.105",
- "3.2.1.106",
- "3.2.1.107",
- "3.2.1.108",
- "3.2.1.109",
- "3.2.1.111",
- "3.2.1.112",
- "3.2.1.113",
- "3.2.1.114",
- "3.2.1.115",
- "3.2.1.116",
- "3.2.1.117",
- "3.2.1.118",
- "3.2.1.119",
- "3.2.1.120",
- "3.2.1.121",
- "3.2.1.122",
- "3.2.1.123",
- "3.2.1.124",
- "3.2.1.125",
- "3.2.1.126",
- "3.2.1.127",
- "3.2.1.128",
- "3.2.1.129",
- "3.2.1.130",
- "3.2.1.131",
- "3.2.1.132",
- "3.2.1.133",
- "3.2.1.134",
- "3.2.1.135",
- "3.2.1.136",
- "3.2.1.137",
- "3.2.1.139",
- "3.2.1.140",
- "3.2.1.141",
- "3.2.1.142",
- "3.2.1.143",
- "3.2.1.144",
- "3.2.1.145",
- "3.2.1.146",
- "3.2.1.147",
- "3.2.1.149",
- "3.2.1.150",
- "3.2.1.151",
- "3.2.1.152",
- "3.2.1.153",
- "3.2.1.154",
- "3.2.1.155",
- "3.2.1.156",
- "3.2.1.157",
- "3.2.1.158",
- "3.2.1.159",
- "3.2.1.161",
- "3.2.1.162",
- "3.2.1.163",
- "3.2.1.164",
- "3.2.1.165",
- "3.2.2.1",
- "3.2.2.2",
- "3.2.2.3",
- "3.2.2.4",
- "3.2.2.5",
- "3.2.2.6",
- "3.2.2.7",
- "3.2.2.8",
- "3.2.2.9",
- "3.2.2.10",
- "3.2.2.11",
- "3.2.2.12",
- "3.2.2.13",
- "3.2.2.14",
- "3.2.2.15",
- "3.2.2.16",
- "3.2.2.17",
- "3.2.2.19",
- "3.2.2.20",
- "3.2.2.21",
- "3.2.2.22",
- "3.2.2.23",
- "3.2.2.24",
- "3.2.2.25",
- "3.2.2.26",
- "3.2.2.27",
- "3.2.2.28",
- "3.2.2.29",
- "3.3.1.1",
- "3.3.1.2",
- "3.3.2.1",
- "3.3.2.2",
- "3.3.2.4",
- "3.3.2.5",
- "3.3.2.6",
- "3.3.2.7",
- "3.3.2.8",
- "3.3.2.9",
- "3.3.2.10",
- "3.3.2.11",
- "3.4.11.1",
- "3.4.11.2",
- "3.4.11.3",
- "3.4.11.4",
- "3.4.11.5",
- "3.4.11.6",
- "3.4.11.7",
- "3.4.11.9",
- "3.4.11.10",
- "3.4.11.13",
- "3.4.11.14",
- "3.4.11.15",
- "3.4.11.16",
- "3.4.11.17",
- "3.4.11.18",
- "3.4.11.19",
- "3.4.11.20",
- "3.4.11.21",
- "3.4.11.22",
- "3.4.11.23",
- "3.4.11.24",
- "3.4.13.3",
- "3.4.13.4",
- "3.4.13.5",
- "3.4.13.7",
- "3.4.13.9",
- "3.4.13.12",
- "3.4.13.17",
- "3.4.13.18",
- "3.4.13.19",
- "3.4.13.20",
- "3.4.13.21",
- "3.4.13.22",
- "3.4.14.1",
- "3.4.14.2",
- "3.4.14.4",
- "3.4.14.5",
- "3.4.14.6",
- "3.4.14.9",
- "3.4.14.10",
- "3.4.14.11",
- "3.4.14.12",
- "3.4.15.1",
- "3.4.15.4",
- "3.4.15.5",
- "3.4.15.6",
- "3.4.16.2",
- "3.4.16.4",
- "3.4.16.5",
- "3.4.16.6",
- "3.4.17.1",
- "3.4.17.2",
- "3.4.17.3",
- "3.4.17.4",
- "3.4.17.6",
- "3.4.17.8",
- "3.4.17.10",
- "3.4.17.11",
- "3.4.17.12",
- "3.4.17.13",
- "3.4.17.14",
- "3.4.17.15",
- "3.4.17.16",
- "3.4.17.17",
- "3.4.17.18",
- "3.4.17.19",
- "3.4.17.20",
- "3.4.17.21",
- "3.4.17.22",
- "3.4.17.23",
- "3.4.18.1",
- "3.4.19.1",
- "3.4.19.2",
- "3.4.19.3",
- "3.4.19.5",
- "3.4.19.6",
- "3.4.19.7",
- "3.4.19.9",
- "3.4.19.11",
- "3.4.19.12",
- "3.4.21.1",
- "3.4.21.2",
- "3.4.21.3",
- "3.4.21.4",
- "3.4.21.5",
- "3.4.21.6",
- "3.4.21.7",
- "3.4.21.9",
- "3.4.21.10",
- "3.4.21.12",
- "3.4.21.19",
- "3.4.21.20",
- "3.4.21.21",
- "3.4.21.22",
- "3.4.21.25",
- "3.4.21.26",
- "3.4.21.27",
- "3.4.21.32",
- "3.4.21.34",
- "3.4.21.35",
- "3.4.21.36",
- "3.4.21.37",
- "3.4.21.38",
- "3.4.21.39",
- "3.4.21.41",
- "3.4.21.42",
- "3.4.21.43",
- "3.4.21.45",
- "3.4.21.46",
- "3.4.21.47",
- "3.4.21.48",
- "3.4.21.49",
- "3.4.21.50",
- "3.4.21.53",
- "3.4.21.54",
- "3.4.21.55",
- "3.4.21.57",
- "3.4.21.59",
- "3.4.21.60",
- "3.4.21.61",
- "3.4.21.62",
- "3.4.21.63",
- "3.4.21.64",
- "3.4.21.65",
- "3.4.21.66",
- "3.4.21.67",
- "3.4.21.68",
- "3.4.21.69",
- "3.4.21.70",
- "3.4.21.71",
- "3.4.21.72",
- "3.4.21.73",
- "3.4.21.74",
- "3.4.21.75",
- "3.4.21.76",
- "3.4.21.77",
- "3.4.21.78",
- "3.4.21.79",
- "3.4.21.80",
- "3.4.21.81",
- "3.4.21.82",
- "3.4.21.83",
- "3.4.21.84",
- "3.4.21.85",
- "3.4.21.86",
- "3.4.21.88",
- "3.4.21.89",
- "3.4.21.90",
- "3.4.21.91",
- "3.4.21.92",
- "3.4.21.93",
- "3.4.21.94",
- "3.4.21.95",
- "3.4.21.96",
- "3.4.21.97",
- "3.4.21.98",
- "3.4.21.99",
- "3.4.21.100",
- "3.4.21.101",
- "3.4.21.102",
- "3.4.21.103",
- "3.4.21.104",
- "3.4.21.105",
- "3.4.21.106",
- "3.4.21.107",
- "3.4.21.108",
- "3.4.21.109",
- "3.4.21.110",
- "3.4.21.111",
- "3.4.21.112",
- "3.4.21.113",
- "3.4.21.114",
- "3.4.21.115",
- "3.4.21.116",
- "3.4.21.117",
- "3.4.21.118",
- "3.4.21.119",
- "3.4.21.120",
- "3.4.22.1",
- "3.4.22.2",
- "3.4.22.3",
- "3.4.22.6",
- "3.4.22.7",
- "3.4.22.8",
- "3.4.22.10",
- "3.4.22.14",
- "3.4.22.15",
- "3.4.22.16",
- "3.4.22.24",
- "3.4.22.25",
- "3.4.22.26",
- "3.4.22.27",
- "3.4.22.28",
- "3.4.22.29",
- "3.4.22.30",
- "3.4.22.31",
- "3.4.22.32",
- "3.4.22.33",
- "3.4.22.34",
- "3.4.22.35",
- "3.4.22.36",
- "3.4.22.37",
- "3.4.22.38",
- "3.4.22.39",
- "3.4.22.40",
- "3.4.22.41",
- "3.4.22.42",
- "3.4.22.43",
- "3.4.22.44",
- "3.4.22.45",
- "3.4.22.46",
- "3.4.22.47",
- "3.4.22.48",
- "3.4.22.49",
- "3.4.22.50",
- "3.4.22.51",
- "3.4.22.52",
- "3.4.22.53",
- "3.4.22.54",
- "3.4.22.55",
- "3.4.22.56",
- "3.4.22.57",
- "3.4.22.58",
- "3.4.22.59",
- "3.4.22.60",
- "3.4.22.61",
- "3.4.22.62",
- "3.4.22.63",
- "3.4.22.64",
- "3.4.22.65",
- "3.4.22.66",
- "3.4.22.67",
- "3.4.22.68",
- "3.4.22.69",
- "3.4.22.70",
- "3.4.22.71",
- "3.4.23.1",
- "3.4.23.2",
- "3.4.23.3",
- "3.4.23.4",
- "3.4.23.5",
- "3.4.23.12",
- "3.4.23.15",
- "3.4.23.16",
- "3.4.23.17",
- "3.4.23.18",
- "3.4.23.19",
- "3.4.23.20",
- "3.4.23.21",
- "3.4.23.22",
- "3.4.23.23",
- "3.4.23.24",
- "3.4.23.25",
- "3.4.23.26",
- "3.4.23.28",
- "3.4.23.29",
- "3.4.23.30",
- "3.4.23.31",
- "3.4.23.32",
- "3.4.23.34",
- "3.4.23.35",
- "3.4.23.36",
- "3.4.23.38",
- "3.4.23.39",
- "3.4.23.40",
- "3.4.23.41",
- "3.4.23.42",
- "3.4.23.43",
- "3.4.23.44",
- "3.4.23.45",
- "3.4.23.46",
- "3.4.23.47",
- "3.4.23.48",
- "3.4.23.49",
- "3.4.23.50",
- "3.4.23.51",
- "3.4.24.1",
- "3.4.24.3",
- "3.4.24.6",
- "3.4.24.7",
- "3.4.24.11",
- "3.4.24.12",
- "3.4.24.13",
- "3.4.24.14",
- "3.4.24.15",
- "3.4.24.16",
- "3.4.24.17",
- "3.4.24.18",
- "3.4.24.19",
- "3.4.24.20",
- "3.4.24.21",
- "3.4.24.22",
- "3.4.24.23",
- "3.4.24.24",
- "3.4.24.25",
- "3.4.24.26",
- "3.4.24.27",
- "3.4.24.28",
- "3.4.24.29",
- "3.4.24.30",
- "3.4.24.31",
- "3.4.24.32",
- "3.4.24.33",
- "3.4.24.34",
- "3.4.24.35",
- "3.4.24.36",
- "3.4.24.37",
- "3.4.24.38",
- "3.4.24.39",
- "3.4.24.40",
- "3.4.24.41",
- "3.4.24.42",
- "3.4.24.43",
- "3.4.24.44",
- "3.4.24.45",
- "3.4.24.46",
- "3.4.24.47",
- "3.4.24.48",
- "3.4.24.49",
- "3.4.24.50",
- "3.4.24.51",
- "3.4.24.52",
- "3.4.24.53",
- "3.4.24.54",
- "3.4.24.55",
- "3.4.24.56",
- "3.4.24.57",
- "3.4.24.58",
- "3.4.24.59",
- "3.4.24.60",
- "3.4.24.61",
- "3.4.24.62",
- "3.4.24.63",
- "3.4.24.64",
- "3.4.24.65",
- "3.4.24.66",
- "3.4.24.67",
- "3.4.24.68",
- "3.4.24.69",
- "3.4.24.70",
- "3.4.24.71",
- "3.4.24.72",
- "3.4.24.73",
- "3.4.24.74",
- "3.4.24.75",
- "3.4.24.76",
- "3.4.24.77",
- "3.4.24.78",
- "3.4.24.79",
- "3.4.24.80",
- "3.4.24.81",
- "3.4.24.82",
- "3.4.24.83",
- "3.4.24.84",
- "3.4.24.85",
- "3.4.24.86",
- "3.4.24.87",
- "3.4.25.1",
- "3.4.25.2",
- "3.5.1.1",
- "3.5.1.2",
- "3.5.1.3",
- "3.5.1.4",
- "3.5.1.5",
- "3.5.1.6",
- "3.5.1.7",
- "3.5.1.8",
- "3.5.1.9",
- "3.5.1.10",
- "3.5.1.11",
- "3.5.1.12",
- "3.5.1.13",
- "3.5.1.14",
- "3.5.1.15",
- "3.5.1.16",
- "3.5.1.17",
- "3.5.1.18",
- "3.5.1.19",
- "3.5.1.20",
- "3.5.1.21",
- "3.5.1.22",
- "3.5.1.23",
- "3.5.1.24",
- "3.5.1.25",
- "3.5.1.26",
- "3.5.1.27",
- "3.5.1.28",
- "3.5.1.29",
- "3.5.1.30",
- "3.5.1.31",
- "3.5.1.32",
- "3.5.1.33",
- "3.5.1.35",
- "3.5.1.36",
- "3.5.1.38",
- "3.5.1.39",
- "3.5.1.40",
- "3.5.1.41",
- "3.5.1.42",
- "3.5.1.43",
- "3.5.1.44",
- "3.5.1.46",
- "3.5.1.47",
- "3.5.1.48",
- "3.5.1.49",
- "3.5.1.50",
- "3.5.1.51",
- "3.5.1.52",
- "3.5.1.53",
- "3.5.1.54",
- "3.5.1.55",
- "3.5.1.56",
- "3.5.1.57",
- "3.5.1.58",
- "3.5.1.59",
- "3.5.1.60",
- "3.5.1.61",
- "3.5.1.62",
- "3.5.1.63",
- "3.5.1.64",
- "3.5.1.65",
- "3.5.1.66",
- "3.5.1.67",
- "3.5.1.68",
- "3.5.1.69",
- "3.5.1.70",
- "3.5.1.71",
- "3.5.1.72",
- "3.5.1.73",
- "3.5.1.74",
- "3.5.1.75",
- "3.5.1.76",
- "3.5.1.77",
- "3.5.1.78",
- "3.5.1.79",
- "3.5.1.81",
- "3.5.1.82",
- "3.5.1.83",
- "3.5.1.84",
- "3.5.1.85",
- "3.5.1.86",
- "3.5.1.87",
- "3.5.1.88",
- "3.5.1.89",
- "3.5.1.90",
- "3.5.1.91",
- "3.5.1.92",
- "3.5.1.93",
- "3.5.1.94",
- "3.5.1.95",
- "3.5.1.96",
- "3.5.1.97",
- "3.5.1.98",
- "3.5.1.99",
- "3.5.1.100",
- "3.5.1.101",
- "3.5.1.102",
- "3.5.1.103",
- "3.5.2.1",
- "3.5.2.2",
- "3.5.2.3",
- "3.5.2.4",
- "3.5.2.5",
- "3.5.2.6",
- "3.5.2.7",
- "3.5.2.9",
- "3.5.2.10",
- "3.5.2.11",
- "3.5.2.12",
- "3.5.2.13",
- "3.5.2.14",
- "3.5.2.15",
- "3.5.2.16",
- "3.5.2.17",
- "3.5.2.18",
- "3.5.3.1",
- "3.5.3.2",
- "3.5.3.3",
- "3.5.3.4",
- "3.5.3.5",
- "3.5.3.6",
- "3.5.3.7",
- "3.5.3.8",
- "3.5.3.9",
- "3.5.3.10",
- "3.5.3.11",
- "3.5.3.12",
- "3.5.3.13",
- "3.5.3.14",
- "3.5.3.15",
- "3.5.3.16",
- "3.5.3.17",
- "3.5.3.18",
- "3.5.3.19",
- "3.5.3.20",
- "3.5.3.21",
- "3.5.3.22",
- "3.5.3.23",
- "3.5.4.1",
- "3.5.4.2",
- "3.5.4.3",
- "3.5.4.4",
- "3.5.4.5",
- "3.5.4.6",
- "3.5.4.7",
- "3.5.4.8",
- "3.5.4.9",
- "3.5.4.10",
- "3.5.4.11",
- "3.5.4.12",
- "3.5.4.13",
- "3.5.4.14",
- "3.5.4.15",
- "3.5.4.16",
- "3.5.4.17",
- "3.5.4.18",
- "3.5.4.19",
- "3.5.4.20",
- "3.5.4.21",
- "3.5.4.22",
- "3.5.4.23",
- "3.5.4.24",
- "3.5.4.25",
- "3.5.4.26",
- "3.5.4.27",
- "3.5.4.28",
- "3.5.4.29",
- "3.5.4.30",
- "3.5.5.1",
- "3.5.5.2",
- "3.5.5.4",
- "3.5.5.5",
- "3.5.5.6",
- "3.5.5.7",
- "3.5.5.8",
- "3.5.99.1",
- "3.5.99.2",
- "3.5.99.3",
- "3.5.99.4",
- "3.5.99.5",
- "3.5.99.6",
- "3.5.99.7",
- "3.6.1.1",
- "3.6.1.2",
- "3.6.1.3",
- "3.6.1.5",
- "3.6.1.6",
- "3.6.1.7",
- "3.6.1.8",
- "3.6.1.9",
- "3.6.1.10",
- "3.6.1.11",
- "3.6.1.12",
- "3.6.1.13",
- "3.6.1.14",
- "3.6.1.15",
- "3.6.1.16",
- "3.6.1.17",
- "3.6.1.18",
- "3.6.1.19",
- "3.6.1.20",
- "3.6.1.21",
- "3.6.1.22",
- "3.6.1.23",
- "3.6.1.24",
- "3.6.1.25",
- "3.6.1.26",
- "3.6.1.27",
- "3.6.1.28",
- "3.6.1.29",
- "3.6.1.30",
- "3.6.1.31",
- "3.6.1.39",
- "3.6.1.40",
- "3.6.1.41",
- "3.6.1.42",
- "3.6.1.43",
- "3.6.1.44",
- "3.6.1.45",
- "3.6.1.52",
- "3.6.1.53",
- "3.6.2.1",
- "3.6.2.2",
- "3.6.3.1",
- "3.6.3.2",
- "3.6.3.3",
- "3.6.3.4",
- "3.6.3.5",
- "3.6.3.6",
- "3.6.3.7",
- "3.6.3.8",
- "3.6.3.9",
- "3.6.3.10",
- "3.6.3.11",
- "3.6.3.12",
- "3.6.3.14",
- "3.6.3.15",
- "3.6.3.16",
- "3.6.3.17",
- "3.6.3.18",
- "3.6.3.19",
- "3.6.3.20",
- "3.6.3.21",
- "3.6.3.22",
- "3.6.3.23",
- "3.6.3.24",
- "3.6.3.25",
- "3.6.3.26",
- "3.6.3.27",
- "3.6.3.28",
- "3.6.3.29",
- "3.6.3.30",
- "3.6.3.31",
- "3.6.3.32",
- "3.6.3.33",
- "3.6.3.34",
- "3.6.3.35",
- "3.6.3.36",
- "3.6.3.37",
- "3.6.3.38",
- "3.6.3.39",
- "3.6.3.40",
- "3.6.3.41",
- "3.6.3.42",
- "3.6.3.43",
- "3.6.3.44",
- "3.6.3.46",
- "3.6.3.47",
- "3.6.3.48",
- "3.6.3.49",
- "3.6.3.50",
- "3.6.3.51",
- "3.6.3.52",
- "3.6.3.53",
- "3.6.4.1",
- "3.6.4.2",
- "3.6.4.3",
- "3.6.4.4",
- "3.6.4.5",
- "3.6.4.6",
- "3.6.4.7",
- "3.6.4.8",
- "3.6.4.9",
- "3.6.4.10",
- "3.6.4.11",
- "3.6.4.12",
- "3.6.4.13",
- "3.6.5.1",
- "3.6.5.2",
- "3.6.5.3",
- "3.6.5.4",
- "3.6.5.5",
- "3.6.5.6",
- "3.7.1.1",
- "3.7.1.2",
- "3.7.1.3",
- "3.7.1.4",
- "3.7.1.5",
- "3.7.1.6",
- "3.7.1.7",
- "3.7.1.8",
- "3.7.1.9",
- "3.7.1.10",
- "3.7.1.11",
- "3.8.1.1",
- "3.8.1.2",
- "3.8.1.3",
- "3.8.1.5",
- "3.8.1.6",
- "3.8.1.7",
- "3.8.1.8",
- "3.8.1.9",
- "3.8.1.10",
- "3.8.1.11",
- "3.9.1.1",
- "3.10.1.1",
- "3.10.1.2",
- "3.11.1.1",
- "3.11.1.2",
- "3.11.1.3",
- "3.12.1.1",
- "3.13.1.1",
- "3.13.1.3",
- "4.1.1.1",
- "4.1.1.2",
- "4.1.1.3",
- "4.1.1.4",
- "4.1.1.5",
- "4.1.1.6",
- "4.1.1.7",
- "4.1.1.8",
- "4.1.1.9",
- "4.1.1.11",
- "4.1.1.12",
- "4.1.1.14",
- "4.1.1.15",
- "4.1.1.16",
- "4.1.1.17",
- "4.1.1.18",
- "4.1.1.19",
- "4.1.1.20",
- "4.1.1.21",
- "4.1.1.22",
- "4.1.1.23",
- "4.1.1.24",
- "4.1.1.25",
- "4.1.1.28",
- "4.1.1.29",
- "4.1.1.30",
- "4.1.1.31",
- "4.1.1.32",
- "4.1.1.33",
- "4.1.1.34",
- "4.1.1.35",
- "4.1.1.36",
- "4.1.1.37",
- "4.1.1.38",
- "4.1.1.39",
- "4.1.1.40",
- "4.1.1.41",
- "4.1.1.42",
- "4.1.1.43",
- "4.1.1.44",
- "4.1.1.45",
- "4.1.1.46",
- "4.1.1.47",
- "4.1.1.48",
- "4.1.1.49",
- "4.1.1.50",
- "4.1.1.51",
- "4.1.1.52",
- "4.1.1.53",
- "4.1.1.54",
- "4.1.1.55",
- "4.1.1.56",
- "4.1.1.57",
- "4.1.1.58",
- "4.1.1.59",
- "4.1.1.60",
- "4.1.1.61",
- "4.1.1.62",
- "4.1.1.63",
- "4.1.1.64",
- "4.1.1.65",
- "4.1.1.66",
- "4.1.1.67",
- "4.1.1.68",
- "4.1.1.69",
- "4.1.1.70",
- "4.1.1.71",
- "4.1.1.72",
- "4.1.1.73",
- "4.1.1.74",
- "4.1.1.75",
- "4.1.1.76",
- "4.1.1.77",
- "4.1.1.78",
- "4.1.1.79",
- "4.1.1.80",
- "4.1.1.81",
- "4.1.1.82",
- "4.1.1.83",
- "4.1.1.84",
- "4.1.1.85",
- "4.1.1.86",
- "4.1.1.87",
- "4.1.1.88",
- "4.1.1.89",
- "4.1.1.90",
- "4.1.2.2",
- "4.1.2.4",
- "4.1.2.5",
- "4.1.2.8",
- "4.1.2.9",
- "4.1.2.10",
- "4.1.2.11",
- "4.1.2.12",
- "4.1.2.13",
- "4.1.2.14",
- "4.1.2.17",
- "4.1.2.18",
- "4.1.2.19",
- "4.1.2.20",
- "4.1.2.21",
- "4.1.2.22",
- "4.1.2.23",
- "4.1.2.24",
- "4.1.2.25",
- "4.1.2.26",
- "4.1.2.27",
- "4.1.2.28",
- "4.1.2.29",
- "4.1.2.30",
- "4.1.2.32",
- "4.1.2.33",
- "4.1.2.34",
- "4.1.2.35",
- "4.1.2.36",
- "4.1.2.37",
- "4.1.2.38",
- "4.1.2.40",
- "4.1.2.41",
- "4.1.2.42",
- "4.1.2.43",
- "4.1.2.44",
- "4.1.2.45",
- "4.1.3.1",
- "4.1.3.3",
- "4.1.3.4",
- "4.1.3.6",
- "4.1.3.13",
- "4.1.3.14",
- "4.1.3.16",
- "4.1.3.17",
- "4.1.3.22",
- "4.1.3.24",
- "4.1.3.25",
- "4.1.3.26",
- "4.1.3.27",
- "4.1.3.30",
- "4.1.3.32",
- "4.1.3.34",
- "4.1.3.35",
- "4.1.3.36",
- "4.1.3.38",
- "4.1.3.39",
- "4.1.3.40",
- "4.1.99.1",
- "4.1.99.2",
- "4.1.99.3",
- "4.1.99.5",
- "4.1.99.11",
- "4.1.99.12",
- "4.1.99.13",
- "4.1.99.14",
- "4.1.99.15",
- "4.2.1.1",
- "4.2.1.2",
- "4.2.1.3",
- "4.2.1.4",
- "4.2.1.5",
- "4.2.1.6",
- "4.2.1.7",
- "4.2.1.8",
- "4.2.1.9",
- "4.2.1.10",
- "4.2.1.11",
- "4.2.1.12",
- "4.2.1.17",
- "4.2.1.18",
- "4.2.1.19",
- "4.2.1.20",
- "4.2.1.22",
- "4.2.1.24",
- "4.2.1.25",
- "4.2.1.27",
- "4.2.1.28",
- "4.2.1.30",
- "4.2.1.31",
- "4.2.1.32",
- "4.2.1.33",
- "4.2.1.34",
- "4.2.1.35",
- "4.2.1.36",
- "4.2.1.39",
- "4.2.1.40",
- "4.2.1.41",
- "4.2.1.42",
- "4.2.1.43",
- "4.2.1.44",
- "4.2.1.45",
- "4.2.1.46",
- "4.2.1.47",
- "4.2.1.48",
- "4.2.1.49",
- "4.2.1.50",
- "4.2.1.51",
- "4.2.1.52",
- "4.2.1.53",
- "4.2.1.54",
- "4.2.1.55",
- "4.2.1.56",
- "4.2.1.57",
- "4.2.1.58",
- "4.2.1.59",
- "4.2.1.60",
- "4.2.1.61",
- "4.2.1.62",
- "4.2.1.65",
- "4.2.1.66",
- "4.2.1.67",
- "4.2.1.68",
- "4.2.1.69",
- "4.2.1.70",
- "4.2.1.73",
- "4.2.1.74",
- "4.2.1.75",
- "4.2.1.76",
- "4.2.1.77",
- "4.2.1.78",
- "4.2.1.79",
- "4.2.1.80",
- "4.2.1.81",
- "4.2.1.82",
- "4.2.1.83",
- "4.2.1.84",
- "4.2.1.85",
- "4.2.1.87",
- "4.2.1.88",
- "4.2.1.89",
- "4.2.1.90",
- "4.2.1.91",
- "4.2.1.92",
- "4.2.1.93",
- "4.2.1.94",
- "4.2.1.95",
- "4.2.1.96",
- "4.2.1.97",
- "4.2.1.98",
- "4.2.1.99",
- "4.2.1.100",
- "4.2.1.101",
- "4.2.1.103",
- "4.2.1.104",
- "4.2.1.105",
- "4.2.1.106",
- "4.2.1.107",
- "4.2.1.108",
- "4.2.1.109",
- "4.2.1.110",
- "4.2.1.111",
- "4.2.1.112",
- "4.2.1.113",
- "4.2.1.114",
- "4.2.1.115",
- "4.2.1.116",
- "4.2.1.117",
- "4.2.1.118",
- "4.2.1.119",
- "4.2.1.120",
- "4.2.2.1",
- "4.2.2.2",
- "4.2.2.3",
- "4.2.2.5",
- "4.2.2.6",
- "4.2.2.7",
- "4.2.2.8",
- "4.2.2.9",
- "4.2.2.10",
- "4.2.2.11",
- "4.2.2.12",
- "4.2.2.13",
- "4.2.2.14",
- "4.2.2.15",
- "4.2.2.16",
- "4.2.2.17",
- "4.2.2.18",
- "4.2.2.19",
- "4.2.2.20",
- "4.2.2.21",
- "4.2.2.22",
- "4.2.3.1",
- "4.2.3.2",
- "4.2.3.3",
- "4.2.3.4",
- "4.2.3.5",
- "4.2.3.6",
- "4.2.3.7",
- "4.2.3.8",
- "4.2.3.9",
- "4.2.3.10",
- "4.2.3.11",
- "4.2.3.12",
- "4.2.3.13",
- "4.2.3.14",
- "4.2.3.15",
- "4.2.3.16",
- "4.2.3.17",
- "4.2.3.18",
- "4.2.3.19",
- "4.2.3.20",
- "4.2.3.21",
- "4.2.3.22",
- "4.2.3.23",
- "4.2.3.24",
- "4.2.3.25",
- "4.2.3.26",
- "4.2.3.27",
- "4.2.3.28",
- "4.2.3.29",
- "4.2.3.30",
- "4.2.3.31",
- "4.2.3.32",
- "4.2.3.33",
- "4.2.3.34",
- "4.2.3.35",
- "4.2.3.36",
- "4.2.3.37",
- "4.2.3.38",
- "4.2.3.39",
- "4.2.3.40",
- "4.2.3.41",
- "4.2.3.42",
- "4.2.3.43",
- "4.2.3.44",
- "4.2.3.45",
- "4.2.99.12",
- "4.2.99.18",
- "4.2.99.20",
- "4.3.1.1",
- "4.3.1.2",
- "4.3.1.3",
- "4.3.1.4",
- "4.3.1.6",
- "4.3.1.7",
- "4.3.1.9",
- "4.3.1.10",
- "4.3.1.12",
- "4.3.1.13",
- "4.3.1.14",
- "4.3.1.15",
- "4.3.1.16",
- "4.3.1.17",
- "4.3.1.18",
- "4.3.1.19",
- "4.3.1.20",
- "4.3.1.22",
- "4.3.1.23",
- "4.3.1.24",
- "4.3.1.25",
- "4.3.1.26",
- "4.3.2.1",
- "4.3.2.2",
- "4.3.2.3",
- "4.3.2.4",
- "4.3.2.5",
- "4.3.3.1",
- "4.3.3.2",
- "4.3.3.3",
- "4.3.3.4",
- "4.3.3.5",
- "4.3.99.2",
- "4.4.1.1",
- "4.4.1.2",
- "4.4.1.3",
- "4.4.1.4",
- "4.4.1.5",
- "4.4.1.6",
- "4.4.1.8",
- "4.4.1.9",
- "4.4.1.10",
- "4.4.1.11",
- "4.4.1.13",
- "4.4.1.14",
- "4.4.1.15",
- "4.4.1.16",
- "4.4.1.17",
- "4.4.1.19",
- "4.4.1.20",
- "4.4.1.21",
- "4.4.1.22",
- "4.4.1.23",
- "4.4.1.24",
- "4.4.1.25",
- "4.5.1.1",
- "4.5.1.2",
- "4.5.1.3",
- "4.5.1.4",
- "4.5.1.5",
- "4.6.1.1",
- "4.6.1.2",
- "4.6.1.6",
- "4.6.1.12",
- "4.6.1.13",
- "4.6.1.14",
- "4.6.1.15",
- "4.99.1.1",
- "4.99.1.2",
- "4.99.1.3",
- "4.99.1.4",
- "4.99.1.5",
- "4.99.1.6",
- "4.99.1.7",
- "4.99.1.8",
- "5.1.1.1",
- "5.1.1.2",
- "5.1.1.3",
- "5.1.1.4",
- "5.1.1.5",
- "5.1.1.6",
- "5.1.1.7",
- "5.1.1.8",
- "5.1.1.9",
- "5.1.1.10",
- "5.1.1.11",
- "5.1.1.12",
- "5.1.1.13",
- "5.1.1.14",
- "5.1.1.15",
- "5.1.1.16",
- "5.1.1.17",
- "5.1.1.18",
- "5.1.2.1",
- "5.1.2.2",
- "5.1.2.3",
- "5.1.2.4",
- "5.1.2.5",
- "5.1.2.6",
- "5.1.3.1",
- "5.1.3.2",
- "5.1.3.3",
- "5.1.3.4",
- "5.1.3.5",
- "5.1.3.6",
- "5.1.3.7",
- "5.1.3.8",
- "5.1.3.9",
- "5.1.3.10",
- "5.1.3.11",
- "5.1.3.12",
- "5.1.3.13",
- "5.1.3.14",
- "5.1.3.15",
- "5.1.3.16",
- "5.1.3.17",
- "5.1.3.18",
- "5.1.3.19",
- "5.1.3.20",
- "5.1.3.21",
- "5.1.3.22",
- "5.1.3.23",
- "5.1.99.1",
- "5.1.99.2",
- "5.1.99.3",
- "5.1.99.4",
- "5.1.99.5",
- "5.2.1.1",
- "5.2.1.2",
- "5.2.1.3",
- "5.2.1.4",
- "5.2.1.5",
- "5.2.1.6",
- "5.2.1.7",
- "5.2.1.8",
- "5.2.1.9",
- "5.2.1.10",
- "5.3.1.1",
- "5.3.1.3",
- "5.3.1.4",
- "5.3.1.5",
- "5.3.1.6",
- "5.3.1.7",
- "5.3.1.8",
- "5.3.1.9",
- "5.3.1.12",
- "5.3.1.13",
- "5.3.1.14",
- "5.3.1.15",
- "5.3.1.16",
- "5.3.1.17",
- "5.3.1.20",
- "5.3.1.21",
- "5.3.1.22",
- "5.3.1.23",
- "5.3.1.24",
- "5.3.1.25",
- "5.3.1.26",
- "5.3.1.27",
- "5.3.2.1",
- "5.3.2.2",
- "5.3.3.1",
- "5.3.3.2",
- "5.3.3.3",
- "5.3.3.4",
- "5.3.3.5",
- "5.3.3.6",
- "5.3.3.7",
- "5.3.3.8",
- "5.3.3.9",
- "5.3.3.10",
- "5.3.3.11",
- "5.3.3.12",
- "5.3.3.13",
- "5.3.3.14",
- "5.3.3.15",
- "5.3.4.1",
- "5.3.99.2",
- "5.3.99.3",
- "5.3.99.4",
- "5.3.99.5",
- "5.3.99.6",
- "5.3.99.7",
- "5.3.99.8",
- "5.3.99.9",
- "5.4.1.1",
- "5.4.1.2",
- "5.4.2.1",
- "5.4.2.2",
- "5.4.2.3",
- "5.4.2.4",
- "5.4.2.5",
- "5.4.2.6",
- "5.4.2.7",
- "5.4.2.8",
- "5.4.2.9",
- "5.4.2.10",
- "5.4.3.2",
- "5.4.3.3",
- "5.4.3.4",
- "5.4.3.5",
- "5.4.3.6",
- "5.4.3.7",
- "5.4.3.8",
- "5.4.4.1",
- "5.4.4.2",
- "5.4.4.3",
- "5.4.99.1",
- "5.4.99.2",
- "5.4.99.3",
- "5.4.99.4",
- "5.4.99.5",
- "5.4.99.7",
- "5.4.99.8",
- "5.4.99.9",
- "5.4.99.11",
- "5.4.99.12",
- "5.4.99.13",
- "5.4.99.14",
- "5.4.99.15",
- "5.4.99.16",
- "5.4.99.17",
- "5.4.99.18",
- "5.5.1.1",
- "5.5.1.2",
- "5.5.1.3",
- "5.5.1.4",
- "5.5.1.5",
- "5.5.1.6",
- "5.5.1.7",
- "5.5.1.8",
- "5.5.1.9",
- "5.5.1.10",
- "5.5.1.11",
- "5.5.1.12",
- "5.5.1.13",
- "5.5.1.14",
- "5.5.1.15",
- "5.5.1.16",
- "5.99.1.1",
- "5.99.1.2",
- "5.99.1.3",
- "5.99.1.4",
- "6.1.1.1",
- "6.1.1.2",
- "6.1.1.3",
- "6.1.1.4",
- "6.1.1.5",
- "6.1.1.6",
- "6.1.1.7",
- "6.1.1.9",
- "6.1.1.10",
- "6.1.1.11",
- "6.1.1.12",
- "6.1.1.13",
- "6.1.1.14",
- "6.1.1.15",
- "6.1.1.16",
- "6.1.1.17",
- "6.1.1.18",
- "6.1.1.19",
- "6.1.1.20",
- "6.1.1.21",
- "6.1.1.22",
- "6.1.1.23",
- "6.1.1.24",
- "6.1.1.25",
- "6.1.1.26",
- "6.1.1.27",
- "6.2.1.1",
- "6.2.1.2",
- "6.2.1.3",
- "6.2.1.4",
- "6.2.1.5",
- "6.2.1.6",
- "6.2.1.7",
- "6.2.1.8",
- "6.2.1.9",
- "6.2.1.10",
- "6.2.1.11",
- "6.2.1.12",
- "6.2.1.13",
- "6.2.1.14",
- "6.2.1.15",
- "6.2.1.16",
- "6.2.1.17",
- "6.2.1.18",
- "6.2.1.19",
- "6.2.1.20",
- "6.2.1.22",
- "6.2.1.23",
- "6.2.1.24",
- "6.2.1.25",
- "6.2.1.26",
- "6.2.1.27",
- "6.2.1.28",
- "6.2.1.30",
- "6.2.1.31",
- "6.2.1.32",
- "6.2.1.33",
- "6.2.1.34",
- "6.2.1.35",
- "6.2.1.36",
- "6.3.1.1",
- "6.3.1.2",
- "6.3.1.4",
- "6.3.1.5",
- "6.3.1.6",
- "6.3.1.7",
- "6.3.1.8",
- "6.3.1.9",
- "6.3.1.10",
- "6.3.1.11",
- "6.3.1.12",
- "6.3.1.13",
- "6.3.2.1",
- "6.3.2.2",
- "6.3.2.3",
- "6.3.2.4",
- "6.3.2.5",
- "6.3.2.6",
- "6.3.2.7",
- "6.3.2.8",
- "6.3.2.9",
- "6.3.2.10",
- "6.3.2.11",
- "6.3.2.12",
- "6.3.2.13",
- "6.3.2.14",
- "6.3.2.16",
- "6.3.2.17",
- "6.3.2.18",
- "6.3.2.19",
- "6.3.2.20",
- "6.3.2.21",
- "6.3.2.22",
- "6.3.2.23",
- "6.3.2.24",
- "6.3.2.25",
- "6.3.2.26",
- "6.3.2.27",
- "6.3.2.28",
- "6.3.2.29",
- "6.3.2.30",
- "6.3.2.31",
- "6.3.2.32",
- "6.3.2.33",
- "6.3.2.34",
- "6.3.3.1",
- "6.3.3.2",
- "6.3.3.3",
- "6.3.3.4",
- "6.3.4.1",
- "6.3.4.2",
- "6.3.4.3",
- "6.3.4.4",
- "6.3.4.5",
- "6.3.4.6",
- "6.3.4.7",
- "6.3.4.8",
- "6.3.4.9",
- "6.3.4.10",
- "6.3.4.11",
- "6.3.4.12",
- "6.3.4.13",
- "6.3.4.14",
- "6.3.4.15",
- "6.3.4.16",
- "6.3.4.17",
- "6.3.4.18",
- "6.3.5.1",
- "6.3.5.2",
- "6.3.5.3",
- "6.3.5.4",
- "6.3.5.5",
- "6.3.5.6",
- "6.3.5.7",
- "6.3.5.9",
- "6.3.5.10",
- "6.4.1.1",
- "6.4.1.2",
- "6.4.1.3",
- "6.4.1.4",
- "6.4.1.5",
- "6.4.1.6",
- "6.4.1.7",
- "6.5.1.1",
- "6.5.1.2",
- "6.5.1.3",
- "6.5.1.4",
- "6.6.1.1",
- "6.6.1.2"
+ "1.1.1.1 Alcohol dehydrogenase",
+ "1.1.1.2 Alcohol dehydrogenase (NADP(+))",
+ "1.1.1.3 Homoserine dehydrogenase",
+ "1.1.1.4 (R,R)-butanediol dehydrogenase",
+ "1.1.1.6 Glycerol dehydrogenase",
+ "1.1.1.7 Propanediol-phosphate dehydrogenase",
+ "1.1.1.8 Glycerol-3-phosphate dehydrogenase (NAD(+))",
+ "1.1.1.9 D-xylulose reductase",
+ "1.1.1.10 L-xylulose reductase",
+ "1.1.1.11 D-arabinitol 4-dehydrogenase",
+ "1.1.1.12 L-arabinitol 4-dehydrogenase",
+ "1.1.1.13 L-arabinitol 2-dehydrogenase",
+ "1.1.1.14 L-iditol 2-dehydrogenase",
+ "1.1.1.15 D-iditol 2-dehydrogenase",
+ "1.1.1.16 Galactitol 2-dehydrogenase",
+ "1.1.1.17 Mannitol-1-phosphate 5-dehydrogenase",
+ "1.1.1.18 Inositol 2-dehydrogenase",
+ "1.1.1.19 Glucuronate reductase",
+ "1.1.1.20 Glucuronolactone reductase",
+ "1.1.1.21 Aldehyde reductase",
+ "1.1.1.22 UDP-glucose 6-dehydrogenase",
+ "1.1.1.23 Histidinol dehydrogenase",
+ "1.1.1.24 Quinate dehydrogenase",
+ "1.1.1.25 Shikimate dehydrogenase",
+ "1.1.1.26 Glyoxylate reductase",
+ "1.1.1.27 L-lactate dehydrogenase",
+ "1.1.1.28 D-lactate dehydrogenase",
+ "1.1.1.29 Glycerate dehydrogenase",
+ "1.1.1.30 3-hydroxybutyrate dehydrogenase",
+ "1.1.1.31 3-hydroxyisobutyrate dehydrogenase",
+ "1.1.1.32 Mevaldate reductase",
+ "1.1.1.33 Mevaldate reductase (NADPH)",
+ "1.1.1.34 Hydroxymethylglutaryl-CoA reductase (NADPH)",
+ "1.1.1.35 3-hydroxyacyl-CoA dehydrogenase",
+ "1.1.1.36 Acetoacetyl-CoA reductase",
+ "1.1.1.37 Malate dehydrogenase",
+ "1.1.1.38 Malate dehydrogenase (oxaloacetate-decarboxylating)",
+ "1.1.1.39 Malate dehydrogenase (decarboxylating)",
+ "1.1.1.40 Malate dehydrogenase (oxaloacetate-decarboxylating) (NADP(+))",
+ "1.1.1.41 Isocitrate dehydrogenase (NAD(+))",
+ "1.1.1.42 Isocitrate dehydrogenase (NADP(+))",
+ "1.1.1.43 Phosphogluconate 2-dehydrogenase",
+ "1.1.1.44 Phosphogluconate dehydrogenase (decarboxylating)",
+ "1.1.1.45 L-gulonate 3-dehydrogenase",
+ "1.1.1.46 L-arabinose 1-dehydrogenase",
+ "1.1.1.47 Glucose 1-dehydrogenase",
+ "1.1.1.48 Galactose 1-dehydrogenase",
+ "1.1.1.49 Glucose-6-phosphate dehydrogenase",
+ "1.1.1.50 3-alpha-hydroxysteroid dehydrogenase (B-specific)",
+ "1.1.1.51 3(or 17)-beta-hydroxysteroid dehydrogenase",
+ "1.1.1.52 3-alpha-hydroxycholanate dehydrogenase",
+ "1.1.1.53 3-alpha-(or 20-beta)-hydroxysteroid dehydrogenase",
+ "1.1.1.54 Allyl-alcohol dehydrogenase",
+ "1.1.1.55 Lactaldehyde reductase (NADPH)",
+ "1.1.1.56 Ribitol 2-dehydrogenase",
+ "1.1.1.57 Fructuronate reductase",
+ "1.1.1.58 Tagaturonate reductase",
+ "1.1.1.59 3-hydroxypropionate dehydrogenase",
+ "1.1.1.60 2-hydroxy-3-oxopropionate reductase",
+ "1.1.1.61 4-hydroxybutyrate dehydrogenase",
+ "1.1.1.62 Estradiol 17-beta-dehydrogenase",
+ "1.1.1.63 Testosterone 17-beta-dehydrogenase",
+ "1.1.1.64 Testosterone 17-beta-dehydrogenase (NADP(+))",
+ "1.1.1.65 Pyridoxine 4-dehydrogenase",
+ "1.1.1.66 Omega-hydroxydecanoate dehydrogenase",
+ "1.1.1.67 Mannitol 2-dehydrogenase",
+ "1.1.1.69 Gluconate 5-dehydrogenase",
+ "1.1.1.71 Alcohol dehydrogenase (NAD(P)(+))",
+ "1.1.1.72 Glycerol dehydrogenase (NADP(+))",
+ "1.1.1.73 Octanol dehydrogenase",
+ "1.1.1.75 (R)-aminopropanol dehydrogenase",
+ "1.1.1.76 (S,S)-butanediol dehydrogenase",
+ "1.1.1.77 Lactaldehyde reductase",
+ "1.1.1.78 Methylglyoxal reductase (NADH-dependent)",
+ "1.1.1.79 Glyoxylate reductase (NADP(+))",
+ "1.1.1.80 Isopropanol dehydrogenase (NADP(+))",
+ "1.1.1.81 Hydroxypyruvate reductase",
+ "1.1.1.82 Malate dehydrogenase (NADP(+))",
+ "1.1.1.83 D-malate dehydrogenase (decarboxylating)",
+ "1.1.1.84 Dimethylmalate dehydrogenase",
+ "1.1.1.85 3-isopropylmalate dehydrogenase",
+ "1.1.1.86 Ketol-acid reductoisomerase",
+ "1.1.1.87 Homoisocitrate dehydrogenase",
+ "1.1.1.88 Hydroxymethylglutaryl-CoA reductase",
+ "1.1.1.90 Aryl-alcohol dehydrogenase",
+ "1.1.1.91 Aryl-alcohol dehydrogenase (NADP(+))",
+ "1.1.1.92 Oxaloglycolate reductase (decarboxylating)",
+ "1.1.1.93 Tartrate dehydrogenase",
+ "1.1.1.94 Glycerol-3-phosphate dehydrogenase (NAD(P)(+))",
+ "1.1.1.95 Phosphoglycerate dehydrogenase",
+ "1.1.1.96 Diiodophenylpyruvate reductase",
+ "1.1.1.97 3-hydroxybenzyl-alcohol dehydrogenase",
+ "1.1.1.98 (R)-2-hydroxy-fatty-acid dehydrogenase",
+ "1.1.1.99 (S)-2-hydroxy-fatty-acid dehydrogenase",
+ "1.1.1.100 3-oxoacyl-[acyl-carrier-protein] reductase",
+ "1.1.1.101 Acylglycerone-phosphate reductase",
+ "1.1.1.102 3-dehydrosphinganine reductase",
+ "1.1.1.103 L-threonine 3-dehydrogenase",
+ "1.1.1.104 4-oxoproline reductase",
+ "1.1.1.105 Retinol dehydrogenase",
+ "1.1.1.106 Pantoate 4-dehydrogenase",
+ "1.1.1.107 Pyridoxal 4-dehydrogenase",
+ "1.1.1.108 Carnitine 3-dehydrogenase",
+ "1.1.1.110 Indolelactate dehydrogenase",
+ "1.1.1.111 3-(imidazol-5-yl)lactate dehydrogenase",
+ "1.1.1.112 Indanol dehydrogenase",
+ "1.1.1.113 L-xylose 1-dehydrogenase",
+ "1.1.1.114 Apiose 1-reductase",
+ "1.1.1.115 Ribose 1-dehydrogenase (NADP(+))",
+ "1.1.1.116 D-arabinose 1-dehydrogenase",
+ "1.1.1.117 D-arabinose 1-dehydrogenase (NAD(P)(+))",
+ "1.1.1.118 Glucose 1-dehydrogenase (NAD(+))",
+ "1.1.1.119 Glucose 1-dehydrogenase (NADP(+))",
+ "1.1.1.120 Galactose 1-dehydrogenase (NADP(+))",
+ "1.1.1.121 Aldose 1-dehydrogenase",
+ "1.1.1.122 D-threo-aldose 1-dehydrogenase",
+ "1.1.1.123 Sorbose 5-dehydrogenase (NADP(+))",
+ "1.1.1.124 Fructose 5-dehydrogenase (NADP(+))",
+ "1.1.1.125 2-deoxy-D-gluconate 3-dehydrogenase",
+ "1.1.1.126 2-dehydro-3-deoxy-D-gluconate 6-dehydrogenase",
+ "1.1.1.127 2-dehydro-3-deoxy-D-gluconate 5-dehydrogenase",
+ "1.1.1.128 L-idonate 2-dehydrogenase",
+ "1.1.1.129 L-threonate 3-dehydrogenase",
+ "1.1.1.130 3-dehydro-L-gulonate 2-dehydrogenase",
+ "1.1.1.131 Mannuronate reductase",
+ "1.1.1.132 GDP-mannose 6-dehydrogenase",
+ "1.1.1.133 dTDP-4-dehydrorhamnose reductase",
+ "1.1.1.134 dTDP-6-deoxy-L-talose 4-dehydrogenase",
+ "1.1.1.135 GDP-6-deoxy-D-talose 4-dehydrogenase",
+ "1.1.1.136 UDP-N-acetylglucosamine 6-dehydrogenase",
+ "1.1.1.137 Ribitol-5-phosphate 2-dehydrogenase",
+ "1.1.1.138 Mannitol 2-dehydrogenase (NADP(+))",
+ "1.1.1.140 Sorbitol-6-phosphate 2-dehydrogenase",
+ "1.1.1.141 15-hydroxyprostaglandin dehydrogenase (NAD(+))",
+ "1.1.1.142 D-pinitol dehydrogenase",
+ "1.1.1.143 Sequoyitol dehydrogenase",
+ "1.1.1.144 Perillyl-alcohol dehydrogenase",
+ "1.1.1.145 3-beta-hydroxy-Delta(5)-steroid dehydrogenase",
+ "1.1.1.146 11-beta-hydroxysteroid dehydrogenase",
+ "1.1.1.147 16-alpha-hydroxysteroid dehydrogenase",
+ "1.1.1.148 Estradiol 17-alpha-dehydrogenase",
+ "1.1.1.149 20-alpha-hydroxysteroid dehydrogenase",
+ "1.1.1.150 21-hydroxysteroid dehydrogenase (NAD(+))",
+ "1.1.1.151 21-hydroxysteroid dehydrogenase (NADP(+))",
+ "1.1.1.152 3-alpha-hydroxy-5-beta-androstane-17-one 3-alpha-dehydrogenase",
+ "1.1.1.153 Sepiapterin reductase",
+ "1.1.1.154 Ureidoglycolate dehydrogenase",
+ "1.1.1.156 Glycerol 2-dehydrogenase (NADP(+))",
+ "1.1.1.157 3-hydroxybutyryl-CoA dehydrogenase",
+ "1.1.1.158 UDP-N-acetylmuramate dehydrogenase",
+ "1.1.1.159 7-alpha-hydroxysteroid dehydrogenase",
+ "1.1.1.160 Dihydrobunolol dehydrogenase",
+ "1.1.1.161 Cholestanetetraol 26-dehydrogenase",
+ "1.1.1.162 Erythrulose reductase",
+ "1.1.1.163 Cyclopentanol dehydrogenase",
+ "1.1.1.164 Hexadecanol dehydrogenase",
+ "1.1.1.165 2-alkyn-1-ol dehydrogenase",
+ "1.1.1.166 Hydroxycyclohexanecarboxylate dehydrogenase",
+ "1.1.1.167 Hydroxymalonate dehydrogenase",
+ "1.1.1.168 2-dehydropantolactone reductase (A-specific)",
+ "1.1.1.169 2-dehydropantoate 2-reductase",
+ "1.1.1.170 Sterol-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)",
+ "1.1.1.172 2-oxoadipate reductase",
+ "1.1.1.173 L-rhamnose 1-dehydrogenase",
+ "1.1.1.174 Cyclohexane-1,2-diol dehydrogenase",
+ "1.1.1.175 D-xylose 1-dehydrogenase",
+ "1.1.1.176 12-alpha-hydroxysteroid dehydrogenase",
+ "1.1.1.177 Glycerol-3-phosphate 1-dehydrogenase (NADP(+))",
+ "1.1.1.178 3-hydroxy-2-methylbutyryl-CoA dehydrogenase",
+ "1.1.1.179 D-xylose 1-dehydrogenase (NADP(+))",
+ "1.1.1.181 Cholest-5-ene-3-beta,7-alpha-diol 3-beta-dehydrogenase",
+ "1.1.1.183 Geraniol dehydrogenase",
+ "1.1.1.184 Carbonyl reductase (NADPH)",
+ "1.1.1.185 L-glycol dehydrogenase",
+ "1.1.1.186 dTDP-galactose 6-dehydrogenase",
+ "1.1.1.187 GDP-4-dehydro-D-rhamnose reductase",
+ "1.1.1.188 Prostaglandin-F synthase",
+ "1.1.1.189 Prostaglandin-E(2) 9-reductase",
+ "1.1.1.190 Indole-3-acetaldehyde reductase (NADH)",
+ "1.1.1.191 Indole-3-acetaldehyde reductase (NADPH)",
+ "1.1.1.192 Long-chain-alcohol dehydrogenase",
+ "1.1.1.193 5-amino-6-(5-phosphoribosylamino)uracil reductase",
+ "1.1.1.194 Coniferyl-alcohol dehydrogenase",
+ "1.1.1.195 Cinnamyl-alcohol dehydrogenase",
+ "1.1.1.196 15-hydroxyprostaglandin-D dehydrogenase (NADP(+))",
+ "1.1.1.197 15-hydroxyprostaglandin dehydrogenase (NADP(+))",
+ "1.1.1.198 (+)-borneol dehydrogenase",
+ "1.1.1.199 (S)-usnate reductase",
+ "1.1.1.200 Aldose-6-phosphate reductase (NADPH)",
+ "1.1.1.201 7-beta-hydroxysteroid dehydrogenase (NADP(+))",
+ "1.1.1.202 1,3-propanediol dehydrogenase",
+ "1.1.1.203 Uronate dehydrogenase",
+ "1.1.1.205 IMP dehydrogenase",
+ "1.1.1.206 Tropinone reductase I",
+ "1.1.1.207 (-)-menthol dehydrogenase",
+ "1.1.1.208 (+)-neomenthol dehydrogenase",
+ "1.1.1.209 3(or 17)-alpha-hydroxysteroid dehydrogenase",
+ "1.1.1.210 3-beta-(or 20-alpha)-hydroxysteroid dehydrogenase",
+ "1.1.1.211 Long-chain-3-hydroxyacyl-CoA dehydrogenase",
+ "1.1.1.212 3-oxoacyl-[acyl-carrier-protein] reductase (NADH)",
+ "1.1.1.213 3-alpha-hydroxysteroid dehydrogenase (A-specific)",
+ "1.1.1.214 2-dehydropantolactone reductase (B-specific)",
+ "1.1.1.215 Gluconate 2-dehydrogenase",
+ "1.1.1.216 Farnesol dehydrogenase",
+ "1.1.1.217 Benzyl-2-methyl-hydroxybutyrate dehydrogenase",
+ "1.1.1.218 Morphine 6-dehydrogenase",
+ "1.1.1.219 Dihydrokaempferol 4-reductase",
+ "1.1.1.220 6-pyruvoyltetrahydropterin 2'-reductase",
+ "1.1.1.221 Vomifoliol dehydrogenase",
+ "1.1.1.222 (R)-4-hydroxyphenyllactate dehydrogenase",
+ "1.1.1.223 Isopiperitenol dehydrogenase",
+ "1.1.1.224 Mannose-6-phosphate 6-reductase",
+ "1.1.1.225 Chlordecone reductase",
+ "1.1.1.226 4-hydroxycyclohexanecarboxylate dehydrogenase",
+ "1.1.1.227 (-)-borneol dehydrogenase",
+ "1.1.1.228 (+)-sabinol dehydrogenase",
+ "1.1.1.229 Diethyl 2-methyl-3-oxosuccinate reductase",
+ "1.1.1.230 3-alpha-hydroxyglycyrrhetinate dehydrogenase",
+ "1.1.1.231 15-hydroxyprostaglandin-I dehydrogenase (NADP(+))",
+ "1.1.1.232 15-hydroxyicosatetraenoate dehydrogenase",
+ "1.1.1.233 N-acylmannosamine 1-dehydrogenase",
+ "1.1.1.234 Flavanone 4-reductase",
+ "1.1.1.235 8-oxocoformycin reductase",
+ "1.1.1.236 Tropinone reductase II",
+ "1.1.1.237 Hydroxyphenylpyruvate reductase",
+ "1.1.1.238 12-beta-hydroxysteroid dehydrogenase",
+ "1.1.1.239 3-alpha-(17-beta)-hydroxysteroid dehydrogenase (NAD(+))",
+ "1.1.1.240 N-acetylhexosamine 1-dehydrogenase",
+ "1.1.1.241 6-endo-hydroxycineole dehydrogenase",
+ "1.1.1.243 Carveol dehydrogenase",
+ "1.1.1.244 Methanol dehydrogenase",
+ "1.1.1.245 Cyclohexanol dehydrogenase",
+ "1.1.1.246 Pterocarpin synthase",
+ "1.1.1.247 Codeinone reductase (NADPH)",
+ "1.1.1.248 Salutaridine reductase (NADPH)",
+ "1.1.1.250 D-arabinitol 2-dehydrogenase",
+ "1.1.1.251 Galactitol-1-phosphate 5-dehydrogenase",
+ "1.1.1.252 Tetrahydroxynaphthalene reductase",
+ "1.1.1.254 (S)-carnitine 3-dehydrogenase",
+ "1.1.1.255 Mannitol dehydrogenase",
+ "1.1.1.256 Fluoren-9-ol dehydrogenase",
+ "1.1.1.257 4-(hydroxymethyl)benzenesulfonate dehydrogenase",
+ "1.1.1.258 6-hydroxyhexanoate dehydrogenase",
+ "1.1.1.259 3-hydroxypimeloyl-CoA dehydrogenase",
+ "1.1.1.260 Sulcatone reductase",
+ "1.1.1.261 sn-glycerol-1-phosphate dehydrogenase",
+ "1.1.1.262 4-hydroxythreonine-4-phosphate dehydrogenase",
+ "1.1.1.263 1,5-anhydro-D-fructose reductase",
+ "1.1.1.264 L-idonate 5-dehydrogenase",
+ "1.1.1.265 3-methylbutanal reductase",
+ "1.1.1.266 dTDP-4-dehydro-6-deoxyglucose reductase",
+ "1.1.1.267 1-deoxy-D-xylulose-5-phosphate reductoisomerase",
+ "1.1.1.268 2-(R)-hydroxypropyl-CoM dehydrogenase",
+ "1.1.1.269 2-(S)-hydroxypropyl-CoM dehydrogenase",
+ "1.1.1.270 3-keto-steroid reductase",
+ "1.1.1.271 GDP-L-fucose synthase",
+ "1.1.1.272 (R)-2-hydroxyacid dehydrogenase",
+ "1.1.1.273 Vellosimine dehydrogenase",
+ "1.1.1.274 2,5-didehydrogluconate reductase",
+ "1.1.1.275 (+)-trans-carveol dehydrogenase",
+ "1.1.1.276 Serine 3-dehydrogenase",
+ "1.1.1.277 3-beta-hydroxy-5-beta-steroid dehydrogenase",
+ "1.1.1.278 3-beta-hydroxy-5-alpha-steroid dehydrogenase",
+ "1.1.1.279 (R)-3-hydroxyacid-ester dehydrogenase",
+ "1.1.1.280 (S)-3-hydroxyacid-ester dehydrogenase",
+ "1.1.1.281 GDP-4-dehydro-6-deoxy-D-mannose reductase",
+ "1.1.1.282 Quinate/shikimate dehydrogenase",
+ "1.1.1.283 Methylglyoxal reductase (NADPH-dependent)",
+ "1.1.1.284 S-(hydroxymethyl)glutathione dehydrogenase",
+ "1.1.1.285 3''-deamino-3''-oxonicotianamine reductase",
+ "1.1.1.286 Isocitrate--homoisocitrate dehydrogenase",
+ "1.1.1.287 D-arabinitol dehydrogenase (NADP(+))",
+ "1.1.1.288 Xanthoxin dehydrogenase",
+ "1.1.1.289 Sorbose reductase",
+ "1.1.1.290 4-phosphoerythronate dehydrogenase",
+ "1.1.1.291 2-hydroxymethylglutarate dehydrogenase",
+ "1.1.1.292 1,5-anhydro-D-fructose reductase (1,5-anhydro-D-mannitol-forming)",
+ "1.1.1.294 Chlorophyll(ide) b reductase",
+ "1.1.1.295 Momilactone-A synthase",
+ "1.1.1.296 Dihydrocarveol dehydrogenase",
+ "1.1.1.297 Limonene-1,2-diol dehydrogenase",
+ "1.1.1.298 3-hydroxypropionate dehydrogenase (NADP(+))",
+ "1.1.1.299 Malate dehydrogenase (NAD(P)(+))",
+ "1.1.1.300 NADP-retinol dehydrogenase",
+ "1.1.1.301 D-arabitol-phosphate dehydrogenase",
+ "1.1.1.302 2,5-diamino-6-(ribosylamino)-4(3H)-pyrimidinone 5'-phosphate reductase",
+ "1.1.1.303 Diacetyl reductase ((R)-acetoin forming)",
+ "1.1.1.304 Diacetyl reductase ((S)-acetoin forming)",
+ "1.1.1.305 UDP-glucuronic acid oxidase (UDP-4-keto-hexauronic acid decarboxylating)",
+ "1.1.1.306 S-(hydroxymethyl)mycothiol dehydrogenase",
+ "1.1.1.307 D-xylose reductase",
+ "1.1.1.308 Sulfopropanediol 3-dehydrogenase",
+ "1.1.1.309 Phosphonoacetaldehyde reductase (NADH)",
+ "1.1.1.310 (S)-sulfolactate dehydrogenase",
+ "1.1.1.311 (S)-1-phenylethanol dehydrogenase",
+ "1.1.1.n3 UDP-N-acetyl-D-mannosamine dehydrogenase",
+ "1.1.1.n4 (-)-trans-carveol dehydrogenase",
+ "1.1.1.n5 3-methylmalate dehydrogenase",
+ "1.1.1.n6 D-chiro-inositol 3-dehydrogenase",
+ "1.1.1.n7 Benzil reductase",
+ "1.1.1.n8 L-idonate dehydrogenase",
+ "1.1.1.n9 D-galacturonate reductase",
+ "1.1.1.n11 Succinic semialdehyde reductase",
+ "1.1.2.2 Mannitol dehydrogenase (cytochrome)",
+ "1.1.2.3 L-lactate dehydrogenase (cytochrome)",
+ "1.1.2.4 D-lactate dehydrogenase (cytochrome)",
+ "1.1.2.5 D-lactate dehydrogenase (cytochrome c-553)",
+ "1.1.2.6 Polyvinyl alcohol dehydrogenase (cytochrome)",
+ "1.1.2.7 Methanol dehydrogenase (cytochrome c)",
+ "1.1.2.8 Alcohol dehydrogenase (cytochrome c)",
+ "1.1.3.3 Malate oxidase",
+ "1.1.3.4 Glucose oxidase",
+ "1.1.3.5 Hexose oxidase",
+ "1.1.3.6 Cholesterol oxidase",
+ "1.1.3.7 Aryl-alcohol oxidase",
+ "1.1.3.8 L-gulonolactone oxidase",
+ "1.1.3.9 Galactose oxidase",
+ "1.1.3.10 Pyranose oxidase",
+ "1.1.3.11 L-sorbose oxidase",
+ "1.1.3.12 Pyridoxine 4-oxidase",
+ "1.1.3.13 Alcohol oxidase",
+ "1.1.3.14 Catechol oxidase (dimerizing)",
+ "1.1.3.15 (S)-2-hydroxy-acid oxidase",
+ "1.1.3.16 Ecdysone oxidase",
+ "1.1.3.17 Choline oxidase",
+ "1.1.3.18 Secondary-alcohol oxidase",
+ "1.1.3.19 4-hydroxymandelate oxidase",
+ "1.1.3.20 Long-chain-alcohol oxidase",
+ "1.1.3.21 Glycerol-3-phosphate oxidase",
+ "1.1.3.23 Thiamine oxidase",
+ "1.1.3.27 Hydroxyphytanate oxidase",
+ "1.1.3.28 Nucleoside oxidase",
+ "1.1.3.29 N-acylhexosamine oxidase",
+ "1.1.3.30 Polyvinyl-alcohol oxidase",
+ "1.1.3.37 D-arabinono-1,4-lactone oxidase",
+ "1.1.3.38 Vanillyl-alcohol oxidase",
+ "1.1.3.39 Nucleoside oxidase (H(2)O(2)-forming)",
+ "1.1.3.40 D-mannitol oxidase",
+ "1.1.3.41 Alditol oxidase",
+ "1.1.4.1 Vitamin-K-epoxide reductase (warfarin-sensitive)",
+ "1.1.4.2 Vitamin-K-epoxide reductase (warfarin-insensitive)",
+ "1.1.5.2 Quinoprotein glucose dehydrogenase",
+ "1.1.5.3 Glycerol-3-phosphate dehydrogenase",
+ "1.1.5.4 Malate dehydrogenase (quinone)",
+ "1.1.5.5 Alcohol dehydrogenase (quinone)",
+ "1.1.5.6 Formate dehydrogenase-N",
+ "1.1.5.7 Cyclic alcohol dehydrogenase (quinone)",
+ "1.1.5.8 Quinate dehydrogenase (quinone)",
+ "1.1.5.n1 Quinoprotein inositol dehydrogenase",
+ "1.1.98.1 Alcohol dehydrogenase (azurin)",
+ "1.1.98.2 Glucose-6-phosphate dehydrogenase (coenzyme-F420)",
+ "1.1.99.1 Choline dehydrogenase",
+ "1.1.99.2 2-hydroxyglutarate dehydrogenase",
+ "1.1.99.3 Gluconate 2-dehydrogenase (acceptor)",
+ "1.1.99.4 Dehydrogluconate dehydrogenase",
+ "1.1.99.6 D-2-hydroxy-acid dehydrogenase",
+ "1.1.99.7 Lactate--malate transhydrogenase",
+ "1.1.99.9 Pyridoxine 5-dehydrogenase",
+ "1.1.99.10 Glucose dehydrogenase (acceptor)",
+ "1.1.99.11 Fructose 5-dehydrogenase",
+ "1.1.99.12 Sorbose dehydrogenase",
+ "1.1.99.13 Glucoside 3-dehydrogenase",
+ "1.1.99.14 Glycolate dehydrogenase",
+ "1.1.99.18 Cellobiose dehydrogenase (acceptor)",
+ "1.1.99.20 Alkan-1-ol dehydrogenase (acceptor)",
+ "1.1.99.21 D-sorbitol dehydrogenase (acceptor)",
+ "1.1.99.22 Glycerol dehydrogenase (acceptor)",
+ "1.1.99.24 Hydroxyacid-oxoacid transhydrogenase",
+ "1.1.99.26 3-hydroxycyclohexanone dehydrogenase",
+ "1.1.99.27 (R)-pantolactone dehydrogenase (flavin)",
+ "1.1.99.28 Glucose-fructose oxidoreductase",
+ "1.1.99.29 Pyranose dehydrogenase (acceptor)",
+ "1.1.99.30 2-oxo-acid reductase",
+ "1.1.99.31 (S)-mandelate dehydrogenase",
+ "1.1.99.32 L-sorbose 1-dehydrogenase",
+ "1.1.99.33 Formate dehydrogenase (acceptor)",
+ "1.1.99.35 Soluble quinoprotein glucose dehydrogenase",
+ "1.1.99.36 NDMA-dependent alcohol dehydrogenase",
+ "1.1.99.37 NDMA-dependent methanol dehydrogenase",
+ "1.2.1.2 Formate dehydrogenase",
+ "1.2.1.3 Aldehyde dehydrogenase (NAD(+))",
+ "1.2.1.4 Aldehyde dehydrogenase (NADP(+))",
+ "1.2.1.5 Aldehyde dehydrogenase (NAD(P)(+))",
+ "1.2.1.7 Benzaldehyde dehydrogenase (NADP(+))",
+ "1.2.1.8 Betaine-aldehyde dehydrogenase",
+ "1.2.1.9 Glyceraldehyde-3-phosphate dehydrogenase (NADP(+))",
+ "1.2.1.10 Acetaldehyde dehydrogenase (acetylating)",
+ "1.2.1.11 Aspartate-semialdehyde dehydrogenase",
+ "1.2.1.12 Glyceraldehyde-3-phosphate dehydrogenase (phosphorylating)",
+ "1.2.1.13 Glyceraldehyde-3-phosphate dehydrogenase (NADP(+)) (phosphorylating)",
+ "1.2.1.15 Malonate-semialdehyde dehydrogenase",
+ "1.2.1.16 Succinate-semialdehyde dehydrogenase (NAD(P)(+))",
+ "1.2.1.17 Glyoxylate dehydrogenase (acylating)",
+ "1.2.1.18 Malonate-semialdehyde dehydrogenase (acetylating)",
+ "1.2.1.19 Aminobutyraldehyde dehydrogenase",
+ "1.2.1.20 Glutarate-semialdehyde dehydrogenase",
+ "1.2.1.21 Glycolaldehyde dehydrogenase",
+ "1.2.1.22 Lactaldehyde dehydrogenase",
+ "1.2.1.23 2-oxoaldehyde dehydrogenase (NAD(+))",
+ "1.2.1.24 Succinate-semialdehyde dehydrogenase (NAD(+))",
+ "1.2.1.25 2-oxoisovalerate dehydrogenase (acylating)",
+ "1.2.1.26 2,5-dioxovalerate dehydrogenase",
+ "1.2.1.27 Methylmalonate-semialdehyde dehydrogenase (acylating)",
+ "1.2.1.28 Benzaldehyde dehydrogenase (NAD(+))",
+ "1.2.1.29 Aryl-aldehyde dehydrogenase",
+ "1.2.1.30 Aryl-aldehyde dehydrogenase (NADP(+))",
+ "1.2.1.31 L-aminoadipate-semialdehyde dehydrogenase",
+ "1.2.1.32 Aminomuconate-semialdehyde dehydrogenase",
+ "1.2.1.33 (R)-dehydropantoate dehydrogenase",
+ "1.2.1.36 Retinal dehydrogenase",
+ "1.2.1.38 N-acetyl-gamma-glutamyl-phosphate reductase",
+ "1.2.1.39 Phenylacetaldehyde dehydrogenase",
+ "1.2.1.40 3-alpha,7-alpha,12-alpha-trihydroxycholestan-26-al 26-oxidoreductase",
+ "1.2.1.41 Glutamate-5-semialdehyde dehydrogenase",
+ "1.2.1.42 Hexadecanal dehydrogenase (acylating)",
+ "1.2.1.43 Formate dehydrogenase (NADP(+))",
+ "1.2.1.44 Cinnamoyl-CoA reductase",
+ "1.2.1.45 4-carboxy-2-hydroxymuconate-6-semialdehyde dehydrogenase",
+ "1.2.1.46 Formaldehyde dehydrogenase",
+ "1.2.1.47 4-trimethylammoniobutyraldehyde dehydrogenase",
+ "1.2.1.48 Long-chain-aldehyde dehydrogenase",
+ "1.2.1.49 2-oxoaldehyde dehydrogenase (NADP(+))",
+ "1.2.1.50 Long-chain-fatty-acyl-CoA reductase",
+ "1.2.1.51 Pyruvate dehydrogenase (NADP(+))",
+ "1.2.1.52 Oxoglutarate dehydrogenase (NADP(+))",
+ "1.2.1.53 4-hydroxyphenylacetaldehyde dehydrogenase",
+ "1.2.1.54 Gamma-guanidinobutyraldehyde dehydrogenase",
+ "1.2.1.57 Butanal dehydrogenase",
+ "1.2.1.58 Phenylglyoxylate dehydrogenase (acylating)",
+ "1.2.1.59 Glyceraldehyde-3-phosphate dehydrogenase (NAD(P)(+)) (phosphorylating)",
+ "1.2.1.60 5-carboxymethyl-2-hydroxymuconic-semialdehyde dehydrogenase",
+ "1.2.1.61 4-hydroxymuconic-semialdehyde dehydrogenase",
+ "1.2.1.62 4-formylbenzenesulfonate dehydrogenase",
+ "1.2.1.63 6-oxohexanoate dehydrogenase",
+ "1.2.1.64 4-hydroxybenzaldehyde dehydrogenase",
+ "1.2.1.65 Salicylaldehyde dehydrogenase",
+ "1.2.1.67 Vanillin dehydrogenase",
+ "1.2.1.68 Coniferyl-aldehyde dehydrogenase",
+ "1.2.1.69 Fluoroacetaldehyde dehydrogenase",
+ "1.2.1.70 Glutamyl-tRNA reductase",
+ "1.2.1.71 Succinylglutamate-semialdehyde dehydrogenase",
+ "1.2.1.72 Erythrose-4-phosphate dehydrogenase",
+ "1.2.1.73 Sulfoacetaldehyde dehydrogenase",
+ "1.2.1.74 Abietadienal dehydrogenase",
+ "1.2.1.75 Malonyl CoA reductase (malonate semialdehyde-forming)",
+ "1.2.1.76 Succinate-semialdehyde dehydrogenase (acetylating)",
+ "1.2.1.77 3,4-dehydroadipyl-CoA semialdehyde dehydrogenase (NADP(+))",
+ "1.2.1.78 2-formylbenzoate dehydrogenase",
+ "1.2.1.79 Succinate-semialdehyde dehydrogenase (NADP(+))",
+ "1.2.1.80 Long-chain acyl-[acyl-carrier-protein] reductase",
+ "1.2.1.n2 Fatty acyl-CoA reductase",
+ "1.2.2.1 Formate dehydrogenase (cytochrome)",
+ "1.2.2.3 Formate dehydrogenase (cytochrome c-553)",
+ "1.2.2.4 Carbon-monoxide dehydrogenase (cytochrome b-561)",
+ "1.2.3.1 Aldehyde oxidase",
+ "1.2.3.3 Pyruvate oxidase",
+ "1.2.3.4 Oxalate oxidase",
+ "1.2.3.5 Glyoxylate oxidase",
+ "1.2.3.6 Pyruvate oxidase (CoA-acetylating)",
+ "1.2.3.7 Indole-3-acetaldehyde oxidase",
+ "1.2.3.8 Pyridoxal oxidase",
+ "1.2.3.9 Aryl-aldehyde oxidase",
+ "1.2.3.11 Retinal oxidase",
+ "1.2.3.13 4-hydroxyphenylpyruvate oxidase",
+ "1.2.3.14 Abscisic-aldehyde oxidase",
+ "1.2.4.1 Pyruvate dehydrogenase (acetyl-transferring)",
+ "1.2.4.2 Oxoglutarate dehydrogenase (succinyl-transferring)",
+ "1.2.4.4 3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring)",
+ "1.2.5.1 Pyruvate dehydrogenase (quinone)",
+ "1.2.7.1 Pyruvate synthase",
+ "1.2.7.2 2-oxobutyrate synthase",
+ "1.2.7.3 2-oxoglutarate synthase",
+ "1.2.7.4 Carbon-monoxide dehydrogenase (ferredoxin)",
+ "1.2.7.5 Aldehyde ferredoxin oxidoreductase",
+ "1.2.7.6 Glyceraldehyde-3-phosphate dehydrogenase (ferredoxin)",
+ "1.2.7.7 3-methyl-2-oxobutanoate dehydrogenase (ferredoxin)",
+ "1.2.7.8 Indolepyruvate ferredoxin oxidoreductase",
+ "1.2.99.2 Carbon-monoxide dehydrogenase (acceptor)",
+ "1.2.99.3 Aldehyde dehydrogenase (pyrroloquinoline-quinone)",
+ "1.2.99.4 Formaldehyde dismutase",
+ "1.2.99.5 Formylmethanofuran dehydrogenase",
+ "1.2.99.6 Carboxylate reductase",
+ "1.2.99.7 Aldehyde dehydrogenase (FAD-independent)",
+ "1.3.1.1 Dihydrouracil dehydrogenase (NAD(+))",
+ "1.3.1.2 Dihydropyrimidine dehydrogenase (NADP(+))",
+ "1.3.1.3 Delta(4)-3-oxosteroid 5-beta-reductase",
+ "1.3.1.4 Cortisone alpha-reductase",
+ "1.3.1.5 Cucurbitacin Delta(23)-reductase",
+ "1.3.1.6 Fumarate reductase (NADH)",
+ "1.3.1.7 Meso-tartrate dehydrogenase",
+ "1.3.1.8 Acyl-CoA dehydrogenase (NADP(+))",
+ "1.3.1.9 Enoyl-[acyl-carrier-protein] reductase (NADH)",
+ "1.3.1.10 Enoyl-[acyl-carrier-protein] reductase (NADPH, B-specific)",
+ "1.3.1.11 2-coumarate reductase",
+ "1.3.1.12 Prephenate dehydrogenase",
+ "1.3.1.13 Prephenate dehydrogenase (NADP(+))",
+ "1.3.1.14 Orotate reductase (NADH)",
+ "1.3.1.15 Orotate reductase (NADPH)",
+ "1.3.1.16 Beta-nitroacrylate reductase",
+ "1.3.1.17 3-methyleneoxindole reductase",
+ "1.3.1.18 Kynurenate-7,8-dihydrodiol dehydrogenase",
+ "1.3.1.19 Cis-1,2-dihydrobenzene-1,2-diol dehydrogenase",
+ "1.3.1.20 Trans-1,2-dihydrobenzene-1,2-diol dehydrogenase",
+ "1.3.1.21 7-dehydrocholesterol reductase",
+ "1.3.1.22 Cholestenone 5-alpha-reductase",
+ "1.3.1.24 Biliverdin reductase",
+ "1.3.1.25 1,6-dihydroxycyclohexa-2,4-diene-1-carboxylate dehydrogenase",
+ "1.3.1.26 Dihydrodipicolinate reductase",
+ "1.3.1.27 2-hexadecenal reductase",
+ "1.3.1.28 2,3-dihydro-2,3-dihydroxybenzoate dehydrogenase",
+ "1.3.1.29 Cis-1,2-dihydro-1,2-dihydroxynaphthalene dehydrogenase",
+ "1.3.1.30 Progesterone 5-alpha-reductase",
+ "1.3.1.31 2-enoate reductase",
+ "1.3.1.32 Maleylacetate reductase",
+ "1.3.1.33 Protochlorophyllide reductase",
+ "1.3.1.34 2,4-dienoyl-CoA reductase (NADPH)",
+ "1.3.1.35 Phosphatidylcholine desaturase",
+ "1.3.1.36 Geissoschizine dehydrogenase",
+ "1.3.1.37 Cis-2-enoyl-CoA reductase (NADPH)",
+ "1.3.1.38 Trans-2-enoyl-CoA reductase (NADPH)",
+ "1.3.1.39 Enoyl-[acyl-carrier-protein] reductase (NADPH, A-specific)",
+ "1.3.1.40 2-hydroxy-6-oxo-6-phenylhexa-2,4-dienoate reductase",
+ "1.3.1.41 Xanthommatin reductase",
+ "1.3.1.42 12-oxophytodienoate reductase",
+ "1.3.1.43 Arogenate dehydrogenase",
+ "1.3.1.44 Trans-2-enoyl-CoA reductase (NAD(+))",
+ "1.3.1.45 2'-hydroxyisoflavone reductase",
+ "1.3.1.46 Biochanin-A reductase",
+ "1.3.1.47 Alpha-santonin 1,2-reductase",
+ "1.3.1.48 15-oxoprostaglandin 13-oxidase",
+ "1.3.1.49 Cis-3,4-dihydrophenanthrene-3,4-diol dehydrogenase",
+ "1.3.1.51 2'-hydroxydaidzein reductase",
+ "1.3.1.52 2-methyl-branched-chain-enoyl-CoA reductase",
+ "1.3.1.53 (3S,4R)-3,4-dihydroxycyclohexa-1,5-diene-1,4-dicarboxylate dehydrogenase",
+ "1.3.1.54 Precorrin-6A reductase",
+ "1.3.1.56 Cis-2,3-dihydrobiphenyl-2,3-diol dehydrogenase",
+ "1.3.1.57 Phloroglucinol reductase",
+ "1.3.1.58 2,3-dihydroxy-2,3-dihydro-p-cumate dehydrogenase",
+ "1.3.1.60 Dibenzothiophene dihydrodiol dehydrogenase",
+ "1.3.1.62 Pimeloyl-CoA dehydrogenase",
+ "1.3.1.63 2,4-dichlorobenzoyl-CoA reductase",
+ "1.3.1.64 Phthalate 4,5-cis-dihydrodiol dehydrogenase",
+ "1.3.1.65 5,6-dihydroxy-3-methyl-2-oxo-1,2,5,6-tetrahydroquinoline dehydrogenase",
+ "1.3.1.66 Cis-dihydroethylcatechol dehydrogenase",
+ "1.3.1.67 Cis-1,2-dihydroxy-4-methylcyclohexa-3,5-diene-1-carboxylate dehydrogenase",
+ "1.3.1.68 1,2-dihydroxy-6-methylcyclohexa-3,5-dienecarboxylate dehydrogenase",
+ "1.3.1.69 Zeatin reductase",
+ "1.3.1.70 Delta(14)-sterol reductase",
+ "1.3.1.71 Delta(24(24(1)))-sterol reductase",
+ "1.3.1.72 Delta(24)-sterol reductase",
+ "1.3.1.73 1,2-dihydrovomilenine reductase",
+ "1.3.1.74 2-alkenal reductase",
+ "1.3.1.75 Divinyl chlorophyllide a 8-vinyl-reductase",
+ "1.3.1.76 Precorrin-2 dehydrogenase",
+ "1.3.1.77 Anthocyanidin reductase",
+ "1.3.1.78 Arogenate dehydrogenase (NADP(+))",
+ "1.3.1.79 Arogenate dehydrogenase (NAD(P)(+))",
+ "1.3.1.80 Red chlorophyll catabolite reductase",
+ "1.3.1.81 (+)-pulegone reductase",
+ "1.3.1.82 (-)-isopiperitenone reductase",
+ "1.3.1.83 Geranylgeranyl diphosphate reductase",
+ "1.3.1.84 Acrylyl-CoA reductase (NADPH)",
+ "1.3.1.85 Crotonyl-CoA carboxylase/reductase",
+ "1.3.1.86 Crotonyl-CoA reductase",
+ "1.3.1.n1 3-(cis-5,6-dihydroxycyclohexa-1,3-dien-1-yl)propanoate dehydrogenase",
+ "1.3.2.3 L-galactonolactone dehydrogenase",
+ "1.3.3.1 Dihydroorotate oxidase",
+ "1.3.3.3 Coproporphyrinogen oxidase",
+ "1.3.3.4 Protoporphyrinogen oxidase",
+ "1.3.3.5 Bilirubin oxidase",
+ "1.3.3.6 Acyl-CoA oxidase",
+ "1.3.3.7 Dihydrouracil oxidase",
+ "1.3.3.8 Tetrahydroberberine oxidase",
+ "1.3.3.9 Secologanin synthase",
+ "1.3.3.10 Tryptophan alpha,beta-oxidase",
+ "1.3.3.11 Pyrroloquinoline-quinone synthase",
+ "1.3.3.12 L-galactonolactone oxidase",
+ "1.3.5.1 Succinate dehydrogenase (ubiquinone)",
+ "1.3.5.2 Dihydroorotate dehydrogenase",
+ "1.3.5.3 Protoporphyrinogen IX dehydrogenase (menaquinone)",
+ "1.3.5.4 Fumarate reductase (menaquinone)",
+ "1.3.7.1 6-hydroxynicotinate reductase",
+ "1.3.7.2 15,16-dihydrobiliverdin:ferredoxin oxidoreductase",
+ "1.3.7.3 Phycoerythrobilin:ferredoxin oxidoreductase",
+ "1.3.7.4 Phytochromobilin:ferredoxin oxidoreductase",
+ "1.3.7.5 Phycocyanobilin:ferredoxin oxidoreductase",
+ "1.3.7.6 Phycoerythrobilin synthase",
+ "1.3.7.7 Ferredoxin:protochlorophyllide reductase (ATP-dependent)",
+ "1.3.99.1 Succinate dehydrogenase",
+ "1.3.99.2 Butyryl-CoA dehydrogenase",
+ "1.3.99.3 Acyl-CoA dehydrogenase",
+ "1.3.99.4 3-oxosteroid 1-dehydrogenase",
+ "1.3.99.5 3-oxo-5-alpha-steroid 4-dehydrogenase",
+ "1.3.99.6 3-oxo-5-beta-steroid 4-dehydrogenase",
+ "1.3.99.7 Glutaryl-CoA dehydrogenase",
+ "1.3.99.8 2-furoyl-CoA dehydrogenase",
+ "1.3.99.10 Isovaleryl-CoA dehydrogenase",
+ "1.3.99.12 2-methylacyl-CoA dehydrogenase",
+ "1.3.99.13 Long-chain-acyl-CoA dehydrogenase",
+ "1.3.99.14 Cyclohexanone dehydrogenase",
+ "1.3.99.15 Benzoyl-CoA reductase",
+ "1.3.99.16 Isoquinoline 1-oxidoreductase",
+ "1.3.99.17 Quinoline 2-oxidoreductase",
+ "1.3.99.18 Quinaldate 4-oxidoreductase",
+ "1.3.99.19 Quinoline-4-carboxylate 2-oxidoreductase",
+ "1.3.99.20 4-hydroxybenzoyl-CoA reductase",
+ "1.3.99.21 (R)-benzylsuccinyl-CoA dehydrogenase",
+ "1.3.99.22 Coproporphyrinogen dehydrogenase",
+ "1.3.99.23 All-trans-retinol 13,14-reductase",
+ "1.3.99.24 2-amino-4-deoxychorismate dehydrogenase",
+ "1.3.99.25 Carvone reductase",
+ "1.3.99.n1 3-hydroxybenzoyl-CoA reductase",
+ "1.3.99.n2 2-iminoacetate synthase",
+ "1.4.1.1 Alanine dehydrogenase",
+ "1.4.1.2 Glutamate dehydrogenase",
+ "1.4.1.3 Glutamate dehydrogenase (NAD(P)(+))",
+ "1.4.1.4 Glutamate dehydrogenase (NADP(+))",
+ "1.4.1.5 L-amino-acid dehydrogenase",
+ "1.4.1.7 Serine 2-dehydrogenase",
+ "1.4.1.8 Valine dehydrogenase (NADP(+))",
+ "1.4.1.9 Leucine dehydrogenase",
+ "1.4.1.10 Glycine dehydrogenase",
+ "1.4.1.11 L-erythro-3,5-diaminohexanoate dehydrogenase",
+ "1.4.1.12 2,4-diaminopentanoate dehydrogenase",
+ "1.4.1.13 Glutamate synthase (NADPH)",
+ "1.4.1.14 Glutamate synthase (NADH)",
+ "1.4.1.15 Lysine dehydrogenase",
+ "1.4.1.16 Diaminopimelate dehydrogenase",
+ "1.4.1.17 N-methylalanine dehydrogenase",
+ "1.4.1.18 Lysine 6-dehydrogenase",
+ "1.4.1.19 Tryptophan dehydrogenase",
+ "1.4.1.20 Phenylalanine dehydrogenase",
+ "1.4.1.21 Aspartate dehydrogenase",
+ "1.4.2.1 Glycine dehydrogenase (cytochrome)",
+ "1.4.3.1 D-aspartate oxidase",
+ "1.4.3.2 L-amino-acid oxidase",
+ "1.4.3.3 D-amino-acid oxidase",
+ "1.4.3.4 Monoamine oxidase",
+ "1.4.3.5 Pyridoxal 5'-phosphate synthase",
+ "1.4.3.7 D-glutamate oxidase",
+ "1.4.3.8 Ethanolamine oxidase",
+ "1.4.3.10 Putrescine oxidase",
+ "1.4.3.11 L-glutamate oxidase",
+ "1.4.3.12 Cyclohexylamine oxidase",
+ "1.4.3.13 Protein-lysine 6-oxidase",
+ "1.4.3.14 L-lysine oxidase",
+ "1.4.3.15 D-glutamate(D-aspartate) oxidase",
+ "1.4.3.16 L-aspartate oxidase",
+ "1.4.3.19 Glycine oxidase",
+ "1.4.3.20 L-lysine 6-oxidase",
+ "1.4.3.21 Primary-amine oxidase",
+ "1.4.3.22 Diamine oxidase",
+ "1.4.3.23 7-chloro-L-tryptophan oxidase",
+ "1.4.4.2 Glycine dehydrogenase (decarboxylating)",
+ "1.4.5.1 D-amino acid dehydrogenase (quinone)",
+ "1.4.7.1 Glutamate synthase (ferredoxin)",
+ "1.4.99.1 D-amino-acid dehydrogenase",
+ "1.4.99.2 Taurine dehydrogenase",
+ "1.4.99.3 Amine dehydrogenase",
+ "1.4.99.4 Aralkylamine dehydrogenase",
+ "1.4.99.5 Glycine dehydrogenase (cyanide-forming)",
+ "1.5.1.1 Pyrroline-2-carboxylate reductase",
+ "1.5.1.2 Pyrroline-5-carboxylate reductase",
+ "1.5.1.3 Dihydrofolate reductase",
+ "1.5.1.5 Methylenetetrahydrofolate dehydrogenase (NADP(+))",
+ "1.5.1.6 Formyltetrahydrofolate dehydrogenase",
+ "1.5.1.7 Saccharopine dehydrogenase (NAD(+), L-lysine-forming)",
+ "1.5.1.8 Saccharopine dehydrogenase (NADP(+), L-lysine-forming)",
+ "1.5.1.9 Saccharopine dehydrogenase (NAD(+), L-glutamate-forming)",
+ "1.5.1.10 Saccharopine dehydrogenase (NADP(+), L-glutamate-forming)",
+ "1.5.1.11 D-octopine dehydrogenase",
+ "1.5.1.12 1-pyrroline-5-carboxylate dehydrogenase",
+ "1.5.1.15 Methylenetetrahydrofolate dehydrogenase (NAD(+))",
+ "1.5.1.16 D-lysopine dehydrogenase",
+ "1.5.1.17 Alanopine dehydrogenase",
+ "1.5.1.18 Ephedrine dehydrogenase",
+ "1.5.1.19 D-nopaline dehydrogenase",
+ "1.5.1.20 Methylenetetrahydrofolate reductase (NAD(P)H)",
+ "1.5.1.21 Delta(1)-piperideine-2-carboxylate reductase",
+ "1.5.1.22 Strombine dehydrogenase",
+ "1.5.1.23 Tauropine dehydrogenase",
+ "1.5.1.24 N(5)-(carboxyethyl)ornithine synthase",
+ "1.5.1.25 Thiomorpholine-carboxylate dehydrogenase",
+ "1.5.1.26 Beta-alanopine dehydrogenase",
+ "1.5.1.27 1,2-dehydroreticulinium reductase (NADPH)",
+ "1.5.1.28 Opine dehydrogenase",
+ "1.5.1.29 FMN reductase",
+ "1.5.1.30 Flavin reductase",
+ "1.5.1.31 Berberine reductase",
+ "1.5.1.32 Vomilenine reductase",
+ "1.5.1.33 Pteridine reductase",
+ "1.5.1.34 6,7-dihydropteridine reductase",
+ "1.5.3.1 Sarcosine oxidase",
+ "1.5.3.2 N-methyl-L-amino-acid oxidase",
+ "1.5.3.4 N(6)-methyl-lysine oxidase",
+ "1.5.3.5 (S)-6-hydroxynicotine oxidase",
+ "1.5.3.6 (R)-6-hydroxynicotine oxidase",
+ "1.5.3.7 L-pipecolate oxidase",
+ "1.5.3.10 Dimethylglycine oxidase",
+ "1.5.3.12 Dihydrobenzophenanthridine oxidase",
+ "1.5.3.13 N(1)-acetylpolyamine oxidase",
+ "1.5.3.14 Polyamine oxidase (propane-1,3-diamine-forming)",
+ "1.5.3.15 N(8)-acetylspermidine oxidase (propane-1,3-diamine-forming)",
+ "1.5.3.16 Spermine oxidase",
+ "1.5.3.17 Non-specific polyamine oxidase",
+ "1.5.3.18 L-saccharopine oxidase",
+ "1.5.4.1 Pyrimidodiazepine synthase",
+ "1.5.5.1 Electron-transferring-flavoprotein dehydrogenase",
+ "1.5.7.1 Methylenetetrahydrofolate reductase (ferredoxin)",
+ "1.5.8.1 Dimethylamine dehydrogenase",
+ "1.5.8.2 Trimethylamine dehydrogenase",
+ "1.5.99.1 Sarcosine dehydrogenase",
+ "1.5.99.2 Dimethylglycine dehydrogenase",
+ "1.5.99.3 L-pipecolate dehydrogenase",
+ "1.5.99.4 Nicotine dehydrogenase",
+ "1.5.99.5 Methylglutamate dehydrogenase",
+ "1.5.99.6 Spermidine dehydrogenase",
+ "1.5.99.8 Proline dehydrogenase",
+ "1.5.99.9 Methylenetetrahydromethanopterin dehydrogenase",
+ "1.5.99.11 5,10-methylenetetrahydromethanopterin reductase",
+ "1.5.99.12 Cytokinin dehydrogenase",
+ "1.5.99.13 D-proline dehydrogenase",
+ "1.6.1.1 NAD(P)(+) transhydrogenase (B-specific)",
+ "1.6.1.2 NAD(P)(+) transhydrogenase (AB-specific)",
+ "1.6.2.2 Cytochrome-b5 reductase",
+ "1.6.2.4 NADPH--hemoprotein reductase",
+ "1.6.2.5 NADPH--cytochrome-c2 reductase",
+ "1.6.2.6 Leghemoglobin reductase",
+ "1.6.3.1 NAD(P)H oxidase",
+ "1.6.5.2 NAD(P)H dehydrogenase (quinone)",
+ "1.6.5.3 NADH dehydrogenase (ubiquinone)",
+ "1.6.5.4 Monodehydroascorbate reductase (NADH)",
+ "1.6.5.5 NADPH:quinone reductase",
+ "1.6.5.6 p-benzoquinone reductase (NADPH)",
+ "1.6.5.7 2-hydroxy-1,4-benzoquinone reductase",
+ "1.6.5.8 NADH:ubiquinone reductase (Na(+)-transporting)",
+ "1.6.6.9 Trimethylamine-N-oxide reductase",
+ "1.6.99.1 NADPH dehydrogenase",
+ "1.6.99.3 NADH dehydrogenase",
+ "1.6.99.5 NADH dehydrogenase (quinone)",
+ "1.6.99.6 NADPH dehydrogenase (quinone)",
+ "1.7.1.1 Nitrate reductase (NADH)",
+ "1.7.1.2 Nitrate reductase (NAD(P)H)",
+ "1.7.1.3 Nitrate reductase (NADPH)",
+ "1.7.1.4 Nitrite reductase (NAD(P)H)",
+ "1.7.1.5 Hyponitrite reductase",
+ "1.7.1.6 Azobenzene reductase",
+ "1.7.1.7 GMP reductase",
+ "1.7.1.9 Nitroquinoline-N-oxide reductase",
+ "1.7.1.10 Hydroxylamine reductase (NADH)",
+ "1.7.1.11 4-(dimethylamino)phenylazoxybenzene reductase",
+ "1.7.1.12 N-hydroxy-2-acetamidofluorene reductase",
+ "1.7.1.13 PreQ(1) synthase",
+ "1.7.2.1 Nitrite reductase (NO-forming)",
+ "1.7.2.2 Nitrite reductase (cytochrome; ammonia-forming)",
+ "1.7.2.3 Trimethylamine-N-oxide reductase (cytochrome c)",
+ "1.7.3.1 Nitroalkane oxidase",
+ "1.7.3.2 Acetylindoxyl oxidase",
+ "1.7.3.3 Factor independent urate hydroxylase",
+ "1.7.3.4 Hydroxylamine oxidase",
+ "1.7.3.5 3-aci-nitropropanoate oxidase",
+ "1.7.5.1 Nitrate reductase (quinone)",
+ "1.7.7.1 Ferredoxin--nitrite reductase",
+ "1.7.7.2 Ferredoxin--nitrate reductase",
+ "1.7.99.1 Hydroxylamine reductase",
+ "1.7.99.4 Nitrate reductase",
+ "1.7.99.6 Nitrous-oxide reductase",
+ "1.7.99.7 Nitric-oxide reductase",
+ "1.7.99.8 Hydrazine oxidoreductase",
+ "1.8.1.2 Sulfite reductase (NADPH)",
+ "1.8.1.3 Hypotaurine dehydrogenase",
+ "1.8.1.4 Dihydrolipoyl dehydrogenase",
+ "1.8.1.5 2-oxopropyl-CoM reductase (carboxylating)",
+ "1.8.1.6 Cystine reductase",
+ "1.8.1.7 Glutathione-disulfide reductase",
+ "1.8.1.8 Protein-disulfide reductase",
+ "1.8.1.9 Thioredoxin-disulfide reductase",
+ "1.8.1.10 CoA-glutathione reductase",
+ "1.8.1.11 Asparagusate reductase",
+ "1.8.1.12 Trypanothione-disulfide reductase",
+ "1.8.1.13 Bis-gamma-glutamylcystine reductase",
+ "1.8.1.14 CoA-disulfide reductase",
+ "1.8.1.15 Mycothione reductase",
+ "1.8.1.16 Glutathione amide reductase",
+ "1.8.2.1 Sulfite dehydrogenase",
+ "1.8.2.2 Thiosulfate dehydrogenase",
+ "1.8.3.1 Sulfite oxidase",
+ "1.8.3.2 Thiol oxidase",
+ "1.8.3.3 Glutathione oxidase",
+ "1.8.3.4 Methanethiol oxidase",
+ "1.8.3.5 Prenylcysteine oxidase",
+ "1.8.4.1 Glutathione--homocystine transhydrogenase",
+ "1.8.4.2 Protein-disulfide reductase (glutathione)",
+ "1.8.4.3 Glutathione--CoA-glutathione transhydrogenase",
+ "1.8.4.4 Glutathione--cystine transhydrogenase",
+ "1.8.4.7 Enzyme-thiol transhydrogenase (glutathione-disulfide)",
+ "1.8.4.8 Phosphoadenylyl-sulfate reductase (thioredoxin)",
+ "1.8.4.9 Adenylyl-sulfate reductase (glutathione)",
+ "1.8.4.10 Adenylyl-sulfate reductase (thioredoxin)",
+ "1.8.4.11 Peptide-methionine (S)-S-oxide reductase",
+ "1.8.4.12 Peptide-methionine (R)-S-oxide reductase",
+ "1.8.4.13 L-methionine (S)-S-oxide reductase",
+ "1.8.4.14 L-methionine (R)-S-oxide reductase",
+ "1.8.5.1 Glutathione dehydrogenase (ascorbate)",
+ "1.8.5.2 Thiosulfate dehydrogenase (quinone)",
+ "1.8.7.1 Sulfite reductase (ferredoxin)",
+ "1.8.7.2 Ferredoxin:thioredoxin reductase",
+ "1.8.98.1 CoB--CoM heterodisulfide reductase",
+ "1.8.98.2 Sulfiredoxin",
+ "1.8.99.1 Sulfite reductase",
+ "1.8.99.2 Adenylyl-sulfate reductase",
+ "1.8.99.3 Hydrogensulfite reductase",
+ "1.9.3.1 Cytochrome-c oxidase",
+ "1.9.6.1 Nitrate reductase (cytochrome)",
+ "1.9.99.1 Iron--cytochrome-c reductase",
+ "1.10.1.1 Trans-acenaphthene-1,2-diol dehydrogenase",
+ "1.10.2.1 L-ascorbate--cytochrome-b5 reductase",
+ "1.10.2.2 Ubiquinol--cytochrome-c reductase",
+ "1.10.3.1 Catechol oxidase",
+ "1.10.3.2 Laccase",
+ "1.10.3.3 L-ascorbate oxidase",
+ "1.10.3.4 o-aminophenol oxidase",
+ "1.10.3.5 3-hydroxyanthranilate oxidase",
+ "1.10.3.6 Rifamycin-B oxidase",
+ "1.10.99.1 Plastoquinol--plastocyanin reductase",
+ "1.10.99.2 Ribosyldihydronicotinamide dehydrogenase (quinone)",
+ "1.10.99.3 Violaxanthin de-epoxidase",
+ "1.11.1.1 NADH peroxidase",
+ "1.11.1.2 NADPH peroxidase",
+ "1.11.1.3 Fatty-acid peroxidase",
+ "1.11.1.5 Cytochrome-c peroxidase",
+ "1.11.1.6 Catalase",
+ "1.11.1.7 Peroxidase",
+ "1.11.1.8 Iodide peroxidase",
+ "1.11.1.9 Glutathione peroxidase",
+ "1.11.1.10 Chloride peroxidase",
+ "1.11.1.11 L-ascorbate peroxidase",
+ "1.11.1.12 Phospholipid-hydroperoxide glutathione peroxidase",
+ "1.11.1.13 Manganese peroxidase",
+ "1.11.1.14 Lignin peroxidase",
+ "1.11.1.15 Peroxiredoxin",
+ "1.11.1.16 Versatile peroxidase",
+ "1.11.1.17 Glutathione amide-dependent peroxidase",
+ "1.11.1.18 Bromide peroxidase",
+ "1.11.1.19 Dye decolorizing peroxidase",
+ "1.11.1.20 Prostamide/prostaglandin F(2-alpha) synthase",
+ "1.11.2.1 Unspecific peroxygenase",
+ "1.11.2.2 Myeloperoxidase",
+ "1.11.2.3 Plant seed peroxygenase",
+ "1.11.2.4 Fatty-acid peroxygenase",
+ "1.12.1.2 Hydrogen dehydrogenase",
+ "1.12.1.3 Hydrogen dehydrogenase (NADP(+))",
+ "1.12.2.1 Cytochrome-c3 hydrogenase",
+ "1.12.5.1 Hydrogen:quinone oxidoreductase",
+ "1.12.7.2 Ferredoxin hydrogenase",
+ "1.12.98.1 Coenzyme F420 hydrogenase",
+ "1.12.98.2 5,10-methenyltetrahydromethanopterin hydrogenase",
+ "1.12.98.3 Methanosarcina-phenazine hydrogenase",
+ "1.12.99.6 Hydrogenase (acceptor)",
+ "1.13.11.1 Catechol 1,2-dioxygenase",
+ "1.13.11.2 Catechol 2,3-dioxygenase",
+ "1.13.11.3 Protocatechuate 3,4-dioxygenase",
+ "1.13.11.4 Gentisate 1,2-dioxygenase",
+ "1.13.11.5 Homogentisate 1,2-dioxygenase",
+ "1.13.11.6 3-hydroxyanthranilate 3,4-dioxygenase",
+ "1.13.11.8 Protocatechuate 4,5-dioxygenase",
+ "1.13.11.9 2,5-dihydroxypyridine 5,6-dioxygenase",
+ "1.13.11.10 7,8-dihydroxykynurenate 8,8a-dioxygenase",
+ "1.13.11.11 Tryptophan 2,3-dioxygenase",
+ "1.13.11.12 Lipoxygenase",
+ "1.13.11.13 Ascorbate 2,3-dioxygenase",
+ "1.13.11.14 2,3-dihydroxybenzoate 3,4-dioxygenase",
+ "1.13.11.15 3,4-dihydroxyphenylacetate 2,3-dioxygenase",
+ "1.13.11.16 3-carboxyethylcatechol 2,3-dioxygenase",
+ "1.13.11.17 Indole 2,3-dioxygenase",
+ "1.13.11.18 Sulfur dioxygenase",
+ "1.13.11.19 Cysteamine dioxygenase",
+ "1.13.11.20 Cysteine dioxygenase",
+ "1.13.11.22 Caffeate 3,4-dioxygenase",
+ "1.13.11.23 2,3-dihydroxyindole 2,3-dioxygenase",
+ "1.13.11.24 Quercetin 2,3-dioxygenase",
+ "1.13.11.25 3,4-dihydroxy-9,10-secoandrosta-1,3,5(10)-triene-9,17-dione 4,5-dioxygenase",
+ "1.13.11.26 Peptide-tryptophan 2,3-dioxygenase",
+ "1.13.11.27 4-hydroxyphenylpyruvate dioxygenase",
+ "1.13.11.28 2,3-dihydroxybenzoate 2,3-dioxygenase",
+ "1.13.11.29 Stizolobate synthase",
+ "1.13.11.30 Stizolobinate synthase",
+ "1.13.11.31 Arachidonate 12-lipoxygenase",
+ "1.13.11.33 Arachidonate 15-lipoxygenase",
+ "1.13.11.34 Arachidonate 5-lipoxygenase",
+ "1.13.11.35 Pyrogallol 1,2-oxygenase",
+ "1.13.11.36 Chloridazon-catechol dioxygenase",
+ "1.13.11.37 Hydroxyquinol 1,2-dioxygenase",
+ "1.13.11.38 1-hydroxy-2-naphthoate 1,2-dioxygenase",
+ "1.13.11.39 Biphenyl-2,3-diol 1,2-dioxygenase",
+ "1.13.11.40 Arachidonate 8-lipoxygenase",
+ "1.13.11.41 2,4'-dihydroxyacetophenone dioxygenase",
+ "1.13.11.43 Lignostilbene alpha-beta-dioxygenase",
+ "1.13.11.44 Linoleate diol synthase",
+ "1.13.11.45 Linoleate 11-lipoxygenase",
+ "1.13.11.46 4-hydroxymandelate synthase",
+ "1.13.11.47 3-hydroxy-4-oxoquinoline 2,4-dioxygenase",
+ "1.13.11.48 3-hydroxy-2-methylquinolin-4-one 2,4-dioxygenase",
+ "1.13.11.49 Chlorite O(2)-lyase",
+ "1.13.11.50 Acetylacetone-cleaving enzyme",
+ "1.13.11.51 9-cis-epoxycarotenoid dioxygenase",
+ "1.13.11.52 Indoleamine 2,3-dioxygenase",
+ "1.13.11.53 Acireductone dioxygenase (Ni(2+)-requiring)",
+ "1.13.11.54 Acireductone dioxygenase (Fe(2+)-requiring)",
+ "1.13.11.55 Sulfur oxygenase/reductase",
+ "1.13.11.56 1,2-dihydroxynaphthalene dioxygenase",
+ "1.13.11.n1 2-aminophenol 1,6-dioxygenase",
+ "1.13.12.1 Arginine 2-monooxygenase",
+ "1.13.12.2 Lysine 2-monooxygenase",
+ "1.13.12.3 Tryptophan 2-monooxygenase",
+ "1.13.12.4 Lactate 2-monooxygenase",
+ "1.13.12.5 Renilla-luciferin 2-monooxygenase",
+ "1.13.12.6 Cypridina-luciferin 2-monooxygenase",
+ "1.13.12.7 Photinus-luciferin 4-monooxygenase (ATP-hydrolyzing)",
+ "1.13.12.8 Watasenia-luciferin 2-monooxygenase",
+ "1.13.12.9 Phenylalanine 2-monooxygenase",
+ "1.13.12.12 Apo-beta-carotenoid-14',13'-dioxygenase",
+ "1.13.12.13 Oplophorus-luciferin 2-monooxygenase",
+ "1.13.12.14 Chlorophyllide-a oxygenase",
+ "1.13.12.15 3,4-dihydroxyphenylalanine oxidative deaminase",
+ "1.13.12.16 Nitronate monooxygenase",
+ "1.13.12.17 Dichloroarcyriaflavin A synthase",
+ "1.13.12.18 Dinoflagellate luciferase",
+ "1.13.99.1 Inositol oxygenase",
+ "1.13.99.3 Tryptophan 2'-dioxygenase",
+ "1.14.11.1 Gamma-butyrobetaine dioxygenase",
+ "1.14.11.2 Procollagen-proline dioxygenase",
+ "1.14.11.3 Pyrimidine-deoxynucleoside 2'-dioxygenase",
+ "1.14.11.4 Procollagen-lysine 5-dioxygenase",
+ "1.14.11.6 Thymine dioxygenase",
+ "1.14.11.7 Procollagen-proline 3-dioxygenase",
+ "1.14.11.8 Trimethyllysine dioxygenase",
+ "1.14.11.9 Flavanone 3-dioxygenase",
+ "1.14.11.10 Pyrimidine-deoxynucleoside 1'-dioxygenase",
+ "1.14.11.11 Hyoscyamine (6S)-dioxygenase",
+ "1.14.11.12 Gibberellin-44 dioxygenase",
+ "1.14.11.13 Gibberellin 2-beta-dioxygenase",
+ "1.14.11.14 6-beta-hydroxyhyoscyamine epoxidase",
+ "1.14.11.15 Gibberellin 3-beta-dioxygenase",
+ "1.14.11.16 Peptide-aspartate beta-dioxygenase",
+ "1.14.11.17 Taurine dioxygenase",
+ "1.14.11.18 Phytanoyl-CoA dioxygenase",
+ "1.14.11.19 Leucocyanidin oxygenase",
+ "1.14.11.20 Deacetoxyvindoline 4-hydroxylase",
+ "1.14.11.21 Clavaminate synthase",
+ "1.14.11.22 Flavone synthase",
+ "1.14.11.23 Flavonol synthase",
+ "1.14.11.24 2'-deoxymugineic-acid 2'-dioxygenase",
+ "1.14.11.25 Mugineic-acid 3-dioxygenase",
+ "1.14.11.26 Deacetoxycephalosporin-C hydroxylase",
+ "1.14.11.27 [Histone H3]-lysine-36 demethylase",
+ "1.14.11.28 Proline 3-hydroxylase",
+ "1.14.11.29 Hypoxia-inducible factor-proline dioxygenase",
+ "1.14.11.30 Hypoxia-inducible factor-asparagine dioxygenase",
+ "1.14.11.31 Thebaine 6-O-demethylase",
+ "1.14.11.32 Codeine 3-O-demethylase",
+ "1.14.11.n1 L-asparagine oxygenase",
+ "1.14.12.1 Anthranilate 1,2-dioxygenase (deaminating, decarboxylating)",
+ "1.14.12.3 Benzene 1,2-dioxygenase",
+ "1.14.12.4 3-hydroxy-2-methylpyridinecarboxylate dioxygenase",
+ "1.14.12.5 5-pyridoxate dioxygenase",
+ "1.14.12.7 Phthalate 4,5-dioxygenase",
+ "1.14.12.8 4-sulfobenzoate 3,4-dioxygenase",
+ "1.14.12.9 4-chlorophenylacetate 3,4-dioxygenase",
+ "1.14.12.10 Benzoate 1,2-dioxygenase",
+ "1.14.12.11 Toluene dioxygenase",
+ "1.14.12.12 Naphthalene 1,2-dioxygenase",
+ "1.14.12.13 2-chlorobenzoate 1,2-dioxygenase",
+ "1.14.12.14 2-aminobenzenesulfonate 2,3-dioxygenase",
+ "1.14.12.15 Terephthalate 1,2-dioxygenase",
+ "1.14.12.16 2-hydroxyquinoline 5,6-dioxygenase",
+ "1.14.12.17 Nitric oxide dioxygenase",
+ "1.14.12.18 Biphenyl 2,3-dioxygenase",
+ "1.14.12.19 3-phenylpropanoate dioxygenase",
+ "1.14.12.20 Pheophorbide a oxygenase",
+ "1.14.12.21 Benzoyl-CoA 2,3-dioxygenase",
+ "1.14.12.22 Carbazole 1,9a-dioxygenase",
+ "1.14.13.1 Salicylate 1-monooxygenase",
+ "1.14.13.2 4-hydroxybenzoate 3-monooxygenase",
+ "1.14.13.3 4-hydroxyphenylacetate 3-monooxygenase",
+ "1.14.13.4 Melilotate 3-monooxygenase",
+ "1.14.13.5 Imidazoleacetate 4-monooxygenase",
+ "1.14.13.6 Orcinol 2-monooxygenase",
+ "1.14.13.7 Phenol 2-monooxygenase",
+ "1.14.13.8 Flavin-containing monooxygenase",
+ "1.14.13.9 Kynurenine 3-monooxygenase",
+ "1.14.13.10 2,6-dihydroxypyridine 3-monooxygenase",
+ "1.14.13.11 Trans-cinnamate 4-monooxygenase",
+ "1.14.13.12 Benzoate 4-monooxygenase",
+ "1.14.13.13 Calcidiol 1-monooxygenase",
+ "1.14.13.14 Trans-cinnamate 2-monooxygenase",
+ "1.14.13.15 Cholestanetriol 26-monooxygenase",
+ "1.14.13.16 Cyclopentanone monooxygenase",
+ "1.14.13.17 Cholesterol 7-alpha-monooxygenase",
+ "1.14.13.18 4-hydroxyphenylacetate 1-monooxygenase",
+ "1.14.13.19 Taxifolin 8-monooxygenase",
+ "1.14.13.20 2,4-dichlorophenol 6-monooxygenase",
+ "1.14.13.21 Flavonoid 3'-monooxygenase",
+ "1.14.13.22 Cyclohexanone monooxygenase",
+ "1.14.13.23 3-hydroxybenzoate 4-monooxygenase",
+ "1.14.13.24 3-hydroxybenzoate 6-monooxygenase",
+ "1.14.13.25 Methane monooxygenase",
+ "1.14.13.26 Phosphatidylcholine 12-monooxygenase",
+ "1.14.13.27 4-aminobenzoate 1-monooxygenase",
+ "1.14.13.28 3,9-dihydroxypterocarpan 6A-monooxygenase",
+ "1.14.13.29 4-nitrophenol 2-monooxygenase",
+ "1.14.13.30 Leukotriene-B(4) 20-monooxygenase",
+ "1.14.13.31 2-nitrophenol 2-monooxygenase",
+ "1.14.13.32 Albendazole monooxygenase",
+ "1.14.13.33 4-hydroxybenzoate 3-monooxygenase (NAD(P)H)",
+ "1.14.13.34 Leukotriene-E(4) 20-monooxygenase",
+ "1.14.13.35 Anthranilate 3-monooxygenase (deaminating)",
+ "1.14.13.36 5-O-(4-coumaroyl)-D-quinate 3'-monooxygenase",
+ "1.14.13.37 Methyltetrahydroprotoberberine 14-monooxygenase",
+ "1.14.13.38 Anhydrotetracycline monooxygenase",
+ "1.14.13.39 Nitric-oxide synthase",
+ "1.14.13.40 Anthraniloyl-CoA monooxygenase",
+ "1.14.13.41 Tyrosine N-monooxygenase",
+ "1.14.13.42 Hydroxyphenylacetonitrile 2-monooxygenase",
+ "1.14.13.43 Questin monooxygenase",
+ "1.14.13.44 2-hydroxybiphenyl 3-monooxygenase",
+ "1.14.13.46 (-)-menthol monooxygenase",
+ "1.14.13.47 (S)-limonene 3-monooxygenase",
+ "1.14.13.48 (S)-limonene 6-monooxygenase",
+ "1.14.13.49 (S)-limonene 7-monooxygenase",
+ "1.14.13.50 Pentachlorophenol monooxygenase",
+ "1.14.13.51 6-oxocineole dehydrogenase",
+ "1.14.13.52 Isoflavone 3'-hydroxylase",
+ "1.14.13.53 4'-methoxyisoflavone 2'-hydroxylase",
+ "1.14.13.54 Ketosteroid monooxygenase",
+ "1.14.13.55 Protopine 6-monooxygenase",
+ "1.14.13.56 Dihydrosanguinarine 10-monooxygenase",
+ "1.14.13.57 Dihydrochelirubine 12-monooxygenase",
+ "1.14.13.58 Benzoyl-CoA 3-monooxygenase",
+ "1.14.13.59 L-lysine 6-monooxygenase (NADPH)",
+ "1.14.13.60 27-hydroxycholesterol 7-alpha-monooxygenase",
+ "1.14.13.61 2-hydroxyquinoline 8-monooxygenase",
+ "1.14.13.62 4-hydroxyquinoline 3-monooxygenase",
+ "1.14.13.63 3-hydroxyphenylacetate 6-hydroxylase",
+ "1.14.13.64 4-hydroxybenzoate 1-hydroxylase",
+ "1.14.13.66 2-hydroxycyclohexanone 2-monooxygenase",
+ "1.14.13.67 Quinine 3-monooxygenase",
+ "1.14.13.68 4-hydroxyphenylacetaldehyde oxime monooxygenase",
+ "1.14.13.69 Alkene monooxygenase",
+ "1.14.13.70 Sterol 14-demethylase",
+ "1.14.13.71 N-methylcoclaurine 3'-monooxygenase",
+ "1.14.13.72 Methylsterol monooxygenase",
+ "1.14.13.73 Tabersonine 16-hydroxylase",
+ "1.14.13.74 7-deoxyloganin 7-hydroxylase",
+ "1.14.13.75 Vinorine hydroxylase",
+ "1.14.13.76 Taxane 10-beta-hydroxylase",
+ "1.14.13.77 Taxane 13-alpha-hydroxylase",
+ "1.14.13.78 Ent-kaurene oxidase",
+ "1.14.13.79 Ent-kaurenoic acid oxidase",
+ "1.14.13.80 (R)-limonene 6-monooxygenase",
+ "1.14.13.81 Magnesium-protoporphyrin IX monomethyl ester (oxidative) cyclase",
+ "1.14.13.82 Vanillate monooxygenase",
+ "1.14.13.83 Precorrin-3B synthase",
+ "1.14.13.84 4-hydroxyacetophenone monooxygenase",
+ "1.14.13.85 Glyceollin synthase",
+ "1.14.13.86 2-hydroxyisoflavanone synthase",
+ "1.14.13.87 Licodione synthase",
+ "1.14.13.88 Flavonoid 3',5'-hydroxylase",
+ "1.14.13.89 Isoflavone 2'-hydroxylase",
+ "1.14.13.90 Zeaxanthin epoxidase",
+ "1.14.13.91 Deoxysarpagine hydroxylase",
+ "1.14.13.92 Phenylacetone monooxygenase",
+ "1.14.13.93 (+)-abscisic acid 8'-hydroxylase",
+ "1.14.13.94 Lithocholate 6-beta-hydroxylase",
+ "1.14.13.95 7-alpha-hydroxycholest-4-en-3-one 12-alpha-hydroxylase",
+ "1.14.13.96 5-beta-cholestane-3-alpha,7-alpha-diol 12-alpha-hydroxylase",
+ "1.14.13.97 Taurochenodeoxycholate 6-alpha-hydroxylase",
+ "1.14.13.98 Cholesterol 24-hydroxylase",
+ "1.14.13.99 24-hydroxycholesterol 7-alpha-hydroxylase",
+ "1.14.13.100 25-hydroxycholesterol 7-alpha-hydroxylase",
+ "1.14.13.101 Senecionine N-oxygenase",
+ "1.14.13.102 Psoralen synthase",
+ "1.14.13.103 8-dimethylallylnaringenin 2'-hydroxylase",
+ "1.14.13.104 (+)-menthofuran synthase",
+ "1.14.13.105 Monocyclic monoterpene ketone monooxygenase",
+ "1.14.13.106 Epi-isozizaene 5-monooxygenase",
+ "1.14.13.107 Limonene 1,2-monooxygenase",
+ "1.14.13.108 Abietadiene hydroxylase",
+ "1.14.13.109 Abietadienol hydroxylase",
+ "1.14.13.110 Geranylgeraniol 18-hydroxylase",
+ "1.14.13.111 Methanesulfonate monooxygenase",
+ "1.14.13.112 3-epi-6-deoxocathasterone 23-monooxygenase",
+ "1.14.13.113 FAD-dependent urate hydroxylase",
+ "1.14.13.114 6-hydroxynicotinate 3-monooxygenase",
+ "1.14.13.115 Angelicin synthase",
+ "1.14.13.116 Geranylhydroquinone 3''-hydroxylase",
+ "1.14.13.117 Isoleucine N-monooxygenase",
+ "1.14.13.118 Valine N-monooxygenase",
+ "1.14.13.119 5-epiaristolochene 1,3-dihydroxylase",
+ "1.14.13.120 Costunolide synthase",
+ "1.14.13.121 Premnaspirodiene oxygenase",
+ "1.14.13.n1 Phenylalanine N-monooxygenase",
+ "1.14.13.n2 Tryptophan N-monooxygenase",
+ "1.14.13.n3 3-(3-hydroxy-phenyl)propanoic acid hydroxylase",
+ "1.14.13.n4 Vitamin D(3) 24-hydroxylase",
+ "1.14.13.n5 Dihomomethionine N-hydroxylase",
+ "1.14.13.n6 Hexahomomethionine N-hydroxylase",
+ "1.14.13.n7 4-nitrophenol 2-hydroxylase",
+ "1.14.14.1 Unspecific monooxygenase",
+ "1.14.14.3 Alkanal monooxygenase (FMN-linked)",
+ "1.14.14.5 Alkanesulfonate monooxygenase",
+ "1.14.14.7 Tryptophan 7-halogenase",
+ "1.14.14.8 Anthranilate 3-monooxygenase (FAD)",
+ "1.14.15.1 Camphor 5-monooxygenase",
+ "1.14.15.2 Camphor 1,2-monooxygenase",
+ "1.14.15.3 Alkane 1-monooxygenase",
+ "1.14.15.4 Steroid 11-beta-monooxygenase",
+ "1.14.15.5 Corticosterone 18-monooxygenase",
+ "1.14.15.6 Cholesterol monooxygenase (side-chain-cleaving)",
+ "1.14.15.7 Choline monooxygenase",
+ "1.14.15.8 Steroid 15-beta-monooxygenase",
+ "1.14.16.1 Phenylalanine 4-monooxygenase",
+ "1.14.16.2 Tyrosine 3-monooxygenase",
+ "1.14.16.3 Anthranilate 3-monooxygenase",
+ "1.14.16.4 Tryptophan 5-monooxygenase",
+ "1.14.16.5 Alkylglycerol monooxygenase",
+ "1.14.16.6 Mandelate 4-monooxygenase",
+ "1.14.17.1 Dopamine beta-monooxygenase",
+ "1.14.17.3 Peptidylglycine monooxygenase",
+ "1.14.17.4 Aminocyclopropanecarboxylate oxidase",
+ "1.14.18.1 Monophenol monooxygenase",
+ "1.14.18.2 CMP-N-acetylneuraminate monooxygenase",
+ "1.14.19.1 Stearoyl-CoA 9-desaturase",
+ "1.14.19.2 Acyl-[acyl-carrier-protein] desaturase",
+ "1.14.19.3 Linoleoyl-CoA desaturase",
+ "1.14.19.4 Delta(8)-fatty-acid desaturase",
+ "1.14.19.5 Delta(11)-fatty-acid desaturase",
+ "1.14.19.6 Delta(12)-fatty-acid desaturase",
+ "1.14.20.1 Deacetoxycephalosporin-C synthase",
+ "1.14.21.1 (S)-stylopine synthase",
+ "1.14.21.2 (S)-cheilanthifoline synthase",
+ "1.14.21.3 Berbamunine synthase",
+ "1.14.21.4 Salutaridine synthase",
+ "1.14.21.5 (S)-canadine synthase",
+ "1.14.21.6 Lathosterol oxidase",
+ "1.14.21.7 Biflaviolin synthase",
+ "1.14.21.8 Pseudobaptigenin synthase",
+ "1.14.99.1 Prostaglandin-endoperoxide synthase",
+ "1.14.99.2 Kynurenine 7,8-hydroxylase",
+ "1.14.99.3 Heme oxygenase",
+ "1.14.99.4 Progesterone monooxygenase",
+ "1.14.99.7 Squalene monooxygenase",
+ "1.14.99.9 Steroid 17-alpha-monooxygenase",
+ "1.14.99.10 Steroid 21-monooxygenase",
+ "1.14.99.11 Estradiol 6-beta-monooxygenase",
+ "1.14.99.12 Androst-4-ene-3,17-dione monooxygenase",
+ "1.14.99.14 Progesterone 11-alpha-monooxygenase",
+ "1.14.99.15 4-methoxybenzoate monooxygenase (O-demethylating)",
+ "1.14.99.19 Plasmanylethanolamine desaturase",
+ "1.14.99.20 Phylloquinone monooxygenase (2,3-epoxidizing)",
+ "1.14.99.21 Latia-luciferin monooxygenase (demethylating)",
+ "1.14.99.22 Ecdysone 20-monooxygenase",
+ "1.14.99.23 3-hydroxybenzoate 2-monooxygenase",
+ "1.14.99.24 Steroid 9-alpha-monooxygenase",
+ "1.14.99.26 2-hydroxypyridine 5-monooxygenase",
+ "1.14.99.27 Juglone 3-monooxygenase",
+ "1.14.99.28 Linalool 8-monooxygenase",
+ "1.14.99.29 Deoxyhypusine monooxygenase",
+ "1.14.99.30 Carotene 7,8-desaturase",
+ "1.14.99.31 Myristoyl-CoA 11-(E) desaturase",
+ "1.14.99.32 Myristoyl-CoA 11-(Z) desaturase",
+ "1.14.99.33 Delta(12)-fatty acid dehydrogenase",
+ "1.14.99.34 Monoprenyl isoflavone epoxidase",
+ "1.14.99.35 Thiophene-2-carbonyl-CoA monooxygenase",
+ "1.14.99.36 Beta-carotene 15,15'-monooxygenase",
+ "1.14.99.37 Taxadiene 5-alpha-hydroxylase",
+ "1.14.99.38 Cholesterol 25-hydroxylase",
+ "1.14.99.39 Ammonia monooxygenase",
+ "1.14.99.40 5,6-dimethylbenzimidazole synthase",
+ "1.14.99.41 All-trans-8'-apo-beta-carotenal 15,15'-oxygenase",
+ "1.14.99.n2 Beta,beta-carotene 9',10'-oxygenase",
+ "1.14.99.n3 Zeaxanthin 7,8-dioxygenase",
+ "1.15.1.1 Superoxide dismutase",
+ "1.15.1.2 Superoxide reductase",
+ "1.16.1.1 Mercury(II) reductase",
+ "1.16.1.2 Diferric-transferrin reductase",
+ "1.16.1.3 Aquacobalamin reductase",
+ "1.16.1.4 Cob(II)alamin reductase",
+ "1.16.1.5 Aquacobalamin reductase (NADPH)",
+ "1.16.1.6 Cyanocobalamin reductase (cyanide-eliminating)",
+ "1.16.1.7 Ferric-chelate reductase",
+ "1.16.1.8 [Methionine synthase] reductase",
+ "1.16.3.1 Ferroxidase",
+ "1.16.8.1 Cob(II)yrinic acid a,c-diamide reductase",
+ "1.17.1.1 CDP-4-dehydro-6-deoxyglucose reductase",
+ "1.17.1.2 4-hydroxy-3-methylbut-2-enyl diphosphate reductase",
+ "1.17.1.3 Leucoanthocyanidin reductase",
+ "1.17.1.4 Xanthine dehydrogenase",
+ "1.17.1.5 Nicotinate dehydrogenase",
+ "1.17.2.1 Nicotinate dehydrogenase (cytochrome)",
+ "1.17.3.1 Pteridine oxidase",
+ "1.17.3.2 Xanthine oxidase",
+ "1.17.3.3 6-hydroxynicotinate dehydrogenase",
+ "1.17.4.1 Ribonucleoside-diphosphate reductase",
+ "1.17.4.2 Ribonucleoside-triphosphate reductase",
+ "1.17.5.1 Phenylacetyl-CoA dehydrogenase",
+ "1.17.5.2 Caffeine dehydrogenase",
+ "1.17.7.1 (E)-4-hydroxy-3-methylbut-2-enyl-diphosphate synthase",
+ "1.17.99.1 4-methylphenol dehydrogenase (hydroxylating)",
+ "1.17.99.2 Ethylbenzene hydroxylase",
+ "1.17.99.3 3-alpha,7-alpha,12-alpha-trihydroxy-5-beta-cholestanoyl-CoA 24-hydroxylase",
+ "1.17.99.4 Uracil/thymine dehydrogenase",
+ "1.17.99.5 Bile-acid 7-alpha-dehydroxylase",
+ "1.18.1.1 Rubredoxin--NAD(+) reductase",
+ "1.18.1.2 Ferredoxin--NADP(+) reductase",
+ "1.18.1.3 Ferredoxin--NAD(+) reductase",
+ "1.18.1.4 Rubredoxin--NAD(P)(+) reductase",
+ "1.18.6.1 Nitrogenase",
+ "1.19.6.1 Nitrogenase (flavodoxin)",
+ "1.20.1.1 Phosphonate dehydrogenase",
+ "1.20.4.1 Arsenate reductase (glutaredoxin)",
+ "1.20.4.2 Methylarsonate reductase",
+ "1.20.4.3 Mycoredoxin",
+ "1.20.98.1 Arsenate reductase (azurin)",
+ "1.20.99.1 Arsenate reductase (donor)",
+ "1.21.3.1 Isopenicillin-N synthase",
+ "1.21.3.2 Columbamine oxidase",
+ "1.21.3.3 Reticuline oxidase",
+ "1.21.3.4 Sulochrin oxidase ((+)-bisdechlorogeodin-forming)",
+ "1.21.3.5 Sulochrin oxidase ((-)-bisdechlorogeodin-forming)",
+ "1.21.3.6 Aureusidin synthase",
+ "1.21.4.1 D-proline reductase (dithiol)",
+ "1.21.4.2 Glycine reductase",
+ "1.21.4.3 Sarcosine reductase",
+ "1.21.4.4 Betaine reductase",
+ "1.21.99.1 Beta-cyclopiazonate dehydrogenase",
+ "1.22.1.1 Iodotyrosine deiodinase",
+ "1.97.1.1 Chlorate reductase",
+ "1.97.1.2 Pyrogallol hydroxytransferase",
+ "1.97.1.3 Sulfur reductase",
+ "1.97.1.4 [Formate-C-acetyltransferase]-activating enzyme",
+ "1.97.1.8 Tetrachloroethene reductive dehalogenase",
+ "1.97.1.9 Selenate reductase",
+ "1.97.1.10 Thyroxine 5'-deiodinase",
+ "1.97.1.11 Thyroxine 5-deiodinase",
+ "2.1.1.1 Nicotinamide N-methyltransferase",
+ "2.1.1.2 Guanidinoacetate N-methyltransferase",
+ "2.1.1.3 Thetin--homocysteine S-methyltransferase",
+ "2.1.1.4 Acetylserotonin O-methyltransferase",
+ "2.1.1.5 Betaine--homocysteine S-methyltransferase",
+ "2.1.1.6 Catechol O-methyltransferase",
+ "2.1.1.7 Nicotinate N-methyltransferase",
+ "2.1.1.8 Histamine N-methyltransferase",
+ "2.1.1.9 Thiol S-methyltransferase",
+ "2.1.1.10 Homocysteine S-methyltransferase",
+ "2.1.1.11 Magnesium protoporphyrin IX methyltransferase",
+ "2.1.1.12 Methionine S-methyltransferase",
+ "2.1.1.13 Methionine synthase",
+ "2.1.1.14 5-methyltetrahydropteroyltriglutamate--homocysteine S-methyltransferase",
+ "2.1.1.15 Fatty-acid O-methyltransferase",
+ "2.1.1.16 Methylene-fatty-acyl-phospholipid synthase",
+ "2.1.1.17 Phosphatidylethanolamine N-methyltransferase",
+ "2.1.1.18 Polysaccharide O-methyltransferase",
+ "2.1.1.19 Trimethylsulfonium--tetrahydrofolate N-methyltransferase",
+ "2.1.1.20 Glycine N-methyltransferase",
+ "2.1.1.21 Methylamine--glutamate N-methyltransferase",
+ "2.1.1.22 Carnosine N-methyltransferase",
+ "2.1.1.25 Phenol O-methyltransferase",
+ "2.1.1.26 Iodophenol O-methyltransferase",
+ "2.1.1.27 Tyramine N-methyltransferase",
+ "2.1.1.28 Phenylethanolamine N-methyltransferase",
+ "2.1.1.31 tRNA (guanine-N(1)-)-methyltransferase",
+ "2.1.1.32 tRNA (guanine-N(2)-)-methyltransferase",
+ "2.1.1.33 tRNA (guanine-N(7)-)-methyltransferase",
+ "2.1.1.34 tRNA guanosine-2'-O-methyltransferase",
+ "2.1.1.35 tRNA (uracil-5-)-methyltransferase",
+ "2.1.1.36 tRNA (adenine-N(1)-)-methyltransferase",
+ "2.1.1.37 DNA (cytosine-5-)-methyltransferase",
+ "2.1.1.38 O-demethylpuromycin O-methyltransferase",
+ "2.1.1.39 Inositol 3-methyltransferase",
+ "2.1.1.40 Inositol 1-methyltransferase",
+ "2.1.1.41 Sterol 24-C-methyltransferase",
+ "2.1.1.42 Luteolin O-methyltransferase",
+ "2.1.1.43 Histone-lysine N-methyltransferase",
+ "2.1.1.44 Dimethylhistidine N-methyltransferase",
+ "2.1.1.45 Thymidylate synthase",
+ "2.1.1.46 Isoflavone 4'-O-methyltransferase",
+ "2.1.1.47 Indolepyruvate C-methyltransferase",
+ "2.1.1.49 Amine N-methyltransferase",
+ "2.1.1.50 Loganate O-methyltransferase",
+ "2.1.1.53 Putrescine N-methyltransferase",
+ "2.1.1.54 Deoxycytidylate C-methyltransferase",
+ "2.1.1.55 tRNA (adenine-N(6)-)-methyltransferase",
+ "2.1.1.56 mRNA (guanine-N(7)-)-methyltransferase",
+ "2.1.1.57 mRNA (nucleoside-2'-O-)-methyltransferase",
+ "2.1.1.59 [Cytochrome c]-lysine N-methyltransferase",
+ "2.1.1.60 Calmodulin-lysine N-methyltransferase",
+ "2.1.1.61 tRNA (5-methylaminomethyl-2-thiouridylate)-methyltransferase",
+ "2.1.1.62 mRNA (2'-O-methyladenosine-N(6)-)-methyltransferase",
+ "2.1.1.63 Methylated-DNA--[protein]-cysteine S-methyltransferase",
+ "2.1.1.64 3-demethylubiquinol 3-O-methyltransferase",
+ "2.1.1.65 Licodione 2'-O-methyltransferase",
+ "2.1.1.66 rRNA (adenosine-2'-O-)-methyltransferase",
+ "2.1.1.67 Thiopurine S-methyltransferase",
+ "2.1.1.68 Caffeate O-methyltransferase",
+ "2.1.1.69 5-hydroxyfuranocoumarin 5-O-methyltransferase",
+ "2.1.1.70 8-hydroxyfuranocoumarin 8-O-methyltransferase",
+ "2.1.1.71 Phosphatidyl-N-methylethanolamine N-methyltransferase",
+ "2.1.1.72 Site-specific DNA-methyltransferase (adenine-specific)",
+ "2.1.1.74 Methylenetetrahydrofolate--tRNA-(uracil-5-)-methyltransferase (FADH(2)-oxidizing)",
+ "2.1.1.75 Apigenin 4'-O-methyltransferase",
+ "2.1.1.76 Quercetin 3-O-methyltransferase",
+ "2.1.1.77 Protein-L-isoaspartate(D-aspartate) O-methyltransferase",
+ "2.1.1.78 Isoorientin 3'-O-methyltransferase",
+ "2.1.1.79 Cyclopropane-fatty-acyl-phospholipid synthase",
+ "2.1.1.80 Protein-glutamate O-methyltransferase",
+ "2.1.1.82 3-methylquercetin 7-O-methyltransferase",
+ "2.1.1.83 3,7-dimethylquercetin 4'-O-methyltransferase",
+ "2.1.1.84 Methylquercetagetin 6-O-methyltransferase",
+ "2.1.1.85 Protein-histidine N-methyltransferase",
+ "2.1.1.86 Tetrahydromethanopterin S-methyltransferase",
+ "2.1.1.87 Pyridine N-methyltransferase",
+ "2.1.1.88 8-hydroxyquercetin 8-O-methyltransferase",
+ "2.1.1.89 Tetrahydrocolumbamine 2-O-methyltransferase",
+ "2.1.1.90 Methanol--5-hydroxybenzimidazolylcobamide Co-methyltransferase",
+ "2.1.1.91 Isobutyraldoxime O-methyltransferase",
+ "2.1.1.94 Tabersonine 16-O-methyltransferase",
+ "2.1.1.95 Tocopherol O-methyltransferase",
+ "2.1.1.96 Thioether S-methyltransferase",
+ "2.1.1.97 3-hydroxyanthranilate 4-C-methyltransferase",
+ "2.1.1.98 Diphthine synthase",
+ "2.1.1.99 3-hydroxy-16-methoxy-2,3-dihydrotabersonine N-methyltransferase",
+ "2.1.1.100 Protein-S-isoprenylcysteine O-methyltransferase",
+ "2.1.1.101 Macrocin O-methyltransferase",
+ "2.1.1.102 Demethylmacrocin O-methyltransferase",
+ "2.1.1.103 Phosphoethanolamine N-methyltransferase",
+ "2.1.1.104 Caffeoyl-CoA O-methyltransferase",
+ "2.1.1.105 N-benzoyl-4-hydroxyanthranilate 4-O-methyltransferase",
+ "2.1.1.106 Tryptophan 2-C-methyltransferase",
+ "2.1.1.107 Uroporphyrinogen-III C-methyltransferase",
+ "2.1.1.108 6-hydroxymellein O-methyltransferase",
+ "2.1.1.109 Demethylsterigmatocystin 6-O-methyltransferase",
+ "2.1.1.110 Sterigmatocystin 8-O-methyltransferase",
+ "2.1.1.111 Anthranilate N-methyltransferase",
+ "2.1.1.112 Glucuronoxylan 4-O-methyltransferase",
+ "2.1.1.113 Site-specific DNA-methyltransferase (cytosine-N(4)-specific)",
+ "2.1.1.114 Polyprenyldihydroxybenzoate methyltransferase",
+ "2.1.1.115 (RS)-1-benzyl-1,2,3,4-tetrahydroisoquinoline N-methyltransferase",
+ "2.1.1.116 3'-hydroxy-N-methyl-(S)-coclaurine 4'-O-methyltransferase",
+ "2.1.1.117 (S)-scoulerine 9-O-methyltransferase",
+ "2.1.1.118 Columbamine O-methyltransferase",
+ "2.1.1.119 10-hydroxydihydrosanguinarine 10-O-methyltransferase",
+ "2.1.1.120 12-hydroxydihydrochelirubine 12-O-methyltransferase",
+ "2.1.1.121 6-O-methylnorlaudanosoline 5'-O-methyltransferase",
+ "2.1.1.122 (S)-tetrahydroprotoberberine N-methyltransferase",
+ "2.1.1.123 [Cytochrome c]-methionine S-methyltransferase",
+ "2.1.1.124 [Cytochrome c]-arginine N-methyltransferase",
+ "2.1.1.125 Histone-arginine N-methyltransferase",
+ "2.1.1.126 [Myelin basic protein]-arginine N-methyltransferase",
+ "2.1.1.127 [Ribulose-bisphosphate carboxylase]-lysine N-methyltransferase",
+ "2.1.1.128 (RS)-norcoclaurine 6-O-methyltransferase",
+ "2.1.1.129 Inositol 4-methyltransferase",
+ "2.1.1.130 Precorrin-2 C(20)-methyltransferase",
+ "2.1.1.131 Precorrin-3B C(17)-methyltransferase",
+ "2.1.1.132 Precorrin-6Y C(5,15)-methyltransferase (decarboxylating)",
+ "2.1.1.133 Precorrin-4 C(11)-methyltransferase",
+ "2.1.1.136 Chlorophenol O-methyltransferase",
+ "2.1.1.137 Arsenite methyltransferase",
+ "2.1.1.139 3'-demethylstaurosporine O-methyltransferase",
+ "2.1.1.140 (S)-coclaurine-N-methyltransferase",
+ "2.1.1.141 Jasmonate O-methyltransferase",
+ "2.1.1.142 Cycloartenol 24-C-methyltransferase",
+ "2.1.1.143 24-methylenesterol C-methyltransferase",
+ "2.1.1.144 Trans-aconitate 2-methyltransferase",
+ "2.1.1.145 Trans-aconitate 3-methyltransferase",
+ "2.1.1.146 (Iso)eugenol O-methyltransferase",
+ "2.1.1.147 Corydaline synthase",
+ "2.1.1.148 Thymidylate synthase (FAD)",
+ "2.1.1.149 Myricetin O-methyltransferase",
+ "2.1.1.150 Isoflavone 7-O-methyltransferase",
+ "2.1.1.151 Cobalt-factor II C(20)-methyltransferase",
+ "2.1.1.152 Precorrin-6A synthase (deacetylating)",
+ "2.1.1.153 Vitexin 2''-O-rhamnoside 7-O-methyltransferase",
+ "2.1.1.154 Isoliquiritigenin 2'-O-methyltransferase",
+ "2.1.1.155 Kaempferol 4'-O-methyltransferase",
+ "2.1.1.156 Glycine/sarcosine N-methyltransferase",
+ "2.1.1.157 Sarcosine/dimethylglycine N-methyltransferase",
+ "2.1.1.158 7-methylxanthosine synthase",
+ "2.1.1.159 Theobromine synthase",
+ "2.1.1.160 Caffeine synthase",
+ "2.1.1.161 Dimethylglycine N-methyltransferase",
+ "2.1.1.162 Glycine/sarcosine/dimethylglycine N-methyltransferase",
+ "2.1.1.163 Demethylmenaquinone methyltransferase",
+ "2.1.1.164 Demethylrebeccamycin-D-glucose O-methyltransferase",
+ "2.1.1.165 Methyl halide transferase",
+ "2.1.1.166 23S rRNA (uridine(2552)-2'-O-)-methyltransferase",
+ "2.1.1.167 27S pre-rRNA (guanosine(2922)-2'-O-)-methyltransferase",
+ "2.1.1.168 21S rRNA (uridine(2791)-2'-O-)-methyltransferase",
+ "2.1.1.169 Tricetin 3',4',5'-O-trimethyltransferase",
+ "2.1.1.170 16S rRNA (guanine(527)-N(7))-methyltransferase",
+ "2.1.1.171 16S rRNA (guanine(966)-N(2))-methyltransferase",
+ "2.1.1.172 16S rRNA (guanine(1207)-N(2))-methyltransferase",
+ "2.1.1.173 23S rRNA (guanine(2445)-N(2))-methyltransferase",
+ "2.1.1.174 23S rRNA (guanine(1835)-N(2))-methyltransferase",
+ "2.1.1.175 Tricin synthase",
+ "2.1.1.176 16S rRNA (cytosine(967)-C(5))-methyltransferase",
+ "2.1.1.177 23S rRNA (pseudouridine(1915)-N(3))-methyltransferase",
+ "2.1.1.178 16S rRNA (cytosine(1407)-C(5))-methyltransferase",
+ "2.1.1.179 16S rRNA (guanine(1405)-C(7))-methyltransferase",
+ "2.1.1.180 16S rRNA (adenine(1408)-N(1))-methyltransferase",
+ "2.1.1.181 23S rRNA (adenine(1618)-N(6))-methyltransferase",
+ "2.1.1.182 16S rRNA (adenine(1518)-N(6)/adenine(1519)-N(6))-dimethyltransferase",
+ "2.1.1.183 18S rRNA (adenine(1779)-N(6)/adenine(1780)-N(6))-dimethyltransferase",
+ "2.1.1.184 23S rRNA (adenine(2085)-N(6))-dimethyltransferase",
+ "2.1.1.185 23S rRNA (guanine(2251)-2'-O)-methyltransferase",
+ "2.1.1.186 23S rRNA (cytidine(2498)-2'-O)-methyltransferase",
+ "2.1.1.187 23S rRNA (guanine(745)-N(1))-methyltransferase",
+ "2.1.1.188 23S rRNA (guanine(748)-N(1))-methyltransferase",
+ "2.1.1.189 23S rRNA (uracil(747)-C(5))-methyltransferase",
+ "2.1.1.190 23S rRNA (uracil(1939)-C(5))-methyltransferase",
+ "2.1.1.191 23S rRNA (cytosine(1962)-C(5))-methyltransferase",
+ "2.1.1.192 23S rRNA (adenine(2503)-C(2))-methyltransferase",
+ "2.1.1.193 16S rRNA (uracil(1498)-N(3))-methyltransferase",
+ "2.1.1.194 23S rRNA (adenine(2503)-C(2),C(8))-methyltransferase",
+ "2.1.1.195 Cobalt-precorrin-5B (C(1))-methyltransferase",
+ "2.1.1.196 Cobalt-precorrin-7 (C(15))-methyltransferase (decarboxylating)",
+ "2.1.1.197 Malonyl-CoA O-methyltransferase",
+ "2.1.1.198 16S rRNA (cytidine(1402)-2'-O)-methyltransferase",
+ "2.1.1.199 16S rRNA (cytosine(1402)-N(4))-methyltransferase",
+ "2.1.1.200 tRNA (cytidine(32)/uridine(32)-2'-O)-methyltransferase",
+ "2.1.1.201 2-methoxy-6-polyprenyl-1,4-benzoquinol methylase",
+ "2.1.1.202 Multisite-specific tRNA:(cytosine-C(5))-methyltransferase",
+ "2.1.1.203 tRNA (cytosine(34)-C(5))-methyltransferase",
+ "2.1.1.204 tRNA (cytosine(38)-C(5))-methyltransferase",
+ "2.1.1.205 tRNA (cytidine(32)/guanosine(34)-2'-O)-methyltransferase",
+ "2.1.1.206 tRNA (cytidine(56)-2'-O)-methyltransferase",
+ "2.1.1.n1 Resorcinol O-methyltransferase",
+ "2.1.1.n2 tRNA (uridine(44)-2'-O-)-methyltransferase",
+ "2.1.1.n3 Selenocysteine Se-methyltransferase",
+ "2.1.1.n4 Thiocyanate methyltransferase",
+ "2.1.1.n5 N-terminal protein methyltransferase",
+ "2.1.1.n6 Geranyl diphosphate 2-C-methyltransferase",
+ "2.1.1.n7 5-pentadecatrienyl resorcinol O-methyltransferase",
+ "2.1.2.1 Glycine hydroxymethyltransferase",
+ "2.1.2.2 Phosphoribosylglycinamide formyltransferase",
+ "2.1.2.3 Phosphoribosylaminoimidazolecarboxamide formyltransferase",
+ "2.1.2.4 Glycine formimidoyltransferase",
+ "2.1.2.5 Glutamate formimidoyltransferase",
+ "2.1.2.7 D-alanine 2-hydroxymethyltransferase",
+ "2.1.2.8 Deoxycytidylate 5-hydroxymethyltransferase",
+ "2.1.2.9 Methionyl-tRNA formyltransferase",
+ "2.1.2.10 Aminomethyltransferase",
+ "2.1.2.11 3-methyl-2-oxobutanoate hydroxymethyltransferase",
+ "2.1.2.13 UDP-4-amino-4-deoxy-L-arabinose formyltransferase",
+ "2.1.3.1 Methylmalonyl-CoA carboxytransferase",
+ "2.1.3.2 Aspartate carbamoyltransferase",
+ "2.1.3.3 Ornithine carbamoyltransferase",
+ "2.1.3.5 Oxamate carbamoyltransferase",
+ "2.1.3.6 Putrescine carbamoyltransferase",
+ "2.1.3.7 3-hydroxymethylcephem carbamoyltransferase",
+ "2.1.3.8 Lysine carbamoyltransferase",
+ "2.1.3.9 N-acetylornithine carbamoyltransferase",
+ "2.1.3.10 Malonyl-S-ACP:biotin-protein carboxyltransferase",
+ "2.1.3.11 N-succinylornithine carbamoyltransferase",
+ "2.1.4.1 Glycine amidinotransferase",
+ "2.1.4.2 Scyllo-inosamine-4-phosphate amidinotransferase",
+ "2.2.1.1 Transketolase",
+ "2.2.1.2 Transaldolase",
+ "2.2.1.3 Formaldehyde transketolase",
+ "2.2.1.4 Acetoin--ribose-5-phosphate transaldolase",
+ "2.2.1.5 2-hydroxy-3-oxoadipate synthase",
+ "2.2.1.6 Acetolactate synthase",
+ "2.2.1.7 1-deoxy-D-xylulose-5-phosphate synthase",
+ "2.2.1.8 Fluorothreonine transaldolase",
+ "2.2.1.9 2-succinyl-5-enolpyruvyl-6-hydroxy-3-cyclohexene-1-carboxylic-acid synthase",
+ "2.3.1.1 Amino-acid N-acetyltransferase",
+ "2.3.1.2 Imidazole N-acetyltransferase",
+ "2.3.1.3 Glucosamine N-acetyltransferase",
+ "2.3.1.4 Glucosamine-phosphate N-acetyltransferase",
+ "2.3.1.5 Arylamine N-acetyltransferase",
+ "2.3.1.6 Choline O-acetyltransferase",
+ "2.3.1.7 Carnitine O-acetyltransferase",
+ "2.3.1.8 Phosphate acetyltransferase",
+ "2.3.1.9 Acetyl-CoA C-acetyltransferase",
+ "2.3.1.10 Hydrogen-sulfide S-acetyltransferase",
+ "2.3.1.11 Thioethanolamine S-acetyltransferase",
+ "2.3.1.12 Dihydrolipoyllysine-residue acetyltransferase",
+ "2.3.1.13 Glycine N-acyltransferase",
+ "2.3.1.14 Glutamine N-phenylacetyltransferase",
+ "2.3.1.15 Glycerol-3-phosphate O-acyltransferase",
+ "2.3.1.16 Acetyl-CoA C-acyltransferase",
+ "2.3.1.17 Aspartate N-acetyltransferase",
+ "2.3.1.18 Galactoside O-acetyltransferase",
+ "2.3.1.19 Phosphate butyryltransferase",
+ "2.3.1.20 Diacylglycerol O-acyltransferase",
+ "2.3.1.21 Carnitine O-palmitoyltransferase",
+ "2.3.1.22 2-acylglycerol O-acyltransferase",
+ "2.3.1.23 1-acylglycerophosphocholine O-acyltransferase",
+ "2.3.1.24 Sphingosine N-acyltransferase",
+ "2.3.1.25 Plasmalogen synthase",
+ "2.3.1.26 Sterol O-acyltransferase",
+ "2.3.1.27 Cortisol O-acetyltransferase",
+ "2.3.1.28 Chloramphenicol O-acetyltransferase",
+ "2.3.1.29 Glycine C-acetyltransferase",
+ "2.3.1.30 Serine O-acetyltransferase",
+ "2.3.1.31 Homoserine O-acetyltransferase",
+ "2.3.1.32 Lysine N-acetyltransferase",
+ "2.3.1.33 Histidine N-acetyltransferase",
+ "2.3.1.34 D-tryptophan N-acetyltransferase",
+ "2.3.1.35 Glutamate N-acetyltransferase",
+ "2.3.1.36 D-amino-acid N-acetyltransferase",
+ "2.3.1.37 5-aminolevulinate synthase",
+ "2.3.1.38 [Acyl-carrier-protein] S-acetyltransferase",
+ "2.3.1.39 [Acyl-carrier-protein] S-malonyltransferase",
+ "2.3.1.40 Acyl-[acyl-carrier-protein]--phospholipid O-acyltransferase",
+ "2.3.1.41 Beta-ketoacyl-acyl-carrier-protein synthase I",
+ "2.3.1.42 Glycerone-phosphate O-acyltransferase",
+ "2.3.1.43 Phosphatidylcholine--sterol O-acyltransferase",
+ "2.3.1.44 N-acetylneuraminate 4-O-acetyltransferase",
+ "2.3.1.45 N-acetylneuraminate 7-O(or 9-O)-acetyltransferase",
+ "2.3.1.46 Homoserine O-succinyltransferase",
+ "2.3.1.47 8-amino-7-oxononanoate synthase",
+ "2.3.1.48 Histone acetyltransferase",
+ "2.3.1.49 Deacetyl-[citrate-(pro-3S)-lyase] S-acetyltransferase",
+ "2.3.1.50 Serine C-palmitoyltransferase",
+ "2.3.1.51 1-acylglycerol-3-phosphate O-acyltransferase",
+ "2.3.1.52 2-acylglycerol-3-phosphate O-acyltransferase",
+ "2.3.1.53 Phenylalanine N-acetyltransferase",
+ "2.3.1.54 Formate C-acetyltransferase",
+ "2.3.1.56 Aromatic-hydroxylamine O-acetyltransferase",
+ "2.3.1.57 Diamine N-acetyltransferase",
+ "2.3.1.58 2,3-diaminopropionate N-oxalyltransferase",
+ "2.3.1.59 Gentamicin 2'-N-acetyltransferase",
+ "2.3.1.60 Gentamicin 3'-N-acetyltransferase",
+ "2.3.1.61 Dihydrolipoyllysine-residue succinyltransferase",
+ "2.3.1.62 2-acylglycerophosphocholine O-acyltransferase",
+ "2.3.1.63 1-alkylglycerophosphocholine O-acyltransferase",
+ "2.3.1.64 Agmatine N(4)-coumaroyltransferase",
+ "2.3.1.65 Bile acid-CoA:amino acid N-acyltransferase",
+ "2.3.1.66 Leucine N-acetyltransferase",
+ "2.3.1.67 1-alkylglycerophosphocholine O-acetyltransferase",
+ "2.3.1.68 Glutamine N-acyltransferase",
+ "2.3.1.69 Monoterpenol O-acetyltransferase",
+ "2.3.1.71 Glycine N-benzoyltransferase",
+ "2.3.1.72 Indoleacetylglucose--inositol O-acyltransferase",
+ "2.3.1.73 Diacylglycerol--sterol O-acyltransferase",
+ "2.3.1.74 Naringenin-chalcone synthase",
+ "2.3.1.75 Long-chain-alcohol O-fatty-acyltransferase",
+ "2.3.1.76 Retinol O-fatty-acyltransferase",
+ "2.3.1.77 Triacylglycerol--sterol O-acyltransferase",
+ "2.3.1.78 Heparan-alpha-glucosaminide N-acetyltransferase",
+ "2.3.1.79 Maltose O-acetyltransferase",
+ "2.3.1.80 Cysteine-S-conjugate N-acetyltransferase",
+ "2.3.1.81 Aminoglycoside N(3')-acetyltransferase",
+ "2.3.1.82 Aminoglycoside N(6')-acetyltransferase",
+ "2.3.1.83 Phosphatidylcholine--dolichol O-acyltransferase",
+ "2.3.1.84 Alcohol O-acetyltransferase",
+ "2.3.1.85 Fatty-acid synthase",
+ "2.3.1.86 Fatty-acyl-CoA synthase",
+ "2.3.1.87 Aralkylamine N-acetyltransferase",
+ "2.3.1.88 Peptide alpha-N-acetyltransferase",
+ "2.3.1.89 Tetrahydrodipicolinate N-acetyltransferase",
+ "2.3.1.90 Beta-glucogallin O-galloyltransferase",
+ "2.3.1.91 Sinapoylglucose--choline O-sinapoyltransferase",
+ "2.3.1.92 Sinapoylglucose--malate O-sinapoyltransferase",
+ "2.3.1.93 13-hydroxylupinine O-tigloyltransferase",
+ "2.3.1.94 6-deoxyerythronolide-B synthase",
+ "2.3.1.95 Trihydroxystilbene synthase",
+ "2.3.1.96 Glycoprotein N-palmitoyltransferase",
+ "2.3.1.97 Glycylpeptide N-tetradecanoyltransferase",
+ "2.3.1.98 Chlorogenate--glucarate O-hydroxycinnamoyltransferase",
+ "2.3.1.99 Quinate O-hydroxycinnamoyltransferase",
+ "2.3.1.100 [Myelin-proteolipid] O-palmitoyltransferase",
+ "2.3.1.101 Formylmethanofuran--tetrahydromethanopterin N-formyltransferase",
+ "2.3.1.102 N(6)-hydroxylysine O-acetyltransferase",
+ "2.3.1.103 Sinapoylglucose--sinapoylglucose O-sinapoyltransferase",
+ "2.3.1.104 1-alkenylglycerophosphocholine O-acyltransferase",
+ "2.3.1.105 Alkylglycerophosphate 2-O-acetyltransferase",
+ "2.3.1.106 Tartronate O-hydroxycinnamoyltransferase",
+ "2.3.1.107 Deacetylvindoline O-acetyltransferase",
+ "2.3.1.108 Alpha-tubulin N-acetyltransferase",
+ "2.3.1.109 Arginine N-succinyltransferase",
+ "2.3.1.110 Tyramine N-feruloyltransferase",
+ "2.3.1.111 Mycocerosate synthase",
+ "2.3.1.112 D-tryptophan N-malonyltransferase",
+ "2.3.1.113 Anthranilate N-malonyltransferase",
+ "2.3.1.114 3,4-dichloroaniline N-malonyltransferase",
+ "2.3.1.115 Isoflavone-7-O-beta-glucoside 6''-O-malonyltransferase",
+ "2.3.1.116 Flavonol-3-O-beta-glucoside O-malonyltransferase",
+ "2.3.1.117 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase",
+ "2.3.1.118 N-hydroxyarylamine O-acetyltransferase",
+ "2.3.1.119 Icosanoyl-CoA synthase",
+ "2.3.1.121 1-alkenylglycerophosphoethanolamine O-acyltransferase",
+ "2.3.1.122 Trehalose O-mycolyltransferase",
+ "2.3.1.123 Dolichol O-acyltransferase",
+ "2.3.1.125 1-alkyl-2-acetylglycerol O-acyltransferase",
+ "2.3.1.126 Isocitrate O-dihydroxycinnamoyltransferase",
+ "2.3.1.127 Ornithine N-benzoyltransferase",
+ "2.3.1.128 Ribosomal-protein-alanine N-acetyltransferase",
+ "2.3.1.129 Acyl-[acyl-carrier-protein]--UDP-N-acetylglucosamine O-acyltransferase",
+ "2.3.1.130 Galactarate O-hydroxycinnamoyltransferase",
+ "2.3.1.131 Glucarate O-hydroxycinnamoyltransferase",
+ "2.3.1.132 Glucarolactone O-hydroxycinnamoyltransferase",
+ "2.3.1.133 Shikimate O-hydroxycinnamoyltransferase",
+ "2.3.1.134 Galactolipid O-acyltransferase",
+ "2.3.1.135 Phosphatidylcholine--retinol O-acyltransferase",
+ "2.3.1.136 Polysialic-acid O-acetyltransferase",
+ "2.3.1.137 Carnitine O-octanoyltransferase",
+ "2.3.1.138 Putrescine N-hydroxycinnamoyltransferase",
+ "2.3.1.139 Ecdysone O-acyltransferase",
+ "2.3.1.140 Rosmarinate synthase",
+ "2.3.1.141 Galactosylacylglycerol O-acyltransferase",
+ "2.3.1.142 Glycoprotein O-fatty-acyltransferase",
+ "2.3.1.143 Beta-glucogallin--tetrakisgalloylglucose O-galloyltransferase",
+ "2.3.1.144 Anthranilate N-benzoyltransferase",
+ "2.3.1.145 Piperidine N-piperoyltransferase",
+ "2.3.1.146 Pinosylvin synthase",
+ "2.3.1.147 Glycerophospholipid arachidonoyl-transferase (CoA-independent)",
+ "2.3.1.148 Glycerophospholipid acyltransferase (CoA-dependent)",
+ "2.3.1.149 Platelet-activating factor acetyltransferase",
+ "2.3.1.150 Salutaridinol 7-O-acetyltransferase",
+ "2.3.1.151 Benzophenone synthase",
+ "2.3.1.152 Alcohol O-cinnamoyltransferase",
+ "2.3.1.153 Anthocyanin 5-aromatic acyltransferase",
+ "2.3.1.154 Propionyl-CoA C(2)-trimethyltridecanoyltransferase",
+ "2.3.1.155 Acetyl-CoA C-myristoyltransferase",
+ "2.3.1.156 Phloroisovalerophenone synthase",
+ "2.3.1.157 Glucosamine-1-phosphate N-acetyltransferase",
+ "2.3.1.158 Phospholipid:diacylglycerol acyltransferase",
+ "2.3.1.159 Acridone synthase",
+ "2.3.1.160 Vinorine synthase",
+ "2.3.1.161 Lovastatin nonaketide synthase",
+ "2.3.1.162 Taxadien-5-alpha-ol O-acetyltransferase",
+ "2.3.1.163 10-hydroxytaxane O-acetyltransferase",
+ "2.3.1.164 Isopenicillin-N N-acyltransferase",
+ "2.3.1.165 6-methylsalicylic acid synthase",
+ "2.3.1.166 2-alpha-hydroxytaxane 2-O-benzoyltransferase",
+ "2.3.1.167 10-deacetylbaccatin III 10-O-acetyltransferase",
+ "2.3.1.168 Dihydrolipoyllysine-residue (2-methylpropanoyl)transferase",
+ "2.3.1.169 CO-methylating acetyl-CoA synthase",
+ "2.3.1.170 6'-deoxychalcone synthase",
+ "2.3.1.171 Anthocyanin 6''-O-malonyltransferase",
+ "2.3.1.172 Anthocyanin 5-O-glucoside 6'''-O-malonyltransferase",
+ "2.3.1.173 Flavonol-3-O-triglucoside O-coumaroyltransferase",
+ "2.3.1.174 3-oxoadipyl-CoA thiolase",
+ "2.3.1.175 Deacetylcephalosporin-C acetyltransferase",
+ "2.3.1.176 Propanoyl-CoA C-acyltransferase",
+ "2.3.1.177 Biphenyl synthase",
+ "2.3.1.178 Diaminobutyrate acetyltransferase",
+ "2.3.1.179 Beta-ketoacyl-acyl-carrier-protein synthase II",
+ "2.3.1.180 Beta-ketoacyl-acyl-carrier-protein synthase III",
+ "2.3.1.181 Lipoyl(octanoyl) transferase",
+ "2.3.1.182 (R)-citramalate synthase",
+ "2.3.1.183 Phosphinothricin acetyltransferase",
+ "2.3.1.184 Acyl-homoserine-lactone synthase",
+ "2.3.1.185 Tropine acyltransferase",
+ "2.3.1.186 Pseudotropine acyltransferase",
+ "2.3.1.187 Acetyl-S-ACP:malonate ACP transferase",
+ "2.3.1.188 Omega-hydroxypalmitate O-feruloyl transferase",
+ "2.3.1.189 Mycothiol synthase",
+ "2.3.1.190 Acetoin dehydrogenase",
+ "2.3.1.191 UDP-3-O-(3-hydroxymyristoyl)glucosamine N-acyltransferase",
+ "2.3.1.192 Glycine N-phenylacetyltransferase",
+ "2.3.1.193 tRNA(Met) cytidine acetyltransferase",
+ "2.3.1.194 Acetoacetyl-CoA synthase",
+ "2.3.1.195 (Z)-3-hexen-1-ol acetyltransferase",
+ "2.3.2.1 D-glutamyltransferase",
+ "2.3.2.2 Gamma-glutamyltransferase",
+ "2.3.2.3 Lysyltransferase",
+ "2.3.2.4 Gamma-glutamylcyclotransferase",
+ "2.3.2.5 Glutaminyl-peptide cyclotransferase",
+ "2.3.2.6 Leucyltransferase",
+ "2.3.2.7 Aspartyltransferase",
+ "2.3.2.8 Arginyltransferase",
+ "2.3.2.9 Agaritine gamma-glutamyltransferase",
+ "2.3.2.10 UDP-N-acetylmuramoylpentapeptide-lysine N(6)-alanyltransferase",
+ "2.3.2.11 Alanylphosphatidylglycerol synthase",
+ "2.3.2.12 Peptidyltransferase",
+ "2.3.2.13 Protein-glutamine gamma-glutamyltransferase",
+ "2.3.2.14 D-alanine gamma-glutamyltransferase",
+ "2.3.2.15 Glutathione gamma-glutamylcysteinyltransferase",
+ "2.3.2.16 Lipid II:glycine glycyltransferase",
+ "2.3.2.17 N-acetylmuramoyl-L-alanyl-D-glutamyl-L-lysyl-(N(6)-glycyl)-D-alanyl-D-alanine-diphosphoundecaprenyl-N-acetylglucosamine:glycine glycyltransferase",
+ "2.3.2.18 N-acetylmuramoyl-L-alanyl-D-glutamyl-L-lysyl-(N(6)-triglycine)-D-alanyl-D-alanine-diphosphoundecaprenyl-N-acetylglucosamine:glycine glycyltransferase",
+ "2.3.3.1 Citrate (Si)-synthase",
+ "2.3.3.2 Decylcitrate synthase",
+ "2.3.3.3 Citrate (Re)-synthase",
+ "2.3.3.4 Decylhomocitrate synthase",
+ "2.3.3.5 2-methylcitrate synthase",
+ "2.3.3.6 2-ethylmalate synthase",
+ "2.3.3.7 3-ethylmalate synthase",
+ "2.3.3.8 ATP citrate synthase",
+ "2.3.3.9 Malate synthase",
+ "2.3.3.10 Hydroxymethylglutaryl-CoA synthase",
+ "2.3.3.11 2-hydroxyglutarate synthase",
+ "2.3.3.12 3-propylmalate synthase",
+ "2.3.3.13 2-isopropylmalate synthase",
+ "2.3.3.14 Homocitrate synthase",
+ "2.3.3.15 Sulfoacetaldehyde acetyltransferase",
+ "2.4.1.1 Phosphorylase",
+ "2.4.1.2 Dextrin dextranase",
+ "2.4.1.4 Amylosucrase",
+ "2.4.1.5 Dextransucrase",
+ "2.4.1.7 Sucrose phosphorylase",
+ "2.4.1.8 Maltose phosphorylase",
+ "2.4.1.9 Inulosucrase",
+ "2.4.1.10 Levansucrase",
+ "2.4.1.11 Glycogen(starch) synthase",
+ "2.4.1.12 Cellulose synthase (UDP-forming)",
+ "2.4.1.13 Sucrose synthase",
+ "2.4.1.14 Sucrose-phosphate synthase",
+ "2.4.1.15 Alpha,alpha-trehalose-phosphate synthase (UDP-forming)",
+ "2.4.1.16 Chitin synthase",
+ "2.4.1.17 Glucuronosyltransferase",
+ "2.4.1.18 1,4-alpha-glucan branching enzyme",
+ "2.4.1.19 Cyclomaltodextrin glucanotransferase",
+ "2.4.1.20 Cellobiose phosphorylase",
+ "2.4.1.21 Starch synthase",
+ "2.4.1.22 Lactose synthase",
+ "2.4.1.23 Sphingosine beta-galactosyltransferase",
+ "2.4.1.24 1,4-alpha-glucan 6-alpha-glucosyltransferase",
+ "2.4.1.25 4-alpha-glucanotransferase",
+ "2.4.1.26 DNA alpha-glucosyltransferase",
+ "2.4.1.27 DNA beta-glucosyltransferase",
+ "2.4.1.28 Glucosyl-DNA beta-glucosyltransferase",
+ "2.4.1.29 Cellulose synthase (GDP-forming)",
+ "2.4.1.30 1,3-beta-oligoglucan phosphorylase",
+ "2.4.1.31 Laminaribiose phosphorylase",
+ "2.4.1.32 Glucomannan 4-beta-mannosyltransferase",
+ "2.4.1.33 Alginate synthase",
+ "2.4.1.34 1,3-beta-glucan synthase",
+ "2.4.1.35 Phenol beta-glucosyltransferase",
+ "2.4.1.36 Alpha,alpha-trehalose-phosphate synthase (GDP-forming)",
+ "2.4.1.37 Fucosylgalactoside 3-alpha-galactosyltransferase",
+ "2.4.1.38 Beta-N-acetylglucosaminylglycopeptide beta-1,4-galactosyltransferase",
+ "2.4.1.39 Steroid N-acetylglucosaminyltransferase",
+ "2.4.1.40 Glycoprotein-fucosylgalactoside alpha-N-acetylgalactosaminyltransferase",
+ "2.4.1.41 Polypeptide N-acetylgalactosaminyltransferase",
+ "2.4.1.43 Polygalacturonate 4-alpha-galacturonosyltransferase",
+ "2.4.1.44 Lipopolysaccharide 3-alpha-galactosyltransferase",
+ "2.4.1.45 2-hydroxyacylsphingosine 1-beta-galactosyltransferase",
+ "2.4.1.46 Monogalactosyldiacylglycerol synthase",
+ "2.4.1.47 N-acylsphingosine galactosyltransferase",
+ "2.4.1.48 Heteroglycan alpha-mannosyltransferase",
+ "2.4.1.49 Cellodextrin phosphorylase",
+ "2.4.1.50 Procollagen galactosyltransferase",
+ "2.4.1.52 Poly(glycerol-phosphate) alpha-glucosyltransferase",
+ "2.4.1.53 Poly(ribitol-phosphate) beta-glucosyltransferase",
+ "2.4.1.54 Undecaprenyl-phosphate mannosyltransferase",
+ "2.4.1.56 Lipopolysaccharide N-acetylglucosaminyltransferase",
+ "2.4.1.57 Phosphatidylinositol alpha-mannosyltransferase",
+ "2.4.1.58 Lipopolysaccharide glucosyltransferase I",
+ "2.4.1.60 Abequosyltransferase",
+ "2.4.1.62 Ganglioside galactosyltransferase",
+ "2.4.1.63 Linamarin synthase",
+ "2.4.1.64 Alpha,alpha-trehalose phosphorylase",
+ "2.4.1.65 3-galactosyl-N-acetylglucosaminide 4-alpha-L-fucosyltransferase",
+ "2.4.1.66 Procollagen glucosyltransferase",
+ "2.4.1.67 Galactinol--raffinose galactosyltransferase",
+ "2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase",
+ "2.4.1.69 Galactoside 2-alpha-L-fucosyltransferase",
+ "2.4.1.70 Poly(ribitol-phosphate) N-acetylglucosaminyltransferase",
+ "2.4.1.71 Arylamine glucosyltransferase",
+ "2.4.1.73 Lipopolysaccharide glucosyltransferase II",
+ "2.4.1.74 Glycosaminoglycan galactosyltransferase",
+ "2.4.1.78 Phosphopolyprenol glucosyltransferase",
+ "2.4.1.79 Globotriaosylceramide 3-beta-N-acetylgalactosaminyltransferase",
+ "2.4.1.80 Ceramide glucosyltransferase",
+ "2.4.1.81 Flavone 7-O-beta-glucosyltransferase",
+ "2.4.1.82 Galactinol--sucrose galactosyltransferase",
+ "2.4.1.83 Dolichyl-phosphate beta-D-mannosyltransferase",
+ "2.4.1.85 Cyanohydrin beta-glucosyltransferase",
+ "2.4.1.86 Glucosaminylgalactosylglucosylceramide beta-galactosyltransferase",
+ "2.4.1.87 N-acetyllactosaminide 3-alpha-galactosyltransferase",
+ "2.4.1.88 Globoside alpha-N-acetylgalactosaminyltransferase",
+ "2.4.1.90 N-acetyllactosamine synthase",
+ "2.4.1.91 Flavonol 3-O-glucosyltransferase",
+ "2.4.1.92 (N-acetylneuraminyl)-galactosylglucosylceramide N-acetylgalactosaminyltransferase",
+ "2.4.1.94 Protein N-acetylglucosaminyltransferase",
+ "2.4.1.95 Bilirubin-glucuronoside glucuronosyltransferase",
+ "2.4.1.96 sn-glycerol-3-phosphate 1-galactosyltransferase",
+ "2.4.1.97 1,3-beta-D-glucan phosphorylase",
+ "2.4.1.99 Sucrose:sucrose fructosyltransferase",
+ "2.4.1.100 2,1-fructan:2,1-fructan 1-fructosyltransferase",
+ "2.4.1.101 Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase",
+ "2.4.1.102 Beta-1,3-galactosyl-O-glycosyl-glycoprotein beta-1,6-N-acetylglucosaminyltransferase",
+ "2.4.1.103 Alizarin 2-beta-glucosyltransferase",
+ "2.4.1.104 o-dihydroxycoumarin 7-O-glucosyltransferase",
+ "2.4.1.105 Vitexin beta-glucosyltransferase",
+ "2.4.1.106 Isovitexin beta-glucosyltransferase",
+ "2.4.1.109 Dolichyl-phosphate-mannose-protein mannosyltransferase",
+ "2.4.1.110 tRNA-queuosine beta-mannosyltransferase",
+ "2.4.1.111 Coniferyl-alcohol glucosyltransferase",
+ "2.4.1.113 Alpha-1,4-glucan-protein synthase (ADP-forming)",
+ "2.4.1.114 2-coumarate O-beta-glucosyltransferase",
+ "2.4.1.115 Anthocyanidin 3-O-glucosyltransferase",
+ "2.4.1.116 Cyanidin 3-O-rutinoside 5-O-glucosyltransferase",
+ "2.4.1.117 Dolichyl-phosphate beta-glucosyltransferase",
+ "2.4.1.118 Cytokinin 7-beta-glucosyltransferase",
+ "2.4.1.119 Dolichyl-diphosphooligosaccharide--protein glycotransferase",
+ "2.4.1.120 Sinapate 1-glucosyltransferase",
+ "2.4.1.121 Indole-3-acetate beta-glucosyltransferase",
+ "2.4.1.122 Glycoprotein-N-acetylgalactosamine 3-beta-galactosyltransferase",
+ "2.4.1.123 Inositol 3-alpha-galactosyltransferase",
+ "2.4.1.125 Sucrose--1,6-alpha-glucan 3(6)-alpha-glucosyltransferase",
+ "2.4.1.126 Hydroxycinnamate 4-beta-glucosyltransferase",
+ "2.4.1.127 Monoterpenol beta-glucosyltransferase",
+ "2.4.1.128 Scopoletin glucosyltransferase",
+ "2.4.1.129 Peptidoglycan glycosyltransferase",
+ "2.4.1.130 Dolichyl-phosphate-mannose--glycolipid alpha-mannosyltransferase",
+ "2.4.1.131 Glycolipid 2-alpha-mannosyltransferase",
+ "2.4.1.132 Glycolipid 3-alpha-mannosyltransferase",
+ "2.4.1.133 Xylosylprotein 4-beta-galactosyltransferase",
+ "2.4.1.134 Galactosylxylosylprotein 3-beta-galactosyltransferase",
+ "2.4.1.135 Galactosylgalactosylxylosylprotein 3-beta-glucuronosyltransferase",
+ "2.4.1.136 Gallate 1-beta-glucosyltransferase",
+ "2.4.1.137 sn-glycerol-3-phosphate 2-alpha-galactosyltransferase",
+ "2.4.1.138 Mannotetraose 2-alpha-N-acetylglucosaminyltransferase",
+ "2.4.1.139 Maltose synthase",
+ "2.4.1.140 Alternansucrase",
+ "2.4.1.141 N-acetylglucosaminyldiphosphodolichol N-acetylglucosaminyltransferase",
+ "2.4.1.142 Chitobiosyldiphosphodolichol beta-mannosyltransferase",
+ "2.4.1.143 Alpha-1,6-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase",
+ "2.4.1.144 Beta-1,4-mannosyl-glycoprotein 4-beta-N-acetylglucosaminyltransferase",
+ "2.4.1.145 Alpha-1,3-mannosyl-glycoprotein 4-beta-N-acetylglucosaminyltransferase",
+ "2.4.1.146 Beta-1,3-galactosyl-O-glycosyl-glycoprotein beta-1,3-N-acetylglucosaminyltransferase",
+ "2.4.1.147 Acetylgalactosaminyl-O-glycosyl-glycoprotein beta-1,3-N-acetylglucosaminyltransferase",
+ "2.4.1.148 Acetylgalactosaminyl-O-glycosyl-glycoprotein beta-1,6-N-acetylglucosaminyltransferase",
+ "2.4.1.149 N-acetyllactosaminide beta-1,3-N-acetylglucosaminyltransferase",
+ "2.4.1.150 N-acetyllactosaminide beta-1,6-N-acetylglucosaminyl-transferase",
+ "2.4.1.152 4-galactosyl-N-acetylglucosaminide 3-alpha-L-fucosyltransferase",
+ "2.4.1.153 Dolichyl-phosphate alpha-N-acetylglucosaminyltransferase",
+ "2.4.1.155 Alpha-1,6-mannosyl-glycoprotein 6-beta-N-acetylglucosaminyltransferase",
+ "2.4.1.156 Indolylacetyl-myo-inositol galactosyltransferase",
+ "2.4.1.157 1,2-diacylglycerol 3-glucosyltransferase",
+ "2.4.1.158 13-hydroxydocosanoate 13-beta-glucosyltransferase",
+ "2.4.1.159 Flavonol-3-O-glucoside L-rhamnosyltransferase",
+ "2.4.1.160 Pyridoxine 5'-O-beta-D-glucosyltransferase",
+ "2.4.1.161 Oligosaccharide 4-alpha-D-glucosyltransferase",
+ "2.4.1.162 Aldose beta-D-fructosyltransferase",
+ "2.4.1.163 Beta-galactosyl-N-acetylglucosaminylgalactosylglucosyl-ceramide beta-1,3-acetylglucosaminyltransferase",
+ "2.4.1.164 Galactosyl-N-acetylglucosaminylgalactosylglucosyl-ceramide beta-1,6-N-acetylglucosaminyltransferase",
+ "2.4.1.165 N-acetylneuraminylgalactosylglucosylceramide beta-1,4-N-acetylgalactosaminyltransferase",
+ "2.4.1.166 Raffinose--raffinose alpha-galactosyltransferase",
+ "2.4.1.167 Sucrose 6(F)-alpha-galactosyltransferase",
+ "2.4.1.168 Xyloglucan 4-glucosyltransferase",
+ "2.4.1.170 Isoflavone 7-O-glucosyltransferase",
+ "2.4.1.171 Methyl-ONN-azoxymethanol beta-D-glucosyltransferase",
+ "2.4.1.172 Salicyl-alcohol beta-D-glucosyltransferase",
+ "2.4.1.173 Sterol 3-beta-glucosyltransferase",
+ "2.4.1.174 Glucuronylgalactosylproteoglycan 4-beta-N-acetylgalactosaminyltransferase",
+ "2.4.1.175 Glucuronosyl-N-acetylgalactosaminyl-proteoglycan 4-beta-N-acetylgalactosaminyltransferase",
+ "2.4.1.176 Gibberellin beta-D-glucosyltransferase",
+ "2.4.1.177 Cinnamate beta-D-glucosyltransferase",
+ "2.4.1.178 Hydroxymandelonitrile glucosyltransferase",
+ "2.4.1.179 Lactosylceramide beta-1,3-galactosyltransferase",
+ "2.4.1.180 Lipopolysaccharide N-acetylmannosaminouronosyltransferase",
+ "2.4.1.181 Hydroxyanthraquinone glucosyltransferase",
+ "2.4.1.182 Lipid-A-disaccharide synthase",
+ "2.4.1.183 Alpha-1,3-glucan synthase",
+ "2.4.1.184 Galactolipid galactosyltransferase",
+ "2.4.1.185 Flavanone 7-O-beta-glucosyltransferase",
+ "2.4.1.186 Glycogenin glucosyltransferase",
+ "2.4.1.187 N-acetylglucosaminyldiphosphoundecaprenol N-acetyl-beta-D-mannosaminyltransferase",
+ "2.4.1.188 N-acetylglucosaminyldiphosphoundecaprenol glucosyltransferase",
+ "2.4.1.189 Luteolin 7-O-glucuronosyltransferase",
+ "2.4.1.190 Luteolin-7-O-glucuronide 2''-O-glucuronosyltransferase",
+ "2.4.1.191 Luteolin-7-O-diglucuronide 4'-O-glucuronosyltransferase",
+ "2.4.1.192 Nuatigenin 3-beta-glucosyltransferase",
+ "2.4.1.193 Sarsapogenin 3-beta-glucosyltransferase",
+ "2.4.1.194 4-hydroxybenzoate 4-O-beta-D-glucosyltransferase",
+ "2.4.1.195 N-hydroxythioamide S-beta-glucosyltransferase",
+ "2.4.1.196 Nicotinate glucosyltransferase",
+ "2.4.1.197 High-mannose-oligosaccharide beta-1,4-N-acetylglucosaminyltransferase",
+ "2.4.1.198 Phosphatidylinositol N-acetylglucosaminyltransferase",
+ "2.4.1.199 Beta-mannosylphosphodecaprenol--mannooligosaccharide 6-mannosyltransferase",
+ "2.4.1.201 Alpha-1,6-mannosyl-glycoprotein 4-beta-N-acetylglucosaminyltransferase",
+ "2.4.1.202 2,4-dihydroxy-7-methoxy-2H-1,4-benzoxazin-3(4H)-one 2-D-glucosyltransferase",
+ "2.4.1.203 Trans-zeatin O-beta-D-glucosyltransferase",
+ "2.4.1.205 Galactogen 6-beta-galactosyltransferase",
+ "2.4.1.206 Lactosylceramide 1,3-N-acetyl-beta-D-glucosaminyltransferase",
+ "2.4.1.207 Xyloglucan:xyloglucosyl transferase",
+ "2.4.1.208 Diglucosyl diacylglycerol synthase",
+ "2.4.1.209 Cis-p-coumarate glucosyltransferase",
+ "2.4.1.210 Limonoid glucosyltransferase",
+ "2.4.1.211 1,3-beta-galactosyl-N-acetylhexosamine phosphorylase",
+ "2.4.1.212 Hyaluronan synthase",
+ "2.4.1.213 Glucosylglycerol-phosphate synthase",
+ "2.4.1.214 Glycoprotein 3-alpha-L-fucosyltransferase",
+ "2.4.1.215 Cis-zeatin O-beta-D-glucosyltransferase",
+ "2.4.1.216 Trehalose 6-phosphate phosphorylase",
+ "2.4.1.217 Mannosyl-3-phosphoglycerate synthase",
+ "2.4.1.218 Hydroquinone glucosyltransferase",
+ "2.4.1.219 Vomilenine glucosyltransferase",
+ "2.4.1.220 Indoxyl-UDPG glucosyltransferase",
+ "2.4.1.221 Peptide-O-fucosyltransferase",
+ "2.4.1.222 O-fucosylpeptide 3-beta-N-acetylglucosaminyltransferase",
+ "2.4.1.223 Glucuronyl-galactosyl-proteoglycan 4-alpha-N-acetylglucosaminyltransferase",
+ "2.4.1.224 Glucuronosyl-N-acetylglucosaminyl-proteoglycan 4-alpha-N-acetylglucosaminyltransferase",
+ "2.4.1.225 N-acetylglucosaminyl-proteoglycan 4-beta-glucuronosyltransferase",
+ "2.4.1.226 N-acetylgalactosaminyl-proteoglycan 3-beta-glucuronosyltransferase",
+ "2.4.1.227 Undecaprenyldiphospho-muramoylpentapeptide beta-N-acetylglucosaminyltransferase",
+ "2.4.1.228 Lactosylceramide 4-alpha-galactosyltransferase",
+ "2.4.1.229 [Skp1-protein]-hydroxyproline N-acetylglucosaminyltransferase",
+ "2.4.1.230 Kojibiose phosphorylase",
+ "2.4.1.231 Alpha,alpha-trehalose phosphorylase (configuration-retaining)",
+ "2.4.1.232 Initiation-specific alpha-1,6-mannosyltransferase",
+ "2.4.1.234 Kaempferol 3-O-galactosyltransferase",
+ "2.4.1.236 Flavanone 7-O-glucoside 2''-O-beta-L-rhamnosyltransferase",
+ "2.4.1.237 Flavonol 7-O-beta-glucosyltransferase",
+ "2.4.1.238 Anthocyanin 3'-O-beta-glucosyltransferase",
+ "2.4.1.239 Flavonol-3-O-glucoside glucosyltransferase",
+ "2.4.1.240 Flavonol-3-O-glycoside glucosyltransferase",
+ "2.4.1.241 Digalactosyldiacylglycerol synthase",
+ "2.4.1.242 NDP-glucose--starch glucosyltransferase",
+ "2.4.1.243 6(G)-fructosyltransferase",
+ "2.4.1.244 N-acetyl-beta-glucosaminyl-glycoprotein 4-beta-N-acetylgalactosaminyltransferase",
+ "2.4.1.245 Alpha,alpha-trehalose synthase",
+ "2.4.1.246 Mannosylfructose-phosphate synthase",
+ "2.4.1.247 Beta-D-galactosyl-(1->4)-L-rhamnose phosphorylase",
+ "2.4.1.248 Cycloisomaltooligosaccharide glucanotransferase",
+ "2.4.1.249 Delphinidin 3',5'-O-glucosyltransferase",
+ "2.4.1.250 D-inositol-3-phosphate glycosyltransferase",
+ "2.4.1.251 GlcA-beta-(1->2)-D-Man-alpha-(1->3)-D-Glc-beta-(1->4)-D-Glc-alpha-1-diphospho-di-trans,octa-cis-undecaprenol 4-beta-mannosyltransferase",
+ "2.4.1.252 GDP-mannose:cellobiosyl-diphosphopolyprenol alpha-mannosyltransferase",
+ "2.4.1.253 Baicalein 7-O-glucuronosyltransferase",
+ "2.4.1.254 Cyanidin-3-O-glucoside 2-O-glucuronosyltransferase",
+ "2.4.1.255 Protein O-GlcNAc transferase",
+ "2.4.1.n2 Loliose synthase",
+ "2.4.2.1 Purine-nucleoside phosphorylase",
+ "2.4.2.2 Pyrimidine-nucleoside phosphorylase",
+ "2.4.2.3 Uridine phosphorylase",
+ "2.4.2.4 Thymidine phosphorylase",
+ "2.4.2.5 Nucleoside ribosyltransferase",
+ "2.4.2.6 Nucleoside deoxyribosyltransferase",
+ "2.4.2.7 Adenine phosphoribosyltransferase",
+ "2.4.2.8 Hypoxanthine phosphoribosyltransferase",
+ "2.4.2.9 Uracil phosphoribosyltransferase",
+ "2.4.2.10 Orotate phosphoribosyltransferase",
+ "2.4.2.11 Nicotinate phosphoribosyltransferase",
+ "2.4.2.12 Nicotinamide phosphoribosyltransferase",
+ "2.4.2.14 Amidophosphoribosyltransferase",
+ "2.4.2.15 Guanosine phosphorylase",
+ "2.4.2.16 Urate-ribonucleotide phosphorylase",
+ "2.4.2.17 ATP phosphoribosyltransferase",
+ "2.4.2.18 Anthranilate phosphoribosyltransferase",
+ "2.4.2.19 Nicotinate-nucleotide diphosphorylase (carboxylating)",
+ "2.4.2.20 Dioxotetrahydropyrimidine phosphoribosyltransferase",
+ "2.4.2.21 Nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase",
+ "2.4.2.22 Xanthine phosphoribosyltransferase",
+ "2.4.2.23 Deoxyuridine phosphorylase",
+ "2.4.2.24 1,4-beta-D-xylan synthase",
+ "2.4.2.25 Flavone apiosyltransferase",
+ "2.4.2.26 Protein xylosyltransferase",
+ "2.4.2.27 dTDP-dihydrostreptose--streptidine-6-phosphate dihydrostreptosyltransferase",
+ "2.4.2.28 S-methyl-5'-thioadenosine phosphorylase",
+ "2.4.2.29 tRNA-guanine transglycosylase",
+ "2.4.2.30 NAD(+) ADP-ribosyltransferase",
+ "2.4.2.31 NAD(+)--protein-arginine ADP-ribosyltransferase",
+ "2.4.2.32 Dolichyl-phosphate D-xylosyltransferase",
+ "2.4.2.33 Dolichyl-xylosyl-phosphate--protein xylosyltransferase",
+ "2.4.2.34 Indolylacetylinositol arabinosyltransferase",
+ "2.4.2.35 Flavonol-3-O-glycoside xylosyltransferase",
+ "2.4.2.36 NAD(+)--diphthamide ADP-ribosyltransferase",
+ "2.4.2.37 NAD(+)--dinitrogen-reductase ADP-D-ribosyltransferase",
+ "2.4.2.38 Glycoprotein 2-beta-D-xylosyltransferase",
+ "2.4.2.39 Xyloglucan 6-xylosyltransferase",
+ "2.4.2.40 Zeatin O-beta-D-xylosyltransferase",
+ "2.4.2.41 Xylogalacturonan beta-1,3-xylosyltransferase",
+ "2.4.2.42 UDP-D-xylose:beta-D-glucoside alpha-1,3-D-xylosyltransferase",
+ "2.4.2.43 Lipid IV(A) 4-amino-4-deoxy-L-arabinosyltransferase",
+ "2.4.99.1 Beta-galactoside alpha-2,6-sialyltransferase",
+ "2.4.99.2 Monosialoganglioside sialyltransferase",
+ "2.4.99.3 Alpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase",
+ "2.4.99.4 Beta-galactoside alpha-2,3-sialyltransferase",
+ "2.4.99.5 Galactosyldiacylglycerol alpha-2,3-sialyltransferase",
+ "2.4.99.6 N-acetyllactosaminide alpha-2,3-sialyltransferase",
+ "2.4.99.7 Alpha-N-acetylneuraminyl-2,3-beta-galactosyl-1,3-N-acetylgalactosaminide 6-alpha-sialyltransferase",
+ "2.4.99.8 Alpha-N-acetylneuraminate alpha-2,8-sialyltransferase",
+ "2.4.99.9 Lactosylceramide alpha-2,3-sialyltransferase",
+ "2.4.99.10 Neolactotetraosylceramide alpha-2,3-sialyltransferase",
+ "2.4.99.11 Lactosylceramide alpha-2,6-N-sialyltransferase",
+ "2.4.99.12 Lipid IV(A) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.4.99.13 (KDO)-lipid IV(A) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.4.99.14 (KDO)(2)-lipid IV(A) (2-8) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.4.99.15 (KDO)(3)-lipid IV(A) (2-4) 3-deoxy-D-manno-octulosonic acid transferase",
+ "2.5.1.1 Dimethylallyltranstransferase",
+ "2.5.1.2 Thiamine pyridinylase",
+ "2.5.1.3 Thiamine-phosphate diphosphorylase",
+ "2.5.1.4 Adenosylmethionine cyclotransferase",
+ "2.5.1.5 Galactose-6-sulfurylase",
+ "2.5.1.6 Methionine adenosyltransferase",
+ "2.5.1.7 UDP-N-acetylglucosamine 1-carboxyvinyltransferase",
+ "2.5.1.9 Riboflavin synthase",
+ "2.5.1.10 (2E,6E)-farnesyl diphosphate synthase",
+ "2.5.1.15 Dihydropteroate synthase",
+ "2.5.1.16 Spermidine synthase",
+ "2.5.1.17 Cob(I)yrinic acid a,c-diamide adenosyltransferase",
+ "2.5.1.18 Glutathione transferase",
+ "2.5.1.19 3-phosphoshikimate 1-carboxyvinyltransferase",
+ "2.5.1.20 Rubber cis-polyprenylcistransferase",
+ "2.5.1.21 Squalene synthase",
+ "2.5.1.22 Spermine synthase",
+ "2.5.1.23 Sym-norspermidine synthase",
+ "2.5.1.24 Discadenine synthase",
+ "2.5.1.25 tRNA-uridine aminocarboxypropyltransferase",
+ "2.5.1.26 Alkylglycerone-phosphate synthase",
+ "2.5.1.27 Adenylate dimethylallyltransferase",
+ "2.5.1.28 Dimethylallylcistransferase",
+ "2.5.1.29 Farnesyltranstransferase",
+ "2.5.1.30 Heptaprenyl diphosphate synthase",
+ "2.5.1.31 Di-trans,poly-cis-undecaprenyl-diphosphate synthase ((2E,6E)-farnesyl-diphosphate specific)",
+ "2.5.1.32 Phytoene synthase",
+ "2.5.1.34 4-dimethylallyltryptophan synthase",
+ "2.5.1.35 Aspulvinone dimethylallyltransferase",
+ "2.5.1.36 Trihydroxypterocarpan dimethylallyltransferase",
+ "2.5.1.38 Isonocardicin synthase",
+ "2.5.1.39 4-hydroxybenzoate polyprenyltransferase",
+ "2.5.1.41 Phosphoglycerol geranylgeranyltransferase",
+ "2.5.1.42 Geranylgeranylglycerol-phosphate geranylgeranyltransferase",
+ "2.5.1.43 Nicotianamine synthase",
+ "2.5.1.44 Homospermidine synthase",
+ "2.5.1.45 Homospermidine synthase (spermidine-specific)",
+ "2.5.1.46 Deoxyhypusine synthase",
+ "2.5.1.47 Cysteine synthase",
+ "2.5.1.48 Cystathionine gamma-synthase",
+ "2.5.1.49 O-acetylhomoserine aminocarboxypropyltransferase",
+ "2.5.1.50 Zeatin 9-aminocarboxyethyltransferase",
+ "2.5.1.51 Beta-pyrazolylalanine synthase",
+ "2.5.1.52 L-mimosine synthase",
+ "2.5.1.53 Uracilylalanine synthase",
+ "2.5.1.54 3-deoxy-7-phosphoheptulonate synthase",
+ "2.5.1.55 3-deoxy-8-phosphooctulonate synthase",
+ "2.5.1.56 N-acetylneuraminate synthase",
+ "2.5.1.57 N-acylneuraminate-9-phosphate synthase",
+ "2.5.1.58 Protein farnesyltransferase",
+ "2.5.1.59 Protein geranylgeranyltransferase type I",
+ "2.5.1.60 Protein geranylgeranyltransferase type II",
+ "2.5.1.61 Hydroxymethylbilane synthase",
+ "2.5.1.62 Chlorophyll synthase",
+ "2.5.1.63 Adenosyl-fluoride synthase",
+ "2.5.1.65 O-phosphoserine sulfhydrylase",
+ "2.5.1.66 N(2)-(2-carboxyethyl)arginine synthase",
+ "2.5.1.67 Chrysanthemyl diphosphate synthase",
+ "2.5.1.68 (2Z,6E)-farnesyl diphosphate synthase",
+ "2.5.1.69 Lavandulyl diphosphate synthase",
+ "2.5.1.70 Naringenin 8-dimethylallyltransferase",
+ "2.5.1.71 Leachianone-G 2''-dimethylallyltransferase",
+ "2.5.1.72 Quinolinate synthase",
+ "2.5.1.73 O-phospho-L-seryl-tRNA:Cys-tRNA synthase",
+ "2.5.1.74 1,4-dihydroxy-2-naphthoate polyprenyltransferase",
+ "2.5.1.75 tRNA dimethylallyltransferase",
+ "2.5.1.76 Cysteate synthase",
+ "2.5.1.77 7,8-didemethyl-8-hydroxy-5-deazariboflavin synthase",
+ "2.5.1.78 6,7-dimethyl-8-ribityllumazine synthase",
+ "2.5.1.79 Thermospermine synthase",
+ "2.5.1.80 7-dimethylallyltryptophan synthase",
+ "2.5.1.81 Geranylfarnesyl diphosphate synthase",
+ "2.5.1.82 Hexaprenyl diphosphate synthase (geranylgeranyl-diphosphate specific)",
+ "2.5.1.83 Hexaprenyl-diphosphate synthase ((2E,6E)-farnesyl-diphosphate specific)",
+ "2.5.1.84 All-trans-nonaprenyl-diphosphate synthase (geranyl-diphosphate specific)",
+ "2.5.1.85 All-trans-nonaprenyl-diphosphate synthase (geranylgeranyl-diphosphate specific)",
+ "2.5.1.86 Trans,poly-cis-decaprenyl diphosphate synthase",
+ "2.5.1.87 Di-trans,poly-cis-polyprenyl diphosphate synthase ((2E,6E)-farnesyl diphosphate specific)",
+ "2.5.1.88 Trans,poly-cis-polyprenyl diphosphate synthase ((2Z,6E)-farnesyl diphosphate specific)",
+ "2.5.1.89 Tri-trans,poly-cis-undecaprenyl-diphosphate synthase (geranylgeranyl-diphosphate specific)",
+ "2.5.1.90 All-trans-octaprenyl-diphosphate synthase",
+ "2.5.1.91 All-trans-decaprenyl-diphosphate synthase",
+ "2.5.1.92 (2Z,6Z)-farnesyl diphosphate synthase",
+ "2.5.1.93 4-hydroxybenzoate geranyltransferase",
+ "2.5.1.94 Adenosyl-chloride synthase",
+ "2.6.1.1 Aspartate transaminase",
+ "2.6.1.2 Alanine transaminase",
+ "2.6.1.3 Cysteine transaminase",
+ "2.6.1.4 Glycine transaminase",
+ "2.6.1.5 Tyrosine transaminase",
+ "2.6.1.6 Leucine transaminase",
+ "2.6.1.7 Kynurenine--oxoglutarate transaminase",
+ "2.6.1.8 2,5-diaminovalerate transaminase",
+ "2.6.1.9 Histidinol-phosphate transaminase",
+ "2.6.1.11 Acetylornithine transaminase",
+ "2.6.1.12 Alanine--oxo-acid transaminase",
+ "2.6.1.13 Ornithine aminotransferase",
+ "2.6.1.14 Asparagine--oxo-acid transaminase",
+ "2.6.1.15 Glutamine--pyruvate transaminase",
+ "2.6.1.16 Glutamine--fructose-6-phosphate transaminase (isomerizing)",
+ "2.6.1.17 Succinyldiaminopimelate transaminase",
+ "2.6.1.18 Beta-alanine--pyruvate transaminase",
+ "2.6.1.19 4-aminobutyrate transaminase",
+ "2.6.1.21 D-amino-acid transaminase",
+ "2.6.1.22 (S)-3-amino-2-methylpropionate transaminase",
+ "2.6.1.23 4-hydroxyglutamate transaminase",
+ "2.6.1.24 Diiodotyrosine transaminase",
+ "2.6.1.26 Thyroid-hormone transaminase",
+ "2.6.1.27 Tryptophan transaminase",
+ "2.6.1.28 Tryptophan--phenylpyruvate transaminase",
+ "2.6.1.29 Diamine transaminase",
+ "2.6.1.30 Pyridoxamine--pyruvate transaminase",
+ "2.6.1.31 Pyridoxamine--oxaloacetate transaminase",
+ "2.6.1.32 Valine--3-methyl-2-oxovalerate transaminase",
+ "2.6.1.33 dTDP-4-amino-4,6-dideoxy-D-glucose transaminase",
+ "2.6.1.34 UDP-2-acetamido-4-amino-2,4,6-trideoxyglucose transaminase",
+ "2.6.1.35 Glycine--oxaloacetate transaminase",
+ "2.6.1.36 L-lysine 6-transaminase",
+ "2.6.1.37 2-aminoethylphosphonate--pyruvate transaminase",
+ "2.6.1.38 Histidine transaminase",
+ "2.6.1.39 2-aminoadipate transaminase",
+ "2.6.1.40 (R)-3-amino-2-methylpropionate--pyruvate transaminase",
+ "2.6.1.41 D-methionine--pyruvate transaminase",
+ "2.6.1.42 Branched-chain-amino-acid transaminase",
+ "2.6.1.43 Aminolevulinate transaminase",
+ "2.6.1.44 Alanine--glyoxylate transaminase",
+ "2.6.1.45 Serine--glyoxylate transaminase",
+ "2.6.1.46 Diaminobutyrate--pyruvate transaminase",
+ "2.6.1.47 Alanine--oxomalonate transaminase",
+ "2.6.1.48 5-aminovalerate transaminase",
+ "2.6.1.49 Dihydroxyphenylalanine transaminase",
+ "2.6.1.50 Glutamine--scyllo-inositol transaminase",
+ "2.6.1.51 Serine--pyruvate transaminase",
+ "2.6.1.52 Phosphoserine transaminase",
+ "2.6.1.54 Pyridoxamine-phosphate transaminase",
+ "2.6.1.55 Taurine--2-oxoglutarate transaminase",
+ "2.6.1.56 1D-1-guanidino-3-amino-1,3-dideoxy-scyllo-inositol transaminase",
+ "2.6.1.57 Aromatic-amino-acid transaminase",
+ "2.6.1.58 Phenylalanine(histidine) transaminase",
+ "2.6.1.59 dTDP-4-amino-4,6-dideoxygalactose transaminase",
+ "2.6.1.60 Aromatic-amino-acid--glyoxylate transaminase",
+ "2.6.1.62 Adenosylmethionine--8-amino-7-oxononanoate transaminase",
+ "2.6.1.63 Kynurenine--glyoxylate transaminase",
+ "2.6.1.64 Glutamine--phenylpyruvate transaminase",
+ "2.6.1.65 N(6)-acetyl-beta-lysine transaminase",
+ "2.6.1.66 Valine--pyruvate transaminase",
+ "2.6.1.67 2-aminohexanoate transaminase",
+ "2.6.1.68 Ornithine(lysine) transaminase",
+ "2.6.1.70 Aspartate--phenylpyruvate transaminase",
+ "2.6.1.71 Lysine--pyruvate 6-transaminase",
+ "2.6.1.72 D-4-hydroxyphenylglycine transaminase",
+ "2.6.1.73 Methionine--glyoxylate transaminase",
+ "2.6.1.74 Cephalosporin-C transaminase",
+ "2.6.1.75 Cysteine-conjugate transaminase",
+ "2.6.1.76 Diaminobutyrate--2-oxoglutarate transaminase",
+ "2.6.1.77 Taurine--pyruvate aminotransferase",
+ "2.6.1.78 Aspartate--prephenate aminotransferase",
+ "2.6.1.79 Glutamate--prephenate aminotransferase",
+ "2.6.1.80 Nicotianamine aminotransferase",
+ "2.6.1.81 Succinylornithine transaminase",
+ "2.6.1.82 Putrescine aminotransferase",
+ "2.6.1.83 LL-diaminopimelate aminotransferase",
+ "2.6.1.84 Arginine--pyruvate transaminase",
+ "2.6.1.85 Aminodeoxychorismate synthase",
+ "2.6.1.86 2-amino-4-deoxychorismate synthase",
+ "2.6.1.87 UDP-4-amino-4-deoxy-L-arabinose aminotransferase",
+ "2.6.3.1 Oximinotransferase",
+ "2.6.99.1 dATP(dGTP)--DNA purinetransferase",
+ "2.6.99.2 Pyridoxine 5'-phosphate synthase",
+ "2.7.1.1 Hexokinase",
+ "2.7.1.2 Glucokinase",
+ "2.7.1.3 Ketohexokinase",
+ "2.7.1.4 Fructokinase",
+ "2.7.1.5 Rhamnulokinase",
+ "2.7.1.6 Galactokinase",
+ "2.7.1.7 Mannokinase",
+ "2.7.1.8 Glucosamine kinase",
+ "2.7.1.10 Phosphoglucokinase",
+ "2.7.1.11 6-phosphofructokinase",
+ "2.7.1.12 Gluconokinase",
+ "2.7.1.13 Dehydrogluconokinase",
+ "2.7.1.14 Sedoheptulokinase",
+ "2.7.1.15 Ribokinase",
+ "2.7.1.16 Ribulokinase",
+ "2.7.1.17 Xylulokinase",
+ "2.7.1.18 Phosphoribokinase",
+ "2.7.1.19 Phosphoribulokinase",
+ "2.7.1.20 Adenosine kinase",
+ "2.7.1.21 Thymidine kinase",
+ "2.7.1.22 Ribosylnicotinamide kinase",
+ "2.7.1.23 NAD(+) kinase",
+ "2.7.1.24 Dephospho-CoA kinase",
+ "2.7.1.25 Adenylyl-sulfate kinase",
+ "2.7.1.26 Riboflavin kinase",
+ "2.7.1.27 Erythritol kinase",
+ "2.7.1.28 Triokinase",
+ "2.7.1.29 Glycerone kinase",
+ "2.7.1.30 Glycerol kinase",
+ "2.7.1.31 Glycerate kinase",
+ "2.7.1.32 Choline kinase",
+ "2.7.1.33 Pantothenate kinase",
+ "2.7.1.34 Pantetheine kinase",
+ "2.7.1.35 Pyridoxal kinase",
+ "2.7.1.36 Mevalonate kinase",
+ "2.7.1.39 Homoserine kinase",
+ "2.7.1.40 Pyruvate kinase",
+ "2.7.1.41 Glucose-1-phosphate phosphodismutase",
+ "2.7.1.42 Riboflavin phosphotransferase",
+ "2.7.1.43 Glucuronokinase",
+ "2.7.1.44 Galacturonokinase",
+ "2.7.1.45 2-dehydro-3-deoxygluconokinase",
+ "2.7.1.46 L-arabinokinase",
+ "2.7.1.47 D-ribulokinase",
+ "2.7.1.48 Uridine kinase",
+ "2.7.1.49 Hydroxymethylpyrimidine kinase",
+ "2.7.1.50 Hydroxyethylthiazole kinase",
+ "2.7.1.51 L-fuculokinase",
+ "2.7.1.52 Fucokinase",
+ "2.7.1.53 L-xylulokinase",
+ "2.7.1.54 D-arabinokinase",
+ "2.7.1.55 Allose kinase",
+ "2.7.1.56 1-phosphofructokinase",
+ "2.7.1.58 2-dehydro-3-deoxygalactonokinase",
+ "2.7.1.59 N-acetylglucosamine kinase",
+ "2.7.1.60 N-acylmannosamine kinase",
+ "2.7.1.61 Acyl-phosphate--hexose phosphotransferase",
+ "2.7.1.62 Phosphoramidate--hexose phosphotransferase",
+ "2.7.1.63 Polyphosphate--glucose phosphotransferase",
+ "2.7.1.64 Inositol 3-kinase",
+ "2.7.1.65 Scyllo-inosamine 4-kinase",
+ "2.7.1.66 Undecaprenol kinase",
+ "2.7.1.67 1-phosphatidylinositol 4-kinase",
+ "2.7.1.68 1-phosphatidylinositol-4-phosphate 5-kinase",
+ "2.7.1.69 Protein-N(pi)-phosphohistidine--sugar phosphotransferase",
+ "2.7.1.71 Shikimate kinase",
+ "2.7.1.72 Streptomycin 6-kinase",
+ "2.7.1.73 Inosine kinase",
+ "2.7.1.74 Deoxycytidine kinase",
+ "2.7.1.76 Deoxyadenosine kinase",
+ "2.7.1.77 Nucleoside phosphotransferase",
+ "2.7.1.78 Polynucleotide 5'-hydroxyl-kinase",
+ "2.7.1.79 Diphosphate--glycerol phosphotransferase",
+ "2.7.1.80 Diphosphate--serine phosphotransferase",
+ "2.7.1.81 Hydroxylysine kinase",
+ "2.7.1.82 Ethanolamine kinase",
+ "2.7.1.83 Pseudouridine kinase",
+ "2.7.1.84 Alkylglycerone kinase",
+ "2.7.1.85 Beta-glucoside kinase",
+ "2.7.1.86 NADH kinase",
+ "2.7.1.87 Streptomycin 3''-kinase",
+ "2.7.1.88 Dihydrostreptomycin-6-phosphate 3'-alpha-kinase",
+ "2.7.1.89 Thiamine kinase",
+ "2.7.1.90 Diphosphate--fructose-6-phosphate 1-phosphotransferase",
+ "2.7.1.91 Sphinganine kinase",
+ "2.7.1.92 5-dehydro-2-deoxygluconokinase",
+ "2.7.1.93 Alkylglycerol kinase",
+ "2.7.1.94 Acylglycerol kinase",
+ "2.7.1.95 Kanamycin kinase",
+ "2.7.1.100 S-methyl-5-thioribose kinase",
+ "2.7.1.101 Tagatose kinase",
+ "2.7.1.102 Hamamelose kinase",
+ "2.7.1.103 Viomycin kinase",
+ "2.7.1.105 6-phosphofructo-2-kinase",
+ "2.7.1.106 Glucose-1,6-bisphosphate synthase",
+ "2.7.1.107 Diacylglycerol kinase",
+ "2.7.1.108 Dolichol kinase",
+ "2.7.1.113 Deoxyguanosine kinase",
+ "2.7.1.114 AMP--thymidine kinase",
+ "2.7.1.118 ADP--thymidine kinase",
+ "2.7.1.119 Hygromycin-B 7''-O-kinase",
+ "2.7.1.121 Phosphoenolpyruvate--glycerone phosphotransferase",
+ "2.7.1.122 Xylitol kinase",
+ "2.7.1.127 Inositol-trisphosphate 3-kinase",
+ "2.7.1.130 Tetraacyldisaccharide 4'-kinase",
+ "2.7.1.134 Inositol-tetrakisphosphate 1-kinase",
+ "2.7.1.136 Macrolide 2'-kinase",
+ "2.7.1.137 Phosphatidylinositol 3-kinase",
+ "2.7.1.138 Ceramide kinase",
+ "2.7.1.140 Inositol-tetrakisphosphate 5-kinase",
+ "2.7.1.142 Glycerol-3-phosphate--glucose phosphotransferase",
+ "2.7.1.143 Diphosphate-purine nucleoside kinase",
+ "2.7.1.144 Tagatose-6-phosphate kinase",
+ "2.7.1.145 Deoxynucleoside kinase",
+ "2.7.1.146 ADP-specific phosphofructokinase",
+ "2.7.1.147 ADP-specific glucokinase",
+ "2.7.1.148 4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase",
+ "2.7.1.149 1-phosphatidylinositol-5-phosphate 4-kinase",
+ "2.7.1.150 1-phosphatidylinositol-3-phosphate 5-kinase",
+ "2.7.1.151 Inositol-polyphosphate multikinase",
+ "2.7.1.153 Phosphatidylinositol-4,5-bisphosphate 3-kinase",
+ "2.7.1.154 Phosphatidylinositol-4-phosphate 3-kinase",
+ "2.7.1.156 Adenosylcobinamide kinase",
+ "2.7.1.157 N-acetylgalactosamine kinase",
+ "2.7.1.158 Inositol-pentakisphosphate 2-kinase",
+ "2.7.1.159 Inositol-1,3,4-trisphosphate 5/6-kinase",
+ "2.7.1.160 2'-phosphotransferase",
+ "2.7.1.161 CTP-dependent riboflavin kinase",
+ "2.7.1.162 N-acetylhexosamine 1-kinase",
+ "2.7.1.163 Hygromycin B 4-O-kinase",
+ "2.7.1.164 O-phosphoseryl-tRNA(Sec) kinase",
+ "2.7.1.165 Glycerate 2-kinase",
+ "2.7.1.166 3-deoxy-D-manno-octulosonic acid kinase",
+ "2.7.1.167 D-glycero-beta-D-manno-heptose-7-phosphate kinase",
+ "2.7.1.168 D-glycero-alpha-D-manno-heptose-7-phosphate kinase",
+ "2.7.1.169 Pantoate kinase",
+ "2.7.1.n1 Anhydro-N-acetylmuramic acid kinase",
+ "2.7.1.n4 Nicotinamide riboside kinase",
+ "2.7.1.n5 Diacylglycerol kinase (CTP dependent)",
+ "2.7.2.1 Acetate kinase",
+ "2.7.2.2 Carbamate kinase",
+ "2.7.2.3 Phosphoglycerate kinase",
+ "2.7.2.4 Aspartate kinase",
+ "2.7.2.6 Formate kinase",
+ "2.7.2.7 Butyrate kinase",
+ "2.7.2.8 Acetylglutamate kinase",
+ "2.7.2.10 Phosphoglycerate kinase (GTP)",
+ "2.7.2.11 Glutamate 5-kinase",
+ "2.7.2.12 Acetate kinase (diphosphate)",
+ "2.7.2.13 Glutamate 1-kinase",
+ "2.7.2.14 Branched-chain-fatty-acid kinase",
+ "2.7.2.15 Propionate kinase",
+ "2.7.3.1 Guanidinoacetate kinase",
+ "2.7.3.2 Creatine kinase",
+ "2.7.3.3 Arginine kinase",
+ "2.7.3.4 Taurocyamine kinase",
+ "2.7.3.5 Lombricine kinase",
+ "2.7.3.6 Hypotaurocyamine kinase",
+ "2.7.3.7 Opheline kinase",
+ "2.7.3.8 Ammonia kinase",
+ "2.7.3.9 Phosphoenolpyruvate--protein phosphotransferase",
+ "2.7.3.10 Agmatine kinase",
+ "2.7.4.1 Polyphosphate kinase",
+ "2.7.4.2 Phosphomevalonate kinase",
+ "2.7.4.3 Adenylate kinase",
+ "2.7.4.4 Nucleoside-phosphate kinase",
+ "2.7.4.6 Nucleoside-diphosphate kinase",
+ "2.7.4.7 Phosphomethylpyrimidine kinase",
+ "2.7.4.8 Guanylate kinase",
+ "2.7.4.9 dTMP kinase",
+ "2.7.4.10 Nucleoside-triphosphate--adenylate kinase",
+ "2.7.4.11 (Deoxy)adenylate kinase",
+ "2.7.4.12 T(2)-induced deoxynucleotide kinase",
+ "2.7.4.13 (Deoxy)nucleoside-phosphate kinase",
+ "2.7.4.14 UMP/CMP kinase",
+ "2.7.4.15 Thiamine-diphosphate kinase",
+ "2.7.4.16 Thiamine-phosphate kinase",
+ "2.7.4.17 3-phosphoglyceroyl-phosphate--polyphosphate phosphotransferase",
+ "2.7.4.18 Farnesyl-diphosphate kinase",
+ "2.7.4.19 5-methyldeoxycytidine-5'-phosphate kinase",
+ "2.7.4.20 Dolichyl-diphosphate--polyphosphate phosphotransferase",
+ "2.7.4.21 Inositol-hexakisphosphate kinase",
+ "2.7.4.22 UMP kinase",
+ "2.7.4.23 Ribose 1,5-bisphosphate phosphokinase",
+ "2.7.4.24 Diphosphoinositol-pentakisphosphate kinase",
+ "2.7.4.25 (d)CMP kinase",
+ "2.7.6.1 Ribose-phosphate diphosphokinase",
+ "2.7.6.2 Thiamine diphosphokinase",
+ "2.7.6.3 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine diphosphokinase",
+ "2.7.6.4 Nucleotide diphosphokinase",
+ "2.7.6.5 GTP diphosphokinase",
+ "2.7.7.1 Nicotinamide-nucleotide adenylyltransferase",
+ "2.7.7.2 FAD synthetase",
+ "2.7.7.3 Pantetheine-phosphate adenylyltransferase",
+ "2.7.7.4 Sulfate adenylyltransferase",
+ "2.7.7.5 Sulfate adenylyltransferase (ADP)",
+ "2.7.7.6 DNA-directed RNA polymerase",
+ "2.7.7.7 DNA-directed DNA polymerase",
+ "2.7.7.8 Polyribonucleotide nucleotidyltransferase",
+ "2.7.7.9 UTP--glucose-1-phosphate uridylyltransferase",
+ "2.7.7.10 UTP--hexose-1-phosphate uridylyltransferase",
+ "2.7.7.11 UTP--xylose-1-phosphate uridylyltransferase",
+ "2.7.7.12 UDP-glucose--hexose-1-phosphate uridylyltransferase",
+ "2.7.7.13 Mannose-1-phosphate guanylyltransferase",
+ "2.7.7.14 Ethanolamine-phosphate cytidylyltransferase",
+ "2.7.7.15 Choline-phosphate cytidylyltransferase",
+ "2.7.7.18 Nicotinate-nucleotide adenylyltransferase",
+ "2.7.7.19 Polynucleotide adenylyltransferase",
+ "2.7.7.22 Mannose-1-phosphate guanylyltransferase (GDP)",
+ "2.7.7.23 UDP-N-acetylglucosamine diphosphorylase",
+ "2.7.7.24 Glucose-1-phosphate thymidylyltransferase",
+ "2.7.7.27 Glucose-1-phosphate adenylyltransferase",
+ "2.7.7.28 Nucleoside-triphosphate-aldose-1-phosphate nucleotidyltransferase",
+ "2.7.7.30 Fucose-1-phosphate guanylyltransferase",
+ "2.7.7.31 DNA nucleotidylexotransferase",
+ "2.7.7.32 Galactose-1-phosphate thymidylyltransferase",
+ "2.7.7.33 Glucose-1-phosphate cytidylyltransferase",
+ "2.7.7.34 Glucose-1-phosphate guanylyltransferase",
+ "2.7.7.35 Ribose-5-phosphate adenylyltransferase",
+ "2.7.7.36 Aldose-1-phosphate adenylyltransferase",
+ "2.7.7.37 Aldose-1-phosphate nucleotidyltransferase",
+ "2.7.7.38 3-deoxy-manno-octulosonate cytidylyltransferase",
+ "2.7.7.39 Glycerol-3-phosphate cytidylyltransferase",
+ "2.7.7.40 D-ribitol-5-phosphate cytidylyltransferase",
+ "2.7.7.41 Phosphatidate cytidylyltransferase",
+ "2.7.7.42 [Glutamate--ammonia-ligase] adenylyltransferase",
+ "2.7.7.43 N-acylneuraminate cytidylyltransferase",
+ "2.7.7.44 Glucuronate-1-phosphate uridylyltransferase",
+ "2.7.7.45 Guanosine-triphosphate guanylyltransferase",
+ "2.7.7.46 Gentamicin 2''-nucleotidyltransferase",
+ "2.7.7.47 Streptomycin 3''-adenylyltransferase",
+ "2.7.7.48 RNA-directed RNA polymerase",
+ "2.7.7.49 RNA-directed DNA polymerase",
+ "2.7.7.50 mRNA guanylyltransferase",
+ "2.7.7.51 Adenylylsulfate--ammonia adenylyltransferase",
+ "2.7.7.52 RNA uridylyltransferase",
+ "2.7.7.53 ATP adenylyltransferase",
+ "2.7.7.54 Phenylalanine adenylyltransferase",
+ "2.7.7.55 Anthranilate adenylyltransferase",
+ "2.7.7.56 tRNA nucleotidyltransferase",
+ "2.7.7.57 N-methylphosphoethanolamine cytidylyltransferase",
+ "2.7.7.58 (2,3-dihydroxybenzoyl)adenylate synthase",
+ "2.7.7.59 [Protein-PII] uridylyltransferase",
+ "2.7.7.60 2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase",
+ "2.7.7.61 Citrate lyase holo-[acyl-carrier-protein] synthase",
+ "2.7.7.62 Adenosylcobinamide-phosphate guanylyltransferase",
+ "2.7.7.63 Lipoate--protein ligase",
+ "2.7.7.64 UTP-monosaccharide-1-phosphate uridylyltransferase",
+ "2.7.7.65 Diguanylate cyclase",
+ "2.7.7.66 Malonate decarboxylase holo-[acyl-carrier-protein] synthase",
+ "2.7.7.67 CDP-archaeol synthase",
+ "2.7.7.68 2-phospho-L-lactate guanylyltransferase",
+ "2.7.7.69 GDP-L-galactose phosphorylase",
+ "2.7.7.70 D-glycero-beta-D-manno-heptose 1-phosphate adenylyltransferase",
+ "2.7.7.71 D-glycero-alpha-D-manno-heptose 1-phosphate guanylyltransferase",
+ "2.7.7.72 CCA tRNA nucleotidyltransferase",
+ "2.7.7.73 Sulfur carrier protein ThiS adenylyltransferase",
+ "2.7.7.n1 Adenosine monophosphate-protein transferase",
+ "2.7.8.1 Ethanolaminephosphotransferase",
+ "2.7.8.2 Diacylglycerol cholinephosphotransferase",
+ "2.7.8.3 Ceramide cholinephosphotransferase",
+ "2.7.8.4 Serine-phosphoethanolamine synthase",
+ "2.7.8.5 CDP-diacylglycerol--glycerol-3-phosphate 3-phosphatidyltransferase",
+ "2.7.8.6 Undecaprenyl-phosphate galactose phosphotransferase",
+ "2.7.8.7 Holo-[acyl-carrier-protein] synthase",
+ "2.7.8.8 CDP-diacylglycerol--serine O-phosphatidyltransferase",
+ "2.7.8.9 Phosphomannan mannosephosphotransferase",
+ "2.7.8.10 Sphingosine cholinephosphotransferase",
+ "2.7.8.11 CDP-diacylglycerol--inositol 3-phosphatidyltransferase",
+ "2.7.8.12 CDP-glycerol glycerophosphotransferase",
+ "2.7.8.13 Phospho-N-acetylmuramoyl-pentapeptide-transferase",
+ "2.7.8.14 CDP-ribitol ribitolphosphotransferase",
+ "2.7.8.15 UDP-N-acetylglucosamine--dolichyl-phosphate N-acetylglucosaminephosphotransferase",
+ "2.7.8.17 UDP-N-acetylglucosamine--lysosomal-enzyme N-acetylglucosaminephosphotransferase",
+ "2.7.8.18 UDP-galactose--UDP-N-acetylglucosamine galactose phosphotransferase",
+ "2.7.8.19 UDP-glucose--glycoprotein glucose phosphotransferase",
+ "2.7.8.20 Phosphatidylglycerol--membrane-oligosaccharide glycerophosphotransferase",
+ "2.7.8.21 Membrane-oligosaccharide glycerophosphotransferase",
+ "2.7.8.22 1-alkenyl-2-acylglycerol choline phosphotransferase",
+ "2.7.8.23 Carboxyvinyl-carboxyphosphonate phosphorylmutase",
+ "2.7.8.24 Phosphatidylcholine synthase",
+ "2.7.8.25 Triphosphoribosyl-dephospho-CoA synthase",
+ "2.7.8.26 Adenosylcobinamide-GDP ribazoletransferase",
+ "2.7.8.27 Sphingomyelin synthase",
+ "2.7.8.28 2-phospho-L-lactate transferase",
+ "2.7.8.29 L-serine-phosphatidylethanolamine phosphatidyltransferase",
+ "2.7.8.30 Undecaprenyl-phosphate 4-deoxy-4-formamido-L-arabinose transferase",
+ "2.7.8.31 Undecaprenyl-phosphate glucose phosphotransferase",
+ "2.7.8.32 3-O-alpha-D-mannopyranosyl-alpha-D-mannopyranose xylosylphosphotransferase",
+ "2.7.8.n2 UDP-GlcNAc:undecaprenyl-phosphate GlcNAc-1-phosphate transferase",
+ "2.7.9.1 Pyruvate, phosphate dikinase",
+ "2.7.9.2 Pyruvate, water dikinase",
+ "2.7.9.3 Selenide, water dikinase",
+ "2.7.9.4 Alpha-glucan, water dikinase",
+ "2.7.9.5 Phosphoglucan, water dikinase",
+ "2.7.10.1 Receptor protein-tyrosine kinase",
+ "2.7.10.2 Non-specific protein-tyrosine kinase",
+ "2.7.11.1 Non-specific serine/threonine protein kinase",
+ "2.7.11.2 [Pyruvate dehydrogenase (acetyl-transferring)] kinase",
+ "2.7.11.3 Dephospho-[reductase kinase] kinase",
+ "2.7.11.4 [3-methyl-2-oxobutanoate dehydrogenase (acetyl-transferring)] kinase",
+ "2.7.11.5 [Isocitrate dehydrogenase (NADP(+))] kinase",
+ "2.7.11.6 [Tyrosine 3-monooxygenase] kinase",
+ "2.7.11.7 [Myosin heavy-chain] kinase",
+ "2.7.11.8 Fas-activated serine/threonine kinase",
+ "2.7.11.9 [Goodpasture-antigen-binding protein] kinase",
+ "2.7.11.10 I-kappa-B kinase",
+ "2.7.11.11 cAMP-dependent protein kinase",
+ "2.7.11.12 cGMP-dependent protein kinase",
+ "2.7.11.13 Protein kinase C",
+ "2.7.11.14 Rhodopsin kinase",
+ "2.7.11.15 [Beta-adrenergic-receptor] kinase",
+ "2.7.11.16 [G-protein-coupled receptor] kinase",
+ "2.7.11.17 Calcium/calmodulin-dependent protein kinase",
+ "2.7.11.18 [Myosin light-chain] kinase",
+ "2.7.11.19 Phosphorylase kinase",
+ "2.7.11.20 [Elongation factor 2] kinase",
+ "2.7.11.21 Polo kinase",
+ "2.7.11.22 Cyclin-dependent kinase",
+ "2.7.11.23 [RNA-polymerase]-subunit kinase",
+ "2.7.11.24 Mitogen-activated protein kinase",
+ "2.7.11.25 Mitogen-activated protein kinase kinase kinase",
+ "2.7.11.26 [Tau protein] kinase",
+ "2.7.11.27 [Acetyl-CoA carboxylase] kinase",
+ "2.7.11.28 Tropomyosin kinase",
+ "2.7.11.29 [Low-density-lipoprotein receptor] kinase",
+ "2.7.11.30 Receptor protein serine/threonine kinase",
+ "2.7.11.31 [Hydroxymethylglutaryl-CoA reductase (NADPH)] kinase",
+ "2.7.12.1 Dual-specificity kinase",
+ "2.7.12.2 Mitogen-activated protein kinase kinase",
+ "2.7.13.1 Protein-histidine pros-kinase",
+ "2.7.13.2 Protein-histidine tele-kinase",
+ "2.7.13.3 Histidine kinase",
+ "2.7.99.1 Triphosphate--protein phosphotransferase",
+ "2.8.1.1 Thiosulfate sulfurtransferase",
+ "2.8.1.2 3-mercaptopyruvate sulfurtransferase",
+ "2.8.1.3 Thiosulfate--thiol sulfurtransferase",
+ "2.8.1.4 tRNA sulfurtransferase",
+ "2.8.1.5 Thiosulfate--dithiol sulfurtransferase",
+ "2.8.1.6 Biotin synthase",
+ "2.8.1.7 Cysteine desulfurase",
+ "2.8.1.8 Lipoyl synthase",
+ "2.8.2.1 Aryl sulfotransferase",
+ "2.8.2.2 Alcohol sulfotransferase",
+ "2.8.2.3 Amine sulfotransferase",
+ "2.8.2.4 Estrone sulfotransferase",
+ "2.8.2.5 Chondroitin 4-sulfotransferase",
+ "2.8.2.6 Choline sulfotransferase",
+ "2.8.2.7 UDP-N-acetylgalactosamine-4-sulfate sulfotransferase",
+ "2.8.2.8 [Heparan sulfate]-glucosamine N-sulfotransferase",
+ "2.8.2.9 Tyrosine-ester sulfotransferase",
+ "2.8.2.10 Renilla-luciferin sulfotransferase",
+ "2.8.2.11 Galactosylceramide sulfotransferase",
+ "2.8.2.13 Psychosine sulfotransferase",
+ "2.8.2.14 Bile-salt sulfotransferase",
+ "2.8.2.15 Steroid sulfotransferase",
+ "2.8.2.16 Thiol sulfotransferase",
+ "2.8.2.17 Chondroitin 6-sulfotransferase",
+ "2.8.2.18 Cortisol sulfotransferase",
+ "2.8.2.19 Triglucosylalkylacylglycerol sulfotransferase",
+ "2.8.2.20 Protein-tyrosine sulfotransferase",
+ "2.8.2.21 Keratan sulfotransferase",
+ "2.8.2.22 Aryl-sulfate sulfotransferase",
+ "2.8.2.23 [Heparan sulfate]-glucosamine 3-sulfotransferase 1",
+ "2.8.2.24 Desulfoglucosinolate sulfotransferase",
+ "2.8.2.25 Flavonol 3-sulfotransferase",
+ "2.8.2.26 Quercetin-3-sulfate 3'-sulfotransferase",
+ "2.8.2.27 Quercetin-3-sulfate 4'-sulfotransferase",
+ "2.8.2.28 Quercetin-3,3'-bissulfate 7-sulfotransferase",
+ "2.8.2.29 [Heparan sulfate]-glucosamine 3-sulfotransferase 2",
+ "2.8.2.30 [Heparan sulfate]-glucosamine 3-sulfotransferase 3",
+ "2.8.2.31 Petromyzonol sulfotransferase",
+ "2.8.2.32 Scymnol sulfotransferase",
+ "2.8.2.33 N-acetylgalactosamine 4-sulfate 6-O-sulfotransferase",
+ "2.8.2.34 Glycochenodeoxycholate sulfotransferase",
+ "2.8.2.35 Dermatan 4-sulfotransferase",
+ "2.8.3.1 Propionate CoA-transferase",
+ "2.8.3.2 Oxalate CoA-transferase",
+ "2.8.3.3 Malonate CoA-transferase",
+ "2.8.3.5 3-oxoacid CoA-transferase",
+ "2.8.3.6 3-oxoadipate CoA-transferase",
+ "2.8.3.7 Succinate--citramalate CoA-transferase",
+ "2.8.3.8 Acetate CoA-transferase",
+ "2.8.3.9 Butyrate--acetoacetate CoA-transferase",
+ "2.8.3.10 Citrate CoA-transferase",
+ "2.8.3.11 Citramalate CoA-transferase",
+ "2.8.3.12 Glutaconate CoA-transferase",
+ "2.8.3.13 Succinate--hydroxymethylglutarate CoA-transferase",
+ "2.8.3.14 5-hydroxypentanoate CoA-transferase",
+ "2.8.3.15 Succinyl-CoA:(R)-benzylsuccinate CoA-transferase",
+ "2.8.3.16 Formyl-CoA transferase",
+ "2.8.3.17 Cinnamoyl-CoA:phenyllactate CoA-transferase",
+ "2.8.4.1 Coenzyme-B sulfoethylthiotransferase",
+ "2.8.4.2 Arsenate-mycothiol transferase",
+ "2.9.1.1 L-seryl-tRNA(Sec) selenium transferase",
+ "2.9.1.2 O-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase",
+ "3.1.1.1 Carboxylesterase",
+ "3.1.1.2 Arylesterase",
+ "3.1.1.3 Triacylglycerol lipase",
+ "3.1.1.4 Phospholipase A(2)",
+ "3.1.1.5 Lysophospholipase",
+ "3.1.1.6 Acetylesterase",
+ "3.1.1.7 Acetylcholinesterase",
+ "3.1.1.8 Cholinesterase",
+ "3.1.1.10 Tropinesterase",
+ "3.1.1.11 Pectinesterase",
+ "3.1.1.13 Sterol esterase",
+ "3.1.1.14 Chlorophyllase",
+ "3.1.1.15 L-arabinonolactonase",
+ "3.1.1.17 Gluconolactonase",
+ "3.1.1.19 Uronolactonase",
+ "3.1.1.20 Tannase",
+ "3.1.1.21 Retinyl-palmitate esterase",
+ "3.1.1.22 Hydroxybutyrate-dimer hydrolase",
+ "3.1.1.23 Acylglycerol lipase",
+ "3.1.1.24 3-oxoadipate enol-lactonase",
+ "3.1.1.25 1,4-lactonase",
+ "3.1.1.26 Galactolipase",
+ "3.1.1.27 4-pyridoxolactonase",
+ "3.1.1.28 Acylcarnitine hydrolase",
+ "3.1.1.29 Aminoacyl-tRNA hydrolase",
+ "3.1.1.30 D-arabinonolactonase",
+ "3.1.1.31 6-phosphogluconolactonase",
+ "3.1.1.32 Phospholipase A(1)",
+ "3.1.1.33 6-acetylglucose deacetylase",
+ "3.1.1.34 Lipoprotein lipase",
+ "3.1.1.35 Dihydrocoumarin hydrolase",
+ "3.1.1.36 Limonin-D-ring-lactonase",
+ "3.1.1.37 Steroid-lactonase",
+ "3.1.1.38 Triacetate-lactonase",
+ "3.1.1.39 Actinomycin lactonase",
+ "3.1.1.40 Orsellinate-depside hydrolase",
+ "3.1.1.41 Cephalosporin-C deacetylase",
+ "3.1.1.42 Chlorogenate hydrolase",
+ "3.1.1.43 Alpha-amino-acid esterase",
+ "3.1.1.44 4-methyloxaloacetate esterase",
+ "3.1.1.45 Carboxymethylenebutenolidase",
+ "3.1.1.46 Deoxylimonate A-ring-lactonase",
+ "3.1.1.47 1-alkyl-2-acetylglycerophosphocholine esterase",
+ "3.1.1.48 Fusarinine-C ornithinesterase",
+ "3.1.1.49 Sinapine esterase",
+ "3.1.1.50 Wax-ester hydrolase",
+ "3.1.1.51 Phorbol-diester hydrolase",
+ "3.1.1.52 Phosphatidylinositol deacylase",
+ "3.1.1.53 Sialate O-acetylesterase",
+ "3.1.1.54 Acetoxybutynylbithiophene deacetylase",
+ "3.1.1.55 Acetylsalicylate deacetylase",
+ "3.1.1.56 Methylumbelliferyl-acetate deacetylase",
+ "3.1.1.57 2-pyrone-4,6-dicarboxylate lactonase",
+ "3.1.1.58 N-acetylgalactosaminoglycan deacetylase",
+ "3.1.1.59 Juvenile-hormone esterase",
+ "3.1.1.60 Bis(2-ethylhexyl)phthalate esterase",
+ "3.1.1.61 Protein-glutamate methylesterase",
+ "3.1.1.63 11-cis-retinyl-palmitate hydrolase",
+ "3.1.1.64 All-trans-retinyl-palmitate hydrolase",
+ "3.1.1.65 L-rhamnono-1,4-lactonase",
+ "3.1.1.66 5-(3,4-diacetoxybut-1-ynyl)-2,2'-bithiophene deacetylase",
+ "3.1.1.67 Fatty-acyl-ethyl-ester synthase",
+ "3.1.1.68 Xylono-1,4-lactonase",
+ "3.1.1.70 Cetraxate benzylesterase",
+ "3.1.1.71 Acetylalkylglycerol acetylhydrolase",
+ "3.1.1.72 Acetylxylan esterase",
+ "3.1.1.73 Feruloyl esterase",
+ "3.1.1.74 Cutinase",
+ "3.1.1.75 Poly(3-hydroxybutyrate) depolymerase",
+ "3.1.1.76 Poly(3-hydroxyoctanoate) depolymerase",
+ "3.1.1.77 Acyloxyacyl hydrolase",
+ "3.1.1.78 Polyneuridine-aldehyde esterase",
+ "3.1.1.79 Hormone-sensitive lipase",
+ "3.1.1.80 Acetylajmaline esterase",
+ "3.1.1.81 Quorum-quenching N-acyl-homoserine lactonase",
+ "3.1.1.82 Pheophorbidase",
+ "3.1.1.83 Monoterpene epsilon-lactone hydrolase",
+ "3.1.1.84 Cocaine esterase",
+ "3.1.2.1 Acetyl-CoA hydrolase",
+ "3.1.2.2 Palmitoyl-CoA hydrolase",
+ "3.1.2.3 Succinyl-CoA hydrolase",
+ "3.1.2.4 3-hydroxyisobutyryl-CoA hydrolase",
+ "3.1.2.5 Hydroxymethylglutaryl-CoA hydrolase",
+ "3.1.2.6 Hydroxyacylglutathione hydrolase",
+ "3.1.2.7 Glutathione thiolesterase",
+ "3.1.2.10 Formyl-CoA hydrolase",
+ "3.1.2.11 Acetoacetyl-CoA hydrolase",
+ "3.1.2.12 S-formylglutathione hydrolase",
+ "3.1.2.13 S-succinylglutathione hydrolase",
+ "3.1.2.14 Oleoyl-[acyl-carrier-protein] hydrolase",
+ "3.1.2.15 Ubiquitin thiolesterase",
+ "3.1.2.16 Citrate-lyase deacetylase",
+ "3.1.2.17 (S)-methylmalonyl-CoA hydrolase",
+ "3.1.2.18 ADP-dependent short-chain-acyl-CoA hydrolase",
+ "3.1.2.19 ADP-dependent medium-chain-acyl-CoA hydrolase",
+ "3.1.2.20 Acyl-CoA hydrolase",
+ "3.1.2.21 Dodecanoyl-[acyl-carrier-protein] hydrolase",
+ "3.1.2.22 Palmitoyl-protein hydrolase",
+ "3.1.2.23 4-hydroxybenzoyl-CoA thioesterase",
+ "3.1.2.25 Phenylacetyl-CoA hydrolase",
+ "3.1.2.26 Bile-acid-CoA hydrolase",
+ "3.1.2.27 Choloyl-CoA hydrolase",
+ "3.1.2.28 1,4-dihydroxy-2-naphthoyl-CoA hydrolase",
+ "3.1.3.1 Alkaline phosphatase",
+ "3.1.3.2 Acid phosphatase",
+ "3.1.3.3 Phosphoserine phosphatase",
+ "3.1.3.4 Phosphatidate phosphatase",
+ "3.1.3.5 5'-nucleotidase",
+ "3.1.3.6 3'-nucleotidase",
+ "3.1.3.7 3'(2'),5'-bisphosphate nucleotidase",
+ "3.1.3.8 3-phytase",
+ "3.1.3.9 Glucose-6-phosphatase",
+ "3.1.3.10 Glucose-1-phosphatase",
+ "3.1.3.11 Fructose-bisphosphatase",
+ "3.1.3.12 Trehalose-phosphatase",
+ "3.1.3.13 Bisphosphoglycerate phosphatase",
+ "3.1.3.14 Methylphosphothioglycerate phosphatase",
+ "3.1.3.15 Histidinol-phosphatase",
+ "3.1.3.16 Phosphoprotein phosphatase",
+ "3.1.3.17 [Phosphorylase] phosphatase",
+ "3.1.3.18 Phosphoglycolate phosphatase",
+ "3.1.3.19 Glycerol-2-phosphatase",
+ "3.1.3.20 Phosphoglycerate phosphatase",
+ "3.1.3.21 Glycerol-1-phosphatase",
+ "3.1.3.22 Mannitol-1-phosphatase",
+ "3.1.3.23 Sugar-phosphatase",
+ "3.1.3.24 Sucrose-phosphate phosphatase",
+ "3.1.3.25 Inositol-phosphate phosphatase",
+ "3.1.3.26 4-phytase",
+ "3.1.3.27 Phosphatidylglycerophosphatase",
+ "3.1.3.28 ADP-phosphoglycerate phosphatase",
+ "3.1.3.29 N-acylneuraminate-9-phosphatase",
+ "3.1.3.31 Nucleotidase",
+ "3.1.3.32 Polynucleotide 3'-phosphatase",
+ "3.1.3.33 Polynucleotide 5'-phosphatase",
+ "3.1.3.34 Deoxynucleotide 3'-phosphatase",
+ "3.1.3.35 Thymidylate 5'-phosphatase",
+ "3.1.3.36 Phosphoinositide 5-phosphatase",
+ "3.1.3.37 Sedoheptulose-bisphosphatase",
+ "3.1.3.38 3-phosphoglycerate phosphatase",
+ "3.1.3.39 Streptomycin-6-phosphatase",
+ "3.1.3.40 Guanidinodeoxy-scyllo-inositol-4-phosphatase",
+ "3.1.3.41 4-nitrophenylphosphatase",
+ "3.1.3.42 [Glycogen-synthase-D] phosphatase",
+ "3.1.3.43 [Pyruvate dehydrogenase (acetyl-transferring)]-phosphatase",
+ "3.1.3.44 [Acetyl-CoA carboxylase]-phosphatase",
+ "3.1.3.45 3-deoxy-manno-octulosonate-8-phosphatase",
+ "3.1.3.46 Fructose-2,6-bisphosphate 2-phosphatase",
+ "3.1.3.47 [Hydroxymethylglutaryl-CoA reductase (NADPH)]-phosphatase",
+ "3.1.3.48 Protein-tyrosine-phosphatase",
+ "3.1.3.49 [Pyruvate kinase]-phosphatase",
+ "3.1.3.50 Sorbitol-6-phosphatase",
+ "3.1.3.51 Dolichyl-phosphatase",
+ "3.1.3.52 [3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring)]-phosphatase",
+ "3.1.3.53 [Myosin-light-chain] phosphatase",
+ "3.1.3.54 Fructose-2,6-bisphosphate 6-phosphatase",
+ "3.1.3.55 Caldesmon-phosphatase",
+ "3.1.3.56 Inositol-polyphosphate 5-phosphatase",
+ "3.1.3.57 Inositol-1,4-bisphosphate 1-phosphatase",
+ "3.1.3.58 Sugar-terminal-phosphatase",
+ "3.1.3.59 Alkylacetylglycerophosphatase",
+ "3.1.3.60 Phosphoenolpyruvate phosphatase",
+ "3.1.3.62 Multiple inositol-polyphosphate phosphatase",
+ "3.1.3.63 2-carboxy-D-arabinitol-1-phosphatase",
+ "3.1.3.64 Phosphatidylinositol-3-phosphatase",
+ "3.1.3.66 Phosphatidylinositol-3,4-bisphosphate 4-phosphatase",
+ "3.1.3.67 Phosphatidylinositol-3,4,5-trisphosphate 3-phosphatase",
+ "3.1.3.68 2-deoxyglucose-6-phosphatase",
+ "3.1.3.69 Glucosylglycerol 3-phosphatase",
+ "3.1.3.70 Mannosyl-3-phosphoglycerate phosphatase",
+ "3.1.3.71 2-phosphosulfolactate phosphatase",
+ "3.1.3.72 5-phytase",
+ "3.1.3.73 Alpha-ribazole phosphatase",
+ "3.1.3.74 Pyridoxal phosphatase",
+ "3.1.3.75 Phosphoethanolamine/phosphocholine phosphatase",
+ "3.1.3.76 Lipid-phosphate phosphatase",
+ "3.1.3.77 Acireductone synthase",
+ "3.1.3.78 Phosphatidylinositol-4,5-bisphosphate 4-phosphatase",
+ "3.1.3.79 Mannosylfructose-phosphate phosphatase",
+ "3.1.3.80 2,3-bisphosphoglycerate 3-phosphatase",
+ "3.1.3.81 Diacylglycerol diphosphate phosphatase",
+ "3.1.3.82 D-glycero-beta-D-manno-heptose 1,7-bisphosphate 7-phosphatase",
+ "3.1.3.83 D-glycero-alpha-D-manno-heptose-1,7-bisphosphate 7-phosphatase",
+ "3.1.3.n1 Phosphatidylinositol-3,4,5-trisphosphate 5-phosphatase",
+ "3.1.3.n2 ADP-ribose 1''-phosphate phosphatase",
+ "3.1.3.n4 2-hydroxy-3-keto-5-methylthiopentenyl-1-phosphate phosphatase",
+ "3.1.4.1 Phosphodiesterase I",
+ "3.1.4.2 Glycerophosphocholine phosphodiesterase",
+ "3.1.4.3 Phospholipase C",
+ "3.1.4.4 Phospholipase D",
+ "3.1.4.11 Phosphoinositide phospholipase C",
+ "3.1.4.12 Sphingomyelin phosphodiesterase",
+ "3.1.4.13 Serine-ethanolaminephosphate phosphodiesterase",
+ "3.1.4.14 [Acyl-carrier-protein] phosphodiesterase",
+ "3.1.4.15 Adenylyl-[glutamate--ammonia ligase] hydrolase",
+ "3.1.4.16 2',3'-cyclic-nucleotide 2'-phosphodiesterase",
+ "3.1.4.17 3',5'-cyclic-nucleotide phosphodiesterase",
+ "3.1.4.35 3',5'-cyclic-GMP phosphodiesterase",
+ "3.1.4.37 2',3'-cyclic-nucleotide 3'-phosphodiesterase",
+ "3.1.4.38 Glycerophosphocholine cholinephosphodiesterase",
+ "3.1.4.39 Alkylglycerophosphoethanolamine phosphodiesterase",
+ "3.1.4.40 CMP-N-acylneuraminate phosphodiesterase",
+ "3.1.4.41 Sphingomyelin phosphodiesterase D",
+ "3.1.4.42 Glycerol-1,2-cyclic-phosphate 2-phosphodiesterase",
+ "3.1.4.43 Glycerophosphoinositol inositolphosphodiesterase",
+ "3.1.4.44 Glycerophosphoinositol glycerophosphodiesterase",
+ "3.1.4.45 N-acetylglucosamine-1-phosphodiester alpha-N-acetylglucosaminidase",
+ "3.1.4.46 Glycerophosphodiester phosphodiesterase",
+ "3.1.4.48 Dolichylphosphate-glucose phosphodiesterase",
+ "3.1.4.49 Dolichylphosphate-mannose phosphodiesterase",
+ "3.1.4.50 Glycosylphosphatidylinositol phospholipase D",
+ "3.1.4.51 Glucose-1-phospho-D-mannosylglycoprotein phosphodiesterase",
+ "3.1.4.52 Cyclic-guanylate-specific phosphodiesterase",
+ "3.1.4.53 3',5'-cyclic-AMP phosphodiesterase",
+ "3.1.5.1 dGTPase",
+ "3.1.6.1 Arylsulfatase",
+ "3.1.6.2 Steryl-sulfatase",
+ "3.1.6.3 Glycosulfatase",
+ "3.1.6.4 N-acetylgalactosamine-6-sulfatase",
+ "3.1.6.6 Choline-sulfatase",
+ "3.1.6.7 Cellulose-polysulfatase",
+ "3.1.6.8 Cerebroside-sulfatase",
+ "3.1.6.9 Chondro-4-sulfatase",
+ "3.1.6.10 Chondro-6-sulfatase",
+ "3.1.6.11 Disulfoglucosamine-6-sulfatase",
+ "3.1.6.12 N-acetylgalactosamine-4-sulfatase",
+ "3.1.6.13 Iduronate-2-sulfatase",
+ "3.1.6.14 N-acetylglucosamine-6-sulfatase",
+ "3.1.6.15 N-sulfoglucosamine-3-sulfatase",
+ "3.1.6.16 Monomethyl-sulfatase",
+ "3.1.6.17 D-lactate-2-sulfatase",
+ "3.1.6.18 Glucuronate-2-sulfatase",
+ "3.1.7.1 Prenyl-diphosphatase",
+ "3.1.7.2 Guanosine-3',5'-bis(diphosphate) 3'-diphosphatase",
+ "3.1.7.3 Monoterpenyl-diphosphatase",
+ "3.1.7.4 Sclareol cyclase",
+ "3.1.7.5 Geranylgeranyl diphosphate diphosphatase",
+ "3.1.7.6 Farnesyl diphosphatase",
+ "3.1.7.7 Drimenol cyclase",
+ "3.1.8.1 Aryldialkylphosphatase",
+ "3.1.8.2 Diisopropyl-fluorophosphatase",
+ "3.1.11.1 Exodeoxyribonuclease I",
+ "3.1.11.2 Exodeoxyribonuclease III",
+ "3.1.11.3 Exodeoxyribonuclease (lambda-induced)",
+ "3.1.11.4 Exodeoxyribonuclease (phage SP3-induced)",
+ "3.1.11.5 Exodeoxyribonuclease V",
+ "3.1.11.6 Exodeoxyribonuclease VII",
+ "3.1.13.1 Exoribonuclease II",
+ "3.1.13.2 Exoribonuclease H",
+ "3.1.13.3 Oligonucleotidase",
+ "3.1.13.4 Poly(A)-specific ribonuclease",
+ "3.1.13.5 Ribonuclease D",
+ "3.1.14.1 Yeast ribonuclease",
+ "3.1.15.1 Venom exonuclease",
+ "3.1.16.1 Spleen exonuclease",
+ "3.1.21.1 Deoxyribonuclease I",
+ "3.1.21.2 Deoxyribonuclease IV (phage-T(4)-induced)",
+ "3.1.21.3 Type I site-specific deoxyribonuclease",
+ "3.1.21.4 Type II site-specific deoxyribonuclease",
+ "3.1.21.5 Type III site-specific deoxyribonuclease",
+ "3.1.21.6 CC-preferring endodeoxyribonuclease",
+ "3.1.21.7 Deoxyribonuclease V",
+ "3.1.22.1 Deoxyribonuclease II",
+ "3.1.22.2 Aspergillus deoxyribonuclease K(1)",
+ "3.1.22.4 Crossover junction endodeoxyribonuclease",
+ "3.1.22.5 Deoxyribonuclease X",
+ "3.1.25.1 Deoxyribonuclease (pyrimidine dimer)",
+ "3.1.26.1 Physarum polycephalum ribonuclease",
+ "3.1.26.2 Ribonuclease alpha",
+ "3.1.26.3 Ribonuclease III",
+ "3.1.26.4 Ribonuclease H",
+ "3.1.26.5 Ribonuclease P",
+ "3.1.26.6 Ribonuclease IV",
+ "3.1.26.7 Ribonuclease P4",
+ "3.1.26.8 Ribonuclease M5",
+ "3.1.26.9 Ribonuclease (poly-(U)-specific)",
+ "3.1.26.10 Ribonuclease IX",
+ "3.1.26.11 Ribonuclease Z",
+ "3.1.26.12 Ribonuclease E",
+ "3.1.26.13 Retroviral ribonuclease H",
+ "3.1.27.1 Ribonuclease T(2)",
+ "3.1.27.2 Bacillus subtilis ribonuclease",
+ "3.1.27.3 Ribonuclease T(1)",
+ "3.1.27.4 Ribonuclease U(2)",
+ "3.1.27.5 Pancreatic ribonuclease",
+ "3.1.27.6 Enterobacter ribonuclease",
+ "3.1.27.7 Ribonuclease F",
+ "3.1.27.8 Ribonuclease V",
+ "3.1.27.9 tRNA-intron endonuclease",
+ "3.1.27.10 rRNA endonuclease",
+ "3.1.30.1 Aspergillus nuclease S(1)",
+ "3.1.30.2 Serratia marcescens nuclease",
+ "3.1.31.1 Micrococcal nuclease",
+ "3.2.1.1 Alpha-amylase",
+ "3.2.1.2 Beta-amylase",
+ "3.2.1.3 Glucan 1,4-alpha-glucosidase",
+ "3.2.1.4 Cellulase",
+ "3.2.1.6 Endo-1,3(4)-beta-glucanase",
+ "3.2.1.7 Inulinase",
+ "3.2.1.8 Endo-1,4-beta-xylanase",
+ "3.2.1.10 Oligo-1,6-glucosidase",
+ "3.2.1.11 Dextranase",
+ "3.2.1.14 Chitinase",
+ "3.2.1.15 Polygalacturonase",
+ "3.2.1.17 Lysozyme",
+ "3.2.1.18 Exo-alpha-sialidase",
+ "3.2.1.20 Alpha-glucosidase",
+ "3.2.1.21 Beta-glucosidase",
+ "3.2.1.22 Alpha-galactosidase",
+ "3.2.1.23 Beta-galactosidase",
+ "3.2.1.24 Alpha-mannosidase",
+ "3.2.1.25 Beta-mannosidase",
+ "3.2.1.26 Beta-fructofuranosidase",
+ "3.2.1.28 Alpha,alpha-trehalase",
+ "3.2.1.31 Beta-glucuronidase",
+ "3.2.1.32 Xylan endo-1,3-beta-xylosidase",
+ "3.2.1.33 Amylo-alpha-1,6-glucosidase",
+ "3.2.1.35 Hyaluronoglucosaminidase",
+ "3.2.1.36 Hyaluronoglucuronidase",
+ "3.2.1.37 Xylan 1,4-beta-xylosidase",
+ "3.2.1.38 Beta-D-fucosidase",
+ "3.2.1.39 Glucan endo-1,3-beta-D-glucosidase",
+ "3.2.1.40 Alpha-L-rhamnosidase",
+ "3.2.1.41 Pullulanase",
+ "3.2.1.42 GDP-glucosidase",
+ "3.2.1.43 Beta-L-rhamnosidase",
+ "3.2.1.44 Fucoidanase",
+ "3.2.1.45 Glucosylceramidase",
+ "3.2.1.46 Galactosylceramidase",
+ "3.2.1.47 Galactosylgalactosylglucosylceramidase",
+ "3.2.1.48 Sucrose alpha-glucosidase",
+ "3.2.1.49 Alpha-N-acetylgalactosaminidase",
+ "3.2.1.50 Alpha-N-acetylglucosaminidase",
+ "3.2.1.51 Alpha-L-fucosidase",
+ "3.2.1.52 Beta-N-acetylhexosaminidase",
+ "3.2.1.53 Beta-N-acetylgalactosaminidase",
+ "3.2.1.54 Cyclomaltodextrinase",
+ "3.2.1.55 Alpha-N-arabinofuranosidase",
+ "3.2.1.56 Glucuronosyl-disulfoglucosamine glucuronidase",
+ "3.2.1.57 Isopullulanase",
+ "3.2.1.58 Glucan 1,3-beta-glucosidase",
+ "3.2.1.59 Glucan endo-1,3-alpha-glucosidase",
+ "3.2.1.60 Glucan 1,4-alpha-maltotetraohydrolase",
+ "3.2.1.61 Mycodextranase",
+ "3.2.1.62 Glycosylceramidase",
+ "3.2.1.63 1,2-alpha-L-fucosidase",
+ "3.2.1.64 2,6-beta-fructan 6-levanbiohydrolase",
+ "3.2.1.65 Levanase",
+ "3.2.1.66 Quercitrinase",
+ "3.2.1.67 Galacturan 1,4-alpha-galacturonidase",
+ "3.2.1.68 Isoamylase",
+ "3.2.1.70 Glucan 1,6-alpha-glucosidase",
+ "3.2.1.71 Glucan endo-1,2-beta-glucosidase",
+ "3.2.1.72 Xylan 1,3-beta-xylosidase",
+ "3.2.1.73 Licheninase",
+ "3.2.1.74 Glucan 1,4-beta-glucosidase",
+ "3.2.1.75 Glucan endo-1,6-beta-glucosidase",
+ "3.2.1.76 L-iduronidase",
+ "3.2.1.77 Mannan 1,2-(1,3)-alpha-mannosidase",
+ "3.2.1.78 Mannan endo-1,4-beta-mannosidase",
+ "3.2.1.80 Fructan beta-fructosidase",
+ "3.2.1.81 Beta-agarase",
+ "3.2.1.82 Exo-poly-alpha-galacturonosidase",
+ "3.2.1.83 Kappa-carrageenase",
+ "3.2.1.84 Glucan 1,3-alpha-glucosidase",
+ "3.2.1.85 6-phospho-beta-galactosidase",
+ "3.2.1.86 6-phospho-beta-glucosidase",
+ "3.2.1.87 Capsular-polysaccharide endo-1,3-alpha-galactosidase",
+ "3.2.1.88 Beta-L-arabinosidase",
+ "3.2.1.89 Arabinogalactan endo-1,4-beta-galactosidase",
+ "3.2.1.91 Cellulose 1,4-beta-cellobiosidase",
+ "3.2.1.92 Peptidoglycan beta-N-acetylmuramidase",
+ "3.2.1.93 Alpha,alpha-phosphotrehalase",
+ "3.2.1.94 Glucan 1,6-alpha-isomaltosidase",
+ "3.2.1.95 Dextran 1,6-alpha-isomaltotriosidase",
+ "3.2.1.96 Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase",
+ "3.2.1.97 Glycopeptide alpha-N-acetylgalactosaminidase",
+ "3.2.1.98 Glucan 1,4-alpha-maltohexaosidase",
+ "3.2.1.99 Arabinan endo-1,5-alpha-L-arabinosidase",
+ "3.2.1.100 Mannan 1,4-mannobiosidase",
+ "3.2.1.101 Mannan endo-1,6-alpha-mannosidase",
+ "3.2.1.102 Blood-group-substance endo-1,4-beta-galactosidase",
+ "3.2.1.103 Keratan-sulfate endo-1,4-beta-galactosidase",
+ "3.2.1.104 Steryl-beta-glucosidase",
+ "3.2.1.105 3-alpha-(S)-strictosidine beta-glucosidase",
+ "3.2.1.106 Mannosyl-oligosaccharide glucosidase",
+ "3.2.1.107 Protein-glucosylgalactosylhydroxylysine glucosidase",
+ "3.2.1.108 Lactase",
+ "3.2.1.109 Endogalactosaminidase",
+ "3.2.1.111 1,3-alpha-L-fucosidase",
+ "3.2.1.112 2-deoxyglucosidase",
+ "3.2.1.113 Mannosyl-oligosaccharide 1,2-alpha-mannosidase",
+ "3.2.1.114 Mannosyl-oligosaccharide 1,3-1,6-alpha-mannosidase",
+ "3.2.1.115 Branched-dextran exo-1,2-alpha-glucosidase",
+ "3.2.1.116 Glucan 1,4-alpha-maltotriohydrolase",
+ "3.2.1.117 Amygdalin beta-glucosidase",
+ "3.2.1.118 Prunasin beta-glucosidase",
+ "3.2.1.119 Vicianin beta-glucosidase",
+ "3.2.1.120 Oligoxyloglucan beta-glycosidase",
+ "3.2.1.121 Polymannuronate hydrolase",
+ "3.2.1.122 Maltose-6'-phosphate glucosidase",
+ "3.2.1.123 Endoglycosylceramidase",
+ "3.2.1.124 3-deoxy-2-octulosonidase",
+ "3.2.1.125 Raucaffricine beta-glucosidase",
+ "3.2.1.126 Coniferin beta-glucosidase",
+ "3.2.1.127 1,6-alpha-L-fucosidase",
+ "3.2.1.128 Glycyrrhizinate beta-glucuronidase",
+ "3.2.1.129 Endo-alpha-sialidase",
+ "3.2.1.130 Glycoprotein endo-alpha-1,2-mannosidase",
+ "3.2.1.131 Xylan alpha-1,2-glucuronosidase",
+ "3.2.1.132 Chitosanase",
+ "3.2.1.133 Glucan 1,4-alpha-maltohydrolase",
+ "3.2.1.134 Difructose-anhydride synthase",
+ "3.2.1.135 Neopullulanase",
+ "3.2.1.136 Glucuronoarabinoxylan endo-1,4-beta-xylanase",
+ "3.2.1.137 Mannan exo-1,2-1,6-alpha-mannosidase",
+ "3.2.1.139 Alpha-glucuronidase",
+ "3.2.1.140 Lacto-N-biosidase",
+ "3.2.1.141 4-alpha-D-((1->4)-alpha-D-glucano)trehalose trehalohydrolase",
+ "3.2.1.142 Limit dextrinase",
+ "3.2.1.143 Poly(ADP-ribose) glycohydrolase",
+ "3.2.1.144 3-deoxyoctulosonase",
+ "3.2.1.145 Galactan 1,3-beta-galactosidase",
+ "3.2.1.146 Beta-galactofuranosidase",
+ "3.2.1.147 Thioglucosidase",
+ "3.2.1.149 Beta-primeverosidase",
+ "3.2.1.150 Oligoxyloglucan reducing-end-specific cellobiohydrolase",
+ "3.2.1.151 Xyloglucan-specific endo-beta-1,4-glucanase",
+ "3.2.1.152 Mannosylglycoprotein endo-beta-mannosidase",
+ "3.2.1.153 Fructan beta-(2,1)-fructosidase",
+ "3.2.1.154 Fructan beta-(2,6)-fructosidase",
+ "3.2.1.155 Xyloglucan-specific exo-beta-1,4-glucanase",
+ "3.2.1.156 Oligosaccharide reducing-end xylanase",
+ "3.2.1.157 Iota-carrageenase",
+ "3.2.1.158 Alpha-agarase",
+ "3.2.1.159 Alpha-neoagaro-oligosaccharide hydrolase",
+ "3.2.1.161 Beta-apiosyl-beta-glucosidase",
+ "3.2.1.162 Lambda-carrageenase",
+ "3.2.1.163 1,6-alpha-D-mannosidase",
+ "3.2.1.164 Galactan endo-1,6-beta-galactosidase",
+ "3.2.1.165 Exo-1,4-beta-D-glucosaminidase",
+ "3.2.1.166 Heparanase",
+ "3.2.1.167 Baicalin-beta-D-glucuronidase",
+ "3.2.1.168 Hesperidin 6-O-alpha-L-rhamnosyl-beta-D-glucosidase",
+ "3.2.1.169 Protein O-GlcNAcase",
+ "3.2.1.n1 Blood group B branched chain alpha-1,3-galactosidase",
+ "3.2.1.n2 Blood group B linear chain alpha-1,3-galactosidase",
+ "3.2.1.n3 Dictyostelium lysozyme A",
+ "3.2.2.1 Purine nucleosidase",
+ "3.2.2.2 Inosine nucleosidase",
+ "3.2.2.3 Uridine nucleosidase",
+ "3.2.2.4 AMP nucleosidase",
+ "3.2.2.5 NAD(+) nucleosidase",
+ "3.2.2.6 NAD(P)(+) nucleosidase",
+ "3.2.2.7 Adenosine nucleosidase",
+ "3.2.2.8 Ribosylpyrimidine nucleosidase",
+ "3.2.2.9 Adenosylhomocysteine nucleosidase",
+ "3.2.2.10 Pyrimidine-5'-nucleotide nucleosidase",
+ "3.2.2.11 Beta-aspartyl-N-acetylglucosaminidase",
+ "3.2.2.12 Inosinate nucleosidase",
+ "3.2.2.13 1-methyladenosine nucleosidase",
+ "3.2.2.14 NMN nucleosidase",
+ "3.2.2.15 DNA-deoxyinosine glycosylase",
+ "3.2.2.16 Methylthioadenosine nucleosidase",
+ "3.2.2.17 Deoxyribodipyrimidine endonucleosidase",
+ "3.2.2.19 [Protein ADP-ribosylarginine] hydrolase",
+ "3.2.2.20 DNA-3-methyladenine glycosylase I",
+ "3.2.2.21 DNA-3-methyladenine glycosylase II",
+ "3.2.2.22 rRNA N-glycosylase",
+ "3.2.2.23 DNA-formamidopyrimidine glycosylase",
+ "3.2.2.24 ADP-ribosyl-[dinitrogen reductase] hydrolase",
+ "3.2.2.25 N-methyl nucleosidase",
+ "3.2.2.26 Futalosine hydrolase",
+ "3.2.2.27 Uracil-DNA glycosylase",
+ "3.2.2.28 Double-stranded uracil-DNA glycosylase",
+ "3.2.2.29 Thymine-DNA glycosylase",
+ "3.3.1.1 Adenosylhomocysteinase",
+ "3.3.1.2 Adenosylmethionine hydrolase",
+ "3.3.2.1 Isochorismatase",
+ "3.3.2.2 Alkenylglycerophosphocholine hydrolase",
+ "3.3.2.4 Trans-epoxysuccinate hydrolase",
+ "3.3.2.5 Alkenylglycerophosphoethanolamine hydrolase",
+ "3.3.2.6 Leukotriene-A(4) hydrolase",
+ "3.3.2.7 Hepoxilin-epoxide hydrolase",
+ "3.3.2.8 Limonene-1,2-epoxide hydrolase",
+ "3.3.2.9 Microsomal epoxide hydrolase",
+ "3.3.2.10 Soluble epoxide hydrolase",
+ "3.3.2.11 Cholesterol-5,6-oxide hydrolase",
+ "3.4.11.1 Leucyl aminopeptidase",
+ "3.4.11.2 Membrane alanyl aminopeptidase",
+ "3.4.11.3 Cystinyl aminopeptidase",
+ "3.4.11.4 Tripeptide aminopeptidase",
+ "3.4.11.5 Prolyl aminopeptidase",
+ "3.4.11.6 Aminopeptidase B",
+ "3.4.11.7 Glutamyl aminopeptidase",
+ "3.4.11.9 Xaa-Pro aminopeptidase",
+ "3.4.11.10 Bacterial leucyl aminopeptidase",
+ "3.4.11.13 Clostridial aminopeptidase",
+ "3.4.11.14 Cytosol alanyl aminopeptidase",
+ "3.4.11.15 Aminopeptidase Y",
+ "3.4.11.16 Xaa-Trp aminopeptidase",
+ "3.4.11.17 Tryptophanyl aminopeptidase",
+ "3.4.11.18 Methionyl aminopeptidase",
+ "3.4.11.19 D-stereospecific aminopeptidase",
+ "3.4.11.20 Aminopeptidase Ey",
+ "3.4.11.21 Aspartyl aminopeptidase",
+ "3.4.11.22 Aminopeptidase I",
+ "3.4.11.23 PepB aminopeptidase",
+ "3.4.11.24 Aminopeptidase S",
+ "3.4.13.3 Xaa-His dipeptidase",
+ "3.4.13.4 Xaa-Arg dipeptidase",
+ "3.4.13.5 Xaa-methyl-His dipeptidase",
+ "3.4.13.7 Glu-Glu dipeptidase",
+ "3.4.13.9 Xaa-Pro dipeptidase",
+ "3.4.13.12 Met-Xaa dipeptidase",
+ "3.4.13.17 Non-stereospecific dipeptidase",
+ "3.4.13.18 Cytosol nonspecific dipeptidase",
+ "3.4.13.19 Membrane dipeptidase",
+ "3.4.13.20 Beta-Ala-His dipeptidase",
+ "3.4.13.21 Dipeptidase E",
+ "3.4.13.22 D-Ala-D-Ala dipeptidase",
+ "3.4.14.1 Dipeptidyl-peptidase I",
+ "3.4.14.2 Dipeptidyl-peptidase II",
+ "3.4.14.4 Dipeptidyl-peptidase III",
+ "3.4.14.5 Dipeptidyl-peptidase IV",
+ "3.4.14.6 Dipeptidyl-dipeptidase",
+ "3.4.14.9 Tripeptidyl-peptidase I",
+ "3.4.14.10 Tripeptidyl-peptidase II",
+ "3.4.14.11 Xaa-Pro dipeptidyl-peptidase",
+ "3.4.14.12 Xaa-Xaa-Pro tripeptidyl-peptidase",
+ "3.4.15.1 Peptidyl-dipeptidase A",
+ "3.4.15.4 Peptidyl-dipeptidase B",
+ "3.4.15.5 Peptidyl-dipeptidase Dcp",
+ "3.4.15.6 Cyanophycinase",
+ "3.4.16.2 Lysosomal Pro-Xaa carboxypeptidase",
+ "3.4.16.4 Serine-type D-Ala-D-Ala carboxypeptidase",
+ "3.4.16.5 Carboxypeptidase C",
+ "3.4.16.6 Carboxypeptidase D",
+ "3.4.17.1 Carboxypeptidase A",
+ "3.4.17.2 Carboxypeptidase B",
+ "3.4.17.3 Lysine carboxypeptidase",
+ "3.4.17.4 Gly-Xaa carboxypeptidase",
+ "3.4.17.6 Alanine carboxypeptidase",
+ "3.4.17.8 Muramoylpentapeptide carboxypeptidase",
+ "3.4.17.10 Carboxypeptidase E",
+ "3.4.17.11 Glutamate carboxypeptidase",
+ "3.4.17.12 Carboxypeptidase M",
+ "3.4.17.13 Muramoyltetrapeptide carboxypeptidase",
+ "3.4.17.14 Zinc D-Ala-D-Ala carboxypeptidase",
+ "3.4.17.15 Carboxypeptidase A2",
+ "3.4.17.16 Membrane Pro-Xaa carboxypeptidase",
+ "3.4.17.17 Tubulinyl-Tyr carboxypeptidase",
+ "3.4.17.18 Carboxypeptidase T",
+ "3.4.17.19 Carboxypeptidase Taq",
+ "3.4.17.20 Carboxypeptidase U",
+ "3.4.17.21 Glutamate carboxypeptidase II",
+ "3.4.17.22 Metallocarboxypeptidase D",
+ "3.4.17.23 Angiotensin-converting enzyme 2",
+ "3.4.18.1 Cathepsin X",
+ "3.4.19.1 Acylaminoacyl-peptidase",
+ "3.4.19.2 Peptidyl-glycinamidase",
+ "3.4.19.3 Pyroglutamyl-peptidase I",
+ "3.4.19.5 Beta-aspartyl-peptidase",
+ "3.4.19.6 Pyroglutamyl-peptidase II",
+ "3.4.19.7 N-formylmethionyl-peptidase",
+ "3.4.19.9 Gamma-glutamyl hydrolase",
+ "3.4.19.11 Gamma-D-glutamyl-meso-diaminopimelate peptidase",
+ "3.4.19.12 Ubiquitinyl hydrolase 1",
+ "3.4.21.1 Chymotrypsin",
+ "3.4.21.2 Chymotrypsin C",
+ "3.4.21.3 Metridin",
+ "3.4.21.4 Trypsin",
+ "3.4.21.5 Thrombin",
+ "3.4.21.6 Coagulation factor Xa",
+ "3.4.21.7 Plasmin",
+ "3.4.21.9 Enteropeptidase",
+ "3.4.21.10 Acrosin",
+ "3.4.21.12 Alpha-lytic endopeptidase",
+ "3.4.21.19 Glutamyl endopeptidase",
+ "3.4.21.20 Cathepsin G",
+ "3.4.21.21 Coagulation factor VIIa",
+ "3.4.21.22 Coagulation factor IXa",
+ "3.4.21.25 Cucumisin",
+ "3.4.21.26 Prolyl oligopeptidase",
+ "3.4.21.27 Coagulation factor XIa",
+ "3.4.21.32 Brachyurin",
+ "3.4.21.34 Plasma kallikrein",
+ "3.4.21.35 Tissue kallikrein",
+ "3.4.21.36 Pancreatic elastase",
+ "3.4.21.37 Leukocyte elastase",
+ "3.4.21.38 Coagulation factor XIIa",
+ "3.4.21.39 Chymase",
+ "3.4.21.41 Complement subcomponent C1r",
+ "3.4.21.42 Complement subcomponent C1s",
+ "3.4.21.43 Classical-complement-pathway C3/C5 convertase",
+ "3.4.21.45 Complement factor I",
+ "3.4.21.46 Complement factor D",
+ "3.4.21.47 Alternative-complement-pathway C3/C5 convertase",
+ "3.4.21.48 Cerevisin",
+ "3.4.21.49 Hypodermin C",
+ "3.4.21.50 Lysyl endopeptidase",
+ "3.4.21.53 Endopeptidase La",
+ "3.4.21.54 Gamma-renin",
+ "3.4.21.55 Venombin AB",
+ "3.4.21.57 Leucyl endopeptidase",
+ "3.4.21.59 Tryptase",
+ "3.4.21.60 Scutelarin",
+ "3.4.21.61 Kexin",
+ "3.4.21.62 Subtilisin",
+ "3.4.21.63 Oryzin",
+ "3.4.21.64 Peptidase K",
+ "3.4.21.65 Thermomycolin",
+ "3.4.21.66 Thermitase",
+ "3.4.21.67 Endopeptidase So",
+ "3.4.21.68 T-plasminogen activator",
+ "3.4.21.69 Protein C (activated)",
+ "3.4.21.70 Pancreatic endopeptidase E",
+ "3.4.21.71 Pancreatic elastase II",
+ "3.4.21.72 IgA-specific serine endopeptidase",
+ "3.4.21.73 U-plasminogen activator",
+ "3.4.21.74 Venombin A",
+ "3.4.21.75 Furin",
+ "3.4.21.76 Myeloblastin",
+ "3.4.21.77 Semenogelase",
+ "3.4.21.78 Granzyme A",
+ "3.4.21.79 Granzyme B",
+ "3.4.21.80 Streptogrisin A",
+ "3.4.21.81 Streptogrisin B",
+ "3.4.21.82 Glutamyl endopeptidase II",
+ "3.4.21.83 Oligopeptidase B",
+ "3.4.21.84 Limulus clotting factor C",
+ "3.4.21.85 Limulus clotting factor B",
+ "3.4.21.86 Limulus clotting enzyme",
+ "3.4.21.88 Repressor lexA",
+ "3.4.21.89 Signal peptidase I",
+ "3.4.21.90 Togavirin",
+ "3.4.21.91 Flavivirin",
+ "3.4.21.92 Endopeptidase Clp",
+ "3.4.21.93 Proprotein convertase 1",
+ "3.4.21.94 Proprotein convertase 2",
+ "3.4.21.95 Snake venom factor V activator",
+ "3.4.21.96 Lactocepin",
+ "3.4.21.97 Assemblin",
+ "3.4.21.98 Hepacivirin",
+ "3.4.21.99 Spermosin",
+ "3.4.21.100 Sedolisin",
+ "3.4.21.101 Xanthomonalisin",
+ "3.4.21.102 C-terminal processing peptidase",
+ "3.4.21.103 Physarolisin",
+ "3.4.21.104 Mannan-binding lectin-associated serine protease-2",
+ "3.4.21.105 Rhomboid protease",
+ "3.4.21.106 Hepsin",
+ "3.4.21.107 Peptidase Do",
+ "3.4.21.108 HtrA2 peptidase",
+ "3.4.21.109 Matriptase",
+ "3.4.21.110 C5a peptidase",
+ "3.4.21.111 Aqualysin 1",
+ "3.4.21.112 Site-1 protease",
+ "3.4.21.113 Pestivirus NS3 polyprotein peptidase",
+ "3.4.21.114 Equine arterivirus serine peptidase",
+ "3.4.21.115 Infectious pancreatic necrosis birnavirus Vp4 peptidase",
+ "3.4.21.116 SpoIVB peptidase",
+ "3.4.21.117 Stratum corneum chymotryptic enzyme",
+ "3.4.21.118 Kallikrein 8",
+ "3.4.21.119 Kallikrein 13",
+ "3.4.21.120 Oviductin",
+ "3.4.22.1 Cathepsin B",
+ "3.4.22.2 Papain",
+ "3.4.22.3 Ficain",
+ "3.4.22.6 Chymopapain",
+ "3.4.22.7 Asclepain",
+ "3.4.22.8 Clostripain",
+ "3.4.22.10 Streptopain",
+ "3.4.22.14 Actinidain",
+ "3.4.22.15 Cathepsin L",
+ "3.4.22.16 Cathepsin H",
+ "3.4.22.24 Cathepsin T",
+ "3.4.22.25 Glycyl endopeptidase",
+ "3.4.22.26 Cancer procoagulant",
+ "3.4.22.27 Cathepsin S",
+ "3.4.22.28 Picornain 3C",
+ "3.4.22.29 Picornain 2A",
+ "3.4.22.30 Caricain",
+ "3.4.22.31 Ananain",
+ "3.4.22.32 Stem bromelain",
+ "3.4.22.33 Fruit bromelain",
+ "3.4.22.34 Legumain",
+ "3.4.22.35 Histolysain",
+ "3.4.22.36 Caspase-1",
+ "3.4.22.37 Gingipain R",
+ "3.4.22.38 Cathepsin K",
+ "3.4.22.39 Adenain",
+ "3.4.22.40 Bleomycin hydrolase",
+ "3.4.22.41 Cathepsin F",
+ "3.4.22.42 Cathepsin O",
+ "3.4.22.43 Cathepsin V",
+ "3.4.22.44 Nuclear-inclusion-a endopeptidase",
+ "3.4.22.45 Helper-component proteinase",
+ "3.4.22.46 L-peptidase",
+ "3.4.22.47 Gingipain K",
+ "3.4.22.48 Staphopain",
+ "3.4.22.49 Separase",
+ "3.4.22.50 V-cath endopeptidase",
+ "3.4.22.51 Cruzipain",
+ "3.4.22.52 Calpain-1",
+ "3.4.22.53 Calpain-2",
+ "3.4.22.54 Calpain-3",
+ "3.4.22.55 Caspase-2",
+ "3.4.22.56 Caspase-3",
+ "3.4.22.57 Caspase-4",
+ "3.4.22.58 Caspase-5",
+ "3.4.22.59 Caspase-6",
+ "3.4.22.60 Caspase-7",
+ "3.4.22.61 Caspase-8",
+ "3.4.22.62 Caspase-9",
+ "3.4.22.63 Caspase-10",
+ "3.4.22.64 Caspase-11",
+ "3.4.22.65 Peptidase 1 (mite)",
+ "3.4.22.66 Calicivirin",
+ "3.4.22.67 Zingipain",
+ "3.4.22.68 Ulp1 peptidase",
+ "3.4.22.69 SARS coronavirus main proteinase",
+ "3.4.22.70 Sortase A",
+ "3.4.22.71 Sortase B",
+ "3.4.23.1 Pepsin A",
+ "3.4.23.2 Pepsin B",
+ "3.4.23.3 Gastricsin",
+ "3.4.23.4 Chymosin",
+ "3.4.23.5 Cathepsin D",
+ "3.4.23.12 Nepenthesin",
+ "3.4.23.15 Renin",
+ "3.4.23.16 HIV-1 retropepsin",
+ "3.4.23.17 Pro-opiomelanocortin converting enzyme",
+ "3.4.23.18 Aspergillopepsin I",
+ "3.4.23.19 Aspergillopepsin II",
+ "3.4.23.20 Penicillopepsin",
+ "3.4.23.21 Rhizopuspepsin",
+ "3.4.23.22 Endothiapepsin",
+ "3.4.23.23 Mucorpepsin",
+ "3.4.23.24 Candidapepsin",
+ "3.4.23.25 Saccharopepsin",
+ "3.4.23.26 Rhodotorulapepsin",
+ "3.4.23.28 Acrocylindropepsin",
+ "3.4.23.29 Polyporopepsin",
+ "3.4.23.30 Pycnoporopepsin",
+ "3.4.23.31 Scytalidopepsin A",
+ "3.4.23.32 Scytalidopepsin B",
+ "3.4.23.34 Cathepsin E",
+ "3.4.23.35 Barrierpepsin",
+ "3.4.23.36 Signal peptidase II",
+ "3.4.23.38 Plasmepsin I",
+ "3.4.23.39 Plasmepsin II",
+ "3.4.23.40 Phytepsin",
+ "3.4.23.41 Yapsin 1",
+ "3.4.23.42 Thermopsin",
+ "3.4.23.43 Prepilin peptidase",
+ "3.4.23.44 Nodavirus endopeptidase",
+ "3.4.23.45 Memapsin 1",
+ "3.4.23.46 Memapsin 2",
+ "3.4.23.47 HIV-2 retropepsin",
+ "3.4.23.48 Plasminogen activator Pla",
+ "3.4.23.49 Omptin",
+ "3.4.23.50 Human endogenous retrovirus K endopeptidase",
+ "3.4.23.51 HycI peptidase",
+ "3.4.24.1 Atrolysin A",
+ "3.4.24.3 Microbial collagenase",
+ "3.4.24.6 Leucolysin",
+ "3.4.24.7 Interstitial collagenase",
+ "3.4.24.11 Neprilysin",
+ "3.4.24.12 Envelysin",
+ "3.4.24.13 IgA-specific metalloendopeptidase",
+ "3.4.24.14 Procollagen N-endopeptidase",
+ "3.4.24.15 Thimet oligopeptidase",
+ "3.4.24.16 Neurolysin",
+ "3.4.24.17 Stromelysin 1",
+ "3.4.24.18 Meprin A",
+ "3.4.24.19 Procollagen C-endopeptidase",
+ "3.4.24.20 Peptidyl-Lys metalloendopeptidase",
+ "3.4.24.21 Astacin",
+ "3.4.24.22 Stromelysin 2",
+ "3.4.24.23 Matrilysin",
+ "3.4.24.24 Gelatinase A",
+ "3.4.24.25 Vibriolysin",
+ "3.4.24.26 Pseudolysin",
+ "3.4.24.27 Thermolysin",
+ "3.4.24.28 Bacillolysin",
+ "3.4.24.29 Aureolysin",
+ "3.4.24.30 Coccolysin",
+ "3.4.24.31 Mycolysin",
+ "3.4.24.32 Beta-lytic metalloendopeptidase",
+ "3.4.24.33 Peptidyl-Asp metalloendopeptidase",
+ "3.4.24.34 Neutrophil collagenase",
+ "3.4.24.35 Gelatinase B",
+ "3.4.24.36 Leishmanolysin",
+ "3.4.24.37 Saccharolysin",
+ "3.4.24.38 Gametolysin",
+ "3.4.24.39 Deuterolysin",
+ "3.4.24.40 Serralysin",
+ "3.4.24.41 Atrolysin B",
+ "3.4.24.42 Atrolysin C",
+ "3.4.24.43 Atroxase",
+ "3.4.24.44 Atrolysin E",
+ "3.4.24.45 Atrolysin F",
+ "3.4.24.46 Adamalysin",
+ "3.4.24.47 Horrilysin",
+ "3.4.24.48 Ruberlysin",
+ "3.4.24.49 Bothropasin",
+ "3.4.24.50 Bothrolysin",
+ "3.4.24.51 Ophiolysin",
+ "3.4.24.52 Trimerelysin I",
+ "3.4.24.53 Trimerelysin II",
+ "3.4.24.54 Mucrolysin",
+ "3.4.24.55 Pitrilysin",
+ "3.4.24.56 Insulysin",
+ "3.4.24.57 O-sialoglycoprotein endopeptidase",
+ "3.4.24.58 Russellysin",
+ "3.4.24.59 Mitochondrial intermediate peptidase",
+ "3.4.24.60 Dactylysin",
+ "3.4.24.61 Nardilysin",
+ "3.4.24.62 Magnolysin",
+ "3.4.24.63 Meprin B",
+ "3.4.24.64 Mitochondrial processing peptidase",
+ "3.4.24.65 Macrophage elastase",
+ "3.4.24.66 Choriolysin L",
+ "3.4.24.67 Choriolysin H",
+ "3.4.24.68 Tentoxilysin",
+ "3.4.24.69 Bontoxilysin",
+ "3.4.24.70 Oligopeptidase A",
+ "3.4.24.71 Endothelin-converting enzyme 1",
+ "3.4.24.72 Fibrolase",
+ "3.4.24.73 Jararhagin",
+ "3.4.24.74 Fragilysin",
+ "3.4.24.75 Lysostaphin",
+ "3.4.24.76 Flavastacin",
+ "3.4.24.77 Snapalysin",
+ "3.4.24.78 GPR endopeptidase",
+ "3.4.24.79 Pappalysin-1",
+ "3.4.24.80 Membrane-type matrix metalloproteinase-1",
+ "3.4.24.81 ADAM10 endopeptidase",
+ "3.4.24.82 ADAMTS-4 endopeptidase",
+ "3.4.24.83 Anthrax lethal factor endopeptidase",
+ "3.4.24.84 Ste24 endopeptidase",
+ "3.4.24.85 S2P endopeptidase",
+ "3.4.24.86 ADAM 17 endopeptidase",
+ "3.4.24.87 ADAMTS13 endopeptidase",
+ "3.4.25.1 Proteasome endopeptidase complex",
+ "3.4.25.2 HslU--HslV peptidase",
+ "3.5.1.1 Asparaginase",
+ "3.5.1.2 Glutaminase",
+ "3.5.1.3 Omega-amidase",
+ "3.5.1.4 Amidase",
+ "3.5.1.5 Urease",
+ "3.5.1.6 Beta-ureidopropionase",
+ "3.5.1.7 Ureidosuccinase",
+ "3.5.1.8 Formylaspartate deformylase",
+ "3.5.1.9 Arylformamidase",
+ "3.5.1.10 Formyltetrahydrofolate deformylase",
+ "3.5.1.11 Penicillin amidase",
+ "3.5.1.12 Biotinidase",
+ "3.5.1.13 Aryl-acylamidase",
+ "3.5.1.14 Aminoacylase",
+ "3.5.1.15 Aspartoacylase",
+ "3.5.1.16 Acetylornithine deacetylase",
+ "3.5.1.17 Acyl-lysine deacylase",
+ "3.5.1.18 Succinyl-diaminopimelate desuccinylase",
+ "3.5.1.19 Nicotinamidase",
+ "3.5.1.20 Citrullinase",
+ "3.5.1.21 N-acetyl-beta-alanine deacetylase",
+ "3.5.1.22 Pantothenase",
+ "3.5.1.23 Ceramidase",
+ "3.5.1.24 Choloylglycine hydrolase",
+ "3.5.1.25 N-acetylglucosamine-6-phosphate deacetylase",
+ "3.5.1.26 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase",
+ "3.5.1.27 N-formylmethionylaminoacyl-tRNA deformylase",
+ "3.5.1.28 N-acetylmuramoyl-L-alanine amidase",
+ "3.5.1.29 2-(acetamidomethylene)succinate hydrolase",
+ "3.5.1.30 5-aminopentanamidase",
+ "3.5.1.31 Formylmethionine deformylase",
+ "3.5.1.32 Hippurate hydrolase",
+ "3.5.1.33 N-acetylglucosamine deacetylase",
+ "3.5.1.35 D-glutaminase",
+ "3.5.1.36 N-methyl-2-oxoglutaramate hydrolase",
+ "3.5.1.38 Glutamin-(asparagin-)ase",
+ "3.5.1.39 Alkylamidase",
+ "3.5.1.40 Acylagmatine amidase",
+ "3.5.1.41 Chitin deacetylase",
+ "3.5.1.42 Nicotinamide-nucleotide amidase",
+ "3.5.1.43 Peptidyl-glutaminase",
+ "3.5.1.44 Protein-glutamine glutaminase",
+ "3.5.1.46 6-aminohexanoate-dimer hydrolase",
+ "3.5.1.47 N-acetyldiaminopimelate deacetylase",
+ "3.5.1.48 Acetylspermidine deacetylase",
+ "3.5.1.49 Formamidase",
+ "3.5.1.50 Pentanamidase",
+ "3.5.1.51 4-acetamidobutyryl-CoA deacetylase",
+ "3.5.1.52 Peptide-N(4)-(N-acetyl-beta-glucosaminyl)asparagine amidase",
+ "3.5.1.53 N-carbamoylputrescine amidase",
+ "3.5.1.54 Allophanate hydrolase",
+ "3.5.1.55 Long-chain-fatty-acyl-glutamate deacylase",
+ "3.5.1.56 N,N-dimethylformamidase",
+ "3.5.1.57 Tryptophanamidase",
+ "3.5.1.58 N-benzyloxycarbonylglycine hydrolase",
+ "3.5.1.59 N-carbamoylsarcosine amidase",
+ "3.5.1.60 N-(long-chain-acyl)ethanolamine deacylase",
+ "3.5.1.61 Mimosinase",
+ "3.5.1.62 Acetylputrescine deacetylase",
+ "3.5.1.63 4-acetamidobutyrate deacetylase",
+ "3.5.1.64 N(alpha)-benzyloxycarbonylleucine hydrolase",
+ "3.5.1.65 Theanine hydrolase",
+ "3.5.1.66 2-(hydroxymethyl)-3-(acetamidomethylene)succinate hydrolase",
+ "3.5.1.67 4-methyleneglutaminase",
+ "3.5.1.68 N-formylglutamate deformylase",
+ "3.5.1.69 Glycosphingolipid deacylase",
+ "3.5.1.70 Aculeacin-A deacylase",
+ "3.5.1.71 N-feruloylglycine deacylase",
+ "3.5.1.72 D-benzoylarginine-4-nitroanilide amidase",
+ "3.5.1.73 Carnitinamidase",
+ "3.5.1.74 Chenodeoxycholoyltaurine hydrolase",
+ "3.5.1.75 Urethanase",
+ "3.5.1.76 Arylalkyl acylamidase",
+ "3.5.1.77 N-carbamoyl-D-amino-acid hydrolase",
+ "3.5.1.78 Glutathionylspermidine amidase",
+ "3.5.1.79 Phthalyl amidase",
+ "3.5.1.81 N-acyl-D-amino-acid deacylase",
+ "3.5.1.82 N-acyl-D-glutamate deacylase",
+ "3.5.1.83 N-acyl-D-aspartate deacylase",
+ "3.5.1.84 Biuret amidohydrolase",
+ "3.5.1.85 (S)-N-acetyl-1-phenylethylamine hydrolase",
+ "3.5.1.86 Mandelamide amidase",
+ "3.5.1.87 N-carbamoyl-L-amino-acid hydrolase",
+ "3.5.1.88 Peptide deformylase",
+ "3.5.1.89 N-acetylglucosaminylphosphatidylinositol deacetylase",
+ "3.5.1.90 Adenosylcobinamide hydrolase",
+ "3.5.1.91 N-substituted formamide deformylase",
+ "3.5.1.92 Pantetheine hydrolase",
+ "3.5.1.93 Glutaryl-7-aminocephalosporanic-acid acylase",
+ "3.5.1.94 Gamma-glutamyl-gamma-aminobutyrate hydrolase",
+ "3.5.1.95 N-malonylurea hydrolase",
+ "3.5.1.96 Succinylglutamate desuccinylase",
+ "3.5.1.97 Acyl-homoserine-lactone acylase",
+ "3.5.1.98 Histone deacetylase",
+ "3.5.1.99 Fatty acid amide hydrolase",
+ "3.5.1.100 (R)-amidase",
+ "3.5.1.101 L-proline amide hydrolase",
+ "3.5.1.102 2-amino-5-formylamino-6-ribosylaminopyrimidin-4(3H)-one 5'-monophosphate deformylase",
+ "3.5.1.103 N-acetyl-1-D-myo-inositol-2-amino-2-deoxy-alpha-D-glucopyranoside deacetylase",
+ "3.5.1.104 Peptidoglycan-N-acetylglucosamine deacetylase",
+ "3.5.1.105 Chitin disaccharide deacetylase",
+ "3.5.1.106 N-formylmaleamate deformylase",
+ "3.5.1.107 Maleamate amidohydrolase",
+ "3.5.1.108 UDP-3-O-acyl-N-acetylglucosamine deacetylase",
+ "3.5.1.n3 4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase",
+ "3.5.2.1 Barbiturase",
+ "3.5.2.2 Dihydropyrimidinase",
+ "3.5.2.3 Dihydroorotase",
+ "3.5.2.4 Carboxymethylhydantoinase",
+ "3.5.2.5 Allantoinase",
+ "3.5.2.6 Beta-lactamase",
+ "3.5.2.7 Imidazolonepropionase",
+ "3.5.2.9 5-oxoprolinase (ATP-hydrolyzing)",
+ "3.5.2.10 Creatininase",
+ "3.5.2.11 L-lysine-lactamase",
+ "3.5.2.12 6-aminohexanoate-cyclic-dimer hydrolase",
+ "3.5.2.13 2,5-dioxopiperazine hydrolase",
+ "3.5.2.14 N-methylhydantoinase (ATP-hydrolyzing)",
+ "3.5.2.15 Cyanuric acid amidohydrolase",
+ "3.5.2.16 Maleimide hydrolase",
+ "3.5.2.17 Hydroxyisourate hydrolase",
+ "3.5.2.18 Enamidase",
+ "3.5.2.19 Streptothricin hydrolase",
+ "3.5.3.1 Arginase",
+ "3.5.3.2 Guanidinoacetase",
+ "3.5.3.3 Creatinase",
+ "3.5.3.4 Allantoicase",
+ "3.5.3.5 Formimidoylaspartate deiminase",
+ "3.5.3.6 Arginine deiminase",
+ "3.5.3.7 Guanidinobutyrase",
+ "3.5.3.8 Formimidoylglutamase",
+ "3.5.3.9 Allantoate deiminase",
+ "3.5.3.10 D-arginase",
+ "3.5.3.11 Agmatinase",
+ "3.5.3.12 Agmatine deiminase",
+ "3.5.3.13 Formimidoylglutamate deiminase",
+ "3.5.3.14 Amidinoaspartase",
+ "3.5.3.15 Protein-arginine deiminase",
+ "3.5.3.16 Methylguanidinase",
+ "3.5.3.17 Guanidinopropionase",
+ "3.5.3.18 Dimethylargininase",
+ "3.5.3.19 Ureidoglycolate hydrolase",
+ "3.5.3.20 Diguanidinobutanase",
+ "3.5.3.21 Methylenediurea deaminase",
+ "3.5.3.22 Proclavaminate amidinohydrolase",
+ "3.5.3.23 N-succinylarginine dihydrolase",
+ "3.5.4.1 Cytosine deaminase",
+ "3.5.4.2 Adenine deaminase",
+ "3.5.4.3 Guanine deaminase",
+ "3.5.4.4 Adenosine deaminase",
+ "3.5.4.5 Cytidine deaminase",
+ "3.5.4.6 AMP deaminase",
+ "3.5.4.7 ADP deaminase",
+ "3.5.4.8 Aminoimidazolase",
+ "3.5.4.9 Methenyltetrahydrofolate cyclohydrolase",
+ "3.5.4.10 IMP cyclohydrolase",
+ "3.5.4.11 Pterin deaminase",
+ "3.5.4.12 dCMP deaminase",
+ "3.5.4.13 dCTP deaminase",
+ "3.5.4.14 Deoxycytidine deaminase",
+ "3.5.4.15 Guanosine deaminase",
+ "3.5.4.16 GTP cyclohydrolase I",
+ "3.5.4.17 Adenosine-phosphate deaminase",
+ "3.5.4.18 ATP deaminase",
+ "3.5.4.19 Phosphoribosyl-AMP cyclohydrolase",
+ "3.5.4.20 Pyrithiamine deaminase",
+ "3.5.4.21 Creatinine deaminase",
+ "3.5.4.22 1-pyrroline-4-hydroxy-2-carboxylate deaminase",
+ "3.5.4.23 Blasticidin-S deaminase",
+ "3.5.4.24 Sepiapterin deaminase",
+ "3.5.4.25 GTP cyclohydrolase II",
+ "3.5.4.26 Diaminohydroxyphosphoribosylaminopyrimidine deaminase",
+ "3.5.4.27 Methenyltetrahydromethanopterin cyclohydrolase",
+ "3.5.4.28 S-adenosylhomocysteine deaminase",
+ "3.5.4.29 GTP cyclohydrolase IIa",
+ "3.5.4.30 dCTP deaminase (dUMP-forming)",
+ "3.5.4.n1 5-methylthioadenosine deaminase",
+ "3.5.4.n2 GTP cyclohydrolase (cyclic phosphate forming)",
+ "3.5.5.1 Nitrilase",
+ "3.5.5.2 Ricinine nitrilase",
+ "3.5.5.4 Cyanoalanine nitrilase",
+ "3.5.5.5 Arylacetonitrilase",
+ "3.5.5.6 Bromoxynil nitrilase",
+ "3.5.5.7 Aliphatic nitrilase",
+ "3.5.5.8 Thiocyanate hydrolase",
+ "3.5.99.1 Riboflavinase",
+ "3.5.99.2 Thiaminase",
+ "3.5.99.3 Hydroxydechloroatrazine ethylaminohydrolase",
+ "3.5.99.4 N-isopropylammelide isopropylaminohydrolase",
+ "3.5.99.5 2-aminomuconate deaminase",
+ "3.5.99.6 Glucosamine-6-phosphate deaminase",
+ "3.5.99.7 1-aminocyclopropane-1-carboxylate deaminase",
+ "3.5.99.8 5-nitroanthranilic acid aminohydrolase",
+ "3.6.1.1 Inorganic diphosphatase",
+ "3.6.1.2 Trimetaphosphatase",
+ "3.6.1.3 Adenosinetriphosphatase",
+ "3.6.1.5 Apyrase",
+ "3.6.1.6 Nucleoside-diphosphatase",
+ "3.6.1.7 Acylphosphatase",
+ "3.6.1.8 ATP diphosphatase",
+ "3.6.1.9 Nucleotide diphosphatase",
+ "3.6.1.10 Endopolyphosphatase",
+ "3.6.1.11 Exopolyphosphatase",
+ "3.6.1.12 dCTP diphosphatase",
+ "3.6.1.13 ADP-ribose diphosphatase",
+ "3.6.1.14 Adenosine-tetraphosphatase",
+ "3.6.1.15 Nucleoside-triphosphatase",
+ "3.6.1.16 CDP-glycerol diphosphatase",
+ "3.6.1.17 Bis(5'-nucleosyl)-tetraphosphatase (asymmetrical)",
+ "3.6.1.18 FAD diphosphatase",
+ "3.6.1.19 Nucleoside-triphosphate diphosphatase",
+ "3.6.1.20 5'-acylphosphoadenosine hydrolase",
+ "3.6.1.21 ADP-sugar diphosphatase",
+ "3.6.1.22 NAD(+) diphosphatase",
+ "3.6.1.23 dUTP diphosphatase",
+ "3.6.1.24 Nucleoside phosphoacylhydrolase",
+ "3.6.1.25 Triphosphatase",
+ "3.6.1.26 CDP-diacylglycerol diphosphatase",
+ "3.6.1.27 Undecaprenyl-diphosphatase",
+ "3.6.1.28 Thiamine-triphosphatase",
+ "3.6.1.29 Bis(5'-adenosyl)-triphosphatase",
+ "3.6.1.30 M(7)G(5')pppN diphosphatase",
+ "3.6.1.31 Phosphoribosyl-ATP diphosphatase",
+ "3.6.1.39 Thymidine-triphosphatase",
+ "3.6.1.40 Guanosine-5'-triphosphate,3'-diphosphate diphosphatase",
+ "3.6.1.41 Bis(5'-nucleosyl)-tetraphosphatase (symmetrical)",
+ "3.6.1.42 Guanosine-diphosphatase",
+ "3.6.1.43 Dolichyldiphosphatase",
+ "3.6.1.44 Oligosaccharide-diphosphodolichol diphosphatase",
+ "3.6.1.45 UDP-sugar diphosphatase",
+ "3.6.1.52 Diphosphoinositol-polyphosphate diphosphatase",
+ "3.6.1.53 Mn(2+)-dependent ADP-ribose/CDP-alcohol diphosphatase",
+ "3.6.1.54 UDP-2,3-diacylglucosamine diphosphatase",
+ "3.6.1.n1 D-tyrosyl-tRNA(Tyr) hydrolase",
+ "3.6.1.n2 L-cysteinyl-tRNA(Pro)",
+ "3.6.1.n3 L-cysteinyl-tRNA(Cys) hydrolase",
+ "3.6.1.n4 Dihydroneopterin triphosphate diphosphatase",
+ "3.6.2.1 Adenylylsulfatase",
+ "3.6.2.2 Phosphoadenylylsulfatase",
+ "3.6.3.1 Phospholipid-translocating ATPase",
+ "3.6.3.2 Magnesium-importing ATPase",
+ "3.6.3.3 Cadmium-exporting ATPase",
+ "3.6.3.4 Copper-exporting ATPase",
+ "3.6.3.5 Zinc-exporting ATPase",
+ "3.6.3.6 Proton-exporting ATPase",
+ "3.6.3.7 Sodium-exporting ATPase",
+ "3.6.3.8 Calcium-transporting ATPase",
+ "3.6.3.9 Sodium/potassium-exchanging ATPase",
+ "3.6.3.10 Hydrogen/potassium-exchanging ATPase",
+ "3.6.3.11 Chloride-transporting ATPase",
+ "3.6.3.12 Potassium-transporting ATPase",
+ "3.6.3.14 H(+)-transporting two-sector ATPase",
+ "3.6.3.15 Sodium-transporting two-sector ATPase",
+ "3.6.3.16 Arsenite-transporting ATPase",
+ "3.6.3.17 Monosaccharide-transporting ATPase",
+ "3.6.3.18 Oligosaccharide-transporting ATPase",
+ "3.6.3.19 Maltose-transporting ATPase",
+ "3.6.3.20 Glycerol-3-phosphate-transporting ATPase",
+ "3.6.3.21 Polar-amino-acid-transporting ATPase",
+ "3.6.3.22 Nonpolar-amino-acid-transporting ATPase",
+ "3.6.3.23 Oligopeptide-transporting ATPase",
+ "3.6.3.24 Nickel-transporting ATPase",
+ "3.6.3.25 Sulfate-transporting ATPase",
+ "3.6.3.26 Nitrate-transporting ATPase",
+ "3.6.3.27 Phosphate-transporting ATPase",
+ "3.6.3.28 Phosphonate-transporting ATPase",
+ "3.6.3.29 Molybdate-transporting ATPase",
+ "3.6.3.30 Fe(3+)-transporting ATPase",
+ "3.6.3.31 Polyamine-transporting ATPase",
+ "3.6.3.32 Quaternary-amine-transporting ATPase",
+ "3.6.3.33 Vitamin B12-transporting ATPase",
+ "3.6.3.34 Iron-chelate-transporting ATPase",
+ "3.6.3.35 Manganese-transporting ATPase",
+ "3.6.3.36 Taurine-transporting ATPase",
+ "3.6.3.37 Guanine-transporting ATPase",
+ "3.6.3.38 Capsular-polysaccharide-transporting ATPase",
+ "3.6.3.39 Lipopolysaccharide-transporting ATPase",
+ "3.6.3.40 Teichoic-acid-transporting ATPase",
+ "3.6.3.41 Heme-transporting ATPase",
+ "3.6.3.42 Beta-glucan-transporting ATPase",
+ "3.6.3.43 Peptide-transporting ATPase",
+ "3.6.3.44 Xenobiotic-transporting ATPase",
+ "3.6.3.46 Cadmium-transporting ATPase",
+ "3.6.3.47 Fatty-acyl-CoA-transporting ATPase",
+ "3.6.3.48 Alpha-factor-transporting ATPase",
+ "3.6.3.49 Channel-conductance-controlling ATPase",
+ "3.6.3.50 Protein-secreting ATPase",
+ "3.6.3.51 Mitochondrial protein-transporting ATPase",
+ "3.6.3.52 Chloroplast protein-transporting ATPase",
+ "3.6.3.53 Ag(+)-exporting ATPase",
+ "3.6.3.n1 Cu(+) exporting ATPase",
+ "3.6.4.1 Myosin ATPase",
+ "3.6.4.2 Dynein ATPase",
+ "3.6.4.3 Microtubule-severing ATPase",
+ "3.6.4.4 Plus-end-directed kinesin ATPase",
+ "3.6.4.5 Minus-end-directed kinesin ATPase",
+ "3.6.4.6 Vesicle-fusing ATPase",
+ "3.6.4.7 Peroxisome-assembly ATPase",
+ "3.6.4.8 Proteasome ATPase",
+ "3.6.4.9 Chaperonin ATPase",
+ "3.6.4.10 Non-chaperonin molecular chaperone ATPase",
+ "3.6.4.11 Nucleoplasmin ATPase",
+ "3.6.4.12 DNA helicase",
+ "3.6.4.13 RNA helicase",
+ "3.6.5.1 Heterotrimeric G-protein GTPase",
+ "3.6.5.2 Small monomeric GTPase",
+ "3.6.5.3 Protein-synthesizing GTPase",
+ "3.6.5.4 Signal-recognition-particle GTPase",
+ "3.6.5.5 Dynamin GTPase",
+ "3.6.5.6 Tubulin GTPase",
+ "3.6.5.n1 Elongation factor 4",
+ "3.7.1.1 Oxaloacetase",
+ "3.7.1.2 Fumarylacetoacetase",
+ "3.7.1.3 Kynureninase",
+ "3.7.1.4 Phloretin hydrolase",
+ "3.7.1.5 Acylpyruvate hydrolase",
+ "3.7.1.6 Acetylpyruvate hydrolase",
+ "3.7.1.7 Beta-diketone hydrolase",
+ "3.7.1.8 2,6-dioxo-6-phenylhexa-3-enoate hydrolase",
+ "3.7.1.9 2-hydroxymuconate-semialdehyde hydrolase",
+ "3.7.1.10 Cyclohexane-1,3-dione hydrolase",
+ "3.7.1.11 Cyclohexane-1,2-dione hydrolase",
+ "3.7.1.12 Cobalt-precorrin 5A hydrolase",
+ "3.7.1.13 2-hydroxy-6-oxo-6-(2-aminophenyl)hexa-2,4-dienoate hydrolase",
+ "3.7.1.n1 2-hydroxy-6-oxonona-2,4-dienedioate hydrolase",
+ "3.7.1.n2 3,5/4-trihydroxycyclohexa-1,2-dione hydrolase",
+ "3.8.1.1 Alkylhalidase",
+ "3.8.1.2 (S)-2-haloacid dehalogenase",
+ "3.8.1.3 Haloacetate dehalogenase",
+ "3.8.1.5 Haloalkane dehalogenase",
+ "3.8.1.6 4-chlorobenzoate dehalogenase",
+ "3.8.1.7 4-chlorobenzoyl-CoA dehalogenase",
+ "3.8.1.8 Atrazine chlorohydrolase",
+ "3.8.1.9 (R)-2-haloacid dehalogenase",
+ "3.8.1.10 2-haloacid dehalogenase (configuration-inverting)",
+ "3.8.1.11 2-haloacid dehalogenase (configuration-retaining)",
+ "3.9.1.1 Phosphoamidase",
+ "3.10.1.1 N-sulfoglucosamine sulfohydrolase",
+ "3.10.1.2 Cyclamate sulfohydrolase",
+ "3.11.1.1 Phosphonoacetaldehyde hydrolase",
+ "3.11.1.2 Phosphonoacetate hydrolase",
+ "3.11.1.3 Phosphonopyruvate hydrolase",
+ "3.12.1.1 Trithionate hydrolase",
+ "3.13.1.1 UDP-sulfoquinovose synthase",
+ "3.13.1.3 2'-hydroxybiphenyl-2-sulfinate desulfinase",
+ "4.1.1.1 Pyruvate decarboxylase",
+ "4.1.1.2 Oxalate decarboxylase",
+ "4.1.1.3 Oxaloacetate decarboxylase",
+ "4.1.1.4 Acetoacetate decarboxylase",
+ "4.1.1.5 Acetolactate decarboxylase",
+ "4.1.1.6 Aconitate decarboxylase",
+ "4.1.1.7 Benzoylformate decarboxylase",
+ "4.1.1.8 Oxalyl-CoA decarboxylase",
+ "4.1.1.9 Malonyl-CoA decarboxylase",
+ "4.1.1.11 Aspartate 1-decarboxylase",
+ "4.1.1.12 Aspartate 4-decarboxylase",
+ "4.1.1.14 Valine decarboxylase",
+ "4.1.1.15 Glutamate decarboxylase",
+ "4.1.1.16 Hydroxyglutamate decarboxylase",
+ "4.1.1.17 Ornithine decarboxylase",
+ "4.1.1.18 Lysine decarboxylase",
+ "4.1.1.19 Arginine decarboxylase",
+ "4.1.1.20 Diaminopimelate decarboxylase",
+ "4.1.1.21 Phosphoribosylaminoimidazole carboxylase",
+ "4.1.1.22 Histidine decarboxylase",
+ "4.1.1.23 Orotidine-5'-phosphate decarboxylase",
+ "4.1.1.24 Aminobenzoate decarboxylase",
+ "4.1.1.25 Tyrosine decarboxylase",
+ "4.1.1.28 Aromatic-L-amino-acid decarboxylase",
+ "4.1.1.29 Sulfinoalanine decarboxylase",
+ "4.1.1.30 Pantothenoylcysteine decarboxylase",
+ "4.1.1.31 Phosphoenolpyruvate carboxylase",
+ "4.1.1.32 Phosphoenolpyruvate carboxykinase (GTP)",
+ "4.1.1.33 Diphosphomevalonate decarboxylase",
+ "4.1.1.34 Dehydro-L-gulonate decarboxylase",
+ "4.1.1.35 UDP-glucuronate decarboxylase",
+ "4.1.1.36 Phosphopantothenoylcysteine decarboxylase",
+ "4.1.1.37 Uroporphyrinogen decarboxylase",
+ "4.1.1.38 Phosphoenolpyruvate carboxykinase (diphosphate)",
+ "4.1.1.39 Ribulose-bisphosphate carboxylase",
+ "4.1.1.40 Hydroxypyruvate decarboxylase",
+ "4.1.1.41 Methylmalonyl-CoA decarboxylase",
+ "4.1.1.42 Carnitine decarboxylase",
+ "4.1.1.43 Phenylpyruvate decarboxylase",
+ "4.1.1.44 4-carboxymuconolactone decarboxylase",
+ "4.1.1.45 Aminocarboxymuconate-semialdehyde decarboxylase",
+ "4.1.1.46 o-pyrocatechuate decarboxylase",
+ "4.1.1.47 Tartronate-semialdehyde synthase",
+ "4.1.1.48 Indole-3-glycerol-phosphate synthase",
+ "4.1.1.49 Phosphoenolpyruvate carboxykinase (ATP)",
+ "4.1.1.50 Adenosylmethionine decarboxylase",
+ "4.1.1.51 3-hydroxy-2-methylpyridine-4,5-dicarboxylate 4-decarboxylase",
+ "4.1.1.52 6-methylsalicylate decarboxylase",
+ "4.1.1.53 Phenylalanine decarboxylase",
+ "4.1.1.54 Dihydroxyfumarate decarboxylase",
+ "4.1.1.55 4,5-dihydroxyphthalate decarboxylase",
+ "4.1.1.56 3-oxolaurate decarboxylase",
+ "4.1.1.57 Methionine decarboxylase",
+ "4.1.1.58 Orsellinate decarboxylase",
+ "4.1.1.59 Gallate decarboxylase",
+ "4.1.1.60 Stipitatonate decarboxylase",
+ "4.1.1.61 4-hydroxybenzoate decarboxylase",
+ "4.1.1.62 Gentisate decarboxylase",
+ "4.1.1.63 Protocatechuate decarboxylase",
+ "4.1.1.64 2,2-dialkylglycine decarboxylase (pyruvate)",
+ "4.1.1.65 Phosphatidylserine decarboxylase",
+ "4.1.1.66 Uracil-5-carboxylate decarboxylase",
+ "4.1.1.67 UDP-galacturonate decarboxylase",
+ "4.1.1.68 5-oxopent-3-ene-1,2,5-tricarboxylate decarboxylase",
+ "4.1.1.69 3,4-dihydroxyphthalate decarboxylase",
+ "4.1.1.70 Glutaconyl-CoA decarboxylase",
+ "4.1.1.71 2-oxoglutarate decarboxylase",
+ "4.1.1.72 Branched-chain-2-oxoacid decarboxylase",
+ "4.1.1.73 Tartrate decarboxylase",
+ "4.1.1.74 Indolepyruvate decarboxylase",
+ "4.1.1.75 5-guanidino-2-oxopentanoate decarboxylase",
+ "4.1.1.76 Arylmalonate decarboxylase",
+ "4.1.1.77 4-oxalocrotonate decarboxylase",
+ "4.1.1.78 Acetylenedicarboxylate decarboxylase",
+ "4.1.1.79 Sulfopyruvate decarboxylase",
+ "4.1.1.80 4-hydroxyphenylpyruvate decarboxylase",
+ "4.1.1.81 Threonine-phosphate decarboxylase",
+ "4.1.1.82 Phosphonopyruvate decarboxylase",
+ "4.1.1.83 4-hydroxyphenylacetate decarboxylase",
+ "4.1.1.84 D-dopachrome decarboxylase",
+ "4.1.1.85 3-dehydro-L-gulonate-6-phosphate decarboxylase",
+ "4.1.1.86 Diaminobutyrate decarboxylase",
+ "4.1.1.87 Malonyl-S-ACP decarboxylase",
+ "4.1.1.88 Biotin-independent malonate decarboxylase",
+ "4.1.1.89 Biotin-dependent malonate decarboxylase",
+ "4.1.1.90 Peptidyl-glutamate 4-carboxylase",
+ "4.1.2.2 Ketotetrose-phosphate aldolase",
+ "4.1.2.4 Deoxyribose-phosphate aldolase",
+ "4.1.2.5 Threonine aldolase",
+ "4.1.2.8 Indole-3-glycerol-phosphate lyase",
+ "4.1.2.9 Phosphoketolase",
+ "4.1.2.10 (R)-mandelonitrile lyase",
+ "4.1.2.11 Hydroxymandelonitrile lyase",
+ "4.1.2.12 2-dehydropantoate aldolase",
+ "4.1.2.13 Fructose-bisphosphate aldolase",
+ "4.1.2.14 2-dehydro-3-deoxy-phosphogluconate aldolase",
+ "4.1.2.17 L-fuculose-phosphate aldolase",
+ "4.1.2.18 2-dehydro-3-deoxy-L-pentonate aldolase",
+ "4.1.2.19 Rhamnulose-1-phosphate aldolase",
+ "4.1.2.20 2-dehydro-3-deoxyglucarate aldolase",
+ "4.1.2.21 2-dehydro-3-deoxy-6-phosphogalactonate aldolase",
+ "4.1.2.22 Fructose-6-phosphate phosphoketolase",
+ "4.1.2.23 3-deoxy-D-manno-octulosonate aldolase",
+ "4.1.2.24 Dimethylaniline-N-oxide aldolase",
+ "4.1.2.25 Dihydroneopterin aldolase",
+ "4.1.2.26 Phenylserine aldolase",
+ "4.1.2.27 Sphinganine-1-phosphate aldolase",
+ "4.1.2.28 2-dehydro-3-deoxy-D-pentonate aldolase",
+ "4.1.2.29 5-dehydro-2-deoxyphosphogluconate aldolase",
+ "4.1.2.30 17-alpha-hydroxyprogesterone aldolase",
+ "4.1.2.32 Trimethylamine-oxide aldolase",
+ "4.1.2.33 Fucosterol-epoxide lyase",
+ "4.1.2.34 4-(2-carboxyphenyl)-2-oxobut-3-enoate aldolase",
+ "4.1.2.35 Propioin synthase",
+ "4.1.2.36 Lactate aldolase",
+ "4.1.2.38 Benzoin aldolase",
+ "4.1.2.40 Tagatose-bisphosphate aldolase",
+ "4.1.2.41 Vanillin synthase",
+ "4.1.2.42 D-threonine aldolase",
+ "4.1.2.43 3-hexulose-6-phosphate synthase",
+ "4.1.2.44 Benzoyl-CoA-dihydrodiol lyase",
+ "4.1.2.45 Trans-o-hydroxybenzylidenepyruvate hydratase-aldolase",
+ "4.1.2.46 Aliphatic (R)-hydroxynitrile lyase",
+ "4.1.2.47 (S)-hydroxynitrile lyase",
+ "4.1.2.n2 2-hydroxyphytanoyl-CoA lyase",
+ "4.1.2.n3 2-keto-3-deoxy-L-rhamnonate aldolase",
+ "4.1.2.n4 4-hydroxy-2-oxo-heptane-1,7-dioate aldolase",
+ "4.1.2.n5 2-amino-3,7-dideoxy-D-threo-hept-6-ulosonate synthase",
+ "4.1.3.1 Isocitrate lyase",
+ "4.1.3.3 N-acetylneuraminate lyase",
+ "4.1.3.4 Hydroxymethylglutaryl-CoA lyase",
+ "4.1.3.6 Citrate (pro-3S)-lyase",
+ "4.1.3.13 Oxalomalate lyase",
+ "4.1.3.14 L-erythro-3-hydroxyaspartate aldolase",
+ "4.1.3.16 4-hydroxy-2-oxoglutarate aldolase",
+ "4.1.3.17 4-hydroxy-4-methyl-2-oxoglutarate aldolase",
+ "4.1.3.22 Citramalate lyase",
+ "4.1.3.24 Malyl-CoA lyase",
+ "4.1.3.25 Citramalyl-CoA lyase",
+ "4.1.3.26 3-hydroxy-3-isohexenylglutaryl-CoA lyase",
+ "4.1.3.27 Anthranilate synthase",
+ "4.1.3.30 Methylisocitrate lyase",
+ "4.1.3.32 2,3-dimethylmalate lyase",
+ "4.1.3.34 Citryl-CoA lyase",
+ "4.1.3.35 (1-hydroxycyclohexan-1-yl)acetyl-CoA lyase",
+ "4.1.3.36 1,4-dihydroxy-2-naphthoyl-CoA synthase",
+ "4.1.3.38 Aminodeoxychorismate lyase",
+ "4.1.3.39 4-hydroxy-2-oxovalerate aldolase",
+ "4.1.3.40 Chorismate lyase",
+ "4.1.3.41 3-hydroxy-D-aspartate aldolase",
+ "4.1.99.1 Tryptophanase",
+ "4.1.99.2 Tyrosine phenol-lyase",
+ "4.1.99.3 Deoxyribodipyrimidine photo-lyase",
+ "4.1.99.5 Octadecanal decarbonylase",
+ "4.1.99.11 Benzylsuccinate synthase",
+ "4.1.99.12 3,4-dihydroxy-2-butanone-4-phosphate synthase",
+ "4.1.99.13 (6-4)DNA photolyase",
+ "4.1.99.14 Spore photoproduct lyase",
+ "4.2.1.1 Carbonate dehydratase",
+ "4.2.1.2 Fumarate hydratase",
+ "4.2.1.3 Aconitate hydratase",
+ "4.2.1.4 Citrate dehydratase",
+ "4.2.1.5 Arabinonate dehydratase",
+ "4.2.1.6 Galactonate dehydratase",
+ "4.2.1.7 Altronate dehydratase",
+ "4.2.1.8 Mannonate dehydratase",
+ "4.2.1.9 Dihydroxy-acid dehydratase",
+ "4.2.1.10 3-dehydroquinate dehydratase",
+ "4.2.1.11 Phosphopyruvate hydratase",
+ "4.2.1.12 Phosphogluconate dehydratase",
+ "4.2.1.17 Enoyl-CoA hydratase",
+ "4.2.1.18 Methylglutaconyl-CoA hydratase",
+ "4.2.1.19 Imidazoleglycerol-phosphate dehydratase",
+ "4.2.1.20 Tryptophan synthase",
+ "4.2.1.22 Cystathionine beta-synthase",
+ "4.2.1.24 Porphobilinogen synthase",
+ "4.2.1.25 L-arabinonate dehydratase",
+ "4.2.1.27 Acetylenecarboxylate hydratase",
+ "4.2.1.28 Propanediol dehydratase",
+ "4.2.1.30 Glycerol dehydratase",
+ "4.2.1.31 Maleate hydratase",
+ "4.2.1.32 L(+)-tartrate dehydratase",
+ "4.2.1.33 3-isopropylmalate dehydratase",
+ "4.2.1.34 (S)-2-methylmalate dehydratase",
+ "4.2.1.35 (R)-2-methylmalate dehydratase",
+ "4.2.1.36 Homoaconitate hydratase",
+ "4.2.1.39 Gluconate dehydratase",
+ "4.2.1.40 Glucarate dehydratase",
+ "4.2.1.41 5-dehydro-4-deoxyglucarate dehydratase",
+ "4.2.1.42 Galactarate dehydratase",
+ "4.2.1.43 2-dehydro-3-deoxy-L-arabinonate dehydratase",
+ "4.2.1.44 Myo-inosose-2 dehydratase",
+ "4.2.1.45 CDP-glucose 4,6-dehydratase",
+ "4.2.1.46 dTDP-glucose 4,6-dehydratase",
+ "4.2.1.47 GDP-mannose 4,6-dehydratase",
+ "4.2.1.48 D-glutamate cyclase",
+ "4.2.1.49 Urocanate hydratase",
+ "4.2.1.50 Pyrazolylalanine synthase",
+ "4.2.1.51 Prephenate dehydratase",
+ "4.2.1.52 Dihydrodipicolinate synthase",
+ "4.2.1.53 Oleate hydratase",
+ "4.2.1.54 Lactoyl-CoA dehydratase",
+ "4.2.1.55 3-hydroxybutyryl-CoA dehydratase",
+ "4.2.1.56 Itaconyl-CoA hydratase",
+ "4.2.1.57 Isohexenylglutaconyl-CoA hydratase",
+ "4.2.1.58 Crotonoyl-[acyl-carrier-protein] hydratase",
+ "4.2.1.59 3-hydroxyoctanoyl-[acyl-carrier-protein] dehydratase",
+ "4.2.1.60 3-hydroxydecanoyl-[acyl-carrier-protein] dehydratase",
+ "4.2.1.61 3-hydroxypalmitoyl-[acyl-carrier-protein] dehydratase",
+ "4.2.1.62 5-alpha-hydroxysteroid dehydratase",
+ "4.2.1.65 3-cyanoalanine hydratase",
+ "4.2.1.66 Cyanide hydratase",
+ "4.2.1.67 D-fuconate dehydratase",
+ "4.2.1.68 L-fuconate dehydratase",
+ "4.2.1.69 Cyanamide hydratase",
+ "4.2.1.70 Pseudouridylate synthase",
+ "4.2.1.73 Protoaphin-aglucone dehydratase (cyclizing)",
+ "4.2.1.74 Long-chain-enoyl-CoA hydratase",
+ "4.2.1.75 Uroporphyrinogen-III synthase",
+ "4.2.1.76 UDP-glucose 4,6-dehydratase",
+ "4.2.1.77 Trans-L-3-hydroxyproline dehydratase",
+ "4.2.1.78 (S)-norcoclaurine synthase",
+ "4.2.1.79 2-methylcitrate dehydratase",
+ "4.2.1.80 2-oxopent-4-enoate hydratase",
+ "4.2.1.81 D(-)-tartrate dehydratase",
+ "4.2.1.82 Xylonate dehydratase",
+ "4.2.1.83 4-oxalmesaconate hydratase",
+ "4.2.1.84 Nitrile hydratase",
+ "4.2.1.85 Dimethylmaleate hydratase",
+ "4.2.1.87 Octopamine dehydratase",
+ "4.2.1.88 Synephrine dehydratase",
+ "4.2.1.89 Carnitine dehydratase",
+ "4.2.1.90 L-rhamnonate dehydratase",
+ "4.2.1.91 Arogenate dehydratase",
+ "4.2.1.92 Hydroperoxide dehydratase",
+ "4.2.1.93 ATP-dependent NAD(P)H-hydrate dehydratase",
+ "4.2.1.94 Scytalone dehydratase",
+ "4.2.1.95 Kievitone hydratase",
+ "4.2.1.96 4a-hydroxytetrahydrobiopterin dehydratase",
+ "4.2.1.97 Phaseollidin hydratase",
+ "4.2.1.98 16-alpha-hydroxyprogesterone dehydratase",
+ "4.2.1.99 2-methylisocitrate dehydratase",
+ "4.2.1.100 Cyclohexa-1,5-dienecarbonyl-CoA hydratase",
+ "4.2.1.101 Trans-feruloyl-CoA hydratase",
+ "4.2.1.103 Cyclohexyl-isocyanide hydratase",
+ "4.2.1.104 Cyanase",
+ "4.2.1.105 2-hydroxyisoflavanone dehydratase",
+ "4.2.1.106 Bile-acid 7-alpha-dehydratase",
+ "4.2.1.107 3-alpha,7-alpha,12-alpha-trihydroxy-5-beta-cholest-24-enoyl-CoA hydratase",
+ "4.2.1.108 Ectoine synthase",
+ "4.2.1.109 Methylthioribulose 1-phosphate dehydratase",
+ "4.2.1.110 Aldos-2-ulose dehydratase",
+ "4.2.1.111 1,5-anhydro-D-fructose dehydratase",
+ "4.2.1.112 Acetylene hydratase",
+ "4.2.1.113 o-succinylbenzoate synthase",
+ "4.2.1.114 Methanogen homoaconitase",
+ "4.2.1.115 UDP-N-acetylglucosamine 4,6-dehydratase (inverting)",
+ "4.2.1.116 3-hydroxypropionyl-CoA dehydratase",
+ "4.2.1.117 2-methylcitrate dehydratase (2-methyl-trans-aconitate forming)",
+ "4.2.1.118 3-dehydroshikimate dehydratase",
+ "4.2.1.119 Enoyl-CoA hydratase 2",
+ "4.2.1.120 4-hydroxybutanoyl-CoA dehydratase",
+ "4.2.1.121 Colneleate synthase",
+ "4.2.1.n1 N-acetylmuramic acid 6-phosphate etherase",
+ "4.2.2.1 Hyaluronate lyase",
+ "4.2.2.2 Pectate lyase",
+ "4.2.2.3 Poly(beta-D-mannuronate) lyase",
+ "4.2.2.5 Chondroitin AC lyase",
+ "4.2.2.6 Oligogalacturonide lyase",
+ "4.2.2.7 Heparin lyase",
+ "4.2.2.8 Heparin-sulfate lyase",
+ "4.2.2.9 Pectate disaccharide-lyase",
+ "4.2.2.10 Pectin lyase",
+ "4.2.2.11 Poly(alpha-L-guluronate) lyase",
+ "4.2.2.12 Xanthan lyase",
+ "4.2.2.13 Exo-(1->4)-alpha-D-glucan lyase",
+ "4.2.2.14 Glucuronan lyase",
+ "4.2.2.15 Anhydrosialidase",
+ "4.2.2.16 Levan fructotransferase (DFA-IV-forming)",
+ "4.2.2.17 Inulin fructotransferase (DFA-I-forming)",
+ "4.2.2.18 Inulin fructotransferase (DFA-III-forming)",
+ "4.2.2.19 Chondroitin B lyase",
+ "4.2.2.20 Chondroitin-sulfate-ABC endolyase",
+ "4.2.2.21 Chondroitin-sulfate-ABC exolyase",
+ "4.2.2.22 Pectate trisaccharide-lyase",
+ "4.2.2.n1 Peptidoglycan lytic exotransglycosylase",
+ "4.2.2.n2 Peptidoglycan lytic endotransglycosylase",
+ "4.2.3.1 Threonine synthase",
+ "4.2.3.2 Ethanolamine-phosphate phospho-lyase",
+ "4.2.3.3 Methylglyoxal synthase",
+ "4.2.3.4 3-dehydroquinate synthase",
+ "4.2.3.5 Chorismate synthase",
+ "4.2.3.6 Trichodiene synthase",
+ "4.2.3.7 Pentalenene synthase",
+ "4.2.3.8 Casbene synthase",
+ "4.2.3.9 Aristolochene synthase",
+ "4.2.3.10 (-)-endo-fenchol synthase",
+ "4.2.3.11 Sabinene-hydrate synthase",
+ "4.2.3.12 6-pyruvoyltetrahydropterin synthase",
+ "4.2.3.13 (+)-delta-cadinene synthase",
+ "4.2.3.14 Pinene synthase",
+ "4.2.3.15 Myrcene synthase",
+ "4.2.3.16 (4S)-limonene synthase",
+ "4.2.3.17 Taxadiene synthase",
+ "4.2.3.18 Abietadiene synthase",
+ "4.2.3.19 Ent-kaurene synthase",
+ "4.2.3.20 (R)-limonene synthase",
+ "4.2.3.21 Vetispiradiene synthase",
+ "4.2.3.22 Germacradienol synthase",
+ "4.2.3.23 Germacrene-A synthase",
+ "4.2.3.24 Amorpha-4,11-diene synthase",
+ "4.2.3.25 S-linalool synthase",
+ "4.2.3.26 R-linalool synthase",
+ "4.2.3.27 Isoprene synthase",
+ "4.2.3.28 Ent-cassa-12,15-diene synthase",
+ "4.2.3.29 Ent-sandaracopimaradiene synthase",
+ "4.2.3.30 Ent-pimara-8(14),15-diene synthase",
+ "4.2.3.31 Ent-pimara-9(11),15-diene synthase",
+ "4.2.3.32 Levopimaradiene synthase",
+ "4.2.3.33 Stemar-13-ene synthase",
+ "4.2.3.34 Stemod-13(17)-ene synthase",
+ "4.2.3.35 Syn-pimara-7,15-diene synthase",
+ "4.2.3.36 Terpentetriene synthase",
+ "4.2.3.37 Epi-isozizaene synthase",
+ "4.2.3.38 Alpha-bisabolene synthase",
+ "4.2.3.39 Epi-cedrol synthase",
+ "4.2.3.40 (Z)-gamma-bisabolene synthase",
+ "4.2.3.41 Elisabethatriene synthase",
+ "4.2.3.42 Aphidicolan-16-beta-ol synthase",
+ "4.2.3.43 Fusicocca-2,10(14)-diene synthase",
+ "4.2.3.44 Isopimara-7,15-diene synthase",
+ "4.2.3.45 Phyllocladan-16-alpha-ol synthase",
+ "4.2.3.46 Alpha-farnesene synthase",
+ "4.2.3.47 Beta-farnesene synthase",
+ "4.2.3.48 (3S,6E)-nerolidol synthase",
+ "4.2.3.49 (3R,6E)-nerolidol synthase",
+ "4.2.3.50 (+)-alpha-santalene synthase ((2Z,6Z)-farnesyl diphosphate cyclizing)",
+ "4.2.3.51 Beta-phellandrene synthase (neryl-diphosphate-cyclizing)",
+ "4.2.3.52 (4S)-beta-phellandrene synthase (geranyl-diphosphate-cyclizing)",
+ "4.2.3.53 (+)-endo-beta-bergamotene synthase ((2Z,6Z)-farnesyl diphosphate cyclizing)",
+ "4.2.3.54 (-)-endo-alpha-bergamotene synthase ((2Z,6Z)-farnesyl diphosphate cyclizing)",
+ "4.2.3.55 (S)-beta-bisabolene synthase",
+ "4.2.3.56 Gamma-humulene synthase",
+ "4.2.3.57 Beta-caryophyllene synthase",
+ "4.2.3.58 Longifolene synthase",
+ "4.2.3.59 (E)-gamma-bisabolene synthase",
+ "4.2.3.60 Germacrene C synthase",
+ "4.2.3.n2 Delta-selinene synthase",
+ "4.2.3.n4 (-)-camphene synthase",
+ "4.2.3.n6 Terpinolene synthase",
+ "4.2.3.n7 (-)-(S)-limonene/(-)-alpha-pinene synthase",
+ "4.2.3.n8 Ent-isokaurene synthase",
+ "4.2.3.n12 Zingiberene synthase",
+ "4.2.3.n14 2-methylisoborneol synthase",
+ "4.2.99.12 Carboxymethyloxysuccinate lyase",
+ "4.2.99.18 DNA-(apurinic or apyrimidinic site) lyase",
+ "4.2.99.20 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase",
+ "4.2.99.21 Isochorismate lyase",
+ "4.3.1.1 Aspartate ammonia-lyase",
+ "4.3.1.2 Methylaspartate ammonia-lyase",
+ "4.3.1.3 Histidine ammonia-lyase",
+ "4.3.1.4 Formimidoyltetrahydrofolate cyclodeaminase",
+ "4.3.1.6 Beta-alanyl-CoA ammonia-lyase",
+ "4.3.1.7 Ethanolamine ammonia-lyase",
+ "4.3.1.9 Glucosaminate ammonia-lyase",
+ "4.3.1.10 Serine-sulfate ammonia-lyase",
+ "4.3.1.12 Ornithine cyclodeaminase",
+ "4.3.1.13 Carbamoyl-serine ammonia-lyase",
+ "4.3.1.14 3-aminobutyryl-CoA ammonia-lyase",
+ "4.3.1.15 Diaminopropionate ammonia-lyase",
+ "4.3.1.16 Threo-3-hydroxy-L-aspartate ammonia-lyase",
+ "4.3.1.17 L-serine ammonia-lyase",
+ "4.3.1.18 D-serine ammonia-lyase",
+ "4.3.1.19 Threonine ammonia-lyase",
+ "4.3.1.20 Erythro-3-hydroxy-L-aspartate ammonia-lyase",
+ "4.3.1.22 3,4-dihydroxyphenylalanine reductive deaminase",
+ "4.3.1.23 Tyrosine ammonia-lyase",
+ "4.3.1.24 Phenylalanine ammonia-lyase",
+ "4.3.1.25 Phenylalanine/tyrosine ammonia-lyase",
+ "4.3.1.26 Chromopyrrolate synthase",
+ "4.3.1.27 Threo-3-hydroxy-D-aspartate ammonia-lyase",
+ "4.3.2.1 Argininosuccinate lyase",
+ "4.3.2.2 Adenylosuccinate lyase",
+ "4.3.2.3 Ureidoglycolate lyase",
+ "4.3.2.4 Purine imidazole-ring cyclase",
+ "4.3.2.5 Peptidylamidoglycolate lyase",
+ "4.3.3.1 3-ketovalidoxylamine C-N-lyase",
+ "4.3.3.2 Strictosidine synthase",
+ "4.3.3.3 Deacetylisoipecoside synthase",
+ "4.3.3.4 Deacetylipecoside synthase",
+ "4.3.3.5 4'-demethylrebeccamycin synthase",
+ "4.3.99.2 Carboxybiotin decarboxylase",
+ "4.4.1.1 Cystathionine gamma-lyase",
+ "4.4.1.2 Homocysteine desulfhydrase",
+ "4.4.1.3 Dimethylpropiothetin dethiomethylase",
+ "4.4.1.4 Alliin lyase",
+ "4.4.1.5 Lactoylglutathione lyase",
+ "4.4.1.6 S-alkylcysteine lyase",
+ "4.4.1.8 Cystathionine beta-lyase",
+ "4.4.1.9 L-3-cyanoalanine synthase",
+ "4.4.1.10 Cysteine lyase",
+ "4.4.1.11 Methionine gamma-lyase",
+ "4.4.1.13 Cysteine-S-conjugate beta-lyase",
+ "4.4.1.14 1-aminocyclopropane-1-carboxylate synthase",
+ "4.4.1.15 D-cysteine desulfhydrase",
+ "4.4.1.16 Selenocysteine lyase",
+ "4.4.1.17 Holocytochrome-c synthase",
+ "4.4.1.19 Phosphosulfolactate synthase",
+ "4.4.1.20 Leukotriene-C(4) synthase",
+ "4.4.1.21 S-ribosylhomocysteine lyase",
+ "4.4.1.22 S-(hydroxymethyl)glutathione synthase",
+ "4.4.1.23 2-hydroxypropyl-CoM lyase",
+ "4.4.1.24 Sulfolactate sulfo-lyase",
+ "4.4.1.25 L-cysteate sulfo-lyase",
+ "4.5.1.1 DDT-dehydrochlorinase",
+ "4.5.1.2 3-chloro-D-alanine dehydrochlorinase",
+ "4.5.1.3 Dichloromethane dehalogenase",
+ "4.5.1.4 L-2-amino-4-chloropent-4-enoate dehydrochlorinase",
+ "4.5.1.5 S-carboxymethylcysteine synthase",
+ "4.6.1.1 Adenylate cyclase",
+ "4.6.1.2 Guanylate cyclase",
+ "4.6.1.6 Cytidylate cyclase",
+ "4.6.1.12 2-C-methyl-D-erythritol 2,4-cyclodiphosphate synthase",
+ "4.6.1.13 Phosphatidylinositol diacylglycerol-lyase",
+ "4.6.1.14 Glycosylphosphatidylinositol diacylglycerol-lyase",
+ "4.6.1.15 FAD-AMP lyase (cyclizing)",
+ "4.99.1.1 Ferrochelatase",
+ "4.99.1.2 Alkylmercury lyase",
+ "4.99.1.3 Sirohydrochlorin cobaltochelatase",
+ "4.99.1.4 Sirohydrochlorin ferrochelatase",
+ "4.99.1.5 Aliphatic aldoxime dehydratase",
+ "4.99.1.6 Indoleacetaldoxime dehydratase",
+ "4.99.1.7 Phenylacetaldoxime dehydratase",
+ "4.99.1.8 Heme ligase",
+ "5.1.1.1 Alanine racemase",
+ "5.1.1.2 Methionine racemase",
+ "5.1.1.3 Glutamate racemase",
+ "5.1.1.4 Proline racemase",
+ "5.1.1.5 Lysine racemase",
+ "5.1.1.6 Threonine racemase",
+ "5.1.1.7 Diaminopimelate epimerase",
+ "5.1.1.8 4-hydroxyproline epimerase",
+ "5.1.1.9 Arginine racemase",
+ "5.1.1.10 Amino-acid racemase",
+ "5.1.1.11 Phenylalanine racemase (ATP-hydrolyzing)",
+ "5.1.1.12 Ornithine racemase",
+ "5.1.1.13 Aspartate racemase",
+ "5.1.1.14 Nocardicin-A epimerase",
+ "5.1.1.15 2-aminohexano-6-lactam racemase",
+ "5.1.1.16 Protein-serine epimerase",
+ "5.1.1.17 Isopenicillin-N epimerase",
+ "5.1.1.18 Serine racemase",
+ "5.1.2.1 Lactate racemase",
+ "5.1.2.2 Mandelate racemase",
+ "5.1.2.3 3-hydroxybutyryl-CoA epimerase",
+ "5.1.2.4 Acetoin racemase",
+ "5.1.2.5 Tartrate epimerase",
+ "5.1.2.6 Isocitrate epimerase",
+ "5.1.3.1 Ribulose-phosphate 3-epimerase",
+ "5.1.3.2 UDP-glucose 4-epimerase",
+ "5.1.3.3 Aldose 1-epimerase",
+ "5.1.3.4 L-ribulose-5-phosphate 4-epimerase",
+ "5.1.3.5 UDP-arabinose 4-epimerase",
+ "5.1.3.6 UDP-glucuronate 4-epimerase",
+ "5.1.3.7 UDP-N-acetylglucosamine 4-epimerase",
+ "5.1.3.8 N-acylglucosamine 2-epimerase",
+ "5.1.3.9 N-acylglucosamine-6-phosphate 2-epimerase",
+ "5.1.3.10 CDP-paratose 2-epimerase",
+ "5.1.3.11 Cellobiose epimerase",
+ "5.1.3.12 UDP-glucuronate 5'-epimerase",
+ "5.1.3.13 dTDP-4-dehydrorhamnose 3,5-epimerase",
+ "5.1.3.14 UDP-N-acetylglucosamine 2-epimerase",
+ "5.1.3.15 Glucose-6-phosphate 1-epimerase",
+ "5.1.3.16 UDP-glucosamine 4-epimerase",
+ "5.1.3.17 Heparosan-N-sulfate-glucuronate 5-epimerase",
+ "5.1.3.18 GDP-mannose 3,5-epimerase",
+ "5.1.3.19 Chondroitin-glucuronate 5-epimerase",
+ "5.1.3.20 ADP-glyceromanno-heptose 6-epimerase",
+ "5.1.3.21 Maltose epimerase",
+ "5.1.3.22 L-ribulose-5-phosphate 3-epimerase",
+ "5.1.3.23 UDP-2,3-diacetamido-2,3-dideoxyglucuronic acid 2-epimerase",
+ "5.1.3.n1 Sialic acid epimerase",
+ "5.1.3.n2 L-fucose mutarotase",
+ "5.1.3.n3 L-rhamnose mutarotase",
+ "5.1.99.1 Methylmalonyl-CoA epimerase",
+ "5.1.99.2 16-hydroxysteroid epimerase",
+ "5.1.99.3 Allantoin racemase",
+ "5.1.99.4 Alpha-methylacyl-CoA racemase",
+ "5.1.99.5 Hydantoin racemase",
+ "5.2.1.1 Maleate isomerase",
+ "5.2.1.2 Maleylacetoacetate isomerase",
+ "5.2.1.3 Retinal isomerase",
+ "5.2.1.4 Maleylpyruvate isomerase",
+ "5.2.1.5 Linoleate isomerase",
+ "5.2.1.6 Furylfuramide isomerase",
+ "5.2.1.7 Retinol isomerase",
+ "5.2.1.8 Peptidylprolyl isomerase",
+ "5.2.1.9 Farnesol 2-isomerase",
+ "5.2.1.10 2-chloro-4-carboxymethylenebut-2-en-1,4-olide isomerase",
+ "5.3.1.1 Triose-phosphate isomerase",
+ "5.3.1.3 Arabinose isomerase",
+ "5.3.1.4 L-arabinose isomerase",
+ "5.3.1.5 Xylose isomerase",
+ "5.3.1.6 Ribose-5-phosphate isomerase",
+ "5.3.1.7 Mannose isomerase",
+ "5.3.1.8 Mannose-6-phosphate isomerase",
+ "5.3.1.9 Glucose-6-phosphate isomerase",
+ "5.3.1.12 Glucuronate isomerase",
+ "5.3.1.13 Arabinose-5-phosphate isomerase",
+ "5.3.1.14 L-rhamnose isomerase",
+ "5.3.1.15 D-lyxose ketol-isomerase",
+ "5.3.1.16 1-(5-phosphoribosyl)-5-((5-phosphoribosylamino)methylideneamino)imidazole-4-carboxamide isomerase",
+ "5.3.1.17 4-deoxy-L-threo-5-hexosulose-uronate ketol-isomerase",
+ "5.3.1.20 Ribose isomerase",
+ "5.3.1.21 Corticosteroid side-chain-isomerase",
+ "5.3.1.22 Hydroxypyruvate isomerase",
+ "5.3.1.23 S-methyl-5-thioribose-1-phosphate isomerase",
+ "5.3.1.24 Phosphoribosylanthranilate isomerase",
+ "5.3.1.25 L-fucose isomerase",
+ "5.3.1.26 Galactose-6-phosphate isomerase",
+ "5.3.1.27 6-phospho-3-hexuloisomerase",
+ "5.3.1.28 D-sedoheptulose 7-phosphate isomerase",
+ "5.3.1.n1 5-deoxy-glucuronate isomerase",
+ "5.3.2.1 Phenylpyruvate tautomerase",
+ "5.3.2.2 Oxaloacetate tautomerase",
+ "5.3.2.n1 2,3-diketo-5-methylthiopentyl-1-phosphate enolase",
+ "5.3.3.1 Steroid Delta-isomerase",
+ "5.3.3.2 Isopentenyl-diphosphate Delta-isomerase",
+ "5.3.3.3 Vinylacetyl-CoA Delta-isomerase",
+ "5.3.3.4 Muconolactone Delta-isomerase",
+ "5.3.3.5 Cholestenol Delta-isomerase",
+ "5.3.3.6 Methylitaconate Delta-isomerase",
+ "5.3.3.7 Aconitate Delta-isomerase",
+ "5.3.3.8 Dodecenoyl-CoA isomerase",
+ "5.3.3.9 Prostaglandin-A(1) Delta-isomerase",
+ "5.3.3.10 5-carboxymethyl-2-hydroxymuconate Delta-isomerase",
+ "5.3.3.11 Isopiperitenone Delta-isomerase",
+ "5.3.3.12 L-dopachrome isomerase",
+ "5.3.3.13 Polyenoic fatty acid isomerase",
+ "5.3.3.14 Trans-2-decenoyl-[acyl-carrier-protein] isomerase",
+ "5.3.3.15 Ascopyrone tautomerase",
+ "5.3.4.1 Protein disulfide-isomerase",
+ "5.3.99.2 Prostaglandin-D synthase",
+ "5.3.99.3 Prostaglandin-E synthase",
+ "5.3.99.4 Prostaglandin-I synthase",
+ "5.3.99.5 Thromboxane-A synthase",
+ "5.3.99.6 Allene-oxide cyclase",
+ "5.3.99.7 Styrene-oxide isomerase",
+ "5.3.99.8 Capsanthin/capsorubin synthase",
+ "5.3.99.9 Neoxanthin synthase",
+ "5.3.99.n1 2-keto-myo-inositol isomerase",
+ "5.4.1.1 Lysolecithin acylmutase",
+ "5.4.1.2 Precorrin-8X methylmutase",
+ "5.4.2.1 Phosphoglycerate mutase",
+ "5.4.2.2 Phosphoglucomutase",
+ "5.4.2.3 Phosphoacetylglucosamine mutase",
+ "5.4.2.4 Bisphosphoglycerate mutase",
+ "5.4.2.5 Phosphoglucomutase (glucose-cofactor)",
+ "5.4.2.6 Beta-phosphoglucomutase",
+ "5.4.2.7 Phosphopentomutase",
+ "5.4.2.8 Phosphomannomutase",
+ "5.4.2.9 Phosphoenolpyruvate mutase",
+ "5.4.2.10 Phosphoglucosamine mutase",
+ "5.4.3.2 Lysine 2,3-aminomutase",
+ "5.4.3.3 Beta-lysine 5,6-aminomutase",
+ "5.4.3.4 D-lysine 5,6-aminomutase",
+ "5.4.3.5 D-ornithine 4,5-aminomutase",
+ "5.4.3.6 Tyrosine 2,3-aminomutase",
+ "5.4.3.7 Leucine 2,3-aminomutase",
+ "5.4.3.8 Glutamate-1-semialdehyde 2,1-aminomutase",
+ "5.4.4.1 (Hydroxyamino)benzene mutase",
+ "5.4.4.2 Isochorismate synthase",
+ "5.4.4.3 3-(hydroxyamino)phenol mutase",
+ "5.4.99.1 Methylaspartate mutase",
+ "5.4.99.2 Methylmalonyl-CoA mutase",
+ "5.4.99.3 2-acetolactate mutase",
+ "5.4.99.4 2-methyleneglutarate mutase",
+ "5.4.99.5 Chorismate mutase",
+ "5.4.99.7 Lanosterol synthase",
+ "5.4.99.8 Cycloartenol synthase",
+ "5.4.99.9 UDP-galactopyranose mutase",
+ "5.4.99.11 Isomaltulose synthase",
+ "5.4.99.12 tRNA pseudouridine(38-40) synthase",
+ "5.4.99.13 Isobutyryl-CoA mutase",
+ "5.4.99.14 4-carboxymethyl-4-methylbutenolide mutase",
+ "5.4.99.15 (1->4)-alpha-D-glucan 1-alpha-D-glucosylmutase",
+ "5.4.99.16 Maltose alpha-D-glucosyltransferase",
+ "5.4.99.17 Squalene--hopene cyclase",
+ "5.4.99.18 5-(carboxyamino)imidazole ribonucleotide mutase",
+ "5.4.99.19 16S rRNA pseudouridine(516) synthase",
+ "5.4.99.20 23S rRNA pseudouridine(2457) synthase",
+ "5.4.99.21 23S rRNA pseudouridine(2604) synthase",
+ "5.4.99.22 23S rRNA pseudouridine(2605) synthase",
+ "5.4.99.23 23S rRNA pseudouridine(1911/1915/1917) synthase",
+ "5.4.99.24 23S rRNA pseudouridine(955/2504/2580) synthase",
+ "5.4.99.25 tRNA pseudouridine(55) synthase",
+ "5.4.99.26 tRNA pseudouridine(65) synthase",
+ "5.4.99.27 tRNA pseudouridine(13) synthase",
+ "5.4.99.28 tRNA pseudouridine(32) synthase",
+ "5.4.99.29 23S rRNA pseudouridine(746) synthase",
+ "5.4.99.30 UDP-arabinopyranose mutase",
+ "5.5.1.1 Muconate cycloisomerase",
+ "5.5.1.2 3-carboxy-cis,cis-muconate cycloisomerase",
+ "5.5.1.3 Tetrahydroxypteridine cycloisomerase",
+ "5.5.1.4 Inositol-3-phosphate synthase",
+ "5.5.1.5 Carboxy-cis,cis-muconate cyclase",
+ "5.5.1.6 Chalcone isomerase",
+ "5.5.1.7 Chloromuconate cycloisomerase",
+ "5.5.1.8 Bornyl diphosphate synthase",
+ "5.5.1.9 Cycloeucalenol cycloisomerase",
+ "5.5.1.10 Alpha-pinene-oxide decyclase",
+ "5.5.1.11 Dichloromuconate cycloisomerase",
+ "5.5.1.12 Copalyl diphosphate synthase",
+ "5.5.1.13 Ent-copalyl diphosphate synthase",
+ "5.5.1.14 Syn-copalyl-diphosphate synthase",
+ "5.5.1.15 Terpentedienyl-diphosphate synthase",
+ "5.5.1.16 Halimadienyl-diphosphate synthase",
+ "5.5.1.17 (S)-beta-macrocarpene synthase",
+ "5.5.1.n1 D-ribose pyranase",
+ "5.99.1.1 Thiocyanate isomerase",
+ "5.99.1.2 DNA topoisomerase",
+ "5.99.1.3 DNA topoisomerase (ATP-hydrolyzing)",
+ "5.99.1.4 2-hydroxychromene-2-carboxylate isomerase",
+ "6.1.1.1 Tyrosine--tRNA ligase",
+ "6.1.1.2 Tryptophan--tRNA ligase",
+ "6.1.1.3 Threonine--tRNA ligase",
+ "6.1.1.4 Leucine--tRNA ligase",
+ "6.1.1.5 Isoleucine--tRNA ligase",
+ "6.1.1.6 Lysine--tRNA ligase",
+ "6.1.1.7 Alanine--tRNA ligase",
+ "6.1.1.9 Valine--tRNA ligase",
+ "6.1.1.10 Methionine--tRNA ligase",
+ "6.1.1.11 Serine--tRNA ligase",
+ "6.1.1.12 Aspartate--tRNA ligase",
+ "6.1.1.13 D-alanine--poly(phosphoribitol) ligase",
+ "6.1.1.14 Glycine--tRNA ligase",
+ "6.1.1.15 Proline--tRNA ligase",
+ "6.1.1.16 Cysteine--tRNA ligase",
+ "6.1.1.17 Glutamate--tRNA ligase",
+ "6.1.1.18 Glutamine--tRNA ligase",
+ "6.1.1.19 Arginine--tRNA ligase",
+ "6.1.1.20 Phenylalanine--tRNA ligase",
+ "6.1.1.21 Histidine--tRNA ligase",
+ "6.1.1.22 Asparagine--tRNA ligase",
+ "6.1.1.23 Aspartate--tRNA(Asn) ligase",
+ "6.1.1.24 Glutamate--tRNA(Gln) ligase",
+ "6.1.1.25 Lysine--tRNA(Pyl) ligase",
+ "6.1.1.26 Pyrrolysine--tRNA(Pyl) ligase",
+ "6.1.1.27 O-phosphoserine--tRNA ligase",
+ "6.1.2.1 D-alanine--(R)-lactate ligase",
+ "6.2.1.1 Acetate--CoA ligase",
+ "6.2.1.2 Butyrate--CoA ligase",
+ "6.2.1.3 Long-chain-fatty-acid--CoA ligase",
+ "6.2.1.4 Succinate--CoA ligase (GDP-forming)",
+ "6.2.1.5 Succinate--CoA ligase (ADP-forming)",
+ "6.2.1.6 Glutarate--CoA ligase",
+ "6.2.1.7 Cholate--CoA ligase",
+ "6.2.1.8 Oxalate--CoA ligase",
+ "6.2.1.9 Malate--CoA ligase",
+ "6.2.1.10 Acid--CoA ligase (GDP-forming)",
+ "6.2.1.11 Biotin--CoA ligase",
+ "6.2.1.12 4-coumarate--CoA ligase",
+ "6.2.1.13 Acetate--CoA ligase (ADP-forming)",
+ "6.2.1.14 6-carboxyhexanoate--CoA ligase",
+ "6.2.1.15 Arachidonate--CoA ligase",
+ "6.2.1.16 Acetoacetate--CoA ligase",
+ "6.2.1.17 Propionate--CoA ligase",
+ "6.2.1.18 Citrate--CoA ligase",
+ "6.2.1.19 Long-chain-fatty-acid--luciferin-component ligase",
+ "6.2.1.20 Long-chain-fatty-acid--[acyl-carrier-protein] ligase",
+ "6.2.1.22 [Citrate (pro-3S)-lyase] ligase",
+ "6.2.1.23 Dicarboxylate--CoA ligase",
+ "6.2.1.24 Phytanate--CoA ligase",
+ "6.2.1.25 Benzoate--CoA ligase",
+ "6.2.1.26 o-succinylbenzoate--CoA ligase",
+ "6.2.1.27 4-hydroxybenzoate--CoA ligase",
+ "6.2.1.28 3-alpha,7-alpha-dihydroxy-5-beta-cholestanate--CoA ligase",
+ "6.2.1.30 Phenylacetate--CoA ligase",
+ "6.2.1.31 2-furoate--CoA ligase",
+ "6.2.1.32 Anthranilate--CoA ligase",
+ "6.2.1.33 4-chlorobenzoate--CoA ligase",
+ "6.2.1.34 Trans-feruloyl-CoA synthase",
+ "6.2.1.35 ACP-SH:acetate ligase",
+ "6.2.1.36 3-hydroxypropionyl-CoA synthase",
+ "6.2.1.n1 3-hydroxybenzoate--CoA ligase",
+ "6.3.1.1 Aspartate--ammonia ligase",
+ "6.3.1.2 Glutamate--ammonia ligase",
+ "6.3.1.4 Aspartate--ammonia ligase (ADP-forming)",
+ "6.3.1.5 NAD(+) synthase",
+ "6.3.1.6 Glutamate--ethylamine ligase",
+ "6.3.1.7 4-methyleneglutamate--ammonia ligase",
+ "6.3.1.8 Glutathionylspermidine synthase",
+ "6.3.1.9 Trypanothione synthase",
+ "6.3.1.10 Adenosylcobinamide-phosphate synthase",
+ "6.3.1.11 Glutamate--putrescine ligase",
+ "6.3.1.12 D-aspartate ligase",
+ "6.3.1.13 L-cysteine:1D-myo-inositol 2-amino-2-deoxy-alpha-D-glucopyranoside ligase",
+ "6.3.1.14 Diphthine--ammonia ligase",
+ "6.3.2.1 Pantoate--beta-alanine ligase",
+ "6.3.2.2 Glutamate--cysteine ligase",
+ "6.3.2.3 Glutathione synthase",
+ "6.3.2.4 D-alanine--D-alanine ligase",
+ "6.3.2.5 Phosphopantothenate--cysteine ligase",
+ "6.3.2.6 Phosphoribosylaminoimidazolesuccinocarboxamide synthase",
+ "6.3.2.7 UDP-N-acetylmuramoyl-L-alanyl-D-glutamate--L-lysine ligase",
+ "6.3.2.8 UDP-N-acetylmuramate--L-alanine ligase",
+ "6.3.2.9 UDP-N-acetylmuramoyl-L-alanine--D-glutamate ligase",
+ "6.3.2.10 UDP-N-acetylmuramoyl-tripeptide--D-alanyl-D-alanine ligase",
+ "6.3.2.11 Carnosine synthase",
+ "6.3.2.12 Dihydrofolate synthase",
+ "6.3.2.13 UDP-N-acetylmuramoyl-L-alanyl-D-glutamate--2,6-diaminopimelate ligase",
+ "6.3.2.14 2,3-dihydroxybenzoate--serine ligase",
+ "6.3.2.16 D-alanine--alanyl-poly(glycerolphosphate) ligase",
+ "6.3.2.17 Tetrahydrofolate synthase",
+ "6.3.2.18 Gamma-glutamylhistamine synthase",
+ "6.3.2.19 Ubiquitin--protein ligase",
+ "6.3.2.20 Indoleacetate--lysine synthetase",
+ "6.3.2.21 Ubiquitin--calmodulin ligase",
+ "6.3.2.23 Homoglutathione synthase",
+ "6.3.2.24 Tyrosine--arginine ligase",
+ "6.3.2.25 Tubulin--tyrosine ligase",
+ "6.3.2.26 N-(5-amino-5-carboxypentanoyl)-L-cysteinyl-D-valine synthase",
+ "6.3.2.27 Aerobactin synthase",
+ "6.3.2.28 L-amino-acid alpha-ligase",
+ "6.3.2.29 Cyanophycin synthase (L-aspartate-adding)",
+ "6.3.2.30 Cyanophycin synthase (L-arginine-adding)",
+ "6.3.2.31 Coenzyme F420-0:L-glutamate ligase",
+ "6.3.2.32 Coenzyme gamma-F420-2:alpha-L-glutamate ligase",
+ "6.3.2.33 Tetrahydrosarcinapterin synthase",
+ "6.3.2.34 Coenzyme F420-1:gamma-L-glutamate ligase",
+ "6.3.2.35 D-alanine--D-serine ligase",
+ "6.3.2.36 4-phosphopantoate--beta-alanine ligase",
+ "6.3.2.n1 UDP-N-acetylmuramoyl-L-alanyl-D-glutamate--D-lysine ligase",
+ "6.3.2.n2 Pup--protein ligase",
+ "6.3.2.n3 ISG15--protein ligase",
+ "6.3.2.n4 Alpha-aminoadipate--LysW ligase",
+ "6.3.2.n5 Pantoate--beta-alanine ligase (ADP-forming)",
+ "6.3.3.1 Phosphoribosylformylglycinamidine cyclo-ligase",
+ "6.3.3.2 5-formyltetrahydrofolate cyclo-ligase",
+ "6.3.3.3 Dethiobiotin synthase",
+ "6.3.3.4 (Carboxyethyl)arginine beta-lactam-synthase",
+ "6.3.4.1 GMP synthase",
+ "6.3.4.2 CTP synthase",
+ "6.3.4.3 Formate--tetrahydrofolate ligase",
+ "6.3.4.4 Adenylosuccinate synthase",
+ "6.3.4.5 Argininosuccinate synthase",
+ "6.3.4.6 Urea carboxylase",
+ "6.3.4.7 Ribose-5-phosphate--ammonia ligase",
+ "6.3.4.8 Imidazoleacetate--phosphoribosyldiphosphate ligase",
+ "6.3.4.9 Biotin--[methylmalonyl-CoA-carboxytransferase] ligase",
+ "6.3.4.10 Biotin--[propionyl-CoA-carboxylase (ATP-hydrolyzing)] ligase",
+ "6.3.4.11 Biotin--[methylcrotonoyl-CoA-carboxylase] ligase",
+ "6.3.4.12 Glutamate--methylamine ligase",
+ "6.3.4.13 Phosphoribosylamine--glycine ligase",
+ "6.3.4.14 Biotin carboxylase",
+ "6.3.4.15 Biotin--[acetyl-CoA-carboxylase] ligase",
+ "6.3.4.16 Carbamoyl-phosphate synthase (ammonia)",
+ "6.3.4.17 Formate--dihydrofolate ligase",
+ "6.3.4.18 5-(carboxyamino)imidazole ribonucleotide synthase",
+ "6.3.5.1 NAD(+) synthase (glutamine-hydrolyzing)",
+ "6.3.5.2 GMP synthase (glutamine-hydrolyzing)",
+ "6.3.5.3 Phosphoribosylformylglycinamidine synthase",
+ "6.3.5.4 Asparagine synthase (glutamine-hydrolyzing)",
+ "6.3.5.5 Carbamoyl-phosphate synthase (glutamine-hydrolyzing)",
+ "6.3.5.6 Asparaginyl-tRNA synthase (glutamine-hydrolyzing)",
+ "6.3.5.7 Glutaminyl-tRNA synthase (glutamine-hydrolyzing)",
+ "6.3.5.9 Hydrogenobyrinic acid a,c-diamide synthase (glutamine-hydrolyzing)",
+ "6.3.5.10 Adenosylcobyric acid synthase (glutamine-hydrolyzing)",
+ "6.3.5.11 Cobyrinate a,c-diamide synthase (glutamine-hydrolyzing)",
+ "6.4.1.1 Pyruvate carboxylase",
+ "6.4.1.2 Acetyl-CoA carboxylase",
+ "6.4.1.3 Propionyl-CoA carboxylase",
+ "6.4.1.4 Methylcrotonoyl-CoA carboxylase",
+ "6.4.1.5 Geranoyl-CoA carboxylase",
+ "6.4.1.6 Acetone carboxylase",
+ "6.4.1.7 2-oxoglutarate carboxylase",
+ "6.5.1.1 DNA ligase (ATP)",
+ "6.5.1.2 DNA ligase (NAD(+))",
+ "6.5.1.3 RNA ligase (ATP)",
+ "6.5.1.4 RNA-3'-phosphate cyclase",
+ "6.6.1.1 Magnesium chelatase",
+ "6.6.1.2 Cobaltochelatase"
};
diff --git a/api/explore.h b/api/explore.h
index b7f2ea72..ee000c11 100644
--- a/api/explore.h
+++ b/api/explore.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 6/30/98
*
-* $Revision: 6.55 $
+* $Revision: 6.57 $
*
* File Description: Reengineered and optimized exploration functions
* to be used for future code
@@ -212,6 +212,11 @@ NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXref (
SeqFeatPtr sfp
);
+NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx (
+ SeqFeatPtr sfp,
+ ObjectIdPtr PNTR oipP
+);
+
NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed (
GeneRefPtr grp
);
@@ -571,6 +576,17 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature (
SeqMgrFeatContext PNTR context
);
+NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeatureEx (
+ SeqLocPtr slp,
+ Uint2 subtype,
+ VoidPtr featarray,
+ Int4 numfeats,
+ Int4Ptr position,
+ Int2 overlapType,
+ SeqMgrFeatContext PNTR context,
+ Boolean special
+);
+
NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureInIndex (
BioseqPtr bsp,
VoidPtr featarray,
diff --git a/api/gather.c b/api/gather.c
index 1100f519..baef0f11 100644
--- a/api/gather.c
+++ b/api/gather.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 10/7/94
*
-* $Revision: 6.56 $
+* $Revision: 6.57 $
*
* File Description:
*
@@ -181,6 +181,11 @@ NLM_EXTERN Boolean SeqLocOffset (SeqLocPtr seq_loc, SeqLocPtr sfp_loc, GatherRan
return FALSE;
}
+ if( sfp_loc->choice == SEQLOC_NULL ||
+ sfp_loc->choice == SEQLOC_EMPTY )
+ {
+ return FALSE;
+ }
if(ck_extreme(sfp_loc, &across_zero))
{
diff --git a/api/gbftdef.h b/api/gbftdef.h
index c91d0d3e..8c20fe02 100644
--- a/api/gbftdef.h
+++ b/api/gbftdef.h
@@ -121,13 +121,14 @@
#define GBQUAL_artificial_location 111
#define GBQUAL_non_functional 112
#define GBQUAL_pseudogene 113
+#define GBQUAL_mobile_element_type 114
-#define ParFlat_TOTAL_GBQUAL 114
+#define ParFlat_TOTAL_GBQUAL 115
#define ParFlat_TOTAL_IntOr 3
#define ParFlat_TOTAL_LRB 3
#define ParFlat_TOTAL_Exp 2
#define ParFlat_TOTAL_Rpt 7
-#define ParFlat_TOTAL_GBFEAT 69
+#define ParFlat_TOTAL_GBFEAT 70
#define Class_pos_aa 1
#define Class_text 2
diff --git a/api/gbftglob.c b/api/gbftglob.c
index e7863a5a..80a140c4 100644
--- a/api/gbftglob.c
+++ b/api/gbftglob.c
@@ -65,8 +65,8 @@ static GbFeatName STATIC__ParFlat_GBQual_names[ParFlat_TOTAL_GBQUAL] = {
{"tag_peptide", Class_text}, { "mating_type", Class_text},
{"satellite", Class_text}, { "gene_synonym", Class_text},
{"UniProtKB_evidence", Class_text}, {"haplogroup", Class_text},
- {"artificial_location", Class_none}, {"non_functional", Class_none},
- {"pseudogene", Class_none}
+ {"artificial_location", Class_text}, {"non_functional", Class_none},
+ {"pseudogene", Class_none}, {"mobile_element_type", Class_text}
};
NLM_EXTERN GbFeatNamePtr x_ParFlat_GBQual_names(void) {
@@ -95,7 +95,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -121,7 +120,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -145,7 +143,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -172,7 +169,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -201,7 +197,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -224,7 +219,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"conflict", 1, {
+ {"conflict", 1, {
GBQUAL_citation, -1, -1, -1, -1}, 14,
{
GBQUAL_allele,
@@ -247,7 +242,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1}},
- {"D-loop", 0, {-1, -1, -1, -1, -1}, 15,
+ {"D-loop", 0, {-1, -1, -1, -1, -1}, 15,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -257,7 +252,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -279,7 +273,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -296,7 +289,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1}},
- {"enhancer", 0, {-1, -1, -1, -1, -1}, 17,
+ {"enhancer", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
GBQUAL_bound_moiety,
@@ -307,7 +300,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -332,7 +324,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -350,7 +341,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1}},
- {"gap", 1, {
+ {"gap", 1, {
GBQUAL_estimated_length, -1, -1, -1, -1}, 5,
{
GBQUAL_evidence,
@@ -374,7 +365,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -397,7 +387,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -427,7 +416,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -453,7 +441,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -480,7 +467,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -508,7 +494,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_note,
GBQUAL_old_locus_tag,
@@ -520,7 +505,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 25,
+ {"mat_peptide", 0, {-1, -1, -1, -1, -1}, 25,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -535,7 +520,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene_synonym,
GBQUAL_inference,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -551,7 +535,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_binding", 1, {
+ {"misc_binding", 1, {
GBQUAL_bound_moiety, -1, -1, -1, -1}, 16,
{
GBQUAL_allele,
@@ -563,7 +547,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -587,7 +570,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -602,7 +584,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1}},
- {"misc_feature", 0, {-1, -1, -1, -1, -1}, 23,
+ {"misc_feature", 0, {-1, -1, -1, -1, -1}, 23,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -613,7 +595,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -642,7 +623,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -655,7 +635,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 23,
+ {"misc_RNA", 0, {-1, -1, -1, -1, -1}, 23,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -666,7 +646,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -685,7 +664,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1}},
- {"misc_signal", 0, {-1, -1, -1, -1, -1}, 19,
+ {"misc_signal", 0, {-1, -1, -1, -1, -1}, 19,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -696,7 +675,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -711,7 +689,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1}},
- {"misc_structure", 0, {-1, -1, -1, -1, -1}, 17,
+ {"misc_structure", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -722,7 +700,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -735,7 +712,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1}},
- {"modified_base", 1, {
+ {"modified_base", 1, {
GBQUAL_mod_base, -1, -1, -1, -1}, 15,
{
GBQUAL_allele,
@@ -747,7 +724,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -758,7 +734,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"mRNA", 0, {-1, -1, -1, -1, -1}, 25,
+ {"mRNA", 0, {-1, -1, -1, -1, -1}, 25,
{
GBQUAL_allele,
GBQUAL_artificial_location,
@@ -770,7 +746,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -789,7 +764,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"mutation", 0, {-1, -1, -1, -1, -1}, 18,
+ {"mutation", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_citation,
GBQUAL_db_xref,
@@ -799,7 +774,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -814,7 +788,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"N_region", 0, {-1, -1, -1, -1, -1}, 18,
+ {"N_region", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_citation,
GBQUAL_db_xref,
@@ -823,7 +797,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -839,7 +812,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"ncRNA", 1, {
+ {"ncRNA", 1, {
GBQUAL_ncRNA_class, -1, -1, -1, -1}, 23,
{
GBQUAL_allele,
@@ -851,7 +824,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -903,7 +875,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_experiment,
GBQUAL_function,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_map,
GBQUAL_non_functional,
GBQUAL_note,
@@ -929,7 +900,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -957,7 +927,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -979,7 +948,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -991,7 +959,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1}},
- {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 20,
+ {"precursor_RNA", 0, {-1, -1, -1, -1, -1}, 20,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1002,7 +970,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1018,7 +985,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1}},
- {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 18,
+ {"prim_transcript", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1029,7 +996,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1043,7 +1009,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"primer_bind", 0, {-1, -1, -1, -1, -1}, 17,
+ {"primer_bind", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1053,7 +1019,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1067,7 +1032,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1}},
- {"promoter", 0, {-1, -1, -1, -1, -1}, 23,
+ {"promoter", 0, {-1, -1, -1, -1, -1}, 23,
{
GBQUAL_allele,
GBQUAL_bound_moiety,
@@ -1079,7 +1044,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1097,7 +1061,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1}},
- {"protein_bind", 1, {
+ {"protein_bind", 1, {
GBQUAL_bound_moiety, -1, -1, -1, -1}, 18,
{
GBQUAL_allele,
@@ -1109,7 +1073,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1123,7 +1086,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"RBS", 0, {-1, -1, -1, -1, -1}, 16,
+ {"RBS", 0, {-1, -1, -1, -1, -1}, 16,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1133,7 +1096,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1146,7 +1108,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"repeat_region", 0, {-1, -1, -1, -1, -1}, 26,
+ {"repeat_region", 0, {-1, -1, -1, -1, -1}, 26,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1158,7 +1120,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene_synonym,
GBQUAL_inference,
GBQUAL_insertion_seq,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_mobile_element,
@@ -1178,7 +1139,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 21,
+ {"repeat_unit", 0, {-1, -1, -1, -1, -1}, 21,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1189,7 +1150,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1206,7 +1166,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1}},
- {"rep_origin", 0, {-1, -1, -1, -1, -1}, 17,
+ {"rep_origin", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1217,7 +1177,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1230,7 +1189,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1}},
- {"rRNA", 0, {-1, -1, -1, -1, -1}, 22,
+ {"rRNA", 0, {-1, -1, -1, -1, -1}, 22,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1241,7 +1200,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1259,7 +1217,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1}},
- {"S_region", 0, {-1, -1, -1, -1, -1}, 20,
+ {"S_region", 0, {-1, -1, -1, -1, -1}, 20,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1269,7 +1227,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1286,7 +1243,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1}},
- {"satellite", 0, {-1, -1, -1, -1, -1}, 21,
+ {"satellite", 0, {-1, -1, -1, -1, -1}, 21,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1296,7 +1253,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1314,7 +1270,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1}},
- {"scRNA", 0, {-1, -1, -1, -1, -1}, 21,
+ {"scRNA", 0, {-1, -1, -1, -1, -1}, 21,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1325,7 +1281,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1353,7 +1308,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1370,7 +1324,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1}},
- {"snoRNA", 0, {-1, -1, -1, -1, -1}, 21,
+ {"snoRNA", 0, {-1, -1, -1, -1, -1}, 21,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1381,7 +1335,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1398,7 +1351,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1}},
- {"snRNA", 0, {-1, -1, -1, -1, -1}, 21,
+ {"snRNA", 0, {-1, -1, -1, -1, -1}, 21,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1409,7 +1362,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1458,7 +1410,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_isolation_source,
GBQUAL_kinetoplast,
GBQUAL_lab_host,
- GBQUAL_label,
GBQUAL_lat_lon,
GBQUAL_macronuclear,
GBQUAL_map,
@@ -1493,7 +1444,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_virion,
GBQUAL_haplogroup,
-1, -1}},
- {"stem_loop", 0, {-1, -1, -1, -1, -1}, 18,
+ {"stem_loop", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1504,7 +1455,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1518,7 +1468,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"STS", 0, {-1, -1, -1, -1, -1}, 16,
+ {"STS", 0, {-1, -1, -1, -1, -1}, 16,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1528,7 +1478,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1541,7 +1490,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 15,
+ {"TATA_signal", 0, {-1, -1, -1, -1, -1}, 15,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1551,7 +1500,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1563,7 +1511,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"terminator", 0, {-1, -1, -1, -1, -1}, 17,
+ {"terminator", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1573,7 +1521,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1587,7 +1534,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1}},
- {"tmRNA", 0, {-1, -1, -1, -1, -1}, 23,
+ {"tmRNA", 0, {-1, -1, -1, -1, -1}, 23,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1598,7 +1545,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1617,7 +1563,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1}},
- {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 21,
+ {"transit_peptide", 0, {-1, -1, -1, -1, -1}, 21,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1628,7 +1574,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1645,7 +1590,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1}},
- {"tRNA", 0, {-1, -1, -1, -1, -1}, 24,
+ {"tRNA", 0, {-1, -1, -1, -1, -1}, 24,
{
GBQUAL_allele,
GBQUAL_anticodon,
@@ -1657,7 +1602,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1676,7 +1620,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1}},
- {"unsure", 0, {-1, -1, -1, -1, -1}, 15,
+ {"unsure", 0, {-1, -1, -1, -1, -1}, 15,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1686,7 +1630,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1698,7 +1641,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}},
- {"V_region", 0, {-1, -1, -1, -1, -1}, 20,
+ {"V_region", 0, {-1, -1, -1, -1, -1}, 20,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1708,7 +1651,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1725,7 +1667,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1}},
- {"V_segment", 0, {-1, -1, -1, -1, -1}, 20,
+ {"V_segment", 0, {-1, -1, -1, -1, -1}, 20,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1735,7 +1677,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_non_functional,
@@ -1752,7 +1693,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1}},
- {"variation", 0, {-1, -1, -1, -1, -1}, 21,
+ {"variation", 0, {-1, -1, -1, -1, -1}, 21,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1764,7 +1705,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1780,7 +1720,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1}},
- {"3'clip", 0, {-1, -1, -1, -1, -1}, 18,
+ {"3'clip", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1791,7 +1731,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1805,7 +1744,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"3'UTR", 0, {-1, -1, -1, -1, -1}, 18,
+ {"3'UTR", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1816,7 +1755,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1830,7 +1768,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"5'clip", 0, {-1, -1, -1, -1, -1}, 18,
+ {"5'clip", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1841,7 +1779,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1855,7 +1792,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"5'UTR", 0, {-1, -1, -1, -1, -1}, 18,
+ {"5'UTR", 0, {-1, -1, -1, -1, -1}, 18,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1866,7 +1803,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1880,7 +1816,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1}},
- {"-10_signal", 0, {-1, -1, -1, -1, -1}, 17,
+ {"-10_signal", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1890,7 +1826,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1904,7 +1839,7 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1}},
- {"-35_signal", 0, {-1, -1, -1, -1, -1}, 17,
+ {"-35_signal", 0, {-1, -1, -1, -1, -1}, 17,
{
GBQUAL_allele,
GBQUAL_citation,
@@ -1914,7 +1849,6 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
GBQUAL_gene,
GBQUAL_gene_synonym,
GBQUAL_inference,
- GBQUAL_label,
GBQUAL_locus_tag,
GBQUAL_map,
GBQUAL_note,
@@ -1927,7 +1861,35 @@ static SematicFeat STATIC__ParFlat_GBFeat[ParFlat_TOTAL_GBFEAT] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1}}
+ -1, -1, -1, -1, -1, -1, -1, -1}},
+ {"mobile_element", 1, {
+ GBQUAL_mobile_element_type, -1, -1, -1, -1}, 21,
+ {
+ GBQUAL_allele,
+ GBQUAL_citation,
+ GBQUAL_db_xref,
+ GBQUAL_evidence,
+ GBQUAL_experiment,
+ GBQUAL_function,
+ GBQUAL_gene,
+ GBQUAL_gene_synonym,
+ GBQUAL_inference,
+ GBQUAL_insertion_seq,
+ GBQUAL_locus_tag,
+ GBQUAL_map,
+ GBQUAL_note,
+ GBQUAL_old_locus_tag,
+ GBQUAL_partial,
+ GBQUAL_rpt_family,
+ GBQUAL_rpt_type,
+ GBQUAL_standard_name,
+ GBQUAL_transposon,
+ GBQUAL_usedin,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1}}
};
NLM_EXTERN SematicFeatPtr x_ParFlat_GBFeat(void) {
diff --git a/api/macroapi.c b/api/macroapi.c
index ecae0231..9b6e3dc5 100755
--- a/api/macroapi.c
+++ b/api/macroapi.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/8/2007
*
-* $Revision: 1.262 $
+* $Revision: 1.405 $
*
* File Description:
*
@@ -61,6 +61,40 @@
#include <seqport.h>
#include <parsegb.h>
#include <salutil.h>
+#include <valid.h>
+
+/* static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data); */
+static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
+static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
+static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
+
+/* VisitBioseqsInSep callback: appends bsp to the ValNodeBlock passed as
+ * userdata when bsp is a nucleotide Bioseq; anything else is ignored. */
+static void GetNucBioseqCallback (BioseqPtr bsp, Pointer userdata)
+{
+  ValNodeBlockPtr block = (ValNodeBlockPtr) userdata;
+
+  /* nothing to collect without a sequence, a nucleotide molecule and a target list */
+  if (bsp == NULL || ! ISA_na (bsp->mol) || block == NULL) {
+    return;
+  }
+  ValNodeAddPointerEx (&(block->head), &(block->tail), OBJ_BIOSEQ, bsp);
+}
+
+/* Returns a ValNode list (choice OBJ_BIOSEQ) of the nucleotide Bioseqs that
+ * VisitBioseqsInSep reaches from sep, or NULL when sep is NULL or none is found.
+ * NOTE(review): nodes appear to borrow the Bioseq pointers -- confirm ownership. */
+static ValNodePtr CollectNucBioseqs (SeqEntryPtr sep)
+{
+  ValNodeBlock collected;
+
+  collected.head = NULL;
+  collected.tail = NULL;
+  if (sep != NULL) {
+    VisitBioseqsInSep (sep, &collected, GetNucBioseqCallback);
+  }
+  return collected.head;
+}
static Boolean IsAllDigits (CharPtr str)
{
@@ -83,7 +117,6 @@ static Boolean IsAllDigits (CharPtr str)
static Boolean IsAllCaps (CharPtr str)
{
CharPtr cp;
- Boolean at_least_one = FALSE;
if (StringHasNoText (str)) return FALSE;
@@ -92,13 +125,47 @@ static Boolean IsAllCaps (CharPtr str)
if (isalpha (*cp)) {
if (islower (*cp)) {
return FALSE;
- } else {
- at_least_one = TRUE;
}
}
cp++;
}
- return at_least_one;
+ return TRUE;
+}
+
+
+/* Returns TRUE when none of the characters of str is an upper-case letter
+ * (digits, spaces and punctuation are ignored); returns FALSE when
+ * StringHasNoText reports str as empty or an upper-case letter is found. */
+static Boolean IsAllLowerCase (CharPtr str)
+{
+  CharPtr cp;
+
+  if (StringHasNoText (str)) return FALSE;
+
+  for (cp = str; *cp != 0; cp++) {
+    /* cast: passing a negative char to a <ctype.h> function is undefined */
+    if (isupper ((unsigned char) *cp)) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+
+/* Returns TRUE when every character of str satisfies ispunct; returns FALSE
+ * when StringHasNoText reports str as empty or any other character occurs. */
+static Boolean IsAllPunctuation (CharPtr str)
+{
+  CharPtr cp;
+
+  if (StringHasNoText (str)) return FALSE;
+  for (cp = str; *cp != 0; cp++) {
+    /* cast: passing a negative char to a <ctype.h> function is undefined */
+    if (!ispunct ((unsigned char) *cp)) {
+      return FALSE;
+    }
+  }
+  return TRUE;
}
@@ -118,6 +185,7 @@ static int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2);
* GetFieldValueForObject
* RemoveFieldValueForObject
* SetFieldValueForObject
+ * SortFieldsForObject
* GetObjectListForFieldType
* GetFieldListForFieldType
* IsFieldTypeEmpty
@@ -186,6 +254,7 @@ NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair)
CDSGeneProtFieldPairPtr cp;
MolinfoFieldPairPtr mp;
StructuredCommentFieldPairPtr scfp;
+ DBLinkFieldPairPtr dbfp;
ValNodePtr vnp;
if (fieldpair == NULL) return NULL;
@@ -284,6 +353,14 @@ NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair)
f->data.ptrvalue = AsnIoMemCopy (scfp->from, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite);
}
break;
+ case FieldPairType_dblink:
+ dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue;
+ if (dbfp != NULL) {
+ f = ValNodeNew (NULL);
+ f->choice = FieldType_dblink;
+ f->data.intvalue = dbfp->from;
+ }
+ break;
}
return f;
}
@@ -301,7 +378,7 @@ NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair)
CDSGeneProtFieldPairPtr cp;
MolinfoFieldPairPtr mp;
StructuredCommentFieldPairPtr scfp;
-
+ DBLinkFieldPairPtr dbfp;
ValNodePtr vnp;
if (fieldpair == NULL) return NULL;
@@ -400,6 +477,14 @@ NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair)
f->data.ptrvalue = AsnIoMemCopy (scfp->to, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite);
}
break;
+ case FieldPairType_dblink:
+ dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue;
+ if (dbfp != NULL) {
+ f = ValNodeNew (NULL);
+ f->choice = FieldType_dblink;
+ f->data.intvalue = dbfp->to;
+ }
+ break;
}
return f;
}
@@ -415,6 +500,7 @@ NLM_EXTERN FieldPairTypePtr BuildFieldPairFromFromField (FieldTypePtr field_from
RnaQualPtr rq;
CDSGeneProtFieldPairPtr cp;
StructuredCommentFieldPairPtr scfp;
+ DBLinkFieldPairPtr dbfp;
ValNodePtr mp;
MolinfoMoleculePairPtr mol_p;
MolinfoTechniquePairPtr tech_p;
@@ -532,6 +618,13 @@ NLM_EXTERN FieldPairTypePtr BuildFieldPairFromFromField (FieldTypePtr field_from
scfp->from = AsnIoMemCopy (field_from, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite);
pair->data.ptrvalue = scfp;
break;
+ case FieldType_dblink:
+ pair = ValNodeNew (NULL);
+ pair->choice = FieldPairType_dblink;
+ dbfp = DBLinkFieldPairNew ();
+ dbfp->from = field_from->data.intvalue;
+ pair->data.ptrvalue = dbfp;
+ break;
}
return pair;
}
@@ -560,6 +653,9 @@ NLM_EXTERN Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice
case FieldPairType_struc_comment_field:
field_type_choice = FieldType_struc_comment_field;
break;
+ case FieldPairType_dblink:
+ field_type_choice = FieldType_dblink;
+ break;
}
return field_type_choice;
@@ -567,12 +663,62 @@ NLM_EXTERN Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice
/* functions for handling single fields */
-NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2)
+
+/* Comparator for source-qualifier choice nodes; ptr1 and ptr2 each point to a
+ * ValNodePtr.  Returns a negative, zero or positive value in the usual
+ * comparator sense, and 0 when either argument pointer is NULL.
+ *
+ * Ordering rules, applied in turn:
+ *   - a NULL node sorts before a non-NULL node;
+ *   - a node with the smaller choice sorts first;
+ *   - equal choice and equal data.intvalue compare equal;
+ *   - text qualifiers: taxname first, taxid second, all others by
+ *     StringCmp of their GetSourceQualName names;
+ *   - every other choice: ascending data.intvalue.
+ */
+static int CompareSourceQuals (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr lhs, rhs;
+  CharPtr    lhs_name, rhs_name;
+
+  if (ptr1 == NULL || ptr2 == NULL) return 0;
+
+  lhs = *((ValNodePtr PNTR) ptr1);
+  rhs = *((ValNodePtr PNTR) ptr2);
+  if (lhs == NULL || rhs == NULL) {
+    if (lhs == rhs) return 0;
+    return (lhs == NULL) ? -1 : 1;
+  }
+
+  if (lhs->choice != rhs->choice) {
+    return (lhs->choice > rhs->choice) ? 1 : -1;
+  }
+  if (lhs->data.intvalue == rhs->data.intvalue) return 0;
+
+  if (lhs->choice != SourceQualChoice_textqual) {
+    return (lhs->data.intvalue > rhs->data.intvalue) ? 1 : -1;
+  }
+
+  /* text qualifiers: the two pinned qualifiers are decided before any name lookup */
+  if (lhs->data.intvalue == Source_qual_taxname) return -1;
+  if (rhs->data.intvalue == Source_qual_taxname) return 1;
+  if (lhs->data.intvalue == Source_qual_taxid) return -1;
+  if (rhs->data.intvalue == Source_qual_taxid) return 1;
+
+  /* fetch both names before comparing, left one first */
+  lhs_name = GetSourceQualName (lhs->data.intvalue);
+  rhs_name = GetSourceQualName (rhs->data.intvalue);
+  return StringCmp (lhs_name, rhs_name);
+}
+
+
+static int CompareFieldTypesEx (FieldTypePtr vnp1, FieldTypePtr vnp2, Boolean use_source_qual_sort)
{
int rval = 0;
FeatureFieldPtr field1, field2;
RnaQualPtr rq1, rq2;
StructuredCommentFieldPtr scf1, scf2;
+ Int4 v1, v2;
if (vnp1 == NULL && vnp2 == NULL) {
rval = 0;
@@ -587,6 +733,14 @@ NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2)
} else {
switch (vnp1->choice) {
case FieldType_source_qual:
+ vnp1 = vnp1->data.ptrvalue;
+ vnp2 = vnp2->data.ptrvalue;
+ if (use_source_qual_sort) {
+ rval = CompareSourceQuals(&vnp1, &vnp2);
+ } else {
+ rval = SortVnpByChoiceAndIntvalue (&vnp1, &vnp2);
+ }
+ break;
case FieldType_molinfo_field:
vnp1 = vnp1->data.ptrvalue;
vnp2 = vnp2->data.ptrvalue;
@@ -675,11 +829,29 @@ NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2)
rval = StringCmp (scf1->data.ptrvalue, scf2->data.ptrvalue);
}
break;
+ case FieldType_dblink:
+ v1 = vnp1->data.intvalue;
+ v2 = vnp2->data.intvalue;
+ if (v1 == v2) {
+ rval = 0;
+ } else if (v1 < v2) {
+ rval = -1;
+ } else {
+ rval = 1;
+ }
+ break;
}
}
return rval;
}
+/* Compares two field types; same as CompareFieldTypesEx with use_source_qual_sort set to FALSE. */
+NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2)
+{
+ return CompareFieldTypesEx (vnp1, vnp2, FALSE);
+}
+
+
static Boolean DoFieldTypesMatch (FieldTypePtr field1, FieldTypePtr field2)
{
if (CompareFieldTypes (field1, field2) == 0) {
@@ -694,16 +866,16 @@ static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field);
NLM_EXTERN Int2 FeatureTypeFromFieldType (FieldTypePtr field)
{
- Int2 feat_type = Feature_type_any;
+ Int2 feat_type = Macro_feature_type_any;
FeatureFieldPtr ffp;
RnaQualPtr rq;
if (field == NULL) {
- feat_type = Feature_type_any;
+ feat_type = Macro_feature_type_any;
} else {
switch (field->choice) {
case FieldType_source_qual:
- feat_type = Feature_type_biosrc;
+ feat_type = Macro_feature_type_biosrc;
break;
case FieldType_feature_field:
ffp = (FeatureFieldPtr) field->data.ptrvalue;
@@ -734,6 +906,24 @@ NLM_EXTERN Boolean IsFeatureFieldEmpty (FeatureFieldPtr field)
}
+/* Allocates a FieldType_feature_field node wrapping a new FeatureField (type ftype, legal qualifier legalqual). */
+NLM_EXTERN ValNodePtr MakeFeatureFieldField (Uint2 ftype, Int4 legalqual)
+{
+  FeatureFieldPtr feature_field = FeatureFieldNew();
+  ValNodePtr      qual, wrapper;
+
+  feature_field->type = ftype;
+  qual = ValNodeNew (NULL);
+  qual->choice = FeatQualChoice_legal_qual;
+  qual->data.intvalue = legalqual;
+  feature_field->field = qual;
+  wrapper = ValNodeNew (NULL);
+  wrapper->choice = FieldType_feature_field;
+  wrapper->data.ptrvalue = feature_field;
+  return wrapper;
+}
+
+
NLM_EXTERN Boolean IsRnaQualEmpty (RnaQualPtr rq)
{
if (rq == NULL) return TRUE;
@@ -777,6 +967,12 @@ NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field)
rval = FALSE;
}
break;
+ case FieldType_dblink:
+   /* a DBLink field whose value is below 1 is treated as empty */
+   rval = (field->data.intvalue < 1) ? TRUE : FALSE;
+   /* stop here: without this break control fell through into the
+    * FieldType_misc arm, which overwrote the result with FALSE */
+   break;
case FieldType_misc:
rval = FALSE;
break;
@@ -801,18 +997,30 @@ NLM_EXTERN Boolean AllowFieldMulti (FieldTypePtr field)
feature_field = (FeatureFieldPtr) field->data.ptrvalue;
if (feature_field != NULL && feature_field->field != NULL
&& feature_field->field->choice == FeatQualChoice_legal_qual
- && feature_field->field->data.intvalue == Feat_qual_legal_db_xref) {
+ && (feature_field->field->data.intvalue == Feat_qual_legal_db_xref
+ || feature_field->field->data.intvalue == Feat_qual_legal_ec_number)) {
rval = TRUE;
}
break;
case FieldType_cds_gene_prot:
+ if (field->data.intvalue == CDSGeneProt_field_prot_ec_number
+ || field->data.intvalue == CDSGeneProt_field_mat_peptide_ec_number
+ || field->data.intvalue == CDSGeneProt_field_gene_synonym) {
+ rval = TRUE;
+ }
break;
case FieldType_pub:
break;
case FieldType_rna_field:
+ if (field->data.intvalue == Rna_field_gene_synonym) {
+ rval = TRUE;
+ }
break;
case FieldType_struc_comment_field:
break;
+ case FieldType_dblink:
+ rval = TRUE;
+ break;
case FieldType_misc:
if (field->data.intvalue == Misc_field_keyword) {
rval = TRUE;
@@ -823,7 +1031,7 @@ NLM_EXTERN Boolean AllowFieldMulti (FieldTypePtr field)
}
-static Boolean IsUserObjectStructuredComment (UserObjectPtr uop)
+NLM_EXTERN Boolean IsUserObjectStructuredComment (UserObjectPtr uop)
{
if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "StructuredComment") == 0) {
return TRUE;
@@ -833,6 +1041,29 @@ static Boolean IsUserObjectStructuredComment (UserObjectPtr uop)
}
+/* Returns TRUE only when uop is a StructuredComment user object (as judged
+ * by IsUserObjectStructuredComment) whose data field is NULL; returns FALSE
+ * for anything else, including a NULL uop. */
+static Boolean IsEmptyStructuredComment (UserObjectPtr uop)
+{
+  Boolean is_empty = FALSE;
+  if (IsUserObjectStructuredComment(uop) && uop->data == NULL) {
+    is_empty = TRUE;
+  }
+  return is_empty;
+}
+
+
+/* Returns TRUE when uop and its type are non-NULL and the type string compares equal to "DBLink" under StringCmp. */
+static Boolean IsUserObjectDBLink (UserObjectPtr uop)
+{
+  if (uop == NULL || uop->type == NULL) {
+    return FALSE;
+  }
+  return (StringCmp (uop->type->str, "DBLink") == 0) ? TRUE : FALSE;
+}
+
+
static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, FieldTypePtr field)
{
SeqFeatPtr sfp;
@@ -861,7 +1092,7 @@ static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, Fie
if (choice == OBJ_SEQFEAT) {
sfp = (SeqFeatPtr) data;
fp = (FeatureFieldPtr) field->data.ptrvalue;
- if (fp != NULL && (fp->type == Feature_type_any || GetFeatdefFromFeatureType (fp->type) == sfp->idx.subtype)) {
+ if (fp != NULL && (fp->type == Macro_feature_type_any || GetFeatdefFromFeatureType (fp->type) == sfp->idx.subtype)) {
rval = TRUE;
}
}
@@ -906,6 +1137,14 @@ static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, Fie
}
}
break;
+ case FieldType_dblink:
+ if (choice == OBJ_SEQDESC) {
+ sdp = (SeqDescrPtr) data;
+ if (sdp->choice == Seq_descr_user && IsUserObjectDBLink (sdp->data.ptrvalue)) {
+ rval = TRUE;
+ }
+ }
+ break;
case FieldType_misc:
if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) {
rval = TRUE;
@@ -1008,100 +1247,101 @@ typedef struct feattypefeatdef {
} FeatTypeFeatDefData, PNTR FeatTypeFeatDefPtr;
static FeatTypeFeatDefData feattype_featdef[] = {
- { Feature_type_any , FEATDEF_ANY , "any" } ,
- { Feature_type_gene , FEATDEF_GENE , "gene" } ,
- { Feature_type_org , FEATDEF_ORG , "org" } ,
- { Feature_type_cds , FEATDEF_CDS , "CDS" } ,
- { Feature_type_prot , FEATDEF_PROT , "Protein" } ,
- { Feature_type_preRNA , FEATDEF_preRNA , "preRNA" } ,
- { Feature_type_mRNA , FEATDEF_mRNA , "mRNA" } ,
- { Feature_type_tRNA , FEATDEF_tRNA , "tRNA" } ,
- { Feature_type_rRNA , FEATDEF_rRNA , "rRNA" } ,
- { Feature_type_snRNA , FEATDEF_snRNA , "snRNA" } ,
- { Feature_type_scRNA , FEATDEF_scRNA , "scRNA" } ,
- { Feature_type_otherRNA , FEATDEF_otherRNA , "misc_RNA" } ,
- { Feature_type_pub , FEATDEF_PUB , "pub" } ,
- { Feature_type_seq , FEATDEF_SEQ , "seq" } ,
- { Feature_type_imp , FEATDEF_IMP , "imp" } ,
- { Feature_type_allele , FEATDEF_allele , "allele" } ,
- { Feature_type_attenuator , FEATDEF_attenuator , "attenuator" } ,
- { Feature_type_c_region , FEATDEF_C_region , "c_region" } ,
- { Feature_type_caat_signal , FEATDEF_CAAT_signal , "caat_signal" } ,
- { Feature_type_imp_CDS , FEATDEF_Imp_CDS , "imp_CDS" } ,
- { Feature_type_conflict , FEATDEF_conflict , "conflict" } ,
- { Feature_type_d_loop , FEATDEF_D_loop , "d_loop" } ,
- { Feature_type_d_segment , FEATDEF_D_segment , "d_segment" } ,
- { Feature_type_enhancer , FEATDEF_enhancer , "enhancer" } ,
- { Feature_type_exon , FEATDEF_exon , "exon" } ,
- { Feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } ,
- { Feature_type_iDNA , FEATDEF_iDNA , "iDNA" } ,
- { Feature_type_intron , FEATDEF_intron , "intron" } ,
- { Feature_type_j_segment , FEATDEF_J_segment , "j_segment" } ,
- { Feature_type_ltr , FEATDEF_LTR , "ltr" } ,
- { Feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } ,
- { Feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } ,
- { Feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } ,
- { Feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } ,
- { Feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } ,
- { Feature_type_misc_RNA , FEATDEF_otherRNA , "misc_RNA" } ,
- { Feature_type_misc_signal , FEATDEF_misc_signal , "misc_signal" } ,
- { Feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } ,
- { Feature_type_modified_base , FEATDEF_modified_base , "modified_base" } ,
- { Feature_type_mutation , FEATDEF_mutation , "mutation" } ,
- { Feature_type_n_region , FEATDEF_N_region , "n_region" } ,
- { Feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } ,
- { Feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } ,
- { Feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } ,
- { Feature_type_precursor_RNA , FEATDEF_preRNA , "precursor_RNA" } ,
- { Feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } ,
- { Feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } ,
- { Feature_type_promoter , FEATDEF_promoter , "promoter" } ,
- { Feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } ,
- { Feature_type_rbs , FEATDEF_RBS , "rbs" } ,
- { Feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } ,
- { Feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } ,
- { Feature_type_s_region , FEATDEF_S_region , "s_region" } ,
- { Feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } ,
- { Feature_type_source , FEATDEF_source , "source" } ,
- { Feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } ,
- { Feature_type_sts , FEATDEF_STS , "sts" } ,
- { Feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } ,
- { Feature_type_terminator , FEATDEF_terminator , "terminator" } ,
- { Feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } ,
- { Feature_type_unsure , FEATDEF_unsure , "unsure" } ,
- { Feature_type_v_region , FEATDEF_V_region , "v_region" } ,
- { Feature_type_v_segment , FEATDEF_V_segment , "v_segment" } ,
- { Feature_type_variation , FEATDEF_variation , "variation" } ,
- { Feature_type_virion , FEATDEF_virion , "virion" } ,
- { Feature_type_n3clip , FEATDEF_3clip , "3'clip" } ,
- { Feature_type_n3UTR , FEATDEF_3UTR , "3'UTR" } ,
- { Feature_type_n5clip , FEATDEF_5clip , "5'clip" } ,
- { Feature_type_n5UTR , FEATDEF_5UTR , "5'UTR" } ,
- { Feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } ,
- { Feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } ,
- { Feature_type_site_ref , FEATDEF_site_ref , "site_ref" } ,
- { Feature_type_region , FEATDEF_REGION , "region" } ,
- { Feature_type_comment , FEATDEF_COMMENT , "comment" } ,
- { Feature_type_bond , FEATDEF_BOND , "bond" } ,
- { Feature_type_site , FEATDEF_SITE , "site" } ,
- { Feature_type_rsite , FEATDEF_RSITE , "rsite" } ,
- { Feature_type_user , FEATDEF_USER , "user" } ,
- { Feature_type_txinit , FEATDEF_TXINIT , "txinit" } ,
- { Feature_type_num , FEATDEF_NUM , "num" } ,
- { Feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } ,
- { Feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } ,
- { Feature_type_het , FEATDEF_HET , "het" } ,
- { Feature_type_biosrc , FEATDEF_BIOSRC , "biosrc" } ,
- { Feature_type_preprotein , FEATDEF_preprotein , "preprotein" } ,
- { Feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } ,
- { Feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } ,
- { Feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } ,
- { Feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } ,
- { Feature_type_gap , FEATDEF_gap , "gap" } ,
- { Feature_type_operon , FEATDEF_operon , "operon" } ,
- { Feature_type_oriT , FEATDEF_oriT , "oriT" } ,
- { Feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } ,
- { Feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" }};
+ { Macro_feature_type_any , FEATDEF_ANY , "any" } ,
+ { Macro_feature_type_gene , FEATDEF_GENE , "gene" } ,
+ { Macro_feature_type_org , FEATDEF_ORG , "org" } ,
+ { Macro_feature_type_cds , FEATDEF_CDS , "CDS" } ,
+ { Macro_feature_type_prot , FEATDEF_PROT , "Protein" } ,
+ { Macro_feature_type_preRNA , FEATDEF_preRNA , "preRNA" } ,
+ { Macro_feature_type_mRNA , FEATDEF_mRNA , "mRNA" } ,
+ { Macro_feature_type_tRNA , FEATDEF_tRNA , "tRNA" } ,
+ { Macro_feature_type_rRNA , FEATDEF_rRNA , "rRNA" } ,
+ { Macro_feature_type_snRNA , FEATDEF_snRNA , "snRNA" } ,
+ { Macro_feature_type_scRNA , FEATDEF_scRNA , "scRNA" } ,
+ { Macro_feature_type_otherRNA , FEATDEF_otherRNA , "misc_RNA" } ,
+ { Macro_feature_type_pub , FEATDEF_PUB , "pub" } ,
+ { Macro_feature_type_seq , FEATDEF_SEQ , "seq" } ,
+ { Macro_feature_type_imp , FEATDEF_IMP , "imp" } ,
+ { Macro_feature_type_allele , FEATDEF_allele , "allele" } ,
+ { Macro_feature_type_attenuator , FEATDEF_attenuator , "attenuator" } ,
+ { Macro_feature_type_c_region , FEATDEF_C_region , "c_region" } ,
+ { Macro_feature_type_caat_signal , FEATDEF_CAAT_signal , "caat_signal" } ,
+ { Macro_feature_type_imp_CDS , FEATDEF_Imp_CDS , "imp_CDS" } ,
+ { Macro_feature_type_d_loop , FEATDEF_D_loop , "d_loop" } ,
+ { Macro_feature_type_d_segment , FEATDEF_D_segment , "d_segment" } ,
+ { Macro_feature_type_enhancer , FEATDEF_enhancer , "enhancer" } ,
+ { Macro_feature_type_exon , FEATDEF_exon , "exon" } ,
+ { Macro_feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } ,
+ { Macro_feature_type_iDNA , FEATDEF_iDNA , "iDNA" } ,
+ { Macro_feature_type_intron , FEATDEF_intron , "intron" } ,
+ { Macro_feature_type_j_segment , FEATDEF_J_segment , "j_segment" } ,
+ { Macro_feature_type_ltr , FEATDEF_LTR , "LTR" } ,
+ { Macro_feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } ,
+ { Macro_feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } ,
+ { Macro_feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } ,
+ { Macro_feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } ,
+ { Macro_feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } ,
+ { Macro_feature_type_misc_RNA , FEATDEF_otherRNA , "misc_RNA" } ,
+ { Macro_feature_type_misc_signal , FEATDEF_misc_signal , "misc_signal" } ,
+ { Macro_feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } ,
+ { Macro_feature_type_modified_base , FEATDEF_modified_base , "modified_base" } ,
+ { Macro_feature_type_mutation , FEATDEF_mutation , "mutation" } ,
+ { Macro_feature_type_n_region , FEATDEF_N_region , "n_region" } ,
+ { Macro_feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } ,
+ { Macro_feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } ,
+ { Macro_feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } ,
+ { Macro_feature_type_precursor_RNA , FEATDEF_preRNA , "precursor_RNA" } ,
+ { Macro_feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } ,
+ { Macro_feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } ,
+ { Macro_feature_type_promoter , FEATDEF_promoter , "promoter" } ,
+ { Macro_feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } ,
+ { Macro_feature_type_rbs , FEATDEF_RBS , "rbs" } ,
+ { Macro_feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } ,
+ { Macro_feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } ,
+ { Macro_feature_type_s_region , FEATDEF_S_region , "s_region" } ,
+ { Macro_feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } ,
+ { Macro_feature_type_source , FEATDEF_source , "source" } ,
+ { Macro_feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } ,
+ { Macro_feature_type_sts , FEATDEF_STS , "sts" } ,
+ { Macro_feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } ,
+ { Macro_feature_type_terminator , FEATDEF_terminator , "terminator" } ,
+ { Macro_feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } ,
+ { Macro_feature_type_unsure , FEATDEF_unsure , "unsure" } ,
+ { Macro_feature_type_v_region , FEATDEF_V_region , "v_region" } ,
+ { Macro_feature_type_v_segment , FEATDEF_V_segment , "v_segment" } ,
+ { Macro_feature_type_variation , FEATDEF_variation , "variation" } ,
+ { Macro_feature_type_virion , FEATDEF_virion , "virion" } ,
+ { Macro_feature_type_n3clip , FEATDEF_3clip , "3'clip" } ,
+ { Macro_feature_type_n3UTR , FEATDEF_3UTR , "3'UTR" } ,
+ { Macro_feature_type_n5clip , FEATDEF_5clip , "5'clip" } ,
+ { Macro_feature_type_n5UTR , FEATDEF_5UTR , "5'UTR" } ,
+ { Macro_feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } ,
+ { Macro_feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } ,
+ { Macro_feature_type_site_ref , FEATDEF_site_ref , "site_ref" } ,
+ { Macro_feature_type_region , FEATDEF_REGION , "region" } ,
+ { Macro_feature_type_comment , FEATDEF_COMMENT , "comment" } ,
+ { Macro_feature_type_bond , FEATDEF_BOND , "bond" } ,
+ { Macro_feature_type_site , FEATDEF_SITE , "site" } ,
+ { Macro_feature_type_rsite , FEATDEF_RSITE , "rsite" } ,
+ { Macro_feature_type_user , FEATDEF_USER , "user" } ,
+ { Macro_feature_type_txinit , FEATDEF_TXINIT , "txinit" } ,
+ { Macro_feature_type_num , FEATDEF_NUM , "num" } ,
+ { Macro_feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } ,
+ { Macro_feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } ,
+ { Macro_feature_type_het , FEATDEF_HET , "het" } ,
+ { Macro_feature_type_biosrc , FEATDEF_BIOSRC , "biosrc" } ,
+ { Macro_feature_type_preprotein , FEATDEF_preprotein , "preprotein" } ,
+ { Macro_feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } ,
+ { Macro_feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } ,
+ { Macro_feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } ,
+ { Macro_feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } ,
+ { Macro_feature_type_gap , FEATDEF_gap , "gap" } ,
+ { Macro_feature_type_operon , FEATDEF_operon , "operon" } ,
+ { Macro_feature_type_oriT , FEATDEF_oriT , "oriT" } ,
+ { Macro_feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } ,
+ { Macro_feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" } ,
+ { Macro_feature_type_mobile_element, FEATDEF_mobile_element, "mobile_element" }
+};
#define NUM_feattype_featdef sizeof (feattype_featdef) / sizeof (FeatTypeFeatDefData)
@@ -1138,7 +1378,7 @@ NLM_EXTERN CharPtr GetFeatureNameFromFeatureType (Int4 feature_type)
for (i = 0; i < NUM_feattype_featdef && str == NULL; i++) {
if (feature_type == feattype_featdef[i].feattype) {
- str = feattype_featdef[feature_type].featname;
+ str = feattype_featdef[i].featname;
}
}
if (str == NULL) {
@@ -1219,7 +1459,8 @@ NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list
ValNodePtr tmp_list = NULL;
for (i = 1; i < NUM_feattype_featdef; i++) {
- if (feattype_featdef[i].feattype == Feature_type_gap) continue;
+ if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue;
+ if (feattype_featdef[i].feattype == Macro_feature_type_conflict) continue;
seqfeattype = FindFeatFromFeatDefType (feattype_featdef[i].featdef);
if (seqfeattype == SEQFEAT_IMP) {
featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype);
@@ -1236,14 +1477,15 @@ NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list
static Boolean IsMostUsedFeature (Uint1 val)
{
- if (val == Feature_type_gene
- || val == Feature_type_cds
- || val == Feature_type_prot
- || val == Feature_type_exon
- || val == Feature_type_intron
- || val == Feature_type_mRNA
- || val == Feature_type_rRNA
- || val == Feature_type_otherRNA) {
+ if (val == Macro_feature_type_gene
+ || val == Macro_feature_type_cds
+ || val == Macro_feature_type_prot
+ || val == Macro_feature_type_exon
+ || val == Macro_feature_type_intron
+ || val == Macro_feature_type_mRNA
+ || val == Macro_feature_type_rRNA
+ || val == Macro_feature_type_otherRNA
+ || val == Macro_feature_type_misc_feature) {
return TRUE;
} else {
return FALSE;
@@ -1290,7 +1532,7 @@ NLM_EXTERN void AddAllFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
ValNodePtr tmp_list = NULL;
for (i = 1; i < NUM_feattype_featdef; i++) {
- if (feattype_featdef[i].feattype == Feature_type_gap) continue;
+ if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue;
featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype);
if (featname != NULL) {
ValNodeAddPointer (&tmp_list, feattype_featdef[i].feattype, StringSave (featname));
@@ -1330,15 +1572,15 @@ static FeatQualGBQualData featqual_gbqual[] = {
{ Feat_qual_legal_function , GBQUAL_function , 0, "function" } ,
{ Feat_qual_legal_gene , GBQUAL_gene , 0, "locus" } ,
{ Feat_qual_legal_inference , GBQUAL_inference , 0, "inference" } ,
- { Feat_qual_legal_label , GBQUAL_label , 0, "label" } ,
- { Feat_qual_legal_location , 255 , 0, "location" } ,
+ { Feat_qual_legal_location , -1 , 0, "location" } ,
{ Feat_qual_legal_locus_tag , GBQUAL_locus_tag , 0, "locus-tag" } ,
{ Feat_qual_legal_map , GBQUAL_map , 0, "map" } ,
- { Feat_qual_legal_mobile_element , GBQUAL_mobile_element , 0, "mobile-element" } ,
- { Feat_qual_legal_mobile_element_type , GBQUAL_mobile_element , 1, "mobile-element-type"} ,
- { Feat_qual_legal_mobile_element_name , GBQUAL_mobile_element , 2, "mobile-element-name"} ,
+ { Feat_qual_legal_mobile_element_type , GBQUAL_mobile_element_type , 0, "mobile-element-type" } ,
+ { Feat_qual_legal_mobile_element_type_type , GBQUAL_mobile_element_type , 1, "mobile-element-type-type"} ,
+ { Feat_qual_legal_mobile_element_name , GBQUAL_mobile_element_type , 2, "mobile-element-name"} ,
{ Feat_qual_legal_mod_base , GBQUAL_mod_base , 0, "mod-base" } ,
{ Feat_qual_legal_mol_type , GBQUAL_mol_type , 0, "mol-type" } ,
+ { Feat_qual_legal_name, -1 , 0 , "name" } ,
{ Feat_qual_legal_ncRNA_class , GBQUAL_ncRNA_class , 0, "ncRNA-class" } ,
{ Feat_qual_legal_note , GBQUAL_note , 0, "note" } ,
{ Feat_qual_legal_number , GBQUAL_number , 0, "number" } ,
@@ -1371,7 +1613,8 @@ static FeatQualGBQualData featqual_gbqual[] = {
{ Feat_qual_legal_translation , GBQUAL_translation , 0, "translation" } ,
{ Feat_qual_legal_transl_except , GBQUAL_transl_except , 0, "transl-except" } ,
{ Feat_qual_legal_transl_table , GBQUAL_transl_table , 0, "transl-table" } ,
- { Feat_qual_legal_usedin , GBQUAL_usedin , 0, "usedin" } };
+ { Feat_qual_legal_usedin , GBQUAL_usedin , 0, "usedin" }
+};
#define NUM_featqual_gbqual sizeof (featqual_gbqual) / sizeof (FeatQualGBQualData)
@@ -1486,13 +1729,13 @@ typedef struct rnatypemap {
} RnaTypeMapData, PNTR RnaTypeMapPtr;
static RnaTypeMapData rnatypemap[] = {
- { RnaFeatType_preRNA , RNA_TYPE_premsg, Feature_type_preRNA, "preRNA" } ,
- { RnaFeatType_mRNA , RNA_TYPE_mRNA, Feature_type_mRNA, "mRNA" } ,
- { RnaFeatType_tRNA , RNA_TYPE_tRNA, Feature_type_tRNA, "tRNA" } ,
- { RnaFeatType_rRNA , RNA_TYPE_rRNA, Feature_type_rRNA, "rRNA" } ,
- { RnaFeatType_ncRNA , RNA_TYPE_ncRNA , Feature_type_ncRNA, "ncRNA" } ,
- { RnaFeatType_tmRNA , RNA_TYPE_tmRNA , Feature_type_tmRNA, "tmRNA" } ,
- { RnaFeatType_miscRNA , RNA_TYPE_misc_RNA , Feature_type_misc_RNA, "misc_RNA" }
+ { RnaFeatType_preRNA , RNA_TYPE_premsg, Macro_feature_type_preRNA, "preRNA" } ,
+ { RnaFeatType_mRNA , RNA_TYPE_mRNA, Macro_feature_type_mRNA, "mRNA" } ,
+ { RnaFeatType_tRNA , RNA_TYPE_tRNA, Macro_feature_type_tRNA, "tRNA" } ,
+ { RnaFeatType_rRNA , RNA_TYPE_rRNA, Macro_feature_type_rRNA, "rRNA" } ,
+ { RnaFeatType_ncRNA , RNA_TYPE_ncRNA , Macro_feature_type_ncRNA, "ncRNA" } ,
+ { RnaFeatType_tmRNA , RNA_TYPE_tmRNA , Macro_feature_type_tmRNA, "tmRNA" } ,
+ { RnaFeatType_miscRNA , RNA_TYPE_misc_RNA , Macro_feature_type_misc_RNA, "misc_RNA" }
};
#define NUM_rnatypemap sizeof (rnatypemap) / sizeof (RnaTypeMapData)
@@ -1572,7 +1815,7 @@ static Boolean DoesFeatureMatchRnaType (SeqFeatPtr sfp, RnaFeatTypePtr rt)
if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) {
return FALSE;
}
- if (rt == NULL) return TRUE;
+ if (rt == NULL || rt->choice == RnaFeatType_any) return TRUE;
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
if (rrp == NULL) return FALSE;
@@ -1582,7 +1825,7 @@ static Boolean DoesFeatureMatchRnaType (SeqFeatPtr sfp, RnaFeatTypePtr rt)
case RnaFeatType_ncRNA:
if (rt->data.ptrvalue == NULL) {
rval = TRUE;
- } else if ((rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && StringCmp (rgp->_class, rt->data.ptrvalue)) {
+ } else if ((rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && StringCmp (rgp->_class, rt->data.ptrvalue) == 0) {
rval = TRUE;
}
break;
@@ -1788,39 +2031,39 @@ NLM_EXTERN RnaQualPtr RnaQualFromFeatureField (FeatureFieldPtr ffp)
rq->field = rnafieldnames[i].featqual;
rq->type = ValNodeNew (NULL);
switch (ffp->type) {
- case Feature_type_preRNA:
- case Feature_type_precursor_RNA:
+ case Macro_feature_type_preRNA:
+ case Macro_feature_type_precursor_RNA:
rq->type->choice = RnaFeatType_preRNA;
break;
- case Feature_type_mRNA:
+ case Macro_feature_type_mRNA:
rq->type->choice = RnaFeatType_mRNA;
break;
- case Feature_type_tRNA:
+ case Macro_feature_type_tRNA:
rq->type->choice = RnaFeatType_tRNA;
break;
- case Feature_type_rRNA:
+ case Macro_feature_type_rRNA:
rq->type->choice = RnaFeatType_rRNA;
break;
- case Feature_type_snRNA:
+ case Macro_feature_type_snRNA:
rq->type->choice = RnaFeatType_ncRNA;
rq->type->data.ptrvalue = StringSave ("snRNA");
break;
- case Feature_type_scRNA:
+ case Macro_feature_type_scRNA:
rq->type->choice = RnaFeatType_ncRNA;
rq->type->data.ptrvalue = StringSave ("scRNA");
break;
- case Feature_type_snoRNA:
+ case Macro_feature_type_snoRNA:
rq->type->choice = RnaFeatType_ncRNA;
rq->type->data.ptrvalue = StringSave ("snoRNA");
break;
- case Feature_type_otherRNA:
- case Feature_type_misc_RNA:
+ case Macro_feature_type_otherRNA:
+ case Macro_feature_type_misc_RNA:
rq->type->choice = RnaFeatType_miscRNA;
break;
- case Feature_type_ncRNA:
+ case Macro_feature_type_ncRNA:
rq->type->choice = RnaFeatType_ncRNA;
break;
- case Feature_type_tmRNA:
+ case Macro_feature_type_tmRNA:
rq->type->choice = RnaFeatType_tmRNA;
break;
default:
@@ -1838,7 +2081,7 @@ NLM_EXTERN CharPtr SummarizeRnaType (RnaFeatTypePtr rt)
CharPtr rnatypename = NULL;
CharPtr fmt = "%s ncRNA";
- if (rt == NULL) {
+ if (rt == NULL || rt->choice == RnaFeatType_any) {
rnatypename = StringSave ("Any RNA");
} else if (rt->choice == RnaFeatType_ncRNA) {
if (StringHasNoText (rt->data.ptrvalue)) {
@@ -1885,13 +2128,15 @@ static CharPtr SummarizeRnaQual (RnaQualPtr rq)
static CharPtr SummarizeStructuredCommentField (StructuredCommentFieldPtr field)
{
CharPtr summ = NULL;
+ CharPtr fmt = "structured comment field %s";
if (field == NULL) return NULL;
if (field->choice == StructuredCommentField_database) {
- summ = StringSave ("database");
+ summ = StringSave ("structured comment database");
} else if (field->choice == StructuredCommentField_named) {
- summ = StringSave (field->data.ptrvalue);
+ summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field->data.ptrvalue)));
+ sprintf (summ, fmt, field->data.ptrvalue == NULL ? "" : field->data.ptrvalue);
}
return summ;
}
@@ -2008,7 +2253,8 @@ static SrcQualSCQualData srcqual_scqual[] = {
{ Source_qual_all_quals , 0 , IS_OTHER , 0, kAllQualsStr } ,
{ Source_qual_mating_type , SUBSRC_mating_type , IS_SUBSRC , 0 , "mating-type" } ,
{ Source_qual_linkage_group , SUBSRC_linkage_group , IS_SUBSRC , 0 , "linkage-group" } ,
- { Source_qual_haplogroup , SUBSRC_haplogroup, IS_SUBSRC, 0, "haplogroup"}
+ { Source_qual_haplogroup , SUBSRC_haplogroup, IS_SUBSRC, 0, "haplogroup"} ,
+ { Source_qual_taxid , 0 , IS_OTHER , 0 , "taxid" } ,
};
#define NUM_srcqual_scqual sizeof (srcqual_scqual) / sizeof (SrcQualSCQualData)
@@ -2101,6 +2347,8 @@ NLM_EXTERN Boolean IsNonTextFieldType (FieldTypePtr field)
if (field == NULL) {
return FALSE;
+ } else if (field->choice == FieldType_molinfo_field) {
+ return TRUE;
} else if (field->choice != FieldType_source_qual) {
return FALSE;
} else if ((vnp = field->data.ptrvalue) == NULL) {
@@ -2182,6 +2430,8 @@ NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop)
OrgModPtr mod;
ValNodePtr list = NULL, vnp;
Int4 i;
+ PCRReactionSetPtr ps;
+ PCRPrimerPtr pp;
if (biop == NULL) {
return NULL;
@@ -2214,6 +2464,14 @@ NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop)
}
}
+ /* add taxid */
+ if (HasTaxonomyID(biop)) {
+ vnp = ValNodeNew (NULL);
+ vnp->choice = SourceQualChoice_textqual;
+ vnp->data.intvalue = Source_qual_taxid;
+ ValNodeAddPointer (&list, FieldType_source_qual, vnp);
+ }
+
/* add subtypes */
for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
for (i = 0;
@@ -2240,6 +2498,39 @@ NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop)
}
}
}
+
+ /* add PCR primers */
+ for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) {
+ for (pp = ps->forward; pp != NULL; pp = pp->next) {
+ if (!StringHasNoText (pp->name)) {
+ vnp = ValNodeNew (NULL);
+ vnp->choice = SourceQualChoice_textqual;
+ vnp->data.intvalue = Source_qual_fwd_primer_name;
+ ValNodeAddPointer (&list, FieldType_source_qual, vnp);
+ }
+ if (!StringHasNoText (pp->seq)) {
+ vnp = ValNodeNew (NULL);
+ vnp->choice = SourceQualChoice_textqual;
+ vnp->data.intvalue = Source_qual_fwd_primer_seq;
+ ValNodeAddPointer (&list, FieldType_source_qual, vnp);
+ }
+ }
+ for (pp = ps->reverse; pp != NULL; pp = pp->next) {
+ if (!StringHasNoText (pp->name)) {
+ vnp = ValNodeNew (NULL);
+ vnp->choice = SourceQualChoice_textqual;
+ vnp->data.intvalue = Source_qual_rev_primer_name;
+ ValNodeAddPointer (&list, FieldType_source_qual, vnp);
+ }
+ if (!StringHasNoText (pp->seq)) {
+ vnp = ValNodeNew (NULL);
+ vnp->choice = SourceQualChoice_textqual;
+ vnp->data.intvalue = Source_qual_rev_primer_seq;
+ ValNodeAddPointer (&list, FieldType_source_qual, vnp);
+ }
+ }
+ }
+
return list;
}
@@ -2253,65 +2544,30 @@ NLM_EXTERN Boolean AllowSourceQualMulti (SourceQualChoicePtr s)
} else if (s->data.intvalue == Source_qual_culture_collection
|| s->data.intvalue == Source_qual_bio_material
|| s->data.intvalue == Source_qual_specimen_voucher
- || s->data.intvalue == Source_qual_dbxref) {
+ || s->data.intvalue == Source_qual_dbxref
+ || s->data.intvalue == Source_qual_fwd_primer_name
+ || s->data.intvalue == Source_qual_fwd_primer_seq
+ || s->data.intvalue == Source_qual_rev_primer_name
+ || s->data.intvalue == Source_qual_rev_primer_seq) {
rval = TRUE;
}
return rval;
}
-static Boolean IsNotForParsing (Int4 srcqual)
-{
- if (srcqual == Source_qual_all_notes
- || srcqual == Source_qual_all_quals
- || srcqual == Source_qual_common
- || srcqual == Source_qual_acronym
- || srcqual == Source_qual_dosage
- || srcqual == Source_qual_nat_host
- || srcqual == Source_qual_specimen_voucher
- || srcqual == Source_qual_authority
- || srcqual == Source_qual_synonym
- || srcqual == Source_qual_anamorph
- || srcqual == Source_qual_teleomorph
- || srcqual == Source_qual_gb_acronym
- || srcqual == Source_qual_gb_anamorph
- || srcqual == Source_qual_gb_synonym
- || srcqual == Source_qual_culture_collection
- || srcqual == Source_qual_bio_material
- || srcqual == Source_qual_metagenome_source
- || srcqual == Source_qual_old_lineage
- || srcqual == Source_qual_old_name) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
NLM_EXTERN TextFsaPtr GetOrgModSearch (void)
{
-#if 0
- Int4 i;
-#endif
TextFsaPtr tags;
tags = TextFsaNew();
-#if 0
- for (i = 0; i < NUM_srcqual_scqual; i++) {
- if (!IsNotForParsing(srcqual_scqual[i].srcqual)
- && (srcqual_scqual[i].typeflag & IS_ORGMOD)) {
- TextFsaAdd (tags, srcqual_scqual[i].qualname);
- }
- }
-#else
TextFsaAdd (tags, "pathovar");
TextFsaAdd (tags, "serovar");
TextFsaAdd (tags, "strain");
TextFsaAdd (tags, "sub-species");
TextFsaAdd (tags, "variety");
-#endif
+ /* abbreviations */
TextFsaAdd (tags, "subsp.");
TextFsaAdd (tags, "var.");
TextFsaAdd (tags, "str.");
@@ -2408,7 +2664,7 @@ NLM_EXTERN Int4 GenomeFromLocName (CharPtr loc_name)
NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove)
{
- ValNodePtr list = NULL;
+ ValNodePtr list = NULL, start = NULL;
Int4 i;
for (i = 0; i < NUM_srcloc_genome; i++) {
@@ -2419,6 +2675,11 @@ NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove)
}
}
list = ValNodeSort (list, SortVnpByString);
+ /* put mitochondrion and chloroplast at top of list */
+ ValNodeAddPointer (&start, Source_location_mitochondrion, StringSave ("mitochondrion"));
+ ValNodeAddPointer (&start, Source_location_chloroplast, StringSave ("chloroplast"));
+ ValNodeLink (&start, list);
+ list = start;
return list;
}
@@ -2779,6 +3040,9 @@ NLM_EXTERN void AddAllCDSGeneProtFieldsToChoiceList (ValNodePtr PNTR field_list)
{
Int4 i;
+ ValNodeAddPointer (field_list, CDSGeneProt_field_prot_name, StringSave ("protein name"));
+ ValNodeAddPointer (field_list, CDSGeneProt_field_prot_description, StringSave ("protein description"));
+
for (i = 0; i < NUM_cdsgeneprotfield_name; i++) {
ValNodeAddPointer (field_list, cdsgeneprotfield_name[i].field, StringSave (cdsgeneprotfield_name[i].name));
}
@@ -2857,7 +3121,7 @@ static Boolean IsFieldTypeMatPeptideRelated (FieldTypePtr field)
rval = FALSE;
} else if ((field->choice == FieldType_feature_field
&& (ff = field->data.ptrvalue) != NULL
- && ff->type == Feature_type_mat_peptide_aa)
+ && ff->type == Macro_feature_type_mat_peptide_aa)
|| (field->choice == FieldType_cds_gene_prot
&& IsCDSGeneProtFieldMatPeptideRelated(field->data.intvalue))) {
rval = TRUE;
@@ -2900,13 +3164,13 @@ static Boolean IsConstraintChoiceMatPeptideRelated (ConstraintChoicePtr constrai
static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field)
{
- Int2 feat_type = Feature_type_any;
+ Int2 feat_type = Macro_feature_type_any;
switch (cds_gene_prot_field) {
case CDSGeneProt_field_cds_comment:
case CDSGeneProt_field_cds_inference:
case CDSGeneProt_field_codon_start:
- feat_type = Feature_type_cds;
+ feat_type = Macro_feature_type_cds;
break;
case CDSGeneProt_field_gene_locus:
case CDSGeneProt_field_gene_description:
@@ -2917,25 +3181,25 @@ static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field)
case CDSGeneProt_field_gene_synonym:
case CDSGeneProt_field_gene_old_locus_tag:
case CDSGeneProt_field_gene_inference:
- feat_type = Feature_type_gene;
+ feat_type = Macro_feature_type_gene;
break;
case CDSGeneProt_field_mrna_product:
case CDSGeneProt_field_mrna_comment:
- feat_type = Feature_type_mRNA;
+ feat_type = Macro_feature_type_mRNA;
break;
case CDSGeneProt_field_prot_name:
case CDSGeneProt_field_prot_description:
case CDSGeneProt_field_prot_ec_number:
case CDSGeneProt_field_prot_activity:
case CDSGeneProt_field_prot_comment:
- feat_type = Feature_type_prot;
+ feat_type = Macro_feature_type_prot;
break;
case CDSGeneProt_field_mat_peptide_name:
case CDSGeneProt_field_mat_peptide_description:
case CDSGeneProt_field_mat_peptide_ec_number:
case CDSGeneProt_field_mat_peptide_activity:
case CDSGeneProt_field_mat_peptide_comment:
- feat_type = Feature_type_mat_peptide_aa;
+ feat_type = Macro_feature_type_mat_peptide_aa;
break;
}
return feat_type;
@@ -2949,168 +3213,168 @@ NLM_EXTERN FeatureFieldPtr FeatureFieldFromCDSGeneProtField (Uint2 cds_gene_prot
switch (cds_gene_prot_field) {
case CDSGeneProt_field_cds_comment:
f = FeatureFieldNew ();
- f->type = Feature_type_cds;
+ f->type = Macro_feature_type_cds;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_note;
break;
case CDSGeneProt_field_cds_inference:
f = FeatureFieldNew ();
- f->type = Feature_type_cds;
+ f->type = Macro_feature_type_cds;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_inference;
break;
case CDSGeneProt_field_codon_start:
f = FeatureFieldNew ();
- f->type = Feature_type_cds;
+ f->type = Macro_feature_type_cds;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_codon_start;
break;
case CDSGeneProt_field_gene_locus:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_gene;
break;
case CDSGeneProt_field_gene_description:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_gene_description;
break;
case CDSGeneProt_field_gene_comment:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_note;
break;
case CDSGeneProt_field_gene_allele:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_allele;
break;
case CDSGeneProt_field_gene_maploc:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_map;
break;
case CDSGeneProt_field_gene_locus_tag:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_locus_tag;
break;
case CDSGeneProt_field_gene_synonym:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_synonym;
break;
case CDSGeneProt_field_gene_old_locus_tag:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_old_locus_tag;
break;
case CDSGeneProt_field_gene_inference:
f = FeatureFieldNew ();
- f->type = Feature_type_gene;
+ f->type = Macro_feature_type_gene;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_inference;
break;
case CDSGeneProt_field_mrna_product:
f = FeatureFieldNew ();
- f->type = Feature_type_mRNA;
+ f->type = Macro_feature_type_mRNA;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_product;
break;
case CDSGeneProt_field_mrna_comment:
f = FeatureFieldNew ();
- f->type = Feature_type_mRNA;
+ f->type = Macro_feature_type_mRNA;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_note;
break;
case CDSGeneProt_field_prot_name:
f = FeatureFieldNew ();
- f->type = Feature_type_prot;
+ f->type = Macro_feature_type_prot;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_product;
break;
case CDSGeneProt_field_prot_description:
f = FeatureFieldNew ();
- f->type = Feature_type_prot;
+ f->type = Macro_feature_type_prot;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_description;
break;
case CDSGeneProt_field_prot_ec_number:
f = FeatureFieldNew ();
- f->type = Feature_type_prot;
+ f->type = Macro_feature_type_prot;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_ec_number;
break;
case CDSGeneProt_field_prot_activity:
f = FeatureFieldNew ();
- f->type = Feature_type_prot;
+ f->type = Macro_feature_type_prot;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_activity;
break;
case CDSGeneProt_field_prot_comment:
f = FeatureFieldNew ();
- f->type = Feature_type_prot;
+ f->type = Macro_feature_type_prot;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_note;
break;
case CDSGeneProt_field_mat_peptide_name:
f = FeatureFieldNew ();
- f->type = Feature_type_mat_peptide_aa;
+ f->type = Macro_feature_type_mat_peptide_aa;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_product;
break;
case CDSGeneProt_field_mat_peptide_description:
f = FeatureFieldNew ();
- f->type = Feature_type_mat_peptide_aa;
+ f->type = Macro_feature_type_mat_peptide_aa;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_description;
break;
case CDSGeneProt_field_mat_peptide_ec_number:
f = FeatureFieldNew ();
- f->type = Feature_type_mat_peptide_aa;
+ f->type = Macro_feature_type_mat_peptide_aa;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_ec_number;
break;
case CDSGeneProt_field_mat_peptide_activity:
f = FeatureFieldNew ();
- f->type = Feature_type_mat_peptide_aa;
+ f->type = Macro_feature_type_mat_peptide_aa;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_activity;
break;
case CDSGeneProt_field_mat_peptide_comment:
f = FeatureFieldNew ();
- f->type = Feature_type_mat_peptide_aa;
+ f->type = Macro_feature_type_mat_peptide_aa;
f->field = ValNodeNew (NULL);
f->field->choice = FeatQualChoice_legal_qual;
f->field->data.intvalue = Feat_qual_legal_note;
@@ -3128,29 +3392,29 @@ static Uint2 CDSGeneProtFieldFromFeatureField (FeatureFieldPtr ffp)
switch (ffp->field->data.intvalue) {
case Feat_qual_legal_note:
switch (ffp->type) {
- case Feature_type_cds:
+ case Macro_feature_type_cds:
cds_gene_prot_field = CDSGeneProt_field_cds_comment;
break;
- case Feature_type_gene:
+ case Macro_feature_type_gene:
cds_gene_prot_field = CDSGeneProt_field_gene_comment;
break;
- case Feature_type_mRNA:
+ case Macro_feature_type_mRNA:
cds_gene_prot_field = CDSGeneProt_field_mrna_comment;
break;
- case Feature_type_prot:
+ case Macro_feature_type_prot:
cds_gene_prot_field = CDSGeneProt_field_prot_comment;
break;
- case Feature_type_mat_peptide_aa:
+ case Macro_feature_type_mat_peptide_aa:
cds_gene_prot_field = CDSGeneProt_field_mat_peptide_comment;
break;
}
break;
case Feat_qual_legal_inference:
switch (ffp->type) {
- case Feature_type_cds:
+ case Macro_feature_type_cds:
cds_gene_prot_field = CDSGeneProt_field_cds_inference;
break;
- case Feature_type_gene:
+ case Macro_feature_type_gene:
cds_gene_prot_field = CDSGeneProt_field_gene_inference;
break;
}
@@ -3181,46 +3445,46 @@ static Uint2 CDSGeneProtFieldFromFeatureField (FeatureFieldPtr ffp)
break;
case Feat_qual_legal_product:
switch (ffp->type) {
- case Feature_type_mRNA:
+ case Macro_feature_type_mRNA:
cds_gene_prot_field = CDSGeneProt_field_mrna_product;
break;
- case Feature_type_prot:
+ case Macro_feature_type_prot:
cds_gene_prot_field = CDSGeneProt_field_prot_name;
break;
- case Feature_type_mat_peptide_aa:
+ case Macro_feature_type_mat_peptide_aa:
cds_gene_prot_field = CDSGeneProt_field_mat_peptide_name;
break;
}
break;
case Feat_qual_legal_description:
switch (ffp->type) {
- case Feature_type_gene:
+ case Macro_feature_type_gene:
cds_gene_prot_field = CDSGeneProt_field_gene_description;
break;
- case Feature_type_prot:
+ case Macro_feature_type_prot:
cds_gene_prot_field = CDSGeneProt_field_prot_description;
break;
- case Feature_type_mat_peptide_aa:
+ case Macro_feature_type_mat_peptide_aa:
cds_gene_prot_field = CDSGeneProt_field_mat_peptide_description;
break;
}
break;
case Feat_qual_legal_ec_number:
switch (ffp->type) {
- case Feature_type_prot:
+ case Macro_feature_type_prot:
cds_gene_prot_field = CDSGeneProt_field_prot_ec_number;
break;
- case Feature_type_mat_peptide_aa:
+ case Macro_feature_type_mat_peptide_aa:
cds_gene_prot_field = CDSGeneProt_field_mat_peptide_ec_number;
break;
}
break;
case Feat_qual_legal_activity:
switch (ffp->type) {
- case Feature_type_prot:
+ case Macro_feature_type_prot:
cds_gene_prot_field = CDSGeneProt_field_prot_activity;
break;
- case Feature_type_mat_peptide_aa:
+ case Macro_feature_type_mat_peptide_aa:
cds_gene_prot_field = CDSGeneProt_field_mat_peptide_activity;
break;
}
@@ -3250,7 +3514,7 @@ static MoleculeTypeBiomolData moleculetype_biomol[] = {
{ Molecule_type_transcribed_RNA, MOLECULE_TYPE_TRANSCRIBED_RNA, "transcribed RNA" } ,
{ Molecule_type_ncRNA, MOLECULE_TYPE_NCRNA, "ncRNA" } ,
{ Molecule_type_transfer_messenger_RNA, MOLECULE_TYPE_TMRNA, "tmRNA" } ,
- { Molecule_type_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other-genetic" }
+ { Molecule_type_macro_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other-genetic" }
};
@@ -4021,12 +4285,12 @@ static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len)
if (found != start)
{
char_before = *(found - 1);
- if (isalpha ((Int4) char_before) || isdigit ((Int4) char_before))
+ if (isalpha ((Int4) char_before) || isdigit ((Int4) char_before) || char_before == '_')
{
rval = FALSE;
}
}
- if (char_after != 0 && (isalpha ((Int4) char_after) || isdigit ((Int4)char_after)))
+ if (char_after != 0 && (isalpha ((Int4) char_after) || isdigit ((Int4)char_after) || char_after == '_'))
{
rval = FALSE;
}
@@ -4037,8 +4301,283 @@ static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len)
NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr scp)
{
- if (scp == NULL || StringHasNoText (scp->match_text)) return TRUE;
- else return FALSE;
+ if (scp == NULL) {
+ return TRUE;
+ }
+ if (scp->is_all_caps || scp->is_all_lower || scp->is_all_punct) {
+ return FALSE;
+ } else if (scp->match_text == NULL || scp->match_text[0] == 0) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+
+static void StripUnimportantCharacters (CharPtr str, Boolean strip_space, Boolean strip_punct)
+{
+ CharPtr src, dst;
+
+ if (str == NULL) {
+ return;
+ }
+
+ src = str;
+ dst = str;
+ while (*src != 0) {
+ if ((strip_space && isspace (*src)) || (strip_punct && ispunct (*src))) {
+ /* don't copy this character */
+ } else {
+ if (src > dst) {
+ *dst = *src;
+ }
+ dst++;
+ }
+ src++;
+ }
+ *dst = 0;
+}
+
+
+static Boolean IsWholeWordAtStart (CharPtr str, CharPtr cp, Boolean is_start)
+{
+ if (cp == str) {
+ return is_start;
+ } else {
+ return !isalpha (*(cp - 1));
+ }
+}
+
+
+static int CaseNCompare (CharPtr str1, CharPtr str2, Int4 n, Boolean case_sensitive)
+{
+ if (n == 0) {
+ return 0;
+ } else if (case_sensitive) {
+ return StringNCmp (str1, str2, n);
+ } else {
+ return StringNICmp (str1, str2, n);
+ }
+}
+
+
+static Boolean
+AdvancedStringCompare
+(CharPtr str,
+ CharPtr str_match,
+ StringConstraintPtr scp,
+ Boolean is_start,
+ Int4Ptr p_target_match_len)
+{
+ CharPtr cp_s, cp_m;
+ Boolean match = TRUE, recursive_match = FALSE;
+ Boolean word_start_s, word_start_m;
+ WordSubstitutionPtr word;
+ Int4 len1, len2, init_target_match_len = 0, target_match_len = 0;
+ ValNodePtr syn;
+
+ if (str == NULL) {
+ return FALSE;
+ } else if (scp == NULL || str_match == NULL) {
+ return TRUE;
+ }
+
+ cp_s = str;
+ cp_m = str_match;
+ if (p_target_match_len != NULL) {
+ init_target_match_len = *p_target_match_len;
+ }
+
+ while (match && *cp_m != 0 && !recursive_match) {
+ /* first, check to see if we're skipping synonyms */
+ for (word = scp->ignore_words; word != NULL && !recursive_match; word = word->next) {
+ len1 = StringLen (word->word);
+ if (CaseNCompare(word->word, cp_m, len1, word->case_sensitive) == 0) { /* text match */
+ word_start_m = IsWholeWordAtStart (str_match, cp_m, is_start);
+ if (!word->whole_word || (!isalpha (*(cp_m + len1)) && word_start_m)) { /* whole word match */
+ if (word->synonyms == NULL) {
+ if (AdvancedStringCompare (cp_s, cp_m + len1, scp, word_start_m, &target_match_len)) {
+ recursive_match = TRUE;
+ }
+ } else {
+ for (syn = word->synonyms; syn != NULL && !recursive_match; syn = syn->next) {
+ len2 = StringLen (syn->data.ptrvalue);
+ if (CaseNCompare(syn->data.ptrvalue, cp_s, len2, word->case_sensitive) == 0) { /* text match */
+ word_start_s = IsWholeWordAtStart (str, cp_s, is_start);
+ if (!word->whole_word || (!isalpha (*(cp_s + len2)) && word_start_s)) { /* whole word match */
+ if (AdvancedStringCompare (cp_s + len2, cp_m + len1, scp, word_start_m && word_start_s, &target_match_len)) {
+ recursive_match = TRUE;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ if (!recursive_match) {
+ if (CaseNCompare(cp_m, cp_s, 1, scp->case_sensitive) == 0) {
+ cp_m++;
+ cp_s++;
+ target_match_len++;
+ } else if (scp->ignore_space && (isspace (*cp_m) || isspace (*cp_s))) {
+ if (isspace (*cp_m)) {
+ cp_m++;
+ }
+ if (isspace (*cp_s)) {
+ cp_s++;
+ target_match_len++;
+ }
+ } else if (scp->ignore_punct && (ispunct (*cp_m) || ispunct (*cp_s))) {
+ if (ispunct (*cp_m)) {
+ cp_m++;
+ }
+ if (ispunct (*cp_s)) {
+ cp_s++;
+ target_match_len++;
+ }
+ } else {
+ match = FALSE;
+ }
+ }
+ }
+
+ if (match && !recursive_match) {
+ while ((scp->ignore_space && isspace (*cp_s)) || (scp->ignore_punct && ispunct (*cp_s))) {
+ cp_s++;
+ target_match_len++;
+ }
+ while ((scp->ignore_space && isspace (*cp_m)) || (scp->ignore_punct && ispunct (*cp_m))) {
+ cp_m++;
+ }
+
+ if (*cp_m != 0) {
+ match = FALSE;
+ } else if ((scp->match_location == String_location_ends || scp->match_location == String_location_equals) && *cp_s != 0) {
+ match = FALSE;
+ } else if (scp->whole_word && (!is_start || isalpha (*cp_s))) {
+ match = FALSE;
+ }
+ }
+ if (match && p_target_match_len != NULL) {
+ (*p_target_match_len) += target_match_len;
+ }
+
+ return match;
+}
+
+
+static Boolean AdvancedStringMatch (CharPtr str, StringConstraintPtr scp)
+{
+ CharPtr cp;
+ Boolean rval = FALSE;
+
+ if (str == NULL) {
+ rval = FALSE;
+ } else if (scp == NULL) {
+ rval = TRUE;
+ } else if (AdvancedStringCompare (str, scp->match_text, scp, TRUE, NULL)) {
+ rval = TRUE;
+ } else if (scp->match_location == String_location_starts || scp->match_location == String_location_equals) {
+ rval = FALSE;
+ } else {
+ cp = str + 1;
+ while (!rval && *cp != 0) {
+ if (scp->whole_word) {
+ while (*cp != 0 && isalpha (*(cp-1))) {
+ cp++;
+ }
+ }
+ if (*cp != 0) {
+ if (AdvancedStringCompare (cp, scp->match_text, scp, TRUE, NULL)) {
+ rval = TRUE;
+ } else {
+ cp++;
+ }
+ }
+ }
+ }
+ return rval;
+}
+
+static void TestAdvancedStringMatch (void)
+{
+ StringConstraintPtr scp;
+ CharPtr text = "The quick brown fox jumped over the lazy dog.";
+ CharPtr summ;
+
+ scp = StringConstraintNew ();
+ scp->match_location = String_location_contains;
+ scp->match_text = StringSave ("dog leaped");
+ scp->ignore_words = WordSubstitutionNew();
+ scp->ignore_words->word = StringSave ("leap");
+ ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("jump"));
+ scp->ignore_words->next = WordSubstitutionNew();
+ scp->ignore_words->next->word = StringSave ("dog");
+ ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("fox"));
+
+
+ AdvancedStringMatch(text, scp);
+ summ = SummarizeStringConstraint (scp);
+ summ = MemFree (summ);
+ scp = StringConstraintFree (scp);
+
+ scp = StringConstraintNew ();
+ scp->match_location = String_location_equals;
+ scp->match_text = StringSave ("A fast beige wolf leaped across a sleepy beagle.");
+ scp->ignore_words = WordSubstitutionNew();
+ scp->ignore_words->word = StringSave ("a");
+ scp->ignore_words->whole_word = TRUE;
+ ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("the"));
+ scp->ignore_words->next = WordSubstitutionNew();
+ scp->ignore_words->next->word = StringSave ("fast");
+ ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("quick"));
+ scp->ignore_words->next->next = WordSubstitutionNew();
+ scp->ignore_words->next->next->word = StringSave ("beige");
+ ValNodeAddPointer (&scp->ignore_words->next->next->synonyms, 0, StringSave ("brown"));
+ scp->ignore_words->next->next->next = WordSubstitutionNew();
+ scp->ignore_words->next->next->next->word = StringSave ("wolf");
+ ValNodeAddPointer (&scp->ignore_words->next->next->next->synonyms, 0, StringSave ("fox"));
+ scp->ignore_words->next->next->next->next = WordSubstitutionNew();
+ scp->ignore_words->next->next->next->next->word = StringSave ("across");
+ ValNodeAddPointer (&scp->ignore_words->next->next->next->next->synonyms, 0, StringSave ("over"));
+ scp->ignore_words->next->next->next->next->next = WordSubstitutionNew();
+ scp->ignore_words->next->next->next->next->next->word = StringSave ("beagle");
+ ValNodeAddPointer (&scp->ignore_words->next->next->next->next->next->synonyms, 0, StringSave ("dog"));
+
+ AdvancedStringMatch(text, scp);
+ summ = SummarizeStringConstraint (scp);
+ summ = MemFree (summ);
+ scp = StringConstraintFree (scp);
+}
+
+
+static const CharPtr kPutative = "putative";
+
+static CharPtr s_weasels[] = {
+ "hypothetical",
+ "probable",
+ "putative",
+ NULL
+};
+
+static CharPtr SkipWeasel (CharPtr str)
+{
+ Int4 i, len;
+ CharPtr cp = str;
+
+ for (i = 0; s_weasels[i] != NULL; i++) {
+ len = StringLen (s_weasels[i]);
+ if (StringNICmp (str, s_weasels[i], len) == 0
+ && isspace (*(str + len))) {
+ cp = str + len + 1;
+ while (isspace (*cp)) {
+ cp++;
+ }
+ return cp;
+ }
+ }
+ return cp;
}
@@ -4047,20 +4586,52 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
CharPtr pFound;
Boolean rval = FALSE;
Char char_after = 0;
+ CharPtr search, pattern;
if (IsStringConstraintEmpty (scp)) return TRUE;
if (StringHasNoText (str)) return FALSE;
+ if (scp->ignore_weasel) {
+ str = SkipWeasel(str);
+ }
+
+ if (scp->is_all_caps && !IsAllCaps(str)) {
+ return FALSE;
+ }
+ if (scp->is_all_lower && !IsAllLowerCase(str)) {
+ return FALSE;
+ }
+ if (scp->is_all_punct && !IsAllPunctuation(str)) {
+ return FALSE;
+ }
+ if (scp->match_text == NULL) {
+ return TRUE;
+ }
+
+ if (scp->match_location != String_location_inlist && scp->ignore_words != NULL) {
+ return AdvancedStringMatch(str, scp);
+ }
+
+ if (scp->match_location != String_location_inlist && (scp->ignore_space || scp->ignore_punct)) {
+ search = StringSave (str);
+ StripUnimportantCharacters (search, scp->ignore_space, scp->ignore_punct);
+ pattern = StringSave (scp->match_text);
+ StripUnimportantCharacters (pattern, scp->ignore_space, scp->ignore_punct);
+ } else {
+ search = str;
+ pattern = scp->match_text;
+ }
+
switch (scp->match_location)
{
case String_location_contains:
if (scp->case_sensitive)
{
- pFound = StringSearch (str, scp->match_text);
+ pFound = StringSearch (search, pattern);
}
else
{
- pFound = StringISearch (str, scp->match_text);
+ pFound = StringISearch (search, pattern);
}
if (pFound == NULL)
{
@@ -4068,20 +4639,20 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
}
else if (scp->whole_word)
{
- rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text));
+ rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
while (!rval && pFound != NULL)
{
if (scp->case_sensitive)
{
- pFound = StringSearch (pFound + 1, scp->match_text);
+ pFound = StringSearch (pFound + 1, pattern);
}
else
{
- pFound = StringISearch (pFound + 1, scp->match_text);
+ pFound = StringISearch (pFound + 1, pattern);
}
if (pFound != NULL)
{
- rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text));
+ rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
}
}
}
@@ -4093,17 +4664,17 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
case String_location_starts:
if (scp->case_sensitive)
{
- pFound = StringSearch (str, scp->match_text);
+ pFound = StringSearch (search, pattern);
}
else
{
- pFound = StringISearch (str, scp->match_text);
+ pFound = StringISearch (search, pattern);
}
- if (pFound == str)
+ if (pFound == search)
{
if (scp->whole_word)
{
- rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text));
+ rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
}
else
{
@@ -4114,19 +4685,19 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
case String_location_ends:
if (scp->case_sensitive)
{
- pFound = StringSearch (str, scp->match_text);
+ pFound = StringSearch (search, pattern);
}
else
{
- pFound = StringISearch (str, scp->match_text);
+ pFound = StringISearch (search, pattern);
}
while (pFound != NULL && !rval) {
- char_after = *(pFound + StringLen (scp->match_text));
+ char_after = *(pFound + StringLen (pattern));
if (char_after == 0)
{
if (scp->whole_word)
{
- rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text));
+ rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
}
else
{
@@ -4139,11 +4710,11 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
{
if (scp->case_sensitive)
{
- pFound = StringSearch (pFound + 1, scp->match_text);
+ pFound = StringSearch (pFound + 1, pattern);
}
else
{
- pFound = StringISearch (pFound + 1, scp->match_text);
+ pFound = StringISearch (pFound + 1, pattern);
}
}
}
@@ -4151,14 +4722,14 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
case String_location_equals:
if (scp->case_sensitive)
{
- if (StringCmp (str, scp->match_text) == 0)
+ if (StringCmp (search, pattern) == 0)
{
rval = TRUE;
}
}
else
{
- if (StringICmp (str, scp->match_text) == 0)
+ if (StringICmp (search, pattern) == 0)
{
rval = TRUE;
}
@@ -4167,11 +4738,11 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
case String_location_inlist:
if (scp->case_sensitive)
{
- pFound = StringSearch (scp->match_text, str);
+ pFound = StringSearch (pattern, search);
}
else
{
- pFound = StringISearch (scp->match_text, str);
+ pFound = StringISearch (pattern, search);
}
if (pFound == NULL)
{
@@ -4179,29 +4750,36 @@ NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstrain
}
else
{
- rval = IsWholeWordMatch (scp->match_text, pFound, StringLen (str));
+ rval = IsWholeWordMatch (pattern, pFound, StringLen (search));
while (!rval && pFound != NULL)
{
if (scp->case_sensitive)
{
- pFound = StringSearch (pFound + 1, str);
+ pFound = StringSearch (pFound + 1, search);
}
else
{
- pFound = StringISearch (pFound + 1, str);
+ pFound = StringISearch (pFound + 1, search);
}
if (pFound != NULL)
{
- rval = IsWholeWordMatch (scp->match_text, pFound, StringLen (str));
+ rval = IsWholeWordMatch (pattern, pFound, StringLen (str));
}
}
}
if (!rval) {
/* look for spans */
- rval = IsStringInSpanInList (str, scp->match_text);
+ rval = IsStringInSpanInList (search, pattern);
}
break;
}
+
+ if (search != str) {
+ search = MemFree (search);
+ }
+ if (pattern != scp->match_text) {
+ pattern = MemFree (pattern);
+ }
return rval;
}
@@ -4248,6 +4826,86 @@ static Boolean DoesStringListMatchConstraint (ValNodePtr list, StringConstraintP
}
+NLM_EXTERN Boolean ReplaceStringConstraintPortionInString (CharPtr PNTR str, CharPtr replace, StringConstraintPtr scp)
+{
+ Boolean rval = FALSE;
+ CharPtr match_start, new_str;
+ Int4 match_len, front_len;
+
+ if (str == NULL) {
+ return FALSE;
+ } else if (*str == NULL) {
+ if (IsStringConstraintEmpty (scp) || scp->not_present) {
+ *str = StringSave (replace);
+ rval = TRUE;
+ }
+ } else if (IsStringConstraintEmpty (scp)) {
+ *str = MemFree (*str);
+ *str = StringSave (replace);
+ rval = TRUE;
+ } else {
+ switch (scp->match_location)
+ {
+ case String_location_equals:
+ case String_location_inlist:
+ if (DoesStringMatchConstraint (*str, scp)) {
+ *str = MemFree (*str);
+ *str = StringSave (replace);
+ rval = TRUE;
+ }
+ break;
+ case String_location_starts:
+ match_len = 0;
+ if (AdvancedStringCompare (*str, scp->match_text, scp, TRUE, &match_len)) {
+ new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1));
+ StringCpy (new_str, replace);
+ StringCat (new_str, (*str) + match_len);
+ *str = MemFree (*str);
+ *str = new_str;
+ rval = TRUE;
+ }
+ break;
+ case String_location_contains:
+ match_start = *str;
+ while (*match_start != 0) {
+ match_len = 0;
+ if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str || !isalpha (*(match_start - 1))), &match_len)) {
+ new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1));
+ front_len = match_start - *str;
+ StringNCpy (new_str, *str, front_len);
+ StringCat (new_str, replace);
+ StringCat (new_str, match_start + match_len);
+ *str = MemFree (*str);
+ *str = new_str;
+ match_start = (*str) + front_len + StringLen (replace);
+ rval = TRUE;
+ } else {
+ match_start++;
+ }
+ }
+ break;
+ case String_location_ends:
+ match_start = *str;
+ while (!rval && *match_start != 0) {
+ match_len = 0;
+ if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str), &match_len)
+ && *(match_start + match_len) == 0) {
+ new_str = (CharPtr) MemNew (sizeof (Char) * ((match_start - *str) + StringLen (replace) + 1));
+ StringNCpy (new_str, *str, match_start - *str);
+ StringCat (new_str, replace);
+ *str = MemFree (*str);
+ *str = new_str;
+ rval = TRUE;
+ } else {
+ match_start++;
+ }
+ }
+ break;
+ }
+ }
+ return rval;
+}
+
NLM_EXTERN Boolean RemoveStringConstraintPortionFromString (CharPtr PNTR str, StringConstraintPtr scp)
{
@@ -4781,6 +5439,66 @@ static Boolean DoesFeatureMatchLocationConstraint (SeqFeatPtr sfp, LocationConst
}
+
+static Boolean DoesSeqFeatMatchLocationConstraint (SeqFeatPtr sfp, LocationConstraintPtr constraint)
+{
+ if (sfp == NULL) {
+ return FALSE;
+ } else if (IsLocationConstraintEmpty(constraint)) {
+ return TRUE;
+ } else if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) {
+ return FALSE;
+ } else if (!DoesStrandMatchConstraint (sfp->location, constraint)) {
+ return FALSE;
+ } else if (!DoesLocationMatchTypeConstraint (sfp->location, constraint)) {
+ return FALSE;
+ } else if (!DoesLocationMatchDistanceConstraint(sfp->location, constraint)) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
+
+static Boolean DoesBioseqMatchLocationConstraint (BioseqPtr bsp, LocationConstraintPtr constraint)
+{
+ Boolean at_least_one = FALSE;
+ Boolean rval = TRUE;
+ SeqFeatPtr sfp;
+ SeqMgrFeatContext context;
+
+ if (bsp == NULL) return FALSE;
+
+ if (IsLocationConstraintEmpty(constraint)) {
+ return TRUE;
+ }
+
+ if (!DoesBioseqMatchSequenceType(bsp, constraint->seq_type)) {
+ return FALSE;
+ }
+ if (constraint->strand != Strand_constraint_any
+ || constraint->partial5 != Partial_constraint_either
+ || constraint->partial3 != Partial_constraint_either) {
+ if (ISA_aa (bsp->mol)) {
+ sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
+ return DoesSeqFeatMatchLocationConstraint(sfp, constraint);
+ } else {
+ at_least_one = FALSE;
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
+ sfp != NULL && rval;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) {
+ rval = DoesSeqFeatMatchLocationConstraint (sfp, constraint);
+ at_least_one = TRUE;
+ }
+ return rval && at_least_one;
+ }
+ } else {
+ return TRUE;
+ }
+
+}
+
+
static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, LocationConstraintPtr constraint)
{
SeqFeatPtr sfp;
@@ -4790,7 +5508,6 @@ static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, Lo
BioseqSetPtr bssp;
ValNodePtr vnp;
ObjValNodePtr ovp;
- SeqMgrFeatContext context;
if (data == NULL) return FALSE;
@@ -4814,31 +5531,7 @@ static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, Lo
bsp = (BioseqPtr) ovp->idx.parentptr;
}
}
- if (!DoesBioseqMatchSequenceType(bsp, constraint->seq_type)) {
- return FALSE;
- }
- if (constraint->strand != Strand_constraint_any
- || constraint->partial5 != Partial_constraint_either
- || constraint->partial3 != Partial_constraint_either) {
- if (ISA_aa (bsp->mol)) {
- sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
- if (sfp == NULL) {
- return FALSE;
- } else if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) {
- return FALSE;
- } else if (!DoesStrandMatchConstraint (sfp->location, constraint)) {
- return FALSE;
- } else if (DoesLocationMatchTypeConstraint (sfp->location, constraint)) {
- return FALSE;
- } else {
- return TRUE;
- }
- } else {
- return FALSE;
- }
- } else {
- return TRUE;
- }
+ return DoesBioseqMatchLocationConstraint(bsp, constraint);
} else if (choice == 0) {
if (constraint->seq_type != Seqtype_constraint_any) {
return FALSE;
@@ -4865,6 +5558,8 @@ static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, Lo
}
}
return FALSE;
+ } else if (choice == OBJ_BIOSEQ) {
+ return DoesBioseqMatchLocationConstraint((BioseqPtr)data, constraint);
} else {
return FALSE;
}
@@ -4984,7 +5679,78 @@ static CharPtr FindTextMarker(CharPtr str, Int4Ptr len, TextMarkerPtr marker, Bo
}
+static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit);
+
/* for parsing and editing */
+NLM_EXTERN void ApplyTextTransformsToString (CharPtr PNTR str, ValNodePtr transform_list)
+{
+ CharPtr tmp;
+
+ if (str == NULL || *str == NULL) {
+ return;
+ }
+
+ while (transform_list != NULL) {
+ switch (transform_list->choice) {
+ case TextTransform_edit:
+ tmp = ApplyEditToString (*str, transform_list->data.ptrvalue);
+ *str = MemFree (*str);
+ *str = tmp;
+ break;
+ case TextTransform_caps:
+ FixCapitalizationInString (str, transform_list->data.intvalue, NULL);
+ break;
+ case TextTransform_remove:
+ RemoveTextPortionFromString (*str, (TextPortionPtr)transform_list->data.ptrvalue);
+ break;
+ }
+ transform_list = transform_list->next;
+ }
+}
+
+
+static Boolean IsTextPortionEmpty (TextPortionPtr text_portion)
+{
+ if (text_portion == NULL
+ || (IsTextMarkerEmpty (text_portion->left_marker)
+ && IsTextMarkerEmpty (text_portion->right_marker))) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+
+NLM_EXTERN Boolean IsTextTransformEmpty (ValNodePtr vnp)
+{
+ Boolean rval = TRUE;
+ FieldEditPtr edit;
+
+ if (vnp == NULL) {
+ return TRUE;
+ }
+ switch (vnp->choice) {
+ case TextTransform_edit:
+ if ((edit = (FieldEditPtr) vnp->data.ptrvalue) != NULL
+ && edit->find_txt != NULL) {
+ rval = FALSE;
+ }
+ break;
+ case TextTransform_caps:
+ if (vnp->data.intvalue > Cap_change_none) {
+ rval = FALSE;
+ }
+ break;
+ case TextTransform_remove:
+ if (!IsTextPortionEmpty (vnp->data.ptrvalue)) {
+ rval = FALSE;
+ }
+ break;
+ }
+ return rval;
+}
+
+
NLM_EXTERN CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion)
{
CharPtr portion = NULL;
@@ -5445,7 +6211,7 @@ static CharPtr MakeValFromThreeFields (CharPtr PNTR fields)
val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 2));
sprintf (val, ":%s:", fields[1]);
} else if (empty[1] && empty[2]) {
- val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 2));
+ val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + 2));
sprintf (val, "%s:", fields[0]);
} else if (empty[0]) {
val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + StringLen (fields[2]) + 3));
@@ -5585,7 +6351,7 @@ SetStringsInValNodeStringList
static Boolean SetStringInGBQualList (GBQualPtr PNTR list, ValNodePtr field, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
- Boolean rval = FALSE, does_match;
+ Boolean rval = FALSE, does_match, any_found = FALSE;
Int4 gbqual, subfield;
CharPtr qual_name = NULL, tmp;
GBQualPtr gbq, last_gbq = NULL;
@@ -5623,10 +6389,11 @@ static Boolean SetStringInGBQualList (GBQualPtr PNTR list, ValNodePtr field, Str
} else if (DoesStringMatchConstraint (gbq->val, scp)) {
rval |= SetStringValue (&(gbq->val), new_val, existing_text);
}
+ any_found = TRUE;
}
last_gbq = gbq;
}
- if (!rval && (scp == NULL || scp->match_text == NULL)) {
+ if (!rval && (scp == NULL || scp->match_text == NULL || (any_found == FALSE && scp->not_present))) {
gbq = GBQualNew ();
gbq->qual = StringSave (qual_name);
gbq->val = StringSave (new_val);
@@ -6218,6 +6985,7 @@ static Boolean RemoveDbxrefString (ValNodePtr PNTR list, StringConstraintPtr scp
} else {
vnp_prev = vnp;
}
+ vnp = vnp_next;
}
return rval;
}
@@ -6706,7 +7474,7 @@ static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Pt
ch = str [k];
while (ch != '\0' && q < 3 && rval) {
ch = TO_UPPER (ch);
- if (StringChr ("ACGTU", ch) != NULL) {
+ if (StringChr ("ACGTUYNKMRYSWBVHD", ch) != NULL) {
if (ch == 'U') {
ch = 'T';
}
@@ -7289,7 +8057,7 @@ static CharPtr GetAnticodonLocString (SeqFeatPtr sfp)
-static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp)
+NLM_EXTERN ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp)
{
BioseqPtr protbsp;
SeqFeatPtr protsfp;
@@ -7320,7 +8088,7 @@ static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp)
}
-static void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene)
+NLM_EXTERN void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene)
{
GeneRefPtr grp = NULL;
SeqFeatPtr gene = NULL;
@@ -7442,7 +8210,7 @@ static CharPtr GetCodeBreakString (SeqFeatPtr sfp)
}
-NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
+static CharPtr GetQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
{
CharPtr str = NULL;
GeneRefPtr grp = NULL;
@@ -7452,12 +8220,9 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
CdRegionPtr crp;
ValNodePtr vnp;
Char buf[20];
+ BioseqPtr protbsp;
- if (sfp == NULL || field == NULL || field->field == NULL)
- {
- return NULL;
- }
- if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
+ if (sfp == NULL || field == NULL)
{
return NULL;
}
@@ -7470,8 +8235,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
/* fields common to all features */
/* note, also known as comment */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue)))
{
if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
{
@@ -7480,15 +8245,15 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* db-xref */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue))))
{
str = GetDbxrefString (sfp->dbxref, scp);
}
/* exception */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue))))
{
if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint(sfp->except_text, scp))
{
@@ -7497,8 +8262,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* evidence */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue))))
{
if (sfp->exp_ev == 1)
{
@@ -7515,16 +8280,16 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
/* citation */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_citation)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue))))
{
str = GetCitationTextFromFeature (sfp, scp, batch_extra == NULL ? NULL : batch_extra->cit_list);
}
/* location */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_location)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue))))
{
str = SeqLocPrintUseBestID (sfp->location);
}
@@ -7532,8 +8297,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
/* fields common to some features */
/* product */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue))))
{
if (prp != NULL) {
str = GetFirstValNodeStringMatch (prp->name, scp);
@@ -7545,8 +8310,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
/* Gene fields */
/* locus */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint(grp->locus, scp))
@@ -7556,8 +8321,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* description */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp))
@@ -7567,8 +8332,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* maploc */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp))
@@ -7578,8 +8343,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* allele */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue)))
&& grp != NULL
&& sfp->idx.subtype != FEATDEF_variation)
{
@@ -7590,8 +8355,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* locus_tag */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp))
@@ -7601,16 +8366,16 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* synonym */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue)))
&& grp != NULL)
{
str = GetFirstValNodeStringMatch (grp->syn, scp);
}
/* gene comment */
if (str == NULL
- && field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_gene_comment
+ && field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_gene_comment
&& gene != NULL
&& !StringHasNoText (gene->comment)
&& DoesStringMatchConstraint (gene->comment, scp)) {
@@ -7622,8 +8387,8 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
/* note - product handled above */
/* description */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
&& prp != NULL)
{
if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
@@ -7632,16 +8397,16 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* ec_number */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue)))
&& prp != NULL)
{
str = GetFirstValNodeStringMatch (prp->ec, scp);
}
/* activity */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)))
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue)))
&& prp != NULL)
{
str = GetFirstValNodeStringMatch (prp->activity, scp);
@@ -7649,13 +8414,13 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
/* coding region fields */
/* transl_except */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_except
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except
&& sfp->data.choice == SEQFEAT_CDREGION)
{
str = GetCodeBreakString (sfp);
}
/* transl_table */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_table
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
&& sfp->data.choice == SEQFEAT_CDREGION
&& (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL)
{
@@ -7665,36 +8430,46 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
str = StringSave (buf);
}
}
+ /* translation */
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_translation
+ && sfp->data.choice == SEQFEAT_CDREGION)
+ {
+ if (sfp->product != NULL)
+ {
+ protbsp = BioseqFindFromSeqLoc (sfp->product);
+ str = GetSequenceByBsp (protbsp);
+ }
+ }
/* special RNA qualifiers */
/* tRNA qualifiers */
/* codon-recognized */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codons_recognized)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->field->data.ptrvalue)))) {
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))) {
str = GettRNACodonsRecognized (sfp, scp);
}
/* anticodon */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))) {
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))) {
str = GetAnticodonLocString (sfp);
}
/* tag-peptide */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_tag_peptide)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->field->data.ptrvalue)))) {
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))) {
str = GettmRNATagPeptide (sfp->data.value.ptrvalue, scp);
}
/* ncRNA_class */
if (str == NULL
- && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ncRNA_class)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->field->data.ptrvalue)))) {
+ && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))) {
str = GetncRNAClass (sfp->data.value.ptrvalue, scp);
}
/* codon-start */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codon_start
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codon_start
&& sfp->data.choice == SEQFEAT_CDREGION)
{
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
@@ -7709,25 +8484,48 @@ NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
+ /* special region qualifiers */
+ if (sfp->idx.subtype == FEATDEF_REGION
+ && field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_name
+ && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) {
+ str = StringSave (sfp->data.value.ptrvalue);
+ }
+
/* actual GenBank qualifiers */
if (str == NULL)
{
- if (field->field->choice == FeatQualChoice_legal_qual)
+ if (field->choice == FeatQualChoice_legal_qual)
{
- gbqual = GetGBQualFromFeatQual (field->field->data.intvalue, &subfield);
+ gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield);
if (gbqual > -1) {
str = GetFirstGBQualMatch (sfp->qual, ParFlat_GBQual_names [gbqual].name, subfield, scp);
} else {
/* need to do something with non-qualifier qualifiers */
}
} else {
- str = GetFirstGBQualMatchConstraintName (sfp->qual, field->field->data.ptrvalue, scp);
+ str = GetFirstGBQualMatchConstraintName (sfp->qual, field->data.ptrvalue, scp);
}
}
return str;
}
+NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
+{
+ if (sfp == NULL || field == NULL || field->field == NULL)
+ {
+ return NULL;
+ }
+ if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
+ {
+ return NULL;
+ }
+ return GetQualFromFeatureAnyType (sfp, field->field, scp, batch_extra);
+
+}
+
+
NLM_EXTERN CharPtr GetQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
{
return GetQualFromFeatureEx (sfp, field, scp, NULL);
@@ -7755,7 +8553,7 @@ static Boolean RemoveCodeBreak (CdRegionPtr crp)
}
-NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
+static Boolean RemoveQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp)
{
Boolean rval = FALSE;
GeneRefPtr grp = NULL;
@@ -7767,11 +8565,7 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
SeqFeatPtr gene = NULL;
SeqMgrFeatContext fcontext;
- if (sfp == NULL || field == NULL || field->field == NULL)
- {
- return FALSE;
- }
- if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
+ if (sfp == NULL || field == NULL)
{
return FALSE;
}
@@ -7802,8 +8596,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
/* fields common to all features */
/* note, also known as comment */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue)))
{
if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint (sfp->comment, scp))
{
@@ -7812,24 +8606,25 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* db-xref */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue)))
{
rval = RemoveDbxrefString (&(sfp->dbxref), scp);
}
/* exception */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue)))
{
if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint (sfp->except_text, scp))
{
sfp->except_text = MemFree (sfp->except_text);
+ sfp->excpt = FALSE;
rval = TRUE;
}
}
/* evidence */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue)))
{
if ((sfp->exp_ev == 1 && DoesStringMatchConstraint("experimental", scp))
|| (sfp->exp_ev == 2 && DoesStringMatchConstraint("non-experimental", scp))) {
@@ -7839,8 +8634,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* citation */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_citation)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue)))
{
if (sfp->cit != NULL) {
sfp->cit = PubSetFree (sfp->cit);
@@ -7849,8 +8644,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* location */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_location)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue)))
{
if (sfp->location != NULL) {
sfp->location = SeqLocFree (sfp->location);
@@ -7859,8 +8654,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
/* pseudo */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_pseudo)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->data.ptrvalue)))
{
if (gene != NULL) {
if (gene->pseudo) {
@@ -7877,8 +8672,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
/* fields common to some features */
/* product */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue)))
{
if (prp != NULL) {
rval = RemoveValNodeStringMatch (&(prp->name), scp);
@@ -7889,8 +8684,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
/* Gene fields */
/* locus */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint (grp->locus, scp)) {
@@ -7899,8 +8694,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* description */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp))
@@ -7910,8 +8705,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* maploc */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp))
@@ -7921,8 +8716,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* allele */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue)))
&& grp != NULL
&& sfp->idx.subtype != FEATDEF_variation)
{
@@ -7933,8 +8728,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* locus_tag */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue)))
&& grp != NULL)
{
if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp))
@@ -7944,15 +8739,15 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* synonym */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue)))
&& grp != NULL)
{
rval = RemoveValNodeStringMatch (&(grp->syn), scp);
}
/* gene comment */
- if (field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_gene_comment
+ if (field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_gene_comment
&& gene != NULL
&& !StringHasNoText (gene->comment)
&& DoesStringMatchConstraint (gene->comment, scp)) {
@@ -7963,8 +8758,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
/* protein fields */
/* note - product handled above */
/* description */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
&& prp != NULL)
{
if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
@@ -7973,19 +8768,19 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* ec_number */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue)))
&& prp != NULL)
{
rval = RemoveValNodeStringMatch (&(prp->ec), scp);
}
/* activity */
- if (((field->field->choice == FeatQualChoice_legal_qual
- && (field->field->data.intvalue == Feat_qual_legal_activity
- || field->field->data.intvalue == Feat_qual_legal_function))
- || (field->field->choice == FeatQualChoice_illegal_qual
- && (DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)
- || DoesStringMatchConstraint ("function", field->field->data.ptrvalue))))
+ if (((field->choice == FeatQualChoice_legal_qual
+ && (field->data.intvalue == Feat_qual_legal_activity
+ || field->data.intvalue == Feat_qual_legal_function))
+ || (field->choice == FeatQualChoice_illegal_qual
+ && (DoesStringMatchConstraint ("activity", field->data.ptrvalue)
+ || DoesStringMatchConstraint ("function", field->data.ptrvalue))))
&& prp != NULL)
{
rval = RemoveValNodeStringMatch (&(prp->activity), scp);
@@ -7993,14 +8788,14 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
/* special coding region fields */
/* transl_except */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_except
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except
&& sfp->data.choice == SEQFEAT_CDREGION)
{
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
rval = RemoveCodeBreak (crp);
}
/* transl_table */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_table
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
&& sfp->data.choice == SEQFEAT_CDREGION
&& (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL)
{
@@ -8013,8 +8808,8 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
/* special RNA fields */
/* anticodon */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))
&& rrp != NULL && rrp->ext.choice == 2)
{
trp = (tRNAPtr) rrp->ext.value.ptrvalue;
@@ -8024,38 +8819,50 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
}
/* codons recognized */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codons_recognized)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))
&& rrp != NULL && rrp->ext.choice == 2)
{
rval = RemovetRNACodons_Recognized (sfp);
}
/* tag_peptide */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_tag_peptide)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))
&& rrp != NULL && rrp->ext.choice == 3)
{
rval = RemovetmRNATagPeptide (rrp, scp);
}
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ncRNA_class)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))
&& rrp != NULL && rrp->ext.choice == 3)
{
rval = RemovencRNAClass (rrp, scp);
}
+ /* special region qualifiers */
+ if (sfp->idx.subtype == FEATDEF_REGION
+ && field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_name
+ && !StringHasNoText (sfp->data.value.ptrvalue)
+ && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) {
+ sfp->data.value.ptrvalue = MemFree (sfp->data.value.ptrvalue);
+ rval = TRUE;
+ }
+
+
+
if (!rval) {
/* actual GenBank qualifiers */
- if (field->field->choice == FeatQualChoice_legal_qual)
+ if (field->choice == FeatQualChoice_legal_qual)
{
- gbqual = GetGBQualFromFeatQual (field->field->data.intvalue, &subfield);
+ gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield);
if (gbqual > -1) {
rval = RemoveGBQualMatch (&(sfp->qual), ParFlat_GBQual_names [gbqual].name, subfield, scp);
} else {
/* need to do something with non-qualifier qualifiers */
}
} else {
- rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->field->data.ptrvalue, scp);
+ rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->data.ptrvalue, scp);
}
}
@@ -8063,6 +8870,21 @@ NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field,
}
+/* RemoveQualFromFeature
+ * Public entry point: removes the qualifier described by field->field from sfp,
+ * restricted to features whose subtype corresponds to field->type.
+ *
+ * sfp   - feature to edit
+ * field - feature type plus qualifier choice; field->field is handed on unchanged
+ * scp   - string constraint handed on to RemoveQualFromFeatureAnyType
+ *
+ * Returns FALSE when sfp, field or field->field is NULL, or when field->type is
+ * not Macro_feature_type_any and does not map to sfp's subtype; otherwise
+ * returns whatever RemoveQualFromFeatureAnyType returns.
+ */
+NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
+{
+  Boolean type_matches;
+
+  if (sfp == NULL || field == NULL || field->field == NULL) {
+    return FALSE;
+  }
+
+  /* the featdef lookup is only performed when a specific feature type was requested */
+  type_matches = (Boolean) (field->type == Macro_feature_type_any
+                            || sfp->idx.subtype == GetFeatdefFromFeatureType (field->type));
+  if (!type_matches) {
+    return FALSE;
+  }
+
+  return RemoveQualFromFeatureAnyType (sfp, field->field, scp);
+}
+
+
static Boolean ChooseBestFrame (SeqFeatPtr sfp)
{
CdRegionPtr crp;
@@ -8103,6 +8925,62 @@ static Boolean ChooseBestFrame (SeqFeatPtr sfp)
}
+/* ChooseMatchingFrame
+ * Tries reading frames 1, 2 and 3 in turn on a coding region and keeps the first
+ * one whose translation (ProteinFromCdRegionEx) is string-identical to the
+ * sequence of the feature's product Bioseq.
+ *
+ * Returns TRUE only when a matching frame was found and it differs from the
+ * frame the coding region had on entry.  Returns FALSE when sfp is NULL, is not
+ * a coding region, has no product or the product Bioseq cannot be found, has no
+ * CdRegion data, the product sequence has no text, no frame matches (the
+ * original frame is restored), or the matching frame equals the original one.
+ */
+static Boolean ChooseMatchingFrame (SeqFeatPtr sfp)
+{
+  CdRegionPtr  cdr;
+  BioseqPtr    prot;
+  CharPtr      wanted;
+  CharPtr      candidate;
+  ByteStorePtr store;
+  Uint1        saved_frame;
+  Uint1        found_frame = 0;
+  Uint1        frame;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL) {
+    return FALSE;
+  }
+  prot = BioseqFindFromSeqLoc (sfp->product);
+  if (prot == NULL) {
+    return FALSE;
+  }
+  cdr = sfp->data.value.ptrvalue;
+  if (cdr == NULL) {
+    return FALSE;
+  }
+
+  wanted = GetSequenceByBsp (prot);
+  if (StringHasNoText (wanted)) {
+    MemFree (wanted);
+    return FALSE;
+  }
+
+  saved_frame = cdr->frame;
+
+  /* the frame is changed in place because the translation reads it from sfp */
+  for (frame = 1; frame <= 3; frame++) {
+    cdr->frame = frame;
+    store = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
+    candidate = BSMerge (store, NULL);
+    if (StringCmp (candidate, wanted) == 0) {
+      found_frame = frame;
+    }
+    BSFree (store);
+    MemFree (candidate);
+    if (found_frame != 0) {
+      break;
+    }
+  }
+  MemFree (wanted);
+
+  if (found_frame == 0) {
+    /* nothing matched: put the original frame back */
+    cdr->frame = saved_frame;
+    return FALSE;
+  }
+
+  /* a match on frame 1 when the original frame was 0 is stored as 0 again, so it
+   * counts as "unchanged" (presumably 0 is read as frame 1 - confirm) */
+  if (found_frame == 1 && saved_frame == 0) {
+    found_frame = 0;
+  }
+
+  cdr->frame = found_frame;
+  return (Boolean) (found_frame != saved_frame);
+}
+
+
static SeqFeatPtr CreateGeneForFeature (SeqFeatPtr sfp)
{
BioseqPtr bsp;
@@ -8266,7 +9144,7 @@ static Boolean SetGeneticCode (CdRegionPtr crp, Int4 value)
}
-static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra)
+static Boolean SetQualOnFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra)
{
Boolean rval = FALSE;
Boolean matched_term = FALSE;
@@ -8277,11 +9155,7 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
SeqFeatPtr gene = NULL;
SeqMgrFeatContext fcontext;
- if (sfp == NULL || field == NULL || field->field == NULL)
- {
- return FALSE;
- }
- if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
+ if (sfp == NULL || field == NULL)
{
return FALSE;
}
@@ -8305,8 +9179,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
/* fields common to all features */
/* note, also known as comment */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue)))
{
if (DoesStringMatchConstraint(sfp->comment, scp))
{
@@ -8315,24 +9189,29 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
matched_term = TRUE;
}
/* db-xref */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue)))
{
rval = SetDbxrefString (&(sfp->dbxref), scp, value, existing_text);
}
/* exception */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue)))
{
if (DoesStringMatchConstraint(sfp->except_text, scp))
{
rval = SetStringValue ( &(sfp->except_text), value, existing_text);
+ if (StringHasNoText(sfp->except_text)) {
+ sfp->excpt = FALSE;
+ } else {
+ sfp->excpt = TRUE;
+ }
}
matched_term = TRUE;
}
/* evidence */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue)))
{
tmp = NULL;
if (sfp->exp_ev == 1)
@@ -8363,23 +9242,23 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
}
/* citation */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_citation)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue)))
{
rval = SetCitationTextOnFeature (sfp, scp, value, existing_text, batch_extra == NULL ? NULL : batch_extra->cit_list);
}
/* location */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_location)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue)))
{
rval = SetFeatureLocation (sfp, value, existing_text);
return rval;
}
/* pseudo */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_pseudo)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudo", field->data.ptrvalue)))
{
if (gene != NULL) {
if (!gene->pseudo) {
@@ -8398,8 +9277,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
/* fields common to some features */
/* product */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue)))
{
if (prp != NULL) {
rval = SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
@@ -8411,8 +9290,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
/* Gene fields */
/* locus */
- if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
+ if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue)))
{
if (grp == NULL && IsStringConstraintEmpty (scp))
{
@@ -8431,8 +9310,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
}
/* description */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
&& grp != NULL)
{
if (DoesStringMatchConstraint(grp->desc, scp))
@@ -8442,8 +9321,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
matched_term = TRUE;
}
/* maploc */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue)))
&& grp != NULL)
{
if (DoesStringMatchConstraint(grp->maploc, scp))
@@ -8453,8 +9332,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
matched_term = TRUE;
}
/* allele */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue)))
&& grp != NULL
&& sfp->idx.subtype != FEATDEF_variation)
{
@@ -8465,8 +9344,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
matched_term = TRUE;
}
/* locus_tag */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue)))
&& grp != NULL)
{
if (DoesStringMatchConstraint(grp->locus_tag, scp))
@@ -8476,16 +9355,16 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
matched_term = TRUE;
}
/* synonym */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue)))
&& grp != NULL)
{
rval = SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text);
matched_term = TRUE;
}
/* gene comment */
- if (field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_gene_comment
+ if (field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_gene_comment
&& gene != NULL) {
rval = SetStringValue (&(gene->comment), value, existing_text);
matched_term = TRUE;
@@ -8494,8 +9373,8 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
/* protein fields */
/* note - product handled above */
/* description */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
&& prp != NULL)
{
if (DoesStringMatchConstraint(prp->desc, scp)) {
@@ -8503,15 +9382,15 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
}
}
/* ec_number */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue)))
&& prp != NULL)
{
rval = SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
}
/* activity */
- if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity)
- || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)))
+ if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity)
+ || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue)))
&& prp != NULL)
{
rval = SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
@@ -8520,7 +9399,7 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
/* special coding region fields */
/* codon start */
/* note - if product existed before, it will be retranslated */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codon_start
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codon_start
&& sfp->data.choice == SEQFEAT_CDREGION)
{
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
@@ -8528,6 +9407,10 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
{
rval = ChooseBestFrame (sfp);
}
+ else if (StringICmp (value, "match") == 0)
+ {
+ rval = ChooseMatchingFrame (sfp);
+ }
else if (StringCmp (value, "1") == 0)
{
crp->frame = 1;
@@ -8546,9 +9429,10 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
if (rval && sfp->product != NULL) {
AdjustProteinSequenceForReadingFrame (sfp);
}
+ matched_term = TRUE;
}
/* transl_except */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_except
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except
&& sfp->data.choice == SEQFEAT_CDREGION)
{
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
@@ -8562,7 +9446,7 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
}
}
/* transl_table */
- if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_transl_table
+ if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
&& sfp->data.choice == SEQFEAT_CDREGION
&& (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL
&& IsAllDigits (value))
@@ -8575,59 +9459,249 @@ static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, String
}
- /* special RNA fields
+ /* special RNA fields */
/* tRNA fields */
if (sfp->idx.subtype == FEATDEF_tRNA
- && ((field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_codons_recognized)
- || (field->field->choice == FeatQualChoice_illegal_qual
- && DoesStringMatchConstraint ("codon-recognized", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_codons_recognized)
+ || (field->choice == FeatQualChoice_illegal_qual
+ && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue))))
{
rval = SettRNACodons_Recognized (sfp, scp, value, existing_text);
}
if (sfp->idx.subtype == FEATDEF_tRNA
- && ((field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_anticodon)
- || (field->field->choice == FeatQualChoice_illegal_qual
- && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_anticodon)
+ || (field->choice == FeatQualChoice_illegal_qual
+ && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue))))
{
rval = SetAnticodon (sfp, scp, value, existing_text);
}
if (sfp->idx.subtype == FEATDEF_tmRNA
- && ((field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_tag_peptide)
- || (field->field->choice == FeatQualChoice_illegal_qual
- && DoesStringMatchConstraint ("tag-peptide", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_tag_peptide)
+ || (field->choice == FeatQualChoice_illegal_qual
+ && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue))))
{
rval = SettmRNATagPeptide (sfp->data.value.ptrvalue, scp, value, existing_text);
}
if (sfp->idx.subtype == FEATDEF_ncRNA
- && ((field->field->choice == FeatQualChoice_legal_qual
- && field->field->data.intvalue == Feat_qual_legal_ncRNA_class)
- || (field->field->choice == FeatQualChoice_illegal_qual
- && DoesStringMatchConstraint ("ncRNA_class", field->field->data.ptrvalue))))
+ && ((field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_ncRNA_class)
+ || (field->choice == FeatQualChoice_illegal_qual
+ && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue))))
{
rval = SetncRNAClass (sfp->data.value.ptrvalue, scp, value, existing_text);
matched_term = TRUE;
}
+ /* special region qualifiers */
+ if (sfp->idx.subtype == FEATDEF_REGION
+ && field->choice == FeatQualChoice_legal_qual
+ && field->data.intvalue == Feat_qual_legal_name
+ && DoesStringMatchConstraint(sfp->data.value.ptrvalue, scp))
+ {
+ rval = SetStringValue ((CharPtr PNTR)(&(sfp->data.value.ptrvalue)), value, existing_text);
+ matched_term = TRUE;
+ }
+
/* actual GenBank qualifiers */
if (!rval && !matched_term)
{
- rval = SetStringInGBQualList (&(sfp->qual), field->field, scp, value, existing_text);
+ rval = SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text);
}
return rval;
}
+/* Sets a qualifier value on a feature, provided the feature's subtype agrees
+ * with the feature type named in the field (or the field accepts any type).
+ * Returns FALSE for NULL arguments or a type mismatch; otherwise returns the
+ * result of SetQualOnFeatureAnyType.
+ */
+static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra)
+{
+  Boolean type_ok;
+
+  if (sfp == NULL || field == NULL || field->field == NULL) {
+    return FALSE;
+  }
+
+  /* the featdef lookup is only needed when the field names a specific type */
+  type_ok = (Boolean) (field->type == Macro_feature_type_any
+                       || sfp->idx.subtype == GetFeatdefFromFeatureType (field->type));
+  if (!type_ok) {
+    return FALSE;
+  }
+
+  return SetQualOnFeatureAnyType (sfp, field->field, scp, value, existing_text, batch_extra);
+}
+
+
/* Public wrapper around SetQualOnFeatureEx that passes no batch context. */
NLM_EXTERN Boolean SetQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  Boolean changed;

  changed = SetQualOnFeatureEx (sfp, field, scp, value, existing_text, NULL);
  return changed;
}
+
+/* Fetches the value of an RNA qualifier from a feature.
+ * Returns NULL when sfp or rq is NULL or the feature does not match the RNA
+ * type in rq; otherwise returns the result of GetQualFromFeatureAnyType for
+ * the legal feature qualifier that corresponds to rq->field.
+ */
+NLM_EXTERN CharPtr GetRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, BatchExtraPtr batch_extra)
+{
+  ValNode qual_field;
+
+  if (sfp == NULL || rq == NULL) {
+    return NULL;
+  }
+  if (!DoesFeatureMatchRnaType (sfp, rq->type)) {
+    return NULL;
+  }
+
+  /* describe the RNA field as a legal feature qualifier in a stack node */
+  MemSet (&qual_field, 0, sizeof (qual_field));
+  qual_field.choice = FeatQualChoice_legal_qual;
+  qual_field.data.intvalue = GetFeatQualForRnaField (rq->field);
+
+  return GetQualFromFeatureAnyType (sfp, &qual_field, scp, batch_extra);
+}
+
+
+/* Removes an RNA qualifier from a feature.
+ * Returns FALSE when sfp or rq is NULL or the feature does not match the RNA
+ * type in rq; otherwise returns the result of RemoveQualFromFeatureAnyType
+ * for the legal feature qualifier that corresponds to rq->field.
+ */
+NLM_EXTERN Boolean RemoveRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp)
+{
+  ValNode qual_field;
+
+  if (sfp == NULL || rq == NULL) {
+    return FALSE;
+  }
+  if (!DoesFeatureMatchRnaType (sfp, rq->type)) {
+    return FALSE;
+  }
+
+  /* describe the RNA field as a legal feature qualifier in a stack node */
+  MemSet (&qual_field, 0, sizeof (qual_field));
+  qual_field.choice = FeatQualChoice_legal_qual;
+  qual_field.data.intvalue = GetFeatQualForRnaField (rq->field);
+
+  return RemoveQualFromFeatureAnyType (sfp, &qual_field, scp);
+}
+
+
+/* Sets an RNA qualifier on a feature.
+ * Returns FALSE when sfp or rq is NULL or the feature does not match the RNA
+ * type in rq; otherwise returns the result of SetQualOnFeatureAnyType (called
+ * with no batch context) for the legal feature qualifier that corresponds to
+ * rq->field.
+ */
+NLM_EXTERN Boolean SetRNAQualOnFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
+{
+  ValNode qual_field;
+
+  if (sfp == NULL || rq == NULL) {
+    return FALSE;
+  }
+  if (!DoesFeatureMatchRnaType (sfp, rq->type)) {
+    return FALSE;
+  }
+
+  /* describe the RNA field as a legal feature qualifier in a stack node */
+  MemSet (&qual_field, 0, sizeof (qual_field));
+  qual_field.choice = FeatQualChoice_legal_qual;
+  qual_field.data.intvalue = GetFeatQualForRnaField (rq->field);
+
+  return SetQualOnFeatureAnyType (sfp, &qual_field, scp, value, existing_text, NULL);
+}
+
+
+/* Sort callback: orders ValNodes by the length of their string data, shortest
+ * first; equal lengths fall back to StringCmp.  ptr1 and ptr2 point at
+ * ValNodePtr slots.  NULL arguments or NULL nodes compare as equal.
+ */
+static int LIBCALLBACK SortVnpByStringLenShortToLong (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left;
+  ValNodePtr right;
+  Int4       left_len, right_len;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+  if (left == NULL || right == NULL) {
+    return 0;
+  }
+
+  left_len = StringLen (left->data.ptrvalue);
+  right_len = StringLen (right->data.ptrvalue);
+  if (left_len != right_len) {
+    return (left_len < right_len) ? -1 : 1;
+  }
+  return StringCmp (left->data.ptrvalue, right->data.ptrvalue);
+}
+
+
+/* Sort callback: orders ValNodes by the length of their string data, longest
+ * first; equal lengths fall back to StringCmp.  ptr1 and ptr2 point at
+ * ValNodePtr slots.  NULL arguments or NULL nodes compare as equal.
+ */
+static int LIBCALLBACK SortVnpByStringLenLongToShort (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left;
+  ValNodePtr right;
+  Int4       left_len, right_len;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+  if (left == NULL || right == NULL) {
+    return 0;
+  }
+
+  left_len = StringLen (left->data.ptrvalue);
+  right_len = StringLen (right->data.ptrvalue);
+  if (left_len != right_len) {
+    return (left_len < right_len) ? 1 : -1;
+  }
+  return StringCmp (left->data.ptrvalue, right->data.ptrvalue);
+}
+
+
+/* Sorts the name list of a protein feature in the requested order.
+ * Does nothing (returns FALSE) unless sfp is a protein feature with at least
+ * two names.  Returns TRUE only when the list was not already sorted and has
+ * been re-sorted; an unrecognized order leaves the list alone.
+ */
+static Boolean SortProtNames (SeqFeatPtr sfp, Uint2 order)
+{
+  ProtRefPtr prp;
+  int (LIBCALLBACK *compare) (VoidPtr, VoidPtr) = NULL;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) {
+    return FALSE;
+  }
+  prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+  if (prp == NULL || prp->name == NULL || prp->name->next == NULL) {
+    return FALSE;
+  }
+
+  /* choose the comparison callback for the requested order */
+  if (order == Sort_order_short_to_long) {
+    compare = SortVnpByStringLenShortToLong;
+  } else if (order == Sort_order_long_to_short) {
+    compare = SortVnpByStringLenLongToShort;
+  } else if (order == Sort_order_alphabetical) {
+    compare = SortVnpByStringCS;
+  }
+
+  if (compare == NULL || ValNodeIsSorted (prp->name, compare)) {
+    return FALSE;
+  }
+  prp->name = ValNodeSort (prp->name, compare);
+  return TRUE;
+}
+
+
+/* Sorts the values of a multi-valued qualifier on a feature.
+ * Only the product qualifier of coding-region and protein fields is handled:
+ * for a coding region the protein feature is looked up on the product
+ * Bioseq, for a protein feature the feature itself is used, and its names
+ * are sorted with SortProtNames.
+ * Returns TRUE when the names were re-sorted, FALSE otherwise (including
+ * NULL sfp, NULL field, or a field with no qualifier choice).
+ */
+NLM_EXTERN Boolean SortQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, Uint2 order)
+{
+  SeqFeatPtr        prot = NULL;
+  BioseqPtr         protbsp;
+  SeqMgrFeatContext context;
+
+  /* field->field is dereferenced below, so reject it here as SetQualOnFeatureEx does */
+  if (sfp == NULL || field == NULL || field->field == NULL) {
+    return FALSE;
+  }
+
+  if (field->type != Macro_feature_type_cds && field->type != Macro_feature_type_prot) {
+    return FALSE;
+  }
+  if (field->field->choice != FeatQualChoice_legal_qual
+      || field->field->data.intvalue != Feat_qual_legal_product) {
+    return FALSE;
+  }
+
+  if (sfp->data.choice == SEQFEAT_CDREGION) {
+    protbsp = BioseqFindFromSeqLoc (sfp->product);
+    /* no product Bioseq means there is no protein feature to sort */
+    if (protbsp != NULL) {
+      prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context);
+    }
+  } else if (sfp->idx.subtype == FEATDEF_PROT) {
+    prot = sfp;
+  }
+
+  /* SortProtNames returns FALSE when prot is NULL */
+  return SortProtNames (prot, order);
+}
+
+
static void AddLegalFeatureField (ValNodePtr PNTR list, Uint2 featdef, Uint2 qual)
{
FeatureFieldPtr ffield;
@@ -8749,6 +9823,11 @@ static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp)
AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_codon_start);
}
+ /* regions */
+ if (sfp->idx.subtype == FEATDEF_REGION) {
+ AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_name);
+ }
+
/* actual GenBank qualifiers */
for (qual = sfp->qual; qual != NULL; qual = qual->next)
{
@@ -8761,6 +9840,847 @@ static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp)
}
+/* Functions for handling new PCR primer sets:
+ * GetPrimerValueFromBioSource
+ * GetMultiplePrimerValuesFromBioSource
+ * RemovePrimerValueFromBioSource
+ * SetPrimerValueInBioSource
+*/
+
+/* Returns a newly allocated copy of the first non-blank primer name or
+ * sequence, in the forward or reverse list as selected by field, that
+ * matches the constraint, scanning the reaction sets in order.  Returns NULL
+ * when biop is NULL, when field is not one of the four primer fields, or
+ * when nothing matches.
+ */
+static CharPtr GetPrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint)
+{
+  PCRReactionSetPtr ps;
+  PCRPrimerPtr      pp;
+  CharPtr           text;
+  CharPtr           found = NULL;
+  Boolean           use_forward, use_name;
+
+  if (biop == NULL) {
+    return NULL;
+  }
+
+  /* decode the field once: which primer list, and which member of each primer */
+  switch (field) {
+    case Source_qual_fwd_primer_name:
+      use_forward = TRUE;
+      use_name = TRUE;
+      break;
+    case Source_qual_fwd_primer_seq:
+      use_forward = TRUE;
+      use_name = FALSE;
+      break;
+    case Source_qual_rev_primer_name:
+      use_forward = FALSE;
+      use_name = TRUE;
+      break;
+    case Source_qual_rev_primer_seq:
+      use_forward = FALSE;
+      use_name = FALSE;
+      break;
+    default:
+      return NULL;
+  }
+
+  for (ps = biop->pcr_primers; ps != NULL && found == NULL; ps = ps->next) {
+    pp = use_forward ? ps->forward : ps->reverse;
+    for (; pp != NULL && found == NULL; pp = pp->next) {
+      text = use_name ? pp->name : pp->seq;
+      if (!StringHasNoText (text) && DoesStringMatchConstraint (text, constraint)) {
+        found = StringSave (text);
+      }
+    }
+  }
+  return found;
+}
+
+
+/* Collects newly allocated copies of every non-blank primer name or
+ * sequence, in the forward or reverse list as selected by field, that
+ * matches the constraint, in reaction-set order.  Returns NULL when biop is
+ * NULL, when field is not one of the four primer fields, or when nothing
+ * matches.
+ */
+static ValNodePtr GetMultiplePrimerValuesFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint)
+{
+  PCRReactionSetPtr ps;
+  PCRPrimerPtr      pp;
+  CharPtr           text;
+  ValNodePtr        values = NULL;
+  Boolean           use_forward, use_name;
+
+  if (biop == NULL) {
+    return NULL;
+  }
+
+  /* decode the field once: which primer list, and which member of each primer */
+  switch (field) {
+    case Source_qual_fwd_primer_name:
+      use_forward = TRUE;
+      use_name = TRUE;
+      break;
+    case Source_qual_fwd_primer_seq:
+      use_forward = TRUE;
+      use_name = FALSE;
+      break;
+    case Source_qual_rev_primer_name:
+      use_forward = FALSE;
+      use_name = TRUE;
+      break;
+    case Source_qual_rev_primer_seq:
+      use_forward = FALSE;
+      use_name = FALSE;
+      break;
+    default:
+      return NULL;
+  }
+
+  for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) {
+    pp = use_forward ? ps->forward : ps->reverse;
+    for (; pp != NULL; pp = pp->next) {
+      text = use_name ? pp->name : pp->seq;
+      if (!StringHasNoText (text) && DoesStringMatchConstraint (text, constraint)) {
+        ValNodeAddPointer (&values, 0, StringSave (text));
+      }
+    }
+  }
+  return values;
+}
+
+
+/* A primer is empty when it is NULL or when both its name and its sequence
+ * have no text.
+ */
+static Boolean PCRPrimerIsEmpty (PCRPrimerPtr primer)
+{
+  if (primer == NULL) {
+    return TRUE;
+  }
+  if (!StringHasNoText (primer->name)) {
+    return FALSE;
+  }
+  return (Boolean) (StringHasNoText (primer->seq) ? TRUE : FALSE);
+}
+
+
+/* A primer list is empty when it is NULL or every primer in it is empty. */
+static Boolean PCRPrimerListIsEmpty (PCRPrimerPtr primer)
+{
+  PCRPrimerPtr pp;
+
+  for (pp = primer; pp != NULL; pp = pp->next) {
+    if (!PCRPrimerIsEmpty (pp)) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+
+/* A PCR reaction is empty when it is NULL or when both its forward and its
+ * reverse primer lists are empty.
+ */
+NLM_EXTERN Boolean PCRReactionIsEmpty (PCRReactionPtr pr)
+{
+  if (pr == NULL) {
+    return TRUE;
+  }
+  if (!PCRPrimerListIsEmpty (pr->forward)) {
+    return FALSE;
+  }
+  return (Boolean) (PCRPrimerListIsEmpty (pr->reverse) ? TRUE : FALSE);
+}
+
+
+/* Clears the name of every primer in *pp_list whose non-blank name matches
+ * the constraint, then unlinks and frees every primer that is empty
+ * (whether or not this call emptied it).  Returns TRUE when at least one
+ * name was cleared.
+ */
+static Boolean RemoveNameFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint)
+{
+  PCRPrimerPtr PNTR link;
+  PCRPrimerPtr      pp;
+  Boolean           removed = FALSE;
+
+  if (pp_list == NULL || *pp_list == NULL) {
+    return FALSE;
+  }
+
+  /* link always addresses the pointer that leads to the current primer */
+  link = pp_list;
+  while ((pp = *link) != NULL) {
+    if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) {
+      pp->name = MemFree (pp->name);
+      removed = TRUE;
+    }
+    if (PCRPrimerIsEmpty (pp)) {
+      /* splice the primer out and detach it before freeing */
+      *link = pp->next;
+      pp->next = NULL;
+      PCRPrimerFree (pp);
+    } else {
+      link = &(pp->next);
+    }
+  }
+  return removed;
+}
+
+
+/* Clears the sequence of every primer in *pp_list whose non-blank sequence
+ * matches the constraint, then unlinks and frees every primer that is empty
+ * (whether or not this call emptied it).  Returns TRUE when at least one
+ * sequence was cleared.
+ */
+static Boolean RemoveSeqFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint)
+{
+  PCRPrimerPtr PNTR link;
+  PCRPrimerPtr      pp;
+  Boolean           removed = FALSE;
+
+  if (pp_list == NULL || *pp_list == NULL) {
+    return FALSE;
+  }
+
+  /* link always addresses the pointer that leads to the current primer */
+  link = pp_list;
+  while ((pp = *link) != NULL) {
+    if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) {
+      pp->seq = MemFree (pp->seq);
+      removed = TRUE;
+    }
+    if (PCRPrimerIsEmpty (pp)) {
+      /* splice the primer out and detach it before freeing */
+      *link = pp->next;
+      pp->next = NULL;
+      PCRPrimerFree (pp);
+    } else {
+      link = &(pp->next);
+    }
+  }
+  return removed;
+}
+
+
+/* Removes matching primer names or sequences (as selected by field) from
+ * every reaction set of a BioSource, then unlinks and frees every reaction
+ * set that is empty.  Returns TRUE when at least one value was removed.
+ */
+static Boolean RemovePrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint)
+{
+  PCRReactionSetPtr ps, last_kept = NULL, following;
+  Boolean           removed = FALSE;
+
+  if (biop == NULL) {
+    return FALSE;
+  }
+
+  for (ps = biop->pcr_primers; ps != NULL; ps = following) {
+    /* remember the successor first: ps may be freed below */
+    following = ps->next;
+
+    if (field == Source_qual_fwd_primer_name) {
+      removed |= RemoveNameFromPrimerList (&(ps->forward), constraint);
+    } else if (field == Source_qual_fwd_primer_seq) {
+      removed |= RemoveSeqFromPrimerList (&(ps->forward), constraint);
+    } else if (field == Source_qual_rev_primer_name) {
+      removed |= RemoveNameFromPrimerList (&(ps->reverse), constraint);
+    } else if (field == Source_qual_rev_primer_seq) {
+      removed |= RemoveSeqFromPrimerList (&(ps->reverse), constraint);
+    }
+
+    if (!PCRReactionIsEmpty (ps)) {
+      last_kept = ps;
+      continue;
+    }
+
+    /* empty reaction set: detach it, free it, and relink around it */
+    ps->next = NULL;
+    PCRReactionFree (ps);
+    if (last_kept == NULL) {
+      biop->pcr_primers = following;
+    } else {
+      last_kept->next = following;
+    }
+  }
+  return removed;
+}
+
+
+/* Returns TRUE when value has text and either contains a ':' or ',' or both
+ * starts with '(' and ends with ')'.
+ */
+static Boolean IsCompoundPrimerValue (CharPtr value)
+{
+  Int4 last;
+
+  if (StringHasNoText (value)) {
+    return FALSE;
+  }
+  if (StringChr (value, ':') != NULL) {
+    return TRUE;
+  }
+  if (StringChr (value, ',') != NULL) {
+    return TRUE;
+  }
+
+  /* value has text here, so it holds at least one character */
+  last = StringLen (value) - 1;
+  return (Boolean) ((value[0] == '(' && value[last] == ')') ? TRUE : FALSE);
+}
+
+
+/* Returns TRUE when value contains a comma. */
+static Boolean HasMultiplePrimerSets (CharPtr value)
+{
+  return (Boolean) ((StringChr (value, ',') != NULL) ? TRUE : FALSE);
+}
+
+
+/* Returns a newly allocated copy of the first len characters of start with
+ * every '(' and ')' dropped.
+ */
+static CharPtr CopyPrimerSetTextNoParens (CharPtr start, Int4 len)
+{
+  CharPtr copy, dst;
+  Int4    i;
+
+  copy = (CharPtr) MemNew (sizeof (Char) * (len + 1));
+  dst = copy;
+  for (i = 0; i < len; i++) {
+    if (start[i] != '(' && start[i] != ')') {
+      *dst = start[i];
+      dst++;
+    }
+  }
+  *dst = 0;
+  return copy;
+}
+
+
+/* Splits value at commas into a ValNode list of newly allocated strings,
+ * dropping all parentheses.  Text after the last comma is added only when it
+ * is not empty.
+ */
+static ValNodePtr GetPrimerSetComponents (CharPtr value)
+{
+  CharPtr    start, comma;
+  ValNodePtr pieces = NULL;
+
+  start = value;
+  while ((comma = StringChr (start, ',')) != NULL) {
+    ValNodeAddPointer (&pieces, 0, CopyPrimerSetTextNoParens (start, (Int4) (comma - start)));
+    start = comma + 1;
+  }
+  if (*start != 0) {
+    ValNodeAddPointer (&pieces, 0, CopyPrimerSetTextNoParens (start, (Int4) StringLen (start)));
+  }
+  return pieces;
+}
+
+
+/* Splits value at colons into a ValNode list of newly allocated strings.
+ * Text after the last colon is added only when it is not empty.
+ */
+static ValNodePtr GetPrimerElements (CharPtr value)
+{
+  CharPtr    start, colon, piece;
+  ValNodePtr pieces = NULL;
+  Int4       piece_len;
+
+  start = value;
+  while ((colon = StringChr (start, ':')) != NULL) {
+    piece_len = colon - start;
+    piece = (CharPtr) MemNew (sizeof (Char) * (piece_len + 1));
+    StringNCpy (piece, start, piece_len);
+    piece[piece_len] = 0;
+    ValNodeAddPointer (&pieces, 0, piece);
+    start = colon + 1;
+  }
+  if (*start != 0) {
+    ValNodeAddPointer (&pieces, 0, StringSave (start));
+  }
+  return pieces;
+}
+
+
+/* Overwrites the names in *p_list, position by position, with the
+ * colon-separated elements of value.  New primers are appended when there
+ * are more elements than primers; names of primers beyond the last element
+ * are cleared.  Returns TRUE when anything was changed.
+ */
+static Boolean OverwriteNameStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list)
+{
+  ValNodePtr   elements, vnp;
+  PCRPrimerPtr pp, last_pp = NULL;
+  Boolean      changed = FALSE;
+
+  if (p_list == NULL) {
+    return FALSE;
+  }
+
+  elements = GetPrimerElements (value);
+  pp = *p_list;
+  vnp = elements;
+  while (vnp != NULL) {
+    if (pp == NULL) {
+      /* ran out of primers: append a new one */
+      pp = PCRPrimerNew ();
+      if (last_pp != NULL) {
+        last_pp->next = pp;
+      } else {
+        *p_list = pp;
+      }
+      changed = TRUE;
+    }
+    if (StringCmp (pp->name, vnp->data.ptrvalue) != 0) {
+      /* the primer takes ownership of the element's string */
+      MemFree (pp->name);
+      pp->name = vnp->data.ptrvalue;
+      vnp->data.ptrvalue = NULL;
+      changed = TRUE;
+    }
+    last_pp = pp;
+    pp = pp->next;
+    vnp = vnp->next;
+  }
+
+  /* primers beyond the supplied elements lose their names */
+  for (; pp != NULL; pp = pp->next) {
+    if (!StringHasNoText (pp->name)) {
+      changed = TRUE;
+    }
+    pp->name = MemFree (pp->name);
+  }
+
+  ValNodeFreeData (elements);
+  return changed;
+}
+
+
+/* Overwrites the sequences in *p_list, position by position, with the
+ * colon-separated elements of value.  New primers are appended when there
+ * are more elements than primers; sequences of primers beyond the last
+ * element are cleared.  Returns TRUE when anything was changed.
+ */
+static Boolean OverwriteSeqStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list)
+{
+  ValNodePtr   elements, vnp;
+  PCRPrimerPtr pp, last_pp = NULL;
+  Boolean      changed = FALSE;
+
+  if (p_list == NULL) {
+    return FALSE;
+  }
+
+  elements = GetPrimerElements (value);
+  pp = *p_list;
+  vnp = elements;
+  while (vnp != NULL) {
+    if (pp == NULL) {
+      /* ran out of primers: append a new one */
+      pp = PCRPrimerNew ();
+      if (last_pp != NULL) {
+        last_pp->next = pp;
+      } else {
+        *p_list = pp;
+      }
+      changed = TRUE;
+    }
+    if (StringCmp (pp->seq, vnp->data.ptrvalue) != 0) {
+      /* the primer takes ownership of the element's string */
+      MemFree (pp->seq);
+      pp->seq = vnp->data.ptrvalue;
+      vnp->data.ptrvalue = NULL;
+      changed = TRUE;
+    }
+    last_pp = pp;
+    pp = pp->next;
+    vnp = vnp->next;
+  }
+
+  /* primers beyond the supplied elements lose their sequences */
+  for (; pp != NULL; pp = pp->next) {
+    if (!StringHasNoText (pp->seq)) {
+      changed = TRUE;
+    }
+    pp->seq = MemFree (pp->seq);
+  }
+
+  ValNodeFreeData (elements);
+  return changed;
+}
+
+
+/* Overwrites the forward primer names of the reactions in *p_list with the
+ * comma-separated sets in value, one set per reaction.  New reactions are
+ * appended when there are more sets than reactions; forward names are
+ * removed from reactions beyond the last set.  Returns TRUE when anything
+ * was changed.
+ */
+static Boolean OverwriteFwdNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
+{
+  ValNodePtr     sets, vnp;
+  PCRReactionPtr ps, last_ps = NULL;
+  Boolean        changed = FALSE;
+
+  if (p_list == NULL) {
+    return FALSE;
+  }
+
+  sets = GetPrimerSetComponents (value);
+  ps = *p_list;
+  vnp = sets;
+  while (vnp != NULL) {
+    if (ps == NULL) {
+      /* ran out of reactions: append a new one */
+      ps = PCRReactionNew ();
+      if (last_ps != NULL) {
+        last_ps->next = ps;
+      } else {
+        *p_list = ps;
+      }
+      changed = TRUE;
+    }
+    changed |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward));
+    last_ps = ps;
+    ps = ps->next;
+    vnp = vnp->next;
+  }
+
+  /* reactions beyond the supplied sets lose their forward names */
+  for (; ps != NULL; ps = ps->next) {
+    changed |= RemoveNameFromPrimerList (&(ps->forward), NULL);
+  }
+
+  ValNodeFreeData (sets);
+  return changed;
+}
+
+
+/* Overwrites the reverse primer names of the reactions in *p_list with the
+ * comma-separated sets in value, one set per reaction.  New reactions are
+ * appended when there are more sets than reactions; reverse names are
+ * removed from reactions beyond the last set.  Returns TRUE when anything
+ * was changed.
+ */
+static Boolean OverwriteRevNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
+{
+  ValNodePtr     sets, vnp;
+  PCRReactionPtr ps, last_ps = NULL;
+  Boolean        changed = FALSE;
+
+  if (p_list == NULL) {
+    return FALSE;
+  }
+
+  sets = GetPrimerSetComponents (value);
+  ps = *p_list;
+  vnp = sets;
+  while (vnp != NULL) {
+    if (ps == NULL) {
+      /* ran out of reactions: append a new one */
+      ps = PCRReactionNew ();
+      if (last_ps != NULL) {
+        last_ps->next = ps;
+      } else {
+        *p_list = ps;
+      }
+      changed = TRUE;
+    }
+    changed |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse));
+    last_ps = ps;
+    ps = ps->next;
+    vnp = vnp->next;
+  }
+
+  /* reactions beyond the supplied sets lose their reverse names */
+  for (; ps != NULL; ps = ps->next) {
+    changed |= RemoveNameFromPrimerList (&(ps->reverse), NULL);
+  }
+
+  ValNodeFreeData (sets);
+  return changed;
+}
+
+
+/* Overwrites the forward primer sequences of the reactions in *p_list with
+ * the comma-separated sets in value, one set per reaction.  New reactions
+ * are appended when there are more sets than reactions; forward sequences
+ * are removed from reactions beyond the last set.  Returns TRUE when
+ * anything was changed.
+ */
+static Boolean OverwriteFwdSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
+{
+  ValNodePtr     sets, vnp;
+  PCRReactionPtr ps, last_ps = NULL;
+  Boolean        changed = FALSE;
+
+  if (p_list == NULL) {
+    return FALSE;
+  }
+
+  sets = GetPrimerSetComponents (value);
+  ps = *p_list;
+  vnp = sets;
+  while (vnp != NULL) {
+    if (ps == NULL) {
+      /* ran out of reactions: append a new one */
+      ps = PCRReactionNew ();
+      if (last_ps != NULL) {
+        last_ps->next = ps;
+      } else {
+        *p_list = ps;
+      }
+      changed = TRUE;
+    }
+    changed |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward));
+    last_ps = ps;
+    ps = ps->next;
+    vnp = vnp->next;
+  }
+
+  /* reactions beyond the supplied sets lose their forward sequences */
+  for (; ps != NULL; ps = ps->next) {
+    changed |= RemoveSeqFromPrimerList (&(ps->forward), NULL);
+  }
+
+  ValNodeFreeData (sets);
+  return changed;
+}
+
+
+/* Overwrites the reverse primer sequences of the reactions in *p_list with
+ * the comma-separated sets in value, one set per reaction.  New reactions
+ * are appended when there are more sets than reactions; reverse sequences
+ * are removed from reactions beyond the last set.  Returns TRUE when
+ * anything was changed.
+ */
+static Boolean OverwriteRevSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
+{
+  ValNodePtr     sets, vnp;
+  PCRReactionPtr ps, last_ps = NULL;
+  Boolean        changed = FALSE;
+
+  if (p_list == NULL) {
+    return FALSE;
+  }
+
+  sets = GetPrimerSetComponents (value);
+  ps = *p_list;
+  vnp = sets;
+  while (vnp != NULL) {
+    if (ps == NULL) {
+      /* ran out of reactions: append a new one */
+      ps = PCRReactionNew ();
+      if (last_ps != NULL) {
+        last_ps->next = ps;
+      } else {
+        *p_list = ps;
+      }
+      changed = TRUE;
+    }
+    changed |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse));
+    last_ps = ps;
+    ps = ps->next;
+    vnp = vnp->next;
+  }
+
+  /* reactions beyond the supplied sets lose their reverse sequences */
+  for (; ps != NULL; ps = ps->next) {
+    changed |= RemoveSeqFromPrimerList (&(ps->reverse), NULL);
+  }
+
+  ValNodeFreeData (sets);
+  return changed;
+}
+
+
+/* Applies value to the name of every primer in *pp_list whose current name
+ * matches the constraint, using SetStringValue with the given existing_text
+ * option.
+ * Returns TRUE when at least one name was changed.  The result accumulates
+ * over all primers, so a primer left unchanged late in the list cannot hide
+ * a change made to an earlier one.
+ */
+static Boolean SetNameInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
+{
+  PCRPrimerPtr pp;
+  Boolean      rval = FALSE;
+
+  if (pp_list == NULL) {
+    return FALSE;
+  }
+
+  for (pp = *pp_list; pp != NULL; pp = pp->next) {
+    if (DoesStringMatchConstraint (pp->name, constraint)) {
+      rval |= SetStringValue (&(pp->name), value, existing_text);
+    }
+  }
+  return rval;
+}
+
+
+/* Applies value to the sequence of every primer in *pp_list whose current
+ * sequence matches the constraint, using SetStringValue with the given
+ * existing_text option.
+ * Returns TRUE when at least one sequence was changed.  The result
+ * accumulates over all primers, so a primer left unchanged late in the list
+ * cannot hide a change made to an earlier one.
+ */
+static Boolean SetSeqInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
+{
+  PCRPrimerPtr pp;
+  Boolean      rval = FALSE;
+
+  if (pp_list == NULL) {
+    return FALSE;
+  }
+
+  for (pp = *pp_list; pp != NULL; pp = pp->next) {
+    if (DoesStringMatchConstraint (pp->seq, constraint)) {
+      rval |= SetStringValue (&(pp->seq), value, existing_text);
+    }
+  }
+  return rval;
+}
+
+
+/* Sets a primer name or sequence (as selected by field) in the PCR reaction
+ * sets of a BioSource.
+ *
+ * A compound value (see IsCompoundPrimerValue) overwrites the whole
+ * structure through the Overwrite...IntoPCRReactionSet functions, unless
+ * existing_text is ExistingTextOption_leave_old and primers already exist.
+ *
+ * A simple value is applied to every primer matching the constraint.  If
+ * nothing changed, the constraint is empty, and either existing_text is not
+ * leave_old or there are no primers at all, a new primer holding the value
+ * is created: in a new first reaction set, in the last reaction set when
+ * its list for this direction is empty, or otherwise in a new reaction set
+ * appended after the last one.
+ *
+ * Returns TRUE when anything was changed; FALSE for a NULL biop.
+ */
+static Boolean SetPrimerValueInBioSource(BioSourcePtr biop, Int4 field, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
+{
+  PCRReactionSetPtr ps, prev_ps = NULL;
+  Boolean           rval = FALSE;
+
+  /* the sibling Get/Remove primer functions tolerate a NULL BioSource; so does this one */
+  if (biop == NULL) {
+    return FALSE;
+  }
+
+  ps = biop->pcr_primers;
+
+  if (IsCompoundPrimerValue(value)) {
+    if (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL) {
+      switch (field) {
+        case Source_qual_fwd_primer_name:
+          rval = OverwriteFwdNameStringIntoPCRReactionSet (value, &(biop->pcr_primers));
+          break;
+        case Source_qual_fwd_primer_seq:
+          rval = OverwriteFwdSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers));
+          break;
+        case Source_qual_rev_primer_name:
+          rval = OverwriteRevNameStringIntoPCRReactionSet (value, &(biop->pcr_primers));
+          break;
+        case Source_qual_rev_primer_seq:
+          rval = OverwriteRevSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers));
+          break;
+      }
+    }
+  } else {
+    /* apply to every existing primer that matches; prev_ps ends on the last set */
+    while (ps != NULL) {
+      switch (field) {
+        case Source_qual_fwd_primer_name:
+          rval |= SetNameInPrimerList (&(ps->forward), constraint, value, existing_text);
+          break;
+        case Source_qual_fwd_primer_seq:
+          rval |= SetSeqInPrimerList (&(ps->forward), constraint, value, existing_text);
+          break;
+        case Source_qual_rev_primer_name:
+          rval |= SetNameInPrimerList (&(ps->reverse), constraint, value, existing_text);
+          break;
+        case Source_qual_rev_primer_seq:
+          rval |= SetSeqInPrimerList (&(ps->reverse), constraint, value, existing_text);
+          break;
+      }
+      prev_ps = ps;
+      ps = ps->next;
+    }
+
+    if (IsStringConstraintEmpty (constraint) && !rval && (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL)) {
+      if (prev_ps == NULL) {
+        ps = PCRReactionSetNew ();
+        biop->pcr_primers = ps;
+      } else if ((PCRPrimerListIsEmpty(prev_ps->forward)
+                  && (field == Source_qual_fwd_primer_name || field == Source_qual_fwd_primer_seq))
+                 || (PCRPrimerListIsEmpty(prev_ps->reverse)
+                  && (field == Source_qual_rev_primer_name || field == Source_qual_rev_primer_seq))) {
+        /* add to previous set */
+        ps = prev_ps;
+      } else {
+        /* field is filled on previous, build a new one */
+        ps = PCRReactionSetNew ();
+        prev_ps->next = ps;
+      }
+      switch (field) {
+        case Source_qual_fwd_primer_name:
+          ps->forward = PCRPrimerNew ();
+          ps->forward->name = StringSave (value);
+          rval = TRUE;
+          break;
+        case Source_qual_fwd_primer_seq:
+          ps->forward = PCRPrimerNew ();
+          ps->forward->seq = StringSave (value);
+          rval = TRUE;
+          break;
+        case Source_qual_rev_primer_name:
+          ps->reverse = PCRPrimerNew ();
+          ps->reverse->name = StringSave (value);
+          rval = TRUE;
+          break;
+        case Source_qual_rev_primer_seq:
+          ps->reverse = PCRPrimerNew ();
+          ps->reverse->seq = StringSave (value);
+          rval = TRUE;
+          break;
+      }
+    }
+  }
+  return rval;
+}
+
+
+
+/* functions for source qualifiers */
+
+/* Returns TRUE when the BioSource's organism has a db entry whose database
+ * name is "taxon" (compared case-insensitively).
+ */
+NLM_EXTERN Boolean HasTaxonomyID (BioSourcePtr biop)
+{
+  ValNodePtr db;
+  DbtagPtr   dbt;
+
+  if (biop == NULL || biop->org == NULL) {
+    return FALSE;
+  }
+  for (db = biop->org->db; db != NULL; db = db->next) {
+    dbt = (DbtagPtr) db->data.ptrvalue;
+    if (dbt == NULL || dbt->db == NULL) {
+      continue;
+    }
+    if (StringICmp (dbt->db, "taxon") == 0) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/* Returns a newly allocated string holding the first "taxon" db tag of the
+ * BioSource's organism that matches the constraint: the numeric id printed
+ * in decimal when it is positive, otherwise the tag's non-blank string.
+ * Returns NULL when there is no organism or nothing matches.
+ */
+static CharPtr GetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp)
+{
+  ValNodePtr db;
+  DbtagPtr   dbt;
+  CharPtr    found = NULL;
+  Char       buf[15];
+
+  if (biop == NULL || biop->org == NULL) {
+    return NULL;
+  }
+
+  db = biop->org->db;
+  while (db != NULL && found == NULL) {
+    dbt = (DbtagPtr) db->data.ptrvalue;
+    db = db->next;
+    if (dbt == NULL || dbt->db == NULL || StringICmp (dbt->db, "taxon") != 0) {
+      continue;
+    }
+    if (dbt->tag->id > 0) {
+      /* numeric taxon id: match and return its decimal text */
+      sprintf (buf, "%d", dbt->tag->id);
+      if (DoesStringMatchConstraint (buf, scp)) {
+        found = StringSave (buf);
+      }
+    } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) {
+      found = StringSave (dbt->tag->str);
+    }
+  }
+  return found;
+}
+
+
+/* Collects, as newly allocated strings in a ValNode list, every "taxon" db
+ * tag in list that matches the constraint: the numeric id printed in decimal
+ * when it is positive, otherwise the tag's non-blank string.
+ * Returns NULL when nothing matches.
+ */
+static ValNodePtr GetMultipleTaxidStrings (ValNodePtr list, StringConstraintPtr scp)
+{
+  ValNodePtr vnp, val_list = NULL;
+  DbtagPtr   dbt;
+  CharPtr    str;
+  Char       buf[15];
+
+  for (vnp = list; vnp != NULL; vnp = vnp->next) {
+    dbt = (DbtagPtr) vnp->data.ptrvalue;
+    if (dbt != NULL && StringCmp (dbt->db, "taxon") == 0) {
+      /* start each entry with no result: otherwise a non-matching taxon entry
+       * would add the previous match's pointer to the list a second time
+       */
+      str = NULL;
+      if (dbt->tag->id > 0) {
+        sprintf (buf, "%d", dbt->tag->id);
+        if (DoesStringMatchConstraint (buf, scp)) {
+          str = StringSave (buf);
+        }
+      } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) {
+        str = StringSave (dbt->tag->str);
+      }
+      if (str != NULL) {
+        ValNodeAddPointer (&val_list, 0, str);
+      }
+    }
+  }
+
+  return val_list;
+}
+
+
+/* Stores value as a "taxon:<value>" dbxref on the BioSource's organism,
+ * creating the organism reference when it is missing.  A NULL value is
+ * formatted as an empty string.  Returns the result of SetDbxrefString;
+ * FALSE for a NULL biop.
+ */
+static Boolean SetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
+{
+  CharPtr fmt = "taxon:%s";
+  CharPtr dbxref;
+  Boolean changed;
+
+  if (biop == NULL) {
+    return FALSE;
+  }
+  if (biop->org == NULL) {
+    biop->org = OrgRefNew();
+  }
+
+  dbxref = (CharPtr) MemNew (sizeof (Char) * (StringLen (value) + StringLen (fmt)));
+  if (value == NULL) {
+    sprintf (dbxref, fmt, "");
+  } else {
+    sprintf (dbxref, fmt, value);
+  }
+  changed = SetDbxrefString (&(biop->org->db), scp, dbxref, existing_text);
+  MemFree (dbxref);
+  return changed;
+}
+
+
+/* Removes from the BioSource's organism every "taxon" db tag that matches
+ * the constraint, comparing either the decimal text of a positive numeric id
+ * or the tag's non-blank string.  Returns TRUE when at least one entry was
+ * removed.
+ */
+static Boolean RemoveTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp)
+{
+  ValNodePtr db, last_kept = NULL, following;
+  DbtagPtr   dbt;
+  Boolean    removed = FALSE, matches;
+  Char       buf[15];
+
+  if (biop == NULL || biop->org == NULL) {
+    return FALSE;
+  }
+
+  db = biop->org->db;
+  while (db != NULL) {
+    /* remember the successor first: db may be freed below */
+    following = db->next;
+    dbt = (DbtagPtr) db->data.ptrvalue;
+
+    matches = FALSE;
+    if (dbt != NULL && dbt->db != NULL && StringICmp (dbt->db, "taxon") == 0) {
+      if (dbt->tag->id > 0) {
+        sprintf (buf, "%d", dbt->tag->id);
+        matches = DoesStringMatchConstraint (buf, scp) ? TRUE : FALSE;
+      } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) {
+        matches = TRUE;
+      }
+    }
+
+    if (!matches) {
+      last_kept = db;
+    } else {
+      /* unlink the node, then free its Dbtag and the node itself */
+      if (last_kept == NULL) {
+        biop->org->db = following;
+      } else {
+        last_kept->next = following;
+      }
+      db->next = NULL;
+      db->data.ptrvalue = DbtagFree (db->data.ptrvalue);
+      ValNodeFree (db);
+      removed = TRUE;
+    }
+    db = following;
+  }
+  return removed;
+}
+
+
NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
{
CharPtr str = NULL;
@@ -8800,6 +10720,8 @@ NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoi
if (biop->org != NULL) {
str = GetDbxrefString (biop->org->db, constraint);
}
+ } else if (scp->data.intvalue == Source_qual_taxid) {
+ str = GetTaxonomyId (biop, constraint);
} else if (scp->data.intvalue == Source_qual_all_notes) {
vn.choice = SourceQualChoice_textqual;
vn.data.intvalue = Source_qual_subsource_note;
@@ -8811,6 +10733,12 @@ NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoi
}
} else if (scp->data.intvalue == Source_qual_all_quals) {
/* will not do */
+ } else if (scp->data.intvalue == Source_qual_fwd_primer_name
+ || scp->data.intvalue == Source_qual_fwd_primer_seq
+ || scp->data.intvalue == Source_qual_rev_primer_name
+ || scp->data.intvalue == Source_qual_rev_primer_seq) {
+ /* fetch from new primer object */
+ str = GetPrimerValueFromBioSource (biop, scp->data.intvalue, constraint);
} else {
orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield);
if (orgmod_subtype == -1) {
@@ -8933,6 +10861,16 @@ NLM_EXTERN ValNodePtr GetMultipleSourceQualsFromBioSource (BioSourcePtr biop, So
if (biop->org != NULL) {
ValNodeLink (&val_list, GetMultipleDbxrefStrings (biop->org->db, constraint));
}
+ } else if (scp->data.intvalue == Source_qual_taxid) {
+ if (biop->org != NULL) {
+ ValNodeLink (&val_list, GetMultipleTaxidStrings (biop->org->db, constraint));
+ }
+ } else if (scp->data.intvalue == Source_qual_fwd_primer_name
+ || scp->data.intvalue == Source_qual_fwd_primer_seq
+ || scp->data.intvalue == Source_qual_rev_primer_name
+ || scp->data.intvalue == Source_qual_rev_primer_seq) {
+ /* fetch from new primer object */
+ ValNodeLink (&val_list, GetMultiplePrimerValuesFromBioSource (biop, scp->data.intvalue, constraint));
} else if (scp->data.intvalue == Source_qual_all_notes) {
vn.choice = SourceQualChoice_textqual;
vn.data.intvalue = Source_qual_subsource_note;
@@ -9108,6 +11046,14 @@ NLM_EXTERN Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualC
if (biop->org != NULL) {
rval = RemoveDbxrefString (&(biop->org->db), constraint);
}
+ } else if (scp->data.intvalue == Source_qual_taxid) {
+ rval = RemoveTaxonomyId (biop, constraint);
+ } else if (scp->data.intvalue == Source_qual_fwd_primer_name
+ || scp->data.intvalue == Source_qual_fwd_primer_seq
+ || scp->data.intvalue == Source_qual_rev_primer_name
+ || scp->data.intvalue == Source_qual_rev_primer_seq) {
+ /* remove from new primer object */
+ rval = RemovePrimerValueFromBioSource (biop, scp->data.intvalue, constraint);
} else if (scp->data.intvalue == Source_qual_all_notes) {
vn.choice = SourceQualChoice_textqual;
vn.data.intvalue = Source_qual_subsource_note;
@@ -9305,6 +11251,8 @@ NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoice
biop->org = OrgRefNew ();
}
rval = SetDbxrefString (&(biop->org->db), constraint, value, existing_text);
+ } else if (scp->data.intvalue == Source_qual_taxid) {
+ rval = SetTaxonomyId(biop, constraint, value, existing_text);
} else if (scp->data.intvalue == Source_qual_all_notes) {
vn.choice = SourceQualChoice_textqual;
vn.data.intvalue = Source_qual_subsource_note;
@@ -9312,6 +11260,12 @@ NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoice
rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text);
vn.data.intvalue = Source_qual_orgmod_note;
rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text);
+ } else if (scp->data.intvalue == Source_qual_fwd_primer_name
+ || scp->data.intvalue == Source_qual_fwd_primer_seq
+ || scp->data.intvalue == Source_qual_rev_primer_name
+ || scp->data.intvalue == Source_qual_rev_primer_seq) {
+ /* set value in new primer object */
+ rval = SetPrimerValueInBioSource (biop, scp->data.intvalue, constraint, value, existing_text);
} else if (scp->data.intvalue == Source_qual_all_quals) {
/* will not do this */
} else {
@@ -10416,6 +12370,27 @@ static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraint
}
+/*
+ * Sorts one field across the members of a CDS-gene-prot set.
+ *
+ * Only CDSGeneProt_field_prot_name is acted upon: every entry of
+ * c->prot_list whose feature subtype is FEATDEF_PROT is handed to
+ * SortProtNames together with the requested order.  Any other field, or a
+ * NULL set, leaves everything untouched.
+ *
+ * Returns the OR of the SortProtNames results (FALSE if none was called).
+ */
+static Boolean SortFieldInCGPSet (CGPSetPtr c, Uint2 field, Uint2 order)
+{
+  ValNodePtr node;
+  SeqFeatPtr prot;
+  Boolean    any_sorted = FALSE;
+
+  if (c == NULL || field != CDSGeneProt_field_prot_name) {
+    return FALSE;
+  }
+
+  node = c->prot_list;
+  while (node != NULL) {
+    prot = (SeqFeatPtr) node->data.ptrvalue;
+    if (prot != NULL && prot->idx.subtype == FEATDEF_PROT) {
+      any_sorted |= SortProtNames (prot, order);
+    }
+    node = node->next;
+  }
+  return any_sorted;
+}
+
+
static MolInfoPtr GetMolInfoForBioseq (BioseqPtr bsp)
{
MolInfoPtr m = NULL;
@@ -10534,47 +12509,73 @@ static MolInfoPtr AddMolInfoToBioseq (BioseqPtr bsp)
static Boolean SetSequenceQualOnBioseq (BioseqPtr bsp, ValNodePtr field)
{
- MolInfoPtr m;
+ MolInfoPtr m = NULL;
Boolean rval = FALSE;
+ Int4 new_val;
if (bsp == NULL || field == NULL) return FALSE;
switch (field->choice) {
case MolinfoField_molecule:
- m = GetMolInfoForBioseq (bsp);
if (m == NULL) {
- m = AddMolInfoToBioseq (bsp);
+ m = GetMolInfoForBioseq (bsp);
+ if (m == NULL) {
+ m = AddMolInfoToBioseq (bsp);
+ rval = TRUE;
+ }
+ }
+ new_val = BiomolFromMoleculeType (field->data.intvalue);
+ if (m->biomol != new_val) {
+ m->biomol = new_val;
+ rval = TRUE;
}
- m->biomol = BiomolFromMoleculeType (field->data.intvalue);
- rval = TRUE;
break;
case MolinfoField_technique:
- m = GetMolInfoForBioseq (bsp);
if (m == NULL) {
- m = AddMolInfoToBioseq (bsp);
+ m = GetMolInfoForBioseq (bsp);
+ if (m == NULL) {
+ m = AddMolInfoToBioseq (bsp);
+ }
+ }
+ new_val = TechFromTechniqueType (field->data.intvalue);
+ if (m->tech != new_val) {
+ m->tech = new_val;
+ rval = TRUE;
}
- m->tech = TechFromTechniqueType (field->data.intvalue);
- rval = TRUE;
break;
case MolinfoField_completedness:
- m = GetMolInfoForBioseq (bsp);
if (m == NULL) {
- m = AddMolInfoToBioseq (bsp);
+ m = GetMolInfoForBioseq (bsp);
+ if (m == NULL) {
+ m = AddMolInfoToBioseq (bsp);
+ }
+ }
+ new_val = CompletenessFromCompletednessType (field->data.intvalue);
+ if (m->completeness != new_val) {
+ m->completeness = new_val;
+ rval = TRUE;
}
- m->completeness = CompletenessFromCompletednessType (field->data.intvalue);
- rval = TRUE;
break;
case MolinfoField_mol_class:
- bsp->mol = MolFromMoleculeClassType (field->data.intvalue);
- rval = TRUE;
+ new_val = MolFromMoleculeClassType (field->data.intvalue);
+ if (bsp->mol != new_val) {
+ bsp->mol = new_val;
+ rval = TRUE;
+ }
break;
case MolinfoField_topology:
- bsp->topology = TopologyFromTopologyType (field->data.intvalue);
- rval = TRUE;
+ new_val = TopologyFromTopologyType (field->data.intvalue);
+ if (bsp->topology != new_val) {
+ bsp->topology = new_val;
+ rval = TRUE;
+ }
break;
case MolinfoField_strand:
- bsp->strand = StrandFromStrandType (field->data.intvalue);
- rval = TRUE;
+ new_val = StrandFromStrandType (field->data.intvalue);
+ if (bsp->strand != new_val) {
+ bsp->strand = new_val;
+ rval = TRUE;
+ }
break;
}
return rval;
@@ -10764,11 +12765,9 @@ static CharPtr DbnameValFromPrefixOrSuffix (CharPtr val)
val += 2;
}
rval = StringSave (val);
- if ((stop = s_StringEndsWith (rval, "Data-START##")) != NULL
- || (stop = s_StringEndsWith (rval, "-START##")) != NULL
+ if ((stop = s_StringEndsWith (rval, "-START##")) != NULL
|| (stop = s_StringEndsWith (rval, "-START##")) != NULL
|| (stop = s_StringEndsWith (rval, "START##")) != NULL
- || (stop = s_StringEndsWith (rval, "Data-END##")) != NULL
|| (stop = s_StringEndsWith (rval, "-END##")) != NULL
|| (stop = s_StringEndsWith (rval, "END##")) != NULL) {
*stop = 0;
@@ -10790,7 +12789,7 @@ static Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp)
}
-static CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp)
+NLM_EXTERN CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp)
{
UserFieldPtr curr;
CharPtr rval = NULL;
@@ -10847,7 +12846,7 @@ static Boolean RemoveStructuredCommentFieldFromUserObject (UserObjectPtr uop, Va
if (IsUserFieldStructuredCommentPrefixOrSuffix (curr)
&& curr->choice == 1) {
val = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue);
- if (IsStringConstraintEmpty (scp) || !DoesStringMatchConstraint (val, scp)) {
+ if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (val, scp)) {
do_remove = TRUE;
}
val = MemFree (val);
@@ -10916,8 +12915,8 @@ static Boolean SetStructuredCommentFieldOnUserObject (UserObjectPtr uop, Structu
UserFieldPtr curr, first = NULL, last = NULL, ufp;
Boolean rval = FALSE;
CharPtr oldval, newval, fmt;
- CharPtr prefix_fmt = "##%sData-START##";
- CharPtr suffix_fmt = "##%sData-END##";
+ CharPtr prefix_fmt = "##%s-START##";
+ CharPtr suffix_fmt = "##%s-END##";
if (!IsUserObjectStructuredComment(uop) || field == NULL) {
return FALSE;
@@ -11035,6 +13034,266 @@ static Boolean SetStructuredCommentFieldOnUserObject (UserObjectPtr uop, Structu
}
+/* Associates a DBLink field-type constant with the label text that is
+ * compared against user-field labels in a DBLink user object.
+ */
+typedef struct dblinkname {
+  Int4 field_type;
+  CharPtr field_name;
+} DBLinkNameData, PNTR DBLinkNamePtr;
+
+/* Table consulted by the DBLink name <-> field-type conversion functions. */
+static DBLinkNameData dblink_names[] = {
+  { DBLink_field_type_trace_assembly , "Trace Assembly Archive" } ,
+  { DBLink_field_type_bio_sample , "Bio Sample" } ,
+  { DBLink_field_type_probe_db , "ProbeDB" } ,
+  { DBLink_field_type_sequence_read_archve , "Sequence Read Archive" }
+};
+
+/* Number of entries in dblink_names.  The expansion is fully parenthesized
+ * so the macro keeps its value when embedded in a larger expression
+ * (e.g. "x % NUM_dblinkname"), and it is sized by element rather than by
+ * type name so it stays correct if the element type is ever renamed.
+ */
+#define NUM_dblinkname (sizeof (dblink_names) / sizeof (dblink_names[0]))
+
+/*
+ * Translates a DBLink field-type constant into its label.
+ *
+ * Returns the field_name of the first dblink_names entry whose field_type
+ * equals the argument, or the literal "Unknown field type" when there is
+ * none.  The result points at the table entry or at a string literal; it
+ * is not a fresh allocation.
+ */
+NLM_EXTERN CharPtr GetDBLinkNameFromDBLinkFieldType (Int4 field_type)
+{
+  CharPtr name = NULL;
+  Int4    idx;
+
+  for (idx = 0; name == NULL && idx < NUM_dblinkname; idx++) {
+    if (dblink_names[idx].field_type == field_type) {
+      name = dblink_names[idx].field_name;
+    }
+  }
+  return (name != NULL) ? name : "Unknown field type";
+}
+
+
+/*
+ * Translates a DBLink label into its field-type constant.
+ *
+ * field_name is compared with StringCmp against each dblink_names entry in
+ * table order.  Returns the field_type of the first match, or -1 when no
+ * entry matches.
+ */
+NLM_EXTERN Int4 GetDBLinkFieldTypeFromDBLinkName (CharPtr field_name)
+{
+  Int4 found = -1;
+  Int4 idx = 0;
+
+  /* a negative value in "found" means "keep looking" */
+  while (found < 0 && idx < NUM_dblinkname) {
+    if (StringCmp (field_name, dblink_names[idx].field_name) == 0) {
+      found = dblink_names[idx].field_type;
+    }
+    idx++;
+  }
+  return found;
+}
+
+
+/* Reports how many entries the dblink_names table contains. */
+NLM_EXTERN Int4 GetNumDBLinkFields (void)
+{
+  Int4 count = (Int4) (NUM_dblinkname);
+
+  return count;
+}
+
+
+/*
+ * Fetches one value of a DBLink field from a DBLink user object.
+ *
+ * uop   - user object to search; NULL is returned unless
+ *         IsUserObjectDBLink accepts it
+ * field - DBLink field type; values below 1 give NULL
+ * scp   - constraint a value has to satisfy; an empty constraint accepts
+ *         every value
+ *
+ * User fields whose label equals the name for "field" are examined in list
+ * order.  For choice 7 the payload is read as an array of strings, for
+ * choice 8 as an array of Int4 that are formatted with "%d" before being
+ * tested.  Returns a StringSave copy of the first accepted value, or NULL
+ * when nothing qualifies.
+ */
+static CharPtr GetDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp)
+{
+  UserFieldPtr ufp;
+  CharPtr      found = NULL;
+  CharPtr      label;
+  Char         num_buf[15];
+  CharPtr PNTR strings;
+  Int4Ptr      numbers;
+  Int4         pos;
+
+  if (!IsUserObjectDBLink(uop) || field < 1) {
+    return NULL;
+  }
+
+  label = GetDBLinkNameFromDBLinkFieldType (field);
+  for (ufp = uop->data; ufp != NULL && found == NULL; ufp = ufp->next) {
+    /* skip fields with a different label */
+    if (ufp->label == NULL || StringCmp (ufp->label->str, label) != 0) {
+      continue;
+    }
+    /* nothing to inspect when the array is empty or missing */
+    if (ufp->num <= 0 || ufp->data.ptrvalue == NULL) {
+      continue;
+    }
+
+    switch (ufp->choice) {
+      case 7:
+        strings = (CharPtr PNTR) ufp->data.ptrvalue;
+        for (pos = 0; found == NULL && pos < ufp->num; pos++) {
+          if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (strings[pos], scp)) {
+            found = StringSave (strings[pos]);
+          }
+        }
+        break;
+      case 8:
+        numbers = (Int4Ptr) ufp->data.ptrvalue;
+        for (pos = 0; found == NULL && pos < ufp->num; pos++) {
+          sprintf (num_buf, "%d", numbers[pos]);
+          if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (num_buf, scp)) {
+            found = StringSave (num_buf);
+          }
+        }
+        break;
+      default:
+        break;
+    }
+  }
+  return found;
+}
+
+
+/*
+ * Deletes values of a DBLink field from a DBLink user object.
+ *
+ * uop   - user object to edit; FALSE is returned unless
+ *         IsUserObjectDBLink accepts it
+ * field - DBLink field type; values below 1 give FALSE
+ * scp   - constraint selecting the values to delete; an empty constraint
+ *         selects every value
+ *
+ * In user fields whose label equals the name for "field", each selected
+ * array element is removed by shifting the later elements down and
+ * decrementing num (string elements are freed first).  A user field whose
+ * num is 0 is then unlinked from uop->data and released with
+ * UserFieldFree.
+ *
+ * Returns TRUE when at least one array element was removed.
+ */
+static Boolean RemoveDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp)
+{
+  UserFieldPtr ufp, kept = NULL, following;
+  Boolean      removed_any = FALSE;
+  Char         num_buf[15];
+  CharPtr      label;
+  CharPtr PNTR strings;
+  Int4Ptr      numbers;
+  Int4         pos, shift;
+
+  if (!IsUserObjectDBLink(uop) || field < 1) {
+    return FALSE;
+  }
+
+  label = GetDBLinkNameFromDBLinkFieldType (field);
+  ufp = uop->data;
+  while (ufp != NULL) {
+    /* remember the successor now: ufp may be freed below */
+    following = ufp->next;
+
+    if (ufp->label != NULL && StringCmp (ufp->label->str, label) == 0
+        && ufp->num > 0 && ufp->data.ptrvalue != NULL) {
+      if (ufp->choice == 7) {
+        strings = (CharPtr PNTR) ufp->data.ptrvalue;
+        pos = 0;
+        while (pos < ufp->num) {
+          if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (strings[pos], scp)) {
+            strings[pos] = MemFree (strings[pos]);
+            for (shift = pos + 1; shift < ufp->num; shift++) {
+              strings[shift - 1] = strings[shift];
+            }
+            ufp->num--;
+            removed_any = TRUE;
+            /* pos is not advanced: the next element now sits at pos */
+          } else {
+            pos++;
+          }
+        }
+      } else if (ufp->choice == 8) {
+        numbers = (Int4Ptr) ufp->data.ptrvalue;
+        pos = 0;
+        while (pos < ufp->num) {
+          sprintf (num_buf, "%d", numbers[pos]);
+          if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (num_buf, scp)) {
+            for (shift = pos + 1; shift < ufp->num; shift++) {
+              numbers[shift - 1] = numbers[shift];
+            }
+            ufp->num--;
+            removed_any = TRUE;
+          } else {
+            pos++;
+          }
+        }
+      }
+    }
+
+    /* NOTE(review): this emptiness test is applied to every field that is
+     * visited, not only to fields whose label matched - confirm that
+     * dropping unrelated fields with num == 0 is intended.
+     */
+    if (ufp->num == 0) {
+      if (kept == NULL) {
+        uop->data = following;
+      } else {
+        kept->next = following;
+      }
+      ufp->next = NULL;
+      ufp = UserFieldFree (ufp);
+    } else {
+      kept = ufp;
+    }
+    ufp = following;
+  }
+
+  return removed_any;
+}
+
+
+/*
+ * Sets a value of a DBLink field on a DBLink user object.
+ *
+ * uop           - user object to edit; FALSE is returned unless
+ *                 IsUserObjectDBLink accepts it
+ * field         - DBLink field type; values below 1 give FALSE
+ * scp           - constraint selecting which existing values to change; an
+ *                 empty constraint selects all of them
+ * value         - new text
+ * existing_text - passed through to SetStringValue to decide how the new
+ *                 text is combined with an existing value
+ *
+ * For each user field whose label equals the name for "field":
+ *   - choice 7 (array of strings): every selected element is updated;
+ *   - choice 8 (array of Int4), only when value is all digits: every
+ *     selected element is updated if the combined text is still all digits.
+ * If nothing has been changed so far and the constraint is empty, the value
+ * is appended to that field's array.  If, after all fields were visited,
+ * still nothing changed and the constraint is empty, a new user field is
+ * added at the end of the list (an Int4 array for the trace assembly
+ * field, which additionally requires an all-digit value; a string array
+ * otherwise).
+ *
+ * Returns TRUE when the user object was modified.
+ */
+static Boolean SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
+{
+  UserFieldPtr curr, last = NULL;
+  Boolean rval = FALSE;
+  CharPtr newval;
+  CharPtr field_name;
+  CharPtr PNTR cpp;
+  CharPtr PNTR new_cpp;
+  Int4Ptr ipp, new_ipp;
+  Int4 i;
+  Int4 old_num;
+  Char buf[15];
+
+  if (!IsUserObjectDBLink(uop) || field < 1) {
+    return FALSE;
+  }
+
+  field_name = GetDBLinkNameFromDBLinkFieldType (field);
+
+  for (curr = uop->data; curr != NULL; curr = curr->next) {
+    if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) {
+      if (curr->choice == 7) {
+        /* always load the array pointer: the append branch below needs it
+         * even when the field is currently empty
+         */
+        cpp = (CharPtr PNTR) curr->data.ptrvalue;
+        if (curr->num > 0 && cpp != NULL) {
+          for (i = 0; i < curr->num; i++) {
+            if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) {
+              newval = cpp[i];
+              SetStringValue (&newval, value, existing_text);
+              cpp[i] = newval;
+              rval = TRUE;
+            }
+          }
+        }
+        if (!rval && IsStringConstraintEmpty (scp)) {
+          /* append; only copy old elements that really exist */
+          old_num = (cpp != NULL && curr->num > 0) ? curr->num : 0;
+          new_cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (old_num + 1));
+          for (i = 0; i < old_num; i++) {
+            new_cpp[i] = cpp[i];
+            cpp[i] = NULL;
+          }
+          new_cpp[old_num] = StringSave (value);
+          cpp = MemFree (cpp);
+          curr->data.ptrvalue = new_cpp;
+          curr->num = old_num + 1;
+          rval = TRUE;
+        }
+      } else if (curr->choice == 8 && IsAllDigits (value)) {
+        /* same precaution as for the string array above */
+        ipp = (Int4Ptr) curr->data.ptrvalue;
+        if (curr->num > 0 && ipp != NULL) {
+          for (i = 0; i < curr->num; i++) {
+            sprintf (buf, "%d", ipp[i]);
+            if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
+              newval = StringSave (buf);
+              SetStringValue (&newval, value, existing_text);
+              /* the combined text must still be a number to be stored */
+              if (IsAllDigits (newval)) {
+                ipp[i] = atoi (newval);
+                rval = TRUE;
+              }
+              newval = MemFree (newval);
+            }
+          }
+        }
+        if (!rval && IsStringConstraintEmpty (scp)) {
+          old_num = (ipp != NULL && curr->num > 0) ? curr->num : 0;
+          new_ipp = (Int4Ptr) MemNew (sizeof (Int4) * (old_num + 1));
+          for (i = 0; i < old_num; i++) {
+            new_ipp[i] = ipp[i];
+          }
+          new_ipp[old_num] = atoi (value);
+          ipp = MemFree (ipp);
+          curr->data.ptrvalue = new_ipp;
+          curr->num = old_num + 1;
+          rval = TRUE;
+        }
+      }
+    }
+    last = curr;
+  }
+  if (!rval && IsStringConstraintEmpty (scp) && (field != DBLink_field_type_trace_assembly || IsAllDigits (value))) {
+    /* no usable field exists yet: create one and add it at the end */
+    curr = UserFieldNew ();
+    curr->label = ObjectIdNew ();
+    curr->label->str = StringSave (field_name);
+
+    if (field == DBLink_field_type_trace_assembly) {
+      curr->choice = 8;
+      curr->num = 1;
+      ipp = (Int4Ptr) MemNew (sizeof (Int4) * curr->num);
+      ipp[0] = atoi (value);
+      curr->data.ptrvalue = ipp;
+    } else {
+      curr->choice = 7;
+      curr->num = 1;
+      cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * curr->num);
+      cpp[0] = StringSave (value);
+      curr->data.ptrvalue = cpp;
+    }
+    if (last == NULL) {
+      uop->data = curr;
+    } else {
+      last->next = curr;
+    }
+    rval = TRUE;
+  }
+  return rval;
+}
+
+
+
@@ -13247,6 +15506,7 @@ NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
FeatureFieldPtr feature_field;
SeqDescrPtr sdp;
GBBlockPtr gb;
+ SeqMgrDescContext context;
if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL;
@@ -13278,9 +15538,7 @@ NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
break;
case FieldType_rna_field :
if (choice == OBJ_SEQFEAT) {
- feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue);
- str = GetQualFromFeature ((SeqFeatPtr) data, feature_field, scp);
- feature_field = FeatureFieldFree (feature_field);
+ str = GetRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, NULL);
}
break;
case FieldType_struc_comment_field:
@@ -13291,9 +15549,45 @@ NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
}
}
break;
+ case FieldType_dblink:
+ if (choice == OBJ_SEQDESC && data != NULL) {
+ sdp = (SeqDescrPtr) data;
+ if (sdp != NULL && sdp->choice == Seq_descr_user) {
+ str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
+ }
+ }
+ break;
case FieldType_misc:
- if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) {
- str = GetGenomeProjectIdFromBioseq ((BioseqPtr) data, scp);
+ if (choice == OBJ_BIOSEQ) {
+ if (field->data.intvalue == Misc_field_genome_project_id) {
+ str = GetGenomeProjectIdFromBioseq ((BioseqPtr) data, scp);
+ } else if (field->data.intvalue == Misc_field_comment_descriptor) {
+ str = NULL;
+ for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_comment, &context);
+ sdp != NULL && str == NULL;
+ sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_comment, &context)) {
+ if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) {
+ str = StringSave (sdp->data.ptrvalue);
+ }
+ }
+ } else if (field->data.intvalue == Misc_field_defline) {
+ str = NULL;
+ for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_title, &context);
+ sdp != NULL && str == NULL;
+ sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_title, &context)) {
+ if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) {
+ str = StringSave (sdp->data.ptrvalue);
+ }
+ }
+ } else if (field->data.intvalue == Misc_field_keyword) {
+ str = NULL;
+ for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_genbank, &context);
+ sdp != NULL && str == NULL;
+ sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_genbank, &context)) {
+ gb = (GBBlockPtr) sdp->data.ptrvalue;
+ str = GetFirstValNodeStringMatch (gb->keywords, scp);
+ }
+ }
} else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) {
sdp = (SeqDescrPtr) data;
if (sdp != NULL && sdp->choice == Seq_descr_comment && !StringHasNoText (sdp->data.ptrvalue)) {
@@ -13397,9 +15691,7 @@ static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypeP
break;
case FieldType_rna_field :
if (choice == OBJ_SEQFEAT) {
- feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue);
- rval = RemoveQualFromFeature ((SeqFeatPtr) data, feature_field, scp);
- feature_field = FeatureFieldFree (feature_field);
+ rval = RemoveRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp);
}
break;
case FieldType_struc_comment_field:
@@ -13407,6 +15699,18 @@ static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypeP
sdp = (SeqDescrPtr) data;
if (sdp != NULL && sdp->choice == Seq_descr_user) {
rval = RemoveStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp);
+ if (rval && IsEmptyStructuredComment (sdp->data.ptrvalue)) {
+ ovp = (ObjValNodePtr) sdp;
+ ovp->idx.deleteme = TRUE;
+ }
+ }
+ }
+ break;
+ case FieldType_dblink:
+ if (choice == OBJ_SEQDESC && data != NULL) {
+ sdp = (SeqDescrPtr) data;
+ if (sdp != NULL && sdp->choice == Seq_descr_user) {
+ rval = RemoveDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
}
}
break;
@@ -13484,16 +15788,27 @@ NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
break;
case FieldType_rna_field :
if (choice == OBJ_SEQFEAT) {
- feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue);
- rval = SetQualOnFeatureEx ((SeqFeatPtr) data, feature_field, scp, value, existing_text, batch_extra);
- feature_field = FeatureFieldFree (feature_field);
+ rval = SetRNAQualOnFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, value, existing_text);
}
break;
case FieldType_struc_comment_field:
if (choice == OBJ_SEQDESC && data != NULL) {
sdp = (SeqDescrPtr) data;
if (sdp != NULL && sdp->choice == Seq_descr_user) {
+ was_empty = IsEmptyStructuredComment (sdp->data.ptrvalue);
rval = SetStructuredCommentFieldOnUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp, value, existing_text);
+ if (was_empty && !IsEmptyStructuredComment (sdp->data.ptrvalue)) {
+ ovp = (ObjValNodePtr) sdp;
+ ovp->idx.deleteme = FALSE;
+ }
+ }
+ }
+ break;
+ case FieldType_dblink:
+ if (choice == OBJ_SEQDESC && data != NULL) {
+ sdp = (SeqDescrPtr) data;
+ if (sdp != NULL && sdp->choice == Seq_descr_user) {
+ rval = SetDBLinkFieldOnUserObject (sdp->data.ptrvalue, field->data.intvalue, scp, value, existing_text);
}
}
break;
@@ -13539,6 +15854,47 @@ NLM_EXTERN Boolean SetFieldValueForObject (Uint1 choice, Pointer data, FieldType
}
+/*
+ * Sorts the values of a field on one object.
+ *
+ * choice - kind of object "data" points to (OBJ_SEQFEAT, or 0 for a
+ *          CDS-gene-prot set)
+ * data   - the object
+ * field  - field to sort
+ * order  - sort order handed to the underlying sort routine
+ *
+ * Only two field kinds are acted upon:
+ *   - FieldType_feature_field on a feature: SortQualOnFeature;
+ *   - FieldType_cds_gene_prot: SortFieldInCGPSet for a set (choice 0), or
+ *     SortQualOnFeature with the equivalent feature field for a feature.
+ * Every other field kind is accepted and ignored.
+ *
+ * Returns the result of the sort routine, FALSE if none was called or if
+ * data, field or the field payload is NULL.
+ */
+NLM_EXTERN Boolean SortFieldsForObject (Uint1 choice, Pointer data, FieldTypePtr field, Uint2 order)
+{
+  FeatureFieldPtr converted;
+  Boolean         sorted = FALSE;
+
+  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) {
+    return FALSE;
+  }
+
+  if (field->choice == FieldType_feature_field) {
+    if (choice == OBJ_SEQFEAT) {
+      sorted = SortQualOnFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, order);
+    }
+  } else if (field->choice == FieldType_cds_gene_prot) {
+    if (choice == 0) {
+      sorted = SortFieldInCGPSet ((CGPSetPtr) data, field->data.intvalue, order);
+    } else if (choice == OBJ_SEQFEAT) {
+      /* temporary feature field, released right after use */
+      converted = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
+      sorted = SortQualOnFeature ((SeqFeatPtr) data, converted, order);
+      converted = FeatureFieldFree (converted);
+    }
+  }
+  /* source qual, molinfo, pub, RNA, structured comment, DBLink and misc
+   * fields: nothing to sort
+   */
+  return sorted;
+}
+
+
NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action)
{
ValNodePtr field_list = NULL;
@@ -13974,18 +16330,30 @@ NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstr
}
} else {
if (scp->field1 != NULL && scp->field2 == NULL) {
- str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint);
- if (str1 == NULL) {
- if (scp->constraint->not_present) {
- str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL);
- if (str1 == NULL) {
- rval = TRUE;
+ if (AllowSourceQualMulti(scp->field1) && scp->constraint->not_present) {
+ scp->constraint->not_present = FALSE;
+ str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint);
+ scp->constraint->not_present = TRUE;
+ if (str1 != NULL) {
+ rval = FALSE;
+ } else {
+ rval = TRUE;
+ }
+ str1 = MemFree (str1);
+ } else {
+ str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint);
+ if (str1 == NULL) {
+ if (scp->constraint->not_present) {
+ str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL);
+ if (str1 == NULL) {
+ rval = TRUE;
+ }
}
+ } else if (!StringHasNoText (str1)) {
+ rval = TRUE;
}
- } else if (!StringHasNoText (str1)) {
- rval = TRUE;
+ str1 = MemFree (str1);
}
- str1 = MemFree (str1);
} else if (scp->field2 != NULL && scp->field1 == NULL) {
str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint);
if (str2 == NULL) {
@@ -14121,13 +16489,13 @@ static Boolean DoesFeatureMatchCGPPseudoConstraint (SeqFeatPtr sfp, CDSGeneProtP
} else if (sfp->data.choice == SEQFEAT_PROT) {
cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext);
if (cds != NULL) {
- mrna = SeqMgrGetOverlappingmRNA (cds->location, &fcontext);
+ mrna = GetmRNAforCDS (cds);
if (mrna != NULL && mrna->pseudo) {
any_pseudo = TRUE;
}
}
} else {
- mrna = SeqMgrGetOverlappingmRNA (sfp->location, &fcontext);
+ mrna = GetmRNAforCDS (sfp);
if (mrna != NULL && mrna->pseudo) {
any_pseudo = TRUE;
}
@@ -14159,7 +16527,7 @@ static Boolean DoesFeatureMatchCGPPseudoConstraint (SeqFeatPtr sfp, CDSGeneProtP
if (sfp->pseudo) {
any_pseudo = TRUE;
}
- } else if (sfp->data.choice = SEQFEAT_PROT) {
+ } else if (sfp->data.choice == SEQFEAT_PROT) {
prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->location), NULL, 0, FEATDEF_PROT, &fcontext);
if (prot != NULL && prot->pseudo) {
any_pseudo = TRUE;
@@ -14532,6 +16900,22 @@ static Boolean DoesSeqDescMatchCGPQualConstraint (SeqDescrPtr sdp, CDSGeneProtQu
}
+/* Clears idx.deleteme on every non-NULL feature referenced by the list. */
+static void UnmarkFeatureList (ValNodePtr list)
+{
+  ValNodePtr node;
+  SeqFeatPtr feat;
+
+  for (node = list; node != NULL; node = node->next) {
+    feat = node->data.ptrvalue;
+    if (feat == NULL) {
+      continue;
+    }
+    feat->idx.deleteme = FALSE;
+  }
+}
+
+
static Boolean DoesFeatureMatchCGPQualConstraint (SeqFeatPtr sfp, CDSGeneProtQualConstraintPtr constraint)
{
CGPSetPtr c = NULL;
@@ -14558,6 +16942,9 @@ static Boolean DoesFeatureMatchCGPQualConstraint (SeqFeatPtr sfp, CDSGeneProtQua
} else if (sfp->data.choice == SEQFEAT_RNA) {
c = BuildCGPSetFrommRNA (sfp);
}
+ UnmarkFeatureList (c->cds_list);
+ UnmarkFeatureList (c->mrna_list);
+ UnmarkFeatureList (c->gene_list);
rval = DoesCGPSetMatchQualConstraint (c, constraint);
if (rval && sfp->idx.subtype == FEATDEF_mat_peptide_aa) {
@@ -14600,9 +16987,11 @@ NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint)
{
if (constraint == NULL) return TRUE;
if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) return FALSE;
- if (constraint->feature != Feature_type_any) return FALSE;
+ if (constraint->feature != Macro_feature_type_any) return FALSE;
if (!IsStringConstraintEmpty (constraint->id)) return FALSE;
if (constraint->num_features != NULL) return FALSE;
+ if (constraint->length != NULL) return FALSE;
+ if (constraint->strandedness != Feature_strandedness_constraint_any) return FALSE;
return TRUE;
}
@@ -14650,6 +17039,8 @@ NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstr
CharPtr cp, cp_dst;
SeqIdPtr tmp;
Boolean match, changed;
+ DbtagPtr dbtag;
+ CharPtr tmp_id;
if (sip == NULL)
{
@@ -14713,6 +17104,23 @@ NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstr
{
match = TRUE;
}
+
+ if (!match && sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) {
+ dbtag = (DbtagPtr) sip->data.ptrvalue;
+ if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) {
+ if (DoesSingleStringMatchConstraint (dbtag->tag->str, string_constraint)) {
+ match = TRUE;
+ } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) {
+ tmp_id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str + 1));
+ StringNCpy (tmp_id, dbtag->tag->str, cp - dbtag->tag->str);
+ tmp_id[cp - dbtag->tag->str] = 0;
+ if (DoesSingleStringMatchConstraint (tmp_id, string_constraint)) {
+ match = TRUE;
+ }
+ tmp_id = MemFree (tmp_id);
+ }
+ }
+ }
}
id = MemFree (id);
sip->next = tmp;
@@ -14800,7 +17208,99 @@ NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list)
}
-static Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint)
+/*
+ * Tests a number against a quantity constraint.
+ *
+ * val      - number to test
+ * quantity - constraint node: choice selects the comparison and
+ *            data.intvalue holds the reference value; NULL means
+ *            "no constraint"
+ *
+ * Returns FALSE when the comparison named by the constraint fails
+ * (equals / greater than / less than, all strict); TRUE otherwise,
+ * including for a NULL constraint or an unrecognized choice.
+ */
+static Boolean DoesValueMatchQuantityConstraint (Int4 val, ValNodePtr quantity)
+{
+  Boolean matches = TRUE;
+
+  if (quantity == NULL) {
+    return TRUE;
+  }
+
+  switch (quantity->choice) {
+    case QuantityConstraint_equals:
+      matches = (Boolean) (val == quantity->data.intvalue);
+      break;
+    case QuantityConstraint_greater_than:
+      matches = (Boolean) (val > quantity->data.intvalue);
+      break;
+    case QuantityConstraint_less_than:
+      matches = (Boolean) (val < quantity->data.intvalue);
+      break;
+    default:
+      break;
+  }
+  return matches;
+}
+
+
+/*
+ * Tests the strands of the features on a sequence against a strandedness
+ * constraint.
+ *
+ * bsp          - sequence whose features are walked with
+ *                SeqMgrGetNextFeature; NULL gives FALSE
+ * strandedness - one of the Feature_strandedness_constraint_* values;
+ *                "any" gives TRUE without looking at the features
+ *
+ * A feature counts as minus when its context strand is Seq_strand_minus and
+ * as plus in every other case.  The walk stops early as soon as the answer
+ * is known; otherwise the final counts decide.  Note that the "only"
+ * constraints need at least one feature, while the "no ..." constraints
+ * are satisfied by a sequence without features.
+ */
+static Boolean DoesSequenceMatchStrandednessConstraint (BioseqPtr bsp, Uint2 strandedness)
+{
+  SeqMgrFeatContext context;
+  SeqFeatPtr        sfp;
+  Int4              minus_count = 0;
+  Int4              plus_count = 0;
+  Boolean           matches = FALSE;
+
+  if (bsp == NULL) {
+    return FALSE;
+  }
+  if (strandedness == Feature_strandedness_constraint_any) {
+    return TRUE;
+  }
+
+  for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
+       sfp != NULL;
+       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) {
+    if (context.strand == Seq_strand_minus) {
+      minus_count++;
+      /* a single minus feature already decides these constraints */
+      if (strandedness == Feature_strandedness_constraint_plus_only
+          || strandedness == Feature_strandedness_constraint_no_minus) {
+        return FALSE;
+      }
+      if (strandedness == Feature_strandedness_constraint_at_least_one_minus) {
+        return TRUE;
+      }
+    } else {
+      plus_count++;
+      /* a single plus feature already decides these constraints */
+      if (strandedness == Feature_strandedness_constraint_minus_only
+          || strandedness == Feature_strandedness_constraint_no_plus) {
+        return FALSE;
+      }
+      if (strandedness == Feature_strandedness_constraint_at_least_one_plus) {
+        return TRUE;
+      }
+    }
+  }
+
+  switch (strandedness) {
+    case Feature_strandedness_constraint_minus_only:
+      matches = (Boolean) (minus_count > 0 && plus_count == 0);
+      break;
+    case Feature_strandedness_constraint_plus_only:
+      matches = (Boolean) (plus_count > 0 && minus_count == 0);
+      break;
+    case Feature_strandedness_constraint_at_least_one_minus:
+      matches = (Boolean) (minus_count > 0);
+      break;
+    case Feature_strandedness_constraint_at_least_one_plus:
+      matches = (Boolean) (plus_count > 0);
+      break;
+    case Feature_strandedness_constraint_no_minus:
+      matches = (Boolean) (minus_count == 0);
+      break;
+    case Feature_strandedness_constraint_no_plus:
+      matches = (Boolean) (plus_count == 0);
+      break;
+    default:
+      break;
+  }
+  return matches;
+}
+
+
+NLM_EXTERN Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint)
{
SeqFeatPtr sfp;
SeqMgrFeatContext fcontext;
@@ -14847,7 +17347,7 @@ static Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConst
}
}
- if (constraint->feature != Feature_type_any) {
+ if (constraint->feature != Macro_feature_type_any) {
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, GetFeatdefFromFeatureType (constraint->feature), &fcontext);
if (sfp == NULL) {
return FALSE;
@@ -14877,18 +17377,19 @@ static Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConst
return FALSE;
}
}
- if (constraint->num_features->choice == QuantityConstraint_equals
- && num_features != constraint->num_features->data.intvalue) {
- return FALSE;
- } else if (constraint->num_features->choice == QuantityConstraint_greater_than
- && num_features <= constraint->num_features->data.intvalue) {
- return FALSE;
- } else if (constraint->num_features->choice == QuantityConstraint_less_than
- && num_features >= constraint->num_features->data.intvalue) {
+ if (!DoesValueMatchQuantityConstraint(num_features, constraint->num_features)) {
return FALSE;
}
}
+ if (!DoesValueMatchQuantityConstraint(bsp->length, constraint->length)) {
+ return FALSE;
+ }
+
+ if (!DoesSequenceMatchStrandednessConstraint(bsp, constraint->strandedness)) {
+ return FALSE;
+ }
+
return TRUE;
}
@@ -15198,7 +17699,7 @@ static Boolean DoesPubFieldMatch (PubdescPtr pdp, PubFieldConstraintPtr field)
static Boolean DoesPubFieldSpecialMatch (PubdescPtr pdp, PubFieldSpecialConstraintPtr field)
{
- Boolean rval = FALSE, match_all = TRUE;
+ Boolean rval = FALSE;
PubPtr pub;
CharPtr tmp;
@@ -15228,12 +17729,32 @@ static Boolean DoesPubFieldSpecialMatch (PubdescPtr pdp, PubFieldSpecialConstrai
rval = TRUE;
for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) {
tmp = GetPubFieldFromPub (pub, field->field, NULL);
- if (!IsAllCaps (tmp)) {
+ if (tmp != NULL && !IsAllCaps (tmp)) {
+ /* at least one is not all caps */
+ rval = FALSE;
+ }
+ tmp = MemFree (tmp);
+ }
+ } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_lower) {
+ rval = TRUE;
+ for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) {
+ tmp = GetPubFieldFromPub (pub, field->field, NULL);
+ if (tmp != NULL && !IsAllLowerCase (tmp)) {
+ /* at least one is not all lowercase */
rval = FALSE;
}
tmp = MemFree (tmp);
}
+ } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_punct) {
+ rval = TRUE;
+ for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) {
+ tmp = GetPubFieldFromPub (pub, field->field, NULL);
+ if (tmp != NULL && !IsAllPunctuation (tmp)) {
+ /* at least one is not all punctuation */
+ rval = FALSE;
+ }
+ tmp = MemFree (tmp);
+ }
}
return rval;
@@ -15257,7 +17778,8 @@ static Boolean DoesPubMatchPublicationConstraint (PubdescPtr pdp, PublicationCon
}
}
if (type_ok) {
- rval = DoesPubFieldMatch (pdp, constraint->field) && DoesPubFieldSpecialMatch (pdp, constraint->special_field);
+ rval = (constraint->field == NULL || DoesPubFieldMatch (pdp, constraint->field))
+ && (constraint->special_field == NULL || DoesPubFieldSpecialMatch (pdp, constraint->special_field));
}
return rval;
}
@@ -15331,13 +17853,20 @@ static Boolean DoesObjectMatchFeatureFieldConstraint (Uint1 choice, Pointer data
switch (choice) {
case OBJ_SEQFEAT:
+ not_present = string_constraint->not_present;
+ string_constraint->not_present = FALSE;
str = GetQualFromFeature ((SeqFeatPtr) data, ffp, string_constraint);
if (str != NULL) {
rval = TRUE;
str = MemFree (str);
}
+ if (not_present) {
+ rval = !rval;
+ string_constraint->not_present = TRUE;
+ }
break;
case OBJ_SEQDESC:
+ case OBJ_BIOSEQ:
bsp = GetSequenceForObject (choice, data);
if (bsp != NULL) {
subtype = GetFeatdefFromFeatureType (ffp->type);
@@ -15380,6 +17909,72 @@ static Boolean DoesObjectMatchFeatureFieldConstraint (Uint1 choice, Pointer data
}
+/*
+ * Tests whether an object satisfies a string constraint on an RNA qualifier.
+ *
+ * choice            - kind of object: OBJ_SEQFEAT, OBJ_SEQDESC or OBJ_BIOSEQ;
+ *                     any other kind gives FALSE
+ * data              - the object; NULL gives FALSE
+ * rq                - RNA qualifier to read with GetRNAQualFromFeature
+ * string_constraint - constraint on the qualifier text; an empty constraint
+ *                     gives TRUE
+ *
+ * For a feature the qualifier is read from that feature.  For a descriptor
+ * or sequence, the features of the sequence returned by
+ * GetSequenceForObject are scanned (all RNA features when rq names no
+ * specific RNA type, otherwise only features of that type) until one
+ * yields a value.
+ *
+ * The constraint's not_present flag is cleared while the qualifier is read
+ * and restored afterwards; when it was set the result is inverted, so TRUE
+ * then means that no matching value was found.
+ */
+static Boolean DoesObjectMatchRnaQualConstraint (Uint1 choice, Pointer data, RnaQualPtr rq, StringConstraintPtr string_constraint)
+{
+  Boolean rval = FALSE;
+  CharPtr str;
+  BioseqPtr bsp;
+  Int4 subtype;
+  SeqFeatPtr sfp;
+  SeqMgrFeatContext fcontext;
+  Boolean not_present;
+  Uint1 feat_choice = 0;
+
+  if (data == NULL) {
+    return FALSE;
+  }
+  if (IsStringConstraintEmpty (string_constraint)) {
+    return TRUE;
+  }
+
+  switch (choice) {
+    case OBJ_SEQFEAT:
+      not_present = string_constraint->not_present;
+      string_constraint->not_present = FALSE;
+      str = GetRNAQualFromFeature ((SeqFeatPtr) data, rq, string_constraint, NULL);
+      if (str != NULL) {
+        rval = TRUE;
+        str = MemFree (str);
+      }
+      if (not_present) {
+        rval = !rval;
+        string_constraint->not_present = TRUE;
+      }
+      break;
+    case OBJ_SEQDESC:
+    case OBJ_BIOSEQ:
+      bsp = GetSequenceForObject (choice, data);
+      if (bsp != NULL) {
+        /* a missing qualifier description is treated like "any RNA type"
+         * instead of being dereferenced
+         */
+        if (rq == NULL || rq->type == NULL || rq->type->choice == RnaFeatType_any) {
+          feat_choice = SEQFEAT_RNA;
+          subtype = 0;
+        } else {
+          feat_choice = 0;
+          subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice));
+        }
+
+        not_present = string_constraint->not_present;
+        string_constraint->not_present = FALSE;
+        for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext);
+             !rval && sfp != NULL;
+             sfp = SeqMgrGetNextFeature (bsp, sfp, feat_choice, subtype, &fcontext)) {
+          str = GetRNAQualFromFeature (sfp, rq, string_constraint, NULL);
+          if (str != NULL) {
+            rval = TRUE;
+            str = MemFree (str);
+          }
+        }
+        if (not_present) {
+          rval = !rval;
+          string_constraint->not_present = TRUE;
+        }
+      }
+      break;
+    default:
+      break;
+  }
+  return rval;
+}
+
+
static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, FieldConstraintPtr constraint)
{
Boolean rval = FALSE;
@@ -15409,9 +18004,7 @@ static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, Field
rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint);
break;
case FieldType_rna_field:
- ffp = FeatureFieldFromRnaQual (constraint->field->data.ptrvalue);
- rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint);
- ffp = FeatureFieldFree (ffp);
+ rval = DoesObjectMatchRnaQualConstraint (choice, data, constraint->field->data.ptrvalue, constraint->string_constraint);
break;
case FieldType_cds_gene_prot:
ffp = FeatureFieldFromCDSGeneProtField (constraint->field->data.intvalue);
@@ -15430,15 +18023,393 @@ static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, Field
str = MemFree (str);
}
break;
+ case FieldType_misc:
+ bsp = GetSequenceForObject (choice, data);
+ if (bsp != NULL) {
+ str = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, constraint->field, constraint->string_constraint, NULL);
+ if (str != NULL) {
+ rval = TRUE;
+ }
+ str = MemFree (str);
+ }
+ break;
/* TODO LATER */
case FieldType_pub:
+ break;
+ }
+ return rval;
+}
+
+
+/* Fetches the value of feature field ffp from an object, filtered by scp.
+ *
+ * choice selects how data is interpreted:
+ *   OBJ_SEQFEAT             - data is the feature itself
+ *   OBJ_SEQDESC, OBJ_BIOSEQ - first non-NULL value among the features of the
+ *                             matching subtype on the object's Bioseq
+ *   0                       - data is a CGPSetPtr (CDS-gene-protein set)
+ * Returns NULL when ffp or data is NULL, when choice is anything else, or when
+ * no value is found. Callers in this file release the result with MemFree.
+ */
+static CharPtr GetFeatureFieldFromObject (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr scp)
+{
+ CharPtr rval = NULL;
+ BioseqPtr bsp;
+ CGPSetPtr cgp;
+ SeqFeatPtr sfp;
+ SeqMgrFeatContext fcontext;
+ Int4 subtype;
+ Uint2 cds_gene_prot_field;
+
+ if (ffp == NULL || data == NULL) {
+ return NULL;
+ }
+ switch (choice) {
+ case OBJ_SEQFEAT:
+ rval = GetQualFromFeature ((SeqFeatPtr) data, ffp, scp);
+ break;
+ case OBJ_SEQDESC:
+ case OBJ_BIOSEQ:
+ bsp = GetSequenceForObject (choice, data);
+ if (bsp != NULL) {
+ subtype = GetFeatdefFromFeatureType (ffp->type);
+ /* the loop ends as soon as one feature supplies a value */
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext);
+ rval == NULL && sfp != NULL;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) {
+ rval = GetQualFromFeature (sfp, ffp, scp);
+ }
+ }
+ break;
+ case 0:
+ /* choice 0 denotes a CDS-gene-protein set rather than an object manager type */
+ cgp = (CGPSetPtr) data;
+ cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp);
+ /* a result of 0 means the feature field has no CDS-gene-prot equivalent here */
+ if (cds_gene_prot_field > 0) {
+ rval = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, scp);
+ }
+ break;
+ }
+ return rval;
+}
+
+
+/* Fetches the value of an arbitrary field (a FieldType choice node) from an
+ * object, optionally filtered by the string constraint scp.
+ *
+ * Handles source qualifiers, feature fields, RNA fields, CDS-gene-protein
+ * fields, molinfo fields and misc fields; any other field->choice yields NULL.
+ * Returns NULL when data or field is NULL or when no (matching) value exists.
+ * The caller in this file releases the result with MemFree.
+ * NOTE(review): for FieldType_rna_field, rq comes straight from
+ * field->data.ptrvalue and is dereferenced in the Bioseq case without a NULL
+ * check -- confirm it cannot be NULL.
+ */
+static CharPtr GetConstraintFieldFromObject (Uint1 choice, Pointer data, ValNodePtr field, StringConstraintPtr scp)
+{
+ BioSourcePtr biop;
+ BioseqPtr bsp;
+ SeqFeatPtr sfp;
+ SeqMgrFeatContext fcontext;
+ Int4 subtype;
+ FeatureFieldPtr ffp;
+ RnaQualPtr rq;
+ Uint1 feat_choice = 0;
+ CharPtr rval = NULL;
+
+ if (data == NULL || field == NULL) {
+ return NULL;
+ }
+
+ switch (field->choice) {
+ case FieldType_source_qual:
+ biop = GetBioSourceFromObject (choice, data);
+ if (biop != NULL) {
+ rval = GetSourceQualFromBioSource (biop, field->data.ptrvalue, scp);
+ }
+ break;
+ case FieldType_feature_field:
+ rval = GetFeatureFieldFromObject(choice, data, (FeatureFieldPtr) field->data.ptrvalue, scp);
+ break;
+ case FieldType_rna_field:
+ rq = (RnaQualPtr) field->data.ptrvalue;
+ switch (choice) {
+ case OBJ_SEQFEAT:
+ rval = GetRNAQualFromFeature ((SeqFeatPtr) data, rq, scp, NULL);
+ break;
+ case OBJ_SEQDESC:
+ case OBJ_BIOSEQ:
+ bsp = GetSequenceForObject (choice, data);
+ if (bsp != NULL) {
+ /* no specific RNA type: iterate by feature choice (SEQFEAT_RNA);
+ * otherwise iterate by the feature subtype derived from the RNA type
+ */
+ if (rq->type == NULL || rq->type->choice == RnaFeatType_any) {
+ feat_choice = SEQFEAT_RNA;
+ subtype = 0;
+ } else {
+ feat_choice = 0;
+ subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice));
+ }
+
+ /* take the value from the first feature that has one */
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext);
+ rval == NULL && sfp != NULL;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, feat_choice, subtype, &fcontext)) {
+ rval = GetRNAQualFromFeature (sfp, rq, scp, NULL);
+ }
+ }
+ break;
+ }
+ break;
+ case FieldType_cds_gene_prot:
+ /* build a temporary feature field for the lookup and free it afterwards */
+ ffp = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
+ rval = GetFeatureFieldFromObject (choice, data, ffp, scp);
+ ffp = FeatureFieldFree (ffp);
+ break;
+ case FieldType_molinfo_field:
+ bsp = GetSequenceForObject (choice, data);
+ if (bsp != NULL) {
+ rval = GetSequenceQualFromBioseq (bsp, field->data.ptrvalue);
+ /* the getter takes no constraint, so apply scp here and drop a non-matching value */
+ if (rval != NULL && scp != NULL && !DoesStringMatchConstraint (rval, scp)) {
+ rval = MemFree (rval);
+ }
+ }
+ break;
+ case FieldType_misc:
+ /* misc fields are always looked up on the Bioseq, whatever the object type */
+ bsp = GetSequenceForObject (choice, data);
+ if (bsp != NULL) {
+ rval = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, field, scp, NULL);
+ }
+ break;
+ }
+
+ return rval;
+}
+
+
+/* Reports whether the given field has no value on the object.
+ * Returns TRUE when GetConstraintFieldFromObject finds nothing for field, and
+ * FALSE when a value exists or when data or field is NULL.
+ */
+static Boolean DoesObjectMatchFieldMissingConstraint(Uint1 choice, Pointer data, ValNodePtr field)
+{
+ Boolean rval = FALSE;
+ CharPtr str;
+
+ if (data == NULL || field == NULL) return FALSE;
+
+ /* no string constraint: any value at all counts as "present" */
+ str = GetConstraintFieldFromObject(choice, data, field, NULL);
+ if (str == NULL) {
+ rval = TRUE;
+ }
+ str = MemFree (str);
return rval;
}
+/* Returns TRUE when the molinfo field constraint is NULL or names no field,
+ * FALSE otherwise.
+ */
+NLM_EXTERN Boolean IsMolinfoFieldConstraintEmpty (MolinfoFieldConstraintPtr constraint)
+{
+ if (constraint == NULL || constraint->field == NULL) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+
+/* Reports whether the Bioseq behind an object satisfies a molinfo field constraint.
+ *
+ * Returns FALSE when no Bioseq can be found for the object and TRUE when the
+ * constraint is empty; the is_not flag is not consulted in either case.
+ * Otherwise the constraint's value is compared with the corresponding MolInfo
+ * or Bioseq field, and the result is inverted when constraint->is_not is set.
+ */
+static Boolean DoesObjectMatchMolinfoFieldConstraint (Uint1 choice, Pointer data, MolinfoFieldConstraintPtr constraint)
+{
+ BioseqPtr bsp;
+ MolInfoPtr mip;
+ Boolean rval = FALSE;
+
+ bsp = GetSequenceForObject (choice, data);
+ if (bsp == NULL) {
+ rval = FALSE;
+ } else if (IsMolinfoFieldConstraintEmpty(constraint)) {
+ rval = TRUE;
+ } else {
+ mip = GetMolInfoForBioseq (bsp);
+ rval = FALSE;
+ switch (constraint->field->choice) {
+ /* the first three cases read MolInfo; without a MolInfo, only a
+ * constraint value of 0 matches
+ */
+ case MolinfoField_molecule:
+ if (mip == NULL && constraint->field->data.intvalue == 0) {
+ rval = TRUE;
+ } else if (mip != NULL && mip->biomol == BiomolFromMoleculeType (constraint->field->data.intvalue)) {
+ rval = TRUE;
+ }
+ break;
+ case MolinfoField_technique:
+ if (mip == NULL && constraint->field->data.intvalue == 0) {
+ rval = TRUE;
+ } else if (mip != NULL && mip->tech == TechFromTechniqueType (constraint->field->data.intvalue)) {
+ rval = TRUE;
+ }
+ break;
+ case MolinfoField_completedness:
+ if (mip == NULL && constraint->field->data.intvalue == 0) {
+ rval = TRUE;
+ } else if (mip != NULL && mip->completeness == CompletenessFromCompletednessType (constraint->field->data.intvalue)) {
+ rval = TRUE;
+ }
+ break;
+ /* the remaining cases read fields stored on the Bioseq itself */
+ case MolinfoField_mol_class:
+ if (bsp->mol == MolFromMoleculeClassType (constraint->field->data.intvalue)) {
+ rval = TRUE;
+ }
+ break;
+ case MolinfoField_topology:
+ if (bsp->topology == TopologyFromTopologyType (constraint->field->data.intvalue)) {
+ rval = TRUE;
+ }
+ break;
+ case MolinfoField_strand:
+ if (bsp->strand == StrandFromStrandType (constraint->field->data.intvalue)) {
+ rval = TRUE;
+ }
+ break;
+ }
+ if (constraint->is_not) {
+ rval = !rval;
+ }
+ }
+
+ return rval;
+}
+
+
+/* Reports whether a coding region satisfies a translation constraint.
+ *
+ * Two sequences are involved: "actual", read from the product Bioseq of the
+ * coding region, and "translation", produced by translating the coding region.
+ * The constraint can contain
+ *   - actual_strings: string constraints that must all match the actual protein
+ *   - transl_strings: string constraints that must all match the translation
+ *   - internal_stops: whether the translation must or must not contain a '*'
+ *                     anywhere other than its final position
+ *   - num_mismatches: a quantity constraint (equals, less than, greater than)
+ *                     on the number of differences between the two sequences
+ * Each sequence is only fetched when some part of the constraint needs it.
+ *
+ * Returns FALSE when sfp is NULL or not a coding region, TRUE when constraint
+ * is NULL, and otherwise TRUE only if every part of the constraint holds.
+ */
+static Boolean DoesCodingRegionMatchTranslationConstraint (SeqFeatPtr sfp, TranslationConstraintPtr constraint)
+{
+ ByteStorePtr trans_prot = NULL;
+ BioseqPtr actual_prot = NULL;
+ CharPtr translation = NULL;
+ Int4 translation_len = 0;
+ CharPtr actual = NULL;
+ Int4 actual_len = 0;
+ CharPtr stop, cp1, cp2;
+ Boolean rval = TRUE, alt_start = FALSE;
+ StringConstraintPtr scp;
+ Int4 pos, comp_len;
+ Int4 num = 0;
+
+ if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
+ return FALSE;
+ } else if (constraint == NULL) {
+ return TRUE;
+ }
+
+ /* fetch the product sequence only if something will look at it */
+ if (constraint->actual_strings != NULL
+ || constraint->num_mismatches != NULL) {
+ actual_prot = BioseqLockById(SeqLocId(sfp->product));
+ if (actual_prot != NULL) {
+ actual = (CharPtr) MemNew (sizeof (Char) * (actual_prot->length + 1));
+ /* presumably fills the buffer with the product residues, gaps as dashes -- confirm against SeqPortStreamInt */
+ SeqPortStreamInt (actual_prot, 0, actual_prot->length - 1, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) (actual), NULL);
+ actual_len = StringLen (actual);
+ }
+ }
+
+ /* every actual-string constraint must match; stop at the first failure.
+ * actual is still NULL here if the product could not be locked
+ */
+ for (scp = constraint->actual_strings; scp != NULL && rval; scp = scp->next) {
+ rval = DoesStringMatchConstraint (actual, scp);
+ }
+
+ if (rval) {
+ /* translate only if something will look at the translation */
+ if (constraint->transl_strings != NULL
+ || constraint->internal_stops != Match_type_constraint_dont_care
+ || constraint->num_mismatches != NULL) {
+ trans_prot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, &alt_start, TRUE); /* include stop codons, do not remove trailing X/B/Z */
+ if (trans_prot != NULL) {
+ translation = BSMerge (trans_prot, NULL);
+ translation_len = StringLen (translation);
+ }
+ BSFree (trans_prot);
+ }
+ for (scp = constraint->transl_strings; scp != NULL && rval; scp = scp->next) {
+ rval = DoesStringMatchConstraint (translation, scp);
+ }
+
+ if (rval && constraint->internal_stops != Match_type_constraint_dont_care) {
+ /* the first '*' is internal unless it is the final character */
+ stop = StringChr (translation, '*');
+ if (stop != NULL && stop != translation + translation_len - 1) {
+ if (constraint->internal_stops == Match_type_constraint_no) {
+ rval = FALSE;
+ }
+ } else {
+ if (constraint->internal_stops == Match_type_constraint_yes) {
+ rval = FALSE;
+ }
+ }
+ }
+ }
+
+ if (rval && constraint->num_mismatches != NULL) {
+ /* leave a terminal '*' out of the comparison on both sides */
+ stop = StringRChr (translation, '*');
+ if (stop != NULL && stop == translation + translation_len - 1) {
+ translation_len--;
+ }
+ stop = StringRChr (actual, '*');
+ if (stop != NULL && stop == actual + actual_len - 1) {
+ actual_len--;
+ }
+ /* the length difference counts as mismatches; compare over the shorter length */
+ if (translation_len > actual_len) {
+ num = translation_len - actual_len;
+ comp_len = actual_len;
+ } else {
+ num = actual_len - translation_len;
+ comp_len = translation_len;
+ }
+
+ cp1 = actual;
+ cp2 = translation;
+ for (pos = 0; pos < comp_len && rval; pos++) {
+ if (*cp1 != *cp2) {
+ num++;
+ /* for "equals" and "less than", give up as soon as the limit is passed */
+ if (constraint->num_mismatches->choice == QuantityConstraint_equals
+ && num > constraint->num_mismatches->data.intvalue) {
+ rval = FALSE;
+ } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than
+ && num >= constraint->num_mismatches->data.intvalue) {
+ rval = FALSE;
+ }
+ }
+ cp1++;
+ cp2++;
+ }
+ /* final check against the complete mismatch count */
+ if (rval) {
+ if (constraint->num_mismatches->choice == QuantityConstraint_greater_than
+ && num <= constraint->num_mismatches->data.intvalue) {
+ rval = FALSE;
+ } else if (constraint->num_mismatches->choice == QuantityConstraint_equals
+ && num != constraint->num_mismatches->data.intvalue) {
+ rval = FALSE;
+ } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than
+ && num >= constraint->num_mismatches->data.intvalue) {
+ rval = FALSE;
+ }
+ }
+ }
+
+ /* release the lock and the two sequence buffers */
+ if (actual_prot != NULL) {
+ BioseqUnlock(actual_prot);
+ }
+ actual = MemFree (actual);
+ translation = MemFree (translation);
+ return rval;
+}
+
+
+/* Reports whether an object satisfies a translation constraint by finding its
+ * coding region and testing it with DoesCodingRegionMatchTranslationConstraint.
+ *
+ * Returns FALSE when data is NULL and TRUE when constraint is NULL.
+ * Only OBJ_SEQFEAT and OBJ_BIOSEQ are handled; any other choice yields FALSE.
+ */
+static Boolean DoesObjectMatchTranslationConstraint (Uint1 choice, Pointer data, TranslationConstraintPtr constraint)
+{
+ Boolean rval = FALSE;
+ SeqFeatPtr sfp = NULL;
+ BioseqPtr bsp;
+ SeqMgrFeatContext context;
+
+ if (data == NULL) {
+ return FALSE;
+ } else if (constraint == NULL) {
+ return TRUE;
+ }
+
+ switch (choice) {
+ case OBJ_SEQFEAT:
+ /* must be coding region or protein feature */
+ sfp = (SeqFeatPtr) data;
+ /* for a protein feature, switch to the coding region whose product is that protein's Bioseq */
+ if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
+ }
+ rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint);
+ break;
+ case OBJ_BIOSEQ:
+ /* must be protein sequence, or nucleotide bioseq with only one coding region */
+ bsp = data;
+ if (bsp != NULL) {
+ if (ISA_aa (bsp->mol)) {
+ sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
+ } else {
+ sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &context);
+ /* a second coding region makes the choice ambiguous, so test none */
+ if (SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &context) != NULL) {
+ sfp = NULL;
+ }
+ }
+ rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint);
+ }
+ break;
+ }
+ return rval;
+}
+
+
static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, ConstraintChoicePtr constraint)
{
Boolean rval = TRUE;
@@ -15485,6 +18456,15 @@ static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, Constraint
case ConstraintChoice_pub:
rval = DoesObjectMatchPublicationConstraint (choice, data, constraint->data.ptrvalue);
break;
+ case ConstraintChoice_molinfo:
+ rval = DoesObjectMatchMolinfoFieldConstraint (choice, data, constraint->data.ptrvalue);
+ break;
+ case ConstraintChoice_field_missing:
+ rval = DoesObjectMatchFieldMissingConstraint (choice, data, constraint->data.ptrvalue);
+ break;
+ case ConstraintChoice_translation:
+ rval = DoesObjectMatchTranslationConstraint (choice, data, constraint->data.ptrvalue);
+ break;
}
return rval;
}
@@ -15604,7 +18584,7 @@ NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit)
break;
}
- scp->case_sensitive = TRUE;
+ scp->case_sensitive = !(edit->case_insensitive);
scp->whole_word = FALSE;
scp->not_present = FALSE;
@@ -15620,19 +18600,26 @@ static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit)
if (edit == NULL) return StringSave (str);
str = StringSave (str);
- cp_found = StringISearch (str, edit->find_txt);
+ if (edit->case_insensitive) {
+ cp_found = StringISearch (str, edit->find_txt);
+ } else {
+ cp_found = StringSearch (str, edit->find_txt);
+ }
found_len = StringLen (edit->find_txt);
replace_len = StringLen (edit->repl_txt);
- if (edit->location == Field_edit_location_beginning
- && cp_found != str) {
- cp_found = NULL;
- }
while (cp_found != NULL)
{
- if (edit->location == Field_edit_location_end
+ if (edit->location == Field_edit_location_beginning
+ && cp_found != str) {
+ cp_found = NULL;
+ } else if (edit->location == Field_edit_location_end
&& cp_found != str + StringLen (str) - found_len) {
- cp_found = StringISearch (cp_found + found_len, edit->find_txt);
+ if (edit->case_insensitive) {
+ cp_found = StringISearch (cp_found + found_len, edit->find_txt);
+ } else {
+ cp_found = StringSearch (cp_found + found_len, edit->find_txt);
+ }
} else {
new_len = StringLen (str) + 1 - found_len + replace_len;
new_str = (CharPtr) MemNew (new_len * sizeof (Char));
@@ -15648,7 +18635,11 @@ static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit)
str = MemFree (str);
str = new_str;
}
- cp_found = StringISearch (cp_found, edit->find_txt);
+ if (edit->case_insensitive) {
+ cp_found = StringISearch (cp_found, edit->find_txt);
+ } else {
+ cp_found = StringSearch (cp_found, edit->find_txt);
+ }
}
}
return str;
@@ -15689,6 +18680,7 @@ static void RemoveFieldNameFromString (CharPtr field_name, CharPtr str)
typedef struct objectcollection {
AECRActionPtr action;
ValNodePtr object_list;
+ ValNodePtr object_tail;
BatchExtraPtr batch_extra;
} ObjectCollectionData, PNTR ObjectCollectionPtr;
@@ -15715,7 +18707,7 @@ static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer ob
if (a != NULL
&& IsObjectAppropriateForFieldValue (objecttype, objectdata, a->field)
&& DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
- ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
+ ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
}
break;
case ActionChoice_edit :
@@ -15726,7 +18718,7 @@ static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer ob
scp = StringConstraintFromFieldEdit (e->edit);
str = GetFieldValueForObjectEx (objecttype, objectdata, e->field, scp, o->batch_extra);
if (!StringHasNoText (str)) {
- ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
+ ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
}
str = MemFree (str);
}
@@ -15747,7 +18739,7 @@ static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer ob
field_to = FieldTypeFree (field_to);
}
if (!StringHasNoText (str)) {
- ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
+ ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
}
str = MemFree (str);
}
@@ -15761,7 +18753,7 @@ static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer ob
&& IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
&& IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
&& DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
- ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
+ ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
}
field_from = FieldTypeFree (field_from);
field_to = FieldTypeFree (field_to);
@@ -15774,7 +18766,7 @@ static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer ob
&& IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
&& IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
&& DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
- ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
+ ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
}
field_from = FieldTypeFree (field_from);
field_to = FieldTypeFree (field_to);
@@ -15784,7 +18776,7 @@ static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer ob
if (r != NULL
&& IsObjectAppropriateForFieldValue (objecttype, objectdata, r->field)
&& DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
- ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
+ ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
}
break;
case ActionChoice_parse :
@@ -15798,8 +18790,9 @@ static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer ob
scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint);
str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra);
portion = GetTextPortionFromString (str, p->portion);
+ ApplyTextTransformsToString (&portion, p->transform);
if (!StringHasNoText (portion)) {
- ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
+ ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
}
portion = MemFree (portion);
str = MemFree (str);
@@ -15845,23 +18838,19 @@ static void AECRObjectCollectionBioseqCallback (BioseqPtr bsp, Pointer data)
}
-static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data);
-static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
-static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
-static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
-
static ValNodePtr CollectMiscObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr constraint)
{
- ValNodePtr target_list = NULL, bsp_list = NULL, tmp_list = NULL, vnp;
+ ValNodePtr target_list = NULL, bsp_list = NULL, tmp_list = NULL, tmp_tail = NULL, vnp;
if (sep == NULL) {
return NULL;
}
- VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback);
+ /* VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); */
+ bsp_list = CollectNucBioseqs (sep);
for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) {
- ValNodeAddPointer (&tmp_list, vnp->choice, vnp->data.ptrvalue);
+ ValNodeAddPointerEx (&tmp_list, &tmp_tail, vnp->choice, vnp->data.ptrvalue);
}
}
bsp_list = ValNodeFree (bsp_list);
@@ -15889,6 +18878,68 @@ static ValNodePtr CollectMiscObjectsForApply (SeqEntryPtr sep, Int4 misc_type, V
}
+/* Appends the structured comment descriptors of bsp to a destination list.
+ *
+ * Every user-object descriptor on bsp that IsUserObjectStructuredComment
+ * accepts is appended to *dest_list as an OBJ_SEQDESC entry; dest_tail is
+ * handed to ValNodeAddPointerEx along with the list head. If bsp has none,
+ * a new "StructuredComment" user-object descriptor is created on bsp, flagged
+ * deleteme, and appended instead.
+ * Does nothing when bsp, dest_list or dest_tail is NULL.
+ */
+static void AddStructuredCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list, ValNodePtr PNTR dest_tail)
+{
+ SeqDescrPtr sdp;
+ SeqMgrDescContext context;
+ Boolean found = FALSE;
+ ObjValNodePtr ovp;
+ UserObjectPtr uop;
+
+ if (bsp == NULL || dest_list == NULL || dest_tail == NULL) {
+ return;
+ }
+
+ for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
+ sdp != NULL;
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
+ if (IsUserObjectStructuredComment (sdp->data.ptrvalue)) {
+ ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp);
+ found = TRUE;
+ }
+ }
+ if (!found) {
+ /* if no existing structured comment descriptor, create one, marked for delete.
+ * unmark it for deletion when it gets populated.
+ */
+ /* NOTE(review): sdp and uop are used below without NULL checks -- confirm
+ * that CreateNewDescriptorOnBioseq and UserObjectNew cannot fail here
+ */
+ sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
+ uop = UserObjectNew ();
+ uop->type = ObjectIdNew ();
+ uop->type->str = StringSave ("StructuredComment");
+ sdp->data.ptrvalue = uop;
+ /* the descriptor is cast to ObjValNodePtr to reach its deleteme flag */
+ ovp = (ObjValNodePtr) sdp;
+ ovp->idx.deleteme = TRUE;
+ ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp);
+ }
+}
+
+
+/* Builds the list of structured comment descriptors that an apply action
+ * should target within sep.
+ *
+ * Takes the Bioseqs returned by CollectNucBioseqs, keeps those that satisfy
+ * the constraint set, and gathers the structured comment descriptors of each
+ * one (a placeholder is created where a Bioseq has none).
+ * Returns the descriptor list, or NULL when sep is NULL or nothing qualifies.
+ * The two intermediate lists are freed here; the result is returned to the caller.
+ */
+static ValNodePtr CollectStructuredCommentsForApply (SeqEntryPtr sep, ValNodePtr constraint)
+{
+ ValNodePtr target_list = NULL, target_tail = NULL, bsp_list = NULL, tmp_list = NULL, tmp_tail = NULL, vnp;
+
+ if (sep == NULL) {
+ return NULL;
+ }
+
+ /* VisitBioseqsInSep (sep, &bsp_list, CollectNucBioseqCallback); */
+ bsp_list = CollectNucBioseqs (sep);
+ /* keep only the Bioseqs that pass the constraint set */
+ for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
+ if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, constraint)) {
+ ValNodeAddPointerEx (&tmp_list, &tmp_tail, vnp->choice, vnp->data.ptrvalue);
+ }
+ }
+ bsp_list = ValNodeFree (bsp_list);
+
+
+ /* gather the destination descriptors for each surviving Bioseq */
+ for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
+ AddStructuredCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &target_list, &target_tail);
+ }
+ tmp_list = ValNodeFree (tmp_list);
+ return target_list;
+}
+
+
NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionPtr action, BatchExtraPtr batch_extra)
{
ObjectCollectionData ocd;
@@ -15899,6 +18950,7 @@ NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionP
ocd.action = action;
ocd.object_list = NULL;
+ ocd.object_tail = NULL;
if (batch_extra == NULL) {
ocd.batch_extra = BatchExtraNew ();
InitBatchExtraForAECRAction (ocd.batch_extra, action, sep);
@@ -15914,6 +18966,8 @@ NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionP
&& action->action->choice == ActionChoice_apply
&& (apply = action->action->data.ptrvalue) != NULL) {
ocd.object_list = CollectMiscObjectsForApply (sep, apply->field->data.intvalue, action->constraint);
+ } else if (field_type == FieldType_struc_comment_field) {
+ ocd.object_list = CollectStructuredCommentsForApply (sep, action->constraint);
} else {
VisitFeaturesInSep (sep, &ocd, AECRActionObjectCollectionFeatureCallback);
VisitDescriptorsInSep (sep, &ocd, AECRActionObjectCollectionDescriptorCallback);
@@ -16005,7 +19059,7 @@ static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_n
gene->idx.deleteme = TRUE;
}
- mrna = SeqMgrGetOverlappingmRNA (cds->location, &fcontext);
+ mrna = GetmRNAforCDS (cds);
if (mrna != NULL)
{
ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna);
@@ -16087,22 +19141,6 @@ static CGPSetPtr BuildCGPSetFromGene (SeqFeatPtr gene)
}
-static void UnmarkFeatureList (ValNodePtr list)
-{
- SeqFeatPtr sfp;
-
- while (list != NULL)
- {
- sfp = list->data.ptrvalue;
- if (sfp != NULL)
- {
- sfp->idx.deleteme = FALSE;
- }
- list = list->next;
- }
-}
-
-
static void
AdjustCGPObjectListForMatPeptides
(ValNodePtr PNTR cgp_list,
@@ -16210,7 +19248,7 @@ AdjustCGPObjectListForMatPeptides
}
-static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act)
+static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act, BoolPtr created_protein_features)
{
SeqEntryPtr sep;
BuildCGPSetData b;
@@ -16233,6 +19271,10 @@ static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act)
b.cds_list = NULL;
b.gene_list = NULL;
b.mrna_list = NULL;
+
+ if (created_protein_features != NULL) {
+ *created_protein_features = FALSE;
+ }
VisitFeaturesInSep (sep, &b, BuildCGPSetCallback);
@@ -16251,6 +19293,9 @@ static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act)
{
/* indexing because we have created full-length protein features */
SeqMgrIndexFeatures (entityID, NULL);
+ if (created_protein_features != NULL) {
+ *created_protein_features = TRUE;
+ }
}
/* build cdsets for mrna features that don't have coding regions */
@@ -16368,7 +19413,6 @@ static void AlsoChangeMrnaForObject (Uint1 choice, Pointer data)
{
CharPtr str;
SeqFeatPtr sfp, mrna;
- SeqMgrFeatContext context;
FeatureField f;
if (choice == 0) {
@@ -16378,15 +19422,15 @@ static void AlsoChangeMrnaForObject (Uint1 choice, Pointer data)
} else if (choice == OBJ_SEQFEAT) {
sfp = (SeqFeatPtr) data;
if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
- mrna = SeqMgrGetOverlappingmRNA (sfp->location, &context);
+ mrna = GetmRNAforCDS (sfp);
if (mrna != NULL) {
- f.type = Feature_type_cds;
+ f.type = Macro_feature_type_cds;
f.field = ValNodeNew(NULL);
f.field->next = NULL;
f.field->choice = FeatQualChoice_legal_qual;
f.field->data.intvalue = Feat_qual_legal_product;
str = GetQualFromFeature (sfp, &f, NULL);
- f.type = Feature_type_mRNA;
+ f.type = Macro_feature_type_mRNA;
SetQualOnFeature (mrna, &f, NULL, str, ExistingTextOption_replace_old);
str = MemFree (str);
f.field = ValNodeFree (f.field);
@@ -16400,18 +19444,25 @@ NLM_EXTERN Int4 DoApplyActionToObjectListEx (ApplyActionPtr action, ValNodePtr o
{
ValNodePtr vnp;
Int4 num_succeed = 0, num_fail = 0;
+ CharPtr old_str, new_str;
if (action == NULL || object_list == NULL) return 0;
for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
+ old_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra);
if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, action->value, action->existing_text, batch_extra)) {
- if (also_change_mrna) {
- AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
+ new_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra);
+ if (StringCmp (old_str, new_str) != 0) {
+ if (also_change_mrna) {
+ AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
+ }
+ num_succeed ++;
}
- num_succeed ++;
+ new_str = MemFree (new_str);
} else {
num_fail++;
}
+ old_str = MemFree (old_str);
}
return num_succeed;
@@ -16493,6 +19544,7 @@ NLM_EXTERN Int4 DoConvertActionToObjectListEx (ConvertActionPtr action, ValNodeP
} else {
for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
/* there may be multiple qualifiers */
+ MemSet (&remove_constraint, 0, sizeof (StringConstraint));
remove_constraint.case_sensitive = TRUE;
remove_constraint.match_location = String_location_equals;
remove_constraint.not_present = FALSE;
@@ -16636,7 +19688,7 @@ NLM_EXTERN Int4 DoRemoveActionToObjectList (RemoveActionPtr action, ValNodePtr o
NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra)
{
ValNodePtr vnp;
- CharPtr str1, str2, cp, tmp;
+ CharPtr str1, str2, str3, cp, tmp;
Int4 len, num_succeed = 0, diff, left_len, right_len;
FieldTypePtr field_from, field_to;
@@ -16646,8 +19698,10 @@ NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodeP
for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra);
- str2 = GetTextPortionFromString (str1, action->portion);
- if (str2 != NULL) {
+ str2 = GetTextPortionFromString (str1, action->portion);
+ str3 = StringSave (str2);
+ ApplyTextTransformsToString (&str3, action->transform);
+ if (str3 != NULL) {
if (action->remove_from_parsed) {
cp = FindTextPortionLocationInString (str1, action->portion);
if (cp != NULL) {
@@ -16680,7 +19734,7 @@ NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodeP
SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str1, ExistingTextOption_replace_old, batch_extra);
}
}
- if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str2, action->existing_text, batch_extra)) {
+ if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str3, action->existing_text, batch_extra)) {
if (also_change_mrna) {
AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
}
@@ -16689,6 +19743,7 @@ NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodeP
}
str1 = MemFree (str1);
str2 = MemFree (str2);
+ str3 = MemFree (str3);
}
field_from = FieldTypeFree (field_from);
field_to = FieldTypeFree (field_to);
@@ -16702,7 +19757,7 @@ NLM_EXTERN Int4 DoParseActionToObjectList (AECRParseActionPtr action, ValNodePtr
}
-static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep)
+static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolPtr created_protein_features)
{
StringConstraintPtr scp;
ApplyActionPtr a;
@@ -16724,7 +19779,7 @@ static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep)
field_type = FieldTypeFromAECRAction (act);
if (field_type == FieldType_cds_gene_prot) {
entityID = ObjMgrGetEntityIDForChoice(sep);
- object_list = BuildCGPSetList (entityID, act);
+ object_list = BuildCGPSetList (entityID, act, created_protein_features);
} else {
object_list = GetObjectListForAECRActionEx (sep, act, batch_extra);
@@ -17040,6 +20095,23 @@ static int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2)
}
+/* Comparison callback for lists of field-type nodes, used with ValNodeSort
+ * and ValNodeUnique.
+ * ptr1 and ptr2 each point to a ValNodePtr. The result is that of
+ * CompareFieldTypesEx with its third argument TRUE, or 0 when either
+ * pointer is NULL.
+ */
+static int LIBCALLBACK SortVnpByFieldTypeAndSourceQualifier (VoidPtr ptr1, VoidPtr ptr2)
+
+{
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
+ int rval = 0;
+
+ if (ptr1 != NULL && ptr2 != NULL) {
+ /* the sort passes pointers to list elements, hence the extra dereference */
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ rval = CompareFieldTypesEx (vnp1, vnp2, TRUE);
+ }
+
+ return rval;
+}
+
+
static void GetBioSourceFields (BioSourcePtr biop, Pointer userdata)
{
@@ -17062,29 +20134,10 @@ NLM_EXTERN void SortUniqueFieldTypeList (ValNodePtr PNTR field_list)
NLM_EXTERN ValNodePtr GetSourceQualSampleFieldList (SeqEntryPtr sep)
{
ValNodePtr field_list = NULL;
- ValNodePtr vnp_prev = NULL, vnp, sq;
- Boolean done = FALSE;
VisitBioSourcesInSep (sep, &field_list, GetBioSourceFields);
- field_list = ValNodeSort (field_list, SortVnpByFieldType);
- ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree);
-
- /* rearrange so that taxname is always first */
- for (vnp = field_list; vnp != NULL && !done; vnp = vnp->next) {
- if (vnp->choice == FieldType_source_qual
- && (sq = vnp->data.ptrvalue) != NULL
- && sq->choice == SourceQualChoice_textqual
- && sq->data.intvalue == Source_qual_taxname) {
- if (vnp_prev != NULL) {
- vnp_prev->next = vnp->next;
- vnp->next = field_list;
- field_list = vnp;
- }
- done = TRUE;
- } else {
- vnp_prev = vnp;
- }
- }
+ field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier);
+ ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree);
return field_list;
}
@@ -17093,8 +20146,7 @@ NLM_EXTERN ValNodePtr GetSourceQualSampleFieldList (SeqEntryPtr sep)
NLM_EXTERN ValNodePtr GetSourceQualSampleFieldListForSeqEntryList (ValNodePtr list)
{
ValNodePtr field_list = NULL;
- ValNodePtr vnp_prev = NULL, vnp, sq;
- Boolean done = FALSE;
+ ValNodePtr vnp;
if (list == NULL) {
return NULL;
@@ -17103,25 +20155,8 @@ NLM_EXTERN ValNodePtr GetSourceQualSampleFieldListForSeqEntryList (ValNodePtr li
for (vnp = list; vnp != NULL; vnp = vnp->next) {
VisitBioSourcesInSep (vnp->data.ptrvalue, &field_list, GetBioSourceFields);
}
- field_list = ValNodeSort (field_list, SortVnpByFieldType);
- ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree);
-
- /* rearrange so that taxname is always first */
- for (vnp = field_list; vnp != NULL && !done; vnp = vnp->next) {
- if (vnp->choice == FieldType_source_qual
- && (sq = vnp->data.ptrvalue) != NULL
- && sq->choice == SourceQualChoice_textqual
- && sq->data.intvalue == Source_qual_taxname) {
- if (vnp_prev != NULL) {
- vnp_prev->next = vnp->next;
- vnp->next = field_list;
- field_list = vnp;
- }
- done = TRUE;
- } else {
- vnp_prev = vnp;
- }
- }
+ field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier);
+ ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree);
return field_list;
}
@@ -17170,7 +20205,8 @@ static void GetRnaQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer us
/* add product if appropriate */
if ((type->choice == RnaFeatType_preRNA || type->choice == RnaFeatType_mRNA
- || type->choice == RnaFeatType_rRNA || type->choice == RnaFeatType_miscRNA)
+ || type->choice == RnaFeatType_rRNA || type->choice == RnaFeatType_miscRNA
+ || type->choice == RnaFeatType_any)
&& rrp->ext.choice == 1
&& !StringHasNoText (rrp->ext.value.ptrvalue)) {
rq = RnaQualNew ();
@@ -17194,7 +20230,7 @@ static void GetRnaQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer us
}
/* add tRNA specific if appropriate */
- if (type->choice == RnaFeatType_tRNA) {
+ if (type->choice == RnaFeatType_tRNA || (type->choice == RnaFeatType_any && rrp->type == 2)) {
/* codons recognized */
rq = RnaQualNew ();
rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
@@ -17209,7 +20245,7 @@ static void GetRnaQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer us
}
/* add ncRNA class if appropriate and present */
- if (type->choice == RnaFeatType_ncRNA
+ if ((type->choice == RnaFeatType_ncRNA || type->choice == RnaFeatType_any)
&& rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL
&& !StringHasNoText (rgp->_class)) {
rq = RnaQualNew ();
@@ -17386,12 +20422,14 @@ static void CollectBioseqCallback (BioseqPtr bsp, Pointer data)
}
+/*
static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data)
{
if (bsp != NULL && data != NULL && !ISA_aa (bsp->mol)) {
ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
}
}
+*/
static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
@@ -17432,7 +20470,8 @@ static ValNodePtr CollectCommentDescriptors (SeqEntryPtr sep)
return NULL;
}
- VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback);
+ /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
+ seq_list = CollectNucBioseqs (sep);
for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list);
@@ -17454,6 +20493,18 @@ static void CollectStructuredCommentsCallback (SeqDescrPtr sdp, Pointer data)
}
+static void CollectDBLinksCallback (SeqDescrPtr sdp, Pointer data)
+{
+ UserObjectPtr uop;
+
+ if (sdp != NULL && data != NULL && sdp->choice == Seq_descr_user
+ && (uop = sdp->data.ptrvalue) != NULL
+ && IsUserObjectDBLink (uop)) {
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ }
+}
+
+
static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
SeqDescrPtr sdp;
@@ -17492,7 +20543,8 @@ static ValNodePtr CollectDeflineDescriptors (SeqEntryPtr sep)
return NULL;
}
- VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback);
+ /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
+ seq_list = CollectNucBioseqs (sep);
for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list);
@@ -17540,7 +20592,8 @@ static ValNodePtr CollectGenbankBlockDescriptors (SeqEntryPtr sep)
return NULL;
}
- VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback);
+ /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
+ seq_list = CollectNucBioseqs (sep);
for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list);
@@ -17563,7 +20616,7 @@ NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr s
break;
case FieldType_cds_gene_prot:
entityID = ObjMgrGetEntityIDForChoice(sep);
- object_list = BuildCGPSetList (entityID, NULL);
+ object_list = BuildCGPSetList (entityID, NULL, NULL);
break;
case FieldType_feature_field:
VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback);
@@ -17582,7 +20635,8 @@ NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr s
VisitDescriptorsInSep (sep, &object_list, CollectStructuredCommentsCallback);
break;
case FieldType_misc:
- VisitBioseqsInSep (sep, &object_list, CollectNucBioseqCallback);
+ /* VisitBioseqsInSep (sep, &object_list, CollectNucBioseqCallback); */
+ object_list = CollectNucBioseqs (sep);
ValNodeLink (&object_list, CollectCommentDescriptors (sep));
break;
}
@@ -17590,6 +20644,37 @@ NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr s
}
+typedef struct seqcollector {
+ ValNodePtr object_list;
+ ConstraintChoiceSetPtr csp;
+} SeqCollectorData, PNTR SeqCollectorPtr;
+
+
+static void SeqCollectorCallback (BioseqPtr bsp, Pointer data)
+{
+ SeqCollectorPtr s;
+
+ if ((s = (SeqCollectorPtr) data) == NULL) {
+ return;
+ }
+
+ if (DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, s->csp)) {
+ ValNodeAddPointer (&(s->object_list), OBJ_BIOSEQ, bsp);
+ }
+}
+
+
+NLM_EXTERN ValNodePtr GetSequenceListForConstraint (SeqEntryPtr sep, ConstraintChoiceSetPtr csp)
+{
+ SeqCollectorData s;
+
+ MemSet (&s, 0, sizeof (SeqCollectorData));
+ s.csp = csp;
+ VisitBioseqsInSep (sep, &s, SeqCollectorCallback);
+ return s.object_list;
+}
+
+
NLM_EXTERN ValNodePtr GetFieldListForFieldType (Uint1 field_type, SeqEntryPtr sep)
{
ValNodePtr fields = NULL;
@@ -17678,7 +20763,7 @@ NLM_EXTERN ValNodePtr GetAECRSampleList (AECRActionPtr act, SeqEntryPtr sep)
field_type = FieldTypeFromAECRAction (act);
if (field_type == FieldType_cds_gene_prot) {
entityID = ObjMgrGetEntityIDForChoice(sep);
- object_list = BuildCGPSetList (entityID, act);
+ object_list = BuildCGPSetList (entityID, act, NULL);
} else {
object_list = GetObjectListForAECRActionEx (sep, act, batch_extra);
}
@@ -17698,6 +20783,7 @@ NLM_EXTERN ValNodePtr GetAECRSampleList (AECRActionPtr act, SeqEntryPtr sep)
fields = FieldTypeListFree (fields);
batch_extra = BatchExtraFree (batch_extra);
+ DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
FreeObjectList (object_list);
return list;
@@ -17803,15 +20889,86 @@ NLM_EXTERN void GetAECRExistingTextList (Uint1 field_type, SeqEntryPtr sep, FILE
}
+static void InsertBlanksInRow (ValNodePtr row, Int4 insert_pos, Int4Ptr num_field_per_pos, Int4 num_blanks)
+{
+ ValNodePtr vnp, prev, vnp_blank;
+ Int4 pos = 0, skip;
+
+ /* first, skip accession */
+ prev = row;
+ vnp = row->next;
+ while (vnp != NULL && pos <= insert_pos) {
+ for (skip = 0; skip < num_field_per_pos[pos] && vnp != NULL; skip++, vnp = vnp->next) {
+ prev = vnp;
+ }
+ pos++;
+ }
+ for (skip = 0; skip < num_blanks; skip++) {
+ vnp_blank = ValNodeNew (NULL);
+ vnp_blank->next = prev->next;
+ prev->next = vnp_blank;
+ }
+
+}
+
+
+static void AddListToTabTable (ValNodePtr vals, ValNodePtr text_table, ValNodePtr this_row, Int4 pos, Int4Ptr num_field_per_pos)
+{
+ Int4 num_new_fields;
+ ValNodePtr vnp;
+
+ num_new_fields = ValNodeLen (vals);
+ if (num_new_fields > num_field_per_pos[pos]) {
+ /* go back and insert blanks in all the previous rows */
+ for (vnp = text_table; vnp != NULL; vnp = vnp->next) {
+ InsertBlanksInRow (vnp->data.ptrvalue, pos, num_field_per_pos, num_new_fields - num_field_per_pos[pos]);
+ }
+ num_field_per_pos[pos] = num_new_fields;
+ }
+ ValNodeLink (&this_row, vals);
+ while (num_new_fields < num_field_per_pos[pos]) {
+ ValNodeAddPointer (&this_row, 0, NULL);
+ num_new_fields++;
+ }
+}
+
+
+static ValNodePtr StartRowWithSourceFields (CharPtr id, BioseqPtr bsp, ValNodePtr src_field_list, Int4Ptr num_field_per_pos, ValNodePtr text_table)
+{
+ ValNodePtr text_row = NULL;
+ SeqDescPtr sdp;
+ ValNodePtr vals, vnp_f;
+ Int4 pos;
+ SeqMgrDescContext context;
+
+ /* add accession */
+ ValNodeAddPointer (&text_row, 0, StringSave (id));
+
+ /* add source fields */
+ if (src_field_list != NULL) {
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+ for (vnp_f = src_field_list, pos = 0; vnp_f != NULL; vnp_f = vnp_f->next, pos++) {
+ vals = GetMultipleFieldValuesForObject (OBJ_SEQDESC, sdp, vnp_f, NULL, NULL);
+ AddListToTabTable (vals, text_table, text_row, pos, num_field_per_pos);
+ }
+ }
+ return text_row;
+}
+
+
NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, SeqEntryPtr sep, FILE *fp)
{
ValNodePtr object_list, vnp_f, vnp_o;
ValNodePtr fields = NULL;
+ ValNodePtr text_table = NULL, text_row;
BioseqPtr bsp;
Char id_buf[255];
CharPtr txt1 = NULL, title;
- SeqDescrPtr sdp, pub_sdp;
- SeqMgrDescContext context, pub_context;
+ SeqDescrPtr pub_sdp;
+ SeqMgrDescContext pub_context;
+ Int4 num_orig_fields;
+ Int4Ptr num_field_per_pos;
+ Int4 pos, i;
if (field_type == 0) {
object_list = GetObjectListForFieldType (FieldType_source_qual, sep);
@@ -17828,16 +20985,61 @@ NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, S
fields = GetFieldListForFieldType (field_type, sep);
/* remove fields for which there is no data */
RemoveFieldsForWhichThereAreNoData (&fields, object_list);
+ }
+ num_orig_fields = ValNodeLen (src_field_list);
+ num_field_per_pos = (Int4Ptr) MemNew (sizeof (Int4) * num_orig_fields);
+ for (pos = 0; pos < num_orig_fields; pos++) {
+ num_field_per_pos[pos] = 1;
+ }
+
+ /* get text table */
+ for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) {
+ bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue);
+ if (bsp != NULL) {
+ /* first column is accession */
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
+ if (field_type == FieldType_pub) {
+ for (pub_sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &pub_context);
+ pub_sdp != NULL;
+ pub_sdp = SeqMgrGetNextDescriptor (bsp, pub_sdp, Seq_descr_pub, &pub_context)) {
+
+ /* Get Publication Title */
+ title = GetFieldValueForObject (OBJ_SEQDESC, pub_sdp, fields, NULL);
+
+ if (!StringHasNoText (title)) {
+ text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table);
+
+ /* add publication title */
+ ValNodeAddPointer (&text_row, 0, title);
+
+ /* add row to table */
+ ValNodeAddPointer (&text_table, 0, text_row);
+ }
+ title = MemFree (title);
+ }
+ } else {
+ text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table);
+ /* get requested fields */
+ for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
+ txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL);
+ ValNodeAddPointer (&text_row, 0, txt1);
+ }
+ /* add row to table */
+ ValNodeAddPointer (&text_table, 0, text_row);
+ }
+ }
}
/* add header */
/* accession is first column */
fprintf (fp, "Accession");
/* list source fields first */
- for (vnp_f = src_field_list; vnp_f != NULL; vnp_f = vnp_f->next) {
+ for (vnp_f = src_field_list, pos = 0; vnp_f != NULL; vnp_f = vnp_f->next, pos++) {
txt1 = SummarizeFieldType (vnp_f);
- fprintf (fp, "\t%s", txt1);
+ for (i = 0; i < num_field_per_pos[pos]; i++) {
+ fprintf (fp, "\t%s", txt1);
+ }
txt1 = MemFree (txt1);
}
/* list fields */
@@ -17847,65 +21049,12 @@ NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, S
txt1 = MemFree (txt1);
}
fprintf (fp, "\n");
-
- for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) {
- bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue);
- if (bsp == NULL) {
- id_buf[0] = 0;
- } else {
- SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
- }
-
- if (field_type == FieldType_pub) {
- for (pub_sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &pub_context);
- pub_sdp != NULL;
- pub_sdp = SeqMgrGetNextDescriptor (bsp, pub_sdp, Seq_descr_pub, &pub_context)) {
-
- /* Get Publication Title */
- title = GetFieldValueForObject (OBJ_SEQDESC, pub_sdp, fields, NULL);
- if (!StringHasNoText (title)) {
- /* print accession */
- fprintf (fp, "%s", id_buf);
- /* print source fields */
- if (src_field_list != NULL) {
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
- for (vnp_f = src_field_list; vnp_f != NULL; vnp_f = vnp_f->next) {
- txt1 = GetFieldValueForObject (OBJ_SEQDESC, sdp, vnp_f, NULL);
- fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1);
- txt1 = MemFree (txt1);
- }
- }
- /* print publication title */
- fprintf (fp, "\t%s", title == NULL ? "" : title);
- fprintf (fp, "\n");
- }
- title = MemFree (title);
- }
- } else {
- /* print accession */
- fprintf (fp, "%s", id_buf);
- /* print source fields */
- if (src_field_list != NULL) {
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
- for (vnp_f = src_field_list; vnp_f != NULL; vnp_f = vnp_f->next) {
- txt1 = GetFieldValueForObject (OBJ_SEQDESC, sdp, vnp_f, NULL);
- fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1);
- txt1 = MemFree (txt1);
- }
- }
- /* get requested fields */
- for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
- txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL);
- fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1);
- txt1 = MemFree (txt1);
- }
- fprintf (fp, "\n");
- }
- }
+ WriteTabTableToFile (text_table, fp);
+ FreeTabTable(text_table);
fields = FieldTypeListFree (fields);
-
object_list = FreeObjectList (object_list);
+ num_field_per_pos = MemFree (num_field_per_pos);
}
@@ -18112,6 +21261,74 @@ static void GetNcbiFileSourceForBioseq
}
+static void
+GetGeneralIdTextSourcesForBioseq
+(BioseqPtr bsp,
+ Boolean db_only,
+ TextPortionPtr portion,
+ ValNodePtr PNTR source_list)
+{
+ SeqIdPtr sip;
+ ParseSourceInfoPtr psip;
+ DbtagPtr dbtag;
+ CharPtr src_str = NULL, str;
+
+ if (bsp == NULL || source_list == NULL)
+ {
+ return;
+ }
+
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ if (sip->choice == SEQID_GENERAL && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL) {
+ if (db_only) {
+ str = GetTextPortionFromString (dbtag->db, portion);
+ } else {
+ src_str = GetDbtagString (dbtag);
+ str = GetTextPortionFromString (src_str, portion);
+ src_str = MemFree (src_str);
+ }
+ if (str != NULL) {
+ psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str);
+ if (psip != NULL) {
+ ValNodeAddPointer (source_list, 0, psip);
+ } else {
+ str = MemFree (str);
+ }
+ }
+ }
+ }
+}
+
+
+static void GetGeneralIDSourcesForBioseq
+(BioseqPtr bsp,
+ ValNodePtr general_id,
+ TextPortionPtr tp,
+ ValNodePtr PNTR source_list)
+{
+ if (general_id == NULL) {
+ return;
+ }
+ switch (general_id->choice) {
+ case ParseSrcGeneralId_whole_text:
+ GetGeneralIdTextSourcesForBioseq (bsp, FALSE, tp, source_list);
+ break;
+ case ParseSrcGeneralId_db:
+ GetGeneralIdTextSourcesForBioseq (bsp, TRUE, tp, source_list);
+ break;
+ case ParseSrcGeneralId_tag:
+ if (StringHasNoText (general_id->data.ptrvalue)) {
+ GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, NULL, source_list);
+ } else {
+ GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, general_id->data.ptrvalue, source_list);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+
static void StripBankitCommentForParse (SeqDescrPtr sdp, TextPortionPtr tp)
{
UserObjectPtr uop;
@@ -18523,6 +21740,9 @@ static void FindParseSourceBioseqCallback (BioseqPtr bsp, Pointer userdata)
case ParseSrc_file_id:
GetNcbiFileSourceForBioseq (bsp, psp->portion, &(psp->src_list));
break;
+ case ParseSrc_general_id:
+ GetGeneralIDSourcesForBioseq (bsp, psp->src->data.ptrvalue, psp->portion, &(psp->src_list));
+ break;
case ParseSrc_org:
GetOrgParseSourcesForBioseq (bsp, psp->src->data.ptrvalue, psp->portion, &(psp->src_list));
break;
@@ -19396,6 +22616,7 @@ static Int4 ApplyParseActionToSeqEntry (ParseActionPtr action, SeqEntryPtr sep)
}
/* fix source text */
FixCapitalizationInString (&(psip->parse_src_txt), action->capitalization, orgnames);
+ ApplyTextTransformsToString (&(psip->parse_src_txt), action->transform);
/* find destinations */
AddParseDestinations (psip, action->dest);
@@ -19799,9 +23020,9 @@ ApplyOneFeatureToBioseq
&& (q->qual == Feat_qual_legal_gene || q->qual == Feat_qual_legal_gene_description)) {
if (gene == NULL) {
gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, slp);
- CreateDataForFeature (gene, Feature_type_gene);
+ CreateDataForFeature (gene, Macro_feature_type_gene);
}
- f.type = Feature_type_gene;
+ f.type = Macro_feature_type_gene;
SetQualOnFeature (gene, &f, NULL, q->val, ExistingTextOption_replace_old);
} else {
f.type = feature_type;
@@ -19818,7 +23039,7 @@ ApplyOneFeatureToBioseq
if (add_mrna) {
slp = SeqLocCopy (slp);
mrna = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, slp);
- CreateDataForFeature (mrna, Feature_type_mRNA);
+ CreateDataForFeature (mrna, Macro_feature_type_mRNA);
for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) {
q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue;
if (q != NULL && q->qual == Feat_qual_legal_product) {
@@ -19826,7 +23047,7 @@ ApplyOneFeatureToBioseq
f.field->next = NULL;
f.field->choice = FeatQualChoice_legal_qual;
f.field->data.intvalue = q->qual;
- f.type = Feature_type_mRNA;
+ f.type = Macro_feature_type_mRNA;
SetQualOnFeature (mrna, &f, NULL, q->val, ExistingTextOption_replace_old);
}
}
@@ -19845,6 +23066,9 @@ static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, Seq
SeqLocPtr slp;
SeqIdPtr sip;
Int4 num_created = 0;
+ Int4 len;
+ CharPtr list_delimiters = " ,\t;";
+ CharPtr cp, tmp;
if (sep == NULL || action == NULL) return 0;
@@ -19852,10 +23076,21 @@ static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, Seq
/* relevant values : seq_list, add_redundant, apply_to_parts, only_seg_num */
if (action->seq_list != NULL && action->seq_list->choice == SequenceListChoice_list) {
for (vnp = action->seq_list->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
- sip = CreateSeqIdFromText (vnp->data.ptrvalue, sep);
- bsp = BioseqFind (sip);
- if (bsp != NULL) {
- AddSequenceOrParts (action, bsp, &bsp_list);
+ cp = (CharPtr) vnp->data.ptrvalue;
+ while (cp != NULL && *cp != 0) {
+ len = StringCSpn (cp, list_delimiters);
+ if (len > 0) {
+ tmp = (CharPtr) MemNew (sizeof (Char) * (len + 1));
+ StringNCpy (tmp, cp, len);
+ tmp[len] = 0;
+ sip = CreateSeqIdFromText (tmp, sep);
+ bsp = BioseqFind (sip);
+ if (bsp != NULL) {
+ AddSequenceOrParts (action, bsp, &bsp_list);
+ }
+ cp += len;
+ }
+ cp += StringSpn (cp, list_delimiters);
}
}
} else {
@@ -19890,7 +23125,8 @@ static void ConvertAndRemoveFeatureCollectionCallback (SeqFeatPtr sfp, Pointer d
if (sfp == NULL || data == NULL) return;
p = (ConvertAndRemoveFeatureCollectionPtr) data;
- if (sfp->idx.subtype == p->featdef && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, p->constraint_set)) {
+ if ((p->featdef == FEATDEF_ANY || sfp->idx.subtype == p->featdef )
+ && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, p->constraint_set)) {
ValNodeAddPointer (&(p->feature_list), OBJ_SEQFEAT, sfp);
}
}
@@ -19955,7 +23191,7 @@ static void ApplyRNADestinationOptions (SeqFeatPtr sfp, Int4 featdef_to, Convert
&& dst_options != NULL
&& dst_options->choice == ConvertFeatureDstOptions_ncrna_class
&& !StringHasNoText (dst_options->data.ptrvalue)) {
- ff.type = Feature_type_ncRNA;
+ ff.type = Macro_feature_type_ncRNA;
ff.field = ValNodeNew (NULL);
ff.field->choice = FeatQualChoice_legal_qual;
ff.field->data.intvalue = Feat_qual_legal_ncRNA_class;
@@ -20115,6 +23351,34 @@ static Boolean ConvertRegionToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatu
}
+static Boolean ConvertncRNAToMiscBinding (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
+{
+ RnaRefPtr rrp;
+ RNAGenPtr rgp;
+ ImpFeatPtr ifp;
+
+ rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+ if (NULL == rrp)
+ return FALSE;
+
+ if (rrp->ext.choice == 1) {
+ /* move product to note */
+ SetStringValue (&(sfp->comment), rrp->ext.value.ptrvalue, ExistingTextOption_append_semi);
+ } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL
+ && !StringHasNoText (rgp->product)) {
+ SetStringValue (&(sfp->comment), rgp->product, ExistingTextOption_append_semi);
+ }
+ rrp = RnaRefFree (rrp);
+ sfp->data.choice = SEQFEAT_IMP;
+ ifp = ImpFeatNew ();
+ ifp->key = StringSave ("misc_binding");
+ sfp->data.value.ptrvalue = ifp;
+ sfp->idx.subtype = 0;
+
+ return TRUE;
+}
+
+
static Boolean ConvertCommentToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
ImpFeatPtr ifp;
@@ -20475,6 +23739,12 @@ static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_t
}
+static Boolean MiscFeatToGeneConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
+{
+ return ConvertMiscFeatToGene (sfp);
+}
+
+
typedef struct convertfeattable {
Uint2 seqfeat_from;
Uint2 featdef_from;
@@ -20524,6 +23794,9 @@ static ConvertFeatTableData conversion_functions[] = {
{ SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_CDREGION, FEATDEF_CDS,
MiscFeatToCodingRegionConvertFunc,
"Use misc_feature comment for coding region product name." },
+ { SEQFEAT_IMP, FEATDEF_misc_feature, SEQFEAT_GENE, FEATDEF_GENE,
+ MiscFeatToGeneConvertFunc,
+ "Creates gene with locus value from misc_feature comment." },
{ SEQFEAT_REGION, FEATDEF_REGION, SEQFEAT_IMP, FEATDEF_ANY,
ConvertRegionToImp,
"Creates a misc_feature with the region name saved as a /note qualifier." },
@@ -20567,6 +23840,9 @@ static ConvertFeatTableData conversion_functions[] = {
{ SEQFEAT_RNA, FEATDEF_ANY, SEQFEAT_RNA, FEATDEF_ANY,
ConvertRNAToRNA,
"Changes type of RNA feature." },
+ { SEQFEAT_RNA, FEATDEF_ncRNA, SEQFEAT_IMP, FEATDEF_misc_binding,
+ ConvertncRNAToMiscBinding,
+ "Changes ncRNA to misc_binding." },
{ SEQFEAT_PROT, FEATDEF_ANY, SEQFEAT_PROT, FEATDEF_ANY,
ConvertProtToProt,
"Changes type of protein feature." },
@@ -20967,7 +24243,9 @@ static Boolean ApplyPartial5SetActionToSeqFeat (Partial5SetActionPtr action, Seq
if (!partial5) {
SetSeqLocPartial (sfp->location, TRUE, partial3);
if (action->extend && bsp != NULL) {
- ExtendSeqLocToEnd (sfp->location, bsp, TRUE);
+ if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) {
+ ChooseBestFrame (sfp);
+ }
}
rval = TRUE;
}
@@ -21118,6 +24396,75 @@ static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
}
+static Boolean ApplyPartialBothSetActionToSeqFeat (PartialBothSetActionPtr action, SeqFeatPtr sfp)
+{
+ Boolean rval = FALSE;
+ Boolean make_partial = FALSE;
+ Uint1 strand;
+ BioseqPtr bsp;
+ Boolean partial5, partial3;
+
+ if (action == NULL || sfp == NULL) return FALSE;
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ strand = SeqLocStrand (sfp->location);
+
+ switch (action->constraint) {
+ case Partial_both_set_constraint_all:
+ make_partial = TRUE;
+ break;
+ case Partial_both_set_constraint_at_end:
+ make_partial = At5EndOfSequence (sfp->location, bsp) && At3EndOfSequence (sfp->location, bsp);
+ break;
+ }
+
+ if (make_partial) {
+ CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+ if (!partial5 || !partial3) {
+ SetSeqLocPartial (sfp->location, TRUE, TRUE);
+ if (action->extend && bsp != NULL) {
+ ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
+ if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) {
+ ChooseBestFrame (sfp);
+ }
+ }
+ rval = TRUE;
+ }
+ }
+ return rval;
+}
+
+
+static Boolean ApplyClearBothPartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
+{
+ Boolean rval = FALSE, clear_partial = FALSE;
+ Boolean partial5, partial3;
+ BioseqPtr bsp;
+
+ if (sfp == NULL) return FALSE;
+
+ switch (action) {
+ case Partial_both_clear_constraint_all:
+ clear_partial = TRUE;
+ break;
+ case Partial_both_clear_constraint_not_at_end:
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ clear_partial = !At5EndOfSequence (sfp->location, bsp) && !At3EndOfSequence(sfp->location, bsp);
+ break;
+ case Partial_3_clear_constraint_good_end:
+ clear_partial = !HasGoodStopCodon(sfp);
+ break;
+ }
+ if (clear_partial) {
+ CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+ if (partial5 || partial3) {
+ SetSeqLocPartial (sfp->location, FALSE, FALSE);
+ rval = TRUE;
+ }
+ }
+ return rval;
+}
+
+
static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr sfp)
{
Boolean hasNulls, rval = FALSE;
@@ -21189,13 +24536,29 @@ static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr
static Boolean ExtendSeqFeat5 (SeqFeatPtr sfp)
{
BioseqPtr bsp;
+ CdRegionPtr crp;
+ Int4 start_diff;
+ Boolean partial5, partial3;
if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL)
{
return FALSE;
}
- if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE) > 0)
+
+ if ((start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) > 0)
{
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
+ CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+ if (partial5) {
+ crp = (CdRegionPtr) sfp->data.value.ptrvalue;
+ if (crp != NULL) {
+ if (crp->frame == 0) {
+ crp->frame = 1;
+ }
+ crp->frame = (crp->frame + start_diff - 1) % 3 + 1;
+ }
+ }
+ }
return TRUE;
}
else
@@ -21208,13 +24571,31 @@ static Boolean ExtendSeqFeat5 (SeqFeatPtr sfp)
static Boolean ExtendSeqFeat3 (SeqFeatPtr sfp)
{
BioseqPtr bsp;
+ Uint1 strand;
+ Int4 stop_before, stop_after;
if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL)
{
return FALSE;
}
+ strand = SeqLocStrand (sfp->location);
+ if (strand == Seq_strand_minus) {
+ stop_before = SeqLocStart (sfp->location);
+ } else {
+ stop_before = SeqLocStop (sfp->location);
+ }
ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
- return TRUE;
+ if (strand == Seq_strand_minus) {
+ stop_after = SeqLocStart (sfp->location);
+ } else {
+ stop_after = SeqLocStop (sfp->location);
+ }
+ if (stop_before == stop_after)
+ {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
}
@@ -21242,6 +24623,12 @@ static Boolean ApplyLocationEditTypeToSeqFeat (ValNodePtr action, SeqFeatPtr sfp
case LocationEditType_clear_3_partial:
rval = ApplyClear3PartialToSeqFeat (action->data.intvalue, sfp);
break;
+ case LocationEditType_set_both_partial:
+ rval = ApplyPartialBothSetActionToSeqFeat (action->data.ptrvalue, sfp);
+ break;
+ case LocationEditType_clear_both_partial:
+ rval = ApplyClearBothPartialToSeqFeat (action->data.intvalue, sfp);
+ break;
case LocationEditType_convert:
rval = ApplyConvertLocationToSeqFeat (action->data.intvalue, sfp);
break;
@@ -21264,6 +24651,7 @@ static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionP
Int4 num_affected = 0;
/* variables for logging */
CharPtr old_loc = NULL, new_loc;
+ Boolean retranslated;
if (action == NULL) return 0;
@@ -21279,10 +24667,14 @@ static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionP
old_loc = SeqLocPrintUseBestID (sfp->location);
}
if (ApplyLocationEditTypeToSeqFeat (action->action, sfp)) {
+ retranslated = FALSE;
+ if (sfp->data.choice == SEQFEAT_CDREGION && action->retranslate_cds) {
+ retranslated = RetranslateOneCDS (sfp, sfp->idx.entityID, TRUE, TRUE);
+ }
num_affected++;
if (log_fp != NULL) {
new_loc = SeqLocPrintUseBestID (sfp->location);
- fprintf (log_fp, "Changed location %s to %s\n", old_loc, new_loc);
+ fprintf (log_fp, "Changed location %s to %s%s\n", old_loc, new_loc, retranslated ? " and retranslated protein" : "");
new_loc = MemFree (new_loc);
}
}
@@ -21293,20 +24685,30 @@ static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionP
}
+typedef struct molinfoblocklog {
+ MolinfoBlockPtr mib;
+ FILE *log_fp;
+ Boolean any_change;
+} MolInfoBlockLogData, PNTR MolInfoBlockLogPtr;
+
static void ApplyMolinfoBlockCallback (BioseqPtr bsp, Pointer data)
{
+ MolInfoBlockLogPtr ml;
MolinfoBlockPtr mib;
ValNodePtr field;
MolInfoPtr mip;
+ Char id_buf[100];
+ CharPtr field_name;
if (bsp == NULL) {
return;
}
- mib = (MolinfoBlockPtr) data;
- if (mib == NULL) {
+ ml = (MolInfoBlockLogPtr) data;
+ if (ml == NULL || ml->mib == NULL) {
return;
}
+ mib = ml->mib;
if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, mib->constraint)) {
return;
@@ -21351,15 +24753,470 @@ static void ApplyMolinfoBlockCallback (BioseqPtr bsp, Pointer data)
for (field = mib->to_list; field != NULL; field = field->next) {
- SetSequenceQualOnBioseq (bsp, field);
+ if (SetSequenceQualOnBioseq (bsp, field)) {
+ if (ml->log_fp != NULL) {
+ SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
+ field_name = GetSequenceQualName (field);
+ fprintf (ml->log_fp, "Changed to %s for %s\n", field_name, id_buf);
+ field_name = MemFree (field_name);
+ }
+ ml->any_change = TRUE;
+ }
}
}
+/* Applies mib to every Bioseq in sep, optionally logging each change to
+ * log_fp (may be NULL).  Returns TRUE when the callback recorded a change.
+ */
+static Boolean ApplyMolinfoBlockToSeqEntryEx (SeqEntryPtr sep, MolinfoBlockPtr mib, FILE *log_fp)
+{
+  MolInfoBlockLogData state;
+
+  /* every member is set explicitly; the callback reads all three */
+  state.mib = mib;
+  state.log_fp = log_fp;
+  state.any_change = FALSE;
+
+  VisitBioseqsInSep (sep, &state, ApplyMolinfoBlockCallback);
+
+  return state.any_change;
+}
+
+
NLM_EXTERN void ApplyMolinfoBlockToSeqEntry (SeqEntryPtr sep, MolinfoBlockPtr mib)
{
- VisitBioseqsInSep (sep, mib, ApplyMolinfoBlockCallback);
+ ApplyMolinfoBlockToSeqEntryEx (sep, mib, NULL);
+}
+
+static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp);
+
+/* Dispatches a fix-capitalization action to the matching helper.
+ * Returns TRUE when the helper reports that something changed; returns
+ * FALSE for NULL arguments or an unrecognized action choice.
+ */
+static Boolean ApplyFixCapsToSeqEntry (SeqEntryPtr sep, FixCapsActionPtr action, FILE *log_fp)
+{
+  if (sep == NULL || action == NULL) {
+    return FALSE;
+  }
+
+  switch (action->choice) {
+    case FixCapsAction_pub:
+      return ApplyFixPubCapsToSeqEntry (action->data.ptrvalue, sep, log_fp);
+    case FixCapsAction_src_country:
+      return FixupCountryQualsWithLog (sep, FALSE, log_fp);
+    case FixCapsAction_mouse_strain:
+      return FixupMouseStrains (sep, log_fp);
+    case FixCapsAction_src_qual:
+      return FixSrcQualCaps (sep, action->data.intvalue, log_fp);
+    default:
+      return FALSE;
+  }
+}
+
+
+/* BioSource visitor: rewrites each collection-date subsource with the value
+ * produced by ReformatDateWithMonthNames when that value differs from the
+ * stored one.  data is an optional LogInfoPtr; when present, the change is
+ * written to its fp (if not NULL) and data_in_log is set.
+ */
+static void FixCollectionDatesCallback (BioSourcePtr biop, Pointer data)
+{
+  LogInfoPtr   log_info = (LogInfoPtr) data;
+  SubSourcePtr sub;
+  CharPtr      reformatted;
+
+  if (biop == NULL) {
+    return;
+  }
+
+  for (sub = biop->subtype; sub != NULL; sub = sub->next) {
+    if (sub->subtype != SUBSRC_collection_date) {
+      continue;
+    }
+    reformatted = ReformatDateWithMonthNames (sub->name);
+    if (reformatted == NULL || StringCmp (reformatted, sub->name) == 0) {
+      /* nothing to change; discard the candidate */
+      reformatted = MemFree (reformatted);
+      continue;
+    }
+    if (log_info != NULL) {
+      if (log_info->fp != NULL) {
+        fprintf (log_info->fp, "Changed '%s' to '%s'\n", sub->name, reformatted);
+      }
+      log_info->data_in_log = TRUE;
+    }
+    /* the subsource takes ownership of the reformatted string */
+    sub->name = MemFree (sub->name);
+    sub->name = reformatted;
+  }
+}
+
+
+/* Returns the first lat_lon subsource of biop for which IsCorrectLatLonFormat
+ * reports a bad format or an out-of-range latitude/longitude, or NULL when
+ * biop is NULL or no such subsource exists.
+ */
+NLM_EXTERN SubSourcePtr FindBadLatLon (BioSourcePtr biop)
+{
+  SubSourcePtr candidate;
+  Boolean      format_ok, lat_in_range, lon_in_range;
+
+  if (biop == NULL) {
+    return NULL;
+  }
+
+  for (candidate = biop->subtype; candidate != NULL; candidate = candidate->next) {
+    if (candidate->subtype != SUBSRC_lat_lon) {
+      continue;
+    }
+    IsCorrectLatLonFormat (candidate->name, &format_ok, &lat_in_range, &lon_in_range);
+    if (!(format_ok && lat_in_range && lon_in_range)) {
+      return candidate;
+    }
+  }
+  return NULL;
+}
+
+
+/* Descriptor visitor: appends source descriptors that have a bad lat_lon
+ * subsource to the ValNode list whose address is passed in userdata.
+ */
+static void FindBadLatLonDesc (SeqDescrPtr sdp, Pointer userdata)
+{
+  ValNodePtr PNTR bad_list = (ValNodePtr PNTR) userdata;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_source || bad_list == NULL) {
+    return;
+  }
+  if (FindBadLatLon (sdp->data.ptrvalue) == NULL) {
+    return;
+  }
+  ValNodeAddPointer (bad_list, OBJ_SEQDESC, sdp);
+}
+
+
+/* Feature visitor: appends BioSource features that have a bad lat_lon
+ * subsource to the ValNode list whose address is passed in userdata.
+ */
+static void FindBadLatLonFeat (SeqFeatPtr sfp, Pointer userdata)
+{
+  ValNodePtr PNTR bad_list = (ValNodePtr PNTR) userdata;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || bad_list == NULL) {
+    return;
+  }
+  if (FindBadLatLon (sfp->data.value.ptrvalue) == NULL) {
+    return;
+  }
+  ValNodeAddPointer (bad_list, OBJ_SEQFEAT, sfp);
+}
+
+
+/* Collects the source descriptors (choice OBJ_SEQDESC) and then the source
+ * features (choice OBJ_SEQFEAT) in sep that carry a bad lat_lon subsource.
+ * Returns the resulting list, NULL when none are found.
+ */
+NLM_EXTERN ValNodePtr FindBadLatLonObjects (SeqEntryPtr sep)
+{
+  ValNodePtr bad_objects = NULL;
+
+  VisitDescriptorsInSep (sep, &bad_objects, FindBadLatLonDesc);
+  VisitFeaturesInSep (sep, &bad_objects, FindBadLatLonFeat);
+
+  return bad_objects;
+}
+
+
+/* Appends "altitude:<extra_text>" to the first SUBSRC_other subsource of
+ * biop, separated from any existing note text by "; ".  When biop has no
+ * SUBSRC_other subsource, a new one is created at the head of the list.
+ * Does nothing when biop is NULL or extra_text has no text.
+ */
+static void AddAltitudeToSubSourceNote (BioSourcePtr biop, CharPtr extra_text)
+{
+  SubSourcePtr note;
+  CharPtr      combined, old_text, separator;
+  CharPtr      note_fmt = "%s%saltitude:%s";
+
+  if (biop == NULL || StringHasNoText (extra_text)) {
+    return;
+  }
+
+  for (note = biop->subtype; note != NULL; note = note->next) {
+    if (note->subtype == SUBSRC_other) {
+      break;
+    }
+  }
+  if (note == NULL) {
+    note = SubSourceNew ();
+    note->subtype = SUBSRC_other;
+    note->next = biop->subtype;
+    biop->subtype = note;
+  }
+
+  if (note->name == NULL) {
+    old_text = "";
+    separator = "";
+  } else {
+    old_text = note->name;
+    separator = "; ";
+  }
+
+  /* the format string is longer than the literal text it emits, so its
+   * length also covers the separator and the terminating NUL */
+  combined = (CharPtr) MemNew (sizeof (Char) * (StringLen (note->name)
+                                                + StringLen (extra_text)
+                                                + StringLen (note_fmt)));
+  sprintf (combined, note_fmt, old_text, separator, extra_text);
+  note->name = MemFree (note->name);
+  note->name = combined;
+}
+
+
+/* Tries to repair the first bad lat_lon subsource on every BioSource
+ * referenced by object_list (source descriptors with choice OBJ_SEQDESC and
+ * BioSource features with choice OBJ_SEQFEAT, as produced by
+ * FindBadLatLonObjects).
+ *
+ * fp is an optional log stream.  Corrections are applied whether or not a
+ * log is requested; previously a NULL fp silently disabled the whole fix.
+ *
+ * If FixLatLonFormat returns text after a comma, that text is cut from the
+ * lat_lon value and moved to a subsource note via AddAltitudeToSubSourceNote.
+ *
+ * Returns TRUE if at least one lat_lon value was replaced.
+ */
+NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list)
+{
+  ValNodePtr   vnp;
+  SeqDescrPtr  sdp;
+  SeqFeatPtr   sfp;
+  BioSourcePtr biop;
+  SubSourcePtr bad_ssp;
+  CharPtr      fix, extra_text;
+  Boolean      any_change = FALSE;
+
+  for (vnp = object_list; vnp != NULL; vnp = vnp->next)
+  {
+    biop = NULL;
+    if (vnp->choice == OBJ_SEQDESC)
+    {
+      sdp = vnp->data.ptrvalue;
+      if (sdp != NULL && sdp->choice == Seq_descr_source)
+      {
+        biop = (BioSourcePtr) sdp->data.ptrvalue;
+      }
+    }
+    else if (vnp->choice == OBJ_SEQFEAT)
+    {
+      sfp = vnp->data.ptrvalue;
+      if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC)
+      {
+        biop = (BioSourcePtr) sfp->data.value.ptrvalue;
+      }
+    }
+
+    /* FindBadLatLon returns NULL for a NULL biop */
+    bad_ssp = FindBadLatLon (biop);
+    if (bad_ssp == NULL)
+    {
+      continue;
+    }
+
+    fix = FixLatLonFormat (bad_ssp->name);
+    if (fix == NULL)
+    {
+      if (fp != NULL)
+      {
+        fprintf (fp, "Unable to correct %s\n", bad_ssp->name);
+      }
+      continue;
+    }
+
+    /* split off anything after the first comma */
+    extra_text = StringChr (fix, ',');
+    if (extra_text != NULL)
+    {
+      *extra_text = 0;
+      extra_text++;
+      /* cast: passing a negative char to isspace is undefined */
+      while (isspace ((unsigned char) *extra_text))
+      {
+        extra_text++;
+      }
+    }
+
+    if (fp != NULL)
+    {
+      fprintf (fp, "Corrected %s to %s\n", bad_ssp->name, fix);
+    }
+    bad_ssp->name = MemFree (bad_ssp->name);
+    bad_ssp->name = fix;
+
+    if (extra_text != NULL)
+    {
+      /* extra_text points into fix, which stays alive as bad_ssp->name */
+      AddAltitudeToSubSourceNote (biop, extra_text);
+      if (fp != NULL)
+      {
+        fprintf (fp, "Moved %s to subsource note\n", extra_text);
+      }
+    }
+    any_change = TRUE;
+  }
+  return any_change;
+}
+
+
+/* Rewrites *seq so that every 'i' or 'I' is emitted as a lower-case 'i'
+ * enclosed in angle brackets ("<i>"), adding only the brackets that are
+ * missing.  The string is left untouched when no bracket is missing.
+ * When lip is not NULL the change is written to lip->fp (if not NULL) and
+ * lip->data_in_log is set.
+ */
+static void ReplaceiInSeq (CharPtr PNTR seq, LogInfoPtr lip)
+{
+  CharPtr hit, fixed, in, out;
+  Int4    missing = 0;
+
+  if (seq == NULL) {
+    return;
+  }
+
+  /* first pass: count the brackets that have to be inserted */
+  for (hit = StringISearch (*seq, "i"); hit != NULL; hit = StringISearch (hit + 1, "i")) {
+    if (hit == *seq || *(hit - 1) != '<') {
+      missing++;
+    }
+    if (*(hit + 1) != '>') {
+      missing++;
+    }
+  }
+  if (missing == 0) {
+    return;
+  }
+
+  /* second pass: copy, inserting brackets around each inosine */
+  fixed = (CharPtr) MemNew (sizeof (Char) * (StringLen (*seq) + 1 + missing));
+  out = fixed;
+  for (in = *seq; *in != 0; in++) {
+    if (*in != 'i' && *in != 'I') {
+      *out++ = *in;
+      continue;
+    }
+    if (in == *seq || *(in - 1) != '<') {
+      *out++ = '<';
+    }
+    *out++ = 'i';
+    if (*(in + 1) != '>') {
+      *out++ = '>';
+    }
+  }
+  *out = 0;
+
+  if (lip != NULL) {
+    if (lip->fp != NULL) {
+      fprintf (lip->fp, "Changed primer sequence from '%s' to '%s'\n", *seq, fixed);
+    }
+    lip->data_in_log = TRUE;
+  }
+
+  *seq = MemFree (*seq);
+  *seq = fixed;
+}
+
+
+/* BioSource visitor: runs ReplaceiInSeq on the sequence of every forward
+ * and reverse primer in every PCR reaction of biop.  data is an optional
+ * LogInfoPtr that is passed through to ReplaceiInSeq.
+ */
+NLM_EXTERN void FixiPCRPrimerSeqsCallback (BioSourcePtr biop, Pointer data)
+{
+  LogInfoPtr        log_info = (LogInfoPtr) data;
+  PCRReactionSetPtr reaction;
+  PCRPrimerPtr      primer;
+
+  if (biop == NULL) {
+    return;
+  }
+
+  reaction = biop->pcr_primers;
+  while (reaction != NULL) {
+    for (primer = reaction->forward; primer != NULL; primer = primer->next) {
+      ReplaceiInSeq (&(primer->seq), log_info);
+    }
+    for (primer = reaction->reverse; primer != NULL; primer = primer->next) {
+      ReplaceiInSeq (&(primer->seq), log_info);
+    }
+    reaction = reaction->next;
+  }
+}
+
+
+typedef struct fixproteinnameformat { /* state handed to FixProteinNameFormatCallback */
+ Boolean any_change; /* set to TRUE by the callback when it edits a protein name */
+ FILE *fp; /* optional log stream; the callback skips logging when NULL */
+ ValNodePtr orgnames; /* list filled by GetOrgNamesInRecordCallback; each entry is searched for in protein names */
+} FixProteinNameFormatData, PNTR FixProteinNameFormatPtr;
+
+
+/* Feature visitor: removes organism names from the names of a protein
+ * feature.  data is a FixProteinNameFormatPtr.
+ *
+ * For every organism name in f->orgnames and every protein name, the first
+ * case-insensitive occurrence of the organism name is removed, together with
+ * a surrounding pair of () or [] if present.  When the removed text ends the
+ * protein name and is preceded by whitespace, the name is truncated at that
+ * whitespace; otherwise the remainder of the name (minus one following
+ * whitespace character) is shifted left over the removed text.
+ *
+ * Fixes relative to the previous version:
+ *   - no longer reads the byte before the start of the name when the match
+ *     is at offset 0;
+ *   - shifts the tail with an explicit forward copy instead of StringCpy on
+ *     overlapping buffers, which is undefined;
+ *   - skips empty organism names;
+ *   - passes unsigned char values to isspace.
+ */
+static void FixProteinNameFormatCallback (SeqFeatPtr sfp, Pointer data)
+{
+  FixProteinNameFormatPtr f;
+  ProtRefPtr              prp;
+  ValNodePtr              vnp_n, vnp_p;
+  CharPtr                 name, cp, src, dst;
+  Int4                    org_len, len;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL
+      || (f = (FixProteinNameFormatPtr) data) == NULL) {
+    return;
+  }
+
+  for (vnp_n = f->orgnames; vnp_n != NULL; vnp_n = vnp_n->next) {
+    org_len = StringLen (vnp_n->data.ptrvalue);
+    if (org_len == 0) {
+      continue;
+    }
+    for (vnp_p = prp->name; vnp_p != NULL; vnp_p = vnp_p->next) {
+      name = (CharPtr) vnp_p->data.ptrvalue;
+      cp = StringISearch (name, vnp_n->data.ptrvalue);
+      if (cp == NULL) {
+        continue;
+      }
+      len = org_len;
+
+      /* widen the span to include an enclosing bracket pair */
+      if (cp != name
+          && ((*(cp - 1) == '(' && *(cp + len) == ')') || (*(cp - 1) == '[' && *(cp + len) == ']'))) {
+        cp--;
+        len += 2;
+      }
+
+      if (cp != name && *(cp + len) == 0 && isspace ((unsigned char) *(cp - 1))) {
+        /* organism name is a trailing word: cut at the preceding space */
+        *(cp - 1) = 0;
+      } else {
+        if (isspace ((unsigned char) *(cp + len))) {
+          len++;
+        }
+        /* source and destination overlap, so copy forward by hand */
+        src = cp + len;
+        dst = cp;
+        while ((*dst = *src) != 0) {
+          dst++;
+          src++;
+        }
+      }
+
+      f->any_change = TRUE;
+      if (f->fp != NULL) {
+        fprintf (f->fp, "Removed '%s' from protein name (now '%s')\n", (CharPtr) vnp_n->data.ptrvalue, (CharPtr) vnp_p->data.ptrvalue);
+      }
+    }
+  }
+}
+
+
+/* Dispatches a fix-format action (collection date, lat_lon, primers or
+ * protein name) over sep.  log_fp is handed to the helpers as their log
+ * stream.  Returns TRUE when the selected helper reports a change; FALSE for
+ * NULL arguments or an unrecognized action choice.
+ */
+static Boolean ApplyFixFormatToSeqEntry (SeqEntryPtr sep, FixFormatActionPtr action, FILE *log_fp)
+{
+  LogInfoData              log_info;
+  FixProteinNameFormatData prot_state;
+  ValNodePtr               bad_latlon = NULL;
+
+  if (sep == NULL || action == NULL) {
+    return FALSE;
+  }
+
+  MemSet (&log_info, 0, sizeof (LogInfoData));
+  log_info.fp = log_fp;
+
+  switch (action->choice) {
+    case FixFormatAction_collection_date:
+      VisitBioSourcesInSep (sep, &log_info, FixCollectionDatesCallback);
+      break;
+
+    case FixFormatAction_lat_lon:
+      bad_latlon = FindBadLatLonObjects (sep);
+      log_info.data_in_log = LatLonAutocorrectList (log_info.fp, bad_latlon);
+      bad_latlon = FreeObjectList (bad_latlon);
+      break;
+
+    case FixFormatAction_primers:
+      VisitBioSourcesInSep (sep, &log_info, FixiPCRPrimerSeqsCallback);
+      break;
+
+    case FixFormatAction_protein_name:
+      MemSet (&prot_state, 0, sizeof (FixProteinNameFormatData));
+      prot_state.fp = log_fp;
+      /* gather organism names first, then strip them from protein names */
+      VisitBioSourcesInSep (sep, &(prot_state.orgnames), GetOrgNamesInRecordCallback);
+      VisitFeaturesInSep (sep, &prot_state, FixProteinNameFormatCallback);
+      prot_state.orgnames = ValNodeFree (prot_state.orgnames);
+      log_info.data_in_log = prot_state.any_change;
+      break;
+
+    default:
+      break;
+  }
+
+  return log_info.data_in_log;
+}
+
+
+typedef struct replacepair { /* one entry of the macro_spell_fixes table */
+ CharPtr find; /* text to search for; NULL marks the end of the table */
+ CharPtr replace; /* text substituted for find */
+} ReplacePairData, PNTR ReplacePairPtr;
+
+static ReplacePairData macro_spell_fixes[] = { /* misspelling -> correction pairs applied in order by SpellFixSeqEntry */
+ {"univeristy", "University" },
+ {"univerisity", "University" },
+ {"univercity", "University" },
+ {"uiniversity", "University" },
+ {"uinversity", "University" },
+ {"univesity", "University" },
+ {"uviversity", "University" },
+ {"universtiy", "University" },
+ {"protien", "protein" },
+ {"Insitiute", "Institute" },
+ {"Instutite", "Institute" },
+ {"instute", "Institute" },
+ {"institue", "Institute" },
+ {"insitute", "Institute" },
+ {"insititute","Institute" },
+ {NULL, NULL}}; /* sentinel: SpellFixSeqEntry stops at the first NULL find */
+
+
+/* Callback passed to FindReplaceInEntity: sets the Boolean that userdata
+ * points to (if not NULL) to TRUE.  The ID arguments are ignored.
+ */
+static void SetFlagWhenChanged (Uint2 entityID, Uint4 itemID, Uint2 itemtype, Pointer userdata)
+{
+  BoolPtr changed = (BoolPtr) userdata;
+
+  if (changed == NULL) {
+    return;
+  }
+  *changed = TRUE;
+}
+
+
+/* Runs every find/replace pair of macro_spell_fixes over the entity that
+ * owns sep.  Each pair that causes a replacement is reported to log_fp
+ * (if not NULL).  The data argument is not used.  Returns TRUE when at
+ * least one pair caused a replacement.
+ */
+static Boolean SpellFixSeqEntry (SeqEntryPtr sep, Pointer data, FILE *log_fp)
+{
+  ReplacePairPtr fix;
+  Boolean        fixed_any = FALSE, fixed_this;
+  Uint2          entityID = ObjMgrGetEntityIDForChoice (sep);
+
+  for (fix = macro_spell_fixes; fix->find != NULL; fix++) {
+    fixed_this = FALSE;
+    FindReplaceInEntity (entityID, fix->find, fix->replace, FALSE, TRUE, TRUE,
+                         FALSE, 0, NULL, NULL, NULL, FALSE, SetFlagWhenChanged, &fixed_this);
+    if (!fixed_this) {
+      continue;
+    }
+    if (log_fp != NULL) {
+      fprintf (log_fp, "Replaced '%s' with '%s'\n", fix->find, fix->replace);
+    }
+    fixed_any = TRUE;
+  }
+  return fixed_any;
}
@@ -21458,18 +25315,18 @@ static Boolean DoesDescriptorMatchType (SeqDescrPtr sdp, Int4 descriptortype)
return FALSE;
} else if (descriptortype == Descriptor_type_structured_comment) {
if (sdp->choice == Seq_descr_user
- && (uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
- || uop->type == NULL
- || StringCmp (uop->type->str, "StructuredComment") != 0) {
+ && ((uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
+ || uop->type == NULL
+ || StringCmp (uop->type->str, "StructuredComment") != 0)) {
return FALSE;
} else {
return TRUE;
}
} else if (descriptortype == Descriptor_type_genome_project_id) {
if (sdp->choice == Seq_descr_user
- && (uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
- || uop->type == NULL
- || StringCmp (uop->type->str, "GenomeProjectsDB") != 0) {
+ && ((uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
+ || uop->type == NULL
+ || StringCmp (uop->type->str, "GenomeProjectsDB") != 0)) {
return FALSE;
} else {
return TRUE;
@@ -21610,7 +25467,7 @@ NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action)
if (action == NULL) {
return TRUE;
}
- if (action->affiliation || action->authors || action->title) {
+ if (action->affiliation || action->authors || action->title || action->affil_country) {
return FALSE;
} else {
return TRUE;
@@ -21621,10 +25478,26 @@ NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action)
typedef struct fixpubcaps {
FixPubCapsActionPtr action;
ValNodePtr orgnames;
- Int4 num_fields;
+ Int4 num_pub_fields;
+ Int4 num_sub_fields;
ValNodePtr object_list;
} FixPubCapsData, PNTR FixPubCapsPtr;
+
+/* Returns TRUE when pub is a PUB_Sub, or a (possibly nested) PUB_Equiv
+ * whose first contained pub is a PUB_Sub.  Only the first element of an
+ * equiv is examined.  Returns FALSE for NULL.
+ */
+static Boolean IsPubASub (ValNodePtr pub)
+{
+  /* descend through equivs to the first non-equiv pub */
+  while (pub != NULL && pub->choice == PUB_Equiv) {
+    pub = pub->data.ptrvalue;
+  }
+  if (pub != NULL && pub->choice == PUB_Sub) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data)
{
FixPubCapsPtr f;
@@ -21645,10 +25518,16 @@ static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data)
orig = GetPubFieldFromPub (pub, Publication_field_title, NULL);
if (orig != NULL) {
tmp = StringSave (orig);
- FixCapitalizationInTitle (&tmp, TRUE, f->orgnames);
+ if (!f->action->punct_only) {
+ FixCapitalizationInTitle (&tmp, TRUE, f->orgnames);
+ }
if (StringCmp (orig, tmp) != 0) {
SetPubFieldOnPub (pub, Publication_field_title, NULL, tmp, ExistingTextOption_replace_old);
- f->num_fields++;
+ if (IsPubASub(pub)) {
+ f->num_sub_fields++;
+ } else {
+ f->num_pub_fields++;
+ }
}
tmp = MemFree (tmp);
orig = MemFree (orig);
@@ -21656,7 +25535,7 @@ static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data)
}
}
- if (f->action->authors) {
+ if (f->action->authors && !f->action->punct_only) {
alp = GetAuthListPtr (pdp, NULL);
if (alp != NULL) {
for (names = alp->names; names != NULL; names = names->next) {
@@ -21664,7 +25543,11 @@ static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data)
ap_orig = AsnIoMemCopy (ap, (AsnReadFunc) AuthorAsnRead, (AsnWriteFunc) AuthorAsnWrite);
FixCapitalizationInAuthor (ap);
if (!AsnIoMemComp (ap, ap_orig, (AsnWriteFunc) AuthorAsnWrite)) {
- f->num_fields++;
+ if (IsPubASub(pdp->pub)) {
+ f->num_sub_fields++;
+ } else {
+ f->num_pub_fields++;
+ }
}
ap_orig = AuthorFree (ap_orig);
}
@@ -21677,11 +25560,43 @@ static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data)
}
if (alp != NULL && alp->affil != NULL) {
affil_orig = AsnIoMemCopy (alp->affil, (AsnReadFunc) AffilAsnRead, (AsnWriteFunc) AffilAsnWrite);
- FixCapsInPubAffil (alp->affil);
- if (!AsnIoMemComp (alp->affil, affil_orig, (AsnWriteFunc) AffilAsnWrite)) {
- f->num_fields++;
+ FixCapsInPubAffilEx (alp->affil, f->action->punct_only);
+ if (!AsnIoMemComp (alp->affil, affil_orig, (AsnWriteFunc) AffilAsnWrite)) {
+ if (IsPubASub(pdp->pub)) {
+ f->num_sub_fields++;
+ } else {
+ f->num_pub_fields++;
}
- affil_orig = AffilFree (affil_orig);
+ }
+ affil_orig = AffilFree (affil_orig);
+ }
+ } else if (f->action->affil_country) {
+ if (alp == NULL) {
+ alp = GetAuthListPtr (pdp, NULL);
+ }
+ if (alp != NULL && alp->affil != NULL && !StringHasNoText (alp->affil->country)) {
+ orig = StringSave (alp->affil->country);
+ FixCapitalizationInCountryStringEx (&(alp->affil->country), f->action->punct_only);
+ if (StringCmp (orig, alp->affil->country) != 0) {
+ if (IsPubASub(pdp->pub)) {
+ f->num_sub_fields++;
+ } else {
+ f->num_pub_fields++;
+ }
+ }
+ if (StringCmp (alp->affil->country, "USA") == 0 && !StringHasNoText (alp->affil->sub) && !f->action->punct_only) {
+ orig = StringSave (alp->affil->sub);
+ FixStateAbbreviationsInAffil (alp->affil, NULL);
+ if (StringCmp (orig, alp->affil->sub) != 0) {
+ if (IsPubASub(pdp->pub)) {
+ f->num_sub_fields++;
+ } else {
+ f->num_pub_fields++;
+ }
+ }
+ orig = MemFree (orig);
+ }
+ orig = MemFree (orig);
}
}
}
@@ -21715,16 +25630,17 @@ static void CollectPubObjectsDescCallback (SeqDescPtr sdp, Pointer data)
}
-static Int4 ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep)
+static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp)
{
FixPubCapsData f;
ValNodePtr vnp;
- Int4 num_succeeded = 0;
PubdescPtr pdp;
SeqFeatPtr sfp;
SeqDescPtr sdp;
+ CharPtr summ;
+ Boolean rval = FALSE;
- if (action == NULL || sep == NULL) return 0;
+ if (action == NULL || sep == NULL) return FALSE;
MemSet (&f, 0, sizeof (FixPubCapsData));
f.action = action;
@@ -21735,7 +25651,7 @@ static Int4 ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr s
if (f.object_list == NULL) {
/* nothing to change */
- return 0;
+ return FALSE;
}
if (action->title) {
@@ -21757,23 +25673,819 @@ static Int4 ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr s
f.orgnames = ValNodeFree (f.orgnames);
- return f.num_fields;
+ if (f.num_sub_fields > 0 || f.num_pub_fields > 0) {
+ rval = TRUE;
+ if (log_fp != NULL) {
+ summ = SummarizeFixPubCapsAction (action);
+ if (f.num_sub_fields > 0) {
+ fprintf (log_fp, "Fixed capitalization in %d publication fields in submitter blocks during %s\n", f.num_sub_fields, summ);
+ }
+ if (f.num_pub_fields > 0) {
+ fprintf (log_fp, "Fixed capitalization in %d publication fields in publication blocks during %s\n", f.num_pub_fields, summ);
+ }
+ summ = MemFree (summ);
+ }
+ }
+
+ return rval;
+}
+
+
+/* Returns TRUE when field names something the sort-fields action can
+ * handle: the product qualifier of a CDS or protein feature, or the protein
+ * name of a CDS-gene-prot set.  Returns FALSE for NULL or incomplete fields.
+ *
+ * A feature field with no qualifier choice (ffield->field == NULL) is now
+ * rejected instead of being dereferenced.
+ */
+NLM_EXTERN Boolean IsFieldSortable (FieldTypePtr field)
+{
+  Boolean         rval = FALSE;
+  FeatureFieldPtr ffield;
+
+  if (field == NULL) {
+    return FALSE;
+  }
+  if (field->choice == FieldType_feature_field) {
+    ffield = field->data.ptrvalue;
+    if (ffield != NULL && ffield->field != NULL) {
+      if ((ffield->type == Macro_feature_type_cds || ffield->type == Macro_feature_type_prot)
+          && ffield->field->choice == FeatQualChoice_legal_qual
+          && ffield->field->data.intvalue == Feat_qual_legal_product) {
+        rval = TRUE;
+      }
+    }
+  } else if (field->choice == FieldType_cds_gene_prot) {
+    if (field->data.intvalue == CDSGeneProt_field_prot_name) {
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+/* Sorts the values of action->field on every object in sep that matches
+ * action->constraint and is appropriate for the field.  Returns the number
+ * of objects for which SortFieldsForObject reported a change; 0 for NULL or
+ * non-sortable input.
+ *
+ * The object list is now released with FreeObjectList before returning;
+ * previously it was leaked on every call.
+ */
+static Int4 SortFieldsInSeqEntry (SortFieldsActionPtr action, SeqEntryPtr sep)
+{
+  ValNodePtr object_list = NULL, vnp;
+  Int4       num = 0;
+
+  if (action == NULL || action->field == NULL || !IsFieldSortable(action->field) || sep == NULL) {
+    return 0;
+  }
+
+  object_list = GetObjectListForFieldType (action->field->choice, sep);
+  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
+    if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, action->constraint)
+        && IsObjectAppropriateForFieldValue(vnp->choice, vnp->data.ptrvalue, action->field)) {
+      if (SortFieldsForObject (vnp->choice, vnp->data.ptrvalue, action->field, action->order)) {
+        num++;
+      }
+    }
+  }
+
+  object_list = FreeObjectList (object_list);
+
+  return num;
+}
+
+
+typedef struct dupfeats { /* state handed to FindDuplicateFeatsCallback */
+ ValNodePtr delete_list; /* OBJ_SEQFEAT entries appended by the callback for features to remove */
+ RemoveDuplicateFeatureActionPtr action; /* supplies type, case_sensitive, ignore_partials and rd_constraint */
+} DupFeatsData, PNTR DupFeatsPtr;
+
+
+/* Bioseq visitor: walks the indexed features of bsp pairwise (each feature
+ * against its successor) and appends duplicates to dfp->delete_list.
+ * data is a DupFeatsPtr.
+ *
+ * For a matching pair, the second feature is listed when it satisfies
+ * rd_constraint; otherwise the first is listed when it satisfies the
+ * constraint and was not the feature listed by the most recent
+ * second-feature addition.
+ *
+ * A NULL dfp->action is now treated as "nothing to do" instead of being
+ * dereferenced.
+ */
+static void FindDuplicateFeatsCallback (BioseqPtr bsp, Pointer data)
+{
+  DupFeatsPtr       dfp;
+  SeqFeatPtr        sfp1, sfp2;
+  SeqMgrFeatContext fcontext;
+  Uint1             featdef;
+  ValNodePtr        vnp_prev = NULL;
+
+  if (bsp == NULL || (dfp = (DupFeatsPtr) data) == NULL || dfp->action == NULL) {
+    return;
+  }
+
+  /* featdef 0 asks SeqMgrGetNextFeature for features of any type */
+  if (dfp->action->type == Macro_feature_type_any) {
+    featdef = 0;
+  } else {
+    featdef = GetFeatdefFromFeatureType (dfp->action->type);
+  }
+  sfp1 = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
+  while (sfp1 != NULL) {
+    sfp2 = SeqMgrGetNextFeature (bsp, sfp1, 0, featdef, &fcontext);
+    if (sfp1 == sfp2) {
+      /* iterator did not advance; stop rather than loop forever */
+      break;
+    }
+    if (DoFeaturesMatch (sfp1, sfp2, FALSE, dfp->action->case_sensitive, dfp->action->ignore_partials)) {
+      if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp2, dfp->action->rd_constraint)) {
+        vnp_prev = ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp2);
+      } else if ((vnp_prev == NULL || vnp_prev->data.ptrvalue != sfp1)
+                 && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp1, dfp->action->rd_constraint)) {
+        ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp1);
+      }
+    }
+    sfp1 = sfp2;
+  }
+
+}
+
+
+/* Returns the list of duplicate features (choice OBJ_SEQFEAT) that
+ * FindDuplicateFeatsCallback selects across all Bioseqs in sep under the
+ * settings in action; NULL when none are selected.
+ */
+NLM_EXTERN ValNodePtr GetDuplicateFeaturesForRemoval (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action)
+{
+  DupFeatsData search;
+
+  /* zero first so delete_list starts out empty */
+  MemSet (&search, 0, sizeof (DupFeatsData));
+  search.action = action;
+  VisitBioseqsInSep (sep, &search, FindDuplicateFeatsCallback);
+
+  return search.delete_list;
+}
+
+
+/* Marks every feature in delete_list for deletion and deletes the marked
+ * objects from the entity.  When remove_proteins is TRUE, the product
+ * Bioseq of each listed coding region is marked as well and nuc-prot sets
+ * are renormalized afterwards.  The list itself is not freed here.
+ */
+NLM_EXTERN void RemoveDuplicateFeaturesInList (ValNodePtr delete_list, Uint2 entityID, Boolean remove_proteins)
+{
+  ValNodePtr  item;
+  SeqFeatPtr  feat;
+  BioseqPtr   product;
+  SeqEntryPtr top;
+
+  for (item = delete_list; item != NULL; item = item->next) {
+    feat = (SeqFeatPtr) item->data.ptrvalue;
+    if (feat == NULL) {
+      continue;
+    }
+    if (remove_proteins && feat->data.choice == SEQFEAT_CDREGION && feat->product != NULL) {
+      product = BioseqFindFromSeqLoc (feat->product);
+      if (product != NULL) {
+        product->idx.deleteme = TRUE;
+      }
+    }
+    feat->idx.deleteme = TRUE;
+  }
+
+  DeleteMarkedObjects (entityID, 0, NULL);
+
+  if (!remove_proteins) {
+    return;
+  }
+  top = GetTopSeqEntryForEntityID (entityID);
+  RenormalizeNucProtSets (top, TRUE);
+}
+
+
+/* Finds and removes duplicate features in sep according to action, writing
+ * the number removed to log_fp when it is not NULL.  Returns TRUE when at
+ * least one feature was selected for removal, FALSE otherwise (including a
+ * NULL action).
+ *
+ * The list of selected features is now released after use (only the list
+ * nodes; the features are owned by the entity), and a NULL action no longer
+ * causes a NULL dereference.
+ */
+NLM_EXTERN Boolean RemoveDuplicateFeaturesInSeqEntry (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action, FILE *log_fp)
+{
+  ValNodePtr delete_list;
+  Int4       num;
+
+  if (action == NULL) {
+    return FALSE;
+  }
+
+  delete_list = GetDuplicateFeaturesForRemoval (sep, action);
+  if (delete_list == NULL) {
+    return FALSE;
+  }
+
+  if (log_fp != NULL) {
+    num = ValNodeLen (delete_list);
+    fprintf (log_fp, "Removed %d duplicate features\n", num);
+  }
+
+  RemoveDuplicateFeaturesInList (delete_list, ObjMgrGetEntityIDForChoice(sep), action->remove_proteins);
+  delete_list = ValNodeFree (delete_list);
+  return TRUE;
+}
+
+
+/* Returns TRUE when txt consists solely of entries from word_list
+ * (compared case-insensitively) separated by whitespace and/or punctuation,
+ * and contains at least one such entry.  An entry only matches when it is
+ * followed by end of string, whitespace or punctuation.  Returns FALSE when
+ * txt has no text.
+ *
+ * Characters are converted to unsigned char before being handed to the
+ * <ctype.h> classifiers; passing a negative plain char is undefined.
+ */
+NLM_EXTERN Boolean DoesTextContainOnlyTheseWords (CharPtr txt, ValNodePtr word_list)
+{
+  CharPtr    cp;
+  ValNodePtr vnp;
+  Boolean    match;
+  Boolean    at_least_one = FALSE;
+  Int4       len;
+
+  if (StringHasNoText(txt)) {
+    return FALSE;
+  }
+
+  cp = txt;
+  while (isspace ((unsigned char) *cp) || ispunct ((unsigned char) *cp)) {
+    cp++;
+  }
+  match = TRUE;
+  while (*cp != 0 && match) {
+    match = FALSE;
+    /* try each allowed word at the current position */
+    for (vnp = word_list; vnp != NULL && !match; vnp = vnp->next) {
+      len = StringLen (vnp->data.ptrvalue);
+      if (StringNICmp (cp, vnp->data.ptrvalue, len) == 0
+          && (*(cp + len) == 0
+              || isspace ((unsigned char) *(cp + len))
+              || ispunct ((unsigned char) *(cp + len)))) {
+        match = TRUE;
+        cp += len;
+        at_least_one = TRUE;
+      }
+    }
+    /* skip separators before the next word */
+    while (isspace ((unsigned char) *cp) || ispunct ((unsigned char) *cp)) {
+      cp++;
+    }
+  }
+  return (match && at_least_one);
+}
+
+
+/* Splits txt at whitespace and punctuation and returns the pieces as a
+ * ValNode list of newly allocated strings (choice 0).  Returns NULL when
+ * txt has no text.  The caller owns the list and its strings.
+ *
+ * Characters are converted to unsigned char before being handed to the
+ * <ctype.h> classifiers; passing a negative plain char is undefined.
+ */
+static ValNodePtr WordListFromText (CharPtr txt)
+{
+  ValNodePtr list = NULL;
+  CharPtr    start, end, word;
+  Int4       len;
+
+  if (StringHasNoText(txt)) {
+    return NULL;
+  }
+
+  start = txt;
+
+  /* skip leading separators */
+  while (isspace ((unsigned char) *start) || ispunct ((unsigned char) *start)) {
+    start++;
+  }
+  while (*start != 0) {
+    /* start is on a word character; find the end of the word */
+    end = start + 1;
+    len = 1;
+    while (*end != 0 && !isspace ((unsigned char) *end) && !ispunct ((unsigned char) *end)) {
+      end++;
+      len++;
+    }
+    word = (CharPtr) MemNew (sizeof (Char) * (len + 1));
+    StringNCpy (word, start, len);
+    word[len] = 0;
+    ValNodeAddPointer (&list, 0, word);
+    start = end;
+    while (isspace ((unsigned char) *start) || ispunct ((unsigned char) *start)) {
+      start++;
+    }
+  }
+
+  return list;
+}
+
+
+static CharPtr s_SpecialLineageWords[] = { /* extra words RemoveLineageNoteFromBioSource adds to the lineage/taxname word list */
+ "Domain",
+ "Phylum",
+ "Kingdom",
+ "Family",
+ "Class",
+ "Superfamily",
+ "Order",
+ "Genus",
+ "Species",
+ "Organism",
+ "Note",
+ "Taxonomic classification",
+ "Lineage",
+ "Tax class/lineage",
+ NULL /* sentinel: the consuming loop stops at the first NULL entry */
+};
+
+/* Removes SUBSRC_other subsources and ORGMOD_other modifiers from biop
+ * whose text consists only of words found in the lineage, the taxname and
+ * s_SpecialLineageWords.  Only acts when HasTaxonomyID accepts biop and a
+ * non-empty lineage is present.  Each removal is written to fp when it is
+ * not NULL.  Returns TRUE when anything was removed.
+ */
+static Boolean RemoveLineageNoteFromBioSource (BioSourcePtr biop, FILE *fp)
+{
+  SubSourcePtr      sub;
+  SubSourcePtr PNTR sub_link;
+  OrgModPtr         mod;
+  OrgModPtr PNTR    mod_link;
+  ValNodePtr        allowed = NULL;
+  Boolean           removed_any = FALSE;
+  Int4              k;
+
+  if (!HasTaxonomyID (biop) || biop->org == NULL
+      || biop->org->orgname == NULL
+      || StringHasNoText (biop->org->orgname->lineage)) {
+    return FALSE;
+  }
+
+  /* vocabulary: lineage words, taxname words, then the fixed labels */
+  allowed = WordListFromText(biop->org->orgname->lineage);
+  ValNodeLink (&allowed, WordListFromText(biop->org->taxname));
+  for (k = 0; s_SpecialLineageWords[k] != NULL; k++) {
+    ValNodeAddPointer (&allowed, 0, StringSave (s_SpecialLineageWords[k]));
+  }
+
+  /* unlink matching subsource notes through a pointer to the link field */
+  sub_link = &(biop->subtype);
+  while ((sub = *sub_link) != NULL) {
+    if (sub->subtype == SUBSRC_other && DoesTextContainOnlyTheseWords(sub->name, allowed)) {
+      *sub_link = sub->next;
+      sub->next = NULL;
+      if (fp != NULL) {
+        fprintf (fp, "Removed note %s where lineage is %s\n", sub->name, biop->org->orgname->lineage);
+      }
+      sub = SubSourceFree (sub);
+      removed_any = TRUE;
+    } else {
+      sub_link = &(sub->next);
+    }
+  }
+
+  /* same treatment for organism modifier notes */
+  mod_link = &(biop->org->orgname->mod);
+  while ((mod = *mod_link) != NULL) {
+    if (mod->subtype == ORGMOD_other && DoesTextContainOnlyTheseWords(mod->subname, allowed)) {
+      *mod_link = mod->next;
+      mod->next = NULL;
+      if (fp != NULL) {
+        fprintf (fp, "Removed note %s where lineage is %s\n", mod->subname, biop->org->orgname->lineage);
+      }
+      mod = OrgModFree (mod);
+      removed_any = TRUE;
+    } else {
+      mod_link = &(mod->next);
+    }
+  }
+
+  allowed = ValNodeFreeData (allowed);
+  return removed_any;
+}
+
+
+/* BioSource visitor: removes lineage-only notes from biop.  data is an
+ * optional LogInfoPtr; its fp is used for logging and its data_in_log flag
+ * is set when something was removed.
+ */
+static void RemoveLineageNotesCallback (BioSourcePtr biop, Pointer data)
+{
+  LogInfoPtr log_info = (LogInfoPtr) data;
+  FILE       *out = NULL;
+
+  if (biop == NULL) {
+    return;
+  }
+  if (log_info != NULL) {
+    out = log_info->fp;
+  }
+
+  if (RemoveLineageNoteFromBioSource(biop, out) && log_info != NULL) {
+    log_info->data_in_log = TRUE;
+  }
+}
+
+
+/* Removes lineage-only notes from every BioSource in sep, logging to
+ * log_fp when it is not NULL.  Returns TRUE when any note was removed.
+ */
+static Boolean RemoveLineageNotesInSeqEntry (SeqEntryPtr sep, FILE *log_fp)
+{
+  LogInfoData log_info;
+
+  MemSet (&log_info, 0, sizeof (LogInfoData));
+  log_info.fp = log_fp;
+  VisitBioSourcesInSep (sep, &log_info, RemoveLineageNotesCallback);
+
+  return log_info.data_in_log;
+}
+
+
+typedef struct logandpointer { /* pairs log state with an action pointer for the xref visitor callbacks */
+ LogInfoData lid; /* fp is the optional log stream; data_in_log is set when a callback changes something */
+ Pointer action; /* cast by each callback to its own action type */
+} LogAndPointerData, PNTR LogAndPointerPtr;
+
+
+/* Tests a gene xref against a suppression filter: "any" accepts every
+ * xref, "suppressing" accepts xrefs for which SeqMgrGeneIsSuppressed is
+ * true, "non_suppressing" accepts the rest.  Returns FALSE for a NULL grp
+ * or an unrecognized filter value.
+ */
+static Boolean GeneXrefMatchesSuppression (GeneRefPtr grp, Uint2 suppression)
+{
+  if (grp == NULL) {
+    return FALSE;
+  }
+
+  switch (suppression) {
+    case Gene_xref_suppression_type_any:
+      return TRUE;
+    case Gene_xref_suppression_type_suppressing:
+      return (Boolean) (SeqMgrGeneIsSuppressed(grp) ? TRUE : FALSE);
+    case Gene_xref_suppression_type_non_suppressing:
+      return (Boolean) (SeqMgrGeneIsSuppressed(grp) ? FALSE : TRUE);
+    default:
+      return FALSE;
+  }
+}
+
+
+/* Tests a gene xref against a necessity filter: "any" accepts every xref;
+ * "necessary" accepts a non-suppressing xref that IsGeneXrefRedundant does
+ * not flag; "unnecessary" accepts a non-suppressing xref that it does flag.
+ * Returns FALSE for NULL arguments or an unrecognized filter value.
+ */
+static Boolean GeneXrefMatchesNecessary (SeqFeatPtr sfp, GeneRefPtr grp, Uint2 necessary)
+{
+  if (sfp == NULL || grp == NULL) {
+    return FALSE;
+  }
+
+  switch (necessary) {
+    case Gene_xref_necessary_type_any:
+      return TRUE;
+    case Gene_xref_necessary_type_necessary:
+      if (SeqMgrGeneIsSuppressed (grp)) {
+        return FALSE;
+      }
+      return (Boolean) (IsGeneXrefRedundant (sfp) ? FALSE : TRUE);
+    case Gene_xref_necessary_type_unnecessary:
+      if (SeqMgrGeneIsSuppressed (grp)) {
+        return FALSE;
+      }
+      return (Boolean) (IsGeneXrefRedundant (sfp) ? TRUE : FALSE);
+    default:
+      return FALSE;
+  }
}
-NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat, FILE *log_fp)
+static Boolean RemoveXref (SeqFeatPtr sfp, Uint2 choice, Pointer data)
{
- Int4 num_AECR = 0, num_parse = 0, num_feature = 0, num_fields = 0, num;
+ SeqFeatXrefPtr xref, xref_next, xref_prev = NULL;
+ Boolean removed = FALSE;
+
+ if (sfp == NULL) return FALSE;
+ for (xref = sfp->xref; xref != NULL; xref = xref_next) {
+ xref_next = xref->next;
+ if ((xref->data.choice == choice || choice == 0)
+ && (xref->data.value.ptrvalue == data || data == NULL)) {
+ if (xref_prev == NULL) {
+ sfp->xref = xref_next;
+ } else {
+ xref_prev->next = xref_next;
+ }
+ xref->next = NULL;
+ xref = SeqFeatXrefFree (xref);
+ removed = TRUE;
+ } else {
+ xref_prev = xref;
+ }
+ }
+ return removed;
+}
+
+
+/* Feature visitor for the remove-xrefs action.  data is a LogAndPointerPtr
+ * whose action is a RemoveXrefsActionPtr.  Only gene xrefs are handled: the
+ * gene xref of sfp is removed when the feature satisfies the action's
+ * constraint and the xref passes the feature-type, suppression and
+ * necessity filters.  A removal sets lid.data_in_log and is written to
+ * lid.fp when that is not NULL.
+ */
+static void MacroRemoveXrefsCallback(SeqFeatPtr sfp, Pointer data)
+{
+  LogAndPointerPtr     ctx;
+  RemoveXrefsActionPtr rm;
+  GeneXrefTypePtr      filter;
+  GeneRefPtr           grp;
+  CharPtr              desc;
+  ValNode              item;
+
+  if (sfp == NULL || (ctx = (LogAndPointerPtr)data) == NULL
+      || (rm = (RemoveXrefsActionPtr)ctx->action) == NULL
+      || rm->xref_type == NULL) {
+    return;
+  }
+  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, rm->constraint)) {
+    return;
+  }
+  if (rm->xref_type->choice != XrefType_gene) {
+    return;
+  }
+
+  grp = SeqMgrGetGeneXref (sfp);
+  if (grp == NULL) {
+    return;
+  }
+  filter = (GeneXrefTypePtr) rm->xref_type->data.ptrvalue;
+  if (filter == NULL) {
+    return;
+  }
+  if (filter->feature != Macro_feature_type_any && filter->feature != GetFeatureTypeFromFeatdef(sfp->idx.subtype)) {
+    return;
+  }
+  if (!GeneXrefMatchesSuppression(grp, filter->suppression)) {
+    return;
+  }
+  if (!GeneXrefMatchesNecessary(sfp, grp, filter->necessary)) {
+    return;
+  }
+  if (!RemoveXref(sfp, SEQFEAT_GENE, grp)) {
+    return;
+  }
+
+  ctx->lid.data_in_log = TRUE;
+  if (ctx->lid.fp != NULL) {
+    /* wrap the feature in a stack ValNode to obtain its description */
+    MemSet (&item, 0, sizeof (ValNode));
+    item.choice = OBJ_SEQFEAT;
+    item.data.ptrvalue = sfp;
+    desc = GetDiscrepancyItemText (&item);
+    fprintf (ctx->lid.fp, "Removed Gene xref from %s\n", desc);
+    desc = MemFree (desc);
+  }
+}
+
+
+/* Applies a remove-xrefs action to every feature in sep, logging to log_fp
+ * when it is not NULL.  Returns TRUE when any xref was removed.
+ *
+ * The state struct is now cleared through its own address.  The previous
+ * call cleared sizeof (LogAndPointerData) bytes starting at &ld.lid, which
+ * is only in bounds because lid is the first member.
+ */
+static Boolean MacroRemoveXrefs (SeqEntryPtr sep, RemoveXrefsActionPtr action, FILE *log_fp)
+{
+  LogAndPointerData ld;
+
+  MemSet (&ld, 0, sizeof (LogAndPointerData));
+  ld.lid.fp = log_fp;
+  ld.action = action;
+
+  VisitFeaturesInSep (sep, &ld, MacroRemoveXrefsCallback);
+  return ld.lid.data_in_log;
+}
+
+/* Feature visitor for the make-gene-xref action.  data is a
+ * LogAndPointerPtr whose action is a MakeGeneXrefActionPtr.  For a non-gene
+ * feature of the requested type that satisfies the action's constraint and
+ * has no gene xref yet, a copy of the overlapping gene's GeneRef is attached
+ * as a new xref at the head of sfp->xref.  A successful addition sets
+ * lid.data_in_log and is written to lid.fp when that is not NULL.
+ *
+ * The xref is now only attached (and logged) when both the xref node and
+ * the GeneRef copy were obtained.  Previously a failed SeqFeatXrefNew was
+ * dereferenced, and a failed copy left an xref with no gene data that was
+ * still reported as added.
+ */
+static void MacroMakeGeneXrefsCallback(SeqFeatPtr sfp, Pointer data)
+{
+  LogAndPointerPtr      lp;
+  MakeGeneXrefActionPtr action;
+  SeqFeatPtr            gene;
+  GeneRefPtr            grp;
+  CharPtr               text;
+  ValNode               vn;
+  SeqMgrFeatContext     context;
+  SeqFeatXrefPtr        xref;
+
+  if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE || (lp = (LogAndPointerPtr)data) == NULL
+      || (action = (MakeGeneXrefActionPtr) lp->action) == NULL) {
+    return;
+  }
+
+  if (action->feature != Macro_feature_type_any && action->feature != GetFeatureTypeFromFeatdef(sfp->idx.subtype)) {
+    return;
+  }
+
+  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, action->constraint)) {
+    return;
+  }
+
+  /* leave features that already carry a gene xref alone */
+  grp = SeqMgrGetGeneXref (sfp);
+  if (grp != NULL) {
+    return;
+  }
+
+  gene = SeqMgrGetOverlappingGene (sfp->location, &context);
+  if (gene == NULL || (grp = (GeneRefPtr) gene->data.value.ptrvalue) == NULL) {
+    return;
+  }
+
+  xref = SeqFeatXrefNew ();
+  if (xref == NULL) {
+    return;
+  }
+  grp = (GeneRefPtr) AsnIoMemCopy (grp, (AsnReadFunc)GeneRefAsnRead, (AsnWriteFunc)GeneRefAsnWrite);
+  if (grp == NULL) {
+    xref = SeqFeatXrefFree (xref);
+    return;
+  }
+
+  xref->data.choice = SEQFEAT_GENE;
+  xref->data.value.ptrvalue = grp;
+  xref->next = sfp->xref;
+  sfp->xref = xref;
+  lp->lid.data_in_log = TRUE;
+  if (lp->lid.fp != NULL) {
+    /* wrap the feature in a stack ValNode to obtain its description */
+    MemSet (&vn, 0, sizeof (ValNode));
+    vn.choice = OBJ_SEQFEAT;
+    vn.data.ptrvalue = sfp;
+    text = GetDiscrepancyItemText (&vn);
+    fprintf (lp->lid.fp, "Added Gene xref to %s\n", text);
+    text = MemFree (text);
+  }
+}
+
+
+/* Applies a make-gene-xref action to every feature in sep, logging to
+ * log_fp when it is not NULL.  Returns TRUE when any xref was added.
+ *
+ * The state struct is now cleared through its own address.  The previous
+ * call cleared sizeof (LogAndPointerData) bytes starting at &ld.lid, which
+ * is only in bounds because lid is the first member.
+ */
+static Boolean MacroMakeGeneXrefs (SeqEntryPtr sep, MakeGeneXrefActionPtr action, FILE *log_fp)
+{
+  LogAndPointerData ld;
+
+  MemSet (&ld, 0, sizeof (LogAndPointerData));
+  ld.lid.fp = log_fp;
+  ld.action = action;
+
+  VisitFeaturesInSep (sep, &ld, MacroMakeGeneXrefsCallback);
+  return ld.lid.data_in_log;
+}
+
+
+/* Runs ApplyBarcodeDbxrefsToBioseq over every Bioseq in sep with a counter
+ * as its user data.  When the counter ends up positive, the count is
+ * written to log_fp (if not NULL) and TRUE is returned; otherwise FALSE.
+ */
+static Boolean MacroMakeBoldXrefs (SeqEntryPtr sep, FILE *log_fp)
+{
+  Int4 created = 0;
+
+  VisitBioseqsInSep (sep, &created, ApplyBarcodeDbxrefsToBioseq);
+
+  if (created <= 0) {
+    return FALSE;
+  }
+  if (log_fp != NULL) {
+    fprintf (log_fp, "Created %d BARCODE dbxrefs\n", created);
+  }
+  return TRUE;
+}
+
+
+/* Empties names[5] of the author's standard name, if present.  Acts only
+ * when the author's name has choice 2.  Returns TRUE when names[5] was
+ * present and has been emptied.
+ *
+ * An author with no name (pAuthor->name == NULL) is now rejected instead
+ * of being dereferenced.
+ */
+NLM_EXTERN Boolean StripSuffixFromAuthor (AuthorPtr pAuthor)
+{
+  NameStdPtr pNameStandard;
+  Boolean    rval = FALSE;
+
+  if (pAuthor == NULL || pAuthor->name == NULL)
+    return FALSE;
+  else if(pAuthor->name->choice != 2)
+    return FALSE;
+  pNameStandard = pAuthor->name->data;
+  if (pNameStandard != NULL && pNameStandard->names[5] != NULL)
+  {
+    /* keep the buffer, make the string empty */
+    pNameStandard->names[5][0] = 0;
+    rval = TRUE;
+  }
+  return rval;
+}
+
+/* Shortens names[4] of the author's standard name in place.  Acts only
+ * when the author's name has choice 2.
+ *   - fewer than two periods and more than 3 characters: keep the first two
+ *     characters and end with a period;
+ *   - two or more periods and more than 4 characters: keep the first three
+ *     characters and end with a period.
+ * Returns TRUE when the string was shortened.
+ *
+ * An author with no name (pAuthor->name == NULL) is now rejected instead
+ * of being dereferenced.
+ */
+NLM_EXTERN Boolean TruncateAuthorMiddleInitials (AuthorPtr pAuthor)
+{
+  NameStdPtr pNameStandard;
+  CharPtr    cp;
+  Boolean    rval = FALSE;
+
+  if (pAuthor == NULL || pAuthor->name == NULL)
+    return FALSE;
+  else if(pAuthor->name->choice != 2)
+    return FALSE;
+  pNameStandard = pAuthor->name->data;
+  if (pNameStandard != NULL)
+  {
+    cp = StringChr (pNameStandard->names[4], '.');
+    if (cp == NULL || StringChr (cp + 1, '.') == NULL) {
+      if (StringLen (pNameStandard->names[4]) > 3)
+      {
+        pNameStandard->names[4][3] = 0;
+        pNameStandard->names[4][2] = '.';
+        rval = TRUE;
+      }
+    } else if (StringLen (pNameStandard->names[4]) > 4) {
+      pNameStandard->names[4][4] = 0;
+      pNameStandard->names[4][3] = '.';
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+/* Moves what follows the first period of names[4] into names[1].
+ *
+ * Only authors whose name object has choice 2 are handled.  The text after
+ * the first '.' in names[4] is examined; when it begins with more than one
+ * alphabetic character, everything from that point to the end of the string
+ * is appended to names[1] (ExistingTextOption_append_space) and names[4] is
+ * cut off right after the period.
+ *
+ * Returns TRUE when text was moved, FALSE otherwise (including a NULL
+ * author or an author without a name object).
+ */
+static Boolean MoveAuthorMiddleToFirst (AuthorPtr pAuthor)
+{
+  NameStdPtr pNameStandard;
+  CharPtr    cp;
+  Int4       num_letters = 0;
+
+  /* an author record without a name object has nothing to move */
+  if (pAuthor == NULL || pAuthor->name == NULL) {
+    return FALSE;
+  }
+  if (pAuthor->name->choice != 2) {
+    return FALSE;
+  }
+
+  pNameStandard = pAuthor->name->data;
+  if (pNameStandard == NULL) {
+    return FALSE;
+  }
+
+  cp = StringChr (pNameStandard->names[4], '.');
+  if (cp == NULL) {
+    return FALSE;
+  }
+  cp++;
+
+  /* isalpha is only defined for unsigned char values (and EOF), so convert
+   * before the call; a plain char holding a non-ASCII byte may be negative
+   */
+  while (isalpha ((unsigned char) cp[num_letters])) {
+    num_letters++;
+  }
+  if (num_letters <= 1) {
+    return FALSE;
+  }
+
+  /* copy first, then terminate: cp points into the string being cut */
+  SetStringValue (&(pNameStandard->names[1]), cp, ExistingTextOption_append_space);
+  *cp = 0;
+  return TRUE;
+}
+
+
+/* Display names for the author-fix actions.  SummarizeAuthorFixAction and
+ * the log message in ApplyAuthorFixToSeqEntry index this table with
+ * fix_type - 1.
+ */
+const CharPtr s_AuthorFixActionNames[] = {
+ "Truncate middle initials",
+ "Strip author suffix",
+ "Move middle name to first name"
+};
+
+
+/* Builds a newly allocated description of an author-fix action: the
+ * action's display name, followed by a space and the constraint summary
+ * when SummarizeConstraintSet produces one.  A NULL action or a fix_type
+ * outside the name table yields a copy of "Unknown action".
+ */
+NLM_EXTERN CharPtr SummarizeAuthorFixAction (AuthorFixActionPtr a)
+{
+  CharPtr action_name;
+  CharPtr where;
+  CharPtr summary;
+
+  if (a == NULL
+      || a->fix_type < 1
+      || a->fix_type > sizeof (s_AuthorFixActionNames) / sizeof (CharPtr)) {
+    return StringSave("Unknown action");
+  }
+
+  action_name = s_AuthorFixActionNames[a->fix_type - 1];
+  where = SummarizeConstraintSet (a->constraint);
+  if (where == NULL) {
+    return StringSave (action_name);
+  }
+
+  /* + 2: one separating space and the terminating NUL */
+  summary = (CharPtr) MemNew (sizeof (Char) * (StringLen (action_name) + StringLen (where) + 2));
+  StringCpy (summary, action_name);
+  StringCat (summary, " ");
+  StringCat (summary, where);
+  where = MemFree (where);
+  return summary;
+}
+
+
+/* Visitor data used by GetPubsForAuthorFixDesc / GetPubsForAuthorFixFeat */
+typedef struct pubcollect {
+ ValNodePtr list; /* collected objects; choice is OBJ_SEQDESC or OBJ_SEQFEAT */
+ ValNodePtr constraint; /* constraint set each candidate is tested against */
+} PubCollectData, PNTR PubCollectPtr;
+
+/* Descriptor visitor callback: appends sdp to the PubCollectData list in
+ * data when sdp is a pub descriptor that matches the collector's constraint.
+ */
+static void GetPubsForAuthorFixDesc (SeqDescPtr sdp, Pointer data)
+{
+  PubCollectPtr collector = (PubCollectPtr) data;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_pub || collector == NULL) {
+    return;
+  }
+  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, collector->constraint)) {
+    return;
+  }
+
+  ValNodeAddPointer (&(collector->list), OBJ_SEQDESC, sdp);
+}
+
+/* Feature visitor callback: appends sfp to the PubCollectData list in data
+ * when sfp is a pub feature that matches the collector's constraint.
+ */
+static void GetPubsForAuthorFixFeat (SeqFeatPtr sfp, Pointer data)
+{
+  PubCollectPtr collector = (PubCollectPtr) data;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || collector == NULL) {
+    return;
+  }
+  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, collector->constraint)) {
+    return;
+  }
+
+  ValNodeAddPointer (&(collector->list), OBJ_SEQFEAT, sfp);
+}
+
+/* Returns the Pubdesc carried by one collected item: the data of a pub
+ * feature (choice OBJ_SEQFEAT) or of a pub descriptor (choice OBJ_SEQDESC).
+ * Returns NULL for anything else.
+ */
+static PubdescPtr s_PubdescFromCollectedPub (ValNodePtr item)
+{
+  SeqFeatPtr sfp;
+  SeqDescPtr sdp;
+
+  if (item->choice == OBJ_SEQFEAT) {
+    sfp = (SeqFeatPtr) item->data.ptrvalue;
+    if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) {
+      return sfp->data.value.ptrvalue;
+    }
+  } else if (item->choice == OBJ_SEQDESC) {
+    sdp = (SeqDescPtr) item->data.ptrvalue;
+    if (sdp != NULL && sdp->choice == Seq_descr_pub) {
+      return sdp->data.ptrvalue;
+    }
+  }
+  return NULL;
+}
+
+
+/* Runs the fix selected by action->fix_type on one author and reports
+ * whether that fix changed the author.  Unrecognized fix types do nothing.
+ */
+static Boolean s_ApplyAuthorFixToOneAuthor (AuthorPtr ap, AuthorFixActionPtr action)
+{
+  switch (action->fix_type) {
+    case Author_fix_type_truncate_middle_initials:
+      return TruncateAuthorMiddleInitials (ap);
+    case Author_fix_type_strip_suffix:
+      return StripSuffixFromAuthor (ap);
+    case Author_fix_type_move_middle_to_first:
+      return MoveAuthorMiddleToFirst (ap);
+    default:
+      return FALSE;
+  }
+}
+
+
+/* Applies an author-fix action to every author of every publication in sep
+ * that matches the action's constraint.
+ *
+ * Pub descriptors and pub features are collected first, then each pub's
+ * author list (GetAuthorListForPub) is walked and the selected fix is run
+ * on each entry.  When at least one name changed, the count is written to
+ * log_fp (if not NULL) and TRUE is returned; otherwise FALSE.
+ *
+ * NOTE(review): every entry of the author list is handed to the fix
+ * functions as an AuthorPtr; confirm the list cannot hold plain strings.
+ */
+static Boolean ApplyAuthorFixToSeqEntry (SeqEntryPtr sep, AuthorFixActionPtr action, FILE *log_fp)
+{
+  PubCollectData collected;
+  ValNodePtr     item, pub, entry;
+  PubdescPtr     pdp;
+  AuthListPtr    alp;
+  Int4           num_changed = 0;
+
+  if (sep == NULL || action == NULL) {
+    return FALSE;
+  }
+
+  MemSet (&collected, 0, sizeof (PubCollectData));
+  collected.constraint = action->constraint;
+  VisitDescriptorsInSep (sep, &collected, GetPubsForAuthorFixDesc);
+  VisitFeaturesInSep (sep, &collected, GetPubsForAuthorFixFeat);
+
+  for (item = collected.list; item != NULL; item = item->next) {
+    pdp = s_PubdescFromCollectedPub (item);
+    if (pdp == NULL) {
+      continue;
+    }
+    for (pub = pdp->pub; pub != NULL; pub = pub->next) {
+      alp = GetAuthorListForPub (pub);
+      if (alp == NULL) {
+        continue;
+      }
+      for (entry = alp->names; entry != NULL; entry = entry->next) {
+        if (s_ApplyAuthorFixToOneAuthor (entry->data.ptrvalue, action)) {
+          num_changed++;
+        }
+      }
+    }
+  }
+
+  /* the list only borrows the descriptors/features, so free the nodes alone */
+  collected.list = ValNodeFree (collected.list);
+
+  if (num_changed <= 0) {
+    return FALSE;
+  }
+  if (log_fp != NULL) {
+    fprintf (log_fp, "%s for %d names\n", s_AuthorFixActionNames[action->fix_type - 1], num_changed);
+  }
+  return TRUE;
+}
+
+
+NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp)
+{
+ Int4 num_AECR = 0, num_parse = 0, num;
Uint2 entityID;
Boolean needs_update = FALSE;
CharPtr summ;
Boolean any_change = FALSE;
+ Boolean created_protein_features = FALSE;
+
+ entityID = SeqMgrGetEntityIDForSeqEntry(sep);
while (macro != NULL) {
needs_update = TRUE;
switch (macro->choice) {
case MacroActionChoice_aecr:
- num = ApplyAECRActionToSeqEntry ((AECRActionPtr) macro->data.ptrvalue, sep);
+ num = ApplyAECRActionToSeqEntry ((AECRActionPtr) macro->data.ptrvalue, sep, &created_protein_features);
num_AECR += num;
if (num > 0) {
if (log_fp != NULL) {
@@ -21783,6 +26495,12 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
}
any_change = TRUE;
}
+ if (created_protein_features) {
+ if (log_fp != NULL) {
+ fprintf (log_fp, "Created protein features\n");
+ }
+ any_change = TRUE;
+ }
break;
case MacroActionChoice_parse:
num = ApplyParseActionToSeqEntry ((ParseActionPtr) macro->data.ptrvalue, sep);
@@ -21798,14 +26516,13 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
break;
case MacroActionChoice_add_feature:
num = ApplyApplyFeatureActionToSeqEntry ((ApplyFeatureActionPtr) macro->data.ptrvalue, sep);
- num_feature += num;
if (num > 0) {
if (log_fp != NULL) {
fprintf (log_fp, "Added %d features\n", num);
}
any_change = TRUE;
}
- SeqMgrIndexFeatures (ObjMgrGetEntityIDForChoice(sep), NULL);
+ SeqMgrIndexFeatures (entityID, NULL);
break;
case MacroActionChoice_remove_feature:
num = ApplyRemoveFeatureActionToSeqEntry ((RemoveFeatureActionPtr) macro->data.ptrvalue, sep);
@@ -21814,23 +26531,25 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
fprintf (log_fp, "Removed %d features\n", num);
}
any_change = TRUE;
+ ObjMgrSetDirtyFlag (entityID, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
+ needs_update = FALSE;
}
break;
case MacroActionChoice_edit_location:
num = ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) macro->data.ptrvalue, sep, log_fp);
if (num > 0) {
- num_fields += num;
any_change = TRUE;
}
break;
case MacroActionChoice_convert_feature:
- num += ApplyConvertFeatureActionToSeqEntry ((ConvertFeatureActionPtr) macro->data.ptrvalue, sep, log_fp);
- num_feature += num;
- entityID = SeqMgrGetEntityIDForSeqEntry (sep);
+ num = ApplyConvertFeatureActionToSeqEntry ((ConvertFeatureActionPtr) macro->data.ptrvalue, sep, log_fp);
+ if (num > 0) {
+ any_change = TRUE;
+ }
ObjMgrSetDirtyFlag (entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
needs_update = FALSE;
- any_change = TRUE;
break;
case MacroActionChoice_remove_descriptor:
num = ApplyRemoveDescriptorActionToSeqEntry ((RemoveDescriptorActionPtr) macro->data.ptrvalue, sep);
@@ -21855,7 +26574,7 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
any_change = TRUE;
break;
case MacroActionChoice_removesets:
- if (RemoveDuplicateNestedSetsForEntityID (SeqMgrGetEntityIDForSeqEntry (sep))) {
+ if (RemoveDuplicateNestedSetsForEntityID (entityID)) {
if (log_fp != NULL) {
fprintf (log_fp, "Removed duplicate nested sets\n");
}
@@ -21870,7 +26589,6 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
break;
case MacroActionChoice_trim_stop_from_complete_cds:
if (TrimStopsFromCompleteCodingRegions(sep, log_fp)) {
- entityID = SeqMgrGetEntityIDForSeqEntry (sep);
ObjMgrSetDirtyFlag (entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
needs_update = FALSE;
@@ -21879,7 +26597,6 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
break;
case MacroActionChoice_synchronize_cds_partials:
if (ResynchCodingRegionPartialsEx(sep, log_fp)) {
- entityID = SeqMgrGetEntityIDForSeqEntry (sep);
ObjMgrSetDirtyFlag (entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
needs_update = FALSE;
@@ -21888,7 +26605,6 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
break;
case MacroActionChoice_adjust_for_consensus_splice:
if (AdjustSeqEntryForConsensusSpliceEx(sep, log_fp)) {
- entityID = SeqMgrGetEntityIDForSeqEntry (sep);
ObjMgrSetDirtyFlag (entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
needs_update = FALSE;
@@ -21896,49 +26612,76 @@ NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, In
}
break;
case MacroActionChoice_fix_pub_caps:
- num = ApplyFixPubCapsToSeqEntry (macro->data.ptrvalue, sep);
+ any_change |= ApplyFixPubCapsToSeqEntry (macro->data.ptrvalue, sep, log_fp);
+ break;
+ case MacroActionChoice_remove_seg_gaps:
+ num = RemoveSegGapsInSeqEntry (sep);
if (num > 0) {
if (log_fp != NULL) {
- summ = SummarizeFixPubCapsAction (macro->data.ptrvalue);
- fprintf (log_fp, "Fixed capitalization in %d publication fields during %s\n", num, summ);
- summ = MemFree (summ);
+ fprintf (log_fp, "Removed gaps in %d alignments\n", num);
}
any_change = TRUE;
}
- num_fields += num;
break;
- case MacroActionChoice_remove_seg_gaps:
- num = RemoveSegGapsInSeqEntry (sep);
+ case MacroActionChoice_sort_fields:
+ num = SortFieldsInSeqEntry (macro->data.ptrvalue, sep);
if (num > 0) {
if (log_fp != NULL) {
- fprintf (log_fp, "Removed gaps in %d alignments\n", num);
+ summ = SummarizeSortFieldsAction (macro->data.ptrvalue);
+ fprintf (log_fp, "Changed order of fields for %d objects during %s\n", num, summ);
+ summ = MemFree (summ);
}
any_change = TRUE;
}
- num_fields += num;
+ break;
+ case MacroActionChoice_apply_molinfo_block:
+ any_change |= ApplyMolinfoBlockToSeqEntryEx (sep, macro->data.ptrvalue, log_fp);
+ break;
+ case MacroActionChoice_fix_caps:
+ any_change |= ApplyFixCapsToSeqEntry (sep, macro->data.ptrvalue, log_fp);
+ break;
+ case MacroActionChoice_fix_format:
+ any_change |= ApplyFixFormatToSeqEntry (sep, macro->data.ptrvalue, log_fp);
+ break;
+ case MacroActionChoice_fix_spell:
+ any_change |= SpellFixSeqEntry (sep, macro->data.ptrvalue, log_fp);
+ break;
+ case MacroActionChoice_remove_duplicate_features:
+ any_change |= RemoveDuplicateFeaturesInSeqEntry (sep, macro->data.ptrvalue, log_fp);
+ ObjMgrSetDirtyFlag (entityID, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
+ needs_update = FALSE;
+ break;
+ case MacroActionChoice_remove_lineage_notes:
+ any_change |= RemoveLineageNotesInSeqEntry (sep, log_fp);
+ break;
+ case MacroActionChoice_remove_xrefs:
+ any_change |= MacroRemoveXrefs (sep, macro->data.ptrvalue, log_fp);
+ break;
+ case MacroActionChoice_make_gene_xrefs:
+ any_change |= MacroMakeGeneXrefs (sep, macro->data.ptrvalue, log_fp);
+ break;
+ case MacroActionChoice_make_bold_xrefs:
+ any_change |= MacroMakeBoldXrefs (sep, log_fp);
+ break;
+ case MacroActionChoice_fix_author:
+ any_change |= ApplyAuthorFixToSeqEntry (sep, macro->data.ptrvalue, log_fp);
break;
}
macro = macro->next;
}
if (needs_update) {
- entityID = SeqMgrGetEntityIDForSeqEntry (sep);
ObjMgrSetDirtyFlag (entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
}
- if (pNumFields != NULL) {
- *pNumFields = num_AECR + num_parse + num_fields;
- }
- if (pNumFeat != NULL) {
- *pNumFeat = num_feature;
- }
return any_change;
}
-NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat)
+NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro)
{
- ApplyMacroToSeqEntryEx (sep, macro, pNumFields, pNumFeat, NULL);
+ ApplyMacroToSeqEntryEx (sep, macro, NULL);
}
@@ -22081,6 +26824,9 @@ NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp)
case FieldType_struc_comment_field:
str = SummarizeStructuredCommentField (vnp->data.ptrvalue);
break;
+ case FieldType_dblink:
+ str = StringSave (GetDBLinkNameFromDBLinkFieldType (vnp->data.intvalue));
+ break;
case FieldType_misc:
if (vnp->data.intvalue == Misc_field_genome_project_id) {
str = StringSave ("Genome Project ID");
@@ -22235,6 +26981,9 @@ NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text)
case ExistingTextOption_append_colon :
str = "append separated by colon";
break;
+ case ExistingTextOption_append_comma:
+ str = "append separated by comma";
+ break;
case ExistingTextOption_append_none :
str = "append (no separator)";
break;
@@ -22247,6 +26996,9 @@ NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text)
case ExistingTextOption_prefix_colon :
str = "prefix separated by colon";
break;
+ case ExistingTextOption_prefix_comma:
+ str = "prefix separated by comma";
+ break;
case ExistingTextOption_prefix_none :
str = "prefix (no separator)";
break;
@@ -22393,6 +27145,37 @@ NLM_EXTERN CharPtr SummarizeTextPortion (TextPortionPtr text_portion)
const CharPtr kTaxnameAfterBinomialString = "Taxname after binomial";
+/* Returns a newly allocated phrase describing which part of a general ID
+ * is the parse source, chosen by vnp->choice.  For the tag choice the phrase
+ * names the tag text when one is given.  A NULL node or an unrecognized
+ * choice yields a copy of "invalid id".
+ */
+static CharPtr SummarizeParseSrcGeneralId (ValNodePtr vnp)
+{
+  CharPtr tag_fmt = "general ID %s tag";
+  CharPtr phrase;
+
+  if (vnp == NULL) {
+    return StringSave ("invalid id");
+  }
+
+  switch (vnp->choice) {
+    case ParseSrcGeneralId_whole_text:
+      return StringSave ("entire general ID");
+    case ParseSrcGeneralId_db:
+      return StringSave ("general ID database");
+    case ParseSrcGeneralId_tag:
+      if (vnp->data.ptrvalue == NULL || StringHasNoText (vnp->data.ptrvalue)) {
+        return StringSave ("general ID tag");
+      }
+      /* the "%s" in the format is replaced, which leaves room for the NUL */
+      phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (tag_fmt) + StringLen (vnp->data.ptrvalue)));
+      sprintf (phrase, tag_fmt, vnp->data.ptrvalue);
+      return phrase;
+    default:
+      return StringSave ("invalid id");
+  }
+}
+
+
NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src)
{
CharPtr summ = NULL;
@@ -22439,6 +27222,10 @@ NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src)
case ParseSrc_file_id:
summ = "file ID";
break;
+ case ParseSrc_general_id:
+ summ = SummarizeParseSrcGeneralId(src->data.ptrvalue);
+ need_to_save = FALSE;
+ break;
}
}
if (summ == NULL) {
@@ -22677,8 +27464,10 @@ static CharPtr SummarizeApplyAction (ApplyActionPtr a)
static CharPtr SummarizeEditAction (EditActionPtr a)
{
CharPtr str = NULL;
- CharPtr fmt = "Edit %s replace '%s' with '%s'";
+ CharPtr fmt = "Edit %s replace '%s'%s with '%s'";
+ CharPtr case_insensitive = " (case insensitive)";
CharPtr field;
+ Int4 len;
if (a == NULL) {
str = StringSave ("No action");
@@ -22686,8 +27475,15 @@ static CharPtr SummarizeEditAction (EditActionPtr a)
str = StringSave ("Invalid action");
} else {
field = SummarizeFieldType (a->field);
- str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field) + StringLen (a->edit->find_txt) + StringLen (a->edit->repl_txt)));
- sprintf (str, fmt, field, a->edit->find_txt, a->edit->repl_txt == NULL ? "" : a->edit->repl_txt);
+ len = StringLen (fmt) + StringLen (field) + StringLen (a->edit->find_txt) + StringLen (a->edit->repl_txt);
+ if (a->edit->case_insensitive) {
+ len += StringLen (case_insensitive);
+ }
+
+ str = (CharPtr) MemNew (sizeof (Char) * len);
+ sprintf (str, fmt, field, a->edit->find_txt,
+ a->edit->case_insensitive ? case_insensitive : "",
+ a->edit->repl_txt == NULL ? "" : a->edit->repl_txt);
field = MemFree (field);
}
return str;
@@ -22756,11 +27552,100 @@ static CharPtr SummarizeSwapAction (SwapActionPtr a)
}
+/* Returns a newly allocated description of a capitalization change, or
+ * NULL when cap_change is not one of the four values handled here.
+ */
+static CharPtr SummarizeCapChange (Uint1 cap_change)
+{
+  switch (cap_change) {
+    case Cap_change_tolower:
+      return StringSave ("change capitalization to lower");
+    case Cap_change_toupper:
+      return StringSave ("change capitalization to upper");
+    case Cap_change_firstcap:
+      return StringSave ("capitalize first letter, remaining lower case");
+    case Cap_change_firstcaprestnochange:
+      return StringSave ("capitalize first letter, do not change other characters");
+    default:
+      return NULL;
+  }
+}
+
+
+/* Returns a newly allocated description of a single text transform, chosen
+ * by transform->choice:
+ *   edit    "replace '<find>' with '<repl>'", plus " (case insensitive)"
+ *           when the edit is flagged that way
+ *   caps    the phrase from SummarizeCapChange
+ *   remove  "remove " followed by the SummarizeTextPortion phrase
+ * Returns NULL for a NULL transform, an edit without data, or any other
+ * choice.
+ */
+NLM_EXTERN CharPtr SummarizeTextTransform (ValNodePtr transform)
+{
+  CharPtr      replace_fmt = "replace '%s' with '%s'";
+  CharPtr      remove_fmt = "remove %s";
+  CharPtr      case_insensitive = " (case insensitive)";
+  FieldEditPtr edit;
+  CharPtr      summary = NULL;
+  CharPtr      portion;
+  Int4         needed = 0;
+
+  if (transform == NULL) {
+    return NULL;
+  }
+
+  if (transform->choice == TextTransform_edit) {
+    edit = (FieldEditPtr) transform->data.ptrvalue;
+    if (edit != NULL) {
+      /* the two "%s" markers in the format are replaced, leaving room for the NUL */
+      needed = StringLen (replace_fmt) + StringLen (edit->find_txt) + StringLen (edit->repl_txt);
+      if (edit->case_insensitive) {
+        needed += StringLen (case_insensitive);
+      }
+      summary = (CharPtr) MemNew (sizeof (Char) * needed);
+      sprintf (summary, replace_fmt, edit->find_txt == NULL ? "" : edit->find_txt, edit->repl_txt == NULL ? "" : edit->repl_txt);
+      if (edit->case_insensitive) {
+        StringCat (summary, case_insensitive);
+      }
+    }
+  } else if (transform->choice == TextTransform_caps) {
+    summary = SummarizeCapChange(transform->data.intvalue);
+  } else if (transform->choice == TextTransform_remove) {
+    portion = SummarizeTextPortion (transform->data.ptrvalue);
+    summary = (CharPtr) MemNew (sizeof (Char) * (StringLen (remove_fmt) + StringLen (portion)));
+    sprintf (summary, remove_fmt, portion);
+    portion = MemFree (portion);
+  }
+
+  return summary;
+}
+
+
+/* Summarizes each transform in the list with SummarizeTextTransform and
+ * joins the non-NULL results with ", " into one newly allocated string.
+ *
+ * NOTE(review): when no transform produces a summary the buffer is
+ * requested with length 0; what MemNew returns in that case is not visible
+ * here.
+ */
+static CharPtr SummarizeTextTransformList (ValNodePtr text_transform)
+{
+  ValNodePtr pieces = NULL;
+  ValNodePtr node;
+  CharPtr    one;
+  CharPtr    joined;
+  Int4       needed = 0;
+
+  node = text_transform;
+  while (node != NULL) {
+    one = SummarizeTextTransform (node);
+    if (one != NULL) {
+      ValNodeAddPointer (&pieces, 0, one);
+      /* + 3 covers the ", " separator and leaves a byte for the NUL */
+      needed += StringLen (one) + 3;
+    }
+    node = node->next;
+  }
+
+  joined = (CharPtr) MemNew (sizeof (Char) * needed);
+  node = pieces;
+  while (node != NULL) {
+    StringCat (joined, node->data.ptrvalue);
+    if (node->next != NULL) {
+      StringCat (joined, ", ");
+    }
+    node = node->next;
+  }
+
+  pieces = ValNodeFreeData (pieces);
+  return joined;
+}
+
+
static CharPtr SummarizeAECRParseAction (AECRParseActionPtr a)
{
CharPtr str = NULL;
- CharPtr fmt = "Parse %s from %s (%s)";
- CharPtr fields, existing_text, text_portion;
+ CharPtr fmt = "Parse %s%s%s from %s(%s)";
+ CharPtr fields, existing_text, text_portion, transform;
if (a == NULL) {
str = StringSave ("No action");
@@ -22770,10 +27655,12 @@ static CharPtr SummarizeAECRParseAction (AECRParseActionPtr a)
fields = SummarizeFieldPairType (a->fields, "to");
existing_text = SummarizeExistingText (a->existing_text);
text_portion = SummarizeTextPortion (a->portion);
- str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (fields) + StringLen (existing_text)));
- sprintf (str, fmt, text_portion, fields, existing_text);
+ transform = SummarizeTextTransformList(a->transform);
+ str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (transform) + StringLen (fields) + StringLen (existing_text)));
+ sprintf (str, fmt, text_portion, transform == NULL ? "" : " ", transform == NULL ? "" : transform, fields, existing_text);
fields = MemFree (fields);
text_portion = MemFree (text_portion);
+ transform = MemFree (transform);
}
return str;
}
@@ -22851,9 +27738,9 @@ NLM_EXTERN CharPtr SummarizeAECRAction (AECRActionPtr a)
NLM_EXTERN CharPtr SummarizeParseAction (ParseActionPtr p)
{
CharPtr field_from = NULL, field_to = NULL;
- CharPtr existing_text = NULL, text_portion = NULL;
+ CharPtr existing_text = NULL, text_portion = NULL, transform;
CharPtr summ = NULL;
- CharPtr fmt = "Parse %s from %s to %s (%s)";
+ CharPtr fmt = "Parse %s from %s to %s%s%s (%s)";
if (p == NULL) {
summ = StringSave ("No action");
@@ -22862,8 +27749,15 @@ NLM_EXTERN CharPtr SummarizeParseAction (ParseActionPtr p)
field_to = SummarizeParseDst (p->dest);
existing_text = SummarizeExistingText (p->existing_text);
text_portion = SummarizeTextPortion (p->portion);
- summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (field_from) + StringLen (field_to) + StringLen (existing_text)));
- sprintf (summ, fmt, text_portion, field_from, field_to, existing_text);
+ transform = SummarizeTextTransformList(p->transform);
+
+ summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt)
+ + StringLen (text_portion)
+ + StringLen (field_from)
+ + StringLen (field_to)
+ + StringLen (transform)
+ + StringLen (existing_text)));
+ sprintf (summ, fmt, text_portion, field_from, field_to, transform == NULL ? "" : " ", transform == NULL ? "" : transform, existing_text);
text_portion = MemFree (text_portion);
field_from = MemFree (field_from);
field_to = MemFree (field_to);
@@ -22968,56 +27862,55 @@ NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a)
{
CharPtr constraint = NULL;
Int4 len = 0;
- CharPtr affil = "affiliation";
- CharPtr title = "title";
- CharPtr authors = "authors";
- Boolean first = TRUE;
+ CharPtr descriptions[] = {"affiliation", "title", "authors", "affiliation country"};
+ CharPtr punct_only = " (punctuation only)";
+ Boolean present[4];
+ Int4 i, first = 4, last = 0, num_items = 0;
CharPtr summ = NULL;
if (a == NULL) {
return NULL;
}
- if (a->title) {
- len += 6 + StringLen (title);
- }
- if (a->authors) {
- len += 6 + StringLen (authors);
- }
- if (a->affiliation) {
- len += 6 + StringLen (affil);
+ present[0] = a->affiliation;
+ present[1] = a->title;
+ present[2] = a->authors;
+ present[3] = a->affil_country;
+
+ for (i = 0; i < 4; i++) {
+ if (present[i]) {
+ len += 6 + StringLen (descriptions[i]);
+ if (first == 4) {
+ first = i;
+ }
+ last = i;
+ num_items++;
+ }
}
if (len > 0) {
+ if (a->punct_only) {
+ len += StringLen (punct_only);
+ }
constraint = SummarizeConstraintSet (a->constraint);
len += StringLen (constraint) + 14;
summ = (CharPtr) MemNew (sizeof (Char) * len);
sprintf (summ, "Fix pub ");
- if (a->title) {
- StringCat (summ, title);
- first = FALSE;
- }
- if (a->authors) {
- if (!first) {
- if (a->affiliation) {
- StringCat (summ, ", ");
- } else {
- StringCat (summ, " and ");
+ for (i = 0; i < 4; i++) {
+ if (present[i]) {
+ if (i != first) {
+ if (num_items > 2) {
+ StringCat (summ, ", ");
+ }
+ if (i == last) {
+ StringCat (summ, " and ");
+ }
}
+ StringCat (summ, descriptions[i]);
}
- first = FALSE;
- StringCat (summ, authors);
}
- if (a->affiliation) {
- if (!first) {
- if (a->title && a->authors) {
- StringCat (summ, ", and ");
- } else {
- StringCat (summ, " and ");
- }
- }
- first = FALSE;
- StringCat (summ, affil);
+ if (a->punct_only) {
+ StringCat (summ, punct_only);
}
if (constraint != NULL) {
StringCat (summ, " where ");
@@ -23030,6 +27923,247 @@ NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a)
}
+/* Returns a newly allocated description of a fix-capitalization action,
+ * chosen by action->choice.  The pub choice delegates to
+ * SummarizeFixPubCapsAction; the source-qualifier choice names the
+ * qualifier via GetSourceQualName.  A NULL action or an unrecognized choice
+ * yields a copy of "Invalid action".
+ */
+NLM_EXTERN CharPtr SummarizeFixCapsAction (FixCapsActionPtr action)
+{
+  CharPtr qual_fmt = "Fix capitalization in %s source qualifier";
+  CharPtr qual_name;
+  CharPtr summary;
+
+  if (action == NULL) {
+    return StringSave ("Invalid action");
+  }
+
+  switch (action->choice) {
+    case FixCapsAction_pub:
+      return SummarizeFixPubCapsAction (action->data.ptrvalue);
+    case FixCapsAction_src_country:
+      return StringSave ("Fix source country qualifier capitalization");
+    case FixCapsAction_mouse_strain:
+      return StringSave ("Fix capitalization in common Mus musculus strains");
+    case FixCapsAction_src_qual:
+      qual_name = GetSourceQualName (action->data.intvalue);
+      /* the "%s" in the format is replaced, which leaves room for the NUL */
+      summary = (CharPtr) MemNew (sizeof (Char) * (StringLen (qual_name) + StringLen (qual_fmt)));
+      sprintf (summary, qual_fmt, qual_name);
+      return summary;
+    default:
+      return StringSave ("Invalid action");
+  }
+}
+
+
+/* Returns a newly allocated description of a fix-format action, chosen by
+ * action->choice.  A NULL action or an unrecognized choice yields a copy of
+ * "Invalid action".
+ */
+NLM_EXTERN CharPtr SummarizeFixFormatAction (FixFormatActionPtr action)
+{
+  if (action == NULL) {
+    return StringSave ("Invalid action");
+  }
+
+  switch (action->choice) {
+    case FixFormatAction_collection_date:
+      return StringSave ("Fix collection-date format");
+    case FixFormatAction_lat_lon:
+      return StringSave ("Fix lat-lon format");
+    case FixFormatAction_primers:
+      return StringSave ("Fix i in primer sequence");
+    case FixFormatAction_protein_name:
+      return StringSave ("Remove organism names from protein names");
+    default:
+      return StringSave ("Invalid action");
+  }
+}
+
+
+/* Returns a newly allocated description of a remove-duplicate-features
+ * action: "Remove duplicate [<type> ]features" followed directly by one
+ * fragment for each option that is set (case_sensitive, ignore_partials,
+ * remove_proteins).  The feature type is named unless it is
+ * Macro_feature_type_any.  A NULL action yields a copy of "Invalid action".
+ */
+NLM_EXTERN CharPtr SummarizeRemoveDuplicateFeaturesAction (RemoveDuplicateFeatureActionPtr action)
+{
+  CharPtr start_fmt = "Remove duplicate%s%s features";
+  CharPtr case_sensitive = "(case-sensitive)";
+  CharPtr ignore_partials = "(ignore partials)";
+  CharPtr remove_proteins = " and remove protein products";
+  CharPtr type_name;
+  CharPtr type_gap;
+  CharPtr summary;
+  Int4    needed;
+
+  if (action == NULL) {
+    return StringSave ("Invalid action");
+  }
+
+  if (action->type == Macro_feature_type_any) {
+    type_name = "";
+    type_gap = "";
+  } else {
+    type_name = GetFeatureNameFromFeatureType (action->type);
+    type_gap = " ";
+  }
+
+  /* size the buffer for the format, the type name and every enabled option */
+  needed = StringLen (start_fmt) + StringLen (type_name) + 1;
+  if (action->case_sensitive) {
+    needed += StringLen (case_sensitive);
+  }
+  if (action->ignore_partials) {
+    needed += StringLen (ignore_partials);
+  }
+  if (action->remove_proteins) {
+    needed += StringLen (remove_proteins);
+  }
+
+  summary = (CharPtr) MemNew (sizeof (Char) * needed);
+  sprintf (summary, start_fmt, type_gap, type_name);
+  if (action->case_sensitive) {
+    StringCat (summary, case_sensitive);
+  }
+  if (action->ignore_partials) {
+    StringCat (summary, ignore_partials);
+  }
+  if (action->remove_proteins) {
+    StringCat (summary, remove_proteins);
+  }
+  return summary;
+}
+
+
+
+/* Maps a sort-order value to its display phrase.  The result is a string
+ * literal, not an allocated copy, so it must not be freed.  Values other
+ * than the three handled here give "unknown order".
+ */
+NLM_EXTERN CharPtr GetSortOrderName (Uint2 order)
+{
+  if (order == Sort_order_short_to_long) {
+    return "by length, short to long";
+  }
+  if (order == Sort_order_long_to_short) {
+    return "by length, long to short";
+  }
+  if (order == Sort_order_alphabetical) {
+    return "alphabetically";
+  }
+  return "unknown order";
+}
+
+
+/* Returns a newly allocated description of a sort-fields action:
+ * "Sort <field> fields <order>", followed by " where <constraint>" when
+ * SummarizeConstraintSet produces a constraint summary.
+ *
+ * A NULL action yields a copy of "Invalid action", matching the other
+ * Summarize*Action functions, instead of dereferencing the NULL pointer.
+ */
+NLM_EXTERN CharPtr SummarizeSortFieldsAction (SortFieldsActionPtr action)
+{
+  CharPtr label, order, constraint, summ;
+  CharPtr fmt = "Sort %s fields %s%s%s";
+
+  if (action == NULL) {
+    return StringSave ("Invalid action");
+  }
+
+  label = SummarizeFieldType (action->field);
+  order = GetSortOrderName(action->order);
+  constraint = SummarizeConstraintSet (action->constraint);
+
+  /* the four "%s" markers (8 characters) are replaced; that slack covers
+   * the 7 characters of " where " plus the terminating NUL
+   */
+  summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen(order) + StringLen (constraint)));
+  sprintf (summ, fmt, label, order, constraint == NULL ? "" : " where ", constraint == NULL ? "" : constraint);
+
+  /* order is a string literal from GetSortOrderName and is not freed */
+  label = MemFree (label);
+  constraint = MemFree (constraint);
+
+  return summ;
+}
+
+
+/* Returns a newly allocated description of a molinfo-block action:
+ *
+ *   "Change to <to-fields>[ where <from-fields>[ and ]<constraint>]"
+ *
+ * Field labels come from GetSequenceQualName and the constraint text from
+ * SummarizeConstraintSet.  Lists of three or more labels are comma
+ * separated with ", and " before the final label; two labels are joined
+ * with " and ".  When a constraint follows the from-list, the from-list is
+ * joined with ", " only and " and " introduces the constraint.
+ *
+ * Returns NULL for a NULL block.
+ */
+NLM_EXTERN CharPtr SummarizeMolinfoBlockAction (MolinfoBlockPtr mib)
+{
+  CharPtr field_label, constraint, summ;
+  ValNodePtr field, field_strs = NULL, from_strs = NULL, vnp;
+  Int4 len = 11;    /* "Change to " plus the terminating NUL */
+  Int4 num_from = 0;
+  Int4 num_to = 0;
+
+  if (mib == NULL) {
+    return NULL;
+  }
+
+  /* collect the labels and budget 2 separator characters per label */
+  for (field = mib->to_list; field != NULL; field = field->next) {
+    field_label = GetSequenceQualName (field);
+    ValNodeAddPointer (&field_strs, 0, field_label);
+    len += StringLen (field_label) + 2;
+    num_to++;
+  }
+
+  for (field = mib->from_list; field != NULL; field = field->next) {
+    field_label = GetSequenceQualName (field);
+    ValNodeAddPointer (&from_strs, 0, field_label);
+    len += StringLen (field_label) + 2;
+    num_from++;
+  }
+
+  constraint = SummarizeConstraintSet (mib->constraint);
+  len += StringLen (constraint);
+  if (constraint != NULL || num_from > 0) {
+    len += 12;    /* " where " and " and " */
+  }
+
+  /* extra room for the " and " / ", and " joiner of a multi-item list */
+  if (num_to > 1) {
+    len += 5;
+  }
+  if (num_from > 1) {
+    len += 5;
+  }
+
+  summ = (CharPtr) MemNew (sizeof (Char) * len);
+  sprintf (summ, "Change to ");
+  for (vnp = field_strs; vnp != NULL; vnp = vnp->next) {
+    StringCat (summ, vnp->data.ptrvalue);
+    if (vnp->next != NULL) {
+      if (num_to > 2) {
+        if (vnp->next->next == NULL) {
+          /* trailing space keeps the last label from fusing with "and" */
+          StringCat (summ, ", and ");
+        } else {
+          StringCat (summ, ", ");
+        }
+      } else {
+        StringCat (summ, " and ");
+      }
+    }
+  }
+
+  if (num_from > 0 || constraint != NULL) {
+    StringCat (summ, " where ");
+  }
+
+  for (vnp = from_strs; vnp != NULL; vnp = vnp->next) {
+    StringCat (summ, vnp->data.ptrvalue);
+    if (vnp->next != NULL) {
+      if (num_from > 2) {
+        if (vnp->next->next == NULL && constraint == NULL) {
+          /* trailing space keeps the last label from fusing with "and" */
+          StringCat (summ, ", and ");
+        } else {
+          StringCat (summ, ", ");
+        }
+      } else if (constraint == NULL) {
+        StringCat (summ, " and ");
+      } else {
+        StringCat (summ, ", ");
+      }
+    }
+  }
+
+  if (constraint != NULL && num_from > 0) {
+    StringCat (summ, " and ");
+  }
+
+  StringCat (summ, constraint);
+
+  field_strs = ValNodeFreeData (field_strs);
+  from_strs = ValNodeFreeData (from_strs);
+  constraint = MemFree (constraint);
+
+  return summ;
+}
+
+
/* summarizing constraints */
static CharPtr GetStringLocationPhrase (Uint2 match_location, Boolean not_present)
{
@@ -23076,42 +28210,168 @@ static CharPtr GetStringLocationPhrase (Uint2 match_location, Boolean not_presen
}
-static CharPtr SummarizeStringConstraint (StringConstraintPtr constraint)
+/* option labels shared by SummarizeWordSubstitution and SummarizeStringConstraint */
+static const CharPtr kCaseSensitive = "case-sensitive";
+static const CharPtr kWholeWord = "whole word";
+
+/* Returns a newly allocated description of a word substitution:
+ *
+ *   "allow '<word>' to be replaced by '<syn1>'[[,] 'syn2' ... [and ]'synN']"
+ *
+ * followed by ", case-sensitive" and/or ", whole word" when those flags are
+ * set.  Commas between synonyms are used only when there are three or more.
+ *
+ * Returns NULL for a NULL substitution.  The previous guard combined its
+ * two tests with &&, so it dereferenced word exactly when word was NULL and
+ * never returned early otherwise; a substitution without synonyms therefore
+ * still produces a summary with an empty replacement, and that behaviour is
+ * kept.
+ */
+NLM_EXTERN CharPtr SummarizeWordSubstitution (WordSubstitutionPtr word)
+{
+  CharPtr fmt = "allow '%s' to be replaced by '%s'";
+  Int4 len = 0;
+  ValNodePtr vnp;
+  CharPtr summ = NULL;
+
+  if (word == NULL) {
+    return NULL;
+  }
+
+  /* 4 extra characters per synonym cover quotes, space and comma; the
+   * slack from the replaced "%s" markers and the first synonym's unused
+   * allowance covers "and " and the terminating NUL
+   */
+  len = StringLen (fmt) + StringLen (word->word);
+  for (vnp = word->synonyms; vnp != NULL; vnp = vnp->next) {
+    len += StringLen (vnp->data.ptrvalue) + 4;
+  }
+
+  if (word->case_sensitive) {
+    len += StringLen (kCaseSensitive) + 3;
+  }
+  if (word->whole_word) {
+    len += StringLen (kWholeWord) + 3;
+  }
+
+  summ = (CharPtr) MemNew (sizeof (Char) * len);
+  sprintf (summ, fmt,
+           word->word == NULL ? "" : word->word,
+           (word->synonyms == NULL || word->synonyms->data.ptrvalue == NULL) ? "" : word->synonyms->data.ptrvalue);
+  if (word->synonyms != NULL) {
+    for (vnp = word->synonyms->next; vnp != NULL; vnp = vnp->next) {
+      /* comma-separate only when there are at least three synonyms */
+      if (word->synonyms->next->next != NULL) {
+        StringCat (summ, ",");
+      }
+      StringCat (summ, " ");
+      if (vnp->next == NULL) {
+        StringCat (summ, "and ");
+      }
+      StringCat (summ, "'");
+      if (vnp->data.ptrvalue != NULL) {
+        StringCat (summ, vnp->data.ptrvalue);
+      }
+      StringCat (summ, "'");
+    }
+  }
+  if (word->case_sensitive) {
+    StringCat (summ, ", ");
+    StringCat (summ, kCaseSensitive);
+  }
+  if (word->whole_word) {
+    StringCat (summ, ", ");
+    StringCat (summ, kWholeWord);
+  }
+
+  return summ;
+}
+
+
+NLM_EXTERN CharPtr SummarizeStringConstraint (StringConstraintPtr constraint)
{
CharPtr location_word = NULL;
- CharPtr case_sensitive = "case-sensitive";
- CharPtr whole_word = "whole word";
+ CharPtr ignore_space = "ignore spaces";
+ CharPtr ignore_punct = "ignore punctuation";
+ CharPtr ignore_weasel = "ignore 'putative' synonyms";
CharPtr str = NULL;
Int4 len;
CharPtr fmt = "%s '%s'";
+ Boolean has_extra = FALSE;
+ WordSubstitutionPtr word;
+ ValNodePtr subst_words = NULL, vnp;
+ CharPtr tmp;
- if (constraint == NULL || constraint->match_text == NULL) return NULL;
+ if (IsStringConstraintEmpty (constraint)) return NULL;
- location_word = GetStringLocationPhrase (constraint->match_location, constraint->not_present);
- if (location_word == NULL) return NULL;
- len = StringLen (location_word) + StringLen (constraint->match_text) + StringLen (fmt);
- if (constraint->case_sensitive) {
- len += StringLen (case_sensitive) + 3;
- }
- if (constraint->whole_word) {
- len += StringLen (whole_word) + 3;
- }
- str = (CharPtr) MemNew (sizeof (Char) * len);
- sprintf (str, fmt, location_word, constraint->match_text);
- if (constraint->case_sensitive || constraint->whole_word) {
- StringCat (str, " (");
- }
- if (constraint->case_sensitive) {
- StringCat (str, case_sensitive);
+ if (constraint->match_text != NULL) {
+ location_word = GetStringLocationPhrase (constraint->match_location, constraint->not_present);
+ if (location_word == NULL) return NULL;
+ len = StringLen (location_word) + StringLen (constraint->match_text) + StringLen (fmt);
+ if (constraint->case_sensitive) {
+ len += StringLen (kCaseSensitive) + 3;
+ }
+ if (constraint->whole_word) {
+ len += StringLen (kWholeWord) + 3;
+ }
+ if (constraint->ignore_space) {
+ len += StringLen (ignore_space) + 3;
+ }
+ if (constraint->ignore_punct) {
+ len += StringLen (ignore_punct) + 3;
+ }
+ if (constraint->ignore_weasel) {
+ len += StringLen (ignore_weasel) + 3;
+ }
+
+ /* allocate space for substitution phrases */
+ for (word = constraint->ignore_words; word != NULL; word = word->next) {
+ tmp = SummarizeWordSubstitution (word);
+ if (tmp != NULL) {
+ ValNodeAddPointer (&subst_words, 0, tmp);
+ len += StringLen (tmp) + 2;
+ }
+ }
+
+ str = (CharPtr) MemNew (sizeof (Char) * len);
+ sprintf (str, fmt, location_word, constraint->match_text);
+ if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) {
+ StringCat (str, " (");
+ }
+ if (constraint->case_sensitive) {
+ StringCat (str, kCaseSensitive);
+ has_extra = TRUE;
+ }
if (constraint->whole_word) {
+ if (has_extra) {
+ StringCat (str, ", ");
+ }
+ StringCat (str, kWholeWord);
+ has_extra = TRUE;
+ }
+ if (constraint->ignore_space) {
+ if (has_extra) {
+ StringCat (str, ", ");
+ }
+ StringCat (str, ignore_space);
+ has_extra = TRUE;
+ }
+ if (constraint->ignore_punct) {
+ if (has_extra) {
+ StringCat (str, ", ");
+ }
+ StringCat (str, ignore_punct);
+ has_extra = TRUE;
+ }
+ if (constraint->ignore_weasel) {
+ if (has_extra) {
+ StringCat (str, ", ");
+ }
+ StringCat (str, ignore_weasel);
+ has_extra = TRUE;
+ }
+
+ if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) {
+ StringCat (str, ")");
+ }
+
+ for (vnp = subst_words; vnp != NULL; vnp = vnp->next) {
StringCat (str, ", ");
+ StringCat (str, vnp->data.ptrvalue);
}
+
+ subst_words = ValNodeFreeData (subst_words);
}
- if (constraint->whole_word) {
- StringCat (str, whole_word);
+ if (constraint->is_all_caps) {
+ SetStringValue(&str, "all letters are uppercase", ExistingTextOption_append_comma);
}
- if (constraint->case_sensitive || constraint->whole_word) {
- StringCat (str, ")");
+ if (constraint->is_all_lower) {
+ SetStringValue(&str, "all letters are lowercase", ExistingTextOption_append_comma);
+ }
+ if (constraint->is_all_punct) {
+ SetStringValue(&str, "all characters are punctuation", ExistingTextOption_append_comma);
}
return str;
@@ -23450,6 +28710,42 @@ static CharPtr SummarizeFeatureQuantity (ValNodePtr v)
}
+/* Builds a newly allocated phrase "sequence is <quantity word> <n> in length".
+ * v->choice (1-based) picks the entry of s_QuantityWords and
+ * v->data.intvalue supplies the number.  Returns NULL when v is NULL or
+ * v->choice lies outside 1..k_NumQuantityWords.
+ */
+static CharPtr SummarizeSequenceLength (ValNodePtr v)
+{
+  CharPtr phrase_fmt = "sequence is %s %d in length";
+  CharPtr quantity_word;
+  CharPtr result;
+
+  if (v == NULL) {
+    return NULL;
+  }
+  if (v->choice < 1 || v->choice > k_NumQuantityWords) {
+    return NULL;
+  }
+
+  quantity_word = s_QuantityWords[v->choice - 1];
+  /* the 15 extra characters leave room for the printed integer and the terminator */
+  result = (CharPtr) MemNew (sizeof (Char) * (StringLen (phrase_fmt) + StringLen (quantity_word) + 15));
+  sprintf (result, phrase_fmt, quantity_word, v->data.intvalue);
+  return result;
+}
+
+
+static CharPtr s_SequenceConstraintStrandedness[] = { /* phrases handed out by SummarizeFeatureStrandedness, indexed by its strandedness argument */
+ "Any", /* index 0 */
+ "sequence contains only minus strand features", /* index 1 */
+ "sequence contains only plus strand features", /* index 2 */
+ "sequence contains at least one minus strand feature", /* index 3 */
+ "sequence contains at least one plus strand feature", /* index 4 */
+ "sequence contains no minus strand features", /* index 5 */
+ "sequence contains no plus strand features" /* index 6 */
+};
+
+
+/* Looks up the description for a strandedness value in
+ * s_SequenceConstraintStrandedness.  The returned pointer refers to an
+ * entry of that static table, so the caller must not free it.
+ * Returns NULL when the value is past the end of the table.
+ */
+NLM_EXTERN CharPtr SummarizeFeatureStrandedness (Uint2 strandedness)
+{
+  if (strandedness >= sizeof (s_SequenceConstraintStrandedness) / sizeof (CharPtr)) {
+    return NULL;
+  }
+  return s_SequenceConstraintStrandedness[strandedness];
+}
+
+
static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint)
{
CharPtr summ = NULL;
@@ -23459,6 +28755,8 @@ static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint)
CharPtr feat_after = " is present";
CharPtr id_intro = "sequence ID ";
CharPtr feat_quantity = NULL;
+ CharPtr length_quantity = NULL;
+ CharPtr strandedness = NULL;
if (IsSequenceConstraintEmpty (constraint)) {
summ = StringSave ("Missing sequence constraint");
@@ -23484,7 +28782,7 @@ static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint)
}
}
- if (constraint->feature != Feature_type_any) {
+ if (constraint->feature != Macro_feature_type_any) {
featpresent = GetFeatureNameFromFeatureType (constraint->feature);
}
@@ -23521,6 +28819,16 @@ static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint)
len += StringLen (feat_quantity) + 6;
}
+ length_quantity = SummarizeSequenceLength (constraint->length);
+ if (length_quantity != NULL) {
+ len += StringLen (length_quantity) + 6;
+ }
+
+ if (constraint->strandedness > Feature_strandedness_constraint_any) {
+ strandedness = SummarizeFeatureStrandedness(constraint->strandedness);
+ len += StringLen (strandedness) + 6;
+ }
+
if (len == 0) {
summ = StringSave ("missing sequence constraint");
} else {
@@ -23557,15 +28865,32 @@ static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint)
}
StringCat (summ, feat_quantity);
}
+ if (length_quantity != NULL) {
+ if (StringHasNoText (summ)) {
+ StringCat (summ, "where ");
+ } else {
+ StringCat (summ, " and ");
+ }
+ StringCat (summ, length_quantity);
+ }
+ if (strandedness != NULL) {
+ if (StringHasNoText (summ)) {
+ StringCat (summ, "where ");
+ } else {
+ StringCat (summ, " and ");
+ }
+ StringCat (summ, strandedness);
+ }
}
id = MemFree (id);
feat_quantity = MemFree (feat_quantity);
+ length_quantity = MemFree (length_quantity);
}
return summ;
}
-const CharPtr s_SpecialPubFieldWords [] = { "is present", "is not present", "is all caps" };
+const CharPtr s_SpecialPubFieldWords [] = { "is present", "is not present", "is all caps", "is all lowercase", "is all punctuation" };
const Int4 k_NumSpecialPubFieldWords = sizeof (s_SpecialPubFieldWords) / sizeof (CharPtr);
static CharPtr SummarizePubFieldSpecialConstraint (PubFieldSpecialConstraintPtr field)
@@ -23689,6 +29014,225 @@ static CharPtr SummarizeFieldConstraint (FieldConstraintPtr constraint)
}
+/* Builds a newly allocated phrase "where <field> is missing", where <field>
+ * is the text produced by SummarizeFieldType.  Returns NULL when field is
+ * NULL or when SummarizeFieldType yields no label.
+ */
+static CharPtr SummarizeMissingFieldConstraint (FieldTypePtr field)
+{
+  CharPtr phrase_fmt = "where %s is missing";
+  CharPtr field_name;
+  CharPtr summary;
+
+  if (field == NULL) {
+    return NULL;
+  }
+
+  field_name = SummarizeFieldType (field);
+  if (field_name == NULL) {
+    return NULL;
+  }
+
+  /* the two characters of "%s" in the format cover the terminator */
+  summary = (CharPtr) MemNew (sizeof (Char) * (StringLen (phrase_fmt) + StringLen (field_name)));
+  sprintf (summary, phrase_fmt, field_name);
+  field_name = MemFree (field_name);
+
+  return summary;
+}
+
+
+/* Builds a newly allocated phrase of the form
+ *   "where <first word> is <rest>"   or   "where <first word> is not <rest>"
+ * by splitting the text from GetSequenceQualName at its first space and
+ * inserting "is " / "is not " after that space.
+ * Returns NULL when the constraint is empty, when no qualifier name is
+ * available, or when the name contains no space.
+ * NOTE(review): the qualifier name is never freed here -- confirm whether
+ * GetSequenceQualName returns storage the caller owns.
+ */
+static CharPtr SummarizeMolinfoFieldConstraint (MolinfoFieldConstraintPtr constraint)
+{
+  CharPtr size_fmt = "where %s is%s %s";  /* only used to size the buffer */
+  CharPtr qual_name, first_space;
+  CharPtr summary = NULL;
+  Int4 buf_len, word_len;
+
+  if (IsMolinfoFieldConstraintEmpty(constraint)) {
+    return NULL;
+  }
+
+  qual_name = GetSequenceQualName (constraint->field);
+  if (qual_name == NULL) {
+    return NULL;
+  }
+
+  first_space = StringChr (qual_name, ' ');
+  if (first_space == NULL) {
+    return NULL;
+  }
+  word_len = first_space - qual_name;
+
+  buf_len = StringLen (size_fmt) + StringLen (qual_name);
+  if (constraint->is_not) {
+    buf_len += 4;
+  }
+  summary = (CharPtr) MemNew (sizeof (Char) * buf_len);
+
+  sprintf (summary, "where %s", qual_name);
+  /* cut just after the first space of the name: 6 chars of "where ", the word, the space */
+  summary[7 + word_len] = 0;
+  if (constraint->is_not) {
+    StringCat (summary, "is not ");
+  } else {
+    StringCat (summary, "is ");
+  }
+  StringCat (summary, first_space + 1);
+
+  return summary;
+}
+
+
+/* Returns TRUE when the translation constraint carries no condition:
+ * it is NULL, or it has no mismatch count, does not care about internal
+ * stops, and both of its string-constraint lists are empty.
+ */
+NLM_EXTERN Boolean IsTranslationConstraintEmpty (TranslationConstraintPtr constraint)
+{
+  if (constraint == NULL) {
+    return TRUE;
+  }
+  if (constraint->num_mismatches != NULL) {
+    return FALSE;
+  }
+  if (constraint->internal_stops != Match_type_constraint_dont_care) {
+    return FALSE;
+  }
+  if (!IsStringConstraintEmpty (constraint->actual_strings)) {
+    return FALSE;
+  }
+  if (!IsStringConstraintEmpty (constraint->transl_strings)) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* Builds a newly allocated phrase
+ *   "there are <quantity word> <n> mismatches between the actual and translated protein sequences".
+ * v->choice (1-based) picks the entry of s_QuantityWords and
+ * v->data.intvalue supplies the number.  Returns NULL when v is NULL or
+ * v->choice lies outside 1..k_NumQuantityWords.
+ */
+static CharPtr SummarizeTranslationMismatches (ValNodePtr v)
+{
+  CharPtr phrase_fmt = "there are %s %d mismatches between the actual and translated protein sequences";
+  CharPtr quantity_word;
+  CharPtr result;
+
+  if (v == NULL) {
+    return NULL;
+  }
+  if (v->choice < 1 || v->choice > k_NumQuantityWords) {
+    return NULL;
+  }
+
+  quantity_word = s_QuantityWords[v->choice - 1];
+  /* the 15 extra characters leave room for the printed integer and the terminator */
+  result = (CharPtr) MemNew (sizeof (Char) * (StringLen (phrase_fmt) + StringLen (quantity_word) + 15));
+  sprintf (result, phrase_fmt, quantity_word, v->data.intvalue);
+  return result;
+}
+
+
+/* Summarizes every string constraint in the list with
+ * SummarizeStringConstraint and collects the non-NULL results in a new
+ * ValNode list (returned; NULL when nothing was produced).  Adds the
+ * length of each collected phrase plus 2 (room for a ", " separator)
+ * to *p_len.
+ */
+static ValNodePtr CollectTranslationPhrases (StringConstraintPtr scp, Int4 PNTR p_len)
+{
+  ValNodePtr phrases = NULL;
+  CharPtr tmp;
+
+  for (; scp != NULL; scp = scp->next) {
+    tmp = SummarizeStringConstraint (scp);
+    if (tmp != NULL) {
+      *p_len += StringLen (tmp) + 2;
+      ValNodeAddPointer (&phrases, 0, tmp);
+    }
+  }
+  return phrases;
+}
+
+
+/* Appends the separator that precedes phrase number phrase_num (1-based)
+ * out of num_phrases: nothing before the first phrase, ", " when there are
+ * more than two phrases, and " and " before the last phrase.
+ */
+static void AddTranslationPhraseSeparator (CharPtr summary, Int4 phrase_num, Int4 num_phrases)
+{
+  if (phrase_num <= 1) {
+    return;
+  }
+  if (num_phrases > 2) {
+    StringCat (summary, ", ");
+  }
+  if (phrase_num == num_phrases) {
+    StringCat (summary, " and ");
+  }
+}
+
+
+/* Appends intro followed by the comma-separated phrases of the list. */
+static void AppendTranslationPhrases (CharPtr summary, CharPtr intro, ValNodePtr phrases)
+{
+  ValNodePtr vnp;
+
+  StringCat (summary, intro);
+  for (vnp = phrases; vnp != NULL; vnp = vnp->next) {
+    StringCat (summary, vnp->data.ptrvalue);
+    if (vnp->next != NULL) {
+      StringCat (summary, ", ");
+    }
+  }
+}
+
+
+/* Builds a newly allocated description of a translation constraint.
+ * Up to four phrases are joined, in this order: the actual-sequence
+ * string constraints, the translated-sequence string constraints, the
+ * mismatch count, and the internal-stops requirement.
+ * Returns NULL when IsTranslationConstraintEmpty reports the constraint
+ * as empty.
+ *
+ * Fixes relative to the previous version:
+ *  - the "no internal stops" phrase is now emitted for
+ *    Match_type_constraint_no (the branch used to test
+ *    Match_type_constraint_yes a second time and could never run);
+ *  - a phrase is only counted when it will actually be emitted, so the
+ *    " and " separator lands before the real last phrase;
+ *  - the buffer length is no longer adjusted after the allocation.
+ */
+static CharPtr SummarizeTranslationConstraint (TranslationConstraintPtr constraint)
+{
+  CharPtr rval = NULL;
+  CharPtr mismatch = NULL;
+  CharPtr stops_phrase = NULL;
+  CharPtr where_actual_sequence = "where actual sequence ";
+  CharPtr where_transl_sequence = "where translated sequence ";
+  CharPtr has_internal_stops = "sequence has internal stops";
+  CharPtr no_internal_stops = "sequence has no internal stops";
+  Int4 len = 0;
+  ValNodePtr actual_phrases = NULL, transl_phrases = NULL;
+  Int4 num_phrases = 0, phrase_num = 1;
+
+  if (IsTranslationConstraintEmpty(constraint)) {
+    return NULL;
+  }
+
+  /* first pass: gather the phrases and the space they need; each phrase
+   * reserves 5 extra characters for the separator in front of it */
+  actual_phrases = CollectTranslationPhrases (constraint->actual_strings, &len);
+  if (actual_phrases != NULL) {
+    len += StringLen (where_actual_sequence) + 5;
+    num_phrases++;
+  }
+
+  transl_phrases = CollectTranslationPhrases (constraint->transl_strings, &len);
+  if (transl_phrases != NULL) {
+    len += StringLen (where_transl_sequence) + 5;
+    num_phrases++;
+  }
+
+  /* SummarizeTranslationMismatches returns NULL for a NULL or invalid node */
+  mismatch = SummarizeTranslationMismatches (constraint->num_mismatches);
+  if (mismatch != NULL) {
+    len += StringLen (mismatch) + 5;
+    num_phrases++;
+  }
+
+  if (constraint->internal_stops == Match_type_constraint_yes) {
+    stops_phrase = has_internal_stops;
+  } else if (constraint->internal_stops == Match_type_constraint_no) {
+    stops_phrase = no_internal_stops;
+  }
+  if (stops_phrase != NULL) {
+    len += StringLen (stops_phrase) + 5;
+    num_phrases++;
+  }
+
+  /* + 1 guarantees room for the terminator even when no phrase was collected */
+  rval = (CharPtr) MemNew (sizeof (Char) * (len + 1));
+  rval[0] = 0;
+
+  /* second pass: write the phrases in order */
+  if (actual_phrases != NULL) {
+    AppendTranslationPhrases (rval, where_actual_sequence, actual_phrases);
+    actual_phrases = ValNodeFreeData (actual_phrases);
+    phrase_num++;
+  }
+
+  if (transl_phrases != NULL) {
+    AddTranslationPhraseSeparator (rval, phrase_num, num_phrases);
+    AppendTranslationPhrases (rval, where_transl_sequence, transl_phrases);
+    transl_phrases = ValNodeFreeData (transl_phrases);
+    phrase_num++;
+  }
+
+  if (mismatch != NULL) {
+    AddTranslationPhraseSeparator (rval, phrase_num, num_phrases);
+    StringCat (rval, mismatch);
+    mismatch = MemFree (mismatch);
+    phrase_num++;
+  }
+
+  if (stops_phrase != NULL) {
+    AddTranslationPhraseSeparator (rval, phrase_num, num_phrases);
+    StringCat (rval, stops_phrase);
+    phrase_num++;
+  }
+
+  return rval;
+}
+
+
NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint)
{
CharPtr phrase = NULL, tmp;
@@ -23725,6 +29269,15 @@ NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint)
case ConstraintChoice_field:
phrase = SummarizeFieldConstraint (constraint->data.ptrvalue);
break;
+ case ConstraintChoice_molinfo:
+ phrase = SummarizeMolinfoFieldConstraint (constraint->data.ptrvalue);
+ break;
+ case ConstraintChoice_field_missing:
+ phrase = SummarizeMissingFieldConstraint (constraint->data.ptrvalue);
+ break;
+ case ConstraintChoice_translation:
+ phrase = SummarizeTranslationConstraint (constraint->data.ptrvalue);
+ break;
}
return phrase;
}
@@ -23867,6 +29420,19 @@ NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig)
}
+/* Returns a column configuration to its cleared state: the match type,
+ * field and constraint are passed to their free routines (whose return
+ * values are stored back), and the scalar options are set to
+ * replace-old / skip blanks / do not match mRNA.  A NULL pointer is ignored.
+ */
+NLM_EXTERN void TabColumnConfigReset (TabColumnConfigPtr t)
+{
+  if (t == NULL) {
+    return;
+  }
+
+  /* release the sub-objects */
+  t->constraint = ConstraintChoiceSetFree (t->constraint);
+  t->field = FieldTypeFree (t->field);
+  t->match_type = MatchTypeFree (t->match_type);
+
+  /* scalar options */
+  t->match_mrna = FALSE;
+  t->skip_blank = TRUE;
+  t->existing_text = ExistingTextOption_replace_old;
+}
+
+
NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns)
{
ValNodePtr vnp_next;
@@ -23961,7 +29527,7 @@ NLM_EXTERN ValNodePtr ValidateFeatureFieldColumnNames (ValNodePtr header_line, V
} else if (err_list == NULL) {
/* if we've already found errors, don't bother collecting more fields */
field = FeatureFieldNew ();
- field->type = Feature_type_any;
+ field->type = Macro_feature_type_any;
field->field = ValNodeNew (NULL);
field->field->choice = FeatQualChoice_legal_qual;
field->field->data.intvalue = featqual;
@@ -24282,6 +29848,21 @@ static BioseqSearchItemPtr BioseqSearchItemFree (BioseqSearchItemPtr bsi)
}
+/* Frees a ValNode list whose data pointers are BioseqSearchItems: each
+ * item goes through BioseqSearchItemFree and each node through
+ * ValNodeFree.  Always returns NULL (the exhausted list pointer).
+ */
+static ValNodePtr BioseqSearchItemListFree (ValNodePtr vnp)
+{
+  ValNodePtr remaining = vnp;
+  ValNodePtr current;
+
+  while (remaining != NULL) {
+    current = remaining;
+    remaining = current->next;
+    /* detach the node so that freeing it cannot touch the rest of the list */
+    current->next = NULL;
+    current->data.ptrvalue = BioseqSearchItemFree (current->data.ptrvalue);
+    current = ValNodeFree (current);
+  }
+  return remaining;
+}
+
+
static int CompareBioseqSearchItem (BioseqSearchItemPtr b1, BioseqSearchItemPtr b2)
{
if (b1 == NULL && b2 == NULL) {
@@ -24323,6 +29904,18 @@ static int LIBCALLBACK SortVnpByBioseqSearchItem (VoidPtr ptr1, VoidPtr ptr2)
}
+/* Points a ValNodeBlock at an existing list: head is the first node and
+ * tail the last node of list (both NULL for an empty list).
+ * vnbp itself is not checked for NULL.
+ */
+NLM_EXTERN void InitValNodeBlock (ValNodeBlockPtr vnbp, ValNodePtr list)
+{
+  ValNodePtr last = list;
+
+  if (last != NULL) {
+    while (last->next != NULL) {
+      last = last->next;
+    }
+  }
+  vnbp->head = list;
+  vnbp->tail = last;
+}
+
+
NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data)
{
ValNodePtr vnp_new;
@@ -24347,13 +29940,38 @@ NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Po
}
-static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data)
+NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list)
{
- SeqIdPtr sip, sip_next;
- CharPtr id, cp;
+ if (vnbp->head == NULL) {
+ vnbp->head = list;
+ vnbp->tail = list;
+ } else {
+ vnbp->tail->next = list;
+ while (vnbp->tail->next != NULL) {
+ vnbp->tail = vnbp->tail->next;
+ }
+ }
+}
+
+
+/* Returns the first SeqId in the list whose choice is SEQID_LOCAL,
+ * or NULL when the list has none.
+ */
+static SeqIdPtr FindLocalId (SeqIdPtr list)
+{
+  SeqIdPtr sip;
+
+  for (sip = list; sip != NULL; sip = sip->next) {
+    if (sip->choice == SEQID_LOCAL) {
+      return sip;
+    }
+  }
+  return NULL;
+}
+
+
+static void BuildIdStringsListForIdList (SeqIdPtr sip_list, BioseqPtr bsp, ValNodeBlockPtr block)
+{
+ SeqIdPtr sip, sip_next, local;
+ CharPtr id, cp, tmp;
DbtagPtr dbtag;
+ ObjectIdPtr oid;
+ Int4 len;
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ for (sip = sip_list; sip != NULL; sip = sip->next) {
sip_next = sip->next;
sip->next = NULL;
id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG);
@@ -24364,28 +29982,36 @@ static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data)
{
id[StringLen(id) - 1] = 0;
}
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
/* remove leading pipe identifier */
cp = StringChr (id, '|');
if (cp != NULL)
{
cp = cp + 1;
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, cp, FALSE));
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, cp, FALSE));
} else {
cp = id;
}
-
- /* try ID without version */
- id = StringSave (cp);
- cp = StringChr (id, '.');
- if (cp != NULL)
- {
- *cp = 0;
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
- } else {
- id = MemFree (id);
+ if (sip->choice == SEQID_GENBANK
+ || sip->choice == SEQID_EMBL
+ || sip->choice == SEQID_DDBJ
+ || sip->choice == SEQID_TPG
+ || sip->choice == SEQID_TPE
+ || sip->choice == SEQID_TPD
+ || sip->choice == SEQID_PIR
+ || sip->choice == SEQID_SWISSPROT) {
+ /* if this is an ID that has a version, try text without version */
+ id = StringSave (cp);
+ cp = StringChr (id, '.');
+ if (cp != NULL && IsAllDigits (cp + 1))
+ {
+ *cp = 0;
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
+ } else {
+ id = MemFree (id);
+ }
}
/* just bankit number */
@@ -24395,18 +30021,37 @@ static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data)
if (dbtag->tag->id > 0) {
id = (CharPtr) MemNew (sizeof (Char) * 22);
sprintf (id, "BankIt%d", dbtag->tag->id);
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewInt (bsp, dbtag->tag->id));
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewInt (bsp, dbtag->tag->id));
} else {
id = (CharPtr) MemNew (sizeof (Char) * (8 + StringLen (dbtag->tag->str)));
sprintf (id, "BankIt%s", dbtag->tag->str);
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
}
+ /* also look for BankIt id with forward slash instead of _ */
+ if ((cp = StringRChr (id, '_')) != NULL) {
+ len = cp - id;
+ tmp = StringSave (id);
+ tmp[len] = '/';
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tmp, TRUE));
+ }
} else if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) {
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
- if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) {
- ValNodeAddPointerToEnd ((ValNodeBlockPtr)data, 0, BioseqSearchItemNewStr (bsp, cp + 1, FALSE));
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
+ if ((local = FindLocalId(bsp->id)) != NULL
+ && (oid = (ObjectIdPtr) local->data.ptrvalue) != NULL
+ && oid->str != NULL
+ && (cp = StringSearch (dbtag->tag->str, oid->str)) == dbtag->tag->str + StringLen (dbtag->tag->str) - StringLen (oid->str)) {
+ /* file ID already ends with local ID, don't need to add twice, but do add file name */
+ id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str));
+ StringNCpy (id, dbtag->tag->str, cp - dbtag->tag->str - 1);
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
+ } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) {
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, cp + 1, FALSE));
+ /* also add string for just file name */
+ id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str));
+ StringNCpy (id, dbtag->tag->str, cp - dbtag->tag->str - 1);
+ ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
}
}
}
@@ -24415,6 +30060,68 @@ static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data)
}
+/* Bioseq visitor: adds the ID strings for bsp's own ID list to the
+ * ValNodeBlock passed through data.  NULL Bioseqs are ignored.
+ */
+static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data)
+{
+  ValNodeBlockPtr block = (ValNodeBlockPtr) data;
+
+  if (bsp == NULL) {
+    return;
+  }
+  BuildIdStringsListForIdList (bsp->id, bsp, block);
+}
+
+
+/* For every BankIt search string of the form "BankIt..._suffix" whose
+ * part before the last underscore is not shared (case-insensitively) with
+ * a neighbouring entry in sorted order, appends an extra search item to
+ * list holding the string with the "_suffix" removed.
+ * Strings whose seventh character is '|' are not considered.
+ */
+static void AddBankItSingletons (ValNodeBlockPtr list)
+{
+  BioseqSearchItemPtr current, other;
+  ValNodePtr cursor, following, scan;
+  ValNodePtr bankit_items = NULL;
+  CharPtr truncated = NULL, underscore;
+  Int4 prefix_len = 0, other_len;
+  Boolean shared_prefix;
+
+  /* pick out the candidate items; the nodes only borrow the item pointers */
+  for (cursor = list->head; cursor != NULL; cursor = cursor->next) {
+    current = (BioseqSearchItemPtr) cursor->data.ptrvalue;
+    if (current == NULL) {
+      continue;
+    }
+    if (StringNICmp (current->str, "BankIt", 6) != 0) {
+      continue;
+    }
+    if (current->str[6] == '|') {
+      continue;
+    }
+    if (StringChr (current->str, '_') == NULL) {
+      continue;
+    }
+    ValNodeAddPointer (&bankit_items, 0, current);
+  }
+  bankit_items = ValNodeSort (bankit_items, SortVnpByBioseqSearchItem);
+
+  cursor = bankit_items;
+  while (cursor != NULL) {
+    current = (BioseqSearchItemPtr) cursor->data.ptrvalue;
+    following = cursor->next;
+    shared_prefix = FALSE;
+
+    if (following != NULL) {
+      other = following->data.ptrvalue;
+      underscore = StringRChr (current->str, '_');
+      prefix_len = underscore - current->str;
+      underscore = StringRChr (other->str, '_');
+      other_len = underscore - other->str;
+      if (prefix_len == other_len && StringNICmp (current->str, other->str, prefix_len) == 0) {
+        shared_prefix = TRUE;
+      }
+    }
+
+    if (!shared_prefix) {
+      /* unique prefix: register the string without its "_suffix" */
+      truncated = StringSave (current->str);
+      underscore = StringRChr (truncated, '_');
+      if (underscore != NULL) {
+        *underscore = 0;
+      }
+      ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (current->bsp, truncated, TRUE));
+      cursor = following;
+      continue;
+    }
+
+    /* prefix is shared: step over every further entry with the same prefix */
+    scan = following->next;
+    while (scan != NULL) {
+      other = (BioseqSearchItemPtr) scan->data.ptrvalue;
+      if (other == NULL) {
+        break;
+      }
+      underscore = StringRChr (other->str, '_');
+      if (underscore == NULL) {
+        break;
+      }
+      other_len = underscore - other->str;
+      if (other_len != prefix_len || StringNICmp (current->str, other->str, prefix_len) != 0) {
+        break;
+      }
+      scan = scan->next;
+    }
+    cursor = scan;
+  }
+
+  bankit_items = ValNodeFree (bankit_items);
+}
+
+
/* first are str, second are int */
typedef struct bioseqsearchindex {
Int4 num_str;
@@ -24450,6 +30157,7 @@ static BioseqSearchIndexPtr BuildIDStringsList (SeqEntryPtr sep)
vnb.tail = NULL;
VisitBioseqsInSep (sep, &vnb, BuildIDStringsListCallback);
+ AddBankItSingletons(&vnb);
list = vnb.head;
list = ValNodeSort (list, SortVnpByBioseqSearchItem);
@@ -24490,9 +30198,9 @@ static BioseqPtr FindStringInIdListIndex (CharPtr str, BioseqSearchIndexPtr inde
while (imax >= imin)
{
i = (imax + imin)/2;
- if (index->items[i]->num < match)
+ if (index->items[i]->num > match)
imax = i - 1;
- else if (index->items[i]->num > match)
+ else if (index->items[i]->num < match)
imin = i + 1;
else
{
@@ -24659,7 +30367,7 @@ static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp)
}
else if (featdef == FEATDEF_mRNA)
{
- sfp = SeqMgrGetOverlappingmRNA (cds->location, &fcontext);
+ sfp = GetmRNAforCDS (cds);
}
if (sfp != NULL)
{
@@ -24909,12 +30617,21 @@ static void AddBioSourcesForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
SeqDescrPtr sdp;
SeqMgrDescContext context;
+ Boolean any = FALSE;
+ SeqEntryPtr sep;
- if (feature_list == NULL) return;
+ if (bsp == NULL || feature_list == NULL) return;
for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
sdp != NULL;
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &context)) {
ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp);
+ any = TRUE;
+ }
+ if (!any && !ISA_aa (bsp->mol)) {
+ sep = GetBestTopParentForData (bsp->idx.entityID, bsp);
+ sdp = CreateNewDescriptor (sep, Seq_descr_source);
+ sdp->data.ptrvalue = BioSourceNew ();
+ ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp);
}
}
@@ -25135,7 +30852,8 @@ static ValNodePtr GetSequenceListForBioSourceObjects (ValNodePtr item_list)
ovp = (ObjValNodePtr) sdp;
if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
sep = SeqMgrGetSeqEntryForData (ovp->idx.parentptr);
- VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback);
+ /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
+ seq_list = CollectNucBioseqs (sep);
} else if (ovp->idx.parenttype == OBJ_BIOSEQ) {
bsp = (BioseqPtr) ovp->idx.parentptr;
if (bsp != NULL) {
@@ -25233,6 +30951,30 @@ static ValNodePtr GetStructuredCommentListForRowAndColumn (MatchTypePtr match_ty
}
+/* Collects the user-object descriptors that IsUserObjectDBLink accepts
+ * from every Bioseq returned by GetSequenceListForRowAndColumn for this
+ * match.  The result is a new ValNode list with choice OBJ_SEQDESC whose
+ * data pointers are the descriptors themselves; NULL when none were found.
+ */
+static ValNodePtr GetDBLinkListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list)
+{
+  ValNodePtr bioseqs, entry;
+  ValNodePtr dblinks = NULL;
+  SeqDescrPtr desc;
+  SeqMgrDescContext context;
+
+  bioseqs = GetSequenceListForRowAndColumn (match_type, match_list);
+
+  for (entry = bioseqs; entry != NULL; entry = entry->next) {
+    if (entry->choice != OBJ_BIOSEQ) {
+      continue;
+    }
+    desc = SeqMgrGetNextDescriptor (entry->data.ptrvalue, NULL, Seq_descr_user, &context);
+    while (desc != NULL) {
+      if (IsUserObjectDBLink (desc->data.ptrvalue)) {
+        ValNodeAddPointer (&dblinks, OBJ_SEQDESC, desc);
+      }
+      desc = SeqMgrGetNextDescriptor (entry->data.ptrvalue, desc, Seq_descr_user, &context);
+    }
+  }
+
+  /* only the list nodes are released; the Bioseqs are not freed here */
+  bioseqs = ValNodeFree (bioseqs);
+  return dblinks;
+}
+
+
static ValNodePtr GetTargetListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FieldTypePtr field, ValNodePtr constraint)
{
ValNodePtr target_list = NULL, vnp_prev = NULL, vnp, vnp_next, tmp_list;
@@ -25262,6 +31004,9 @@ static ValNodePtr GetTargetListForRowAndColumn (MatchTypePtr match_type, ValNode
case FieldType_struc_comment_field:
target_list = GetStructuredCommentListForRowAndColumn (match_type, match_list);
break;
+ case FieldType_dblink:
+ target_list = GetDBLinkListForRowAndColumn (match_type, match_list);
+ break;
case FieldType_misc:
if (field->data.intvalue == Misc_field_genome_project_id) {
target_list = GetSequenceListForRowAndColumn (match_type, match_list);
@@ -25324,7 +31069,9 @@ static void ReportMissingTargets (ValNodeBlockPtr err_list, FieldTypePtr ft, Cha
CharPtr no_src_fmt = "No biosource for %s (column %d, line %d)";
CharPtr no_seq_fmt = "No sequence for %s (column %d, line %d)";
CharPtr no_cmt_fmt = "No structured comment for %s (column %d, line %d)";
+ CharPtr no_dblink_fmt = "No DBLink object for %s (column %d, line %d)";
CharPtr err_msg;
+ RnaQualPtr rq;
if (err_list == NULL || ft == NULL || match_val == NULL) return;
@@ -25362,9 +31109,9 @@ static void ReportMissingTargets (ValNodeBlockPtr err_list, FieldTypePtr ft, Cha
field = FeatureFieldFree (field);
break;
case FieldType_rna_field:
- field = FeatureFieldFromRnaQual (ft->data.ptrvalue);
- if (field != NULL) {
- feat_name = GetFeatureNameFromFeatureType (field->type);
+ rq = (RnaQualPtr) ft->data.ptrvalue;
+ if (rq != NULL) {
+ feat_name = SummarizeRnaType (rq->type);
err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt)
+ StringLen (feat_name)
+ StringLen (match_val)
@@ -25372,13 +31119,17 @@ static void ReportMissingTargets (ValNodeBlockPtr err_list, FieldTypePtr ft, Cha
sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num);
ValNodeAddPointerToEnd (err_list, 0, err_msg);
}
- field = FeatureFieldFree (field);
break;
case FieldType_struc_comment_field:
err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_cmt_fmt) + StringLen (match_val) + 30));
sprintf (err_msg, no_cmt_fmt, match_val, col_num, line_num);
ValNodeAddPointerToEnd (err_list, 0, err_msg);
break;
+ case FieldType_dblink:
+ err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_dblink_fmt) + StringLen (match_val) + 30));
+ sprintf (err_msg, no_dblink_fmt, match_val, col_num, line_num);
+ ValNodeAddPointerToEnd (err_list, 0, err_msg);
+ break;
case FieldType_misc:
err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_seq_fmt)
+ StringLen (match_val)
@@ -25418,7 +31169,6 @@ static ValNodePtr FindMatchChoiceInLine (ValNodePtr val_vnp, ValNodePtr col_vnp)
NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp)
{
- SeqMgrFeatContext fcontext;
BioseqPtr pbsp;
if (sfp == NULL) return NULL;
@@ -25428,7 +31178,7 @@ NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp)
sfp = SeqMgrGetCDSgivenProduct (pbsp, NULL);
if (sfp == NULL) return NULL;
}
- return SeqMgrGetOverlappingmRNA (sfp->location, &fcontext);
+ return GetmRNAforCDS (sfp);
}
@@ -25474,7 +31224,7 @@ NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft)
if (ft == NULL) return FALSE;
if (ft->choice == FieldType_feature_field) {
field = (FeatureFieldPtr) ft->data.ptrvalue;
- if (field != NULL && field->type == Feature_type_cds
+ if (field != NULL && field->type == Macro_feature_type_cds
&& field->field != NULL
&& field->field->choice == FeatQualChoice_legal_qual
&& field->field->data.intvalue == Feat_qual_legal_product) {
@@ -25497,7 +31247,7 @@ static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft)
if (ft == NULL) return FALSE;
if (ft->choice == FieldType_feature_field) {
field = (FeatureFieldPtr) ft->data.ptrvalue;
- if (field != NULL && (field->type == Feature_type_cds || field->type == Feature_type_prot)
+ if (field != NULL && (field->type == Macro_feature_type_cds || field->type == Macro_feature_type_prot)
&& field->field != NULL
&& field->field->choice == FeatQualChoice_legal_qual
&& field->field->data.intvalue == Feat_qual_legal_description) {
@@ -25521,7 +31271,7 @@ static Boolean IsFieldTypeGeneLocusTag (FieldTypePtr ft)
if (ft == NULL) return FALSE;
if (ft->choice == FieldType_feature_field) {
field = (FeatureFieldPtr) ft->data.ptrvalue;
- if (field != NULL && field->type == Feature_type_gene
+ if (field != NULL && field->type == Macro_feature_type_gene
&& field->field != NULL
&& field->field->choice == FeatQualChoice_legal_qual
&& field->field->data.intvalue == Feat_qual_legal_locus_tag) {
@@ -25617,6 +31367,7 @@ NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, V
CharPtr no_match_fmt = "No match for %s, line %d";
CharPtr no_match_txt_fmt = "No match text for line %d";
CharPtr msg;
+ BioseqSearchIndexPtr index = NULL;
if (sep == NULL || table == NULL || match_type == NULL || col < 0) {
@@ -25625,6 +31376,8 @@ NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, V
entityID = SeqMgrGetEntityIDForSeqEntry (sep);
+ index = BuildIDStringsList(sep);
+
for (vnp_row = table, line = 1; vnp_row != NULL; vnp_row = vnp_row->next, line++) {
vnp = vnp_row->data.ptrvalue;
num = 0;
@@ -25640,7 +31393,7 @@ NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, V
ValNodeAddPointer (p_err_list, 0, msg);
}
} else {
- match_list = FindMatchForRow (match_type, vnp->data.ptrvalue, entityID, sep);
+ match_list = FindMatchForRowEx (match_type, vnp->data.ptrvalue, entityID, sep, index);
target_list = GetSequenceListForRowAndColumn (match_type, match_list);
match_list = ValNodeFree (match_list);
ValNodeAddPointer (&sequence_lists, 0, target_list);
@@ -25651,6 +31404,8 @@ NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, V
}
}
}
+ index = BioseqSearchIndexFree (index);
+
return sequence_lists;
}
@@ -25667,6 +31422,51 @@ NLM_EXTERN ValNodePtr FreeSequenceLists (ValNodePtr lists)
}
+/* For each entry of query_list (whose data pointer is passed to
+ * BuildIdStringsListForIdList as a SeqId list), looks for a Bioseq in
+ * sep whose ID strings match one of the strings generated for the query.
+ *
+ * match_location == String_location_equals uses the exact-match index
+ * lookup; any other location accepts a match only when exactly one
+ * Bioseq is found.
+ *
+ * Returns a new ValNode list (choice OBJ_BIOSEQ) with one node per query
+ * entry, in query order; the data pointer is the matching Bioseq or NULL.
+ *
+ * Search items that come back NULL are now skipped instead of being
+ * dereferenced.
+ *
+ * NOTE(review): BuildIdStringsListForIdList is called with a NULL bsp,
+ * and its NCBIFILE branch reads bsp->id -- confirm that query IDs can
+ * never take that path.
+ */
+NLM_EXTERN ValNodePtr GetBioseqMatchesForSequenceIDs (ValNodePtr query_list, Uint1 match_location, SeqEntryPtr sep)
+{
+  ValNodePtr response_list = NULL, vnp, single_list, vnp_t;
+  BioseqSearchIndexPtr index = NULL;
+  BioseqPtr bsp;
+  ValNodeBlock thisid_index;
+  BioseqSearchItemPtr si;
+  Char num_buf[15];
+  CharPtr match_str;
+
+  index = BuildIDStringsList(sep);
+
+  for (vnp = query_list; vnp != NULL; vnp = vnp->next) {
+    /* generate every search string for this query ID */
+    InitValNodeBlock (&thisid_index, NULL);
+    BuildIdStringsListForIdList (vnp->data.ptrvalue, NULL, &thisid_index);
+
+    /* try the strings in order until one of them yields a Bioseq */
+    bsp = NULL;
+    for (vnp_t = thisid_index.head; vnp_t != NULL && bsp == NULL; vnp_t = vnp_t->next) {
+      si = (BioseqSearchItemPtr) vnp_t->data.ptrvalue;
+      if (si == NULL) {
+        continue;
+      }
+      if (si->num > 0) {
+        /* numeric items are looked up through their decimal text */
+        sprintf (num_buf, "%d", si->num);
+        match_str = num_buf;
+      } else {
+        match_str = si->str;
+      }
+      if (match_location == String_location_equals) {
+        bsp = FindStringInIdListIndex (match_str, index);
+      } else {
+        single_list = FindListInIdListIndex (match_location, match_str, index);
+        /* an ambiguous (multi-Bioseq) result is not accepted */
+        if (single_list != NULL && single_list->next == NULL) {
+          bsp = single_list->data.ptrvalue;
+        }
+        single_list = ValNodeFree (single_list);
+      }
+    }
+
+    thisid_index.head = BioseqSearchItemListFree(thisid_index.head);
+    ValNodeAddPointer (&response_list, OBJ_BIOSEQ, bsp);
+  }
+
+  index = BioseqSearchIndexFree (index);
+  return response_list;
+}
+
+
static ValNodePtr ReportTableSummaryLine (Int4 err_lines, Int4 total_lines, CharPtr fmt)
{
CharPtr str;
@@ -25934,9 +31734,15 @@ static ValNodePtr CountObjectsForColumnFields (SeqEntryPtr sep, ValNodePtr colum
num = ValNodeLen (tmp_list);
tmp_list = ValNodeFree (tmp_list);
break;
+ case FieldType_dblink:
+ VisitDescriptorsInSep (sep, &tmp_list, CollectDBLinksCallback);
+ num = ValNodeLen (tmp_list);
+ tmp_list = ValNodeFree (tmp_list);
+ break;
case FieldType_misc:
if (t->field->data.intvalue == Misc_field_genome_project_id) {
- VisitBioseqsInSep (sep, &tmp_list, CollectNucBioseqCallback);
+ /* VisitBioseqsInSep (sep, &tmp_list, CollectNucBioseqCallback); */
+ tmp_list = CollectNucBioseqs (sep);
num = ValNodeLen (tmp_list);
tmp_list = ValNodeFree (tmp_list);
} else if (t->field->data.intvalue == Misc_field_comment_descriptor) {
@@ -26686,5 +32492,2119 @@ NLM_EXTERN ValNodePtr GetBankitCommentsOnSep (SeqEntryPtr sep)
}
+/* BioSource visitor that splits multi-primer PCR reactions by position.
+ * As long as a reaction holds at least two forward AND at least two reverse
+ * primers, everything after the first primer of each list is moved into a
+ * newly created reaction that is linked in directly after the current one;
+ * the same test is then repeated on that new reaction.
+ * The data argument is not used.
+ */
+static void SplitPCRPrimersByPositionCallback (BioSourcePtr biop, Pointer data)
+{
+  PCRReactionPtr cur, following, added;
+  PCRPrimerPtr   fwd, rev;
+
+  if (biop == NULL || biop->pcr_primers == NULL) {
+    return;
+  }
+
+  cur = biop->pcr_primers;
+  while (cur != NULL) {
+    /* remember the original successor before new reactions are spliced in */
+    following = cur->next;
+    fwd = cur->forward;
+    rev = cur->reverse;
+
+    for (;;) {
+      if (fwd == NULL || rev == NULL || fwd->next == NULL || rev->next == NULL) {
+        break;
+      }
+      added = PCRReactionNew ();
+      added->forward = fwd->next;
+      added->reverse = rev->next;
+      fwd->next = NULL;
+      rev->next = NULL;
+      added->next = following;
+      cur->next = added;
+
+      cur = added;
+      fwd = cur->forward;
+      rev = cur->reverse;
+    }
+
+    cur = following;
+  }
+}
+
+
+/* Runs SplitPCRPrimersByPositionCallback on every BioSource visited in sep. */
+NLM_EXTERN void SplitPCRPrimersByPosition (SeqEntryPtr sep)
+{
+ VisitBioSourcesInSep (sep, NULL, SplitPCRPrimersByPositionCallback);
+}
+
+
+/* BioSource visitor that merges all PCR reactions of a BioSource into the
+ * first one.  The forward primers of every later reaction are appended to the
+ * first reaction's forward list (likewise for reverse primers), and the
+ * emptied reactions are freed.  Nothing is done when there are fewer than two
+ * reactions.  The data argument is not used.
+ */
+static void MergePCRPrimersCallback (BioSourcePtr biop, Pointer data)
+{
+  PCRReactionPtr ps, ps_next;
+  PCRPrimerPtr   pp_f_last, pp_r_last;
+
+  if (biop == NULL || biop->pcr_primers == NULL || biop->pcr_primers->next == NULL) {
+    return;
+  }
+
+  /* locate the current tails of the first reaction's primer lists */
+  pp_f_last = biop->pcr_primers->forward;
+  if (pp_f_last != NULL) {
+    while (pp_f_last->next != NULL) {
+      pp_f_last = pp_f_last->next;
+    }
+  }
+  pp_r_last = biop->pcr_primers->reverse;
+  if (pp_r_last != NULL) {
+    while (pp_r_last->next != NULL) {
+      pp_r_last = pp_r_last->next;
+    }
+  }
+
+  /* detach the remaining reactions; they are consumed one by one below */
+  ps = biop->pcr_primers->next;
+  biop->pcr_primers->next = NULL;
+
+  while (ps != NULL) {
+    ps_next = ps->next;
+    ps->next = NULL;
+    if (ps->forward != NULL) {
+      if (pp_f_last == NULL) {
+        /* first reaction had no forward primers: this list becomes the head.
+         * The tail pointer must be seeded here, otherwise the walk below
+         * would dereference NULL.
+         */
+        biop->pcr_primers->forward = ps->forward;
+        pp_f_last = ps->forward;
+      } else {
+        pp_f_last->next = ps->forward;
+      }
+      while (pp_f_last->next != NULL) {
+        pp_f_last = pp_f_last->next;
+      }
+      ps->forward = NULL;
+    }
+    if (ps->reverse != NULL) {
+      if (pp_r_last == NULL) {
+        /* same seeding as for the forward list */
+        biop->pcr_primers->reverse = ps->reverse;
+        pp_r_last = ps->reverse;
+      } else {
+        pp_r_last->next = ps->reverse;
+      }
+      while (pp_r_last->next != NULL) {
+        pp_r_last = pp_r_last->next;
+      }
+      ps->reverse = NULL;
+    }
+    /* primer pointers were cleared above, so only the reaction shell is freed */
+    ps = PCRReactionFree (ps);
+    ps = ps_next;
+  }
+}
+
+
+/* Runs MergePCRPrimersCallback on every BioSource visited in sep. */
+NLM_EXTERN void MergePCRPrimers (SeqEntryPtr sep)
+{
+ VisitBioSourcesInSep (sep, NULL, MergePCRPrimersCallback);
+}
+
+
+/* Removes from *pp_list every primer whose name satisfies scp and returns
+ * the removed primers as a separate list, in their original relative order.
+ * Primers that do not match stay in *pp_list.  Returns NULL when pp_list is
+ * NULL or empty, or when nothing matched.
+ */
+static PCRPrimerPtr ExtractPrimersByConstraint (PCRPrimerPtr PNTR pp_list, StringConstraintPtr scp)
+{
+  PCRPrimerPtr      extracted = NULL;
+  PCRPrimerPtr PNTR src_link;
+  PCRPrimerPtr PNTR dst_link = &extracted;
+  PCRPrimerPtr      cur;
+
+  if (pp_list == NULL || *pp_list == NULL) {
+    return NULL;
+  }
+
+  /* src_link always addresses the pointer that currently leads to cur */
+  src_link = pp_list;
+  while ((cur = *src_link) != NULL) {
+    if (DoesStringMatchConstraint (cur->name, scp)) {
+      /* unlink from the source list, append to the extracted list */
+      *src_link = cur->next;
+      cur->next = NULL;
+      *dst_link = cur;
+      dst_link = &(cur->next);
+    } else {
+      src_link = &(cur->next);
+    }
+  }
+  return extracted;
+}
+
+
+/* Pair of string constraints handed to SplitPCRPrimersByConstraintsCallback
+ * through the visitor's user-data pointer.
+ */
+typedef struct stringconstraintpair {
+ StringConstraintPtr scp1; /* applied to forward primer names */
+ StringConstraintPtr scp2; /* applied to reverse primer names */
+} StringConstraintPairData, PNTR StringConstraintPairPtr;
+
+/* BioSource visitor: pulls the forward primers matching pair->scp1 and the
+ * reverse primers matching pair->scp2 out of every PCR reaction of biop and
+ * gathers them in one new reaction, which is appended after the last existing
+ * reaction.  If nothing matched, the new reaction is freed again.
+ * data must point to a StringConstraintPairData.
+ */
+static void SplitPCRPrimersByConstraintsCallback (BioSourcePtr biop, Pointer data)
+{
+  PCRReactionPtr          rxn, tail_rxn = NULL, collected;
+  PCRPrimerPtr            matched;
+  PCRPrimerPtr PNTR       fwd_link;
+  PCRPrimerPtr PNTR       rev_link;
+  StringConstraintPairPtr pair = (StringConstraintPairPtr) data;
+
+  if (biop == NULL || biop->pcr_primers == NULL || pair == NULL) {
+    return;
+  }
+
+  collected = PCRReactionNew ();
+  /* the link pointers address the slot where the next batch gets attached */
+  fwd_link = &(collected->forward);
+  rev_link = &(collected->reverse);
+
+  for (rxn = biop->pcr_primers; rxn != NULL; rxn = rxn->next) {
+    /* take forward matches */
+    matched = ExtractPrimersByConstraint (&(rxn->forward), pair->scp1);
+    if (matched != NULL) {
+      *fwd_link = matched;
+      while (matched->next != NULL) {
+        matched = matched->next;
+      }
+      fwd_link = &(matched->next);
+    }
+    /* take reverse matches */
+    matched = ExtractPrimersByConstraint (&(rxn->reverse), pair->scp2);
+    if (matched != NULL) {
+      *rev_link = matched;
+      while (matched->next != NULL) {
+        matched = matched->next;
+      }
+      rev_link = &(matched->next);
+    }
+    tail_rxn = rxn;
+  }
+
+  if (collected->forward == NULL && collected->reverse == NULL) {
+    collected = PCRReactionFree (collected);
+  } else {
+    tail_rxn->next = collected;
+  }
+}
+
+
+/* Packs scp_fwd and scp_rev into a StringConstraintPairData and runs
+ * SplitPCRPrimersByConstraintsCallback on every BioSource visited in sep.
+ */
+NLM_EXTERN void SplitPCRPrimersByConstraints (SeqEntryPtr sep, StringConstraintPtr scp_fwd, StringConstraintPtr scp_rev)
+{
+ StringConstraintPairData pair;
+
+ pair.scp1 = scp_fwd;
+ pair.scp2 = scp_rev;
+
+ VisitBioSourcesInSep (sep, &pair, SplitPCRPrimersByConstraintsCallback);
+}
+
+
+/* product name fixing rules */
+
+/* Returns the number of rules in the linked list starting at set (0 for NULL). */
+NLM_EXTERN Int4 CountSuspectRuleSet (SuspectRuleSetPtr set)
+{
+  Int4 count;
+
+  for (count = 0; set != NULL; set = set->next) {
+    count++;
+  }
+  return count;
+}
+
+
+/* emptiness */
+/* Reports whether a search function carries nothing to search for.
+ * NULL is empty; a string constraint is empty when IsStringConstraintEmpty
+ * says so; a prefix-and-numbers search is empty when its prefix has no text;
+ * every other choice counts as non-empty.
+ */
+NLM_EXTERN Boolean IsSearchFuncEmpty (SearchFuncPtr func)
+{
+  if (func == NULL) {
+    return TRUE;
+  }
+  if (func->choice == SearchFunc_string_constraint) {
+    return IsStringConstraintEmpty (func->data.ptrvalue);
+  }
+  if (func->choice == SearchFunc_prefix_and_numbers) {
+    return StringHasNoText (func->data.ptrvalue);
+  }
+  return FALSE;
+}
+
+
+/* A rule is empty when it is NULL or when its find function is empty. */
+NLM_EXTERN Boolean IsSuspectRuleEmpty (SuspectRulePtr rule)
+{
+  if (rule != NULL && !IsSearchFuncEmpty (rule->find)) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* summarization */
+/* Builds a human-readable description of a search function.
+ * The returned string is newly allocated for every choice handled here
+ * (including the NULL and unknown cases); the caller frees it.
+ */
+NLM_EXTERN CharPtr SummarizeSearchFunc (SearchFuncPtr func)
+{
+  CharPtr bracket_fmt = "Contains %d or more brackets or parentheses";
+  CharPtr prefix_fmt = "Contains '%s' followed by numbers";
+  CharPtr length_fmt = "Is longer than %d characters";
+  CharPtr term_fmt = "Contains '%s' at start or separated from other letters by numbers, spaces, or punctuation, but does not also contain 'domain'";
+  CharPtr text;
+  CharPtr result;
+
+  if (func == NULL) {
+    return StringSave ("No search function");
+  }
+
+  switch (func->choice) {
+    case SearchFunc_string_constraint:
+      return SummarizeStringConstraint (func->data.ptrvalue);
+
+    case SearchFunc_contains_plural:
+      return StringSave ("May contain plural");
+
+    case SearchFunc_n_or_more_brackets_or_parentheses:
+      /* 15 extra characters leave room for the printed integer */
+      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (bracket_fmt) + 15));
+      sprintf (result, bracket_fmt, func->data.intvalue);
+      return result;
+
+    case SearchFunc_three_numbers:
+      return StringSave ("Three or more numbers together");
+
+    case SearchFunc_underscore:
+      return StringSave ("Contains underscore");
+
+    case SearchFunc_prefix_and_numbers:
+      /* the two characters of "%s" in the format cover the terminator */
+      text = func->data.ptrvalue;
+      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (text)));
+      sprintf (result, prefix_fmt, text == NULL ? "" : text);
+      return result;
+
+    case SearchFunc_all_caps:
+      return StringSave ("Is all capital letters");
+
+    case SearchFunc_unbalanced_paren:
+      return StringSave ("Contains unbalanced brackets or parentheses");
+
+    case SearchFunc_too_long:
+      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (length_fmt) + 15));
+      sprintf (result, length_fmt, func->data.intvalue);
+      return result;
+
+    case SearchFunc_has_term:
+      text = func->data.ptrvalue;
+      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (term_fmt) + StringLen (text)));
+      sprintf (result, term_fmt, text == NULL ? "" : text);
+      return result;
+
+    default:
+      break;
+  }
+  return StringSave ("Unknown search function");
+}
+
+
+/* Builds a human-readable description of a replacement function.
+ * Returns NULL when replace is NULL; otherwise returns a newly allocated
+ * string that the caller frees.
+ * A simple-replace choice whose payload pointer is NULL is summarized as a
+ * replacement with the empty string instead of dereferencing NULL.
+ */
+NLM_EXTERN CharPtr SummarizeReplaceFunc (ReplaceFuncPtr replace)
+{
+  CharPtr          summ = NULL;
+  SimpleReplacePtr simple;
+  CharPtr          replace_fmt = "Replace %swith '%s'";
+  CharPtr          whole = "entire name ";
+  CharPtr          weasel_to_putative = ", retain and normalize 'putative' synonym";
+  CharPtr          new_text = NULL;
+  Boolean          whole_string = FALSE;
+  Boolean          add_putative = FALSE;
+  Int4             len;
+
+  if (replace == NULL) {
+    return NULL;
+  }
+  switch (replace->choice) {
+    case ReplaceFunc_simple_replace:
+      simple = (SimpleReplacePtr) replace->data.ptrvalue;
+      if (simple != NULL) {
+        new_text = simple->replace;
+        whole_string = simple->whole_string;
+        add_putative = simple->weasel_to_putative;
+      }
+      /* format length over-counts by the four "%s" characters, which more
+       * than covers the terminator; optional pieces are added explicitly
+       */
+      len = StringLen (replace_fmt) + StringLen (new_text) + 1;
+      if (whole_string) {
+        len += StringLen (whole);
+      }
+      if (add_putative) {
+        len += StringLen (weasel_to_putative);
+      }
+      summ = (CharPtr) MemNew (sizeof (Char) * len);
+      sprintf (summ, replace_fmt,
+               whole_string ? whole : "",
+               new_text == NULL ? "" : new_text);
+      if (add_putative) {
+        StringCat (summ, weasel_to_putative);
+      }
+      break;
+    case ReplaceFunc_haem_replace:
+      summ = StringSave ("Replace with 'heme' if whole word, 'hem' otherwise");
+      break;
+    default:
+      summ = StringSave ("Unknown replacement function");
+      break;
+  }
+  return summ;
+}
+
+/* Display text for each fix type, indexed by the fix-type value passed to
+ * SummarizeFixType.
+ * NOTE(review): "Suspicous" and "identifer" look like typos; they are runtime
+ * text and are left unchanged here - confirm nothing matches on these strings
+ * before correcting them.
+ */
+static CharPtr fix_type_names[] = {
+ "None",
+ "Typo",
+ "Quick fix",
+ "Organelles not appropriate in prokaryote",
+ "Suspicous phrase; should this be nonfunctional?",
+ "May contain database identifer more appropriate in note; remove from product name",
+ "Remove organism from product name",
+ "Possible parsing error or incorrect formatting; remove inappropriate symbols",
+ "Implies evolutionary relationship; change to -like protein",
+ "Use xxx protein or xxx-containing protein",
+ "Use hypothetical protein",
+ "Use American spelling",
+ "Use short product name instead of descriptive phrase",
+ "use protein instead of gene as appropriate"
+};
+
+/* Returns the display text for fix_type, or "Unknown fix type" when the value
+ * lies outside fix_type_names.  The result points at static text and must not
+ * be freed.
+ */
+NLM_EXTERN CharPtr SummarizeFixType (Uint2 fix_type)
+{
+  if (fix_type >= sizeof (fix_type_names) / sizeof (fix_type_names[0])) {
+    return "Unknown fix type";
+  }
+  return fix_type_names[fix_type];
+}
+
+
+/* Describes a replace rule: the summary of its replace function, followed by
+ * ", move original to note" when move_to_note is set.
+ * Returns NULL for a NULL rule, otherwise a newly allocated string.
+ */
+NLM_EXTERN CharPtr SummarizeReplaceRule (ReplaceRulePtr replace)
+{
+  CharPtr note_suffix = ", move original to note";
+  CharPtr func_summ;
+  CharPtr result;
+  Int4    needed;
+
+  if (replace == NULL) {
+    return NULL;
+  }
+
+  func_summ = SummarizeReplaceFunc (replace->replace_func);
+
+  /* +1 for the terminator */
+  needed = StringLen (func_summ) + 1;
+  if (replace->move_to_note) {
+    needed += StringLen (note_suffix);
+  }
+
+  result = (CharPtr) MemNew (sizeof (Char) * needed);
+  StringCpy (result, func_summ);
+  if (replace->move_to_note) {
+    StringCat (result, note_suffix);
+  }
+
+  func_summ = MemFree (func_summ);
+  return result;
+}
+
+
+/* Builds a one-line description of a suspect rule in the form
+ *   <find>[ but not <except>][, <feature constraint>][, <replace>][ (<fix type>)]
+ * Returns NULL for a NULL rule, otherwise a newly allocated string.
+ */
+NLM_EXTERN CharPtr SummarizeSuspectRule (SuspectRulePtr rule)
+{
+  CharPtr find_txt;
+  CharPtr except_txt = NULL;
+  CharPtr constraint_txt;
+  CharPtr replace_txt;
+  CharPtr fix_txt = NULL;   /* static text from SummarizeFixType, never freed */
+  CharPtr but_not = " but not ";
+  CharPtr result;
+  Int4    total;
+
+  if (rule == NULL) {
+    return NULL;
+  }
+
+  /* collect the individual pieces */
+  find_txt = SummarizeSearchFunc (rule->find);
+  if (!IsSearchFuncEmpty (rule->except)) {
+    except_txt = SummarizeSearchFunc (rule->except);
+  }
+  constraint_txt = SummarizeConstraintSet (rule->feat_constraint);
+  replace_txt = SummarizeReplaceRule (rule->replace);
+  if (rule->rule_type != Fix_type_none) {
+    fix_txt = SummarizeFixType (rule->rule_type);
+  }
+
+  /* 6 = ", " before replace + " (" + ")" + terminator */
+  total = StringLen (find_txt) + StringLen (except_txt) + StringLen (constraint_txt)
+          + StringLen (replace_txt) + StringLen (fix_txt) + 6;
+  if (constraint_txt != NULL) {
+    total += 2;
+  }
+  if (except_txt != NULL) {
+    total += StringLen (but_not);
+  }
+
+  /* assemble */
+  result = (CharPtr) MemNew (sizeof (Char) * total);
+  StringCpy (result, find_txt);
+  if (except_txt != NULL) {
+    StringCat (result, but_not);
+    StringCat (result, except_txt);
+  }
+  if (constraint_txt != NULL) {
+    StringCat (result, ", ");
+    StringCat (result, constraint_txt);
+  }
+  if (replace_txt != NULL) {
+    StringCat (result, ", ");
+    StringCat (result, replace_txt);
+  }
+  if (rule->rule_type != Fix_type_none) {
+    StringCat (result, " (");
+    StringCat (result, fix_txt);
+    StringCat (result, ")");
+  }
+
+  find_txt = MemFree (find_txt);
+  except_txt = MemFree (except_txt);
+  constraint_txt = MemFree (constraint_txt);
+  replace_txt = MemFree (replace_txt);
+  return result;
+}
+
+
+/* Heuristic test for a plural word in search.
+ * Words are delimited by spaces and commas.  A word is reported as a possible
+ * plural when it
+ *   - ends in 's' but not in "trans",
+ *   - is longer than two characters,
+ *   - does not end in "ss", "is" or "us", and
+ *   - is directly followed by a comma or by the end of the string.
+ * Returns FALSE for NULL input.
+ */
+NLM_EXTERN Boolean StringMayContainPlural (CharPtr search)
+{
+  CharPtr cp;
+  Char    before_s, after_word;
+  Int4    word_len;
+  CharPtr word_skip = " ,";
+
+  if (search == NULL) return FALSE;
+
+  /* Skip leading separators so that every word examined below has at least
+   * one character; otherwise cp[word_len - 1] would read before the buffer.
+   */
+  cp = search + StringSpn (search, word_skip);
+  while (*cp != 0) {
+    word_len = StringCSpn (cp, word_skip);
+    if (cp[word_len - 1] == 's'
+        && !(word_len >= 5 && StringNCmp (cp + word_len - 5, "trans", 5) == 0)
+        && word_len > 2) {
+      before_s = cp[word_len - 2];
+      after_word = cp[word_len];
+      if (before_s != 's' && before_s != 'i' && before_s != 'u'
+          && (after_word == ',' || after_word == 0)) {
+        return TRUE;
+      }
+    }
+    /* advance to the start of the next word */
+    cp += word_len;
+    cp += StringSpn (cp, word_skip);
+  }
+  return FALSE;
+}
+
+
+/* Returns a pointer to the first '(' or '[' in cp, whichever comes first,
+ * or NULL when there is none (or cp is NULL).
+ */
+static CharPtr FindFirstOpen (CharPtr cp)
+{
+  CharPtr paren, bracket;
+
+  if (cp == NULL) {
+    return NULL;
+  }
+
+  paren = StringChr (cp, '(');
+  bracket = StringChr (cp, '[');
+
+  if (paren == NULL) {
+    return bracket;
+  }
+  if (bracket == NULL) {
+    return paren;
+  }
+  return (bracket > paren) ? paren : bracket;
+}
+
+
+/* Maps an opening bracket character to its closing counterpart; any other
+ * character is returned unchanged.
+ */
+static Char GetClose (Char ch)
+{
+  switch (ch) {
+    case '(':
+      return ')';
+    case '[':
+      return ']';
+    case '{':
+      return '}';
+    default:
+      return ch;
+  }
+}
+
+
+/* Decides whether the bracketed/parenthesized group starting at bp should be
+ * ignored when counting groups in a product name.
+ *   bp      - points at the opening '(' or '['
+ *   start   - beginning of the whole string (guards the look-behind)
+ *   skip_to - receives the position just after the ignored text; written only
+ *             when TRUE is returned
+ * Ignored groups are: the "(P)" of "NAD(P)", a fixed list of known chemical or
+ * carrier-protein annotations, and any unnested group that is either very
+ * short (closer fewer than 5 characters after the opener) or ends in "ing".
+ *
+ * skip_to is always set to the first character after the recognized text, so
+ * it can never point beyond the string terminator.
+ * NOTE(review): "(NAPPH/NADH)" may be a misspelling of "(NADPH/NADH)"; it is
+ * kept verbatim - confirm against the rule author's intent.
+ */
+static Boolean SkipBracketOrParen (CharPtr bp, CharPtr start, CharPtr PNTR skip_to)
+{
+  static CharPtr ignorable[] = {
+    "(NAD(P)H)",
+    "(NAD(P))",
+    "(I)",
+    "(II)",
+    "(III)",
+    "(NADPH)",
+    "(NAD+)",
+    "(NAPPH/NADH)",
+    "(NADP+)",
+    "[acyl-carrier protein]",
+    "[acyl-carrier-protein]",
+    "(acyl carrier protein)"
+  };
+  Int4    i, len;
+  CharPtr ep, ns;
+
+  /* "(P)" that is part of "NAD(P)": resume right after the ")" */
+  if (bp - start > 2 && StringNCmp (bp - 3, "NAD(P)", 6) == 0) {
+    *skip_to = bp + 3;
+    return TRUE;
+  }
+
+  /* known annotations, matched at bp and skipped in full */
+  for (i = 0; i < (Int4) (sizeof (ignorable) / sizeof (ignorable[0])); i++) {
+    len = StringLen (ignorable[i]);
+    if (StringNCmp (bp, ignorable[i], len) == 0) {
+      *skip_to = bp + len;
+      return TRUE;
+    }
+  }
+
+  /* generic case: only unnested groups are considered */
+  ns = StringChr (bp + 1, *bp);
+  ep = StringChr (bp + 1, GetClose (*bp));
+  if (ep != NULL && (ns == NULL || ns > ep)) {
+    if (ep - bp < 5 || StringNCmp (ep - 3, "ing", 3) == 0) {
+      *skip_to = ep + 1;
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/* Returns TRUE when search contains at least n bracket/parenthesis groups
+ * that are closed and are not excluded by SkipBracketOrParen.
+ * Returns FALSE for NULL input.
+ */
+NLM_EXTERN Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n)
+{
+  CharPtr opener, closer, resume;
+  Int4    found = 0;
+
+  if (search == NULL) {
+    return FALSE;
+  }
+
+  opener = FindFirstOpen (search);
+  while (found < n && opener != NULL && *opener != 0) {
+    if (SkipBracketOrParen (opener, search, &resume)) {
+      /* ignore it */
+      opener = FindFirstOpen (resume);
+      continue;
+    }
+    closer = StringChr (opener, GetClose (*opener));
+    if (closer == NULL) {
+      /* skip, doesn't close the bracket */
+      opener = FindFirstOpen (opener + 1);
+    } else {
+      found++;
+      opener = FindFirstOpen (closer);
+    }
+  }
+
+  return (Boolean) (found >= n ? TRUE : FALSE);
+}
+
+
+/* Checks whether the word containing *str is followed, after one separating
+ * character, by the text "family".  The scan for the end of the word starts
+ * one character after *str.  On success *str is advanced to the final 'y' of
+ * "family" and TRUE is returned; otherwise *str is left untouched.
+ */
+static Boolean FollowedByFamily (CharPtr PNTR str)
+{
+  CharPtr word_end;
+
+  if (str == NULL || *str == NULL || **str == 0) {
+    return FALSE;
+  }
+
+  /* first space (or terminator) after the current position */
+  word_end = *str + 1 + StringCSpn (*str + 1, " ");
+  if (*word_end == 0) {
+    return FALSE;
+  }
+  if (StringNCmp (word_end + 1, "family", 6) != 0) {
+    return FALSE;
+  }
+
+  *str = word_end + 6;
+  return TRUE;
+}
+
+
+/* Returns TRUE when the word preceding the one that contains cp is
+ * "cytochrome" (compared without regard to case) or "coenzyme" (compared
+ * case-sensitively).  start is the beginning of the string and bounds the
+ * backwards scan.
+ */
+static Boolean InWordBeforeCytochromeOrCoenzyme (CharPtr cp, CharPtr start)
+{
+  if (cp == NULL) {
+    return FALSE;
+  }
+
+  /* back up to the whitespace in front of the current word */
+  for (; cp > start && !isspace (*cp); cp--) {
+    continue;
+  }
+  if (cp == start) {
+    return FALSE;
+  }
+  /* back up over that whitespace to the last letter of the previous word */
+  for (; cp > start && isspace (*cp); cp--) {
+    continue;
+  }
+
+  if (cp - start >= 9 && StringNICmp (cp - 9, "cytochrome", 10) == 0) {
+    return TRUE;
+  }
+  if (cp - start >= 7 && StringNCmp (cp - 7, "coenzyme", 8) == 0) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Returns TRUE when the characters immediately before cp (within search)
+ * are exactly prefix.  FALSE for NULL arguments or a prefix without text.
+ */
+static Boolean PrecededByPrefix (CharPtr search, CharPtr cp, CharPtr prefix)
+{
+  Int4 prefix_len;
+
+  if (search == NULL || cp == NULL || StringHasNoText (prefix)) {
+    return FALSE;
+  }
+
+  prefix_len = StringLen (prefix);
+  if (cp - search < prefix_len) {
+    /* not enough room before cp */
+    return FALSE;
+  }
+  return (Boolean) (StringNCmp (cp - prefix_len, prefix, prefix_len) == 0 ? TRUE : FALSE);
+}
+
+
+/* Returns TRUE when search contains a run of three digits that is not
+ * excused.  A digit run is excused when it directly follows "DUF", "UPF",
+ * "IS" or "TIGR", when the preceding word is cytochrome/coenzyme (see
+ * InWordBeforeCytochromeOrCoenzyme), or when FollowedByFamily accepts the
+ * position of its third digit.  Returns FALSE for NULL input.
+ */
+NLM_EXTERN Boolean ContainsThreeOrMoreNumbersTogether (CharPtr search)
+{
+  CharPtr p;
+  Int4    run = 0;
+
+  if (search == NULL) {
+    return FALSE;
+  }
+
+  for (p = search; *p != 0; p++) {
+    if (!isdigit (*p)) {
+      run = 0;
+      continue;
+    }
+
+    if (PrecededByPrefix (search, p, "DUF")
+        || PrecededByPrefix (search, p, "UPF")
+        || PrecededByPrefix (search, p, "IS")
+        || PrecededByPrefix (search, p, "TIGR")
+        || InWordBeforeCytochromeOrCoenzyme (p, search)) {
+      /* jump to the last digit of this run; the loop increment moves past it */
+      p += StrSpn (p, "0123456789") - 1;
+      run = 0;
+      continue;
+    }
+
+    run++;
+    if (run == 3) {
+      if (!FollowedByFamily (&p)) {
+        return TRUE;
+      }
+      run = 0;
+    }
+  }
+  return FALSE;
+}
+
+
+/* Returns TRUE when search contains an underscore that is not excused.
+ * An underscore is excused when FollowedByFamily accepts its position, or
+ * when it is preceded by "MFS", "TPR" or "AAA" and followed by exactly one
+ * digit.  Returns FALSE for NULL input.
+ */
+NLM_EXTERN Boolean StringContainsUnderscore (CharPtr search)
+{
+  CharPtr cp;
+  Boolean known_prefix;
+
+  if (search == NULL) {
+    return FALSE;
+  }
+
+  cp = StringChr (search, '_');
+  while (cp != NULL) {
+    if (FollowedByFamily (&cp)) {
+      /* search again */
+      cp = StringChr (cp, '_');
+      continue;
+    }
+    if (cp - search < 3 || cp[1] == 0) {
+      return TRUE;
+    }
+
+    known_prefix = (Boolean) (StringNCmp (cp - 3, "MFS", 3) == 0
+                              || StringNCmp (cp - 3, "TPR", 3) == 0
+                              || StringNCmp (cp - 3, "AAA", 3) == 0);
+    if (!(known_prefix && isdigit (cp[1]) && !isdigit (cp[2]))) {
+      return TRUE;
+    }
+    cp = StringChr (cp + 1, '_');
+  }
+  return FALSE;
+}
+
+
+/* Returns TRUE when the first case-insensitive occurrence of pattern in
+ * search sits at the very start or is not preceded by a letter.  Always FALSE
+ * when search contains "domain" (case-insensitive).
+ */
+NLM_EXTERN Boolean ProductContainsTerm (CharPtr pattern, CharPtr search)
+{
+  CharPtr hit;
+
+  /* don't bother searching for c-term or n-term if product name contains "domain" */
+  if (StringISearch (search, "domain") != NULL) {
+    return FALSE;
+  }
+
+  hit = StringISearch (search, pattern);
+  if (hit == NULL) {
+    return FALSE;
+  }
+  /* c-term and n-term must be either first word or separated from other word by space, num, or punct */
+  if (hit == search) {
+    return TRUE;
+  }
+  return isalpha (*(hit - 1)) ? FALSE : TRUE;
+}
+
+
+/* Returns TRUE when search consists of prefix followed by one or more digits
+ * and nothing else.  An empty or NULL prefix matches any all-digit string.
+ * Returns FALSE for NULL search.
+ */
+NLM_EXTERN Boolean IsPrefixPlusNumbers (CharPtr prefix, CharPtr search)
+{
+  Int4    prefix_len, num_len;
+  CharPtr tail;
+
+  if (search == NULL) {
+    return FALSE;
+  }
+
+  prefix_len = StringLen (prefix);
+  if (prefix_len > 0 && StringNCmp (search, prefix, prefix_len) != 0) {
+    return FALSE;
+  }
+
+  /* everything after the prefix must be digits up to the terminator */
+  tail = search + prefix_len;
+  num_len = StringSpn (tail, "1234567890");
+  if (num_len > 0 && tail[num_len] == 0) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Returns TRUE when the '(' ')' '[' ']' characters in search are not properly
+ * nested and matched.  Uses a character stack of pending openers.
+ * Returns FALSE for NULL input.
+ */
+NLM_EXTERN Boolean StringContainsUnbalancedParentheses (CharPtr search)
+{
+  CharPtr stack, cp;
+  Char    expected;
+  Int4    depth = 0;
+  Boolean is_bad = FALSE;
+
+  if (search == NULL) {
+    return FALSE;
+  }
+
+  /* note - don't need space for terminating character */
+  stack = MemNew (sizeof (Char) * StringLen (search));
+
+  for (cp = search; *cp != 0 && !is_bad; cp++) {
+    switch (*cp) {
+      case '(':
+      case '[':
+        stack[depth++] = *cp;
+        break;
+      case ')':
+      case ']':
+        /* a closer must match the most recent pending opener */
+        expected = (Char) ((*cp == ')') ? '(' : '[');
+        if (depth < 1 || stack[depth - 1] != expected) {
+          is_bad = TRUE;
+        } else {
+          depth--;
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  /* anything still pending was never closed */
+  if (depth > 0) {
+    is_bad = TRUE;
+  }
+  stack = MemFree (stack);
+  return is_bad;
+}
+
+
+/* Evaluates one search function against str.
+ * NULL str never matches; a NULL search matches every non-NULL str; an
+ * unrecognized choice does not match.
+ */
+static Boolean MatchesSearchFunc (CharPtr str, SearchFuncPtr search)
+{
+  if (str == NULL) {
+    return FALSE;
+  }
+  if (search == NULL) {
+    return TRUE;
+  }
+
+  switch (search->choice) {
+    case SearchFunc_string_constraint:
+      return DoesStringMatchConstraint (str, (StringConstraintPtr) search->data.ptrvalue);
+    case SearchFunc_contains_plural:
+      return StringMayContainPlural (str);
+    case SearchFunc_n_or_more_brackets_or_parentheses:
+      return ContainsNorMoreSetsOfBracketsOrParentheses (str, search->data.intvalue);
+    case SearchFunc_three_numbers:
+      return ContainsThreeOrMoreNumbersTogether (str);
+    case SearchFunc_underscore:
+      return StringContainsUnderscore (str);
+    case SearchFunc_prefix_and_numbers:
+      return IsPrefixPlusNumbers (search->data.ptrvalue, str);
+    case SearchFunc_all_caps:
+      return IsAllCaps (str);
+    case SearchFunc_unbalanced_paren:
+      return StringContainsUnbalancedParentheses (str);
+    case SearchFunc_too_long:
+      /* bifunctional/multifunctional names are allowed to be long */
+      if (StringISearch (str, "bifunctional") == NULL && StringISearch (str, "multifunctional") == NULL
+          && StringLen (str) > search->data.intvalue) {
+        return TRUE;
+      }
+      return FALSE;
+    case SearchFunc_has_term:
+      return ProductContainsTerm (search->data.ptrvalue, str);
+  }
+  return FALSE;
+}
+
+
+/* Tests str against the find/except pair of a rule: a non-empty find
+ * function must match and a non-empty except function must not.
+ * NULL str never matches; a NULL rule matches every non-NULL str.
+ */
+static Boolean MatchesSuspectProductRule (CharPtr str, SuspectRulePtr rule)
+{
+  if (str == NULL) {
+    return FALSE;
+  }
+  if (rule == NULL) {
+    return TRUE;
+  }
+
+  if (!IsSearchFuncEmpty (rule->find) && !MatchesSearchFunc (str, rule->find)) {
+    return FALSE;
+  }
+  if (!IsSearchFuncEmpty (rule->except) && MatchesSearchFunc (str, rule->except)) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* State shared with SuspectRuleFeatCallback through the visitor's user-data
+ * pointer.
+ */
+typedef struct suspectrulecallback {
+ SuspectRuleSetPtr rules; /* rules to evaluate, walked in list order */
+ ValNodePtr obj_lists; /* one node per rule; data.ptrvalue holds that rule's list of matching features */
+ Uint2 featdef; /* only features with this idx.subtype are examined */
+} SuspectRuleCallbackData, PNTR SuspectRuleCallbackPtr;
+
+
+/* Returns TRUE when str satisfies the rule's find/except functions and the
+ * associated feature satisfies the rule's feature constraint.
+ * For a protein feature the constraint is evaluated on the coding region
+ * found for its location's Bioseq when there is one.  Without a feature the
+ * rule only matches if it has no feature constraint.  A NULL rule always
+ * matches.
+ */
+NLM_EXTERN Boolean DoesStringMatchSuspectRule (CharPtr str, SeqFeatPtr sfp, SuspectRulePtr rule)
+{
+  BioseqPtr  bsp;
+  SeqFeatPtr cds;
+
+  if (rule == NULL) {
+    return TRUE;
+  }
+  if (!MatchesSuspectProductRule (str, rule)) {
+    return FALSE;
+  }
+
+  /* we want to list the coding region, rather than the protein feature, if we can */
+  if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
+    bsp = BioseqFindFromSeqLoc (sfp->location);
+    if (bsp != NULL) {
+      cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
+      if (cds != NULL) {
+        sfp = cds;
+      }
+    }
+  }
+
+  if (sfp == NULL) {
+    return (Boolean) (rule->feat_constraint == NULL ? TRUE : FALSE);
+  }
+  if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, rule->feat_constraint)) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Feature visitor that tests one feature against every rule in the
+ * SuspectRuleCallbackData passed as data, and records the feature in the
+ * per-rule list of each rule it matches.
+ * Features whose idx.subtype differs from s->featdef are ignored.
+ */
+static void SuspectRuleFeatCallback (SeqFeatPtr sfp, Pointer data)
+{
+ SuspectRuleCallbackPtr s;
+ ProtRefPtr prp;
+ SuspectRulePtr rule;
+ ValNodePtr vnp;
+ SeqFeatPtr cds;
+ BioseqPtr bsp;
+ ValNodePtr list;
+ SeqFeatPtr report_sfp = sfp;
+ CharPtr check_val = NULL;
+
+ if (sfp == NULL
+ || (s = (SuspectRuleCallbackPtr) data) == NULL
+ || sfp->idx.subtype != s->featdef) {
+ return;
+ }
+
+ if (s->featdef == FEATDEF_PROT) {
+ prp = (ProtRefPtr) sfp->data.value.ptrvalue;
+ if (prp == NULL || prp->name == NULL) {
+ return;
+ }
+ /* only the first protein name is checked */
+ check_val = prp->name->data.ptrvalue;
+ /* we want to list the coding region, rather than the protein feature, if we can */
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ if (bsp != NULL) {
+ cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
+ if (cds != NULL) {
+ report_sfp = cds;
+ }
+ }
+ } else if (s->featdef == FEATDEF_rRNA) {
+ /* NOTE(review): if GetRNAProductString returns newly allocated memory,
+ * check_val is never freed in this function - confirm ownership. */
+ check_val = GetRNAProductString (sfp, NULL);
+ }
+ /* for any other featdef check_val stays NULL, so no rule can match below */
+
+ /* rules and obj_lists are walked in parallel: the i-th node belongs to the i-th rule */
+ for (rule = s->rules, vnp = s->obj_lists; rule != NULL; rule = rule->next, vnp = vnp->next) {
+ /* make sure we have space in the object lists */
+ if (vnp == NULL) {
+ vnp = ValNodeNew (s->obj_lists);
+ if (s->obj_lists == NULL) {
+ s->obj_lists = vnp;
+ }
+ }
+
+ if (MatchesSuspectProductRule (check_val, rule)) {
+ if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, report_sfp, rule->feat_constraint)) {
+ /* append the feature to this rule's list and store the (possibly new) head back */
+ list = vnp->data.ptrvalue;
+ ValNodeAddPointer (&list, OBJ_SEQFEAT, report_sfp);
+ vnp->data.ptrvalue = list;
+ }
+ }
+ }
+}
+
+
+/* Visits every feature in sep and, for features of subtype featdef, collects
+ * those matching each rule.  Returns a list of per-rule feature lists as
+ * built by SuspectRuleFeatCallback; free it with FreeListOfObjectLists.
+ */
+NLM_EXTERN ValNodePtr GetFeaturesForSuspectRules (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef)
+{
+  SuspectRuleCallbackData ctx;
+
+  MemSet (&ctx, 0, sizeof (SuspectRuleCallbackData));
+  ctx.rules = rules;
+  ctx.featdef = featdef;
+  ctx.obj_lists = NULL;
+
+  VisitFeaturesInSep (sep, &ctx, SuspectRuleFeatCallback);
+
+  return ctx.obj_lists;
+}
+
+
+/* Frees a list whose nodes each hold an object list in data.ptrvalue: every
+ * inner list is released with FreeObjectList, then the outer list with
+ * ValNodeFree.  Returns the result of ValNodeFree.
+ */
+NLM_EXTERN ValNodePtr FreeListOfObjectLists (ValNodePtr list)
+{
+  ValNodePtr node = list;
+
+  while (node != NULL) {
+    node->data.ptrvalue = FreeObjectList (node->data.ptrvalue);
+    node = node->next;
+  }
+  return ValNodeFree (list);
+}
+
+
+/* Applies the replacement part of a suspect rule to *str.
+ *   rule - rule whose replace->replace_func describes the fix
+ *   str  - in/out: string to fix; may be freed and reallocated
+ * Returns TRUE when the string was changed, FALSE otherwise (including when
+ * any required argument is NULL).
+ *
+ * Simple replace: the whole string is replaced when the rule's find function
+ * is not a string constraint, or when whole_string is set and the constraint
+ * matches; otherwise only the matching portion is replaced.  With
+ * weasel_to_putative, a string that started with a weasel word (as detected
+ * by SkipWeasel) is prefixed with kPutative afterwards unless it already
+ * starts with it.
+ *
+ * Haem replace: the text given in the replace function is replaced by "heme"
+ * (first call, whole-word flag TRUE) and by "hem" (second call).  The result
+ * is compared with the original text so the function reports TRUE when a
+ * replacement actually happened; callers such as
+ * ApplySuspectProductNameFixToFeature discard the new string on FALSE.
+ */
+NLM_EXTERN Boolean ApplySuspectProductNameFixToString (SuspectRulePtr rule, CharPtr PNTR str)
+{
+  SimpleReplacePtr simple_replace;
+  Boolean          rval = FALSE;
+  Boolean          use_putative = FALSE;
+  CharPtr          before;
+
+  if (str == NULL || rule == NULL || rule->replace == NULL || rule->replace->replace_func == NULL) {
+    return FALSE;
+  }
+
+  switch (rule->replace->replace_func->choice) {
+    case ReplaceFunc_simple_replace:
+      simple_replace = (SimpleReplacePtr) rule->replace->replace_func->data.ptrvalue;
+      if (simple_replace != NULL) {
+        /* must be decided before the string is replaced */
+        if (simple_replace->weasel_to_putative) {
+          if (SkipWeasel (*str) != *str) {
+            use_putative = TRUE;
+          }
+        }
+
+        if (rule->find == NULL || rule->find->choice != SearchFunc_string_constraint) {
+          *str = MemFree (*str);
+          *str = StringSave (simple_replace->replace);
+          rval = TRUE;
+        } else if (simple_replace->whole_string && DoesStringMatchConstraint (*str, rule->find->data.ptrvalue)) {
+          *str = MemFree (*str);
+          *str = StringSave (simple_replace->replace);
+          rval = TRUE;
+        } else {
+          rval = ReplaceStringConstraintPortionInString (str, simple_replace->replace, rule->find->data.ptrvalue);
+        }
+        if (use_putative && StringNCmp (*str, kPutative, StringLen (kPutative)) != 0) {
+          SetStringValue (str, kPutative, ExistingTextOption_prefix_space);
+        }
+      }
+      break;
+    case ReplaceFunc_haem_replace:
+      /* keep a copy so a real change can be detected and reported */
+      before = StringSave (*str);
+      FindReplaceString (str, rule->replace->replace_func->data.ptrvalue, "heme", FALSE, TRUE);
+      FindReplaceString (str, rule->replace->replace_func->data.ptrvalue, "hem", FALSE, FALSE);
+      if (StringCmp (before, *str) != 0) {
+        rval = TRUE;
+      }
+      before = MemFree (before);
+      break;
+  }
+  return rval;
+}
+
+
+/* Applies a suspect rule's fix to the product name of the protein made by a
+ * coding region.
+ *   rule - rule to apply; must have a replace part
+ *   cds  - coding region feature (data.choice must be SEQFEAT_CDREGION)
+ *   fp   - optional log file; when non-NULL one line describing the change is written
+ * Returns TRUE when the first protein name was changed, FALSE otherwise.
+ */
+NLM_EXTERN Boolean ApplySuspectProductNameFixToFeature (SuspectRulePtr rule, SeqFeatPtr cds, FILE *fp)
+{
+ BioseqPtr protbsp;
+ SeqFeatPtr protfeat;
+ SeqMgrFeatContext context;
+ ProtRefPtr prp;
+ CharPtr new_name, desc;
+ Boolean rval = FALSE;
+ ValNode vn;
+
+ if (rule == NULL || rule->replace == NULL || cds == NULL || cds->data.choice != SEQFEAT_CDREGION) {
+ return FALSE;
+ }
+
+ /* locate the protein feature on the product Bioseq */
+ protbsp = BioseqFindFromSeqLoc (cds->product);
+ protfeat = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context);
+ if (protfeat == NULL || protfeat->idx.subtype != FEATDEF_PROT
+ || (prp = (ProtRefPtr) protfeat->data.value.ptrvalue) == NULL
+ || prp->name == NULL) {
+ return FALSE;
+ }
+ /* work on a copy so the original name is still available for logging and the note */
+ new_name = StringSave (prp->name->data.ptrvalue);
+ if (ApplySuspectProductNameFixToString (rule, &new_name)) {
+ if (fp != NULL) {
+ fprintf (fp, "Changed '%s' to '%s'", prp->name->data.ptrvalue == NULL ? "" : (CharPtr) prp->name->data.ptrvalue, new_name);
+ }
+ if (rule->replace->move_to_note) {
+ /* the old name is appended to the coding region's comment */
+ if (SetStringValue (&(cds->comment), prp->name->data.ptrvalue, ExistingTextOption_append_semi)) {
+ if (fp != NULL) {
+ fprintf (fp, " and moved original to note");
+ }
+ }
+ }
+ /* install the new name; ownership of new_name passes to the ProtRef */
+ prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue);
+ prp->name->data.ptrvalue = new_name;
+ if (AdjustmRNAProductToMatchProteinProduct(protfeat)) {
+ if (fp != NULL) {
+ fprintf (fp, " and adjusted mRNA");
+ }
+ }
+ if (fp != NULL) {
+ /* temporary stack node so the coding region can be described for the log */
+ MemSet (&vn, 0, sizeof (ValNode));
+ vn.choice = OBJ_SEQFEAT;
+ vn.data.ptrvalue = cds;
+ desc = GetDiscrepancyItemText (&vn);
+ if (desc != NULL) {
+ fprintf (fp, " for %s", desc);
+ desc = MemFree (desc);
+ }
+ fprintf (fp, "\n");
+ }
+ rval = TRUE;
+ } else {
+ /* nothing changed: discard the working copy */
+ new_name = MemFree (new_name);
+ }
+ return rval;
+}
+
+
+/* Returns the text a search function looks for, used as a sort key:
+ * the match text of a string constraint, "_" for the underscore search, the
+ * stored string for prefix-and-numbers and has-term, and NULL for everything
+ * else.  The result is not a copy and must not be freed.
+ */
+static CharPtr TextFromSearchFunc (ValNodePtr s)
+{
+  StringConstraintPtr constraint;
+
+  if (s == NULL) {
+    return NULL;
+  }
+
+  switch (s->choice) {
+    case SearchFunc_string_constraint:
+      constraint = (StringConstraintPtr) s->data.ptrvalue;
+      return (constraint == NULL) ? NULL : constraint->match_text;
+
+    case SearchFunc_underscore:
+      return "_";
+
+    case SearchFunc_prefix_and_numbers:
+    case SearchFunc_has_term:
+      return s->data.ptrvalue;
+
+    case SearchFunc_contains_plural:
+    case SearchFunc_n_or_more_brackets_or_parentheses:
+    case SearchFunc_three_numbers:
+    case SearchFunc_all_caps:
+    case SearchFunc_unbalanced_paren:
+    case SearchFunc_too_long:
+      /* no text */
+      return NULL;
+  }
+  return NULL;
+}
+
+
+/* Orders two search functions: NULL sorts first, then by search text
+ * (case-insensitive), then by choice value.
+ */
+static int CompareSearchFunc (ValNodePtr s1, ValNodePtr s2)
+{
+  int diff;
+
+  if (s1 == NULL) {
+    return (s2 == NULL) ? 0 : -1;
+  }
+  if (s2 == NULL) {
+    return 1;
+  }
+
+  diff = StringICmp (TextFromSearchFunc (s1), TextFromSearchFunc (s2));
+  if (diff != 0) {
+    return diff;
+  }
+
+  /* same text: fall back to the kind of search */
+  if (s1->choice < s2->choice) {
+    return -1;
+  }
+  if (s1->choice > s2->choice) {
+    return 1;
+  }
+  return 0;
+}
+
+
+/* Orders two rules by their find functions; NULL rules sort first. */
+static int CompareSuspectRuleByFind (SuspectRulePtr rule1, SuspectRulePtr rule2)
+{
+  if (rule1 == NULL) {
+    return (rule2 == NULL) ? 0 : -1;
+  }
+  if (rule2 == NULL) {
+    return 1;
+  }
+  return CompareSearchFunc (rule1->find, rule2->find);
+}
+
+
+/* ValNodeSort comparison callback: each argument points to a ValNodePtr whose
+ * data.ptrvalue is a rule; ordering is that of CompareSuspectRuleByFind.
+ * NULL nodes sort first; NULL arguments compare equal.
+ */
+static int LIBCALLBACK SortVnpBySuspectRuleFind (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left, right;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+
+  if (left == NULL) {
+    return (right == NULL) ? 0 : -1;
+  }
+  if (right == NULL) {
+    return 1;
+  }
+  return CompareSuspectRuleByFind (left->data.ptrvalue, right->data.ptrvalue);
+}
+
+
+/* Orders two rules by rule_type first and by find function second; NULL
+ * rules sort first.
+ */
+static int CompareSuspectRuleByFixTypeThenFind (SuspectRulePtr rule1, SuspectRulePtr rule2)
+{
+  if (rule1 == NULL) {
+    return (rule2 == NULL) ? 0 : -1;
+  }
+  if (rule2 == NULL) {
+    return 1;
+  }
+  if (rule1->rule_type != rule2->rule_type) {
+    return (rule1->rule_type < rule2->rule_type) ? -1 : 1;
+  }
+  return CompareSearchFunc (rule1->find, rule2->find);
+}
+
+
+/* ValNodeSort comparison callback: each argument points to a ValNodePtr whose
+ * data.ptrvalue is a rule; ordering is that of
+ * CompareSuspectRuleByFixTypeThenFind.  NULL nodes sort first; NULL arguments
+ * compare equal.
+ */
+static int LIBCALLBACK SortVnpBySuspectRuleFixTypeThenFind (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left, right;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+
+  if (left == NULL) {
+    return (right == NULL) ? 0 : -1;
+  }
+  if (right == NULL) {
+    return 1;
+  }
+  return CompareSuspectRuleByFixTypeThenFind (left->data.ptrvalue, right->data.ptrvalue);
+}
+
+
+/* Builds a ValNode list with one node per rule (choice 0, data.ptrvalue
+ * pointing at the rule), in rule order.  The rules themselves are not copied.
+ */
+static ValNodePtr MakeValNodeListFromSuspectRuleSet (SuspectRuleSetPtr rules)
+{
+  ValNodeBlock   collected;
+  SuspectRulePtr rule = rules;
+
+  InitValNodeBlock (&collected, NULL);
+  while (rule != NULL) {
+    ValNodeAddPointerToEnd (&collected, 0, rule);
+    rule = rule->next;
+  }
+  return collected.head;
+}
+
+
+/* Relinks the rules referenced by tmp_list (one rule per node in
+ * data.ptrvalue) into a rule list that follows the node order, and returns
+ * its head.  Each rule's next pointer is overwritten.
+ */
+static SuspectRuleSetPtr MakeSuspectRuleSetFromValNodeList (ValNodePtr tmp_list)
+{
+  ValNodePtr             vnp;
+  SuspectRuleSetPtr      head = NULL;
+  SuspectRuleSetPtr      rule;
+  SuspectRuleSetPtr PNTR link = &head;
+
+  for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
+    rule = vnp->data.ptrvalue;
+    /* attach at the current tail, then make this rule the new tail */
+    *link = rule;
+    rule->next = NULL;
+    link = &(rule->next);
+  }
+  return head;
+}
+
+
+/* Sorts the rule list in place using SortVnpBySuspectRuleFind; *rules is
+ * updated to the new head.  No-op for NULL or empty input.
+ */
+NLM_EXTERN void SortSuspectRuleSetByFind (SuspectRuleSetPtr PNTR rules)
+{
+  ValNodePtr nodes;
+
+  if (rules == NULL || *rules == NULL) {
+    return;
+  }
+
+  /* sort a temporary list of pointers, then relink the rules in that order */
+  nodes = MakeValNodeListFromSuspectRuleSet (*rules);
+  nodes = ValNodeSort (nodes, SortVnpBySuspectRuleFind);
+  *rules = MakeSuspectRuleSetFromValNodeList (nodes);
+
+  ValNodeFree (nodes);
+}
+
+
+/* Sorts the rule list in place using SortVnpBySuspectRuleFixTypeThenFind;
+ * *rules is updated to the new head.  No-op for NULL or empty input.
+ */
+NLM_EXTERN void SortSuspectRuleSetByFixTypeThenFind (SuspectRuleSetPtr PNTR rules)
+{
+  ValNodePtr nodes;
+
+  if (rules == NULL || *rules == NULL) {
+    return;
+  }
+
+  /* sort a temporary list of pointers, then relink the rules in that order */
+  nodes = MakeValNodeListFromSuspectRuleSet (*rules);
+  nodes = ValNodeSort (nodes, SortVnpBySuspectRuleFixTypeThenFind);
+  *rules = MakeSuspectRuleSetFromValNodeList (nodes);
+
+  ValNodeFree (nodes);
+}
+
+
+/* Writes a report of rule matches to fp.  The per-rule object lists come
+ * from GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT) and are walked
+ * in step with the rule chain.  For every rule whose list is non-empty this
+ * prints "<rule summary>:<count>" on one line, followed by a tab and the
+ * discrepancy item text for each object.
+ * NOTE(review): no newline is written after the item text; presumably
+ * GetDiscrepancyItemText supplies one - confirm.
+ * Does nothing when sep, rules or fp is NULL.
+ */
+NLM_EXTERN void PrintSuspectRuleMatches (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp)
+{
+  ValNodePtr all_lists, per_rule, obj;
+  SuspectRulePtr cur_rule;
+  CharPtr text;
+
+  if (sep == NULL || rules == NULL || fp == NULL) {
+    return;
+  }
+
+  all_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT);
+
+  per_rule = all_lists;
+  cur_rule = rules;
+  while (per_rule != NULL && cur_rule != NULL) {
+    if (ValNodeLen (per_rule->data.ptrvalue) > 0) {
+      text = SummarizeSuspectRule (cur_rule);
+      fprintf (fp, "%s:%d\n", text, ValNodeLen (per_rule->data.ptrvalue));
+      text = MemFree (text);
+
+      obj = per_rule->data.ptrvalue;
+      while (obj != NULL) {
+        text = GetDiscrepancyItemText (obj);
+        fprintf (fp, "\t%s", text);
+        text = MemFree (text);
+        obj = obj->next;
+      }
+    }
+    per_rule = per_rule->next;
+    cur_rule = cur_rule->next;
+  }
+
+  FreeListOfObjectLists (all_lists);
+}
+
+
+/* Creates one clickable item per rule that has a non-empty object list in
+ * the result of GetFeaturesForSuspectRules (sep, rules, featdef).
+ * The description format handed to NewClickableItem is built from a
+ * template ("%d rRNA product names <summary>" for FEATDEF_rRNA, otherwise
+ * "%d product names <summary>"); the "%%d" in the template survives the
+ * sprintf as "%d".
+ * The matched object list is passed to NewClickableItem and the pointer in
+ * the per-rule list is cleared, presumably so FreeListOfObjectLists does
+ * not release it as well - confirm against NewClickableItem.
+ * Returns the list of items (choice 0), or NULL when sep or rules is NULL
+ * or nothing matched.
+ */
+NLM_EXTERN ValNodePtr
+GetSuspectRuleDiscrepancies
+(SeqEntryPtr sep,
+ SuspectRuleSetPtr rules,
+ Uint2 featdef,
+ Uint4 clickable_item_type)
+{
+  ValNodePtr all_lists, per_rule, items = NULL;
+  SuspectRulePtr cur_rule;
+  CharPtr rule_text;
+  CharPtr template_fmt;
+  CharPtr item_fmt;
+
+  if (sep == NULL || rules == NULL) {
+    return NULL;
+  }
+
+  template_fmt = (featdef == FEATDEF_rRNA) ? "%%d rRNA product names %s" : "%%d product names %s";
+  all_lists = GetFeaturesForSuspectRules (sep, rules, featdef);
+
+  per_rule = all_lists;
+  cur_rule = rules;
+  while (per_rule != NULL && cur_rule != NULL) {
+    if (ValNodeLen (per_rule->data.ptrvalue) > 0) {
+      rule_text = SummarizeSuspectRule (cur_rule);
+      /* the expanded text is shorter than template + summary, so this also covers the terminator */
+      item_fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (rule_text) + StringLen (template_fmt)));
+
+      sprintf (item_fmt, template_fmt, rule_text);
+      rule_text = MemFree (rule_text);
+      ValNodeAddPointer (&items, 0, NewClickableItem (clickable_item_type, item_fmt, per_rule->data.ptrvalue));
+      per_rule->data.ptrvalue = NULL;
+      item_fmt = MemFree (item_fmt);
+    }
+    per_rule = per_rule->next;
+    cur_rule = cur_rule->next;
+  }
+
+  FreeListOfObjectLists (all_lists);
+  return items;
+}
+
+
+/* Applies the replacement of every rule that has one to the objects found
+ * for that rule by GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT).
+ * When fp is not NULL, a "<summary>:<n> identified" line and a
+ * "Num fixed: <n>" line are written for each rule that has both a
+ * replacement and at least one object.
+ * Afterwards the entity owning sep is marked dirty and an update message is
+ * sent, whether or not anything changed.
+ * Returns the total number of objects changed; 0 when sep or rules is NULL.
+ */
+NLM_EXTERN Int4 ApplySuspectRuleFixesToSeqEntry (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp)
+{
+  ValNodePtr all_lists, per_rule, obj;
+  SuspectRulePtr cur_rule;
+  CharPtr rule_text;
+  Int4 fixed_here, fixed_total = 0;
+  Uint2 entityID;
+
+  if (sep == NULL || rules == NULL) {
+    return 0;
+  }
+
+  all_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT);
+
+  for (per_rule = all_lists, cur_rule = rules;
+       per_rule != NULL && cur_rule != NULL;
+       per_rule = per_rule->next, cur_rule = cur_rule->next) {
+    if (cur_rule->replace != NULL && per_rule->data.ptrvalue != NULL) {
+      if (fp != NULL) {
+        rule_text = SummarizeSuspectRule (cur_rule);
+        fprintf (fp, "%s:%d identified\n", rule_text, ValNodeLen (per_rule->data.ptrvalue));
+        rule_text = MemFree (rule_text);
+      }
+
+      fixed_here = 0;
+      obj = per_rule->data.ptrvalue;
+      while (obj != NULL) {
+        if (ApplySuspectProductNameFixToFeature (cur_rule, obj->data.ptrvalue, fp)) {
+          fixed_here++;
+        }
+        obj = obj->next;
+      }
+
+      if (fp != NULL) {
+        fprintf (fp, "Num fixed: %d\n", fixed_here);
+      }
+      fixed_total += fixed_here;
+    }
+  }
+
+  entityID = ObjMgrGetEntityIDForChoice(sep);
+  ObjMgrSetDirtyFlag (entityID, TRUE);
+  ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
+
+  FreeListOfObjectLists (all_lists);
+  return fixed_total;
+}
+
+
+typedef struct rulesort { /* a rule paired with its index in the set it came from */
+ SuspectRulePtr rule; /* copy made by RuleSortNew; released by RuleSortFree */
+ Int4 pos; /* zero-based position in the source set; SortVnpByRuleSortPos orders by it */
+} RuleSortData, PNTR RuleSortPtr;
+
+/* Allocates a RuleSortData that holds a copy of rule (made with
+ * AsnIoMemCopy) together with pos.  The result should be released with
+ * RuleSortFree.
+ */
+static RuleSortPtr RuleSortNew (SuspectRulePtr rule, Int4 pos)
+{
+  RuleSortPtr entry = (RuleSortPtr) MemNew (sizeof (RuleSortData));
+
+  entry->rule = AsnIoMemCopy (rule, (AsnReadFunc)SuspectRuleAsnRead, (AsnWriteFunc) SuspectRuleAsnWrite);
+  entry->pos = pos;
+  return entry;
+}
+
+
+/* Releases r and the rule it holds.  Accepts NULL.  Always returns NULL so
+ * the caller can clear its pointer in the same statement.
+ */
+static RuleSortPtr RuleSortFree (RuleSortPtr r)
+{
+  if (r == NULL) {
+    return NULL;
+  }
+  r->rule = SuspectRuleFree (r->rule);
+  return MemFree (r);
+}
+
+
+/* Sort callback over ValNode pointers whose data.ptrvalue holds a
+ * RuleSortPtr.  Orders by the contained rules using
+ * CompareSuspectRuleByFixTypeThenFind.
+ * Returns 0 when either argument is NULL or both nodes are NULL; a NULL
+ * node sorts before a non-NULL one.  The RuleSort payload of a non-NULL
+ * node is dereferenced and must not be NULL.
+ */
+static int LIBCALLBACK SortVnpByRuleSortRule (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left;
+  ValNodePtr right;
+  RuleSortPtr a, b;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+
+  if (left == NULL) {
+    return (right == NULL) ? 0 : -1;
+  }
+  if (right == NULL) {
+    return 1;
+  }
+
+  a = (RuleSortPtr) left->data.ptrvalue;
+  b = (RuleSortPtr) right->data.ptrvalue;
+  return CompareSuspectRuleByFixTypeThenFind (a->rule, b->rule);
+}
+
+
+/* Sort callback over ValNode pointers whose data.ptrvalue holds a
+ * RuleSortPtr.  Orders by ascending pos, i.e. the order recorded when the
+ * RuleSort entries were created.
+ * Returns 0 when either argument is NULL or both nodes are NULL; a NULL
+ * node sorts before a non-NULL one.
+ */
+static int LIBCALLBACK SortVnpByRuleSortPos (VoidPtr ptr1, VoidPtr ptr2)
+{
+  ValNodePtr left;
+  ValNodePtr right;
+  RuleSortPtr a, b;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+
+  left = *((ValNodePtr PNTR) ptr1);
+  right = *((ValNodePtr PNTR) ptr2);
+
+  if (left == NULL) {
+    return (right == NULL) ? 0 : -1;
+  }
+  if (right == NULL) {
+    return 1;
+  }
+
+  a = (RuleSortPtr) left->data.ptrvalue;
+  b = (RuleSortPtr) right->data.ptrvalue;
+  if (a->pos < b->pos) {
+    return -1;
+  }
+  return (a->pos > b->pos) ? 1 : 0;
+}
+
+
+/* Converts a rule chain into a ValNode list of RuleSort entries (choice 0).
+ * Each entry holds a copy of the rule plus its zero-based position in the
+ * chain.  The input chain is not modified.
+ */
+static ValNodePtr SuspectRuleSetToRuleSortList (SuspectRuleSetPtr set)
+{
+  ValNodeBlock entries;
+  SuspectRulePtr cur = set;
+  Int4 index = 0;
+
+  InitValNodeBlock (&entries, NULL);
+  while (cur != NULL) {
+    ValNodeAddPointerToEnd (&entries, 0, RuleSortNew (cur, index));
+    cur = cur->next;
+    index++;
+  }
+  return entries.head;
+}
+
+
+/* Moves the rules out of a list of RuleSort entries and chains them in list
+ * order.  Each entry's rule pointer is set to NULL once the rule has been
+ * taken, so freeing the entries afterwards does not release the rules.
+ * Entries without a rule are skipped.
+ * The next pointer of the final rule is left as it was; this relies on the
+ * rules having arrived with next == NULL.
+ * Returns the first rule, or NULL when no entry held one.
+ */
+static SuspectRuleSetPtr RuleSortListToSuspectRuleSet (ValNodePtr list)
+{
+  SuspectRuleSetPtr head = NULL;
+  SuspectRulePtr tail = NULL;
+  SuspectRulePtr taken;
+  RuleSortPtr entry;
+  ValNodePtr node;
+
+  for (node = list; node != NULL; node = node->next) {
+    entry = (RuleSortPtr) node->data.ptrvalue;
+    taken = entry->rule;
+    if (taken == NULL) {
+      continue;
+    }
+    entry->rule = NULL;
+    if (tail == NULL) {
+      head = taken;
+    } else {
+      tail->next = taken;
+    }
+    tail = taken;
+  }
+  return head;
+}
+
+
+/* Computes the rules that appear in only one of two rule sets.
+ *
+ * set1, set2  rule chains to compare; neither is modified (copies are used).
+ * in1not2     receives a new chain of the rules from set1 with no identical
+ *             partner in set2, in their original set1 order.
+ * in2not1     receives the same for set2.
+ *
+ * Both output pointers are required; if either is NULL the function returns
+ * without doing any work.  The caller owns the returned chains.
+ *
+ * Method: both sets are copied into RuleSort lists and sorted with
+ * SortVnpByRuleSortRule.  For each entry of list1, list2 is scanned from a
+ * moving start point for an entry that compares equal and is also identical
+ * according to AsnIoMemComp.  Matched pairs are freed and their nodes are
+ * tagged with choice 1 so they can be extracted afterwards.  The survivors
+ * are put back into original order by position.
+ */
+NLM_EXTERN void FindDiffsBetweenRuleSets (SuspectRuleSetPtr set1, SuspectRuleSetPtr set2, SuspectRuleSetPtr PNTR in1not2, SuspectRuleSetPtr PNTR in2not1)
+{
+  ValNodePtr list1, list2;
+  ValNodePtr vnp1, vnp2, cmp_start;
+  RuleSortPtr r1, r2;
+  Boolean found_match;
+
+  /* the results are stored through these pointers; check before allocating anything */
+  if (in1not2 == NULL || in2not1 == NULL) {
+    return;
+  }
+
+  /* eliminate duplicates, while maintaining original order */
+  list1 = SuspectRuleSetToRuleSortList(set1);
+  list1 = ValNodeSort(list1, SortVnpByRuleSortRule);
+  list2 = SuspectRuleSetToRuleSortList(set2);
+  list2 = ValNodeSort(list2, SortVnpByRuleSortRule);
+
+  cmp_start = list2;
+  for (vnp1 = list1; vnp1 != NULL; vnp1 = vnp1->next) {
+    r1 = (RuleSortPtr) vnp1->data.ptrvalue;
+    /* skip list2 entries already matched (choice 1) or sorting before vnp1;
+     * cmp_start remembers the last skipped node for the next iteration.
+     * The choice test comes first because matched nodes have no payload. */
+    for (vnp2 = cmp_start; vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) > 0); vnp2 = vnp2->next) {
+      cmp_start = vnp2;
+    }
+    found_match = FALSE;
+    /* walk the run of entries that compare equal, looking for an identical rule */
+    while (vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) == 0) && !found_match) {
+      if (vnp2->data.ptrvalue != NULL) {
+        r2 = (RuleSortPtr) vnp2->data.ptrvalue;
+        if (AsnIoMemComp (r1->rule, r2->rule, (AsnWriteFunc) SuspectRuleAsnWrite)) {
+          found_match = TRUE;
+        }
+      }
+      if (!found_match) {
+        vnp2 = vnp2->next;
+      }
+    }
+    if (found_match) {
+      /* drop both copies and tag the nodes for extraction below */
+      vnp1->data.ptrvalue = RuleSortFree(vnp1->data.ptrvalue);
+      vnp1->choice = 1;
+      vnp2->data.ptrvalue = RuleSortFree(vnp2->data.ptrvalue);
+      vnp2->choice = 1;
+    }
+  }
+
+  /* remove the tagged (matched) nodes from both lists */
+  vnp1 = ValNodeExtractList (&list1, 1);
+  vnp1 = ValNodeFree (vnp1);
+  vnp2 = ValNodeExtractList (&list2, 1);
+  vnp2 = ValNodeFree (vnp2);
+
+  /* restore the order the rules had in their source sets */
+  list1 = ValNodeSort (list1, SortVnpByRuleSortPos);
+  list2 = ValNodeSort (list2, SortVnpByRuleSortPos);
+
+  *in1not2 = RuleSortListToSuspectRuleSet (list1);
+  *in2not1 = RuleSortListToSuspectRuleSet (list2);
+  /* the rules have been moved out, so only the RuleSort shells are freed */
+  list1 = ValNodeFreeData (list1);
+  list2 = ValNodeFreeData (list2);
+}
+
+
+/* Reports every rule in rule_list that product_name matches.
+ * One line is written per match: the optional prefix and a tab, then the
+ * product name, a tab, and the rule summary.  Output goes to output_file,
+ * or to standard output when output_file is NULL.
+ * Returns TRUE if at least one rule matched.
+ */
+static Boolean ReportRuleSetProblems (CharPtr product_name, SuspectRuleSetPtr rule_list, FILE *output_file, CharPtr prefix)
+{
+  FILE *out = (output_file == NULL) ? stdout : output_file;
+  SuspectRulePtr cur;
+  CharPtr rule_text;
+  Boolean matched = FALSE;
+
+  for (cur = rule_list; cur != NULL; cur = cur->next) {
+    if (!MatchesSuspectProductRule (product_name, cur)) {
+      continue;
+    }
+    rule_text = SummarizeSuspectRule(cur);
+    if (prefix != NULL) {
+      fprintf (out, "%s\t", prefix);
+    }
+    fprintf (out, "%s\t%s\n", product_name, rule_text);
+    rule_text = MemFree (rule_text);
+    matched = TRUE;
+  }
+  return matched;
+}
+
+
+/* Reads one Entrezgene record in text ASN.1 from input_file and reports
+ * suspect product names among its protein names.
+ *
+ * input_file   stream to read from; it is left open.
+ * rule_list    rules to test against; when NULL, ReportProductNameProblems
+ *              is used instead of the rule set.
+ * output_file  passed through to the reporting functions.
+ *
+ * Each report line is prefixed with the gene id when track_info carries a
+ * positive one, otherwise with the gene's locus_tag, or its locus when
+ * there is no locus_tag.
+ *
+ * Returns FALSE when no record could be read, TRUE otherwise.
+ */
+NLM_EXTERN Boolean FindSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file)
+{
+  EntrezgenePtr egp;
+  AsnIoPtr aip;
+  ValNodePtr vnp;
+  ProtRefPtr prp;
+  GeneRefPtr grp;
+  CharPtr prefix = NULL;
+  Char geneid_buf[20];
+
+  aip = AsnIoNew (ASNIO_TEXT_IN, input_file, NULL, NULL, NULL);
+  egp = EntrezgeneAsnRead (aip, NULL);
+  /* release the reader on every path; FALSE keeps the caller's stream open */
+  if (aip != NULL) {
+    aip = AsnIoFree (aip, FALSE);
+  }
+  if (egp == NULL) {
+    return FALSE;
+  }
+
+  /* scan */
+  if (egp->prot != NULL) {
+    if (egp->track_info != NULL && egp->track_info->geneid > 0) {
+      sprintf (geneid_buf, "%d", egp->track_info->geneid);
+      prefix = geneid_buf;
+    } else if (egp->gene != NULL) {
+      grp = (GeneRefPtr) egp->gene;
+      if (grp->locus_tag == NULL) {
+        prefix = grp->locus;
+      } else {
+        prefix = grp->locus_tag;
+      }
+    }
+    prp = (ProtRefPtr) egp->prot;
+    for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
+      if (rule_list == NULL) {
+        ReportProductNameProblems (vnp->data.ptrvalue, output_file, prefix);
+      } else {
+        ReportRuleSetProblems (vnp->data.ptrvalue, rule_list, output_file, prefix);
+      }
+    }
+  }
+
+  egp = EntrezgeneFree (egp);
+  return TRUE;
+}
+
+
+/* Reads input_file line by line with AbstractReadFunction and treats each
+ * line as a product name to check.  When rule_list is NULL the name goes to
+ * ReportProductNameProblems, otherwise to ReportRuleSetProblems; no prefix
+ * is used in either case.  Each line is freed after it has been checked.
+ */
+NLM_EXTERN void FindSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file)
+{
+  ReadBufferData reader;
+  CharPtr name;
+
+  reader.fp = input_file;
+  reader.current_data = NULL;
+
+  for (name = AbstractReadFunction (&reader);
+       name != NULL;
+       name = AbstractReadFunction (&reader)) {
+    if (rule_list != NULL) {
+      ReportRuleSetProblems (name, rule_list, output_file, NULL);
+    } else {
+      ReportProductNameProblems (name, output_file, NULL);
+    }
+    MemFree (name);
+  }
+}
+
+
+/* code for special product table update */
+typedef struct productupdatetableitem { /* one row of the product update table; strings are freed by ProductUpdateTableItemFree */
+ CharPtr product_match; /* existing product name this row applies to (compared with StringCmp) */
+ CharPtr new_name; /* replacement product name */
+ CharPtr note_text; /* optional text appended to the coding region comment */
+} ProductUpdateTableItemData, PNTR ProductUpdateTableItemPtr;
+
+
+/* Allocates a zeroed table item and stores product_match in it.  The string
+ * is not copied: the item takes over the pointer, and
+ * ProductUpdateTableItemFree will free it.
+ */
+static ProductUpdateTableItemPtr ProductUpdateTableItemNew (CharPtr product_match)
+{
+  ProductUpdateTableItemPtr created = (ProductUpdateTableItemPtr) MemNew (sizeof (ProductUpdateTableItemData));
+
+  MemSet (created, 0, sizeof (ProductUpdateTableItemData));
+  created->product_match = product_match;
+  return created;
+}
+
+
+/* Frees a table item together with its three strings.  Accepts NULL.
+ * Always returns NULL.
+ */
+static ProductUpdateTableItemPtr ProductUpdateTableItemFree (ProductUpdateTableItemPtr item)
+{
+  if (item == NULL) {
+    return NULL;
+  }
+  item->product_match = MemFree (item->product_match);
+  item->new_name = MemFree (item->new_name);
+  item->note_text = MemFree (item->note_text);
+  return MemFree (item);
+}
+
+
+/* Writes one table row to fp as tab-separated columns:
+ *   product_match [ X new_name [ X note_text ] ]
+ * The new_name column is written empty when the name is "hypothetical
+ * protein" (case-insensitive), and the note column is written empty when
+ * the note equals product_match; these are the defaults that
+ * ProductUpdateTableItemFromValNodeList fills in for empty columns.
+ * Nothing is written when fp or item is NULL or product_match has no text.
+ */
+static void ProductUpdateTableItemWrite (FILE *fp, ProductUpdateTableItemPtr item)
+{
+  CharPtr name_col;
+  CharPtr note_col;
+
+  if (fp == NULL || item == NULL || StringHasNoText (item->product_match)) {
+    return;
+  }
+
+  fprintf (fp, "%s", item->product_match);
+  if (!StringHasNoText (item->new_name)) {
+    name_col = item->new_name;
+    if (StringICmp (item->new_name, "hypothetical protein") == 0) {
+      name_col = "";
+    }
+    fprintf (fp, "\tX\t%s", name_col);
+
+    if (!StringHasNoText (item->note_text)) {
+      note_col = item->note_text;
+      if (StringCmp (item->note_text, item->product_match) == 0) {
+        note_col = "";
+      }
+      fprintf (fp, "\tX\t%s", note_col);
+    }
+  }
+  fprintf (fp, "\n");
+}
+
+
+/* Frees a product update table: every node and the table item it carries.
+ * Accepts NULL.  Always returns NULL.
+ */
+NLM_EXTERN ValNodePtr ProductUpdateTableFree (ValNodePtr list)
+{
+  ValNodePtr node;
+  ValNodePtr following;
+
+  for (node = list; node != NULL; node = following) {
+    following = node->next;
+    /* detach first so that ValNodeFree releases just this node */
+    node->next = NULL;
+    node->data.ptrvalue = ProductUpdateTableItemFree (node->data.ptrvalue);
+    ValNodeFree (node);
+  }
+  return NULL;
+}
+
+
+/* Removes one leading and one trailing double quote from str, in place.
+ * A leading quote is removed by shifting the rest of the string down one
+ * position; a trailing quote is overwritten with the terminator.
+ * NULL and empty strings are left alone.
+ */
+static void TrimBeginningAndEndingQuotes (CharPtr str)
+{
+  CharPtr src, dst;
+
+  if (str == NULL) {
+    return;
+  }
+  if (*str == '"') {
+    /* copy down over the quote: the destination starts at the quote itself */
+    dst = str;
+    src = str + 1;
+    while (*src != 0) {
+      *dst = *src;
+      dst++;
+      src++;
+    }
+    *dst = 0;
+  }
+  /* only look at the last character when there is one */
+  if (*str != 0) {
+    dst = str + StringLen(str) - 1;
+    if (*dst == '"') {
+      *dst = 0;
+    }
+  }
+}
+
+
+/* Builds a table item from one parsed row (a list of column strings):
+ *   col 1  product name to match (required, must have text)
+ *   col 2  "X" (case-insensitive) marks the row as an update; rows without
+ *          it are ignored
+ *   col 3  new product name; "hypothetical protein" when absent or blank
+ *   col 4  "X" (case-sensitive) requests a note
+ *   col 5  note text; defaults to a copy of the col 1 product name
+ * Strings taken from the row are moved into the item and the matching
+ * column pointers are set to NULL, so the caller can free the row safely.
+ * Returns NULL when the row is not an update row.
+ */
+static ProductUpdateTableItemPtr ProductUpdateTableItemFromValNodeList (ValNodePtr column_list)
+{
+  ProductUpdateTableItemPtr row;
+  ValNodePtr name_col;
+  ValNodePtr flag_col;
+  ValNodePtr note_col;
+
+  if (column_list == NULL || StringHasNoText (column_list->data.ptrvalue)) {
+    return NULL;
+  }
+  if (column_list->next == NULL) {
+    return NULL;
+  }
+  if (StringICmp (column_list->next->data.ptrvalue, "X") != 0) {
+    return NULL;
+  }
+
+  row = ProductUpdateTableItemNew(column_list->data.ptrvalue);
+  column_list->data.ptrvalue = NULL;
+
+  /* get new product name. Default to hypothetical protein if not specified */
+  name_col = column_list->next->next;
+  if (name_col != NULL && !StringHasNoText (name_col->data.ptrvalue)) {
+    row->new_name = name_col->data.ptrvalue;
+    name_col->data.ptrvalue = NULL;
+  } else {
+    row->new_name = StringSave ("hypothetical protein");
+  }
+
+  /* find out if note is required */
+  flag_col = (name_col == NULL) ? NULL : name_col->next;
+  if (flag_col == NULL || StringCmp (flag_col->data.ptrvalue, "X") != 0) {
+    return row;
+  }
+
+  note_col = flag_col->next;
+  if (note_col != NULL && !StringHasNoText (note_col->data.ptrvalue)) {
+    row->note_text = note_col->data.ptrvalue;
+    note_col->data.ptrvalue = NULL;
+  } else {
+    row->note_text = StringSave (row->product_match);
+  }
+  return row;
+}
+
+
+/* Reads a product update table from fp.
+ * Each line is split into columns with ReadOneColumnList; surrounding
+ * double quotes are trimmed from the first column, and the row is converted
+ * with ProductUpdateTableItemFromValNodeList.  Rows that do not yield an
+ * item are skipped.
+ * Returns a list of ProductUpdateTableItemPtr (choice 0) in file order, to
+ * be released with ProductUpdateTableFree; NULL when fp is NULL or no row
+ * qualified.
+ */
+NLM_EXTERN ValNodePtr ReadProductUpdateTable (FILE *fp)
+{
+  ReadBufferData rbd;
+  CharPtr line;
+  ValNodeBlock line_list;
+  ValNodePtr column_list;
+  ProductUpdateTableItemPtr item;
+
+  if (fp == NULL) return NULL;
+  rbd.fp = fp;
+  rbd.current_data = NULL;
+
+  InitValNodeBlock (&line_list, NULL);
+
+  line = AbstractReadFunction (&rbd);
+  while (line != NULL)
+  {
+    column_list = ReadOneColumnList (line);
+    if (column_list != NULL) {
+      TrimBeginningAndEndingQuotes(column_list->data.ptrvalue);
+      item = ProductUpdateTableItemFromValNodeList(column_list);
+      if (item != NULL) {
+        ValNodeAddPointerToEnd (&line_list, 0, item);
+      }
+      /* strings moved into the item were set to NULL above, so this frees only the leftovers */
+      column_list = ValNodeFreeData (column_list);
+    }
+    /* the line buffer belongs to us; release it before reading the next one */
+    line = MemFree (line);
+    line = AbstractReadFunction (&rbd);
+  }
+  return line_list.head;
+}
+
+
+/* Writes every item of the table that has a non-blank product_match to fp,
+ * one row per item, via ProductUpdateTableItemWrite.
+ */
+static void WriteProductUpdateTable (FILE *fp, ValNodePtr table)
+{
+  ValNodePtr node = table;
+  ProductUpdateTableItemPtr row;
+
+  while (node != NULL) {
+    row = (ProductUpdateTableItemPtr) node->data.ptrvalue;
+    if (row != NULL && !StringHasNoText (row->product_match)) {
+      ProductUpdateTableItemWrite(fp, row);
+    }
+    node = node->next;
+  }
+}
+
+
+/* Looks up the first table item whose product_match equals product exactly
+ * (case-sensitive StringCmp).
+ * Returns NULL when product is blank, the list is empty, or nothing
+ * matches.
+ */
+static ProductUpdateTableItemPtr GetProductUpdateTableItemForProduct (CharPtr product, ValNodePtr list)
+{
+  ProductUpdateTableItemPtr candidate;
+  ValNodePtr node;
+
+  if (list == NULL || StringHasNoText (product)) {
+    return NULL;
+  }
+
+  for (node = list; node != NULL; node = node->next) {
+    candidate = (ProductUpdateTableItemPtr) node->data.ptrvalue;
+    if (candidate == NULL) {
+      continue;
+    }
+    if (StringCmp (product, candidate->product_match) == 0) {
+      return candidate;
+    }
+  }
+  return NULL;
+}
+
+
+typedef struct productupdate { /* state handed to ApplyProductUpdateCallback */
+ ValNodePtr table; /* list of ProductUpdateTableItemPtr to look product names up in */
+ FILE *log_fp; /* optional log stream; NULL disables logging */
+ Boolean any_change; /* set to TRUE by the callback when a product name is replaced */
+} ProductUpdateData, PNTR ProductUpdatePtr;
+
+/* Feature callback that applies the product update table to one coding
+ * region.
+ *
+ * sfp   feature being visited; anything but a coding region is ignored.
+ * data  ProductUpdatePtr carrying the table, optional log stream and the
+ *       any_change flag.
+ *
+ * The protein reference is taken from the first protein feature on the
+ * product bioseq, falling back to GetProtRefForFeature.  When the first
+ * product name has a table entry, the name is replaced, the entry's note
+ * (if any) is appended to the coding region comment, and for "hypothetical
+ * protein" any EC numbers are also appended to the comment.  A tab-separated
+ * log line is written when a log stream is set.
+ */
+static void ApplyProductUpdateCallback (SeqFeatPtr sfp, Pointer data)
+{
+  ProductUpdatePtr pd;
+  BioseqPtr pbsp;
+  SeqFeatPtr prot;
+  ProtRefPtr prp = NULL;
+  SeqMgrFeatContext context;
+  ProductUpdateTableItemPtr item = NULL;
+  Char buf[255];
+  ValNodePtr vnp;
+  Boolean adjusted_mrna = FALSE;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION
+      || (pd = (ProductUpdatePtr) data) == NULL) {
+    return;
+  }
+
+  pbsp = BioseqFindFromSeqLoc (sfp->product);
+  prot = SeqMgrGetNextFeature (pbsp, NULL, 0, FEATDEF_PROT, &context);
+  if (prot == NULL || (prp = (ProtRefPtr) prot->data.value.ptrvalue) == NULL) {
+    prp = GetProtRefForFeature(sfp);
+  }
+  if (prp == NULL || prp->name == NULL) {
+    return;
+  }
+
+  item = GetProductUpdateTableItemForProduct (prp->name->data.ptrvalue, pd->table);
+  if (item == NULL) {
+    return;
+  }
+
+  prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue);
+  prp->name->data.ptrvalue = StringSave (item->new_name);
+  if (item->note_text != NULL) {
+    SetStringValue (&(sfp->comment), item->note_text, ExistingTextOption_append_semi);
+  }
+  /* also need to move ec numbers to note, if any, for hypothetical protein */
+  if (StringICmp (item->new_name, "hypothetical protein") == 0
+      && prp->ec != NULL) {
+    SetStringValue (&(sfp->comment), " EC_number=", ExistingTextOption_append_semi);
+    SetStringValue (&(sfp->comment), prp->ec->data.ptrvalue, ExistingTextOption_append_none);
+    for (vnp = prp->ec->next; vnp != NULL; vnp = vnp->next) {
+      SetStringValue (&(sfp->comment), vnp->data.ptrvalue, ExistingTextOption_append_comma);
+    }
+  }
+
+  /* prot is NULL when the protein reference came from the fallback above */
+  if (prot != NULL) {
+    adjusted_mrna = AdjustmRNAProductToMatchProteinProduct(prot);
+  }
+
+  pd->any_change = TRUE;
+  if (pd->log_fp != NULL) {
+    /* the product bioseq may not have been found; log an empty id then */
+    buf[0] = 0;
+    if (pbsp != NULL) {
+      SeqIdWrite (SeqIdFindBest (pbsp->id, SEQID_GENBANK), buf, PRINTID_REPORT, sizeof (buf) - 1);
+    }
+    fprintf (pd->log_fp, "%s\t%s\t%s\t%s\t%s\n", buf, item->product_match, item->new_name,
+             item->note_text == NULL ? "" : item->note_text,
+             adjusted_mrna ? "Adjusted mRNA" : "");
+  }
+}
+
+
+/* Applies a product update table to every feature in sep by visiting them
+ * with ApplyProductUpdateCallback.  log_fp may be NULL.
+ * Returns TRUE when at least one product name was replaced; FALSE when
+ * table or sep is NULL or nothing changed.
+ */
+NLM_EXTERN Boolean ApplyProductUpdateTable (ValNodePtr table, SeqEntryPtr sep, FILE *log_fp)
+{
+  ProductUpdateData state;
+
+  if (sep == NULL || table == NULL) {
+    return FALSE;
+  }
+
+  MemSet (&state, 0, sizeof (ProductUpdateData));
+  state.log_fp = log_fp;
+  state.table = table;
+
+  VisitFeaturesInSep (sep, &state, ApplyProductUpdateCallback);
+  return state.any_change;
+}
+
+
+/* Feature callback that collects product names.  data is a ValNodeBlockPtr.
+ * For a protein feature the protein reference is the feature's own data;
+ * for a coding region it comes from GetProtRefForFeature.  Other feature
+ * types are ignored.  When the first product name has text, a copy of it is
+ * appended to the block (choice 0).
+ */
+static void ExportProductUpdateTableCallback (SeqFeatPtr sfp, Pointer data)
+{
+  ProtRefPtr prp;
+
+  if (sfp == NULL || data == NULL) {
+    return;
+  }
+
+  if (sfp->data.choice == SEQFEAT_PROT) {
+    prp = (ProtRefPtr)sfp->data.value.ptrvalue;
+  } else if (sfp->data.choice == SEQFEAT_CDREGION) {
+    prp = GetProtRefForFeature(sfp);
+  } else {
+    return;
+  }
+
+  if (prp == NULL || prp->name == NULL) {
+    return;
+  }
+  if (!StringHasNoText (prp->name->data.ptrvalue)) {
+    ValNodeAddPointerToEnd ((ValNodeBlockPtr) data, 0, StringSave (prp->name->data.ptrvalue));
+  }
+}
+
+
+/* Writes the distinct product names found in sep to fp, one per line, in
+ * the order produced by SortVnpByString.  Names are collected with
+ * ExportProductUpdateTableCallback.  Does nothing when sep or fp is NULL.
+ */
+NLM_EXTERN void ExportProductUpdateTable (SeqEntryPtr sep, FILE *fp)
+{
+  ValNodeBlock names;
+  ValNodePtr node;
+
+  if (sep == NULL || fp == NULL) {
+    return;
+  }
+
+  InitValNodeBlock (&names, NULL);
+  VisitFeaturesInSep (sep, &names, ExportProductUpdateTableCallback);
+
+  /* sort first so that duplicates are adjacent for ValNodeUnique */
+  names.head = ValNodeSort (names.head, SortVnpByString);
+  ValNodeUnique (&(names.head), SortVnpByString, ValNodeFreeData);
+
+  node = names.head;
+  while (node != NULL) {
+    fprintf (fp, "%s\n", (CharPtr) node->data.ptrvalue);
+    node = node->next;
+  }
+  names.head = ValNodeFreeData (names.head);
+}
+
+
+/* Tries the rule's replacement on a table item.
+ * The fix is applied to a copy of the item's current new_name, or of
+ * product_match when no new_name has been set yet.  On success the item's
+ * new_name is replaced by the fixed text, its note_text is discarded, and
+ * if the replacement has move_to_note set the note becomes a copy of
+ * product_match.
+ * Returns TRUE when the fix changed the name; FALSE otherwise, including
+ * when the rule has no replacement or the item has no product_match text.
+ */
+static Boolean ApplySuspectProductNameFixToProductUpdateTableItem (SuspectRulePtr rule, ProductUpdateTableItemPtr item)
+{
+  CharPtr candidate;
+
+  if (rule == NULL || rule->replace == NULL) {
+    return FALSE;
+  }
+  if (item == NULL || StringHasNoText (item->product_match)) {
+    return FALSE;
+  }
+
+  candidate = StringSave (item->new_name == NULL ? item->product_match : item->new_name);
+
+  if (!ApplySuspectProductNameFixToString (rule, &candidate)) {
+    candidate = MemFree (candidate);
+    return FALSE;
+  }
+
+  item->new_name = MemFree (item->new_name);
+  item->note_text = MemFree (item->note_text);
+  item->new_name = candidate;
+  if (rule->replace->move_to_note) {
+    item->note_text = StringSave (item->product_match);
+  }
+  return TRUE;
+}
+
+
+/* Fills in suggested replacements for every item of a product update table.
+ * Each rule is tried on each item in rule order, so later rules operate on
+ * the result of earlier ones.  An item that matched a rule but for which no
+ * rule produced a fix is given the new name "hypothetical protein" with its
+ * original product name as the note.
+ * Returns TRUE when at least one item received a suggestion; FALSE when
+ * rule_set or table is NULL or nothing applied.
+ */
+static Boolean ApplySuspectProductNameFixesToProductUpdateTable (SuspectRuleSetPtr rule_set, ValNodePtr table)
+{
+  SuspectRulePtr rule;
+  ValNodePtr vnp;
+  Boolean rval = FALSE, this_rule_apply, this_rule_match;
+  ProductUpdateTableItemPtr item;
+
+  if (rule_set == NULL || table == NULL) {
+    return FALSE;
+  }
+
+  for (vnp = table; vnp != NULL; vnp = vnp->next) {
+    item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue;
+    if (item == NULL) {
+      /* nothing to update; product_match below would be a NULL dereference */
+      continue;
+    }
+    this_rule_apply = FALSE;
+    this_rule_match = FALSE;
+    for (rule = rule_set; rule != NULL; rule = rule->next) {
+      if (ApplySuspectProductNameFixToProductUpdateTableItem (rule, item)) {
+        this_rule_apply = TRUE;
+      } else if (!this_rule_apply && !this_rule_match) {
+        /* remember a match without a fix so the fallback below can apply */
+        this_rule_match = MatchesSuspectProductRule (item->product_match, rule);
+      }
+    }
+    if (this_rule_apply) {
+      rval = TRUE;
+    } else if (this_rule_match) {
+      /* release any earlier values before installing the fallback */
+      item->new_name = MemFree (item->new_name);
+      item->note_text = MemFree (item->note_text);
+      item->new_name = StringSave ("hypothetical protein");
+      item->note_text = StringSave (item->product_match);
+      rval = TRUE;
+    }
+  }
+  return rval;
+}
+
+
+/* Writes a product update table for sep to fp with suggestions filled in
+ * from rules.  The distinct product names in sep are collected and sorted,
+ * each is wrapped in a table item (which takes over the name string), the
+ * rule fixes are applied, and the table is written and then freed.
+ * Does nothing when sep or fp is NULL.
+ */
+NLM_EXTERN void ExportProductUpdateTableWithPrecomputedSuggestions (FILE *fp, SeqEntryPtr sep, SuspectRuleSetPtr rules)
+{
+  ValNodeBlock rows;
+  ValNodePtr node;
+
+  if (sep == NULL || fp == NULL) {
+    return;
+  }
+
+  InitValNodeBlock (&rows, NULL);
+  VisitFeaturesInSep (sep, &rows, ExportProductUpdateTableCallback);
+
+  /* sort first so that duplicates are adjacent for ValNodeUnique */
+  rows.head = ValNodeSort (rows.head, SortVnpByString);
+  ValNodeUnique (&(rows.head), SortVnpByString, ValNodeFreeData);
+
+  /* swap each name string for a table item that now owns the string */
+  node = rows.head;
+  while (node != NULL) {
+    node->data.ptrvalue = ProductUpdateTableItemNew(node->data.ptrvalue);
+    node = node->next;
+  }
+
+  ApplySuspectProductNameFixesToProductUpdateTable (rules, rows.head);
+  WriteProductUpdateTable (fp, rows.head);
+
+  rows.head = ProductUpdateTableFree (rows.head);
+}
diff --git a/api/macroapi.h b/api/macroapi.h
index 54509a60..f1868baf 100644
--- a/api/macroapi.h
+++ b/api/macroapi.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/15/2007
*
-* $Revision: 1.93 $
+* $Revision: 1.129 $
*
* File Description:
*
@@ -155,6 +155,7 @@ NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action);
NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype);
NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype);
NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list);
+NLM_EXTERN ValNodePtr MakeFeatureFieldField (Uint2 ftype, Int4 legalqual);
/* source qual functions */
NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint);
@@ -190,13 +191,25 @@ NLM_EXTERN CharPtr GettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp);
NLM_EXTERN Boolean SetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text);
NLM_EXTERN CharPtr GetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp);
+/* Structured Comment functions */
+NLM_EXTERN CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp);
+NLM_EXTERN Boolean IsUserObjectStructuredComment (UserObjectPtr uop);
-
+/* Publication functions */
NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field);
NLM_EXTERN ValNodePtr GetPubFieldList (void);
NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp);
NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub);
+/* DBLink functions */
+NLM_EXTERN Int4 GetNumDBLinkFields (void);
+NLM_EXTERN CharPtr GetDBLinkNameFromDBLinkFieldType (Int4 field_type);
+NLM_EXTERN Int4 GetDBLinkFieldTypeFromDBLinkName (CharPtr field_name);
+
+
+/* other useful functions */
+NLM_EXTERN void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene);
+
/* generic string functions */
NLM_EXTERN Boolean SetStringValue (CharPtr PNTR existing_val, CharPtr new_val, Uint2 existing_text);
NLM_EXTERN Boolean RemoveValNodeStringMatch (ValNodePtr PNTR list, StringConstraintPtr scp);
@@ -207,19 +220,24 @@ NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr scp);
NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp);
NLM_EXTERN Boolean DoesStringMatchConstraint (CharPtr str, StringConstraintPtr scp);
NLM_EXTERN Boolean RemoveStringConstraintPortionFromString (CharPtr PNTR str, StringConstraintPtr scp);
+NLM_EXTERN Boolean ReplaceStringConstraintPortionInString (CharPtr PNTR str, CharPtr replace, StringConstraintPtr scp);
NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp);
NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp);
NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint);
+NLM_EXTERN Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint);
NLM_EXTERN Boolean IsPublicationConstraintEmpty (PublicationConstraintPtr constraint);
NLM_EXTERN Boolean IsFieldConstraintEmpty (FieldConstraintPtr constraint);
NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint);
NLM_EXTERN Boolean IsLocationConstraintEmpty (LocationConstraintPtr lcp);
+NLM_EXTERN Boolean IsMolinfoFieldConstraintEmpty (MolinfoFieldConstraintPtr constraint);
+NLM_EXTERN Boolean IsTranslationConstraintEmpty (TranslationConstraintPtr constraint);
NLM_EXTERN Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp);
NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint);
NLM_EXTERN ValNodePtr FreeObjectList (ValNodePtr vnp);
NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action);
NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionPtr action, BatchExtraPtr batch_extra);
NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr sep);
+NLM_EXTERN ValNodePtr GetSequenceListForConstraint (SeqEntryPtr sep, ConstraintChoiceSetPtr csp);
NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp);
NLM_EXTERN Int4 DoApplyActionToObjectListEx (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra);
NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna);
@@ -235,11 +253,15 @@ NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodeP
NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp);
NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, ConstraintChoiceSetPtr csp);
NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit);
+NLM_EXTERN ValNodePtr GetDuplicateFeaturesForRemoval (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action);
+NLM_EXTERN void RemoveDuplicateFeaturesInList (ValNodePtr delete_list, Uint2 entityID, Boolean remove_proteins);
+NLM_EXTERN Boolean RemoveDuplicateFeaturesInSeqEntry (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action, FILE *log_fp);
NLM_EXTERN int LIBCALLBACK SortVnpByObject (VoidPtr ptr1, VoidPtr ptr2);
NLM_EXTERN Boolean IsConversionSupported (Uint2 featdef_from, Uint2 featdef_to);
+NLM_EXTERN void ApplyTextTransformsToString (CharPtr PNTR str, ValNodePtr transform_list);
NLM_EXTERN CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion);
NLM_EXTERN Boolean RemoveTextPortionFromString (CharPtr str, TextPortionPtr text_portion);
NLM_EXTERN Boolean IsTextMarkerEmpty (TextMarkerPtr marker);
@@ -282,8 +304,8 @@ NLM_EXTERN void AddAllDescriptorsToChoiceList (ValNodePtr PNTR descriptor_type_l
NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action);
-NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat);
-NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat, FILE *log_fp);
+NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro);
+NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp);
NLM_EXTERN SeqFeatPtr ApplyOneFeatureToBioseq (BioseqPtr bsp, Uint1 featdef, SeqLocPtr slp, ValNodePtr fields, ValNodePtr src_fields, Boolean add_mrna);
@@ -295,8 +317,10 @@ NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field);
NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp);
NLM_EXTERN Boolean IsFieldTypeNonText (ValNodePtr field_type);
NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text);
+NLM_EXTERN Boolean IsTextTransformEmpty (ValNodePtr vnp);
extern const CharPtr kTaxnameAfterBinomialString;
NLM_EXTERN CharPtr SummarizeTextPortion (TextPortionPtr text_portion);
+NLM_EXTERN CharPtr SummarizeTextTransform (ValNodePtr transform);
NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src);
NLM_EXTERN CharPtr SummarizeParseDst (ValNodePtr dst);
NLM_EXTERN CharPtr SummarizeAECRAction (AECRActionPtr a);
@@ -304,6 +328,15 @@ NLM_EXTERN CharPtr SummarizeParseAction (ParseActionPtr p);
NLM_EXTERN CharPtr SummarizeAutodefAction (AutodefActionPtr autodef);
NLM_EXTERN CharPtr SummarizeRemoveDescriptorAction (RemoveDescriptorActionPtr a);
NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a);
+NLM_EXTERN CharPtr SummarizeFixCapsAction (FixCapsActionPtr action);
+NLM_EXTERN CharPtr SummarizeFixFormatAction (FixFormatActionPtr action);
+NLM_EXTERN CharPtr SummarizeSortFieldsAction (SortFieldsActionPtr action);
+NLM_EXTERN CharPtr SummarizeMolinfoBlockAction (MolinfoBlockPtr mib);
+NLM_EXTERN CharPtr SummarizeRemoveDuplicateFeaturesAction (RemoveDuplicateFeatureActionPtr action);
+NLM_EXTERN CharPtr SummarizeAuthorFixAction (AuthorFixActionPtr a);
+NLM_EXTERN CharPtr SummarizeWordSubstitution (WordSubstitutionPtr word);
+NLM_EXTERN CharPtr SummarizeFeatureStrandedness (Uint2 strandedness);
+NLM_EXTERN CharPtr SummarizeStringConstraint (StringConstraintPtr constraint);
NLM_EXTERN CharPtr SummarizeConstraintSet (ValNodePtr constraint_set);
NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint);
@@ -321,8 +354,10 @@ NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTy
NLM_EXTERN BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data);
NLM_EXTERN ValNodePtr GetMultipleFieldValuesForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra);
+NLM_EXTERN void InitValNodeBlock (ValNodeBlockPtr vnbp, ValNodePtr list);
NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data);
NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data);
+NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list);
typedef enum {
@@ -351,12 +386,14 @@ typedef struct tabcolumnconfig {
ValNodePtr constraint;
} TabColumnConfigData, PNTR TabColumnConfigPtr;
+
NLM_EXTERN MatchTypePtr MatchTypeNew ();
NLM_EXTERN MatchTypePtr MatchTypeFree (MatchTypePtr match_type);
NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void);
NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t);
NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig);
+NLM_EXTERN void TabColumnConfigReset (TabColumnConfigPtr t);
NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns);
NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig);
NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns);
@@ -393,6 +430,69 @@ NLM_EXTERN Boolean GBBlockIsCompletelyEmpty (GBBlockPtr gb);
NLM_EXTERN CharPtr GetObjectIdString (ObjectIdPtr oip);
NLM_EXTERN Boolean SetObjectIdString (ObjectIdPtr oip, CharPtr value, Uint2 existing_text);
+NLM_EXTERN void SplitPCRPrimersByPosition (SeqEntryPtr sep);
+NLM_EXTERN void SplitPCRPrimersByConstraints (SeqEntryPtr sep, StringConstraintPtr scp_fwd, StringConstraintPtr scp_rev);
+NLM_EXTERN void MergePCRPrimers (SeqEntryPtr sep);
+
+NLM_EXTERN SubSourcePtr FindBadLatLon (BioSourcePtr biop);
+NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list);
+NLM_EXTERN void FixiPCRPrimerSeqsCallback (BioSourcePtr biop, Pointer data);
+
+NLM_EXTERN Boolean HasTaxonomyID (BioSourcePtr biop);
+
+NLM_EXTERN ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp);
+
+NLM_EXTERN Boolean StripSuffixFromAuthor (AuthorPtr pAuthor);
+NLM_EXTERN Boolean TruncateAuthorMiddleInitials (AuthorPtr pAuthor);
+
+
+/* for product name rules */
+NLM_EXTERN Int4 CountSuspectRuleSet (SuspectRuleSetPtr set);
+
+NLM_EXTERN Boolean IsSearchFuncEmpty (SearchFuncPtr func);
+NLM_EXTERN Boolean IsSuspectRuleEmpty (SuspectRulePtr rule);
+
+NLM_EXTERN CharPtr SummarizeSearchFunc (SearchFuncPtr func);
+NLM_EXTERN CharPtr SummarizeReplaceFunc (ReplaceFuncPtr replace);
+NLM_EXTERN CharPtr SummarizeFixType (Uint2 fix_type);
+NLM_EXTERN CharPtr SummarizeReplaceRule (ReplaceRulePtr replace);
+NLM_EXTERN CharPtr SummarizeSuspectRule (SuspectRulePtr rule);
+
+
+NLM_EXTERN Boolean StringMayContainPlural (CharPtr search);
+NLM_EXTERN Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n);
+NLM_EXTERN Boolean ContainsThreeOrMoreNumbersTogether (CharPtr search);
+NLM_EXTERN Boolean IsPrefixPlusNumbers (CharPtr prefix, CharPtr search);
+NLM_EXTERN Boolean StringContainsUnbalancedParentheses (CharPtr search);
+NLM_EXTERN Boolean StringContainsUnderscore (CharPtr search);
+NLM_EXTERN Boolean ProductContainsTerm (CharPtr pattern, CharPtr search);
+
+NLM_EXTERN Boolean DoesStringMatchSuspectRule (CharPtr str, SeqFeatPtr sfp, SuspectRulePtr rule);
+NLM_EXTERN ValNodePtr GetFeaturesForSuspectRules (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef);
+NLM_EXTERN ValNodePtr FreeListOfObjectLists (ValNodePtr list);
+NLM_EXTERN void PrintSuspectRuleMatches (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp);
+NLM_EXTERN ValNodePtr GetSuspectRuleDiscrepancies (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef, Uint4 clickable_item_type);
+NLM_EXTERN Int4 ApplySuspectRuleFixesToSeqEntry (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp);
+
+NLM_EXTERN Boolean ApplySuspectProductNameFixToString (SuspectRulePtr rule, CharPtr PNTR str);
+NLM_EXTERN Boolean ApplySuspectProductNameFixToFeature (SuspectRulePtr rule, SeqFeatPtr cds, FILE *fp);
+NLM_EXTERN void SortSuspectRuleSetByFind (SuspectRuleSetPtr PNTR rules);
+NLM_EXTERN void SortSuspectRuleSetByFixTypeThenFind (SuspectRuleSetPtr PNTR rules);
+
+NLM_EXTERN void FindDiffsBetweenRuleSets (SuspectRuleSetPtr set1, SuspectRuleSetPtr set2, SuspectRuleSetPtr PNTR in1not2, SuspectRuleSetPtr PNTR in2not1);
+NLM_EXTERN Boolean FindSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file);
+NLM_EXTERN void FindSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file);
+
+
+/* declarations for product update table handling */
+NLM_EXTERN ValNodePtr ReadProductUpdateTable (FILE *fp);
+NLM_EXTERN ValNodePtr ProductUpdateTableFree (ValNodePtr list);
+NLM_EXTERN Boolean ApplyProductUpdateTable (ValNodePtr table, SeqEntryPtr sep, FILE *log_fp);
+NLM_EXTERN void ExportProductUpdateTable (SeqEntryPtr sep, FILE *fp);
+NLM_EXTERN void ExportProductUpdateTableWithPrecomputedSuggestions (FILE *fp, SeqEntryPtr sep, SuspectRuleSetPtr rules);
+
+NLM_EXTERN ValNodePtr GetBioseqMatchesForSequenceIDs (ValNodePtr query_list, Uint1 match_location, SeqEntryPtr sep);
+
#ifdef __cplusplus
}
#endif
diff --git a/api/objmgr.c b/api/objmgr.c
index cfa33f98..1be2a72b 100644
--- a/api/objmgr.c
+++ b/api/objmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.86 $
+* $Revision: 6.87 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -2405,6 +2405,9 @@ static Int4 NEAR ObjMgrLockFunc (ObjMgrPtr omp, Uint2 type, Pointer data, Boolea
}
omdp = ObjMgrFindTop(omp, omp->datalist[i]);
+ if (omdp == NULL) {
+ return lockcnt;
+ }
if (lockit) {
omdp->lockcnt++;
diff --git a/api/seqmgr.c b/api/seqmgr.c
index 55bb0d8b..3692fa8f 100644
--- a/api/seqmgr.c
+++ b/api/seqmgr.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/94
*
-* $Revision: 6.308 $
+* $Revision: 6.314 $
*
* File Description: Manager for Bioseqs and BioseqSets
*
@@ -274,6 +274,7 @@ NLM_EXTERN Boolean MakeReversedSeqIdString (SeqIdPtr sid, CharPtr buf, size_t le
case SEQID_TPE:
case SEQID_TPD:
case SEQID_GPIPE:
+ case SEQID_NAMED_ANNOT_TRACK:
tsip = (TextSeqIdPtr) (sid->data.ptrvalue);
if (tsip->accession != NULL) {
tmp = tsip->name;
@@ -2891,8 +2892,7 @@ NLM_EXTERN Boolean LIBCALL SeqMgrAdd (Uint2 type, Pointer data)
SeqMgrUnlock();
return retval;
}
-
- SeqMgrAddToBioseqIndex((BioseqPtr)data);
+ retval &= SeqMgrAddToBioseqIndex((BioseqPtr)data);
SeqMgrUnlock();
@@ -3320,6 +3320,11 @@ NLM_EXTERN Boolean LIBCALL SeqMgrAddToBioseqIndex (BioseqPtr bsp)
{
bspp = smp->NonIndexedBioseq;
smp->NonIndexedBioseq = MemNew((smp->NonIndexedBioseqNum + 10) * sizeof (BioseqPtr));
+ if (smp->NonIndexedBioseq == NULL) {
+ Message (MSG_POSTERR, "Unable to allocate memory for bioseq index");
+ smp->NonIndexedBioseq = bspp;
+ return FALSE;
+ }
MemCopy(smp->NonIndexedBioseq, bspp, (smp->NonIndexedBioseqNum * sizeof(BioseqPtr)));
MemFree(bspp);
smp->NonIndexedBioseqNum += 10;
@@ -6373,6 +6378,16 @@ static int LIBCALLBACK SortFeatItemListByLabel (VoidPtr vp1, VoidPtr vp2)
return -1;
}
+ /* If they're case-insensitive the same, but case-sensitive different,
+ then fall back to sort by case-sensitive
+ (e.g. AJ344068.1 has genes korA and KorA ) */
+ compare = StringCmp (sp1->label, sp2->label);
+ if( compare > 0 ) {
+ return 1;
+ } else if( compare < 0 ) {
+ return -1;
+ }
+
/* for duplicated genes, etc., that cross origin, put ignored item last for binary search */
if (sp1->ignore) {
@@ -8341,6 +8356,33 @@ NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXref (SeqFeatPtr sfp)
return grp;
}
+/*
+ * Returns the gene reference stored in the first SEQFEAT_GENE xref of sfp,
+ * or NULL when sfp is NULL or carries no gene xref.
+ *
+ * oipP is optional.  When supplied it is first cleared to NULL, and then
+ * receives the xref's id pointer if that id has choice 3 (the variant whose
+ * pointer is read here as an ObjectIdPtr) and the pointer is not NULL.
+ */
+NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx (SeqFeatPtr sfp, ObjectIdPtr PNTR oipP)
+
+{
+  SeqFeatXrefPtr  curr;
+  ObjectIdPtr     local_id;
+
+  /* give the caller a defined value on every exit path */
+  if (oipP != NULL) {
+    *oipP = NULL;
+  }
+  if (sfp == NULL) return NULL;
+
+  for (curr = sfp->xref; curr != NULL; curr = curr->next) {
+    if (curr->data.choice != SEQFEAT_GENE) continue;
+
+    /* only the first gene xref is ever examined */
+    if (curr->id.choice == 3 && oipP != NULL) {
+      local_id = (ObjectIdPtr) curr->id.value.ptrvalue;
+      if (local_id != NULL) {
+        *oipP = local_id;
+      }
+    }
+    return (GeneRefPtr) curr->data.value.ptrvalue;
+  }
+
+  return NULL;
+}
+
NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed (GeneRefPtr grp)
{
@@ -8426,16 +8468,13 @@ static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp,
} else if (overlapType == LOCATION_SUBSET || overlapType == CHECK_INTERVALS) {
/* requires individual intervals to be completely contained within gene, etc. */
-
- if (feat->left <= left && feat->right >= right) {
- sfp = feat->sfp;
- if (sfp != NULL) {
- diff = SeqLocAinB (slp, sfp->location);
- if (diff >= 0) {
- if (overlapType == LOCATION_SUBSET || numivals == 1 ||
- CheckInternalExonBoundaries (numivals, ivals, feat->numivals, feat->ivals)) {
- return diff;
- }
+ sfp = feat->sfp;
+ if (sfp != NULL) {
+ diff = SeqLocAinB (slp, sfp->location);
+ if (diff >= 0) {
+ if (overlapType == LOCATION_SUBSET || numivals == 1 ||
+ CheckInternalExonBoundaries (numivals, ivals, feat->numivals, feat->ivals)) {
+ return diff;
}
}
}
@@ -9258,6 +9297,17 @@ NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature (SeqLocPtr slp, Uint2
numfeats, position, overlapType, context, NULL, NULL, NULL, FALSE);
}
+/*
+ * Variant of the overlapping-feature lookup that lets the caller choose the
+ * final Boolean argument passed to SeqMgrGetBestOverlappingFeat ("special").
+ * All other arguments are forwarded unchanged; featarray is cast to
+ * SMFeatItemPtr PNTR and the three pointer arguments after context are NULL.
+ * Returns whatever SeqMgrGetBestOverlappingFeat returns.
+ */
+NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeatureEx (SeqLocPtr slp, Uint2 subtype,
+ VoidPtr featarray, Int4 numfeats,
+ Int4Ptr position, Int2 overlapType,
+ SeqMgrFeatContext PNTR context,
+ Boolean special)
+
+{
+ return SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
+ numfeats, position, overlapType, context, NULL, NULL, NULL, special);
+}
+
NLM_EXTERN Int2 LIBCALL SeqMgrGetAllOverlappingFeatures (SeqLocPtr slp, Uint2 subtype,
VoidPtr featarray,
Int4 numfeats,
diff --git a/api/seqport.c b/api/seqport.c
index 6330111e..e9d30b1f 100644
--- a/api/seqport.c
+++ b/api/seqport.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.184 $
+* $Revision: 6.186 $
*
* File Description: Ports onto Bioseqs
*
@@ -2386,7 +2386,7 @@ static Int4 SeqPortStreamRaw (
return count;
}
-static Int4 SeqPortStreamLit (
+static Int4 SeqPortStreamSeqLit (
SeqLitPtr slitp,
Boolean is_na,
Int4 start,
@@ -2775,7 +2775,7 @@ static Int4 SeqPortStreamDelta (
} else if (sop->slitp != NULL) {
- count += SeqPortStreamLit (sop->slitp, is_na, sop->from, sop->to, sop->strand, sdp);
+ count += SeqPortStreamSeqLit (sop->slitp, is_na, sop->from, sop->to, sop->strand, sdp);
}
}
@@ -3006,6 +3006,7 @@ static Int4 SeqPortStreamSetup (
Int4 stop,
Uint1 strand,
SeqLocPtr loc,
+ SeqLitPtr lit,
StreamFlgType flags,
Pointer userdata,
SeqPortStreamProc proc
@@ -3017,10 +3018,11 @@ static Int4 SeqPortStreamSetup (
Int4 count = 0, from, to;
Uint2 entityID;
Int2 i;
+ Boolean is_na;
StreamData sd;
SeqLocPtr slp;
- if (bsp == NULL && loc == NULL) return 0;
+ if (bsp == NULL && loc == NULL && lit == NULL) return 0;
if (proc == NULL && userdata == NULL) return 0;
MemSet ((Pointer) &sd, 0, sizeof (StreamData));
@@ -3087,6 +3089,24 @@ static Int4 SeqPortStreamSetup (
slp = SeqLocFindNext (loc, slp);
}
+
+ } else if (lit != NULL) {
+
+ is_na = TRUE;
+ switch (lit->seq_data_type) {
+ case Seq_code_iupacaa :
+ case Seq_code_ncbi8aa :
+ case Seq_code_ncbieaa :
+ case Seq_code_ncbipaa :
+ case Seq_code_iupacaa3 :
+ case Seq_code_ncbistdaa :
+ is_na = FALSE;
+ break;
+ default :
+ break;
+ }
+
+ count += SeqPortStreamSeqLit (lit, is_na, 0, lit->length - 1, Seq_strand_plus, &sd);
}
/* return number of bases or residues streamed to callback */
@@ -3109,7 +3129,7 @@ NLM_EXTERN Int4 SeqPortStream (
)
{
- return SeqPortStreamSetup (bsp, 0, -1, Seq_strand_unknown, NULL, flags, userdata, proc);
+ return SeqPortStreamSetup (bsp, 0, -1, Seq_strand_unknown, NULL, NULL, flags, userdata, proc);
}
NLM_EXTERN Int4 SeqPortStreamInt (
@@ -3123,7 +3143,7 @@ NLM_EXTERN Int4 SeqPortStreamInt (
)
{
- return SeqPortStreamSetup (bsp, start, stop, strand, NULL, flags, userdata, proc);
+ return SeqPortStreamSetup (bsp, start, stop, strand, NULL, NULL, flags, userdata, proc);
}
NLM_EXTERN Int4 SeqPortStreamLoc (
@@ -3134,7 +3154,18 @@ NLM_EXTERN Int4 SeqPortStreamLoc (
)
{
- return SeqPortStreamSetup (NULL, 0, 0, 0, slp, flags, userdata, proc);
+ return SeqPortStreamSetup (NULL, 0, 0, 0, slp, NULL, flags, userdata, proc);
+}
+
+/*
+ * Streams a sequence literal: forwards to SeqPortStreamSetup with no Bioseq
+ * (NULL), no SeqLoc (NULL) and zeroed start/stop/strand, passing lit together
+ * with the caller's flags, userdata and proc.  Returns the count produced by
+ * SeqPortStreamSetup.
+ */
+NLM_EXTERN Int4 SeqPortStreamLit (
+ SeqLitPtr lit,
+ StreamFlgType flags,
+ Pointer userdata,
+ SeqPortStreamProc proc
+)
+
+{
+ return SeqPortStreamSetup (NULL, 0, 0, 0, NULL, lit, flags, userdata, proc);
}
/*******************************************************************************
@@ -8686,6 +8717,7 @@ NLM_EXTERN void ConvertNsToGaps (
if (bases == NULL) return;
if (!NeedToConvert(bases, unknown_greater_than_or_equal, known_greater_than_or_equal, unknown_gap_size, known_gap_size)) {
+ MemFree (bases);
return;
}
diff --git a/api/seqport.h b/api/seqport.h
index 820f844d..22265681 100644
--- a/api/seqport.h
+++ b/api/seqport.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/13/91
*
-* $Revision: 6.62 $
+* $Revision: 6.64 $
*
* File Description: Ports onto Bioseqs
*
@@ -172,6 +172,7 @@ NLM_EXTERN Boolean LIBCALL SeqPortSetUpAlphabet PROTO((SeqPortPtr spp, Uint1 cur
* SeqPortStream (bsp, flags, userdata, proc)
* SeqPortStreamInt (bsp, start, stop, strand, flags, userdata, proc)
* SeqPortStreamLoc (slp, flags, userdata, proc)
+* SeqPortStreamLit (lit, flags, userdata, proc)
* Efficient functions to stream through sequence
*
********************************************************************************/
@@ -198,6 +199,8 @@ typedef unsigned long StreamFlgType;
#define STREAM_HTML_SPANS 256 /* show span tags at begining of each line */
+#define STREAM_ALL_FASTA_IDS 512 /* in FASTA streamer, show all Seq-ids */
+
NLM_EXTERN Int4 SeqPortStream (
BioseqPtr bsp,
StreamFlgType flags,
@@ -222,6 +225,13 @@ NLM_EXTERN Int4 SeqPortStreamLoc (
SeqPortStreamProc proc
);
+NLM_EXTERN Int4 SeqPortStreamLit (
+ SeqLitPtr lit,
+ StreamFlgType flags,
+ Pointer userdata,
+ SeqPortStreamProc proc
+);
+
/*******************************************************************************
*
* StreamCacheSetup (bsp, slp, flags, scp)
diff --git a/api/sequtil.c b/api/sequtil.c
index 1b398944..8cd935d7 100644
--- a/api/sequtil.c
+++ b/api/sequtil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/1/91
*
-* $Revision: 6.304 $
+* $Revision: 6.328 $
*
* File Description: Sequence Utilities for objseq and objsset
*
@@ -7677,6 +7677,11 @@ Boolean GetThePointForOffsetEx(SeqLocPtr of, SeqPntPtr target, Uint1 which_end,
while ((pnt = SeqLocFindNext(of, pnt)) != NULL)
{
+ if( pnt->choice == SEQLOC_NULL )
+ {
+ /* Skip NULL parts when determining offsets */
+ continue;
+ }
last_strand = SeqLocStrand (pnt);
last_sip = SeqLocId (pnt);
if (last_strand != Seq_strand_minus)
@@ -7839,6 +7844,11 @@ Boolean GetPointsForLeftAndRightOffsets(SeqLocPtr of, SeqPntPtr left, SeqPntPtr
while ((pnt = SeqLocFindNext(of, pnt)) != NULL)
{
+ if( pnt->choice == SEQLOC_NULL )
+ {
+ /* Skip NULL parts when determining offsets */
+ continue;
+ }
last_strand = SeqLocStrand (pnt);
last_sip = SeqLocId (pnt);
if (last_strand != Seq_strand_minus)
@@ -9795,7 +9805,7 @@ NLM_EXTERN SeqIdPtr LIBCALL SeqIdFromAccessionEx(CharPtr accession, Uint4 versi
BioseqPtr bsp=NULL;
TextSeqIdPtr tsp;
Uint4 status;
- if(accession==NULL || accession[0]=='\0')
+ if(accession==NULL || accession[0]=='\0' || accession[0]=='\n' || accession[0]=='\r')
return NULL;
sip=NULL;
status = WHICH_db_accession(accession);
@@ -10449,6 +10459,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
retcode = ACCN_DDBJ_WGS_PROT;
} else if ((StringICmp(temp,"HAA") >= 0) && (StringICmp(temp,"HZZ") <= 0)) {
retcode = ACCN_NCBI_TPA_PROT;
+ } else if ((StringICmp(temp,"IAA") >= 0) && (StringICmp(temp,"IZZ") <= 0)) {
+ retcode = ACCN_DDBJ_TPA_PROT;
} else {
retcode = ACCN_IS_PROTEIN;
retval = TRUE;
@@ -10510,7 +10522,10 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"GR") == 0) ||
(StringICmp(temp,"GT") == 0) ||
(StringICmp(temp,"GW") == 0) ||
- (StringICmp(temp,"HO") == 0) ) { /* NCBI EST */
+ (StringICmp(temp,"HO") == 0) ||
+ (StringICmp(temp,"HS") == 0) ||
+ (StringICmp(temp,"JG") == 0) ||
+ (StringICmp(temp,"JK") == 0) ) { /* NCBI EST */
retcode = ACCN_NCBI_EST;
} else if ((StringICmp(temp,"BV") == 0) ||
(StringICmp(temp,"GF") == 0)) { /* NCBI STS */
@@ -10527,7 +10542,7 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"GQ") == 0) ||
(StringICmp(temp,"GU") == 0) ||
(StringICmp(temp,"HM") == 0) ||
- (StringICmp(temp,"HQ") == 0)) { /* NCBI direct submission */
+ (StringICmp(temp,"JF") == 0)) { /* NCBI direct submission */
retcode = ACCN_NCBI_DIRSUB;
} else if ((StringICmp(temp,"AE") == 0) ||
(StringICmp(temp,"CP") == 0) ||
@@ -10544,7 +10559,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"EQ") == 0) ||
(StringICmp(temp,"FA") == 0) ||
(StringICmp(temp,"GG") == 0) ||
- (StringICmp(temp,"GL") == 0)) { /* NCBI segmented set header Bioseq */
+ (StringICmp(temp,"GL") == 0) ||
+ (StringICmp(temp,"JH") == 0)) { /* NCBI segmented set header Bioseq */
retcode = ACCN_NCBI_SEGSET;
} else if ((StringICmp(temp,"AS") == 0) ||
(StringICmp(temp,"HR") == 0) ||
@@ -10573,7 +10589,10 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"FH") == 0) ||
(StringICmp(temp,"FI") == 0) ||
(StringICmp(temp,"GS") == 0) ||
- (StringICmp(temp,"HN") == 0) ) { /* NCBI GSS */
+ (StringICmp(temp,"HN") == 0) ||
+ (StringICmp(temp,"HR") == 0) ||
+ (StringICmp(temp,"JJ") == 0) ||
+ (StringICmp(temp,"JM") == 0) ) { /* NCBI GSS */
retcode = ACCN_NCBI_GSS;
} else if ((StringICmp(temp,"AR") == 0) ||
(StringICmp(temp,"DZ") == 0) ||
@@ -10599,10 +10618,17 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
retcode = ACCN_NCBI_TPA;
} else if ((StringICmp(temp,"BN") == 0)) { /* EMBL third-party annotation */
retcode = ACCN_EMBL_TPA;
- } else if ((StringICmp(temp,"BR") == 0)) { /* DDBJ third-party annotation */
+ } else if ((StringICmp(temp,"BR") == 0) ||
+ (StringICmp(temp,"HT") == 0) ||
+ (StringICmp(temp,"HU") == 0)) { /* DDBJ third-party annotation */
retcode = ACCN_DDBJ_TPA;
} else if((StringICmp(temp,"EZ") == 0) ||
- (StringICmp(temp,"HP") == 0)) {
+ (StringICmp(temp,"HP") == 0) ||
+ (StringICmp(temp,"HQ") == 0) ||
+ (StringICmp(temp,"JI") == 0) ||
+ (StringICmp(temp,"JL") == 0) ||
+ (StringICmp(temp,"JO") == 0) ||
+ (StringICmp(temp,"JN") == 0)) {
retcode = ACCN_NCBI_TSA;
} else if((StringICmp(temp,"FX") == 0)) {
retcode = ACCN_DDBJ_TSA;
@@ -10612,7 +10638,6 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"HE") == 0) ||
(StringICmp(temp,"HF") == 0) ||
(StringICmp(temp,"HG") == 0) ||
- (StringICmp(temp,"HH") == 0) ||
(StringICmp(temp,"HI") == 0)) { /* EMBL direct submission */
retcode = ACCN_EMBL_DIRSUB;
} else if ((StringICmp(temp,"AL") == 0) ||
@@ -10632,7 +10657,13 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"HA") == 0) ||
(StringICmp(temp,"HB") == 0) ||
(StringICmp(temp,"HC") == 0) ||
- (StringICmp(temp,"HD") == 0)) { /* EMBL patent division */
+ (StringICmp(temp,"HD") == 0) ||
+ (StringICmp(temp,"HH") == 0) ||
+ (StringICmp(temp,"JA") == 0) ||
+ (StringICmp(temp,"JB") == 0) ||
+ (StringICmp(temp,"JC") == 0) ||
+ (StringICmp(temp,"JD") == 0) ||
+ (StringICmp(temp,"JE") == 0)) { /* EMBL patent division */
retcode = ACCN_EMBL_PATENT;
} else if ((StringICmp(temp,"AT") == 0) ||
(StringICmp(temp,"AU") == 0) ||
@@ -10672,15 +10703,16 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
(StringICmp(temp,"FU") == 0) ||
(StringICmp(temp,"FV") == 0) ||
(StringICmp(temp,"FW") == 0) ||
- (StringICmp(temp,"FZ") == 0)) { /* DDBJ patent division */
+ (StringICmp(temp,"FZ") == 0) ||
+ (StringICmp(temp,"GB") == 0) ||
+ (StringICmp(temp,"HV") == 0) ||
+ (StringICmp(temp,"HW") == 0)) { /* DDBJ patent division */
retcode = ACCN_DDBJ_PATENT;
} else if ((StringICmp(temp,"DE") == 0) ||
(StringICmp(temp,"DH") == 0) ||
- (StringICmp(temp,"FT") == 0)) { /* DDBJ GSS */
+ (StringICmp(temp,"FT") == 0) ||
+ (StringICmp(temp,"GA") == 0)) { /* DDBJ GSS */
retcode = ACCN_DDBJ_GSS;
- } else if ((StringICmp(temp,"GA") == 0) ||
- (StringICmp(temp,"GB") == 0)) { /* DDBJ unassigned */
- retcode = ACCN_DDBJ_OTHER;
} else {
retcode = ACCN_IS_NT;
break;
@@ -10778,6 +10810,8 @@ NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
retcode = ACCN_EMBL_WGS;
} else if ((StringNICmp(temp,"D", 1) == 0)) {
retcode = ACCN_NCBI_WGS;
+ } else if ((StringNICmp(temp,"E", 1) == 0)) {
+ retcode = ACCN_DDBJ_WGS;
} else
retval = FALSE;
while (*s) {
diff --git a/api/sqnutil1.c b/api/sqnutil1.c
index b67b5d46..73c9cfe7 100644
--- a/api/sqnutil1.c
+++ b/api/sqnutil1.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.587 $
+* $Revision: 6.648 $
*
* File Description:
*
@@ -87,6 +87,7 @@ static int descr_insert_order [] = {
Seq_descr_pdb,
Seq_descr_embl,
Seq_descr_genbank,
+ Seq_descr_modelev,
Seq_descr_create_date,
Seq_descr_update_date,
0
@@ -942,7 +943,8 @@ NLM_EXTERN void AddSeqEntryToSeqEntry (SeqEntryPtr target, SeqEntryPtr insert, B
}
} else if ((targetbssp->_class >= BioseqseqSet_class_mut_set &&
targetbssp->_class <= BioseqseqSet_class_eco_set) ||
- targetbssp->_class >= BioseqseqSet_class_wgs_set) {
+ targetbssp->_class == BioseqseqSet_class_wgs_set ||
+ targetbssp->_class == BioseqseqSet_class_small_genome_set) {
if (targetbssp->seq_set != NULL) {
tmp = targetbssp->seq_set;
@@ -1303,7 +1305,8 @@ NLM_EXTERN void RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink)
if (bssp != NULL && (bssp->_class == 7 ||
(bssp->_class >= 13 && bssp->_class <= 16) ||
bssp->_class == BioseqseqSet_class_wgs_set ||
- bssp->_class == BioseqseqSet_class_gen_prod_set)) {
+ bssp->_class == BioseqseqSet_class_gen_prod_set ||
+ bssp->_class == BioseqseqSet_class_small_genome_set)) {
for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
RenormalizeNucProtSets (sep, relink);
}
@@ -1472,6 +1475,7 @@ static Boolean ReturnStackToItem (GatherContextPtr gcp)
bssp->_class != BioseqseqSet_class_phy_set &&
bssp->_class != BioseqseqSet_class_eco_set &&
bssp->_class != BioseqseqSet_class_wgs_set &&
+ bssp->_class != BioseqseqSet_class_small_genome_set &&
(bssp->_class != BioseqseqSet_class_gen_prod_set ||
(! tdp->skipGenProdSet))) {
return FALSE;
@@ -1600,12 +1604,13 @@ NLM_EXTERN SeqEntryPtr LIBCALL GetTopSeqEntryForEntityID (Uint2 entityID)
return NULL;
}
-NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr)
+NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr)
{
SeqLocPtr firstSlp;
IntFuzzPtr ifp;
SeqLocPtr lastSlp;
+ Int4 lim;
Boolean partial5;
Boolean partial3;
SeqIntPtr sip;
@@ -1614,6 +1619,7 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo
partial5 = FALSE;
partial3 = FALSE;
+ lim = -1;
if (location != NULL) {
firstSlp = NULL;
lastSlp = NULL;
@@ -1652,6 +1658,10 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo
partial5 = TRUE;
}
}
+ ifp = spp->fuzz;
+ if (ifp != NULL && ifp->choice == 4) {
+ lim = ifp->a;
+ }
}
}
if (lastSlp != NULL) {
@@ -1681,6 +1691,10 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo
partial3 = TRUE;
}
}
+ ifp = spp->fuzz;
+ if (ifp != NULL && ifp->choice == 4) {
+ lim = ifp->a;
+ }
}
}
}
@@ -1690,7 +1704,16 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo
if (p3ptr != NULL) {
*p3ptr = partial3;
}
- return (Boolean) (partial5 || partial3);
+ if (limptr != NULL) {
+ *limptr = lim;
+ }
+ return (Boolean) (partial5 || partial3 || lim == 3 || lim == 4);
+}
+
+/*
+ * Original three-argument entry point, kept for existing callers.  Delegates
+ * to CheckSeqLocForPartialEx with a NULL limptr, so no limit value is
+ * reported back; the Boolean result is that of the Ex function.
+ */
+NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr)
+
+{
+ return CheckSeqLocForPartialEx (location, p5ptr, p3ptr, NULL);
}
static void ConvertWholeToIntLoc (SeqLocPtr slp)
@@ -1721,7 +1744,7 @@ static void ConvertWholeToIntLoc (SeqLocPtr slp)
}
}
-NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3)
+NLM_EXTERN void SetSeqLocPartialEx (SeqLocPtr location, Boolean partial5, Boolean partial3, Int4 lim)
{
SeqLocPtr firstSlp;
@@ -1788,6 +1811,14 @@ NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean
ifp->a = 2;
}
}
+ } else if (lim == 3 || lim == 4) {
+ ifp = IntFuzzNew ();
+ if (ifp != NULL) {
+ ifp->choice = 4;
+ spp->fuzz = IntFuzzFree (spp->fuzz);
+ spp->fuzz = ifp;
+ ifp->a = lim;
+ }
} else {
if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
spp->fuzz = IntFuzzFree (spp->fuzz);
@@ -1837,6 +1868,14 @@ NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean
ifp->a = 1;
}
}
+ } else if (lim == 3 || lim == 4) {
+ ifp = IntFuzzNew ();
+ if (ifp != NULL) {
+ ifp->choice = 4;
+ spp->fuzz = IntFuzzFree (spp->fuzz);
+ spp->fuzz = ifp;
+ ifp->a = lim;
+ }
} else {
if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
spp->fuzz = IntFuzzFree (spp->fuzz);
@@ -1849,10 +1888,17 @@ NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean
}
}
+/*
+ * Original three-argument entry point, kept for existing callers.  Delegates
+ * to SetSeqLocPartialEx with lim = -1, which is neither of the values (3, 4)
+ * that the Ex function treats as an explicit fuzz limit.
+ */
+NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3)
+
+{
+ SetSeqLocPartialEx (location, partial5, partial3, -1);
+}
+
NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location)
{
ValNodePtr head = NULL, last = NULL, vnp;
+ Int4 lim;
Boolean noLeft;
Boolean noRight;
SeqLocPtr slp;
@@ -1862,7 +1908,7 @@ NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location)
slp = SeqLocFindNext (location, NULL);
while (slp != NULL) {
- CheckSeqLocForPartial (slp, &noLeft, &noRight);
+ CheckSeqLocForPartialEx (slp, &noLeft, &noRight, &lim);
val = 0;
if (noLeft) {
val |= 2;
@@ -1870,6 +1916,11 @@ NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location)
if (noRight) {
val |= 1;
}
+ if (lim == 3) {
+ val |= 4;
+ } else if (lim == 4) {
+ val |= 8;
+ }
vnp = ValNodeAddInt (&last, 0, val);
if (head == NULL) {
head = vnp;
@@ -1884,6 +1935,7 @@ NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location)
NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp)
{
+ Int4 lim;
Boolean noLeft;
Boolean noRight;
SeqLocPtr slp;
@@ -1896,7 +1948,13 @@ NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp)
val = (Int4) vnp->data.intvalue;
noLeft = (Boolean) ((val & 2) != 0);
noRight = (Boolean) ((val & 1) != 0);
- SetSeqLocPartial (slp, noLeft, noRight);
+ lim = -1;
+ if ((val & 4) != 0) {
+ lim = 3;
+ } else if ((val & 8) != 0) {
+ lim = 4;
+ }
+ SetSeqLocPartialEx (slp, noLeft, noRight, lim);
slp = SeqLocFindNext (location, slp);
vnp = vnp->next;
}
@@ -2373,7 +2431,8 @@ NLM_EXTERN void PromoteXrefsExEx (
Boolean include_stop,
Boolean remove_trailingX,
Boolean gen_prod_set,
- Boolean force_local_id
+ Boolean force_local_id,
+ BoolPtr seq_fetch_failP
)
{
@@ -2427,6 +2486,10 @@ NLM_EXTERN void PromoteXrefsExEx (
GeneRefPtr grp;
*/
+ if (seq_fetch_failP != NULL) {
+ *seq_fetch_failP = FALSE;
+ }
+
if (sfp == NULL || bsp == NULL) return;
/* set subtypes, used to find mRNA features for genomic product sets */
@@ -2569,6 +2632,9 @@ NLM_EXTERN void PromoteXrefsExEx (
}
if (sip != NULL || sfp->idx.subtype == FEATDEF_mRNA) {
rnaseq = GetSequenceByFeature (sfp);
+ if (rnaseq == NULL && seq_fetch_failP != NULL) {
+ *seq_fetch_failP = TRUE;
+ }
if (rnaseq != NULL) {
i = (Int4) StringLen (rnaseq);
bs = BSNew (i + 2);
@@ -2694,6 +2760,9 @@ NLM_EXTERN void PromoteXrefsExEx (
crp->frame = 0;
**/
bs = ProteinFromCdRegionEx (sfp, include_stop, remove_trailingX);
+ if (bs == NULL && seq_fetch_failP != NULL) {
+ *seq_fetch_failP = TRUE;
+ }
if (bs != NULL) {
protseq = BSMerge (bs, NULL);
bs = BSFree (bs);
@@ -2982,6 +3051,9 @@ NLM_EXTERN void PromoteXrefsExEx (
}
if (sip != NULL) {
protseq = GetSequenceByFeature (sfp);
+ if (protseq == NULL && seq_fetch_failP != NULL) {
+ *seq_fetch_failP = TRUE;
+ }
if (protseq != NULL) {
i = (Int4) StringLen (protseq);
bs = BSNew (i + 2);
@@ -3072,13 +3144,13 @@ NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, B
Boolean remove_trailingX, Boolean gen_prod_set)
{
- PromoteXrefsExEx (sfp, bsp, entityID, include_stop, remove_trailingX, gen_prod_set, FALSE);
+ PromoteXrefsExEx (sfp, bsp, entityID, include_stop, remove_trailingX, gen_prod_set, FALSE, NULL);
}
NLM_EXTERN void PromoteXrefs (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID)
{
- PromoteXrefsExEx (sfp, bsp, entityID, TRUE, FALSE, FALSE, FALSE);
+ PromoteXrefsExEx (sfp, bsp, entityID, TRUE, FALSE, FALSE, FALSE, NULL);
}
/* begin BasicSeqEntryCleanup section */
@@ -3297,6 +3369,18 @@ static void CleanVisString (CharPtr PNTR strp)
}
}
+/*
+ * Cleans the string that *strp points to, in place: first
+ * TrimSpacesSemicolonsAndCommas, then Asn2gnbkCompressSpaces.  If HasNoText
+ * reports the result as empty, the string is freed and *strp takes the value
+ * returned by MemFree.  A NULL strp or NULL *strp is ignored.
+ */
+static void CleanVisStringAndCompress (CharPtr PNTR strp)
+
+{
+  CharPtr  text;
+
+  if (strp == NULL || *strp == NULL) return;
+
+  text = *strp;
+  TrimSpacesSemicolonsAndCommas (text);
+  Asn2gnbkCompressSpaces (text);
+  if (! HasNoText (text)) return;
+
+  /* nothing useful survived the cleanup, so drop the string entirely */
+  *strp = MemFree (text);
+}
+
static void CleanVisStringJunk (CharPtr PNTR strp)
{
@@ -3308,6 +3392,18 @@ static void CleanVisStringJunk (CharPtr PNTR strp)
}
}
+/*
+ * Cleans the string that *strp points to, in place: first
+ * TrimSpacesAndJunkFromEnds (called with TRUE as its second argument), then
+ * Asn2gnbkCompressSpaces.  If HasNoText reports the result as empty, the
+ * string is freed and *strp takes the value returned by MemFree.  A NULL strp
+ * or NULL *strp is ignored.
+ */
+static void CleanVisStringJunkAndCompress (CharPtr PNTR strp)
+
+{
+  CharPtr  text;
+
+  if (strp == NULL || *strp == NULL) return;
+
+  text = *strp;
+  TrimSpacesAndJunkFromEnds (text, TRUE);
+  Asn2gnbkCompressSpaces (text);
+  if (! HasNoText (text)) return;
+
+  /* nothing useful survived the cleanup, so drop the string entirely */
+  *strp = MemFree (text);
+}
+
static void CleanDoubleQuote (CharPtr str)
{
@@ -3395,6 +3491,31 @@ static void CleanVisStringList (ValNodePtr PNTR vnpp)
}
}
+/*
+ * Walks the ValNode list headed by *vnpp.  Each node's string is trimmed with
+ * TrimSpacesSemicolonsAndCommas and passed through Asn2gnbkCompressSpaces.
+ * A node is then unlinked and released with ValNodeFreeData when HasNoText
+ * reports its string as empty or AlreadyInVnpList (*vnpp, node) is true.
+ * *vnpp is updated if the head node is removed.  A NULL vnpp is ignored.
+ */
+static void CleanVisStringListAndCompress (ValNodePtr PNTR vnpp)
+
+{
+  ValNodePtr       curr;
+  ValNodePtr       following;
+  ValNodePtr PNTR  link;
+
+  if (vnpp == NULL) return;
+
+  /* link always addresses the pointer that currently leads to curr */
+  link = vnpp;
+  for (curr = *vnpp; curr != NULL; curr = following) {
+    following = curr->next;
+    TrimSpacesSemicolonsAndCommas (curr->data.ptrvalue);
+    Asn2gnbkCompressSpaces (curr->data.ptrvalue);
+    if (! HasNoText (curr->data.ptrvalue) && ! AlreadyInVnpList (*vnpp, curr)) {
+      /* node stays: the next candidate hangs off its next field */
+      link = &(curr->next);
+      continue;
+    }
+    /* splice the node out before releasing it */
+    *link = following;
+    curr->next = NULL;
+    ValNodeFreeData (curr);
+  }
+}
+
static Boolean AlreadyInVnpListCaseSensitive (ValNodePtr head, ValNodePtr curr)
{
@@ -4049,6 +4170,23 @@ NLM_EXTERN SeqFeatPtr LIBCALL GetBestProteinFeatureUnindexed (SeqLocPtr product)
return prot;
}
+/*
+ * Tidies an EC number string in place.  A final '.' is blanked out, a leading
+ * "EC " (matched with StringNICmp) has its two letters blanked out, and the
+ * result is passed to TrimSpacesAroundString.  Strings for which StringLen
+ * returns 0 are left untouched.
+ */
+static void CleanupECNumber (CharPtr str)
+
+{
+  size_t   count;
+  CharPtr  tail;
+
+  count = StringLen (str);
+  if (count == 0) return;
+
+  /* the trailing period is handled first; the prefix test sees its result */
+  tail = str + (count - 1);
+  if (*tail == '.') {
+    *tail = ' ';
+  }
+
+  if (StringNICmp (str, "EC ", 3) == 0) {
+    str [0] = ' ';
+    str [1] = ' ';
+  }
+
+  TrimSpacesAroundString (str);
+}
+
static Boolean HandledGBQualOnCDS (SeqFeatPtr sfp, GBQualPtr gbq, ValNodePtr PNTR afterMe)
{
@@ -4744,13 +4882,24 @@ static void CleanupReplace (GBQualPtr gbq)
}
}
+/*
+ * Category prefixes, each ending in ": ", that may start an inference
+ * qualifier value.  Entry 0 is an empty string and the list is
+ * NULL-terminated.  Entries correspond by index to evCategoryNoSpace.
+ */
+static CharPtr evCategoryPfx [] = {
+ "",
+ "COORDINATES: ",
+ "DESCRIPTION: ",
+ "EXISTENCE: ",
+ NULL
+};
+
static void CleanupInference (GBQualPtr gbq)
{
Char ch;
CharPtr colon;
CharPtr dst;
+ Int2 j;
+ size_t len;
CharPtr ptr;
+ CharPtr skip;
CharPtr space;
CharPtr str;
@@ -4761,6 +4910,16 @@ static void CleanupInference (GBQualPtr gbq)
space = NULL;
colon = NULL;
+ skip = NULL;
+ for (j = 0; evCategoryPfx [j] != NULL; j++) {
+ len = StringLen (evCategoryPfx [j]);
+ if (StringNICmp (str, evCategoryPfx [j], len) != 0) continue;
+ skip = str + len;
+ }
+ if (skip != NULL) {
+ str = skip;
+ }
+
dst = str;
ptr = str;
ch = *ptr;
@@ -4793,6 +4952,43 @@ static void CleanupInference (GBQualPtr gbq)
*dst = '\0';
}
+/*
+ * The same category prefixes as evCategoryPfx but without the space after the
+ * colon.  Entry 0 is an empty string and the list is NULL-terminated.
+ * RepairInference indexes both tables with the same subscript, so the two
+ * must stay in the same order.
+ */
+static CharPtr evCategoryNoSpace [] = {
+ "",
+ "COORDINATES:",
+ "DESCRIPTION:",
+ "EXISTENCE:",
+ NULL
+};
+
+/*
+ * Restores the space after a leading evidence-category prefix in gbq->val.
+ *
+ * If the value starts with an entry of evCategoryNoSpace (for example
+ * "COORDINATES:") but not with the matching evCategoryPfx entry (the same
+ * text followed by a space), gbq->val is replaced by a newly allocated string
+ * made of the evCategoryPfx spelling followed by the remainder of the value.
+ *
+ * Nothing is changed when gbq is NULL, the value has no text, no prefix
+ * needs repair, or the allocation fails.
+ */
+static void RepairInference (GBQualPtr gbq)
+
+{
+  Int2     j;
+  size_t   len;
+  CharPtr  ptr;
+  CharPtr  skip;
+  CharPtr  str;
+
+  if (gbq == NULL) return;
+  if (StringHasNoText (gbq->val)) return;
+
+  str = gbq->val;
+  for (j = 0; evCategoryNoSpace [j] != NULL; j++) {
+    len = StringLen (evCategoryNoSpace [j]);
+    /* The empty placeholder entry has no colon to repair.  Skip it here
+       rather than depend on how StringNICmp treats a zero-length compare;
+       if that compare reported a match, every value would be reallocated
+       unchanged and the real prefixes below would never be tried. */
+    if (len == 0) continue;
+    if (StringNICmp (str, evCategoryNoSpace [j], len) != 0) continue;
+    /* prefix already followed by a space, nothing to do for this entry */
+    if (StringNICmp (str, evCategoryPfx [j], len + 1) == 0) continue;
+    /* need to repair */
+    skip = str + len;
+    /* the extra 20 bytes cover the longest prefix plus the terminator */
+    ptr = MemNew (StringLen (skip) + 20);
+    if (ptr == NULL) return;
+    StringCpy (ptr, evCategoryPfx [j]);
+    StringCat (ptr, skip);
+    gbq->val = MemFree (gbq->val);
+    gbq->val = ptr;
+    return;
+  }
+}
+
static void CleanupConsSplice (GBQualPtr gbq)
{
@@ -5127,6 +5323,26 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
sfp->comment = MemFree (sfp->comment);
sfp->comment = str;
}
+ } else if (StringICmp (gbq->qual, "label") == 0) {
+ if (StringICmp (gbq->val, FindKeyFromFeatDefType (sfp->idx.subtype, FALSE)) == 0) {
+ /* skip label that is simply the feature key */
+ } else if (sfp->comment == NULL || StringISearch (sfp->comment, gbq->qual) == NULL) {
+ /* if label is not already in comment, append */
+ len = StringLen (sfp->comment) + StringLen (gbq->val) + StringLen ("label: ") + 5;
+ str = MemNew (sizeof (Char) * len);
+ if (sfp->comment == NULL) {
+ StringCpy (str, "label: ");
+ StringCat (str, gbq->val);
+ sfp->comment = str;
+ } else {
+ StringCpy (str, sfp->comment);
+ StringCat (str, "; ");
+ StringCat (str, "label: ");
+ StringCat (str, gbq->val);
+ sfp->comment = MemFree (sfp->comment);
+ sfp->comment = str;
+ }
+ }
} else if (StringICmp (gbq->qual, "db_xref") == 0) {
tag = gbq->val;
ptr = StringChr (tag, ':');
@@ -5192,6 +5408,9 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
gbq->qual = StringSave ("rpt_unit_seq");
unlink = FALSE;
}
+ } else if (StringICmp (gbq->qual, "EC_number") == 0) {
+ CleanupECNumber (gbq->val);
+ unlink = FALSE;
} else if (StringICmp (gbq->qual, "pseudo") == 0) {
sfp->pseudo = TRUE;
} else if (StringICmp (gbq->qual, "gene") == 0 && (! StringHasNoText (gbq->val))) {
@@ -5213,6 +5432,7 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
/* remove default inference string if instantiated */
} else {
CleanupInference (gbq);
+ RepairInference (gbq);
unlink = FALSE;
}
} else if (StringICmp (gbq->qual, "transposon") == 0) {
@@ -5283,6 +5503,43 @@ static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
}
}
}
+ if (StringICmp (gbq->qual, "mobile_element") == 0) {
+ if (sfp->data.choice == SEQFEAT_IMP) {
+ ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
+ if (ifp != NULL) {
+ if (StringICmp (ifp->key, "repeat_region") == 0 && gbq->val != NULL) {
+ gbq->qual = MemFree (gbq->qual);
+ gbq->qual = StringSave ("mobile_element_type");
+ ifp->key = MemFree (ifp->key);
+ ifp->key = StringSave ("mobile_element");
+ sfp->idx.subtype = FEATDEF_mobile_element;
+ }
+ }
+ }
+ }
+
+ if (sfp->data.choice == SEQFEAT_IMP) {
+ ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
+ if (ifp != NULL) {
+ if (StringICmp (ifp->key, "conflict") == 0 ) {
+ ifp->key = MemFree (ifp->key);
+ ifp->key = StringSave ("misc_difference");
+ sfp->idx.subtype = FEATDEF_misc_difference;
+ len = StringLen (sfp->comment) + StringLen ("conflict") + 5;
+ str = MemNew (sizeof (Char) * len);
+ if (sfp->comment == NULL) {
+ StringCpy (str, "conflict");
+ sfp->comment = str;
+ } else {
+ StringCpy (str, "conflict; ");
+ StringCat (str, sfp->comment);
+ sfp->comment = MemFree (sfp->comment);
+ sfp->comment = str;
+ }
+ }
+ }
+ }
+
if (rpt_unit_seq != NULL) {
CleanupRptUnit (rpt_unit_seq);
}
@@ -5652,9 +5909,9 @@ static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon)
while (omp != NULL) {
next = omp->next;
unlink= FALSE;
- CleanVisString (&(omp->subname));
+ CleanVisStringAndCompress (&(omp->subname));
TrimSpacesAndJunkFromEnds (omp->subname, FALSE);
- CleanVisString (&(omp->attrib));
+ CleanVisStringAndCompress (&(omp->attrib));
if (omp->subtype == ORGMOD_common && StringICmp (omp->subname, orpcommon) == 0) {
unlink = TRUE;
} else if (last != NULL) {
@@ -5955,6 +6212,159 @@ static Uint1 LocationForPlastidText (CharPtr plastid_name)
}
}
+/*
+ * StringToLower
+ *
+ * Converts every character of str to lower case in place, using TO_LOWER.
+ * A NULL str is ignored.
+ */
+NLM_EXTERN void StringToLower (CharPtr str)
+
+{
+  CharPtr  cp;
+
+  if (str == NULL) return;
+
+  for (cp = str; *cp != '\0'; cp++) {
+    *cp = TO_LOWER (*cp);
+  }
+}
+
+
+/*
+ * CleanPCRPrimerSeq
+ *
+ * Normalizes a PCR primer sequence string in place:
+ *   1. lower-cases every upper-case character,
+ *   2. removes spaces, except those found between '<' and '>',
+ *   3. rewrites each "<other>" as "<OTHER>".
+ * Strings with no text are left untouched.
+ */
+static void CleanPCRPrimerSeq (CharPtr seq)
+{
+  CharPtr  hit, rd, wr;
+  Boolean  inside = FALSE;
+  Int4     k;
+
+  if (StringHasNoText (seq)) {
+    return;
+  }
+
+  /* lower case the whole sequence */
+  for (rd = seq; *rd != '\0'; rd++) {
+    if (IS_UPPER (*rd)) {
+      *rd = TO_LOWER (*rd);
+    }
+  }
+
+  /* remove any spaces in sequence outside of <modified base> */
+  wr = seq;
+  for (rd = seq; *rd != '\0'; rd++) {
+    if (*rd == '<') {
+      inside = TRUE;
+    } else if (*rd == '>') {
+      inside = FALSE;
+    }
+    /* brackets and non-space characters are always kept; spaces only inside brackets */
+    if (*rd != ' ' || inside) {
+      *wr = *rd;
+      wr++;
+    }
+  }
+  *wr = '\0';
+
+  /* upper case modified base <OTHER>; 7 is the length of "<other>" */
+  for (hit = StringStr (seq, "<other>"); hit != NULL; hit = StringStr (hit + 7, "<other>")) {
+    for (k = 1; k < 6; k++) {
+      hit [k] = TO_UPPER (hit [k]);
+    }
+  }
+}
+
+
+/*
+ * CleanupPCRPrimers
+ *
+ * Cleans every primer in the list headed by *pppp:
+ *   - seq and name are passed through CleanVisString, and seq additionally
+ *     through CleanPCRPrimerSeq (which lower-cases the sequence and writes
+ *     the <OTHER> modified base in upper case);
+ *   - primers left with neither seq nor name are unlinked and freed.
+ * Afterwards, if exactly two primers remain and one holds only a name while
+ * the other holds only a seq, they are merged into the first and the second
+ * is freed.
+ *
+ * The sequence is deliberately NOT lower-cased again after
+ * CleanPCRPrimerSeq: doing so would undo its <OTHER> capitalization.
+ *
+ * A NULL pppp is ignored.
+ */
+static void CleanupPCRPrimers (PCRPrimerPtr PNTR pppp)
+
+{
+  PCRPrimerPtr       next;
+  PCRPrimerPtr PNTR  prev;
+  PCRPrimerPtr       ppp;
+
+  if (pppp == NULL) return;
+
+  prev = pppp;
+  ppp = *pppp;
+  while (ppp != NULL) {
+    next = ppp->next;
+
+    CleanVisString (&(ppp->seq));
+    CleanPCRPrimerSeq (ppp->seq);
+    CleanVisString (&(ppp->name));
+
+    if (ppp->seq == NULL && ppp->name == NULL) {
+      /* nothing left in this primer, drop it from the list */
+      *prev = next;
+      ppp->next = NULL;
+      PCRPrimerFree (ppp);
+    } else {
+      prev = &(ppp->next);
+    }
+
+    ppp = next;
+  }
+
+  /* fix artifact caused by fwd/rev-primer-seq starting with colon, separating name and seq */
+
+  ppp = *pppp;
+  if (ppp == NULL) return;
+  next = ppp->next;
+  if (next == NULL) return;
+  if (next->next != NULL) return;
+
+  if (ppp->name != NULL && ppp->seq == NULL && next->name == NULL && next->seq != NULL) {
+    /* first has only a name, second only a seq: move the seq over */
+    ppp->seq = next->seq;
+    next->seq = NULL;
+    ppp->next = NULL;
+    PCRPrimerFree (next);
+  } else if (ppp->seq != NULL && ppp->name == NULL && next->seq == NULL && next->name != NULL) {
+    /* first has only a seq, second only a name: move the name over */
+    ppp->name = next->name;
+    next->name = NULL;
+    ppp->next = NULL;
+    PCRPrimerFree (next);
+  }
+}
+
+/*
+ * CleanupPCRReactionSet
+ *
+ * Runs CleanupPCRPrimers on the forward and reverse primer lists of every
+ * reaction in the list headed by *prpp, then unlinks and frees any reaction
+ * whose forward and reverse lists are both empty afterwards.
+ * A NULL prpp is ignored.
+ */
+static void CleanupPCRReactionSet (PCRReactionSetPtr PNTR prpp)
+
+{
+  PCRReactionSetPtr       curr;
+  PCRReactionSetPtr PNTR  link;
+
+  if (prpp == NULL) return;
+
+  /* link always addresses the pointer that refers to the current element */
+  link = prpp;
+  while ((curr = *link) != NULL) {
+    CleanupPCRPrimers (&(curr->forward));
+    CleanupPCRPrimers (&(curr->reverse));
+
+    if (curr->forward != NULL || curr->reverse != NULL) {
+      link = &(curr->next);
+      continue;
+    }
+
+    /* no primers remain: splice the reaction out and release it */
+    *link = curr->next;
+    curr->next = NULL;
+    PCRReactionFree (curr);
+  }
+}
extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
@@ -5984,9 +6394,9 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
next = ssp->next;
unlink= FALSE;
if (! IsNoNameSubSource (ssp)) {
- CleanVisString (&(ssp->name));
+ CleanVisStringAndCompress (&(ssp->name));
TrimSpacesAndJunkFromEnds (ssp->name, FALSE);
- } else if (StringICmp (ssp->name, "TRUE") == 0) {
+ } else /* if (StringICmp (ssp->name, "TRUE") == 0) */ {
ssp->name = MemFree (ssp->name);
ssp->name = StringSave ("");
}
@@ -6119,6 +6529,10 @@ extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)
last->name = ssp->name;
ssp->name = NULL;
unlink = TRUE;
+ } else if (ssp->subtype == SUBSRC_plastid_name &&
+ location != 0
+ && location == LocationForPlastidText (ssp->name)) {
+ unlink = TRUE;
}
} else if (HasNoText (ssp->name) && (! IsNoNameSubSource (ssp))) {
unlink = TRUE;
@@ -6601,8 +7015,6 @@ Nlm_QualNameAssoc current_subsource_subtype_alist[] = {
{"Endogenous-virus-name", SUBSRC_endogenous_virus_name},
{"Environmental-sample", SUBSRC_environmental_sample},
{"Frequency", SUBSRC_frequency},
- {"Fwd-PCR-primer-name", SUBSRC_fwd_primer_name},
- {"Fwd-PCR-primer-seq", SUBSRC_fwd_primer_seq},
{"Genotype", SUBSRC_genotype},
{"Germline", SUBSRC_germline},
{"Haplogroup", SUBSRC_haplogroup},
@@ -6618,8 +7030,6 @@ Nlm_QualNameAssoc current_subsource_subtype_alist[] = {
{"Plasmid-name", SUBSRC_plasmid_name},
{"Pop-variant", SUBSRC_pop_variant},
{"Rearranged", SUBSRC_rearranged},
- {"Rev-PCR-primer-name", SUBSRC_rev_primer_name},
- {"Rev-PCR-primer-seq", SUBSRC_rev_primer_seq},
{"Segment", SUBSRC_segment},
{"Sex", SUBSRC_sex},
{"Subclone", SUBSRC_subclone},
@@ -6635,6 +7045,10 @@ Nlm_QualNameAssoc discouraged_subsource_subtype_alist[] = {
Nlm_QualNameAssoc discontinued_subsource_subtype_alist[] = {
{"Ins-seq-name", SUBSRC_insertion_seq_name},
{"Transposon-name", SUBSRC_transposon_name},
+ {"Fwd-PCR-primer-name", SUBSRC_fwd_primer_name},
+ {"Fwd-PCR-primer-seq", SUBSRC_fwd_primer_seq},
+ {"Rev-PCR-primer-name", SUBSRC_rev_primer_name},
+ {"Rev-PCR-primer-seq", SUBSRC_rev_primer_seq},
{ NULL, 0 } };
Nlm_NameNameAssoc subsource_aliases[] = {
@@ -6907,9 +7321,104 @@ static CharPtr FindASubSource (BioSourcePtr biop, Uint1 subtype)
return NULL;
}
+/*
+ * FindNextSingleTilde
+ *
+ * Scans str and returns a pointer to the first '~' that is not followed by
+ * another '~', skipping over
+ *   - any run of tildes that starts with two or more tildes, and
+ *   - any run of tildes (even a single one) that directly follows a space.
+ * Returns NULL when str has no text or no such tilde is found.
+ */
+static CharPtr FindNextSingleTilde (CharPtr str)
+
+{
+  Boolean  skiprun;
+
+  if (StringHasNoText (str)) return NULL;
+
+  while (*str != '\0') {
+    if (*str == '~') {
+      if (str [1] != '~') return str;
+      skiprun = TRUE;
+    } else {
+      /* a space immediately before a tilde protects the whole run */
+      skiprun = (Boolean) (*str == ' ' && str [1] == '~');
+    }
+    str++;
+    if (skiprun) {
+      while (*str == '~') {
+        str++;
+      }
+    }
+  }
+
+  return NULL;
+}
+
+/*
+ * SplitAtSingleTilde
+ *
+ * Works on a private copy of strs, cutting it at every position reported by
+ * FindNextSingleTilde.  Each piece is trimmed with TrimSpacesAroundString
+ * and appended (as a copy) to a ValNode list, which is returned.  Splitting
+ * stops once the remaining text has no text.  Returns NULL when strs has no
+ * text.  The input string itself is not modified.
+ */
+static ValNodePtr SplitAtSingleTilde (CharPtr strs)
+
+{
+  CharPtr     copy, piece, rest;
+  ValNodePtr  head = NULL;
+
+  if (StringHasNoText (strs)) return NULL;
+
+  copy = StringSave (strs);
+
+  for (piece = copy; StringDoesHaveText (piece); piece = rest) {
+    rest = FindNextSingleTilde (piece);
+    if (rest != NULL) {
+      /* terminate this piece at the tilde and continue just past it */
+      *rest = '\0';
+      rest++;
+    }
+    TrimSpacesAroundString (piece);
+    ValNodeCopyStr (&head, 0, piece);
+  }
+
+  MemFree (copy);
+  return head;
+}
+
+/*
+ * MergeTildeStrings
+ *
+ * Joins the string values of the ValNode list with '~' between them,
+ * skipping nodes whose string has no text (including NULL).  Returns a newly
+ * allocated string, or NULL when the list is NULL, nothing has text, or the
+ * allocation fails.
+ */
+static CharPtr MergeTildeStrings (ValNodePtr head)
+
+{
+  Boolean     first = TRUE;
+  CharPtr     item, merged;
+  size_t      total = 0;
+  ValNodePtr  vnp;
+
+  if (head == NULL) return NULL;
+
+  /* one extra byte per kept item covers its separator */
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    item = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (item)) continue;
+    total += StringLen (item) + 1;
+  }
+  if (total == 0) return NULL;
+
+  merged = MemNew (sizeof (Char) * (total + 2));
+  if (merged == NULL) return NULL;
+
+  for (vnp = head; vnp != NULL; vnp = vnp->next) {
+    item = (CharPtr) vnp->data.ptrvalue;
+    if (StringHasNoText (item)) continue;
+    if (first) {
+      /* same empty prefix the first item always received */
+      StringCat (merged, "");
+      first = FALSE;
+    } else {
+      StringCat (merged, "~");
+    }
+    StringCat (merged, item);
+  }
+
+  return merged;
+}
+
static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp)
{
+ ValNodePtr head, vnp;
OrgModPtr next;
OrgModPtr omp;
OrgModPtr PNTR prev;
@@ -6928,24 +7437,36 @@ static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp)
unlink= FALSE;
if (omp->subtype == ORGMOD_other) {
str = omp->subname;
- val = NULL;
- subtype_val = 0;
- StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
- if (val != NULL) {
- tmp = FindAnOrgMod (onp, subtype_val);
- if (tmp != NULL && StringICmp (tmp, val) == 0) {
- unlink = TRUE;
- }
- } else {
+ head = SplitAtSingleTilde (str);
+ for (vnp = head; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ val = NULL;
subtype_val = 0;
- StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
+ StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
if (val != NULL) {
- tmp = FindASubSource (biop, subtype_val);
+ tmp = FindAnOrgMod (onp, subtype_val);
if (tmp != NULL && StringICmp (tmp, val) == 0) {
- unlink = TRUE;
+ vnp->data.ptrvalue = NULL;
+ }
+ } else {
+ subtype_val = 0;
+ StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
+ if (val != NULL) {
+ tmp = FindASubSource (biop, subtype_val);
+ if (tmp != NULL && StringICmp (tmp, val) == 0) {
+ vnp->data.ptrvalue = NULL;
+ }
}
}
}
+ str = MergeTildeStrings (head);
+ ValNodeFreeData (head);
+ omp->subname = MemFree (omp->subname);
+ omp->subname = str;
+ if (StringHasNoText (str)) {
+ unlink = TRUE;
+ }
}
if (unlink) {
*prev = omp->next;
@@ -6961,6 +7482,7 @@ static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp)
static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp)
{
+ ValNodePtr head, vnp;
SubSourcePtr next;
SubSourcePtr PNTR prev;
SubSourcePtr ssp;
@@ -6970,7 +7492,7 @@ static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp)
Boolean unlink;
CharPtr val;
- if (biop == NULL || onp == NULL) return;
+ if (biop == NULL /* || onp == NULL */ ) return;
prev = &(biop->subtype);
ssp = biop->subtype;
@@ -6979,24 +7501,36 @@ static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp)
unlink = FALSE;
if (ssp->subtype == SUBSRC_other) {
str = ssp->name;
- val = NULL;
- subtype_val = 0;
- StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
- if (val != NULL) {
- tmp = FindAnOrgMod (onp, subtype_val);
- if (tmp != NULL && StringICmp (tmp, val) == 0) {
- unlink = TRUE;
- }
- } else {
+ head = SplitAtSingleTilde (str);
+ for (vnp = head; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ val = NULL;
subtype_val = 0;
- StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
+ StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
if (val != NULL) {
- tmp = FindASubSource (biop, subtype_val);
+ tmp = FindAnOrgMod (onp, subtype_val);
if (tmp != NULL && StringICmp (tmp, val) == 0) {
- unlink = TRUE;
+ vnp->data.ptrvalue = NULL;
+ }
+ } else {
+ subtype_val = 0;
+ StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
+ if (val != NULL) {
+ tmp = FindASubSource (biop, subtype_val);
+ if (tmp != NULL && StringICmp (tmp, val) == 0) {
+ vnp->data.ptrvalue = NULL;
+ }
}
}
}
+ str = MergeTildeStrings (head);
+ ValNodeFreeData (head);
+ ssp->name = MemFree (ssp->name);
+ ssp->name = str;
+ if (StringHasNoText (str)) {
+ unlink = TRUE;
+ }
}
if (unlink) {
*prev = ssp->next;
@@ -7878,16 +8412,16 @@ static AffilPtr CleanAffil (AffilPtr afp)
{
if (afp == NULL) return NULL;
- CleanVisStringJunk (&(afp->affil));
- CleanVisStringJunk (&(afp->div));
- CleanVisStringJunk (&(afp->city));
- CleanVisStringJunk (&(afp->sub));
- CleanVisStringJunk (&(afp->country));
- CleanVisStringJunk (&(afp->street));
- CleanVisStringJunk (&(afp->email));
- CleanVisStringJunk (&(afp->fax));
- CleanVisStringJunk (&(afp->phone));
- CleanVisStringJunk (&(afp->postal_code));
+ CleanVisStringJunkAndCompress (&(afp->affil));
+ CleanVisStringJunkAndCompress (&(afp->div));
+ CleanVisStringJunkAndCompress (&(afp->city));
+ CleanVisStringJunkAndCompress (&(afp->sub));
+ CleanVisStringJunkAndCompress (&(afp->country));
+ CleanVisStringJunkAndCompress (&(afp->street));
+ CleanVisStringJunkAndCompress (&(afp->email));
+ CleanVisStringJunkAndCompress (&(afp->fax));
+ CleanVisStringJunkAndCompress (&(afp->phone));
+ CleanVisStringJunkAndCompress (&(afp->postal_code));
if (afp->choice == 2) {
if (StringCmp (afp->country, "U.S.A.") == 0) {
afp->country = MemFree (afp->country);
@@ -7922,12 +8456,21 @@ static void NormalizeAuthors (AuthListPtr alp, Boolean fixInitials)
ValNodePtr PNTR prev;
CharPtr str;
Boolean upcaseinits;
+ ValNodePtr vnp;
Boolean zap;
if (alp == NULL) return;
alp->affil = CleanAffil (alp->affil);
- if (alp == NULL || alp->choice != 1) return;
+ if (alp->choice == 2 || alp->choice == 3) {
+ for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ TrimSpacesAroundString (str);
+ Asn2gnbkCompressSpaces (str);
+ }
+ }
+ if (alp->choice != 1) return;
+
prev = &(alp->names);
names = alp->names;
while (names != NULL) {
@@ -8008,6 +8551,10 @@ static void NormalizeAuthors (AuthListPtr alp, Boolean fixInitials)
StringHasNoText (nsp->names [6])) {
zap = TRUE;
}
+ /* last name is required, so zap if not present */
+ if (StringHasNoText (nsp->names [0])) {
+ zap = TRUE;
+ }
}
} else if (pid->choice == 3 || pid->choice == 4 || pid->choice == 5) {
TrimSpacesAroundString ((CharPtr) pid->data);
@@ -8168,6 +8715,7 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti
CitBookPtr cbp;
CitGenPtr cgp;
CitJourPtr cjp;
+ CitPatPtr cpp;
CitSubPtr csp;
ImprintPtr imp;
CharPtr str;
@@ -8210,7 +8758,7 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti
csp->date = imp->date;
imp->date = NULL;
}
- if (imp != NULL && imp->pub == NULL) {
+ if (imp != NULL && imp->date == NULL) {
csp->imp = ImprintFree (csp->imp);
}
if (alp != NULL && alp->affil != NULL) {
@@ -8256,17 +8804,26 @@ static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixIniti
}
}
break;
+ case PUB_Patent :
+ cpp = (CitPatPtr) vnp->data.ptrvalue;
+ if (cpp != NULL) {
+ if (StringCmp (cpp->country, "USA") == 0) {
+ cpp->country = MemFree (cpp->country);
+ cpp->country = StringSave ("US");
+ }
+ }
+ break;
default :
break;
}
if (imp != NULL) {
- CleanVisString (&(imp->volume));
- CleanVisString (&(imp->issue));
- CleanVisString (&(imp->pages));
- CleanVisString (&(imp->section));
- CleanVisString (&(imp->part_sup));
- CleanVisString (&(imp->language));
- CleanVisString (&(imp->part_supi));
+ CleanVisStringAndCompress (&(imp->volume));
+ CleanVisStringAndCompress (&(imp->issue));
+ CleanVisStringAndCompress (&(imp->pages));
+ CleanVisStringAndCompress (&(imp->section));
+ CleanVisStringAndCompress (&(imp->part_sup));
+ CleanVisStringAndCompress (&(imp->language));
+ CleanVisStringAndCompress (&(imp->part_supi));
}
}
@@ -8330,7 +8887,7 @@ static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAut
Int4 artpmid = 0;
Char buf1 [121];
Char buf2 [121];
- CitArtPtr cap;
+ CitArtPtr cap = NULL;
CitGenPtr cgp;
CitJourPtr cjp;
Boolean fixInitials = TRUE;
@@ -8451,7 +9008,9 @@ static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAut
vnp = next;
}
if (pmid == 0 && artpmid > 0) {
- vnp = ValNodeAddInt (&(pdp->pub), PUB_PMid, artpmid);
+ ValNodeAddInt (&(pdp->pub), PUB_PMid, artpmid);
+ } else if (pmid > 0 && artpmid == 0 && cap != NULL) {
+ ValNodeAddInt (&(cap->ids), ARTICLEID_PUBMED, pmid);
}
}
@@ -8703,14 +9262,14 @@ static void CleanUserFields (
}
-static void CleanStructuredComment (
-UserObjectPtr uop
+NLM_EXTERN void CleanStructuredComment (
+ UserObjectPtr uop
)
{
+ Boolean genome_assembly_data = FALSE;
UserFieldPtr ufp;
- Int4 len;
- CharPtr str, new_str, cp;
+ CharPtr str, core, new_str;
if (uop == NULL || uop->type == NULL
|| StringCmp (uop->type->str, "StructuredComment") != 0) {
@@ -8722,80 +9281,49 @@ UserObjectPtr uop
&& ufp->choice == 1
&& (str = (CharPtr) ufp->data.ptrvalue) != NULL) {
if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0) {
- len = StringLen (str);
- if (StringNCmp (str, "##", 2) == 0 && len > 12 && StringCmp (str + len - 12, "Data-START##") == 0) {
- /* it's ok, no changes necessary */
- } else {
- cp = str + len - 1;
- /* strip trailing pound signs (if present) */
- while (cp > str && *cp == '#') {
- *cp = 0;
- cp--;
- }
- /* remove START (if present) */
- if (cp - str > 4 && StringICmp (cp - 4, "START") == 0) {
- cp -= 4;
- *cp = 0;
- cp--;
- }
- /* remove dash (if present) */
- if (cp > str && *cp == '-') {
- *cp = 0;
- cp--;
- }
- /* remove Data (if present) */
- if (cp - str > 3 && StringICmp (cp - 3, "Data") == 0) {
- cp -= 3;
- *cp = 0;
- }
-
- /* skip leading pound signs */
- cp = str;
- while (*cp == '#') {
- ++cp;
- }
- new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (cp) + 15));
- sprintf (new_str, "##%sData-START##", cp);
- str = MemFree (str);
- ufp->data.ptrvalue = new_str;
+ core = StructuredCommentDbnameFromString(str);
+ new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15));
+ sprintf (new_str, "##%s-START##", core);
+ str = MemFree (str);
+ ufp->data.ptrvalue = new_str;
+ if (StringCmp (core, "Genome-Assembly-Data") == 0) {
+ genome_assembly_data = TRUE;
}
+ core = MemFree (core);
} else if (StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) {
- len = StringLen (str);
- if (StringNCmp (str, "##", 2) == 0 && len > 10 && StringCmp (str + len - 10, "Data-END##") == 0) {
- /* it's ok, no changes necessary */
- } else {
- cp = str + len - 1;
- /* strip trailing pound signs (if present) */
- while (cp > str && *cp == '#') {
- *cp = 0;
- cp--;
- }
- /* remove END (if present) */
- if (cp - str > 2 && StringICmp (cp - 2, "END") == 0) {
- cp -= 2;
- *cp = 0;
- cp--;
- }
- /* remove dash (if present) */
- if (cp > str && *cp == '-') {
- *cp = 0;
- cp--;
- }
- /* remove Data (if present) */
- if (cp - str > 3 && StringICmp (cp - 3, "Data") == 0) {
- cp -= 3;
- *cp = 0;
- }
-
- /* skip leading pound signs */
- cp = str;
- while (*cp == '#') {
- ++cp;
- }
- new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (cp) + 15));
- sprintf (new_str, "##%sData-END##", cp);
- str = MemFree (str);
- ufp->data.ptrvalue = new_str;
+ core = StructuredCommentDbnameFromString(str);
+ new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15));
+ sprintf (new_str, "##%s-END##", core);
+ str = MemFree (str);
+ ufp->data.ptrvalue = new_str;
+ if (StringCmp (core, "Genome-Assembly-Data") == 0) {
+ genome_assembly_data = TRUE;
+ }
+ core = MemFree (core);
+ }
+ }
+ }
+
+ if (genome_assembly_data) {
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->label != NULL
+ && ufp->choice == 1
+ && (str = (CharPtr) ufp->data.ptrvalue) != NULL) {
+ if (StringCmp (ufp->label->str, "Finishing Goal") == 0 ||
+ StringCmp (ufp->label->str, "Current Finishing Status") == 0) {
+ if (StringCmp (str, "High Quality Draft") == 0) {
+ ufp->data.ptrvalue = StringSave ("High-Quality Draft");
+ str = MemFree (str);
+ } else if (StringCmp (str, "Improved High Quality Draft") == 0) {
+ ufp->data.ptrvalue = StringSave ("Improved High-Quality Draft");
+ str = MemFree (str);
+ } else if (StringCmp (str, "Annotation Directed") == 0) {
+ ufp->data.ptrvalue = StringSave ("Annotation-Directed Improvement");
+ str = MemFree (str);
+ } else if (StringCmp (str, "Non-contiguous Finished") == 0) {
+ ufp->data.ptrvalue = StringSave ("Noncontiguous Finished");
+ str = MemFree (str);
+ }
}
}
}
@@ -9434,10 +9962,59 @@ static Boolean NotExceptedRibosomalName (
return FALSE;
}
+/*
+ * CleanupSubSourceOrgModOtherFeat
+ *
+ * Feature callback: for a BioSource feature, runs CleanupOrgModOther (only
+ * when the source has an OrgName) and then CleanupSubSourceOther, which is
+ * called even when no OrgName is present (onp is NULL in that case).
+ * Features of any other type, and NULL inputs, are ignored.
+ * userdata is not used.
+ */
+NLM_EXTERN void CleanupSubSourceOrgModOtherFeat (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  BioSourcePtr  biop;
+  OrgNamePtr    onp = NULL;
+  OrgRefPtr     orp;
+
+  if (sfp == NULL) return;
+  if (sfp->data.choice != SEQFEAT_BIOSRC) return;
+  biop = (BioSourcePtr) sfp->data.value.ptrvalue;
+  if (biop == NULL) return;
+  orp = biop->org;
+  if (orp != NULL) {
+    onp = orp->orgname;
+    /* guard on the OrgName itself, not on orp a second time */
+    if (onp != NULL) {
+      CleanupOrgModOther (biop, onp);
+    }
+  }
+  CleanupSubSourceOther (biop, onp);
+}
+
+/*
+ * CleanupSubSourceOrgModOtherDesc
+ *
+ * Descriptor callback: for a source descriptor, runs CleanupOrgModOther
+ * (only when the source has an OrgName) and then CleanupSubSourceOther,
+ * which is called even when no OrgName is present (onp is NULL in that
+ * case).  Descriptors of any other type, and NULL inputs, are ignored.
+ * userdata is not used.
+ */
+NLM_EXTERN void CleanupSubSourceOrgModOtherDesc (
+  SeqDescrPtr sdp,
+  Pointer userdata
+)
+
+{
+  BioSourcePtr  biop;
+  OrgNamePtr    onp = NULL;
+  OrgRefPtr     orp;
+
+  if (sdp == NULL) return;
+  if (sdp->choice != Seq_descr_source) return;
+  biop = (BioSourcePtr) sdp->data.ptrvalue;
+  if (biop == NULL) return;
+  orp = biop->org;
+  if (orp != NULL) {
+    onp = orp->orgname;
+    /* guard on the OrgName itself, not on orp a second time */
+    if (onp != NULL) {
+      CleanupOrgModOther (biop, onp);
+    }
+  }
+  CleanupSubSourceOther (biop, onp);
+}
+
static void CleanupFeatureStrings (
SeqFeatPtr sfp,
Boolean isJscan,
Boolean stripSerial,
+ Boolean modernizeFeats,
ValNodePtr PNTR publist
)
@@ -9453,12 +10030,13 @@ static void CleanupFeatureStrings (
Boolean justTrnaText;
size_t len;
CharPtr name;
- OrgNamePtr onp;
+ OrgNamePtr onp = NULL;
OrgRefPtr orp;
PubdescPtr pdp;
ProtRefPtr prp;
CharPtr ptr;
RnaRefPtr rrp;
+ SubSourcePtr ssp;
CharPtr str;
CharPtr suff;
CharPtr temp;
@@ -9472,6 +10050,17 @@ static void CleanupFeatureStrings (
if (sfp == NULL) return;
CleanVisString (&(sfp->comment));
+ len = StringLen (sfp->comment);
+ if (len > 4) {
+ if (StringCmp (sfp->comment + len - 3, ",..") == 0 ||
+ StringCmp (sfp->comment + len - 3, ".,.") == 0 ||
+ StringCmp (sfp->comment + len - 3, "..,") == 0 ||
+ StringCmp (sfp->comment + len - 3, ",.,") == 0) {
+ sfp->comment [len - 3] = '.';
+ sfp->comment [len - 2] = '.';
+ sfp->comment [len - 1] = '.';
+ }
+ }
CleanVisString (&(sfp->title));
CleanVisString (&(sfp->except_text));
if (StringDoesHaveText (sfp->except_text)) {
@@ -9640,8 +10229,13 @@ static void CleanupFeatureStrings (
break;
case SEQFEAT_PROT :
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
- CleanVisString (&(prp->desc));
- CleanVisStringList (&(prp->name));
+ for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ CleanupECNumber (str);
+ }
+ CleanVisStringAndCompress (&(prp->desc));
+ CleanVisStringListAndCompress (&(prp->name));
CleanVisStringList (&(prp->ec));
CleanVisStringList (&(prp->activity));
CleanDoubleQuote (prp->desc);
@@ -9703,7 +10297,7 @@ static void CleanupFeatureStrings (
case SEQFEAT_RNA :
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
if (rrp->ext.choice == 1) {
- CleanVisString ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
+ CleanVisStringAndCompress ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
CleanDoubleQuote ((CharPtr) rrp->ext.value.ptrvalue);
if (rrp->ext.value.ptrvalue == NULL) {
rrp->ext.choice = 0;
@@ -9803,14 +10397,14 @@ static void CleanupFeatureStrings (
if (rrp->ext.choice == 3) {
rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
if (rgp != NULL) {
- CleanVisString (&(rgp->product));
+ CleanVisStringAndCompress (&(rgp->product));
CleanDoubleQuote (rgp->product);
- CleanVisString (&(rgp->_class));
+ CleanVisStringAndCompress (&(rgp->_class));
CleanDoubleQuote (rgp->_class);
for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
- CleanVisString (&(rqp->qual));
+ CleanVisStringAndCompress (&(rqp->qual));
CleanDoubleQuote (rqp->qual);
- CleanVisString (&(rqp->val));
+ CleanVisStringAndCompress (&(rqp->val));
CleanDoubleQuote (rqp->val);
}
}
@@ -9841,7 +10435,7 @@ static void CleanupFeatureStrings (
}
}
*/
- if (rrp->type == 4) {
+ if (rrp->type == 4 && rrp->ext.choice == 1 ) {
name = (CharPtr) rrp->ext.value.ptrvalue;
len = StringLen (name);
if (len > 5 && NotExceptedRibosomalName (name)) {
@@ -10039,6 +10633,15 @@ static void CleanupFeatureStrings (
}
name = MemFree (name);
}
+ if ((rrp->type == 255 || rrp->type == 10) && rrp->ext.choice == 0 && sfp->comment != NULL) {
+ if (StringICmp (sfp->comment, "internal transcribed spacer 1") == 0 ||
+ StringICmp (sfp->comment, "internal transcribed spacer 2") == 0 ||
+ StringICmp (sfp->comment, "internal transcribed spacer 3") == 0) {
+ rrp->ext.choice = 1;
+ rrp->ext.value.ptrvalue = sfp->comment;
+ sfp->comment = NULL;
+ }
+ }
break;
case SEQFEAT_PUB :
pdp = (PubdescPtr) sfp->data.value.ptrvalue;
@@ -10054,7 +10657,7 @@ static void CleanupFeatureStrings (
CleanVisString (&(ifp->descr));
break;
case SEQFEAT_REGION :
- CleanVisString ((CharPtr PNTR) &(sfp->data.value.ptrvalue));
+ CleanVisStringAndCompress ((CharPtr PNTR) &(sfp->data.value.ptrvalue));
CleanDoubleQuote ((CharPtr) sfp->data.value.ptrvalue);
if (sfp->data.value.ptrvalue == NULL) {
sfp->data.choice = SEQFEAT_COMMENT;
@@ -10069,7 +10672,7 @@ static void CleanupFeatureStrings (
case SEQFEAT_RSITE :
break;
case SEQFEAT_USER :
- VisitUserObjectsInUop ((UserObjectPtr) sfp->data.value.ptrvalue, NULL, CleanUserObject);
+ VisitAllUserObjectsInUop ((UserObjectPtr) sfp->data.value.ptrvalue, NULL, CleanUserObject);
break;
case SEQFEAT_TXINIT :
break;
@@ -10089,24 +10692,36 @@ static void CleanupFeatureStrings (
}
orp = biop->org;
if (orp != NULL) {
- CleanVisStringList (&(orp->mod));
+ CleanVisStringListAndCompress (&(orp->mod));
OrpModToSubSource (&(orp->mod), &(biop->subtype));
onp = orp->orgname;
if (onp != NULL) {
CleanupOrgModOther (biop, onp);
- CleanupSubSourceOther (biop, onp);
}
}
biop->subtype = SortSubSourceList (biop->subtype);
CleanSubSourceList (&(biop->subtype), biop->genome);
+ CleanupSubSourceOther (biop, onp);
+ biop->subtype = SortSubSourceList (biop->subtype);
+ if (modernizeFeats) {
+ ModernizePCRPrimers (biop);
+ }
+ CleanupPCRReactionSet (&(biop->pcr_primers));
+ if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) {
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+ if (ssp->subtype == SUBSRC_plasmid_name) {
+ biop->genome = GENOME_plasmid;
+ }
+ }
+ }
}
break;
default :
break;
}
if (orp != NULL) {
- CleanVisString (&(orp->taxname));
- CleanVisString (&(orp->common));
+ CleanVisStringAndCompress (&(orp->taxname));
+ CleanVisStringAndCompress (&(orp->common));
CleanVisStringList (&(orp->mod));
CleanVisStringList (&(orp->syn));
FixOldDbxrefs (orp->db);
@@ -10122,20 +10737,28 @@ static void CleanupFeatureStrings (
OrpModToOrgMod (&(orp->mod), &(onp->mod));
onp->mod = SortOrgModList (onp->mod);
CleanOrgModListEx (&(onp->mod), orp->common);
+ onp->mod = SortOrgModList (onp->mod);
onp = onp->next;
}
}
}
-static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNodePtr PNTR publist, Boolean isEmblOrDdbj)
+static void CleanupDescriptorStrings (
+ ValNodePtr sdp,
+ Boolean stripSerial,
+ Boolean modernizeFeats,
+ ValNodePtr PNTR publist,
+ Boolean isEmblOrDdbj
+)
{
BioSourcePtr biop;
EMBLBlockPtr ebp;
GBBlockPtr gbp;
- OrgNamePtr onp;
+ OrgNamePtr onp = NULL;
OrgRefPtr orp;
PubdescPtr pdp;
+ SubSourcePtr ssp;
if (sdp == NULL) return;
switch (sdp->choice) {
@@ -10163,7 +10786,7 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo
}
break;
case Seq_descr_title :
- CleanVisString ((CharPtr PNTR) &sdp->data.ptrvalue);
+ CleanVisStringAndCompress ((CharPtr PNTR) &sdp->data.ptrvalue);
if (sdp->data.ptrvalue == NULL) {
sdp->data.ptrvalue = StringSave ("");
}
@@ -10218,7 +10841,7 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo
}
break;
case Seq_descr_user :
- VisitUserObjectsInUop ((UserObjectPtr) sdp->data.ptrvalue, NULL, CleanUserObject);
+ VisitAllUserObjectsInUop ((UserObjectPtr) sdp->data.ptrvalue, NULL, CleanUserObject);
break;
case Seq_descr_sp :
break;
@@ -10253,11 +10876,23 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo
onp = orp->orgname;
if (onp != NULL) {
CleanupOrgModOther (biop, onp);
- CleanupSubSourceOther (biop, onp);
}
}
biop->subtype = SortSubSourceList (biop->subtype);
CleanSubSourceList (&(biop->subtype), biop->genome);
+ CleanupSubSourceOther (biop, onp);
+ biop->subtype = SortSubSourceList (biop->subtype);
+ if (modernizeFeats) {
+ ModernizePCRPrimers (biop);
+ }
+ CleanupPCRReactionSet (&(biop->pcr_primers));
+ if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) {
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+ if (ssp->subtype == SUBSRC_plasmid_name) {
+ biop->genome = GENOME_plasmid;
+ }
+ }
+ }
}
break;
case Seq_descr_molinfo :
@@ -10266,8 +10901,8 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo
break;
}
if (orp != NULL) {
- CleanVisString (&(orp->taxname));
- CleanVisString (&(orp->common));
+ CleanVisStringAndCompress (&(orp->taxname));
+ CleanVisStringAndCompress (&(orp->common));
CleanVisStringList (&(orp->mod));
CleanVisStringList (&(orp->syn));
FixOldDbxrefs (orp->db);
@@ -10283,6 +10918,7 @@ static void CleanupDescriptorStrings (ValNodePtr sdp, Boolean stripSerial, ValNo
OrpModToOrgMod (&(orp->mod), &(onp->mod));
onp->mod = SortOrgModList (onp->mod);
CleanOrgModListEx (&(onp->mod), orp->common);
+ onp->mod = SortOrgModList (onp->mod);
onp = onp->next;
}
}
@@ -10488,12 +11124,16 @@ NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp)
{
BioseqPtr bsp;
+ SeqLocPtr curr;
+ SeqLocPtr head;
SeqLocPtr last;
SeqLocPtr loc;
+ SeqLocPtr next;
SeqIdPtr sip;
SeqIntPtr sintp;
SeqPntPtr spp;
Int4 swp;
+ SeqLocPtr tail;
if (slp == NULL) return;
@@ -10579,11 +11219,40 @@ NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp)
}
loc = (SeqLocPtr) slp->data.ptrvalue;
- if (loc == NULL || loc->next != NULL) return;
- /* here seqloc_mix points to a single location element, so no need for seqloc_mix parent */
- slp->choice = loc->choice;
- slp->data.ptrvalue = (Pointer) loc->data.ptrvalue;
- MemFree (loc);
+ if (loc == NULL) return;
+
+ if (loc->next == NULL) {
+ /* here seqloc_mix points to a single location element, so no need for seqloc_mix parent */
+ slp->choice = loc->choice;
+ slp->data.ptrvalue = (Pointer) loc->data.ptrvalue;
+ MemFree (loc);
+ return;
+ }
+
+ /* check for nested seqloc_mix, remove nesting */
+ curr = loc;
+ last = NULL;
+ while (curr != NULL) {
+ next = curr->next;
+ if (curr->choice == SEQLOC_MIX) {
+ head = (SeqLocPtr) curr->data.ptrvalue;
+ if (head != NULL) {
+ tail = head;
+ while (tail->next != NULL) {
+ tail = tail->next;
+ }
+ if (last != NULL) {
+ last->next = head;
+ }
+ tail->next = curr->next;
+ curr->next = NULL;
+ curr = MemFree (curr);
+ }
+ } else {
+ last = curr;
+ }
+ curr = next;
+ }
}
typedef struct cbloc {
@@ -10782,14 +11451,14 @@ static CharPtr GetMiRNAProduct (CharPtr str)
{
len = StringLen (str);
if (len > 6 && StringCmp (str + len - 6, " miRNA") == 0
- && (len < 15 || StringCmp (str - 15, "precursor miRNA") != 0))
+ && (len < 15 || StringCmp (str + len - 15, "precursor miRNA") != 0))
{
product = (CharPtr) MemNew (sizeof (Char) * (len - 5));
StringNCpy (product, str, len - 6);
product[len - 6] = 0;
}
else if (len > 9 && StringCmp (str + len - 9, " microRNA") == 0
- && (len < 21 || StringCmp (str - 21, "precursor microRNA") != 0))
+ && (len < 18 || StringCmp (str + len - 18, "precursor microRNA") != 0))
{
product = (CharPtr) MemNew (sizeof (Char) * (len - 8));
StringNCpy (product, str, len - 9);
@@ -11027,17 +11696,21 @@ static Boolean IsFeatureCommentRedundant (SeqFeatPtr sfp)
CodeBreakPtr cbp;
CharPtr comment;
CdRegionPtr crp;
+ SeqFeatPtr feat;
Uint1 from;
GBQualPtr gbq;
GeneRefPtr grp;
CharPtr name;
+ BioseqPtr prod;
ProtRefPtr prp;
Uint1 residue;
RNAGenPtr rgp;
RNAQualPtr rqp;
RnaRefPtr rrp;
+ SeqAnnotPtr sap;
SeqCodeTablePtr sctp;
Uint1 seqcode;
+ SeqIdPtr sip;
SeqMapTablePtr smtp;
CharPtr str;
tRNAPtr trp;
@@ -11113,6 +11786,27 @@ static Boolean IsFeatureCommentRedundant (SeqFeatPtr sfp)
}
}
}
+ if (sfp->product != NULL) {
+ sip = SeqLocId (sfp->product);
+ if (sip != NULL) {
+ prod = BioseqFind (sip);
+ if (prod != NULL) {
+ for (sap = prod->annot; sap != NULL; sap = sap->next) {
+ if (sap->type != 1) continue;
+ for (feat = (SeqFeatPtr) sap->data; feat != NULL; feat = feat->next) {
+ if (feat->data.choice != SEQFEAT_PROT) continue;
+ prp = (ProtRefPtr) feat->data.value.ptrvalue;
+ if (prp == NULL) continue;
+ for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ if (StringCmp (comment, str) == 0) return TRUE;
+ }
+ }
+ }
+ }
+ }
+ }
break;
case SEQFEAT_PROT:
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
@@ -11258,6 +11952,10 @@ static CharPtr ExtractSatelliteFromComment (CharPtr comment)
}
TrimSpacesAroundString (comment);
}
+ if (comment != NULL && comment [0] == '~' && comment [1] != '~') {
+ comment [0] = ' ';
+ TrimSpacesAroundString (comment);
+ }
return satellite_qual;
}
@@ -11270,13 +11968,32 @@ static void DoModernizeRNAFields (SeqFeatPtr sfp)
RNAGenPtr rgp;
RNAQualSetPtr rqp;
RnaRefPtr rrp;
+ CharPtr str;
Boolean unlink;
+ Int2 i;
+ size_t len;
+ CharPtr ncclass;
+ CharPtr product;
+ CharPtr tmp;
if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return;
ModernizeRNAFields (sfp);
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
- if (rrp == NULL || rrp->ext.choice != 3) return;
+ if (rrp == NULL) return;
+
+ if (rrp->ext.choice == 1 && rrp->type == 10) {
+ str = rrp->ext.value.ptrvalue;
+ if (StringHasNoText (str)) return;
+
+ rgp = (RNAGenPtr) MemNew (sizeof (RNAGen));
+ if (rgp == NULL) return;
+ rrp->ext.choice = 3;
+ rrp->ext.value.ptrvalue = (Pointer) rgp;
+ rgp->product = str;
+ }
+
+ if (rrp->ext.choice != 3) return;
rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
if (rgp == NULL) return;
@@ -11299,6 +12016,29 @@ static void DoModernizeRNAFields (SeqFeatPtr sfp)
rqp = nextrqp;
}
+ if (rrp->type == 10 && StringDoesHaveText (rgp->product) && rgp->_class == NULL) {
+ ncclass = rgp->product;
+ for (i = 0; ncrnaClassList [i] != NULL; i++) {
+ str = ncrnaClassList [i];
+ if (StringHasNoText (str)) continue;
+ len = StringLen (str);
+ if (len < 1) continue;
+ if (StringNICmp (ncclass, str, len) != 0) continue;
+ if (ncclass [len] != ' ') continue;
+ tmp = ncclass + len + 1;
+ if (StringHasNoText (tmp)) continue;
+ ncclass [len] = '\0';
+ rgp->_class = StringSave (ncclass);
+ product = StringSave (tmp);
+ rgp->product = MemFree (rgp->product);
+ rgp->product = product;
+ TrimSpacesAroundString (rgp->_class);
+ TrimSpacesAroundString (rgp->product);
+ rrp->type = 8;
+ sfp->idx.subtype = FEATDEF_ncRNA;
+ }
+ }
+
if (rgp->quals != NULL) return;
if (StringDoesHaveText (rgp->_class) || StringDoesHaveText (rgp->product)) return;
@@ -11385,7 +12125,7 @@ NLM_EXTERN void CleanUpSeqFeat (
ifp->key = MemFree (ifp->key);
ifp->key = StringSave ("misc_binding");
sfp->idx.subtype = FEATDEF_misc_binding;
- } else if (StringCmp (ifp->key, "satellite") == 0 ) {
+ } else if (StringCmp (ifp->key, "satellite") == 0 && (! isEmblOrDdbj)) {
ifp->key = MemFree (ifp->key);
ifp->key = StringSave ("repeat_region");
sfp->idx.subtype = FEATDEF_repeat_region;
@@ -11449,7 +12189,7 @@ NLM_EXTERN void CleanUpSeqFeat (
}
}
}
- if (sfp->data.choice == SEQFEAT_IMP && StringCmp (ifp->key, "repeat_region") == 0) {
+ if (sfp->data.choice == SEQFEAT_IMP && StringCmp (ifp->key, "repeat_region") == 0 && (! isEmblOrDdbj)) {
satellite_type = ExtractSatelliteFromComment (sfp->comment);
if (satellite_type != NULL) {
gbq = GBQualNew ();
@@ -11515,7 +12255,7 @@ NLM_EXTERN void CleanUpSeqFeat (
CleanupDuplicateGBQuals (&(sfp->qual));
CleanupFeatureGBQuals (sfp, isEmblOrDdbj);
sfp->qual = SortIllegalGBQuals (sfp->qual);
- CleanupFeatureStrings (sfp, isJscan, stripSerial, publist);
+ CleanupFeatureStrings (sfp, isJscan, stripSerial, modernizeFeats, publist);
FixOldDbxrefs (sfp->dbxref);
FixNumericDbxrefs (sfp->dbxref);
sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref);
@@ -11638,7 +12378,7 @@ NLM_EXTERN void CleanUpSeqFeat (
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
hasNulls = LocationHasNullsBetween (sfp->location);
- sfp->partial = (sfp->partial || partial5 || partial3 || hasNulls);
+ sfp->partial = (sfp->partial || partial5 || partial3 || (hasNulls && ! isEmblOrDdbj));
prevlink = (SeqFeatXrefPtr PNTR) &(sfp->xref);
xref = sfp->xref;
@@ -11765,8 +12505,46 @@ static void CleanSeqIdInSeqAnnot (SeqAnnotPtr annot, Pointer userdata)
VisitSeqIdsInSeqAnnot (annot, NULL, CleanUpSeqId);
}
+/* Pair of counters: number of nucleotide and number of protein Bioseqs seen. */
+typedef struct npcounts {
+ Int4 nucs;
+ Int4 prots;
+} NPCounts, PNTR NPCountsPtr;
+
+/*
+ * Bioseq visitor callback: bumps the nucs or prots counter in the NPCounts
+ * passed through userdata, depending on whether bsp->mol is a nucleic acid
+ * or an amino acid molecule. Bioseqs of any other molecule type are ignored.
+ */
+static void CountNucsAndProts (BioseqPtr bsp, Pointer userdata)
+
+{
+ NPCountsPtr ncp;
+
+ if (bsp == NULL) return;
+ ncp = (NPCountsPtr) userdata;
+ if (ncp == NULL) return;
+
+ if (ISA_na (bsp->mol)) {
+ (ncp->nucs)++;
+ } else if (ISA_aa (bsp->mol)) {
+ (ncp->prots)++;
+ }
+}
+
+/*
+ * Bioseq-set visitor callback: assigns a concrete class to a set whose class
+ * is not_set or other. Sets with any other class are left untouched.
+ * The userdata argument is not used.
+ */
+static void FixBadSetClass (BioseqSetPtr bssp, Pointer userdata)
+
+{
+ NPCounts nc;
+
+ if (bssp == NULL) return;
+ if (bssp->_class != BioseqseqSet_class_not_set && bssp->_class != BioseqseqSet_class_other) return;
+
+ /* tally nucleotide and protein sequences visited in this set */
+ MemSet ((Pointer) &nc, 0, sizeof (NPCounts));
+ VisitSequencesInSet (bssp, (Pointer) &nc, VISIT_MAINS, CountNucsAndProts);
+ /* exactly one nucleotide plus at least one protein is treated as nuc-prot, anything else as genbank */
+ if (nc.nucs == 1 && nc.prots > 0) {
+ bssp->_class = BioseqseqSet_class_nuc_prot;
+ } else {
+ bssp->_class = BioseqseqSet_class_genbank;
+ }
+}
static void RemoveDuplicateSeqIds (BioseqPtr bsp)
+
{
SeqIdPtr sip, sip_cmp, sip_prev, sip_next;
@@ -11913,7 +12691,7 @@ static void BasicSeqEntryCleanupInternal (
default :
break;
}
- CleanupDescriptorStrings (sdp, stripSerial, publist, isEmblOrDdbj);
+ CleanupDescriptorStrings (sdp, stripSerial, TRUE, publist, isEmblOrDdbj);
sdp = sdp->next;
}
@@ -12326,6 +13104,10 @@ NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
VisitGraphsInSep (sep, NULL, CleanSeqIdInSeqGraph);
VisitAnnotsInSep (sep, NULL, CleanSeqIdInSeqAnnot);
+ /* Fix Bioseq-sets with class 0 */
+
+ VisitSetsInSep (sep, NULL, FixBadSetClass);
+
/* removed unnecessarily nested Pub-equivs */
VisitPubdescsInSep (sep, NULL, FlattenPubdesc);
@@ -12387,6 +13169,94 @@ NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
}
}
+/*
+ * Accumulator for BsecSMFEProc. Despite the field names, the callback lowers
+ * "max" whenever it sees a shorter location, so it ends up holding the
+ * shortest length seen; num_at_max counts the features having that length.
+ */
+typedef struct bsecsmfedata {
+ Int4 max;
+ Int4 num_at_max;
+} BsecSmfeData, PNTR BsecSmfePtr;
+
+/*
+ * Feature callback that records, in the BsecSmfeData reached through
+ * context->userdata, the shortest feature location length seen so far and
+ * how many features share that length. Always returns TRUE.
+ */
+static Boolean LIBCALLBACK BsecSMFEProc (
+ SeqFeatPtr sfp,
+ SeqMgrFeatContextPtr context
+)
+
+
+{
+ BsecSmfePtr bsp;
+ Int4 len;
+
+ if (sfp == NULL || context == NULL) return TRUE;
+ bsp = context->userdata;
+ if (bsp == NULL) return TRUE;
+
+ len = SeqLocLen (sfp->location);
+ if (len < bsp->max) {
+ /* strictly shorter than anything seen before: restart the tie count */
+ bsp->max = len;
+ bsp->num_at_max = 1;
+ } else if (len == bsp->max) {
+ (bsp->num_at_max)++;
+ }
+
+ return TRUE;
+}
+
+/*
+ * RemoveUnnecessaryGeneXrefs
+ *
+ * Feature visitor callback. If a non-gene feature carries a gene xref that
+ * names the same gene as the gene feature found by SeqMgrGetOverlappingGene,
+ * and the overlap scan does not report a tie for the shortest gene, every
+ * gene xref on the feature is unlinked and freed.
+ *
+ * The xref and the overlapping gene are compared, in order of preference, by
+ * locus, then locus_tag, then first synonym (case-insensitive); a mismatch on
+ * the first field both sides provide keeps the xref.
+ *
+ * sfp      - feature to examine; NULL and gene features are ignored
+ * userdata - unused
+ */
+NLM_EXTERN void RemoveUnnecessaryGeneXrefs (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+
+{
+  BsecSmfeData         bsd;
+  SeqFeatXrefPtr       curr, next;
+  SeqMgrFeatContext    fcontext;
+  SeqFeatXrefPtr PNTR  last;
+  GeneRefPtr           grp, grpx;
+  SeqFeatPtr           sfpx;
+  CharPtr              syn1, syn2;
+
+  if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) return;
+  grp = SeqMgrGetGeneXref (sfp);
+  if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return;
+  sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
+  if (sfpx == NULL || sfpx->data.choice != SEQFEAT_GENE) return;
+  grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
+  if (grpx == NULL) return;
+
+  if ((!StringHasNoText (grp->locus)) && (!StringHasNoText (grpx->locus))) {
+    if ((StringICmp (grp->locus, grpx->locus) != 0)) return;
+  } else if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
+    /* both sides must supply a locus_tag before the tags are compared;
+       the guard previously tested grp->locus_tag twice */
+    if ((StringICmp (grp->locus_tag, grpx->locus_tag) != 0)) return;
+  } else if (grp->syn != NULL && grpx->syn != NULL) {
+    syn1 = (CharPtr) grp->syn->data.ptrvalue;
+    syn2 = (CharPtr) grpx->syn->data.ptrvalue;
+    if ((!StringHasNoText (syn1)) && (!StringHasNoText (syn2))) {
+      if ((StringICmp (syn1, syn2) != 0)) return;
+    }
+  }
+
+  /* find the shortest gene reported for this location and count ties;
+     "max" starts at INT4_MAX and is only ever lowered by BsecSMFEProc */
+  MemSet ((Pointer) &bsd, 0, sizeof (BsecSmfeData));
+  bsd.max = INT4_MAX;
+  bsd.num_at_max = 0;
+  SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0,
+                                   LOCATION_SUBSET, (Pointer) &bsd, BsecSMFEProc);
+
+  if (bsd.num_at_max < 2) {
+    /* no tie for shortest gene: drop every gene xref from the list */
+    last = (SeqFeatXrefPtr PNTR) &(sfp->xref);
+    curr = sfp->xref;
+    while (curr != NULL) {
+      next = curr->next;
+      if (curr->data.choice == SEQFEAT_GENE) {
+        *last = next;
+        curr->next = NULL;
+        SeqFeatXrefFree (curr);
+      } else {
+        last = &(curr->next);
+      }
+      curr = next;
+    }
+  }
+}
+
static void SortSeqFeatFields (
SeqFeatPtr sfp,
Pointer userdata
@@ -12501,7 +13371,7 @@ NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
if (StringCmp (orig_loc, new_loc) != 0) {
lip->data_in_log = TRUE;
if (lip->fp != NULL) {
- fprintf (lip->fp, "Adjusted protein feature location from %s to %s\n", orig_loc, new_loc);
+ fprintf (lip->fp, "Synchronized coding region partials for protein feature location at %s\n", orig_loc, new_loc);
}
}
new_loc = MemFree (new_loc);
@@ -12997,11 +13867,20 @@ NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void)
StrCpy (path, appPath);
ptr = StringStr (path, "/ncbi/build/");
if (ptr != NULL) {
- /* see if running under Xcode build environment */
+ /* see if running under Xcode 3 build environment */
ptr [5] = '\0';
dataFound = CheckDataPath (path, "data");
}
}
+ if (! dataFound) {
+ StrCpy (path, appPath);
+ ptr = StringStr (path, "/Library/Developer/");
+ if (ptr != NULL) {
+ /* see if running under Xcode 4 build environment */
+ ptr [19] = '\0';
+ dataFound = CheckDataPath (path, "data");
+ }
+ }
}
#endif
if (dataFound) {
@@ -13350,6 +14229,32 @@ NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location)
return FALSE;
}
+/*
+ * For a SEQLOC_MIX location that LocationHasNullsBetween reports as having
+ * NULLs, inserts a SEQLOC_NULL element between every pair of adjacent
+ * elements where neither one is already a SEQLOC_NULL. Locations of any
+ * other choice, and mixes without NULLs, are left unchanged.
+ */
+NLM_EXTERN void NormalizeNullsBetween (SeqLocPtr location)
+
+{
+ SeqLocPtr next, tmp, vnp;
+
+ if (location == NULL) return;
+ if (! LocationHasNullsBetween (location)) return;
+
+ if (location->choice != SEQLOC_MIX) return;
+ vnp = (ValNodePtr) location->data.ptrvalue;
+ if (vnp == NULL) return;
+
+ while (vnp != NULL && vnp->next != NULL) {
+ next = vnp->next;
+ if (vnp->choice != SEQLOC_NULL && next->choice != SEQLOC_NULL) {
+ tmp = ValNodeNew (NULL);
+ if (tmp != NULL) {
+ /* splice the new NULL element between vnp and next */
+ tmp->choice = SEQLOC_NULL;
+ tmp->next = vnp->next;
+ vnp->next = tmp;
+ }
+ }
+ /* continue from the original successor, skipping any node just inserted */
+ vnp = next;
+ }
+}
+
NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype)
{
@@ -13409,6 +14314,9 @@ NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype)
if (subtype >= FEATDEF_gap && subtype <= FEATDEF_oriT) {
return SEQFEAT_IMP;
}
+ if (subtype == FEATDEF_mobile_element) {
+ return SEQFEAT_IMP;
+ }
}
return 0;
}
@@ -13942,7 +14850,6 @@ NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, Visi
return index;
}
-
NLM_EXTERN Int4 VisitUserFieldsInUfp (UserFieldPtr ufp, Pointer userdata, VisitUserFieldsFunc callback)
{
@@ -13982,6 +14889,7 @@ NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, Visit
return index;
}
+/* Visits only unnested nodes */
NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback)
{
@@ -14012,6 +14920,31 @@ NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, Visi
return index;
}
+/*
+ * Recursively visits uop and every user object nested inside its fields,
+ * calling callback (if non-NULL) on each one. Nested objects are visited
+ * before the object that contains them. Returns the number of objects
+ * visited, counting uop itself; returns 0 when uop is NULL.
+ */
+NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback)
+
+{
+ Int4 index = 0;
+ UserObjectPtr obj;
+ UserFieldPtr ufp;
+
+ if (uop == NULL) return index;
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
+ if (ufp->choice == 6) {
+ /* field holds a single nested user object */
+ obj = (UserObjectPtr) ufp->data.ptrvalue;
+ index += VisitAllUserObjectsInUop (obj, userdata, callback);
+ } else if (ufp->choice == 12) {
+ /* field holds a linked list of nested user objects */
+ for (obj = (UserObjectPtr) ufp->data.ptrvalue; obj != NULL; obj = obj->next) {
+ index += VisitAllUserObjectsInUop (obj, userdata, callback);
+ }
+ }
+ }
+ if (callback != NULL) {
+ callback (uop, userdata);
+ }
+ index++;
+ return index;
+}
+
typedef struct uopdata {
UserObjectPtr rsult;
CharPtr tag;
@@ -15051,8 +15984,9 @@ NLM_EXTERN Int4 VisitElementsInSep (SeqEntryPtr sep, Pointer userdata, VisitElem
if (bssp == NULL) return index;
if (bssp->_class == 7 ||
(bssp->_class >= 13 && bssp->_class <= 16) ||
- bssp->_class != BioseqseqSet_class_wgs_set ||
- bssp->_class == BioseqseqSet_class_gen_prod_set) {
+ bssp->_class == BioseqseqSet_class_wgs_set ||
+ bssp->_class == BioseqseqSet_class_gen_prod_set ||
+ bssp->_class == BioseqseqSet_class_small_genome_set) {
for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
index += VisitElementsInSep (tmp, userdata, callback);
}
@@ -15073,7 +16007,8 @@ NLM_EXTERN Boolean IsPopPhyEtcSet (Uint1 _class)
_class == BioseqseqSet_class_pop_set ||
_class == BioseqseqSet_class_phy_set ||
_class == BioseqseqSet_class_eco_set ||
- _class == BioseqseqSet_class_wgs_set) return TRUE;
+ _class == BioseqseqSet_class_wgs_set ||
+ _class == BioseqseqSet_class_small_genome_set) return TRUE;
return FALSE;
}
@@ -15162,7 +16097,7 @@ static Int4 ScanBioseqSetReleaseInt (
fp = FileOpen (inputFile, binary? "rb" : "r");
#endif
if (fp == NULL) {
- Message (MSG_ERROR, "FileOpen failed for input file '%s'", inputFile);
+ Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
return index;
}
@@ -15335,7 +16270,7 @@ NLM_EXTERN Int4 ScanEntrezgeneSetRelease (
fp = FileOpen (inputFile, binary? "rb" : "r");
#endif
if (fp == NULL) {
- Message (MSG_ERROR, "FileOpen failed for input file '%s'", inputFile);
+ Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
return index;
}
@@ -15674,8 +16609,8 @@ static Boolean ProductsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean case_sensi
{
BioseqPtr bsp1, bsp2;
Int2 ctr, pos1, pos2;
- Char buf1[50];
- Char buf2[50];
+ Char buf1[51];
+ Char buf2[51];
Int4 len = 50;
SeqFeatPtr sfp1, sfp2;
SeqMgrFeatContext fcontext1, fcontext2;
@@ -15796,6 +16731,59 @@ static Boolean DoLocationsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean allow_d
}
+/*
+ * Field-by-field comparison of two coding regions. Returns TRUE when both
+ * are NULL or when orf, conflict, gaps, mismatch, stops, genetic_code,
+ * code_break and frame all agree. Frame values 0 and 1 are treated as
+ * equal to each other.
+ */
+static Boolean DoCdRegionsMatch (CdRegionPtr crp1, CdRegionPtr crp2)
+{
+ if (crp1 == NULL && crp2 == NULL) {
+ return TRUE;
+ } else if (crp1 == NULL || crp2 == NULL) {
+ return FALSE;
+ } else if ((crp1->orf && !crp2->orf) || (!crp1->orf && crp2->orf)){
+ return FALSE;
+ } else if ((crp1->conflict && !crp2->conflict) || (!crp1->conflict && crp2->conflict)){
+ return FALSE;
+ } else if (crp1->gaps != crp2->gaps) {
+ return FALSE;
+ } else if (crp1->mismatch != crp2->mismatch) {
+ return FALSE;
+ } else if (crp1->stops != crp2->stops) {
+ return FALSE;
+ } else if ((crp1->genetic_code == NULL && crp2->genetic_code != NULL)
+ || (crp1->genetic_code != NULL && crp2->genetic_code == NULL)
+ || (crp1->genetic_code != NULL && crp2->genetic_code != NULL
+ && !AsnIoMemComp (crp1->genetic_code, crp2->genetic_code, (AsnWriteFunc) GeneticCodeAsnWrite))) {
+ /* one side lacks a genetic code, or AsnIoMemComp reports the two differ */
+ return FALSE;
+ } else if ((crp1->code_break == NULL && crp2->code_break != NULL)
+ || (crp1->code_break != NULL && crp2->code_break == NULL)
+ || (crp1->code_break != NULL && crp2->code_break != NULL
+ && !AsnIoMemComp (crp1->code_break, crp2->code_break, (AsnWriteFunc) CodeBreakAsnWrite))) {
+ /* NOTE(review): only the head code_break pointer is handed to AsnIoMemComp;
+ confirm whether the rest of the list takes part in the comparison */
+ return FALSE;
+ } else if (crp1->frame != crp2->frame) {
+ if ((crp1->frame == 0 || crp1->frame == 1) && (crp2->frame == 0 || crp2->frame == 1)) {
+ /* both effectively frame 1, ignore this difference */
+ } else {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+
+/*
+ * Compares the data choice of two features. Coding regions are compared
+ * with DoCdRegionsMatch; every other choice is compared with AsnIoMemComp
+ * using SeqFeatDataAsnWrite. Two NULL pointers match; a NULL and a
+ * non-NULL pointer, or differing choice values, do not.
+ */
+static Boolean DoesSeqFeatDataMatch (ChoicePtr d1, ChoicePtr d2)
+{
+ if (d1 == NULL && d2 == NULL) {
+ return TRUE;
+ } else if (d1 == NULL || d2 == NULL) {
+ return FALSE;
+ } else if (d1->choice != d2->choice) {
+ return FALSE;
+ } else if (d1->choice == SEQFEAT_CDREGION) {
+ return DoCdRegionsMatch(d1->value.ptrvalue, d2->value.ptrvalue);
+ } else {
+ return AsnIoMemComp(d1, d2, (AsnWriteFunc) SeqFeatDataAsnWrite);
+ }
+}
+
+
NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial)
{
if (sfp1 == NULL && sfp2 == NULL) {
@@ -15832,7 +16820,7 @@ NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean al
return FALSE;
} else if (!DbxrefsMatch (sfp1->dbxref, sfp2->dbxref, case_sensitive)) {
return FALSE;
- } else if (!AsnIoMemComp(&(sfp1->data), &(sfp2->data), (AsnWriteFunc) SeqFeatDataAsnWrite)) {
+ } else if (!DoesSeqFeatDataMatch(&(sfp1->data), &(sfp2->data))) {
return FALSE;
} else if (!XrefsMatch (sfp1->xref, sfp2->xref)) {
return FALSE;
@@ -15859,7 +16847,7 @@ NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep)
FlattenPubdesc (sdp->data.ptrvalue, NULL);
}
- CleanupDescriptorStrings (sdp, stripSerial, NULL, isEmblOrDdbj);
+ CleanupDescriptorStrings (sdp, stripSerial, TRUE, NULL, isEmblOrDdbj);
}
@@ -16116,3 +17104,680 @@ NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEnt
return sd.num_unable_to_convert;
}
+
+/*
+ * RemoveFeatureLink
+ *
+ * Removes from sfp1 every feature-ID xref (id.choice == 3) that resolves,
+ * via SeqMgrGetFeatureByFeatID, to sfp2. When sfp2 is NULL this removes the
+ * feature-ID xrefs that do not resolve to any feature.
+ *
+ * Xrefs that are not feature-ID links (for example gene xrefs) are never
+ * touched; previously they compared equal to a NULL sfp2 and were deleted.
+ * Removed nodes are released with SeqFeatXrefFree rather than MemFree, so
+ * that whatever the xref owns is released along with the node.
+ *
+ * sfp1 - feature whose xref list is edited; NULL is ignored
+ * sfp2 - link target to remove, or NULL for dangling feature-ID links
+ */
+NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2)
+{
+  SeqFeatXrefPtr  xref, next, PNTR prevlink;
+  ObjectIdPtr     oip;
+  SeqFeatPtr      link_sfp;
+  Boolean         is_feat_link;
+  Char            buf [32];
+  CharPtr         str = NULL;
+
+  if (sfp1 == NULL) return;
+
+  prevlink = (SeqFeatXrefPtr PNTR) &(sfp1->xref);
+  xref = sfp1->xref;
+  while (xref != NULL) {
+    next = xref->next;
+    link_sfp = NULL;
+    is_feat_link = (Boolean) (xref->id.choice == 3);
+
+    if (is_feat_link) {
+      oip = (ObjectIdPtr) xref->id.value.ptrvalue;
+      if (oip != NULL) {
+        /* look the target up by its string id, or by the numeric id rendered as text */
+        if (StringDoesHaveText (oip->str)) {
+          str = oip->str;
+        } else {
+          sprintf (buf, "%ld", (long) oip->id);
+          str = buf;
+        }
+        link_sfp = SeqMgrGetFeatureByFeatID (sfp1->idx.entityID, NULL, str, NULL, NULL);
+      }
+    }
+    if (is_feat_link && link_sfp == sfp2) {
+      /* unlink the node, then free it together with its contents */
+      *prevlink = next;
+      xref->next = NULL;
+      SeqFeatXrefFree (xref);
+    } else {
+      prevlink = (SeqFeatXrefPtr PNTR) &(xref->next);
+    }
+
+    xref = next;
+  }
+}
+
+
+/*
+ * Adds to sfp a feature-ID xref pointing at dst, using a copy of dst's own
+ * feature ID. Does nothing unless dst has a feature ID (id.choice == 3).
+ * If sfp already has an xref matching dst's ID the function returns without
+ * changes; any other feature-ID xref on sfp is removed in both directions
+ * before the new xref is pushed onto the front of sfp->xref.
+ */
+NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp)
+
+{
+ ChoicePtr cp;
+ ObjectIdPtr oip;
+ SeqFeatXrefPtr xref, prev_xref, next_xref;
+ SeqFeatPtr old_match;
+
+ if (dst == NULL || sfp == NULL) return;
+
+ cp = &(dst->id);
+ if (cp == NULL) return;
+ if (cp->choice == 3) {
+ /* don't create a duplicate xref, remove links to other features */
+ xref = sfp->xref;
+ prev_xref = NULL;
+ while (xref != NULL) {
+ next_xref = xref->next;
+ if (xref->id.choice == 3 && xref->id.value.ptrvalue != NULL) {
+ if (ObjectIdMatch (cp->value.ptrvalue, xref->id.value.ptrvalue)) {
+ /* already have this xref */
+ return;
+ } else {
+ /* NOTE(review): RemoveFeatureLink edits sfp->xref while this loop holds
+ next_xref; if next_xref also resolves to old_match it may already be
+ freed when the loop advances - confirm */
+ old_match = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL);
+ RemoveFeatureLink (sfp, old_match);
+ RemoveFeatureLink (old_match, sfp);
+ }
+ } else {
+ /* prev_xref is assigned here but never read */
+ prev_xref = xref;
+ }
+ xref = next_xref;
+ }
+
+ oip = (ObjectIdPtr) cp->value.ptrvalue;
+ if (oip != NULL) {
+ /* the xref gets its own copy of the object id */
+ oip = AsnIoMemCopy (oip, (AsnReadFunc) ObjectIdAsnRead,
+ (AsnWriteFunc) ObjectIdAsnWrite);
+ if (oip != NULL) {
+ xref = SeqFeatXrefNew ();
+ if (xref != NULL) {
+ xref->id.choice = 3;
+ xref->id.value.ptrvalue = (Pointer) oip;
+ xref->next = sfp->xref;
+ sfp->xref = xref;
+ }
+ }
+ }
+ }
+}
+
+
+/*
+ * Feature visitor callback for mRNA features. For each protein_id or
+ * orig_protein_id qualifier, finds the CDS whose product is the named
+ * Bioseq and links the mRNA and CDS to each other with feature-ID xrefs.
+ * If the mRNA has no product text, the first protein name of the CDS is
+ * copied onto it. The data argument must be the SeqEntryPtr being processed.
+ */
+static void MakeFeatureXrefsFromProteinIdQualsCallback (SeqFeatPtr sfp, Pointer data)
+{
+ GBQualPtr gbq;
+ SeqIdPtr sip;
+ BioseqPtr pbsp;
+ SeqFeatPtr cds;
+ CharPtr product;
+ ProtRefPtr prp;
+ SeqEntryPtr sep;
+
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) {
+ return;
+ }
+
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "protein_id") == 0 || StringICmp (gbq->qual, "orig_protein_id") == 0) {
+ /* NOTE(review): sip is never freed here; confirm whether CreateSeqIdFromText
+ returns an allocation the caller owns */
+ sip = CreateSeqIdFromText (gbq->val, sep);
+ pbsp = BioseqFind (sip);
+ cds = SeqMgrGetCDSgivenProduct (pbsp, NULL);
+ if (cds != NULL) {
+ LinkTwoFeatures (cds, sfp);
+ LinkTwoFeatures (sfp, cds);
+ product = GetRNAProductString(sfp, NULL);
+ if (StringHasNoText (product)) {
+ prp = GetProtRefForFeature (cds);
+ if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
+ SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old);
+ }
+ }
+ product = MemFree (product);
+ }
+ }
+ }
+}
+
+
+/*
+ * Assigns feature IDs across sep, then visits every feature in sep with
+ * MakeFeatureXrefsFromProteinIdQualsCallback, passing sep as its data.
+ */
+NLM_EXTERN void MakeFeatureXrefsFromProteinIdQuals (SeqEntryPtr sep)
+{
+ /* assign feature IDs, so that we can create xrefs that use them */
+ AssignFeatureIDs (sep);
+
+ VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromProteinIdQualsCallback);
+}
+
+
+/*
+ * Feature visitor callback for mRNA features. For each transcript_id or
+ * orig_transcript_id qualifier, looks up the named Bioseq, asks for the CDS
+ * having that Bioseq as product, and links the mRNA and CDS to each other
+ * with feature-ID xrefs. If the mRNA has no product text, the first protein
+ * name of the CDS is copied onto it. The data argument must be the
+ * SeqEntryPtr being processed.
+ */
+static void MakeFeatureXrefsFromTranscriptIdQualsCallback (SeqFeatPtr sfp, Pointer data)
+{
+ GBQualPtr gbq;
+ SeqIdPtr sip;
+ BioseqPtr pbsp;
+ SeqFeatPtr cds;
+ CharPtr product;
+ ProtRefPtr prp;
+ SeqEntryPtr sep;
+
+ if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA || (sep = (SeqEntryPtr) data) == NULL) {
+ return;
+ }
+
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "transcript_id") == 0 || StringICmp (gbq->qual, "orig_transcript_id") == 0) {
+ /* NOTE(review): sip is never freed here; confirm whether CreateSeqIdFromText
+ returns an allocation the caller owns */
+ sip = CreateSeqIdFromText (gbq->val, sep);
+ pbsp = BioseqFind (sip);
+ /* NOTE(review): the Bioseq named by a transcript id is passed to a lookup
+ keyed on CDS product - confirm this finds the intended CDS */
+ cds = SeqMgrGetCDSgivenProduct (pbsp, NULL);
+ if (cds != NULL) {
+ LinkTwoFeatures (cds, sfp);
+ LinkTwoFeatures (sfp, cds);
+ product = GetRNAProductString(sfp, NULL);
+ if (StringHasNoText (product)) {
+ prp = GetProtRefForFeature (cds);
+ if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
+ SetRNAProductString (sfp, NULL, prp->name->data.ptrvalue, ExistingTextOption_replace_old);
+ }
+ }
+ product = MemFree (product);
+ }
+ }
+ }
+}
+
+
+/*
+ * Assigns feature IDs across sep, then visits every feature in sep with
+ * MakeFeatureXrefsFromTranscriptIdQualsCallback, passing sep as its data.
+ */
+NLM_EXTERN void MakeFeatureXrefsFromTranscriptIdQuals (SeqEntryPtr sep)
+{
+ /* assign feature IDs, so that we can create xrefs that use them */
+ AssignFeatureIDs (sep);
+
+ VisitFeaturesInSep (sep, (Pointer) sep, MakeFeatureXrefsFromTranscriptIdQualsCallback);
+}
+
+
+/*
+ * Feature visitor callback that completes one-directional feature links.
+ * For each feature-ID xref on sfp, the referenced feature is looked up; if
+ * that feature has no feature-ID xref of its own, LinkTwoFeatures (sfp, other)
+ * is called to give it one pointing back at sfp. The data argument is unused.
+ */
+static void FinishHalfXrefsCallback (SeqFeatPtr sfp, Pointer data)
+{
+ SeqFeatPtr other;
+ SeqFeatXrefPtr xref, xref_other;
+ Boolean has_other_xref;
+
+ if (sfp == NULL) {
+ return;
+ }
+
+ xref = sfp->xref;
+ while (xref != NULL) {
+ if (xref->id.choice == 3) {
+ other = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL);
+ if (other != NULL) {
+ /* any feature-ID xref on the other feature counts, whatever it points to */
+ xref_other = other->xref;
+ has_other_xref = FALSE;
+ while (xref_other != NULL && !has_other_xref) {
+ if (xref_other->id.choice == 3) {
+ has_other_xref = TRUE;
+ }
+ xref_other = xref_other->next;
+ }
+ if (!has_other_xref) {
+ LinkTwoFeatures (sfp, other);
+ }
+ }
+ }
+ xref = xref->next;
+ }
+}
+
+
+/* Visits every feature in sep with FinishHalfXrefsCallback, passing sep as its data. */
+NLM_EXTERN void FinishHalfXrefs (SeqEntryPtr sep)
+{
+ VisitFeaturesInSep (sep, (Pointer) sep, FinishHalfXrefsCallback);
+}
+
+
+/*
+ * Returns the amino acid of a tRNA as an ncbieaa value. When aatype is 2 the
+ * stored value is returned as is; otherwise it is converted through the map
+ * table obtained from SeqMapTableFind for the source alphabet selected by
+ * aatype. Returns 0 for a NULL tRNA or when no map table is found.
+ */
+NLM_EXTERN Uint1 GetAaFromtRNA (tRNAPtr trp)
+{
+  Uint1           src_code = 0;
+  SeqMapTablePtr  map;
+
+  if (trp == NULL) {
+    return 0;
+  }
+
+  /* already in the target alphabet, nothing to convert */
+  if (trp->aatype == 2) {
+    return trp->aa;
+  }
+
+  /* pick the source alphabet; aatype 0 and unknown values leave it at 0 */
+  switch (trp->aatype) {
+    case 1:
+      src_code = Seq_code_iupacaa;
+      break;
+    case 3:
+      src_code = Seq_code_ncbi8aa;
+      break;
+    case 4:
+      src_code = Seq_code_ncbistdaa;
+      break;
+    default:
+      break;
+  }
+
+  map = SeqMapTableFind (Seq_code_ncbieaa, src_code);
+  if (map == NULL) {
+    return 0;
+  }
+  return SeqMapTableConvert (map, trp->aa);
+}
+
+
+/*
+ * Finds the genetic code that applies to the Bioseq under sfp->location and
+ * returns the string stored in the first choice-3 element of that code's
+ * data list, or NULL if none is found. Falls back to genetic code 1 when the
+ * Bioseq's code cannot be found. If pCode is non-NULL it receives the
+ * genetic code id that was used.
+ *
+ * The returned pointer is taken directly from the genetic code structure,
+ * not copied - presumably callers must not free it; verify.
+ */
+NLM_EXTERN CharPtr GetCodesFortRNA (SeqFeatPtr sfp, Int2 *pCode)
+{
+ BioseqPtr bsp;
+ Int2 code = 0;
+ GeneticCodePtr gncp;
+ ValNodePtr vnp;
+ CharPtr codes = NULL;
+
+ if (sfp == NULL) {
+ return NULL;
+ }
+
+ /* find genetic code table */
+
+ bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID);
+ BioseqToGeneticCode (bsp, &code, NULL, NULL, NULL, 0, NULL);
+
+ gncp = GeneticCodeFind (code, NULL);
+ if (gncp == NULL) {
+ /* fall back to genetic code 1 */
+ gncp = GeneticCodeFind (1, NULL);
+ code = 1;
+ }
+ if (gncp != NULL) {
+ for (vnp = (ValNodePtr) gncp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice != 3) continue;
+ codes = (CharPtr) vnp->data.ptrvalue;
+ break;
+ }
+ }
+ if (pCode != NULL) {
+ *pCode = code;
+ }
+ return codes;
+}
+
+
+/*
+ * Reports whether codon number index translates to amino acid aa according
+ * to the translation string codes. An unset amino acid (0 or 255) or a
+ * missing table is treated as a match.
+ */
+static Boolean DoesCodonMatchAminoAcid (Uint1 aa, Uint1 index, CharPtr codes)
+{
+  Uint1  translated;
+
+  if (aa == 0 || aa == 255 || codes == NULL) {
+    return TRUE;
+  }
+
+  translated = codes [index];
+  if (translated == aa) {
+    return TRUE;
+  }
+
+  /* Two stop codons are accepted without an exception in the record:
+     TGA (14) read as selenocysteine 'U' and TAG (11) read as pyrrolysine 'O'.
+     TAA (10) has no such special case. */
+  if (translated == '*') {
+    if (aa == 'U' && index == 14) {
+      return TRUE;
+    }
+    if (aa == 'O' && index == 11) {
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+
+/* TRUE only for the uppercase letters A, T, G and C. */
+static Boolean IsATGC (Char ch)
+{
+  switch (ch) {
+    case 'A':
+    case 'T':
+    case 'G':
+    case 'C':
+      return TRUE;
+    default:
+      return FALSE;
+  }
+}
+
+
+/* Complement of an uppercase nucleotide letter; anything but A, C, G, T gives 'N'. */
+static Char s_comp (Char ch)
+{
+  switch (ch) {
+    case 'A':
+      return 'T';
+    case 'G':
+      return 'C';
+    case 'C':
+      return 'G';
+    case 'T':
+      return 'A';
+    default:
+      return 'N';
+  }
+}
+
+
+/*
+ * Builds the text used to identify sfp in log messages: a copy of the
+ * locus_tag of the gene reported by GetGeneInfoForFeature when it has one,
+ * otherwise the result of GetDiscrepancyItemText for the feature.
+ * Callers in this file release the result with MemFree.
+ */
+static CharPtr GetFlipCodonLoggingInfo (SeqFeatPtr sfp)
+{
+ SeqFeatPtr gene = NULL;
+ GeneRefPtr grp = NULL;
+ ValNode vn;
+ CharPtr txt = NULL;
+
+ GetGeneInfoForFeature (sfp, &grp, &gene);
+ if (grp != NULL && !StringHasNoText (grp->locus_tag)) {
+ txt = StringSave (grp->locus_tag);
+ } else {
+ /* wrap the feature in a stack ValNode so it can be described as a discrepancy item */
+ MemSet (&vn, 0, sizeof (ValNode));
+ vn.choice = OBJ_SEQFEAT;
+ vn.data.ptrvalue = sfp;
+ txt = GetDiscrepancyItemText (&vn);
+ }
+ return txt;
+}
+
+
+/* Number of the six codon slots of trp holding a value below 64; 0 for NULL. */
+static Int4 CountCodonsRecognized (tRNAPtr trp)
+{
+  Int4  slot;
+  Int4  recognized = 0;
+
+  if (trp != NULL) {
+    for (slot = 0; slot < 6; slot++) {
+      if (trp->codon [slot] >= 64) continue;
+      recognized++;
+    }
+  }
+  return recognized;
+}
+
+
+/*
+ * Number of codon slots of trp (values below 64 only) for which
+ * DoesCodonMatchAminoAcid accepts amino acid aa under table codes; 0 for NULL.
+ */
+static Int4 CountMatchingCodons (tRNAPtr trp, Uint1 aa, CharPtr codes)
+{
+  Int4  slot;
+  Int4  matches = 0;
+
+  if (trp != NULL) {
+    for (slot = 0; slot < 6; slot++) {
+      if (trp->codon [slot] >= 64) continue;
+      if (! DoesCodonMatchAminoAcid (aa, trp->codon [slot], codes)) continue;
+      matches++;
+    }
+  }
+  return matches;
+}
+
+
+/*
+ * Counts the codon slots of trp that do not match amino acid aa as stored,
+ * but whose reverse complement does match. Only codons that decode to
+ * plain A/T/G/C letters are considered. Nothing is modified.
+ */
+static Int4 CountFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code)
+{
+ Int4 num = 0, i;
+ Int2 index;
+ Uint1 codon [4];
+ Uint1 rcodon [4];
+
+ if (trp == NULL) {
+ return 0;
+ }
+ /* Note - it is important to set the fourth character in the codon array to NULL
+ * because CodonForIndex only fills in the three characters of actual codon,
+ * so if you StringCpy the codon array and the NULL character is not found after
+ * the three codon characters, you will write in memory you did not intend to.
+ */
+ codon [3] = 0;
+ rcodon [3] = 0;
+ for (i = 0; i < 6; i++)
+ {
+ if (trp->codon [i] < 64
+ && !DoesCodonMatchAminoAcid (aa, trp->codon[i], codes)
+ && CodonForIndex (trp->codon [i], Seq_code_iupacna, codon)
+ && IsATGC(codon[0])
+ && IsATGC(codon[1])
+ && IsATGC(codon[2]))
+ {
+ /* reverse complement: complement each base and reverse the order */
+ rcodon[0] = s_comp(codon[2]);
+ rcodon[1] = s_comp(codon[1]);
+ rcodon[2] = s_comp(codon[0]);
+ index = IndexForCodon (rcodon, code);
+ if (index < 64 && DoesCodonMatchAminoAcid(aa, index, codes))
+ {
+ num++;
+ }
+ }
+ }
+
+ return num;
+}
+
+
+/*
+ * Replaces, in place, each codon slot of trp that does not match amino acid
+ * aa with its reverse complement when the reverse complement does match.
+ * Only codons that decode to plain A/T/G/C letters are considered.
+ * Returns the number of slots rewritten.
+ */
+static Int4 FlipFlippableCodons (tRNAPtr trp, Uint1 aa, CharPtr codes, Int2 code)
+{
+ Int4 num = 0, i;
+ Int2 index;
+ Uint1 codon [4];
+ Uint1 rcodon [4];
+
+ if (trp == NULL) {
+ return 0;
+ }
+ /* Note - it is important to set the fourth character in the codon array to NULL
+ * because CodonForIndex only fills in the three characters of actual codon,
+ * so if you StringCpy the codon array and the NULL character is not found after
+ * the three codon characters, you will write in memory you did not intend to.
+ */
+ codon [3] = 0;
+ rcodon [3] = 0;
+ for (i = 0; i < 6; i++)
+ {
+ if (trp->codon [i] < 64
+ && !DoesCodonMatchAminoAcid (aa, trp->codon[i], codes)
+ && CodonForIndex (trp->codon [i], Seq_code_iupacna, codon)
+ && IsATGC(codon[0])
+ && IsATGC(codon[1])
+ && IsATGC(codon[2]))
+ {
+ /* reverse complement: complement each base and reverse the order */
+ rcodon[0] = s_comp(codon[2]);
+ rcodon[1] = s_comp(codon[1]);
+ rcodon[2] = s_comp(codon[0]);
+ index = IndexForCodon (rcodon, code);
+ if (index < 64 && DoesCodonMatchAminoAcid(aa, index, codes))
+ {
+ /* store the flipped codon in place of the original */
+ trp->codon[i] = index;
+ num++;
+ }
+ }
+ }
+
+ return num;
+}
+
+
+/*
+ * TRUE when codon_recognized checks should be skipped for sfp: the feature
+ * is NULL, or its exception text mentions RNA editing or modified codon
+ * recognition (case-insensitive search).
+ */
+static Boolean IgnoretRNACodonRecognized (SeqFeatPtr sfp)
+{
+  if (sfp == NULL) {
+    return TRUE;
+  }
+  if (StringISearch (sfp->except_text, "RNA editing") != NULL) {
+    return TRUE;
+  }
+  if (StringISearch (sfp->except_text, "modified codon recognition") != NULL) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+ * Feature visitor callback for tRNA features. When the tRNA's recognized
+ * codons do not all match its amino acid, and the tRNA has exactly one
+ * recognized codon whose reverse complement does match, that codon is
+ * replaced by its reverse complement. In every other mismatching case the
+ * feature is left unchanged and, if data is a non-NULL LogInfoPtr, the
+ * failure is logged. Features accepted by IgnoretRNACodonRecognized are skipped.
+ */
+static void FlipCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data)
+{
+ RnaRefPtr rrp;
+ tRNAPtr trp;
+ Uint1 aa;
+ CharPtr txt;
+ LogInfoPtr lip;
+ Int2 code = 0;
+ CharPtr codes = NULL;
+ Int4 num_codons, num_match, num_flippable;
+
+ if (IgnoretRNACodonRecognized(sfp)
+ || sfp->idx.subtype != FEATDEF_tRNA
+ || (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) == NULL
+ || rrp->ext.choice != 2
+ || (trp = (tRNAPtr)(rrp->ext.value.ptrvalue)) == NULL)
+ {
+ return;
+ }
+
+ num_codons = CountCodonsRecognized (trp);
+ if (num_codons == 0) {
+ return;
+ }
+
+ lip = (LogInfoPtr) data;
+
+ aa = GetAaFromtRNA (trp);
+
+ /* find genetic code table */
+ codes = GetCodesFortRNA (sfp, &code);
+
+ if (codes == NULL) return;
+
+ num_match = CountMatchingCodons (trp, aa, codes);
+ if (num_codons == num_match) {
+ /* every recognized codon already matches the amino acid */
+ return;
+ } else if (num_codons > 1) {
+ /* with more than one recognized codon no flip is attempted */
+ if (lip != NULL)
+ {
+ if (lip->fp != NULL)
+ {
+ /* text for log */
+ txt = GetFlipCodonLoggingInfo (sfp);
+ fprintf (lip->fp, "Unable to flip bad codon_recognized for %s\n", txt);
+ txt = MemFree (txt);
+ }
+ lip->data_in_log = TRUE;
+ }
+ } else {
+ num_flippable = CountFlippableCodons(trp, aa, codes, code);
+ if (num_flippable == num_codons) {
+ FlipFlippableCodons (trp, aa, codes, code);
+ } else {
+ if (lip != NULL)
+ {
+ if (lip->fp != NULL)
+ {
+ /* text for log */
+ txt = GetFlipCodonLoggingInfo (sfp);
+ fprintf (lip->fp, "Unable to flip bad codon_recognized for %s\n", txt);
+ txt = MemFree (txt);
+ }
+ lip->data_in_log = TRUE;
+ }
+ }
+ }
+}
+
+
+/* Visits every feature in sep with FlipCodonRecognizedCallback, passing lip as its data. */
+NLM_EXTERN void FlipCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
+{
+ VisitFeaturesInSep (sep, lip, FlipCodonRecognizedCallback);
+}
+
+
+/*
+ * Feature visitor callback for tRNA features. Removes from the tRNA's codon
+ * array each recognized codon that does not match the tRNA's amino acid and
+ * that decodes to plain A/T/G/C letters; later codons are shifted down and
+ * the last slot is set to 255. Each removal is logged when data is a
+ * non-NULL LogInfoPtr. Features accepted by IgnoretRNACodonRecognized are skipped.
+ */
+static void RemoveBadCodonRecognizedCallback (SeqFeatPtr sfp, Pointer data)
+{
+ RnaRefPtr rrp;
+ tRNAPtr trp;
+ Int2 j, k;
+ Uint1 aa;
+ Uint1 codon [4];
+ Uint1 rcodon [4];
+ CharPtr txt;
+ LogInfoPtr lip;
+ Int2 code = 0;
+ CharPtr codes = NULL;
+ Int4 num_codons, num_match;
+
+ if (IgnoretRNACodonRecognized(sfp)
+ || sfp->idx.subtype != FEATDEF_tRNA
+ || (rrp = (RnaRefPtr) sfp->data.value.ptrvalue) == NULL
+ || rrp->ext.choice != 2
+ || (trp = (tRNAPtr)(rrp->ext.value.ptrvalue)) == NULL)
+ {
+ return;
+ }
+
+ num_codons = CountCodonsRecognized (trp);
+ if (num_codons == 0) {
+ return;
+ }
+
+ lip = (LogInfoPtr) data;
+
+ aa = GetAaFromtRNA (trp);
+
+ /* find genetic code table */
+ codes = GetCodesFortRNA (sfp, &code);
+
+ if (codes == NULL) return;
+
+ num_match = CountMatchingCodons (trp, aa, codes);
+ if (num_match == num_codons) {
+ return;
+ }
+
+ /* Note - it is important to set the fourth character in the codon array to NULL
+ * because CodonForIndex only fills in the three characters of actual codon,
+ * so if you StringCpy the codon array and the NULL character is not found after
+ * the three codon characters, you will write in memory you did not intend to.
+ */
+ codon [3] = 0;
+ /* rcodon only receives its terminator here; it is not otherwise used in this function */
+ rcodon [3] = 0;
+
+ for (j = 0; j < 6; j++)
+ {
+ if (trp->codon [j] < 64)
+ {
+ if (DoesCodonMatchAminoAcid (aa, trp->codon[j], codes))
+ {
+ /* already ok - skip it */
+ }
+ else if (CodonForIndex (trp->codon [j], Seq_code_iupacna, codon)
+ && IsATGC(codon[0])
+ && IsATGC(codon[1])
+ && IsATGC(codon[2]))
+ {
+ /* delete slot j by shifting the remaining slots down and clearing the last one */
+ for (k = j + 1; k < 6; k++)
+ {
+ trp->codon[k - 1] = trp->codon[k];
+ }
+ trp->codon[5] = 255;
+ if (lip != NULL)
+ {
+ if (lip->fp != NULL)
+ {
+ /* text for log */
+ txt = GetFlipCodonLoggingInfo (sfp);
+ fprintf (lip->fp, "Removed codon_recognized '%s' for %s\n", codon, txt);
+ txt = MemFree (txt);
+ }
+ lip->data_in_log = TRUE;
+ }
+ /* push index down, so we don't skip over a codon */
+ j--;
+ }
+ }
+ }
+}
+
+
+/* Visits every feature in sep with RemoveBadCodonRecognizedCallback, passing lip as its data. */
+NLM_EXTERN void RemoveBadCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip)
+{
+ VisitFeaturesInSep (sep, lip, RemoveBadCodonRecognizedCallback);
+}
diff --git a/api/sqnutil2.c b/api/sqnutil2.c
index 1f966f30..19b6aff6 100644
--- a/api/sqnutil2.c
+++ b/api/sqnutil2.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.439 $
+* $Revision: 6.502 $
*
* File Description:
*
@@ -1876,6 +1876,39 @@ static CharPtr sqntag_biosrc_origin_list [] = {
"synthetic", "other", NULL
};
+
+/* Moves primer name/sequence tags from a parsed tag set into biop.
+ * Every non-NULL tag name is mapped through EquivalentSubSourceEx; when the
+ * result is one of the four primer subtypes listed below, the tag is marked
+ * as used and its value is added to biop as the matching source qualifier.
+ * Nothing happens when stp or biop is NULL or stp holds no tags.
+ */
+static void SqnTagParsePrimers (SqnTagPtr stp, BioSourcePtr biop)
+{
+  Int4 subtypes[] = { SUBSRC_fwd_primer_name, SUBSRC_fwd_primer_seq, SUBSRC_rev_primer_name, SUBSRC_rev_primer_seq};
+  Int4 src_quals[] = { Source_qual_fwd_primer_name, Source_qual_fwd_primer_seq, Source_qual_rev_primer_name, Source_qual_rev_primer_seq};
+  ValNode fields[4];
+  Int4 num_fields = 4;
+  Int4 subtype;
+  Int4 tag, f;
+
+  if (stp == NULL || stp->num_tags == 0 || biop == NULL) return;
+
+  /* one text-qualifier descriptor per primer subtype, in the same order as subtypes[] */
+  MemSet (fields, 0, sizeof (fields));
+  for (f = 0; f < num_fields; f++) {
+    fields[f].choice = SourceQualChoice_textqual;
+    fields[f].data.intvalue = src_quals[f];
+  }
+
+  for (tag = 0; tag < stp->num_tags; tag++) {
+    if (stp->tag [tag] == NULL) continue;
+
+    subtype = EquivalentSubSourceEx (stp->tag[tag], TRUE);
+
+    /* locate the first primer subtype equal to the mapped value, if any */
+    f = 0;
+    while (f < num_fields && subtype != subtypes[f]) {
+      f++;
+    }
+    if (f < num_fields) {
+      stp->used [tag] = TRUE;
+      SetSourceQualInBioSource (biop, fields + f, NULL, stp->val[tag], ExistingTextOption_add_qual);
+    }
+  }
+}
+
+
NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource (
SqnTagPtr stp,
CharPtr organism,
@@ -1968,6 +2001,8 @@ NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource (
SqnTagFindSubSourceQuals (stp, biop);
+ SqnTagParsePrimers (stp, biop);
+
list = SqnTagFindMultiple (stp, "db_xref");
for (list_vnp = list; list_vnp != NULL; list_vnp = list_vnp->next) {
str = list_vnp->data.ptrvalue;
@@ -3312,7 +3347,7 @@ static ValNodePtr ParseContigOrFeatureTableString (CharPtr contigs, Boolean tabD
{
Char ch;
- Int2 i, j, k;
+ Int4 i, j, k;
CharPtr str;
Char tmp [2048];
ValNodePtr vnp;
@@ -3345,12 +3380,16 @@ static ValNodePtr ParseContigOrFeatureTableString (CharPtr contigs, Boolean tabD
str [j + k] = '\0';
i += j + k + 1;
}
- StringNCpy_0 (tmp, str + k, sizeof (tmp));
- SqnTrimSpacesAroundString (tmp);
- if (HasNoText (tmp)) {
- ValNodeAdd (&vnp);
+ if (StringLen (str + k) < sizeof (tmp)) {
+ StringNCpy_0 (tmp, str + k, sizeof (tmp));
+ SqnTrimSpacesAroundString (tmp);
+ if (HasNoText (tmp)) {
+ ValNodeAdd (&vnp);
+ } else {
+ ValNodeCopyStr (&vnp, 0, tmp);
+ }
} else {
- ValNodeCopyStr (&vnp, 0, tmp);
+ ValNodeAddPointer (&vnp, 0, StringSave (str));
}
}
if (vnp != NULL) {
@@ -3559,7 +3598,7 @@ NLM_EXTERN Int4 ReadSequenceAsnFile (
fp = FileOpen (inputFile, binary? "rb" : "r");
#endif
if (fp == NULL) {
- Message (MSG_ERROR, "FileOpen failed for input file '%s'", inputFile);
+ Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
return index;
}
@@ -4433,6 +4472,11 @@ static ByteStorePtr ReadFlatFileDNA (FileCachePtr fcp, BoolPtr protPtr, Boolean
Int4 bad_char [256];
Boolean non_prot_char [256];
Int4 num_bad = 0;
+ Boolean is_nuc_char [256];
+ Boolean is_prot_char [256];
+ CharPtr nuc_list = "atgcbdhkmnrsuvwy";
+ CharPtr prot_list = "abcdefghijklmnopqrstuvwxyz";
+ CharPtr ptr;
if (fcp == NULL) return NULL;
bs = BSNew (1000);
@@ -4443,6 +4487,30 @@ static ByteStorePtr ReadFlatFileDNA (FileCachePtr fcp, BoolPtr protPtr, Boolean
*perr = FALSE;
}
+ MemSet (is_nuc_char, 0, sizeof (is_nuc_char));
+
+ ptr = nuc_list;
+ ch = *ptr;
+ while (ch != '\0') {
+ is_nuc_char [(int) ch] = TRUE;
+ ch = TO_UPPER (ch);
+ is_nuc_char [(int) ch] = TRUE;
+ ptr++;
+ ch = *ptr;
+ }
+
+ MemSet (is_prot_char, 0, sizeof (is_prot_char));
+
+ ptr = prot_list;
+ ch = *ptr;
+ while (ch != '\0') {
+ is_prot_char [(int) ch] = TRUE;
+ ch = TO_UPPER (ch);
+ is_prot_char [(int) ch] = TRUE;
+ ptr++;
+ ch = *ptr;
+ }
+
if (forceNuc) {
isProt = FALSE;
} else if (forceProt) {
@@ -4540,8 +4608,8 @@ static ByteStorePtr ReadFlatFileDNA (FileCachePtr fcp, BoolPtr protPtr, Boolean
noErrors = FALSE;
}
} else {
- if (IsNonSeqChar (ch, isProt))
- {
+ /* if (IsNonSeqChar (ch, isProt)) */
+ if ((isProt && (! is_prot_char [(int) ch])) || ((! isProt) && (! is_nuc_char [(int) ch]))) {
bad_char [(int) ch] ++;
noErrors = FALSE;
}
@@ -4632,10 +4700,27 @@ static SimpleSeqPtr ByteStoreToSimpleSeq (ByteStorePtr bs, CharPtr seqid, CharPt
#define qualVal field [QUAL_VAL_TAG]
#define strandStr field [STRAND_TAG]
+
+/* Scans a feature-table position string and returns the first character
+ * that is not '<', '>', '^', '-' or a decimal digit.  Returns 0 when str is
+ * NULL or when every character is acceptable (the scan then stops on the
+ * terminating NUL, which is what gets returned).
+ */
+static Char UnexpectedCharInPositionString (CharPtr str)
+{
+  CharPtr cp;
+
+  if (str == NULL) {
+    return 0;
+  }
+
+  cp = str;
+  /* isdigit is only defined for values representable as unsigned char (or
+   * EOF); a plain char holding a high-bit byte from the input file must be
+   * converted first.
+   */
+  while (*cp == '<' || *cp == '>' || *cp == '^' || *cp == '-'
+         || isdigit ((unsigned char) *cp)) {
+    cp++;
+  }
+  return *cp;
+}
+
+
static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
BoolPtr partial5P, BoolPtr partial3P, BoolPtr ispointP,
BoolPtr isminusP, CharPtr PNTR featP, CharPtr PNTR qualP,
- CharPtr PNTR valP, Int4 offset)
+ CharPtr PNTR valP, Int4 offset, Int4 lin_num)
{
Boolean badNumber;
@@ -4653,6 +4738,7 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
Int4 tmp;
long int val;
ValNodePtr vnp;
+ Char badch;
if (line == NULL || HasNoText (line)) return FALSE;
if (*line == '[') return FALSE; /* offset and other instructions encoded in brackets */
@@ -4676,6 +4762,10 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
badNumber = FALSE;
str = startStr;
+ badch = UnexpectedCharInPositionString (str);
+ if (badch != 0) {
+ Message (MSG_POSTERR, "Unexpected characters in from column of line %d - first bad character is '%c'", lin_num, badch);
+ }
if (str != NULL && *str == '<') {
partial5 = TRUE;
str++;
@@ -4692,6 +4782,10 @@ static Boolean ParseFeatTableLine (CharPtr line, Int4Ptr startP, Int4Ptr stopP,
badNumber = TRUE;
}
str = stopStr;
+ badch = UnexpectedCharInPositionString (str);
+ if (badch != 0) {
+ Message (MSG_POSTERR, "Unexpected characters in to column of line %d - first bad character is '%c'", lin_num, badch);
+ }
if (str != NULL && *str == '>') {
partial3 = TRUE;
str++;
@@ -5266,7 +5360,7 @@ static Boolean ParseQualIntoBioSource (SeqFeatPtr sfp, CharPtr qual, CharPtr val
return TRUE;
}
- found = EquivalentOrgMod (str);
+ found = EquivalentOrgMod (qual);
if (found > 0) {
if (found == 32) {
found = 253;
@@ -5286,7 +5380,7 @@ static Boolean ParseQualIntoBioSource (SeqFeatPtr sfp, CharPtr qual, CharPtr val
return TRUE;
}
- found = EquivalentSubSource (str);
+ found = EquivalentSubSource (qual);
if (found > 0) {
ssp = SubSourceNew ();
@@ -5638,6 +5732,14 @@ static Boolean ParseQualIntoGeneOntologyUserObject (SeqFeatPtr sfp, CharPtr qual
return FALSE;
}
+/* Category labels that InvalidInference compares (with StringNICmp) against
+ * the start of an inference string and steps past, together with any spaces
+ * that follow, before it checks okayInferencePrefixes.  Like that table, the
+ * list starts with an empty string and is terminated by NULL.
+ */
+static CharPtr okayCategoryPrefixes [] = {
+ "",
+ "COORDINATES:",
+ "DESCRIPTION:",
+ "EXISTENCE:",
+ NULL
+};
+
static CharPtr okayInferencePrefixes [] = {
"",
"similar to sequence",
@@ -5659,10 +5761,25 @@ static Boolean InvalidInference (CharPtr str)
{
Int2 best, j;
+ Char ch;
size_t len;
if (StringHasNoText (str)) return TRUE;
+ for (j = 0; okayCategoryPrefixes [j] != NULL; j++) {
+ len = StringLen (okayCategoryPrefixes [j]);
+ if (StringNICmp (str, okayCategoryPrefixes [j], len) != 0) continue;
+ str += len;
+ ch = *str;
+ while (ch == ' ') {
+ str++;
+ ch = *str;
+ }
+ break;
+ }
+
+ if (StringHasNoText (str)) return TRUE;
+
best = -1;
for (j = 0; okayInferencePrefixes [j] != NULL; j++) {
len = StringLen (okayInferencePrefixes [j]);
@@ -6654,6 +6771,11 @@ static void ParseWhitespaceIntoTabs (CharPtr line)
StringCat (str, "\t\t\t");
TrimSpacesAroundString (ptr);
tmp = TokenizeAtWhiteSpace (ptr);
+ if (tmp != NULL) {
+ while (isspace (*tmp)) {
+ tmp++;
+ }
+ }
StringCat (str, ptr);
StringCat (str, "\t");
StringCat (str, tmp);
@@ -6680,7 +6802,40 @@ static void ParseWhitespaceIntoTabs (CharPtr line)
MemFree (str);
}
-static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr annotname)
+
+/* Completes a line that did not fit into the caller's fixed-size buffer.
+ *
+ * original_buffer holds the part already read.  Further pieces are pulled
+ * from fcp with FileCacheReadLine until a read reports that the newline was
+ * reached (nonewline left FALSE) or returns NULL.  All pieces are joined, in
+ * order, into one newly allocated string.
+ *
+ * Returns the joined string, which the caller owns and must MemFree, or NULL
+ * if the result buffer could not be allocated.
+ */
+static CharPtr ReadTheRestOfTheLine (FileCachePtr fcp, CharPtr original_buffer)
+{
+  Char line [2047];
+  CharPtr str;
+  Boolean nonewline = TRUE;
+  ValNodeBlock extra;
+  Int4 len = 1;   /* running total, starting with room for the terminator */
+  ValNodePtr vnp;
+
+  InitValNodeBlock(&extra, NULL);
+  ValNodeAddPointerToEnd (&extra, 0, StringSave(original_buffer));
+  len += StringLen (original_buffer);
+  while (nonewline) {
+    nonewline = FALSE;
+    str = FileCacheReadLine (fcp, line, sizeof (line), &nonewline);
+    if (str == NULL) {
+      /* nothing more to read - stop even if the flag was left set */
+      nonewline = FALSE;
+    } else {
+      ValNodeAddPointerToEnd (&extra, 0, StringSave (line));
+      len += StringLen (line);
+    }
+  }
+
+  str = (CharPtr) MemNew (sizeof (Char) * len);
+  if (str != NULL) {
+    *str = 0;
+    for (vnp = extra.head; vnp != NULL; vnp = vnp->next) {
+      StringCat(str, vnp->data.ptrvalue);
+    }
+    str[len - 1] = 0;
+  }
+
+  /* the pieces were only needed for assembly; release the nodes and the
+   * StringSave copies they hold so each long line does not leak them
+   */
+  ValNodeFreeData (extra.head);
+
+  return str;
+}
+
+
+static SeqAnnotPtr ReadFeatureTableEx (FileCachePtr fcp, CharPtr seqid, CharPtr annotname, Int4Ptr p_line)
{
Boolean allowWhitesp = TRUE;
@@ -6728,6 +6883,7 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an
CharPtr str;
CharPtr tmp;
CharPtr val;
+ Boolean free_str = FALSE;
if (fcp == NULL || fcp->fp == NULL || seqid == NULL) return NULL;
sip = SeqIdFindBest (MakeSeqID (seqid), 0);
@@ -6735,6 +6891,18 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an
pos = FileCacheTell (fcp);
str = FileCacheReadLine (fcp, line, sizeof (line), &nonewline);
+ if (nonewline) {
+ str = ReadTheRestOfTheLine (fcp, line);
+ if (StringDoesHaveText (str)) {
+ free_str = TRUE;
+ } else {
+ str = MemFree (str);
+ }
+ }
+
+ if (p_line != NULL) {
+ lin_num = *p_line;
+ }
lin_num++;
while (str != NULL) {
@@ -6746,30 +6914,42 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an
endsinspace = TRUE;
}
- if (! HasNoText (line)) {
+ if (! HasNoText (str)) {
- if (StringNCmp (line, ">", 1) == 0 ||
- StringNCmp (line, "LOCUS ", 6) == 0 ||
- StringNCmp (line, "ID ", 3) == 0 ||
- StringStr (line, "::=") != NULL) {
+ if (StringNCmp (str, ">", 1) == 0 ||
+ StringNCmp (str, "LOCUS ", 6) == 0 ||
+ StringNCmp (str, "ID ", 3) == 0 ||
+ StringStr (str, "::=") != NULL) {
FileCacheSeek (fcp, pos);
SeqIdFree (sip);
+ if (p_line != NULL) {
+ *p_line = lin_num;
+ }
+ if (free_str) {
+ str = MemFree (str);
+ }
return sap;
- } else if (StringNCmp (line, "//", 2) == 0) {
+ } else if (StringNCmp (str, "//", 2) == 0) {
SeqIdFree (sip);
+ if (p_line != NULL) {
+ *p_line = lin_num;
+ }
+ if (free_str) {
+ str = MemFree (str);
+ }
return sap;
}
if (allowWhitesp) {
- ParseWhitespaceIntoTabs (line);
+ ParseWhitespaceIntoTabs (str);
}
feat = NULL;
qual = NULL;
val = NULL;
- if (*line == '[') {
- stp = SqnTagParse (line);
+ if (*str == '[') {
+ stp = SqnTagParse (str);
if (stp != NULL) {
tmp = SqnTagFind (stp, "offset");
if (tmp != NULL) {
@@ -6780,14 +6960,14 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an
}
SqnTagFree (stp);
- } else if (StringNICmp (line, "ORDER", 5) == 0) {
+ } else if (StringNICmp (str, "ORDER", 5) == 0) {
if (sfp != NULL) {
PutNullsBetween (sfp->location);
}
- } else if (ParseFeatTableLine (line, &start, &stop, &partial5, &partial3, &ispoint,
- &isminus, &feat, &qual, &val, offset)) {
+ } else if (ParseFeatTableLine (str, &start, &stop, &partial5, &partial3, &ispoint,
+ &isminus, &feat, &qual, &val, offset, lin_num)) {
if (feat != NULL && start >= 0 && stop >= 0) {
if (sap == NULL) {
@@ -7069,6 +7249,8 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an
ErrPostEx (SEV_ERROR, ERR_SEQ_FEAT_ImpFeatBadLoc, "Bad location on feature %s (start %ld, stop %ld)", feat, (long) start, (long) stop);
}
+ } else {
+ Message (MSG_POSTERR, "Unrecognized line in feature table: %s", str);
}
/* ParseFeatTableLine copies these three strings, so free here */
@@ -7079,6 +7261,8 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an
}
+#if 0
+ /* commented out - always read in entire line now */
/* if humongously long line /note, now extends by concatenation */
while (nonewline && str != NULL) {
@@ -7101,16 +7285,45 @@ static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr an
}
}
}
+#endif
pos = FileCacheTell (fcp);
+ if (free_str) {
+ str = MemFree (str);
+ free_str = FALSE;
+ }
+
str = FileCacheReadLine (fcp, line, sizeof (line), &nonewline);
+ if (nonewline) {
+ str = ReadTheRestOfTheLine (fcp, line);
+ if (StringDoesHaveText (str)) {
+ free_str = TRUE;
+ } else {
+ str = MemFree (str);
+ }
+ } else {
+ free_str = FALSE;
+ }
+
lin_num++;
}
+ if (free_str) {
+ str = MemFree (str);
+ }
+
SeqIdFree (sip);
+ if (p_line != NULL) {
+ *p_line = lin_num;
+ }
return sap;
}
+/* Wrapper around ReadFeatureTableEx that passes NULL for p_line, so no
+ * starting line number is taken from the caller and none is reported back.
+ */
+static SeqAnnotPtr ReadFeatureTable (FileCachePtr fcp, CharPtr seqid, CharPtr annotname)
+{
+ return ReadFeatureTableEx (fcp, seqid, annotname, NULL);
+}
+
/* ReadVecScreenTable reads lines of vector screen output into a Seq-annot. */
static SeqAnnotPtr ReadVecScreenTable (FileCachePtr fcp, CharPtr seqid, CharPtr annotname)
@@ -7754,15 +7967,22 @@ typedef struct setatp {
AsnTypePtr atp_class;
AsnTypePtr atp_seqset;
AsnTypePtr atp_se;
+ AsnTypePtr atp_descr;
+ AsnTypePtr atp_descr_e;
+ AsnTypePtr atp_set_desc;
+ AsnTypePtr atp_bioseq_desc;
AsnTypePtr atp_desc;
AsnTypePtr atp_annot;
+ AsnTypePtr atp_bioseq_annot;
AsnTypePtr atp_annot_e;
+ AsnTypePtr atp_bioseq_annot_e;
AsnTypePtr atp_id;
AsnTypePtr atp_coll;
AsnTypePtr atp_date;
AsnTypePtr atp_level;
AsnTypePtr atp_release;
AsnTypePtr atp_bss;
+ AsnTypePtr atp_bioseq;
AsnTypePtr atp_seqentry;
AsnTypePtr atp_seq;
AsnTypePtr atp_set;
@@ -7772,6 +7992,8 @@ typedef struct setatp {
AsnTypePtr atp_seqsubmit_data_entries_E;
AsnTypePtr atp_seqsubmit_data_entries;
AsnTypePtr atp_seqsubmit_data_entries_set;
+ AsnTypePtr atp_bioseq_id_E;
+ AsnTypePtr atp_seqdesc_pub;
} SetAtpData, PNTR SetAtpPtr;
@@ -7781,9 +8003,15 @@ static SetAtpPtr GetSetAtp (void)
AsnTypePtr atp_class;
AsnTypePtr atp_seqset;
AsnTypePtr atp_se;
+ AsnTypePtr atp_descr;
+ AsnTypePtr atp_descr_e;
+ AsnTypePtr atp_set_desc;
+ AsnTypePtr atp_bioseq_desc;
AsnTypePtr atp_desc;
AsnTypePtr atp_annot;
+ AsnTypePtr atp_bioseq_annot;
AsnTypePtr atp_annot_e;
+ AsnTypePtr atp_bioseq_annot_e;
AsnTypePtr atp_id;
AsnTypePtr atp_coll;
AsnTypePtr atp_date;
@@ -7799,6 +8027,9 @@ static SetAtpPtr GetSetAtp (void)
AsnTypePtr atp_seqsubmit_data_entries_E;
AsnTypePtr atp_seqsubmit_data_entries;
AsnTypePtr atp_seqsubmit_data_entries_set;
+ AsnTypePtr atp_bioseq;
+ AsnTypePtr atp_bioseq_id_E;
+ AsnTypePtr atp_seqdesc_pub;
SetAtpPtr sp;
amp = AsnAllModPtr ();
@@ -7825,30 +8056,72 @@ static SetAtpPtr GetSetAtp (void)
return NULL;
}
+ atp_bioseq = AsnFind ("Bioseq");
+ if (atp_bioseq == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq");
+ return NULL;
+ }
+
atp_class = AsnFind ("Bioseq-set.class");
if (atp_class == NULL) {
Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.class");
return NULL;
}
- atp_desc = AsnFind ("Bioseq-set.descr");
- if (atp_desc == NULL) {
+ atp_descr = AsnFind ("Seq-descr");
+ if (atp_descr == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-descr");
+ return NULL;
+ }
+
+ atp_descr_e = AsnFind ("Seq-descr.E");
+ if (atp_descr_e == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-descr.E");
+ return NULL;
+ }
+
+ atp_set_desc = AsnFind ("Bioseq-set.descr");
+ if (atp_set_desc == NULL) {
Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr");
return NULL;
}
+ atp_bioseq_desc = AsnFind ("Bioseq.descr");
+ if (atp_bioseq_desc == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.descr");
+ return NULL;
+ }
+
+ atp_desc = AsnFind ("Seqdesc");
+ if (atp_desc == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Seqdesc");
+ return NULL;
+ }
+
atp_annot = AsnFind ("Bioseq-set.annot");
if (atp_annot == NULL) {
Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.annot");
return NULL;
}
+ atp_bioseq_annot = AsnFind ("Bioseq.annot");
+ if (atp_bioseq_annot == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.annot");
+ return NULL;
+ }
+
atp_annot_e = AsnFind ("Bioseq-set.annot.E");
if (atp_annot_e == NULL) {
Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.annot.E");
return NULL;
}
+ atp_bioseq_annot_e = AsnFind ("Bioseq.annot.E");
+ if (atp_bioseq_annot_e == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.annot.E");
+ return NULL;
+ }
+
atp_id = AsnFind ("Bioseq-set.id");
if (atp_id == NULL) {
Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.id");
@@ -7930,21 +8203,39 @@ static SetAtpPtr GetSetAtp (void)
return NULL;
}
+ atp_bioseq_id_E = AsnFind ("Bioseq.id.E");
+ if (atp_bioseq_id_E == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq.id.E");
+ return NULL;
+ }
+
+ atp_seqdesc_pub = AsnFind ("Seqdesc.pub");
+ if (atp_seqdesc_pub == NULL) {
+ Message (MSG_POSTERR, "Unable to find ASN.1 type Seqdesc.pub");
+ return NULL;
+ }
sp = (SetAtpPtr) MemNew (sizeof(SetAtpData));
sp->amp = amp;
sp->atp_class = atp_class;
sp->atp_seqset = atp_seqset;
sp->atp_se = atp_se;
+ sp->atp_descr = atp_descr;
+ sp->atp_descr_e = atp_descr_e;
+ sp->atp_set_desc = atp_set_desc;
+ sp->atp_bioseq_desc = atp_bioseq_desc;
sp->atp_desc = atp_desc;
sp->atp_annot = atp_annot;
+ sp->atp_bioseq_annot = atp_bioseq_annot;
sp->atp_annot_e = atp_annot_e;
+ sp->atp_bioseq_annot_e = atp_bioseq_annot_e;
sp->atp_id = atp_id;
sp->atp_coll = atp_coll;
sp->atp_date = atp_date;
sp->atp_level = atp_level;
sp->atp_release = atp_release;
sp->atp_bss = atp_bss;
+ sp->atp_bioseq = atp_bioseq;
sp->atp_seqentry = atp_seqentry;
sp->atp_seq = atp_seq;
sp->atp_set = atp_set;
@@ -7954,6 +8245,8 @@ static SetAtpPtr GetSetAtp (void)
sp->atp_seqsubmit_data_entries_E = atp_seqsubmit_data_entries_E;
sp->atp_seqsubmit_data_entries = atp_seqsubmit_data_entries;
sp->atp_seqsubmit_data_entries_set = atp_seqsubmit_data_entries_set;
+ sp->atp_bioseq_id_E = atp_bioseq_id_E;
+ sp->atp_seqdesc_pub = atp_seqdesc_pub;
return sp;
}
@@ -7964,7 +8257,7 @@ static BioseqSetPtr BioseqSetPartialRead (AsnIoPtr aip, AsnTypePtr PNTR orig, Se
DataVal av;
AsnTypePtr atp, oldatp;
BioseqSetPtr bsp=NULL;
- SeqEntryPtr curr, next, hold = NULL;
+ SeqEntryPtr curr, next;
if (aip == NULL)
@@ -8007,7 +8300,7 @@ static BioseqSetPtr BioseqSetPartialRead (AsnIoPtr aip, AsnTypePtr PNTR orig, Se
bsp->date = DateAsnRead(aip, atp);
if (bsp->date == NULL) goto erret;
}
- else if (atp == sp->atp_desc)
+ else if (atp == sp->atp_set_desc)
{
bsp->descr = SeqDescrAsnRead(aip, atp);
if (bsp->descr == NULL) goto erret;
@@ -8180,7 +8473,7 @@ NLM_EXTERN SeqEntryPtr ReadFilteredAsn (FILE *fp, Boolean is_binary, CharPtr acc
} else {
bsp = BioseqFree (bsp);
}
- } else if (atp == sp->atp_desc) {
+ } else if (atp == sp->atp_set_desc) {
sdp = SeqDescrAsnRead (aip, atp);
ValNodeLink (&(bssp->descr), ValNodeExtractList (&sdp, Seq_descr_pub));
sdp = SeqDescrFree (sdp);
@@ -8356,7 +8649,7 @@ static Boolean BioseqSetWriteBefore (BioseqSetPtr bsp, AsnIoPtr aip, AsnTypePtr
}
if (bsp->descr != NULL) /* Seq-descr optional */
{
- if (! SeqDescrAsnWrite(bsp->descr, aip, sp->atp_desc)) goto erret;
+ if (! SeqDescrAsnWrite(bsp->descr, aip, sp->atp_set_desc)) goto erret;
}
if (! AsnOpenStruct(aip, sp->atp_seqset, (Pointer)bsp->seq_set)) goto erret;
@@ -8397,7 +8690,7 @@ static SeqEntryPtr BioseqSetCopyReplace (AsnIoPtr aip_in, AsnIoPtr aip_out, SeqE
DataVal av;
AsnTypePtr atp, oldatp;
BioseqSetPtr bsp=NULL, edited_set;
- SeqEntryPtr curr, next, hold = NULL;
+ SeqEntryPtr curr, next;
Boolean wrote_front = FALSE;
SeqDescrPtr tmp;
SeqEntryPtr tmp_sep, replace;
@@ -8445,7 +8738,7 @@ static SeqEntryPtr BioseqSetCopyReplace (AsnIoPtr aip_in, AsnIoPtr aip_out, SeqE
bsp->date = DateAsnRead(aip_in, atp);
if (bsp->date == NULL) goto erret;
}
- else if (atp == sp->atp_desc)
+ else if (atp == sp->atp_set_desc)
{
bsp->descr = SeqDescrAsnRead(aip_in, atp);
if (bsp->descr == NULL) goto erret;
@@ -8615,11 +8908,11 @@ static void SeqEntryCopyReplace (AsnIoPtr aip_in, AsnIoPtr aip_out, SeqEntryPtr
tmp_sep = SeqEntryFree (tmp_sep);
}
}
- else if (atp == sp->atp_desc)
+ else if (atp == sp->atp_set_desc)
{
/* write out descriptors from holding set instead */
bssp = edited->data.ptrvalue;
- SeqDescrAsnWrite (bssp->descr, aip_out, sp->atp_desc);
+ SeqDescrAsnWrite (bssp->descr, aip_out, sp->atp_set_desc);
}
sep = SeqEntryFree (sep);
@@ -8767,9 +9060,7 @@ NLM_EXTERN DescStreamPtr DescStreamNew (SeqDescPtr sdp, BioseqPtr parent)
}
if (parent != NULL) {
ds->owners = SeqIdDup (SeqIdFindBest (parent->id, SEQID_GENBANK));
- if (ds->owners != NULL && ds->owners->next != NULL) {
- ds->owners->next = SeqIdSetFree (ds->owners->next);
- }
+ ds->last_owner = ds->owners;
}
@@ -8804,6 +9095,80 @@ NLM_EXTERN ValNodePtr DescStreamListFree (ValNodePtr vnp)
}
+/* Reports whether two descriptors are to be treated as the same.
+ * Two NULL pointers match; a NULL never matches a non-NULL.  Descriptors of
+ * different choice never match.  Publication descriptors are compared with
+ * PubdescContentMatch; every other choice is compared with AsnIoMemComp
+ * using SeqDescAsnWrite.
+ */
+static Boolean DoDescriptorsMatch (SeqDescPtr sdp1, SeqDescPtr sdp2)
+{
+  if (sdp1 == NULL || sdp2 == NULL) {
+    /* with at least one side missing, only NULL/NULL counts as a match */
+    if (sdp1 == NULL && sdp2 == NULL) {
+      return TRUE;
+    }
+    return FALSE;
+  }
+
+  if (sdp1->choice != sdp2->choice) {
+    return FALSE;
+  }
+
+  if (sdp1->choice == Seq_descr_pub) {
+    return PubdescContentMatch (sdp1->data.ptrvalue, sdp2->data.ptrvalue);
+  }
+
+  return AsnIoMemComp (sdp1, sdp2, (AsnWriteFunc) SeqDescAsnWrite);
+}
+
+
+/* Records descriptor sdp, owned by parent, in the DescStream list held by vb.
+ *
+ * The list is scanned from the head: entries whose text compares greater
+ * than the label of sdp are skipped, then entries with an equal label are
+ * checked with DoDescriptorsMatch.
+ *   - If a matching entry is found, parent's best id is appended to that
+ *     entry's owner list and no new entry is made.
+ *   - Otherwise a new DescStream is inserted at the position where the scan
+ *     stopped (before the head, between two entries, or at the tail).
+ *
+ * vb->head and vb->tail are kept up to date.  Does nothing when vb is NULL.
+ */
+static void AddToDescStream (ValNodeBlockPtr vb, SeqDescPtr sdp, BioseqPtr parent)
+{
+  DescStreamPtr dsp = NULL;
+  CharPtr txt;
+  ValNodePtr vnp, prev = NULL, vnp_new;
+  SeqIdPtr owner, last;
+  Boolean add_to_prev = FALSE;
+
+  if (vb == NULL) {
+    return;
+  }
+  if (vb->head == NULL) {
+    ValNodeAddPointerToEnd (vb, 0, DescStreamNew (sdp, parent));
+    return;
+  }
+
+  txt = GetDescriptorLabel(sdp);
+
+  /* step over entries whose label sorts after txt */
+  vnp = vb->head;
+  while (vnp != NULL) {
+    dsp = vnp->data.ptrvalue;
+    if (StringCmp (txt, dsp->text) >= 0) {
+      break;
+    }
+    prev = vnp;
+    vnp = vnp->next;
+  }
+
+  /* among entries with the same label, look for an identical descriptor */
+  while (vnp != NULL) {
+    dsp = vnp->data.ptrvalue;
+    if (StringCmp (txt, dsp->text) != 0) {
+      break;
+    }
+    if (DoDescriptorsMatch (sdp, dsp->orig)) {
+      add_to_prev = TRUE;
+      break;
+    }
+    prev = vnp;
+    vnp = vnp->next;
+  }
+
+  if (add_to_prev) {
+    /* same descriptor seen before: just remember one more owner.  Guard
+     * against a missing parent / failed id copy so last_owner never becomes
+     * NULL while owners is non-empty.
+     */
+    owner = NULL;
+    if (parent != NULL) {
+      owner = SeqIdDup (SeqIdFindBest (parent->id, SEQID_GENBANK));
+    }
+    if (owner != NULL) {
+      last = (dsp->last_owner != NULL) ? dsp->last_owner : dsp->owners;
+      while (last != NULL && last->next != NULL) {
+        last = last->next;
+      }
+      if (last == NULL) {
+        dsp->owners = owner;
+      } else {
+        last->next = owner;
+      }
+      dsp->last_owner = owner;
+    }
+  } else {
+    vnp_new = ValNodeNew (NULL);
+    if (vnp_new != NULL) {
+      vnp_new->data.ptrvalue = DescStreamNew (sdp, parent);
+      if (prev == NULL) {
+        /* new entry goes in front of the current head; keep the rest of
+         * the list attached instead of replacing it
+         */
+        vnp_new->next = vb->head;
+        vb->head = vnp_new;
+      } else {
+        vnp_new->next = prev->next;
+        prev->next = vnp_new;
+      }
+      if (vnp_new->next == NULL) {
+        vb->tail = vnp_new;
+      }
+    }
+  }
+
+  /* the label is freed on every path that obtained it */
+  txt = MemFree (txt);
+}
+
+
static int DescStreamCompare (DescStreamPtr ds1, DescStreamPtr ds2)
{
if (ds1 == NULL && ds2 == NULL) {
@@ -8854,9 +9219,21 @@ static void RecombineDescStreamList (ValNodePtr PNTR p_list)
for (cmp = vnp->next;
cmp != NULL && (d2 = (DescStreamPtr) cmp->data.ptrvalue) != NULL && StringCmp (d1->text, d2->text) == 0;
cmp = cmp->next) {
- if (cmp->choice == 0 && AsnIoMemComp (d1->orig, d2->orig, (AsnWriteFunc) SeqDescAsnWrite)) {
- ValNodeLink (&d1->owners, d2->owners);
+ if (cmp->choice == 0 && DoDescriptorsMatch (d1->orig, d2->orig)) {
+ /* combine owner lists */
+ if (d1->last_owner == NULL) {
+ d1->owners = d2->owners;
+ d1->last_owner = d1->owners;
+ } else {
+ d1->last_owner->next = d2->owners;
+ }
d2->owners = NULL;
+ if (d1->last_owner != NULL) {
+ while (d1->last_owner->next != NULL) {
+ d1->last_owner = d1->last_owner->next;
+ }
+ }
+
/* add dependencies */
d1->num_dependent += d2->num_dependent;
/* mark choice for later extraction and deletion */
@@ -8871,52 +9248,46 @@ static void RecombineDescStreamList (ValNodePtr PNTR p_list)
}
-static void AddPubCitationsFromAnnot (SeqAnnotPtr annot, ValNodePtr desc_stream_list)
+static void AddPubCitationsFromFeat (SeqFeatPtr sfp, ValNodePtr desc_stream_list)
{
- SeqFeatPtr sfp;
ValNodePtr repl_v;
DescStreamPtr d;
PubdescPtr pdp;
ValNodePtr vnp;
ValNode vn_p, vn_c;
- Boolean found;
+ Boolean found = FALSE;
- if (annot == NULL || annot->type != 1)
+
+ if (sfp == NULL || sfp->cit == NULL || sfp->cit->choice != 1 || sfp->cit->data.ptrvalue == NULL)
{
return;
}
+
MemSet (&vn_p, 0, sizeof (ValNode));
MemSet (&vn_c, 0, sizeof (ValNode));
- for (sfp = annot->data; sfp != NULL; sfp = sfp->next)
+
+ /* note - there could be multiple identical copies of a pub in the list,
+ * we only need to count the match once - we will combine the totals
+ * in RecombineDescStreamList.
+ */
+ for (repl_v = desc_stream_list; repl_v != NULL && !found; repl_v = repl_v->next)
{
- if (sfp->cit == NULL || sfp->cit->choice != 1 || sfp->cit->data.ptrvalue == NULL)
+ d = (DescStreamPtr) repl_v->data.ptrvalue;
+ if (d->orig != NULL
+ && d->orig->choice == Seq_descr_pub
+ && (pdp = (PubdescPtr) d->orig->data.ptrvalue) != NULL)
{
- continue;
- }
- /* note - there could be multiple identical copies of a pub in the list,
- * we only need to count the match once - we will combine the totals
- * in RecombineDescStreamList.
- */
- found = FALSE;
- for (repl_v = desc_stream_list; repl_v != NULL && !found; repl_v = repl_v->next)
- {
- d = (DescStreamPtr) repl_v->data.ptrvalue;
- if (d->orig != NULL
- && d->orig->choice == Seq_descr_pub
- && (pdp = (PubdescPtr) d->orig->data.ptrvalue) != NULL)
- {
- for (vnp = sfp->cit->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
- /* each vnp is a pub */
- vn_p.choice = PUB_Equiv;
- vn_p.data.ptrvalue = pdp->pub;
- vn_c.choice = PUB_Equiv;
- vn_c.data.ptrvalue = vnp;
-
- if (PubLabelMatch (&vn_p, &vn_c) == 0)
- {
- d->num_dependent ++;
- found = TRUE;
- }
+ for (vnp = sfp->cit->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
+ /* each vnp is a pub */
+ vn_p.choice = PUB_Equiv;
+ vn_p.data.ptrvalue = pdp->pub;
+ vn_c.choice = PUB_Equiv;
+ vn_c.data.ptrvalue = vnp;
+
+ if (PubLabelMatch (&vn_p, &vn_c) == 0)
+ {
+ d->num_dependent ++;
+ found = TRUE;
}
}
}
@@ -8924,6 +9295,21 @@ static void AddPubCitationsFromAnnot (SeqAnnotPtr annot, ValNodePtr desc_stream_
}
+/* Applies AddPubCitationsFromFeat to every feature chained from annot->data.
+ * Annotations that are NULL or whose type is not 1 are ignored; type 1 is
+ * the only case in which the data pointer is walked as a SeqFeat list here.
+ */
+static void AddPubCitationsFromAnnot (SeqAnnotPtr annot, ValNodePtr desc_stream_list)
+{
+  SeqFeatPtr feat;
+
+  if (annot != NULL && annot->type == 1) {
+    feat = annot->data;
+    while (feat != NULL) {
+      AddPubCitationsFromFeat (feat, desc_stream_list);
+      feat = feat->next;
+    }
+  }
+}
+
+
static void AddPubCitationsFromAnnotSet (SeqAnnotPtr annot, ValNodePtr desc_stream_list)
{
while (annot != NULL)
@@ -9080,12 +9466,35 @@ static void FixCitationsInSet (BioseqSetPtr bssp, ValNodePtr desc_stream_list)
typedef struct streamreader {
- ValNodePtr desc_stream_list;
+ ValNodeBlock desc_list;
SeqDescrPtr parent_list;
- SeqIdPtr PNTR sip_list;
+ ValNodeBlock seqid_list;
} StreamReaderData, PNTR StreamReaderPtr;
+/* Reads and discards one element from aip, starting at type orig.
+ * The opening value for orig is read first; then ids and values are read
+ * and killed one after another until AsnReadId hands back orig again, at
+ * which point the closing value is consumed as well.
+ *
+ * Returns orig once the element has been consumed, or NULL when the first
+ * read fails or AsnReadId runs out before orig is seen again.
+ */
+static AsnTypePtr StreamingSkipElement (AsnIoPtr aip, AsnTypePtr orig, SetAtpPtr sp)
+{
+  AsnTypePtr cur;
+  DataVal val;
+
+  if (AsnReadVal(aip, orig, &val) <= 0) {
+    return NULL;
+  }
+
+  /* throw away everything up to the matching occurrence of orig */
+  for (cur = AsnReadId(aip, sp->amp, orig);
+       cur != NULL && cur != orig;
+       cur = AsnReadId(aip, sp->amp, cur)) {
+    AsnReadVal(aip, cur, &val);
+    AsnKillValue (cur, &val);
+  }
+
+  if (cur == NULL) {
+    return NULL;
+  }
+
+  /* cur is orig here: close structure */
+  AsnReadVal (aip, cur, &val);
+  AsnKillValue (cur, &val);
+  return cur;
+}
+
+
static void StreamingReadAny (AsnIoPtr aip, AsnTypePtr atp, SetAtpPtr sp, StreamReaderPtr sr);
static AsnTypePtr StreamingReadBioseqSet (AsnIoPtr aip, AsnTypePtr orig, SetAtpPtr sp, StreamReaderPtr sr)
@@ -9093,7 +9502,7 @@ static AsnTypePtr StreamingReadBioseqSet (AsnIoPtr aip, AsnTypePtr orig, SetAtpP
DataVal av;
AsnTypePtr atp, oldatp;
BioseqSetPtr bsp=NULL;
- SeqEntryPtr curr, next, hold = NULL;
+ SeqEntryPtr curr, next;
BioseqPtr nuc_bsp;
SeqDescPtr sdp = NULL;
SeqAnnotPtr annot;
@@ -9137,9 +9546,9 @@ static AsnTypePtr StreamingReadBioseqSet (AsnIoPtr aip, AsnTypePtr orig, SetAtpP
bsp->date = DateAsnRead(aip, atp);
if (bsp->date == NULL) goto erret;
}
- else if (atp == sp->atp_desc)
+ else if (atp == sp->atp_set_desc)
{
- bsp->descr = SeqDescrAsnRead(aip, atp);
+ bsp->descr = SeqDescrAsnRead (aip, atp);
if (bsp->descr == NULL) goto erret;
}
else if (atp == sp->atp_seqset && bsp->_class != BioseqseqSet_class_nuc_prot)
@@ -9218,22 +9627,22 @@ ret:
if (bsp->_class == BioseqseqSet_class_nuc_prot) {
if (bsp->seq_set != NULL && IS_Bioseq (bsp->seq_set)) {
nuc_bsp = bsp->seq_set->data.ptrvalue;
- if (sr->sip_list != NULL && nuc_bsp != NULL) {
- ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
+ if (nuc_bsp != NULL) {
+ ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
}
for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ ValNodeAddPointerToEnd (&(sr->desc_list), 0, DescStreamNew (sdp, nuc_bsp));
}
}
for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ ValNodeAddPointerToEnd (&(sr->desc_list), 0, DescStreamNew (sdp, nuc_bsp));
}
}
}
/* count feature citations */
- AddPubCitationsFromSet (bsp, sr->desc_stream_list);
+ AddPubCitationsFromSet (bsp, sr->desc_list.head);
}
bsp = BioseqSetFree (bsp);
@@ -9246,12 +9655,74 @@ erret:
}
+/* Reduced Bioseq reader: fills in only id, descr and annot of a new Bioseq.
+ * The element that follows id (and descr, when present) is consumed with
+ * StreamingSkipElement and not stored - presumably Bioseq.inst; confirm
+ * against the ASN.1 specification.
+ *
+ * orig == NULL means a self-contained Bioseq is read; otherwise orig is
+ * linked to sp->atp_bioseq for the duration of the read.
+ *
+ * Returns the new Bioseq, or NULL when aip is NULL, ProgMon returns FALSE,
+ * the starting type cannot be obtained, or any read step fails.  Failures
+ * after the starting type was obtained also set aip->io_failure.
+ */
+static BioseqPtr LIBCALL StreamingReadBioseq (AsnIoPtr aip, AsnTypePtr orig, SetAtpPtr sp)
+{
+ DataVal av;
+ AsnTypePtr atp;
+ BioseqPtr bsp=NULL;
+ Int2 level;
+
+ if (aip == NULL)
+ return bsp;
+
+ if (! ProgMon("Read Bioseq"))
+ return bsp;
+
+ if (orig == NULL) /* Bioseq ::= (self contained) */
+ atp = AsnReadId(aip, sp->amp, sp->atp_bioseq);
+ else
+ atp = AsnLinkType(orig, sp->atp_bioseq); /* link in local tree */
+ if (atp == NULL) return bsp;
+
+ bsp = BioseqNew();
+ if (bsp == NULL) goto erret;
+
+ /* NOTE(review): level is assigned here but not referenced again in this function */
+ level = AsnGetLevel(aip); /* for skipping */
+
+ if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* read the start struct */
+
+ atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret; /* id required, start struct */
+ bsp->id = SeqIdSetAsnRead(aip, atp, sp->atp_bioseq_id_E);
+ if (bsp->id == NULL) goto erret;
+
+ atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret;
+ if (atp == sp->atp_bioseq_desc) /* descr optional */
+ {
+ bsp->descr = SeqDescrAsnRead (aip, atp);
+ if (bsp->descr == NULL) goto erret;
+ atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret;
+ }
+
+ /* consume the next element without building it; nothing from it is stored in bsp */
+ atp = StreamingSkipElement(aip, atp, sp);
+ if (atp == NULL) goto erret;
+
+ atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret;
+
+ /* annot is read only when it is the next element */
+ if (atp == sp->atp_bioseq_annot)
+ {
+ bsp->annot = SeqAnnotSetAsnRead(aip, atp, sp->atp_bioseq_annot_e);
+ if (bsp->annot == NULL) goto erret;
+ atp = AsnReadId(aip, sp->amp, atp); if (atp == NULL) goto erret;
+ }
+
+ if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* end Bioseq */
+ret:
+ AsnUnlinkType(orig); /* unlink local tree */
+ return bsp;
+erret:
+ /* flag the stream as failed and discard the partly built Bioseq, then leave through ret */
+ aip->io_failure = TRUE;
+ bsp = BioseqFree(bsp);
+ goto ret;
+}
+
+
static void StreamingReadAny (AsnIoPtr aip, AsnTypePtr atp, SetAtpPtr sp, StreamReaderPtr sr)
{
BioseqPtr nuc_bsp;
SeqDescrPtr sdp = NULL;
AsnTypePtr atp_orig;
Boolean first = TRUE;
+ DataVal av;
if (aip == NULL || sp == NULL || sr == NULL) {
return;
@@ -9264,26 +9735,27 @@ static void StreamingReadAny (AsnIoPtr aip, AsnTypePtr atp, SetAtpPtr sp, Stream
atp = StreamingReadBioseqSet (aip, atp, sp, sr);
} else {
if (atp == sp->atp_seq) {
- nuc_bsp = BioseqAsnRead (aip, atp);
- if (sr->sip_list != NULL && nuc_bsp != NULL) {
- ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
+ nuc_bsp = StreamingReadBioseq (aip, atp, sp);
+ if (nuc_bsp != NULL) {
+ ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
}
for (sdp = nuc_bsp->descr; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ AddToDescStream (&(sr->desc_list), sdp, nuc_bsp);
}
}
for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ AddToDescStream (&(sr->desc_list), sdp, nuc_bsp);
}
}
- AddPubCitationsFromAnnotSet(nuc_bsp->annot, sr->desc_stream_list);
+ AddPubCitationsFromAnnotSet(nuc_bsp->annot, sr->desc_list.head);
nuc_bsp = BioseqFree (nuc_bsp);
- } else if (atp == sp->atp_desc) {
+ } else if (atp == sp->atp_set_desc) {
ValNodeLink (&(sr->parent_list), SeqDescrAsnRead (aip, atp));
} else {
- AsnReadVal (aip, atp, NULL);
+ AsnReadVal (aip, atp, &av);
+ AsnKillValue (atp, &av);
}
}
atp = AsnReadId (aip, sp->amp, atp);
@@ -9298,6 +9770,7 @@ static Boolean StreamingReadSeqEntry (AsnIoPtr aip, SetAtpPtr sp, StreamReaderPt
AsnTypePtr atp;
BioseqPtr nuc_bsp;
SeqDescPtr sdp;
+ DataVal av;
atp = AsnReadId (aip, sp->amp, sp->atp_seqentry);
if (atp == NULL) {
@@ -9312,22 +9785,23 @@ static Boolean StreamingReadSeqEntry (AsnIoPtr aip, SetAtpPtr sp, StreamReaderPt
} else {
if (atp == sp->atp_seq) {
nuc_bsp = BioseqAsnRead (aip, atp);
- if (sr->sip_list != NULL && nuc_bsp != NULL) {
- ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
+ if (nuc_bsp != NULL) {
+ ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
}
for (sdp = nuc_bsp->descr; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ AddToDescStream (&(sr->desc_list), sdp, nuc_bsp);
}
}
for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ AddToDescStream (&(sr->desc_list), sdp, nuc_bsp);
}
}
nuc_bsp = BioseqFree (nuc_bsp);
} else {
- AsnReadVal (aip, atp, NULL);
+ AsnReadVal (aip, atp, &av);
+ AsnKillValue (atp, &av);
}
}
return TRUE;
@@ -9361,17 +9835,17 @@ static Boolean StreamingReadSeqSubmit (AsnIoPtr aip, SetAtpPtr sp, StreamReaderP
} else {
if (atp == sp->atp_seq) {
nuc_bsp = BioseqAsnRead (aip, atp);
- if (sr->sip_list != NULL && nuc_bsp != NULL) {
- ValNodeLink (sr->sip_list, SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
+ if (nuc_bsp != NULL) {
+ ValNodeLinkToEnd (&(sr->seqid_list), SeqIdDup (SeqIdFindBest (nuc_bsp->id, SEQID_GENBANK)));
}
for (sdp = nuc_bsp->descr; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ AddToDescStream (&(sr->desc_list), sdp, nuc_bsp);
}
}
for (sdp = sr->parent_list; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_pub) {
- ValNodeAddPointer (&(sr->desc_stream_list), 0, DescStreamNew (sdp, nuc_bsp));
+ AddToDescStream (&(sr->desc_list), sdp, nuc_bsp);
}
}
nuc_bsp = BioseqFree (nuc_bsp);
@@ -9393,11 +9867,9 @@ NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Bool
{
AsnIoPtr aip;
SetAtpPtr sp;
- AsnTypePtr atp = NULL;
- SeqEntryPtr sep = NULL, last_sep = NULL;
- BioseqSetPtr bssp = NULL;
StreamReaderData sr;
Boolean rval;
+ ValNodePtr tmp;
if (fp == NULL) return NULL;
@@ -9414,7 +9886,9 @@ NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Bool
}
MemSet (&sr, 0, sizeof (StreamReaderData));
- sr.sip_list = sip_list;
+ if (sip_list != NULL) {
+ InitValNodeBlock (&(sr.seqid_list), *sip_list);
+ }
if (is_submit) {
StreamingReadSeqSubmit (aip, sp, &sr);
@@ -9430,9 +9904,20 @@ NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Bool
sr.parent_list = SeqDescrFree (sr.parent_list);
/* combine list items */
- RecombineDescStreamList(&(sr.desc_stream_list));
+ RecombineDescStreamList(&(sr.desc_list.head));
+
+ if (sip_list == NULL) {
+ sr.seqid_list.head = SeqIdSetFree(sr.seqid_list.head);
+ } else {
+ *sip_list = sr.seqid_list.head;
+ }
- return sr.desc_stream_list;
+ /* set up on-all */
+ tmp = SeqIdListToValNodeSeqIdList (*sip_list);
+ SetOnAllValsForDescStreamList(sr.desc_list.head, tmp);
+ tmp = ValNodeSeqIdListFree (tmp);
+
+ return sr.desc_list.head;
}
@@ -9440,7 +9925,7 @@ static SeqDescrPtr GetDescriptorsForBioseq (BioseqPtr bsp, ValNodePtr desc_strea
{
ValNodePtr vnp;
DescStreamPtr d;
- SeqIdPtr sip;
+ SeqIdPtr sip, sip_tmp;
Boolean found;
SeqDescrPtr sdp = NULL;
@@ -9452,8 +9937,17 @@ static SeqDescrPtr GetDescriptorsForBioseq (BioseqPtr bsp, ValNodePtr desc_strea
d = (DescStreamPtr) vnp->data.ptrvalue;
if (d->replace != NULL) {
found = FALSE;
- for (sip = d->owners; sip != NULL && !found; sip = sip->next) {
- found = SeqIdIn (sip, bsp->id);
+ if (d->on_all) {
+ found = TRUE;
+ } else {
+ /* note - we can use just the best one, because that's the one that was copied */
+ sip = SeqIdFindBest (bsp->id, SEQID_GENBANK);
+ found = FALSE;
+ for (sip_tmp = d->owners; sip_tmp != NULL && !found; sip_tmp = sip_tmp->next) {
+ if (SeqIdComp(sip, sip_tmp) == SIC_YES) {
+ found = TRUE;
+ }
+ }
}
if (found) {
ValNodeLink (&sdp, AsnIoMemCopy (d->replace, (AsnReadFunc) SeqDescAsnRead, (AsnWriteFunc) SeqDescAsnWrite));
@@ -9506,8 +10000,7 @@ StreamingReadWriteBioseqSet
DataVal av;
AsnTypePtr atp, oldatp;
BioseqSetPtr bsp=NULL;
- SeqEntryPtr curr, next, hold = NULL;
- SeqDescPtr sdp = NULL;
+ SeqEntryPtr curr, next;
SeqDescrPtr tmp;
SeqAnnotPtr annot;
@@ -9549,7 +10042,7 @@ StreamingReadWriteBioseqSet
bsp->date = DateAsnRead(aip_in, atp);
if (bsp->date == NULL) goto erret;
}
- else if (atp == sp->atp_desc)
+ else if (atp == sp->atp_set_desc)
{
bsp->descr = SeqDescrAsnRead(aip_in, atp);
if (bsp->descr == NULL) goto erret;
@@ -9826,13 +10319,58 @@ NLM_EXTERN void WriteAsnWithReplacedDescriptors (ValNodePtr desc_stream_list, FI
rval = StreamingReadWriteSeqEntry(desc_stream_list, aip_in, aip_out, sp);
AsnIoFlush (aip_out);
}
- AsnIoClose (aip_in);
- AsnIoClose (aip_out);
+ AsnIoFree (aip_in, FALSE);
+ AsnIoFree (aip_out, FALSE);
sp = MemFree (sp);
}
+/* Reports whether sip_list and all_sip hold the same Seq-ids.
+ *
+ * Returns FALSE when either list is NULL or when the lists differ in length.
+ * Otherwise every id in sip_list claims at most one not-yet-claimed node of
+ * all_sip that compares SIC_YES to it; the lists match when no node of
+ * all_sip is left unclaimed.  The choice field of the all_sip nodes is used
+ * as the "claimed" marker and is reset to 0 on every node before returning.
+ */
+NLM_EXTERN Boolean IdListsMatch (SeqIdPtr sip_list, ValNodePtr all_sip)
+{
+  SeqIdPtr   sip;
+  ValNodePtr node;
+  Boolean    all_claimed = TRUE;
+
+  if (sip_list == NULL || all_sip == NULL
+      || ValNodeLen (sip_list) != ValNodeLen (all_sip)) {
+    return FALSE;
+  }
+
+  /* let each id claim the first unclaimed node that matches it */
+  for (sip = sip_list; sip != NULL; sip = sip->next) {
+    for (node = all_sip; node != NULL; node = node->next) {
+      if (node->choice == 0 && SeqIdComp (node->data.ptrvalue, sip) == SIC_YES) {
+        node->choice = 1;
+        break;
+      }
+    }
+  }
+
+  /* look for unclaimed nodes and clear the markers in the same sweep */
+  for (node = all_sip; node != NULL; node = node->next) {
+    if (node->choice == 0) {
+      all_claimed = FALSE;
+    }
+    node->choice = 0;
+  }
+  return all_claimed;
+}
+
+
+/* Recomputes the on_all flag of every DescStream entry in desc_list: the
+ * flag receives whatever IdListsMatch reports for the entry's owners
+ * against all_sip.
+ */
+NLM_EXTERN void SetOnAllValsForDescStreamList (ValNodePtr desc_list, ValNodePtr all_sip)
+{
+  DescStreamPtr stream;
+
+  while (desc_list != NULL) {
+    stream = (DescStreamPtr) desc_list->data.ptrvalue;
+    stream->on_all = IdListsMatch (stream->owners, all_sip);
+    desc_list = desc_list->next;
+  }
+}
+
+
/* ReadAsnFastaOrFlatFileEx reads lines, looking for starts of ASN.1, FASTA, GenBank, EMBL,
or GenPept files. It then calls the appropriate read function, which is responsible for
reading the sequence (or object) and restoring the file pointer to the beginning of the
@@ -10765,7 +11303,7 @@ NLM_EXTERN Pointer ReadFeatureTableFile (
annotname = GetSeqId (seqid, line, sizeof (seqid), TRUE, FALSE);
if (! HasNoText (seqid)) {
- sap = ReadFeatureTable (&fc, seqid, annotname);
+ sap = ReadFeatureTableEx (&fc, seqid, annotname, lineP);
if (sap != NULL && sap->type == 1) {
sfp = (SeqFeatPtr) sap->data;
prevsfp = (Pointer PNTR) &(sap->data);
@@ -13041,6 +13579,7 @@ ReplaceItemPair AbbreviationList[] = {
{ "trna-", "tRNA-" },
{ "var.", "var." },
{ "var..", "var.." },
+ { "uk", "UK" },
{ "usa", "USA" },
{ "U.S.A.", "USA" },
{ "U.S.A", "USA" },
@@ -13173,6 +13712,122 @@ FixCapitalizationInElement
}
+/* Misspelled or mis-punctuated country spellings and the text each one is
+ * replaced with by FixCapitalizationInCountryStringEx. */
+static ReplaceItemPair s_CountryFixes[] = {
+  { "chnia", "China" },
+  { "pr china", "P.R. China" },
+  { "prchina", "P.R. China" },
+  { "p.r.china", "P.R. China" },
+  { "p.r china", "P.R. China" },
+  { "p, r, china", "P.R. China" },
+};
+
+/* Number of entries in s_CountryFixes.  Parenthesized so the quotient stays
+ * a single value in whatever expression the macro is expanded into. */
+#define NUM_CountryFixes (sizeof (s_CountryFixes) / sizeof (s_CountryFixes[0]))
+
+
+/* Inserts one space after every comma in *pString that is directly followed
+ * by a non-space character (a comma that ends the string is left alone).
+ *
+ * When at least one space has to be added, the text is rebuilt in a newly
+ * allocated buffer, the old buffer is freed and *pString is pointed at the
+ * new one.  When nothing needs to change, or when the allocation fails,
+ * *pString is left untouched.
+ */
+static void InsertMissingSpacesAfterCommas (CharPtr PNTR pString)
+{
+  Int4    num_new_spaces = 0;
+  CharPtr str, cp, new_str, src, dst;
+
+  if (pString == NULL || *pString == NULL) {
+    return;
+  }
+
+  str = *pString;
+  /* first pass: count the spaces that must be added */
+  for (cp = StringChr (str, ','); cp != NULL; cp = StringChr (cp + 1, ',')) {
+    /* cast: the <ctype.h> classifiers are undefined for negative char values */
+    if (*(cp + 1) != 0 && !isspace ((unsigned char) *(cp + 1))) {
+      num_new_spaces++;
+    }
+  }
+
+  if (num_new_spaces == 0) {
+    return;
+  }
+
+  new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + num_new_spaces + 1));
+  if (new_str == NULL) {
+    /* keep the original text rather than writing through a NULL buffer */
+    return;
+  }
+
+  /* second pass: copy, adding a space wherever the first pass counted one */
+  dst = new_str;
+  for (src = str; *src != 0; ++src) {
+    *dst++ = *src;
+    if (*src == ',' && *(src + 1) != 0 && !isspace ((unsigned char) *(src + 1))) {
+      *dst++ = ' ';
+    }
+  }
+  *dst = 0;
+
+  str = MemFree (str);
+  *pString = new_str;
+}
+
+
+/* Inserts a space after each "No." in *pString that is directly followed by
+ * a letter or a digit (e.g. "No.5" becomes "No. 5").
+ *
+ * The text is always rebuilt in a newly allocated buffer, and every match
+ * returned by StringISearch is written back as the literal "No.", so a match
+ * that differs only in case is normalized as a side effect.  The old buffer
+ * is freed and *pString is pointed at the new one.  If the allocation fails,
+ * *pString is left untouched.
+ */
+static void InsertMissingSpacesAfterNo (CharPtr PNTR pString)
+{
+  Int4    num_new_spaces = 0;
+  CharPtr str, cp, new_str, src;
+
+  if (pString == NULL || *pString == NULL) {
+    return;
+  }
+
+  str = *pString;
+  /* first pass: count the spaces that must be added */
+  for (cp = StringISearch (str, "No."); cp != NULL; cp = StringISearch (cp + 3, "No.")) {
+    /* cast: the <ctype.h> classifiers are undefined for negative char values;
+       isalnum is exactly "isalpha or isdigit" */
+    if (isalnum ((unsigned char) *(cp + 3))) {
+      num_new_spaces++;
+    }
+  }
+
+  new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + num_new_spaces + 1));
+  if (new_str == NULL) {
+    /* keep the original text rather than writing through a NULL buffer */
+    return;
+  }
+  new_str[0] = 0;
+
+  /* second pass: copy the text between matches, then the match itself */
+  src = str;
+  for (cp = StringISearch (src, "No."); cp != NULL; cp = StringISearch (src, "No.")) {
+    StringNCat (new_str, src, cp - src);
+    StringCat (new_str, "No.");
+    if (isalnum ((unsigned char) *(cp + 3))) {
+      StringCat (new_str, " ");
+    }
+    src = cp + 3;
+  }
+  /* whatever follows the last match */
+  StringCat (new_str, src);
+
+  str = MemFree (str);
+  *pString = new_str;
+}
+
+
+/* Cleans up a country string in place.
+ *
+ * Nothing is done when pCountry is NULL or the string compares equal to "US"
+ * ignoring case.  Otherwise missing spaces after commas and after "No." are
+ * inserted, capitalization is fixed unless punct_only is set, and finally
+ * every entry of s_CountryFixes is applied.
+ */
+NLM_EXTERN void FixCapitalizationInCountryStringEx (CharPtr PNTR pCountry, Boolean punct_only)
+{
+  Int4 fix_num;
+
+  if (pCountry == NULL) {
+    return;
+  }
+  if (StringICmp (*pCountry, "US") == 0) {
+    return;
+  }
+
+  InsertMissingSpacesAfterCommas (pCountry);
+  InsertMissingSpacesAfterNo (pCountry);
+  if (!punct_only) {
+    FixCapitalizationInElement (pCountry, TRUE, TRUE, FALSE);
+  }
+
+  fix_num = 0;
+  while (fix_num < NUM_CountryFixes) {
+    FindReplaceString (pCountry, s_CountryFixes[fix_num].FindString,
+                       s_CountryFixes[fix_num].ReplaceString, FALSE, TRUE);
+    fix_num++;
+  }
+}
+
+/* Convenience wrapper: runs FixCapitalizationInCountryStringEx with
+ * punct_only set to FALSE, i.e. capitalization is fixed as well as
+ * punctuation spacing. */
+NLM_EXTERN void FixCapitalizationInCountryString (CharPtr PNTR pCountry)
+{
+ FixCapitalizationInCountryStringEx (pCountry, FALSE);
+}
+
+
NLM_EXTERN void FixCapitalizationInAuthor (AuthorPtr pAuthor)
{
NameStdPtr pNameStandard;
@@ -13196,32 +13851,87 @@ NLM_EXTERN void FixCapitalizationInAuthor (AuthorPtr pAuthor)
}
-NLM_EXTERN void FixCapsInPubAffil (AffilPtr affil)
+/* Replaces affil->sub with the value GetStateAbbreviation returns for it,
+ * but only when affil->country is exactly "USA" and an abbreviation is
+ * returned.
+ *
+ * When lip is supplied, the change is written to lip->fp (if that is open)
+ * and lip->data_in_log is set.
+ */
+NLM_EXTERN void FixStateAbbreviationsInAffil (AffilPtr affil, LogInfoPtr lip)
+{
+  CharPtr short_name;
+
+  if (affil == NULL || StringCmp (affil->country, "USA") != 0) {
+    return;
+  }
+
+  short_name = GetStateAbbreviation (affil->sub);
+  if (short_name == NULL) {
+    return;
+  }
+
+  if (lip != NULL) {
+    if (lip->fp != NULL) {
+      fprintf (lip->fp, "Changed %s to %s\n", affil->sub, short_name);
+    }
+    lip->data_in_log = TRUE;
+  }
+
+  /* log first, then swap the stored value */
+  affil->sub = MemFree (affil->sub);
+  affil->sub = StringSave (short_name);
+}
+
+
+NLM_EXTERN void FixCapsInPubAffilEx (AffilPtr affil, Boolean punct_only)
{
if (affil == NULL) return;
- FixCapitalizationInElement (&(affil->affil), TRUE, TRUE, FALSE);
- FixAffiliationShortWordsInElement (&(affil->affil));
- FixCapitalizationInElement (&(affil->div), TRUE, TRUE, FALSE);
- FixAffiliationShortWordsInElement (&(affil->div));
- FixCapitalizationInElement (&(affil->city), FALSE, TRUE, FALSE);
- FixAffiliationShortWordsInElement (&(affil->city));
+ if (!punct_only) {
+ FixCapitalizationInElement (&(affil->affil), TRUE, TRUE, FALSE);
+ FixAffiliationShortWordsInElement (&(affil->affil));
+ FixCapitalizationInElement (&(affil->div), TRUE, TRUE, FALSE);
+ FixAffiliationShortWordsInElement (&(affil->div));
+ FixCapitalizationInElement (&(affil->city), FALSE, TRUE, FALSE);
+ FixAffiliationShortWordsInElement (&(affil->city));
+ }
+ FixKnownAbbreviationsInElement (&(affil->affil));
+ FixKnownAbbreviationsInElement (&(affil->street));
+ FixKnownAbbreviationsInElement (&(affil->div));
+ FixKnownAbbreviationsInElement (&(affil->city));
+
+ InsertMissingSpacesAfterCommas (&(affil->affil));
+ InsertMissingSpacesAfterNo (&(affil->affil));
+ InsertMissingSpacesAfterCommas (&(affil->div));
+ InsertMissingSpacesAfterNo (&(affil->div));
+ InsertMissingSpacesAfterCommas (&(affil->city));
+ InsertMissingSpacesAfterNo (&(affil->city));
/* special handling for states */
- if (affil->sub != NULL && StringLen (affil->sub) == 2
- && isalpha((Int4)(affil->sub[0])) && isalpha((Int4)(affil->sub[1])))
- {
- affil->sub[0] = toupper(affil->sub[0]);
- affil->sub[1] = toupper(affil->sub[1]);
+ if (punct_only) {
+ InsertMissingSpacesAfterCommas (&(affil->sub));
} else {
- FixCapitalizationInElement (&(affil->sub), FALSE, TRUE, FALSE);
- FixAffiliationShortWordsInElement (&(affil->sub));
+ if (affil->sub != NULL && StringLen (affil->sub) == 2
+ && isalpha((Int4)(affil->sub[0])) && isalpha((Int4)(affil->sub[1])))
+ {
+ affil->sub[0] = toupper(affil->sub[0]);
+ affil->sub[1] = toupper(affil->sub[1]);
+ } else {
+ FixCapitalizationInElement (&(affil->sub), FALSE, TRUE, FALSE);
+ FixAffiliationShortWordsInElement (&(affil->sub));
+ InsertMissingSpacesAfterCommas (&(affil->sub));
+ }
+ }
+
+ if (!punct_only) {
+ FixCapitalizationInCountryString (&(affil->country));
+ FixCapitalizationInElement (&(affil->street), FALSE, TRUE, FALSE);
+ FixAffiliationShortWordsInElement (&(affil->street));
+ FixStateAbbreviationsInAffil (affil, NULL);
}
- FixCapitalizationInElement (&(affil->country), TRUE, TRUE, FALSE);
- FixCapitalizationInElement (&(affil->street), FALSE, TRUE, FALSE);
- FixAffiliationShortWordsInElement (&(affil->street));
+ if (StringCmp (affil->country, "USA") == 0) {
+ FixStateAbbreviationsInAffil (affil, NULL);
+ }
+ InsertMissingSpacesAfterCommas (&(affil->street));
+ InsertMissingSpacesAfterNo (&(affil->street));
}
+/* Convenience wrapper: runs FixCapsInPubAffilEx with punct_only set to
+ * FALSE. */
+NLM_EXTERN void FixCapsInPubAffil (AffilPtr affil)
+{
+ FixCapsInPubAffilEx (affil, FALSE);
+}
+
ReplaceItemPair AffiliationShortWordList[] = {
{ "Au", "au" } ,
{ "Aux", "aux" } ,
@@ -13236,6 +13946,10 @@ ReplaceItemPair AffiliationShortWordList[] = {
{ "Le", "le" },
{ "Les", "les" },
{ "Rue", "rue" },
+ { "Po Box", "PO Box" },
+ { "Pobox", "PO Box" },
+ { "P.O box", "P.O. Box" },
+ { "P.Obox", "P.O. Box" },
{ "Y", "y" }
};
@@ -13271,6 +13985,37 @@ NLM_EXTERN void FixAffiliationShortWordsInElement (CharPtr PNTR pEl)
}
+/* Find/replace pairs applied by FixKnownAbbreviationsInElement: spelling and
+ * punctuation variants of "PO Box" / "P.O. Box" and of "P.R. China", each
+ * paired with the form it is rewritten to. */
+ReplaceItemPair KnownAbbreviationList[] = {
+ { "po box", "PO Box" },
+ { "Pobox", "PO Box" },
+ { "P.O box", "P.O. Box" },
+ { "P.Obox", "P.O. Box" },
+ { "PO.Box", "P.O. Box" },
+ { "PO. Box", "P.O. Box" },
+ { "pr china", "P.R. China" },
+ { "prchina", "P.R. China" },
+ { "p.r.china", "P.R. China" },
+ { "p.r china", "P.R. China" },
+ { "p, r, china", "P.R. China" },
+ { "p,r, china", "P.R. China" },
+ { "p,r,china", "P.R. China" }
+};
+
+/* Applies every find/replace pair of KnownAbbreviationList, in table order,
+ * to the string *pEl.  Does nothing when pEl or *pEl is NULL.
+ */
+NLM_EXTERN void FixKnownAbbreviationsInElement (CharPtr PNTR pEl)
+{
+  Int2 pair_num = 0;
+
+  if (pEl == NULL || *pEl == NULL) {
+    return;
+  }
+
+  while (pair_num < sizeof (KnownAbbreviationList) / sizeof (ReplaceItemPair)) {
+    FindReplaceString (pEl, KnownAbbreviationList[pair_num].FindString,
+                       KnownAbbreviationList[pair_num].ReplaceString, FALSE, TRUE);
+    pair_num++;
+  }
+}
+
+
NLM_EXTERN void FixOrgNamesInString (CharPtr str, ValNodePtr org_names)
{
ValNodePtr vnp;
@@ -14697,7 +15442,7 @@ NLM_EXTERN void AdjustFeatureForGapsCallback (SeqFeatPtr sfp, Pointer data)
SeqMgrFeatContext fcontext;
Boolean set_partial_ends;
- if (sfp == NULL || data == NULL) return;
+ if (sfp == NULL || data == NULL || sfp->location == NULL || sfp->idx.deleteme) return;
afgp = (AdjustFeatForGapPtr) data;
@@ -14861,7 +15606,7 @@ NLM_EXTERN void AdjustCDSLocationsForUnknownGapsCallback (SeqFeatPtr sfp, Pointe
agd.options = eAdjustFeatForGap_unknown_gaps | eAdjustFeatForGap_make_partial
| eAdjustFeatForGap_split_internal | eAdjustFeatForGap_trim_ends;
- agd.align_func = data;
+ agd.align_func = (GlobalAlignFunc) data;
agd.features_in_gap = NULL;
@@ -15185,6 +15930,28 @@ NLM_EXTERN ValNodePtr ReportCoverageForBioseqSeqHist (BioseqPtr bsp)
}
+/* Reports whether the assembly alignments in bsp->hist all refer to one
+ * sequence.
+ *
+ * Returns FALSE when bsp, its history or its assembly list is missing, TRUE
+ * when there is exactly one alignment, and otherwise TRUE only if no later
+ * alignment's second Seq-id compares SIC_NO against the first alignment's
+ * second Seq-id.
+ *
+ * NOTE(review): the values returned by AlnMgr2GetNthSeqIdPtr are never freed
+ * here - confirm whether that function hands ownership to the caller.
+ */
+static Boolean IsSingleEstCoverage (BioseqPtr bsp)
+{
+  SeqAlignPtr first_aln, aln;
+  SeqIdPtr    ref_id;
+
+  if (bsp == NULL || bsp->hist == NULL || bsp->hist->assembly == NULL) {
+    return FALSE;
+  }
+
+  first_aln = bsp->hist->assembly;
+  if (first_aln->next == NULL) {
+    return TRUE;
+  }
+
+  ref_id = AlnMgr2GetNthSeqIdPtr (first_aln, 2);
+  for (aln = first_aln->next; aln != NULL; aln = aln->next) {
+    if (SeqIdComp (AlnMgr2GetNthSeqIdPtr (aln, 2), ref_id) == SIC_NO) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+
NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr list)
{
ValNodePtr range_list = NULL, new_list;
@@ -15192,6 +15959,9 @@ NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr l
Char id_str[255];
CharPtr good_fmt = "Coverage is complete for %s";
CharPtr msg;
+ Int4 num_single = 0, num_mult = 0;
+ CharPtr single_fmt = "%d records are covered by a single EST";
+ CharPtr mult_fmt = "%d records are covered by multiple ESTs";
while (list != NULL) {
t = (TranscriptomeIdsPtr) list->data.ptrvalue;
@@ -15205,9 +15975,20 @@ NLM_EXTERN ValNodePtr ReportCoverageForTranscriptomeIdsListSeqHist (ValNodePtr l
} else {
ValNodeLink (&range_list, new_list);
}
+ if (IsSingleEstCoverage(t->consensus_bsp)) {
+ num_single++;
+ } else {
+ num_mult++;
+ }
}
list = list->next;
}
+ msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (single_fmt) + 15));
+ sprintf (msg, single_fmt, num_single);
+ ValNodeAddPointer (&range_list, 0, msg);
+ msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mult_fmt) + 15));
+ sprintf (msg, mult_fmt, num_mult);
+ ValNodeAddPointer (&range_list, 0, msg);
return range_list;
}
@@ -15573,9 +16354,9 @@ MakeTranscriptomeAssemblySeqHist
for (vnp = t->token_list; vnp != NULL; vnp = vnp->next) {
if (StringChr (vnp->data.ptrvalue, '|') == NULL) {
- sprintf (id_buf, "gb|%s", vnp->data.ptrvalue);
+ sprintf (id_buf, "gb|%s", (char *) vnp->data.ptrvalue);
} else {
- sprintf (id_buf, "%s", vnp->data.ptrvalue);
+ sprintf (id_buf, "%s", (char *) vnp->data.ptrvalue);
}
sip = MakeSeqID (id_buf);
read_bsp = BioseqLockById (sip);
@@ -16256,6 +17037,7 @@ ReadOneColumnListEx
if (plen == StringLen (p_start))
{
found_end = TRUE;
+ p_end = p_start + plen;
}
else
{
@@ -16500,13 +17282,31 @@ NLM_EXTERN ValNodePtr FreeTabTable (ValNodePtr row_list)
}
+/* Builds a deep copy of a tab table (a list of rows whose data.ptrvalue is a
+ * list of cells).  Each cell's text is duplicated with StringSave and its
+ * choice value is carried over; the new row nodes get choice 0.  Returns the
+ * head of the new row list, or NULL for an empty table.
+ */
+NLM_EXTERN ValNodePtr CopyTabTable (ValNodePtr row_list)
+{
+  ValNodeBlock rows_out;
+  ValNodeBlock cells_out;
+  ValNodePtr   cell;
+
+  InitValNodeBlock (&rows_out, NULL);
+  while (row_list != NULL) {
+    InitValNodeBlock (&cells_out, NULL);
+    cell = row_list->data.ptrvalue;
+    while (cell != NULL) {
+      ValNodeAddPointerToEnd (&cells_out, cell->choice, StringSave (cell->data.ptrvalue));
+      cell = cell->next;
+    }
+    ValNodeAddPointerToEnd (&rows_out, 0, cells_out.head);
+    row_list = row_list->next;
+  }
+  return rows_out.head;
+}
+
+
NLM_EXTERN void WriteTabTableToFile (ValNodePtr table, FILE *fp)
{
ValNodePtr line, vnp;
for (line = table; line != NULL; line = line->next) {
for (vnp = line->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
- fprintf (fp, "%s%s", (CharPtr) vnp->data.ptrvalue, vnp->next == NULL ? "\n" : "\t");
+ fprintf (fp, "%s%s", vnp->data.ptrvalue == NULL ? "" : (CharPtr) vnp->data.ptrvalue, vnp->next == NULL ? "\n" : "\t");
}
}
}
@@ -16597,6 +17397,49 @@ NLM_EXTERN void ReparseTabTableConvertFirstSpaceToTab (ValNodePtr row_list)
}
+/* Splits one column of every row of a tab table at a delimiter character.
+ *
+ * row_list         - list of rows; each row's data.ptrvalue is its cell list
+ * delimiter        - character at which the cell text is cut
+ * col              - zero-based index of the column to split; rows with
+ *                    fewer columns are left alone
+ * stop_after_first - when TRUE only the first delimiter is used; when FALSE
+ *                    each newly created cell is examined in turn, so the
+ *                    text is split at every delimiter
+ *
+ * The cell text is modified in place (the delimiter is overwritten with a
+ * terminator) and the remainder is saved into a new cell inserted directly
+ * after it.  A delimiter that is the last character of the text causes no
+ * split.
+ */
+NLM_EXTERN void ReparseTabTableSeparateColumnAtDelimiter (ValNodePtr row_list, Char delimiter, Int4 col, Boolean stop_after_first)
+{
+ ValNodePtr line_vnp, col_vnp, new_vnp, next_col;
+ CharPtr first_text, second_text, first_space;
+ Int4 col_num;
+
+ for (line_vnp = row_list; line_vnp != NULL; line_vnp = line_vnp->next)
+ {
+ /* walk to the requested column of this row */
+ col_vnp = line_vnp->data.ptrvalue;
+ col_num = 0;
+ while (col_num < col && col_vnp != NULL) {
+ col_num++;
+ col_vnp = col_vnp->next;
+ }
+ if (col_vnp != NULL) {
+ /* remember the cell that originally followed, so the loop stops there
+ even after new cells have been spliced in front of it */
+ next_col = col_vnp->next;
+ while (col_vnp != next_col) {
+ first_text = col_vnp->data.ptrvalue;
+ /* first_space points at the delimiter, whatever character that is */
+ if ((first_space = StringChr (first_text, delimiter)) != NULL) {
+ second_text = first_space + 1;
+ if (*second_text != 0) {
+ /* terminate first text at first delimiter */
+ *first_space = 0;
+ /* create new column with text after first delimiter */
+ second_text = StringSave (second_text);
+ new_vnp = ValNodeNew (NULL);
+ new_vnp->data.ptrvalue = second_text;
+ /* insert new column */
+ new_vnp->next = col_vnp->next;
+ col_vnp->next = new_vnp;
+ }
+ }
+ if (stop_after_first) {
+ col_vnp = next_col;
+ } else {
+ /* move on to the cell just inserted (or to next_col if none was) */
+ col_vnp = col_vnp->next;
+ }
+ }
+ }
+ }
+}
+
+
static Int4 LenToNextTabOrMultispace (CharPtr cp)
{
Int4 len = 0;
@@ -16621,7 +17464,6 @@ static Int4 LenToNextTabOrMultispace (CharPtr cp)
static Int4 LenTabOrMultispace (CharPtr cp)
{
Int4 len = 0;
- Boolean found = FALSE;
if (StringHasNoText (cp)) {
len = 0;
@@ -16710,11 +17552,21 @@ NLM_EXTERN void CombineTabTableColumns (ValNodePtr row_list, ValNodePtr column_p
add_vnp = col_vnp;
col_prev = col_vnp;
} else {
- len = StringLen (add_vnp->data.ptrvalue) + StringLen (delimiter) + StringLen (col_vnp->data.ptrvalue) + 1;
- tmp = (CharPtr) MemNew (sizeof (Char) * len);
- sprintf (tmp, "%s%s%s", add_vnp->data.ptrvalue, delimiter == NULL ? "" : delimiter, col_vnp->data.ptrvalue);
- add_vnp->data.ptrvalue = MemFree (add_vnp->data.ptrvalue);
- add_vnp->data.ptrvalue = tmp;
+ if (StringHasNoText (col_vnp->data.ptrvalue)) {
+ /* do nothing - no need to add blank to blank */
+ } else if (StringHasNoText (add_vnp->data.ptrvalue)) {
+ /* move from col_vnp */
+ add_vnp->data.ptrvalue = MemFree (add_vnp->data.ptrvalue);
+ add_vnp->data.ptrvalue = col_vnp->data.ptrvalue;
+ col_vnp->data.ptrvalue = NULL;
+ } else {
+ /* combine with delimiter */
+ len = StringLen (add_vnp->data.ptrvalue) + StringLen (delimiter) + StringLen (col_vnp->data.ptrvalue) + 1;
+ tmp = (CharPtr) MemNew (sizeof (Char) * len);
+ sprintf (tmp, "%s%s%s", (char *) add_vnp->data.ptrvalue, delimiter == NULL ? "" : delimiter, (char *) col_vnp->data.ptrvalue);
+ add_vnp->data.ptrvalue = MemFree (add_vnp->data.ptrvalue);
+ add_vnp->data.ptrvalue = tmp;
+ }
col_prev->next = col_vnp->next;
col_vnp->next = NULL;
col_vnp = ValNodeFreeData (col_vnp);
@@ -16753,6 +17605,151 @@ NLM_EXTERN void AddTextToTabTableColumn (ValNodePtr row_list, Int4 col, CharPtr
}
+/* Comparison helper for sorting table rows by the text of one column.
+ *
+ * ptr1 / ptr2 point at ValNodePtr row nodes; column is one-based.  A NULL
+ * argument or NULL row compares equal.  If one row has fewer cells than
+ * column, that row sorts first; if both do, they compare equal.  Otherwise
+ * the result is StringCmp of the two cell values.
+ */
+static int LIBCALLBACK SortTableRowByColumn (VoidPtr ptr1, VoidPtr ptr2, Int4 column)
+{
+  ValNodePtr row_a, row_b;
+  ValNodePtr cell_a, cell_b;
+  Int4       pos;
+
+  if (ptr1 == NULL || ptr2 == NULL) {
+    return 0;
+  }
+  row_a = *((ValNodePtr PNTR) ptr1);
+  row_b = *((ValNodePtr PNTR) ptr2);
+  if (row_a == NULL || row_b == NULL) {
+    return 0;
+  }
+
+  /* advance both rows in step until the requested column or a row end */
+  cell_a = row_a->data.ptrvalue;
+  cell_b = row_b->data.ptrvalue;
+  for (pos = 1; pos < column && cell_a != NULL && cell_b != NULL; pos++) {
+    cell_a = cell_a->next;
+    cell_b = cell_b->next;
+  }
+
+  if (cell_a == NULL) {
+    return (cell_b == NULL) ? 0 : -1;
+  }
+  if (cell_b == NULL) {
+    return 1;
+  }
+  return StringCmp (cell_a->data.ptrvalue, cell_b->data.ptrvalue);
+}
+
+
+/* Column number handed to SortTableRowByColumn by the two-argument
+ * comparator below; set by SortTableRowByAnyColumn before it sorts.
+ * Because this is file-scope state, the column choice is shared by every
+ * sort that goes through this comparator. */
+static Int4 s_TableRowSortColumn = 0;
+
+/* Two-argument comparator suitable for ValNodeSort: forwards to
+ * SortTableRowByColumn using the column stored in s_TableRowSortColumn. */
+static int LIBCALLBACK SortTableRowByColumnStatic (VoidPtr ptr1, VoidPtr ptr2)
+{
+ return SortTableRowByColumn (ptr1, ptr2, s_TableRowSortColumn);
+}
+
+
+/* Sorts the rows of a tab table by the text in the given one-based column
+ * and returns the new head of the row list.  The column is passed to the
+ * comparator through the file-scope s_TableRowSortColumn.
+ */
+NLM_EXTERN ValNodePtr SortTableRowByAnyColumn (ValNodePtr table, Int4 column)
+{
+  s_TableRowSortColumn = column;
+  return ValNodeSort (table, SortTableRowByColumnStatic);
+}
+
+
+/* Frees a TwoStringHash: every key and value string in its table (two slots
+ * per line, num_lines lines), the table itself, and the structure.  Accepts
+ * NULL.  Returns the result of the final MemFree so callers can write
+ * tsh = TwoStringHashFree (tsh).
+ */
+NLM_EXTERN TwoStringHashPtr TwoStringHashFree (TwoStringHashPtr tsh)
+{
+  Int4 line;
+
+  if (tsh == NULL) {
+    return tsh;
+  }
+
+  for (line = 0; line < tsh->num_lines; line++) {
+    tsh->table[2 * line] = MemFree (tsh->table[2 * line]);
+    tsh->table[2 * line + 1] = MemFree (tsh->table[2 * line + 1]);
+  }
+  tsh->table = MemFree (tsh->table);
+  tsh = MemFree (tsh);
+  return tsh;
+}
+
+
+/* Returns the n-th node of list, counting from 1, or NULL when n is less
+ * than 1 or the list has fewer than n nodes.
+ */
+static ValNodePtr GetNthValNode (ValNodePtr list, Int4 n)
+{
+  if (n < 1) {
+    return NULL;
+  }
+  /* step forward n - 1 times, stopping early if the list runs out */
+  while (list != NULL && n > 1) {
+    list = list->next;
+    n--;
+  }
+  return list;
+}
+
+
+/* Builds a key/value lookup table from two columns of a tab table.
+ *
+ * A copy of line_list is sorted by column1 (one-based); for every row in
+ * which both column1 and column2 exist and hold text, the pair is stored as
+ * saved copies in consecutive slots of tsh->table.  Rows missing either
+ * value are skipped.  The input table is not modified; the temporary copy
+ * is freed before returning.
+ */
+NLM_EXTERN TwoStringHashPtr MakeTwoStringHashFromTabTable (ValNodePtr line_list, Int4 column1, Int4 column2)
+{
+  ValNodePtr       sorted, row, key_cell, val_cell;
+  Int4             num_rows;
+  Int4             num_kept = 0;
+  TwoStringHashPtr tsh;
+
+  sorted = CopyTabTable (line_list);
+  sorted = SortTableRowByAnyColumn (sorted, column1);
+  num_rows = ValNodeLen (sorted);
+
+  tsh = (TwoStringHashPtr) MemNew (sizeof (TwoStringHashData));
+  /* room for one key and one value per row; skipped rows leave slack */
+  tsh->table = (CharPtr PNTR) MemNew (sizeof (CharPtr) * num_rows * 2);
+
+  for (row = sorted; row != NULL; row = row->next) {
+    key_cell = GetNthValNode (row->data.ptrvalue, column1);
+    val_cell = GetNthValNode (row->data.ptrvalue, column2);
+    if (key_cell == NULL || val_cell == NULL) {
+      continue;
+    }
+    if (StringHasNoText (key_cell->data.ptrvalue) || StringHasNoText (val_cell->data.ptrvalue)) {
+      continue;
+    }
+    tsh->table[2 * num_kept] = StringSave (key_cell->data.ptrvalue);
+    tsh->table[2 * num_kept + 1] = StringSave (val_cell->data.ptrvalue);
+    num_kept++;
+  }
+  tsh->num_lines = num_kept;
+
+  sorted = FreeTabTable (sorted);
+  return tsh;
+}
+
+
+/* Looks up key in tsh by binary search over the key slots (even indices of
+ * tsh->table, compared with StringCmp) and returns the value stored next to
+ * the matching key.  Returns NULL when key has no text, tsh is NULL or the
+ * key is not present.  The returned pointer is the table's own string, not
+ * a copy.
+ */
+NLM_EXTERN CharPtr GetValueFromTwoStringHash (CharPtr key, TwoStringHashPtr tsh)
+{
+  Int4 lo = 0;
+  Int4 hi, mid, cmp;
+
+  if (StringHasNoText (key) || tsh == NULL) {
+    return NULL;
+  }
+
+  hi = tsh->num_lines - 1;
+  while (hi >= lo) {
+    mid = (hi + lo) / 2;
+    cmp = StringCmp (tsh->table[2 * mid], key);
+    if (cmp == 0) {
+      return tsh->table[2 * mid + 1];
+    }
+    if (cmp > 0) {
+      hi = mid - 1;
+    } else {
+      lo = mid + 1;
+    }
+  }
+  return NULL;
+}
+
+
static void AddToContextList (Char ch, CharPtr PNTR strp, ValNodePtr PNTR search_list)
{
ValNodePtr vnp, vnp_last = NULL, vnp2, clist;
@@ -16860,6 +17857,125 @@ NLM_EXTERN ValNodePtr ScanTabTableForSpecialCharacters (ValNodePtr row_list)
}
+/* Replaces every character of *text that lies outside the range ' ' .. '~'
+ * with the string returned by the platform's special-character lookup
+ * (GetSpecialWinCharacterReplacement under OS_WINNT, otherwise
+ * GetSpecialMacCharacterReplacement).
+ *
+ * When at least one such character is found, the text is rebuilt in a new
+ * buffer, the old buffer is freed and *text is repointed.  A character whose
+ * lookup returns NULL is dropped.
+ *
+ * Returns a list with one allocated "Replaced ..." message per replaced
+ * character (NULL when nothing was replaced or text / *text is NULL).
+ */
+NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInText (CharPtr PNTR text)
+{
+ CharPtr cp, str, new_str, cp_dst;
+ Int4 len;
+ Int4 extra_len = 0;
+ Boolean any = FALSE;
+ CharPtr replace_fmt = "Replaced '%c' with '%s'";
+ ValNodePtr repl_list = NULL;
+ CharPtr repl_str;
+
+ if (text == NULL || (cp = *text) == NULL) {
+ return NULL;
+ }
+
+ /* first pass: find out whether anything needs replacing and how much
+ longer the result can get */
+ while (*cp != 0) {
+ if (*cp < ' ' || *cp > '~') {
+#ifdef OS_WINNT
+ str = GetSpecialWinCharacterReplacement ((unsigned char) *cp);
+#else
+ str = GetSpecialMacCharacterReplacement ((unsigned char) *cp);
+#endif
+ len = StringLen (str);
+ /* only replacements longer than the one character they replace grow
+ the text */
+ if (len > 1) {
+ extra_len += len - 1;
+ }
+ any = TRUE;
+ }
+ ++cp;
+ }
+ if (any) {
+ /* second pass: build the new text and one message per replacement */
+ new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*text) + extra_len + 1));
+ cp = *text;
+ cp_dst = new_str;
+ while (*cp != 0) {
+ if (*cp < ' ' || *cp > '~') {
+#ifdef OS_WINNT
+ str = GetSpecialWinCharacterReplacement ((unsigned char) *cp);
+#else
+ str = GetSpecialMacCharacterReplacement ((unsigned char) *cp);
+#endif
+ /* message buffer is sized from the format text plus the replacement */
+ repl_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (replace_fmt) + StringLen (str)));
+ sprintf (repl_str, replace_fmt, *cp, str == NULL ? "" : str);
+ ValNodeAddPointer (&repl_list, 0, repl_str);
+ if (str != NULL) {
+ while (*str != 0) {
+ *cp_dst = *str;
+ cp_dst++;
+ str++;
+ }
+ }
+ } else {
+ *cp_dst = *cp;
+ cp_dst++;
+ }
+ cp++;
+ }
+ /* no explicit terminator is written; NOTE(review): this relies on MemNew
+ returning zero-filled memory - confirm */
+ *text = MemFree (*text);
+ *text = new_str;
+ }
+ return repl_list;
+}
+
+
+/* Runs AutoReplaceSpecialCharactersInText on *text and posts each resulting
+ * "Replaced ..." message through Message (MSG_POSTERR, ...).  The message
+ * list is freed before returning.
+ */
+NLM_EXTERN void AutoReplaceSpecialCharactersWithMessage (CharPtr PNTR text)
+{
+  ValNodePtr list, vnp;
+
+  list = AutoReplaceSpecialCharactersInText (text);
+  for (vnp = list; vnp != NULL; vnp = vnp->next) {
+    /* cast so the "%s" argument is a character pointer, matching the casts
+       used at this file's sprintf call sites */
+    Message (MSG_POSTERR, "%s", (CharPtr) vnp->data.ptrvalue);
+  }
+  list = ValNodeFreeData (list);
+}
+
+
+/* Runs AutoReplaceSpecialCharactersInText over every cell of a tab table,
+ * storing the (possibly reallocated) text back into the cell.  Returns the
+ * concatenation of all per-cell message lists.
+ */
+NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInTabTable (ValNodePtr row_list)
+{
+  ValNodePtr all_msgs = NULL;
+  ValNodePtr row, cell;
+  CharPtr    cell_text;
+
+  for (row = row_list; row != NULL; row = row->next) {
+    for (cell = row->data.ptrvalue; cell != NULL; cell = cell->next) {
+      /* go through a typed local because the cell stores an untyped pointer */
+      cell_text = cell->data.ptrvalue;
+      ValNodeLink (&all_msgs, AutoReplaceSpecialCharactersInText (&cell_text));
+      cell->data.ptrvalue = cell_text;
+    }
+  }
+  return all_msgs;
+}
+
+
+/* Finds special characters in the strings of the given entity and replaces
+ * each with its platform-specific replacement text, posting one
+ * "Replaced ..." message per distinct character.
+ *
+ * The search is delegated to StringActionInEntity with
+ * SpecialCharFindWithContext, which fills bad_list; presumably each node
+ * carries one character in choice and the list of affected strings in
+ * data.ptrvalue - verify against SpecialCharFindWithContext.
+ */
+NLM_EXTERN void AutoFixSpecialCharactersInEntity (Uint2 entityID)
+{
+ ValNodePtr bad_list = NULL, vnp, vnp_c;
+ Char label[2];
+ CharPtr repl;
+
+ /* label is a one-character string holding the character being replaced */
+ label[1] = 0;
+ StringActionInEntity (entityID, FALSE, UPDATE_NEVER, NULL, NULL, NULL, TRUE,
+ SpecialCharFindWithContext, NULL, &bad_list);
+ for (vnp = bad_list; vnp != NULL; vnp = vnp->next)
+ {
+#ifdef OS_WINNT
+ repl = GetSpecialWinCharacterReplacement ((unsigned char) vnp->choice);
+#else
+ repl = GetSpecialMacCharacterReplacement ((unsigned char) vnp->choice);
+#endif
+ label[0] = vnp->choice;
+ Message (MSG_POSTERR, "Replaced '%s' with '%s'", label, repl == NULL ? "" : repl);
+ /* apply the replacement to every string recorded for this character */
+ for (vnp_c = vnp->data.ptrvalue; vnp_c != NULL; vnp_c = vnp_c->next)
+ {
+ FindReplaceString (vnp_c->data.ptrvalue, label, repl, TRUE, FALSE);
+ }
+ }
+ bad_list = FreeContextList (bad_list);
+}
+
+
/* Functions for reassigning affiliations of authors for Flu sequences */
typedef struct authaffil {
CharPtr affil;
@@ -17056,7 +18172,7 @@ NLM_EXTERN ValNodePtr MakeSplitPubListFromTabList (ValNodePtr PNTR tab_table, Se
}
-static AuthListPtr GetAuthorListForPub (PubPtr the_pub)
+NLM_EXTERN AuthListPtr GetAuthorListForPub (PubPtr the_pub)
{
CitGenPtr cgp;
CitSubPtr csp;
@@ -17331,6 +18447,101 @@ static void AddStructuredCommentCallback (BioseqPtr bsp, Pointer data)
}
+/* Recognized structured-comment database names.  The array is terminated by
+ * a NULL entry, which is what the loops over it test for. */
+static CharPtr official_prefix_list[] = {
+ "HIVDataBaseData",
+ "MIGS-Data",
+ "MIMS-Data",
+ "MIENS-Data",
+ "MIGS:3.0-Data",
+ "GISAID_EpiFlu(TM)Data",
+ "FluData",
+ "EpifluData",
+ "International Barcode of Life (iBOL)Data",
+ "Assembly-Data",
+ "Genome-Assembly-Data",
+ NULL
+};
+
+
+/* Returns a newly allocated list holding a saved copy of every entry of
+ * official_prefix_list, in table order.
+ */
+NLM_EXTERN ValNodePtr GetStructuredCommentPrefixList (void)
+{
+  ValNodePtr prefixes = NULL;
+  Int4       idx = 0;
+
+  while (official_prefix_list[idx] != NULL) {
+    ValNodeAddPointer (&prefixes, 0, StringSave (official_prefix_list[idx]));
+    idx++;
+  }
+  return prefixes;
+}
+
+
+/* Returns the length of dbname after discarding a trailing "Data" (compared
+ * without regard to case) and then a trailing '-'.  Neither suffix is
+ * discarded if doing so would leave nothing.
+ */
+static Int4 GetDbnameCoreLen (CharPtr dbname)
+{
+  Int4 core_len;
+
+  core_len = StringLen (dbname);
+  if (core_len > 4 && StringICmp (dbname + core_len - 4, "Data") == 0) {
+    core_len -= 4;
+  }
+  if (core_len > 1 && dbname[core_len - 1] == '-') {
+    core_len -= 1;
+  }
+  return core_len;
+}
+
+
+/* Maps dbname onto its canonical spelling.
+ *
+ * Names are compared by their "core" (see GetDbnameCoreLen), ignoring case:
+ * an entry of official_prefix_list matches when both cores have the same
+ * length and the same text.  "HIV-Database" is additionally accepted as an
+ * alias and mapped to "HIVDatabase".
+ *
+ * Returns a pointer to a constant string (not to be freed), or NULL when
+ * dbname matches nothing.
+ */
+static CharPtr MatchesOfficialStructuredCommentDbname (CharPtr dbname)
+{
+  CharPtr hiv_alias = "HIV-Database";
+  Int4    i;
+  Int4    len_orig;
+  Int4    len_can;
+
+  len_orig = GetDbnameCoreLen (dbname);
+  for (i = 0; official_prefix_list[i] != NULL; i++) {
+    len_can = GetDbnameCoreLen (official_prefix_list[i]);
+    if (len_orig == len_can && StringNICmp (dbname, official_prefix_list[i], len_orig) == 0) {
+      return official_prefix_list[i];
+    }
+  }
+  /* require the whole alias: comparing only len_orig characters would let an
+     empty name or any prefix of the alias (e.g. "HIV") match */
+  if (len_orig == (Int4) StringLen (hiv_alias)
+      && StringNICmp (dbname, hiv_alias, len_orig) == 0) {
+    return "HIVDatabase";
+  }
+  return NULL;
+}
+
+
+/* Extracts the database name from a structured-comment prefix or suffix
+ * such as "##Name-START##" or "##Name-END##".
+ *
+ * Leading '#' characters, a trailing "##", and then a trailing "-START"
+ * and/or "-END" are removed; a suffix is only removed when something is
+ * left in front of it.  If the remainder matches a known database name
+ * (see MatchesOfficialStructuredCommentDbname) the canonical spelling is
+ * returned instead.
+ *
+ * Returns a newly allocated string owned by the caller, or NULL when string
+ * has no text.
+ */
+NLM_EXTERN CharPtr StructuredCommentDbnameFromString (CharPtr string)
+{
+  CharPtr dbname, tmp;
+  Int4    len;
+
+  if (StringHasNoText (string)) {
+    return NULL;
+  }
+
+  dbname = StringSave (string + StringSpn (string, "##"));
+  len = StringLen (dbname);
+  if (len > 2 && StringCmp (dbname + len - 2, "##") == 0) {
+    dbname[len - 2] = 0;
+    len -= 2;
+  }
+  if (len > 6 && StringCmp (dbname + len - 6, "-START") == 0) {
+    dbname[len - 6] = 0;
+    len -= 6;
+  }
+  /* "-END" is four characters long, so the guard is len > 4 (it used to be
+     len > 6, which left the suffix on short names such as "X-END") */
+  if (len > 4 && StringCmp (dbname + len - 4, "-END") == 0) {
+    dbname[len - 4] = 0;
+    len -= 4;
+  }
+
+  /* correct for weirdnesses with -data for recognizable prefixes */
+  tmp = MatchesOfficialStructuredCommentDbname (dbname);
+  if (tmp != NULL) {
+    dbname = MemFree (dbname);
+    dbname = StringSave (tmp);
+  }
+  return dbname;
+}
+
+
static CharPtr MakeStructuredCommentPrefixFromString (CharPtr orig)
{
CharPtr core, new_prefix;
@@ -17340,21 +18551,14 @@ static CharPtr MakeStructuredCommentPrefixFromString (CharPtr orig)
return StringSave ("##Metadata-START##");
}
- core = orig;
- while (*core == '#') {
- core++;
- }
+ core = StructuredCommentDbnameFromString(orig);
core_len = StringLen (core);
- if (core_len > 8 && StringICmp (core + core_len - 8, "-START##") == 0) {
- core_len -= 8;
- } else if (core_len > 6 && StringICmp (core + core_len - 6, "-START") == 0) {
- core_len -= 6;
- }
new_prefix = (CharPtr) MemNew (sizeof (Char) * (11 + core_len));
StringCpy (new_prefix, "##");
StringNCat (new_prefix, core, core_len);
StringCat (new_prefix, "-START##");
+ core = MemFree (core);
return new_prefix;
}
@@ -17368,21 +18572,14 @@ static CharPtr MakeStructuredCommentSuffixFromString (CharPtr orig)
return StringSave ("##Metadata-END##");
}
- core = orig;
- while (*core == '#') {
- core++;
- }
+ core = StructuredCommentDbnameFromString(orig);
core_len = StringLen (core);
- if (core_len > 6 && StringICmp (core + core_len - 6, "-END##") == 0) {
- core_len -= 6;
- } else if (core_len > 4 && StringICmp (core + core_len - 4, "-END") == 0) {
- core_len -= 4;
- }
new_suffix = (CharPtr) MemNew (sizeof (Char) * (9 + core_len));
StringCpy (new_suffix, "##");
StringNCat (new_suffix, core, core_len);
StringCat (new_suffix, "-END##");
+ core = MemFree (core);
return new_suffix;
}
@@ -17460,6 +18657,24 @@ NLM_EXTERN ValNodePtr CreateStructuredCommentsFromRow (ValNodePtr header, ValNod
}
+NLM_EXTERN void CreateStructuredCommentsForAllFromTable (SeqEntryPtr sep, ValNodePtr header, ValNodePtr line, ValNodePtr PNTR err_list) /* builds the structured comments for each table row and offers them to every Bioseq in sep */
+{
+ ValNodePtr tmp, vnp_l;
+ UserObjectPtr uop;
+
+ while (line != NULL) { /* one pass per table row, starting at the row passed in */
+ tmp = CreateStructuredCommentsFromRow (header, line->data.ptrvalue, NULL, err_list); /* err_list is handed through unchanged */
+ for (vnp_l = tmp; vnp_l != NULL; vnp_l = vnp_l->next) {
+ uop = (UserObjectPtr) vnp_l->data.ptrvalue;
+ VisitBioseqsInSep (sep, uop, AddStructuredCommentCallback); /* uop is the callback's user data */
+ uop = UserObjectFree (uop); /* freed here, so the callback presumably keeps its own copy -- TODO confirm */
+ }
+ tmp = ValNodeFree (tmp); /* node list released; the payloads were already freed above */
+ line = line->next;
+ }
+}
+
+
NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr sep, Boolean apply_to_all)
{
ValNodePtr err_list = NULL;
@@ -17469,7 +18684,6 @@ NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr se
CharPtr bad_id_fmt = "Unable to find sequence for %s";
CharPtr msg;
BioseqPtr bsp;
- UserObjectPtr uop;
SeqDescrPtr sdp;
if (fp == NULL || sep == NULL) {
@@ -17496,16 +18710,7 @@ NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr se
line = table->next;
if (apply_to_all) {
- while (line != NULL) {
- tmp = CreateStructuredCommentsFromRow (header, line->data.ptrvalue, NULL, &err_list);
- for (vnp_l = tmp; vnp_l != NULL; vnp_l = vnp_l->next) {
- uop = (UserObjectPtr) vnp_l->data.ptrvalue;
- VisitBioseqsInSep (sep, uop, AddStructuredCommentCallback);
- uop = UserObjectFree (uop);
- }
- tmp = ValNodeFree (tmp);
- line = line->next;
- }
+ CreateStructuredCommentsForAllFromTable (sep, header, line, &err_list);
} else {
while (line != NULL) {
vnp_h = header;
@@ -17658,7 +18863,7 @@ static void GetStructuredCommentsForBioseq(BioseqPtr bsp, Pointer data)
for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
sdp != NULL;
- sdp = sdp->next) {
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
ValNodeLink (&list, RowFromStructuredComment (sdp->data.ptrvalue, &header));
}
@@ -18122,7 +19327,9 @@ static Boolean RemoveDuplicateNestedSetsInSeqEntry (SeqEntryPtr top_sep)
sep_prev->next = lower_bssp->seq_set;
}
sep_tmp = lower_bssp->seq_set;
+ sep_prev = sep_tmp;
while (sep_tmp->next != NULL) {
+ sep_prev = sep_tmp;
sep_tmp = sep_tmp->next;
}
sep_tmp->next = sep_next;
@@ -18131,6 +19338,8 @@ static Boolean RemoveDuplicateNestedSetsInSeqEntry (SeqEntryPtr top_sep)
sep->next = NULL;
sep = SeqEntryFree (sep);
rval = TRUE;
+ } else {
+ sep_prev = sep;
}
sep = sep_next;
}
@@ -18198,6 +19407,12 @@ NLM_EXTERN CharPtr KeywordForStructuredCommentName (UserObjectPtr uop)
keyword = StringSave ("GSC:MIMS:2.1");
} else if (StringCmp (prefix, "##MIENS-Data-START##") == 0) {
keyword = StringSave ("GSC:MIENS:2.1");
+ } else if (StringCmp (prefix, "##MIGS:3.0-Data-START##") == 0) {
+ keyword = StringSave ("GSC:MIxS;MIGS:3.0");
+ } else if (StringCmp (prefix, "##MIMS:3.0-Data-START##") == 0) {
+ keyword = StringSave ("GSC:MIxS;MIMS:3.0");
+ } else if (StringCmp (prefix, "##MIMARKS:3.0-Data-START##") == 0) {
+ keyword = StringSave ("GSC:MIxS;MIMARKS:3.0");
}
return keyword;
@@ -18402,9 +19617,39 @@ NLM_EXTERN void RemoveStructuredCommentKeywords (Uint2 entityID)
}
+static Boolean StartsWith(CharPtr str, CharPtr start) /* TRUE when the first StringLen(start) characters of str match start under StringNICmp */
+{
+ Int4 str_len, start_len;
+
+ str_len = StringLen (str);
+ start_len = StringLen (start);
+
+ if (str_len < start_len || StringNICmp(str, start, start_len) != 0) { /* str too short, or prefix differs */
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
+
+static Boolean EndsWith(CharPtr str, CharPtr end) /* TRUE when the last StringLen(end) characters of str match end under StringICmp */
+{
+ Int4 str_len, end_len;
+
+ str_len = StringLen (str);
+ end_len = StringLen (end);
+
+ if (str_len < end_len || StringICmp(str + str_len - end_len, end) != 0) { /* length test first keeps the pointer arithmetic inside str */
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
+
static void TrimPrimerSeqJunkFromString (CharPtr str)
{
- Int4 len;
+ Int4 len, start_len = 0, end_len = 0;
CharPtr src, dst;
if (StringHasNoText (str)) {
@@ -18412,23 +19657,27 @@ static void TrimPrimerSeqJunkFromString (CharPtr str)
}
len = StringLen (str);
- if (len >= 7 && StringNCmp (str, "5'-", 3) == 0 && StringCmp (str + len - 3, "-3'") == 0) {
- src = str + 3;
- dst = str;
- len -= 6;
+ if (StartsWith (str, "5'-") || StartsWith (str, "5`-")) {
+ start_len = 3;
+ } else if (StartsWith (str, "5-") || StartsWith (str, "5'") || StartsWith (str, "5`")) {
+ start_len = 2;
+ } else if (StartsWith (str, "-")) {
+ start_len = 1;
+ }
- while (len > 0) {
- *dst = *src;
- src++;
- dst++;
- len--;
- }
- *dst = 0;
- } else if ((len >= 5 && StringNCmp (str, "5-", 2) == 0 && StringCmp (str + len - 2, "-3") == 0)
- || (len >= 5 && StringNCmp (str, "5'", 2) == 0 && StringCmp (str + len - 2, "3'") == 0)) {
- src = str + 2;
+ if (EndsWith (str, "-3'") || EndsWith (str, "-3`")) {
+ end_len = 3;
+ } else if (EndsWith (str, "-3") || EndsWith(str, "3'") || EndsWith(str, "3`")) {
+ end_len = 2;
+ } else if (EndsWith (str, "-")) {
+ end_len = 1;
+ }
+
+ if (end_len > 0 || start_len > 0) {
+ src = str + start_len;
dst = str;
- len -= 4;
+ len -= (end_len + start_len);
+
while (len > 0) {
*dst = *src;
src++;
@@ -18441,30 +19690,46 @@ static void TrimPrimerSeqJunkFromString (CharPtr str)
}
+static Boolean TrimJunkFromPrimer (PCRPrimerPtr pp, FILE *log_fp) /* trims pp->seq in place via TrimPrimerSeqJunkFromString and logs the change when log_fp is given */
+{
+ CharPtr orig = NULL;
+ Boolean rval = FALSE;
+
+ if (pp == NULL || StringHasNoText (pp->seq)) { /* nothing to trim */
+ return FALSE;
+ }
+ if (log_fp != NULL) {
+ orig = StringSave (pp->seq); /* copy of the untrimmed text, kept only for the log message */
+ }
+ TrimPrimerSeqJunkFromString (pp->seq);
+ if (log_fp != NULL && StringCmp (orig, pp->seq) != 0) { /* NOTE(review): rval becomes TRUE only when logging; with log_fp == NULL a real change still returns FALSE -- confirm this is intended */
+ fprintf (log_fp, "Changed primer seq from %s to %s\n", orig, pp->seq);
+ rval = TRUE;
+ }
+ orig = MemFree (orig); /* orig may still be NULL here */
+ return rval;
+}
+
+
static Boolean TrimPrimerSeqJunkOnBioSource (BioSourcePtr biop, FILE *log_fp)
{
- SubSourcePtr ssp;
- CharPtr orig = NULL;
- Boolean rval = FALSE;
+ PCRReactionSetPtr ps;
+ PCRPrimerPtr pp;
+ Boolean rval = FALSE;
if (biop == NULL) {
return FALSE;
}
- for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
- if (ssp->subtype == SUBSRC_fwd_primer_seq
- || ssp->subtype == SUBSRC_rev_primer_seq) {
- if (log_fp != NULL) {
- orig = StringSave (ssp->name);
- }
- TrimPrimerSeqJunkFromString (ssp->name);
- if (log_fp != NULL && StringCmp (orig, ssp->name) != 0) {
- fprintf (log_fp, "Changed primer seq from %s to %s\n", orig, ssp->name);
- rval = TRUE;
- }
- orig = MemFree (orig);
+ for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) {
+ for (pp = ps->forward; pp != NULL; pp = pp->next) {
+ rval |= TrimJunkFromPrimer(pp, log_fp);
+ }
+ for (pp = ps->reverse; pp != NULL; pp = pp->next) {
+ rval |= TrimJunkFromPrimer(pp, log_fp);
}
}
+
return rval;
}
@@ -18512,6 +19777,7 @@ static Boolean IsUSA (CharPtr country)
{
if (StringICmp (country, "USA") == 0
|| StringICmp (country, "United States of America") == 0
+ || StringICmp (country, "United States") == 0
|| StringICmp (country, "U.S.A.") == 0
|| StringICmp (country, "U S A") == 0) {
return TRUE;
@@ -18521,11 +19787,30 @@ static Boolean IsUSA (CharPtr country)
}
+static void FixStateAbbreviationsInCitSub (CitSubPtr csp, LogInfoPtr lip) /* for a Cit-sub whose affiliation country passes IsUSA: set country to "USA", then fix state abbreviations */
+{
+ if (csp != NULL && csp->authors != NULL
+ && csp->authors->affil != NULL
+ && IsUSA(csp->authors->affil->country)) { /* any other country leaves the Cit-sub untouched */
+ if (StringCmp (csp->authors->affil->country, "USA") != 0) { /* a recognized variant spelling, not yet exactly "USA" */
+ if (lip != NULL) {
+ if (lip->fp != NULL) {
+ fprintf (lip->fp, "Changed %s to USA\n", csp->authors->affil->country);
+ }
+ lip->data_in_log = TRUE; /* set even when there is no file to write to */
+ }
+ csp->authors->affil->country = MemFree (csp->authors->affil->country);
+ csp->authors->affil->country = StringSave ("USA");
+ }
+ FixStateAbbreviationsInAffil (csp->authors->affil, NULL); /* NOTE(review): NULL is passed although lip is available -- confirm whether this step should report through lip */
+ }
+}
+
+
static void AbbreviateCitSubAffilStatesCallback (PubdescPtr pdp, Pointer data)
{
ValNodePtr vnp;
CitSubPtr csp;
- CharPtr abbrev;
LogInfoPtr lip;
if (pdp == NULL) return;
@@ -18533,32 +19818,8 @@ static void AbbreviateCitSubAffilStatesCallback (PubdescPtr pdp, Pointer data)
for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
if (vnp->choice == PUB_Sub) {
- csp = (CitSubPtr) vnp->data.ptrvalue;
- if (csp != NULL && csp->authors != NULL
- && csp->authors->affil != NULL
- && IsUSA(csp->authors->affil->country)) {
- if (StringCmp (csp->authors->affil->country, "USA") != 0) {
- if (lip != NULL) {
- if (lip->fp != NULL) {
- fprintf (lip->fp, "Changed %s to USA\n", csp->authors->affil->country);
- }
- lip->data_in_log = TRUE;
- }
- csp->authors->affil->country = MemFree (csp->authors->affil->country);
- csp->authors->affil->country = StringSave ("USA");
- }
- abbrev = GetStateAbbreviation (csp->authors->affil->sub);
- if (abbrev != NULL) {
- if (lip != NULL) {
- if (lip->fp != NULL) {
- fprintf (lip->fp, "Changed %s to %s\n", csp->authors->affil->sub, abbrev);
- }
- lip->data_in_log = TRUE;
- }
- csp->authors->affil->sub = MemFree (csp->authors->affil->sub);
- csp->authors->affil->sub = StringSave (abbrev);
- }
- }
+ csp = (CitSubPtr) vnp->data.ptrvalue;
+ FixStateAbbreviationsInCitSub (csp, lip);
}
}
}
@@ -18568,6 +19829,7 @@ NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp)
{
SeqEntryPtr sep;
LogInfoData lid;
+ SeqSubmitPtr ssp;
sep = GetTopSeqEntryForEntityID (entityID);
if (sep == NULL)
@@ -18576,10 +19838,185 @@ NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp)
MemSet (&lid, 0, sizeof (LogInfoData));
lid.fp = log_fp;
VisitPubdescsInSep (sep, &lid, AbbreviateCitSubAffilStatesCallback);
+
+ ssp = FindSeqSubmitForSeqEntry (sep);
+ if (ssp != NULL && ssp->sub != NULL && ssp->sub->cit != NULL) {
+ FixStateAbbreviationsInCitSub (ssp->sub->cit, &lid);
+ }
return lid.data_in_log;
}
+static ValNodePtr FindExonForInterval (BioseqPtr bsp, SeqLocPtr slp, Boolean match_from_exactly, Boolean match_to_exactly) /* returns a ValNode list (choice OBJ_SEQFEAT) of single-interval exon features on bsp that match slp */
+{
+ SeqMgrFeatContext context;
+ SeqFeatPtr sfp;
+ ValNodePtr list = NULL;
+ Int4 from, to, feat_from, feat_to;
+ Uint1 strand;
+ SeqPntPtr spp;
+ SeqIntPtr sint;
+
+ if (slp == NULL) {
+ return NULL;
+ } else if (slp->choice == SEQLOC_PNT) { /* a point is treated as an interval of length one */
+ spp = (SeqPntPtr) slp->data.ptrvalue;
+ from = spp->point;
+ to = spp->point;
+ strand = spp->strand;
+ } else if (slp->choice == SEQLOC_INT) {
+ sint = (SeqIntPtr) slp->data.ptrvalue;
+ from = sint->from;
+ to = sint->to;
+ strand = sint->strand;
+ } else {
+ return NULL; /* only point and interval locations are supported */
+ }
+
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_exon, &context);
+ sfp != NULL && context.left <= to; /* stops at the first exon starting beyond 'to'; assumes features come back ordered by left position -- TODO confirm */
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_exon, &context))
+ {
+ /* note - have to use location values, rather than context.left and context.right,
+ * because exon may already have been altered for another mRNA/CDS
+ */
+ if (sfp->location == NULL) {
+ /* no location */
+ continue;
+ } else if (sfp->location->choice == SEQLOC_PNT) {
+ spp = (SeqPntPtr) sfp->location->data.ptrvalue;
+ feat_from = spp->point;
+ feat_to = spp->point;
+ } else if (sfp->location->choice == SEQLOC_INT) {
+ sint = (SeqIntPtr) sfp->location->data.ptrvalue;
+ feat_from = sint->from;
+ feat_to = sint->to;
+ } else {
+ /* not handling other types of locations */
+ continue;
+ }
+ if (context.numivals != 1) { /* the empty branches below are rejections; only the final else accepts the exon */
+ /* not going to match multi-interval exons */
+ } else if (match_from_exactly && feat_from != from) {
+ /* no match on from */
+ } else if (!match_from_exactly && (feat_from < from || feat_from > to)) { /* relaxed mode: exon start must lie within [from, to] */
+ /* less restrictive match fails for from */
+ } else if (match_to_exactly && feat_to != to) {
+ /* no match on to */
+ } else if (!match_to_exactly && (feat_to > to || feat_to < from)) { /* relaxed mode: exon stop must lie within [from, to] */
+ /* less restrictive match fails for to */
+ } else if ((strand == Seq_strand_minus && context.strand != Seq_strand_minus)
+ || (strand != Seq_strand_minus && context.strand == Seq_strand_minus)) { /* minus must pair with minus; all other strand values are treated alike */
+ /* strand match fails */
+ } else {
+ ValNodeAddPointer (&list, OBJ_SEQFEAT, sfp); /* stores the feature pointer itself, not a copy */
+ }
+ }
+ return list;
+}
+
+
+static ValNodePtr SaveOrigExonPositions (ValNodePtr exon_list) /* returns a list, parallel to exon_list, of each exon's location printed by SeqLocPrintUseBestID */
+{
+ ValNodePtr vnp;
+ SeqFeatPtr exon;
+ CharPtr orig_loc;
+ ValNodePtr loc_list = NULL;
+
+ for (vnp = exon_list; vnp != NULL; vnp = vnp->next)
+ {
+ exon = (SeqFeatPtr) vnp->data.ptrvalue; /* dereferenced below without a NULL check */
+ orig_loc = SeqLocPrintUseBestID (exon->location);
+ ValNodeAddPointer (&loc_list, 0, orig_loc); /* the list takes the string; ExonLocListFree releases it with ValNodeFreeData */
+ }
+ return loc_list;
+}
+
+
+/* Shifts the endpoints of every exon feature in list: from_diff is added to
+ * the 'from' coordinate and to_diff to the 'to' coordinate of its location.
+ * A point location is first converted into an interval (taking over the
+ * point's id and strand) so that both ends can move independently; the
+ * SeqLoc choice is updated to SEQLOC_INT to match the new payload.
+ * Locations that are neither a point nor an interval are left untouched.
+ */
+static void FixExonsForInterval (ValNodePtr list, Int4 from_diff, Int4 to_diff)
+{
+  ValNodePtr vnp;
+  SeqFeatPtr exon;
+  SeqPntPtr spp;
+  SeqIntPtr sint;
+
+  for (vnp = list; vnp != NULL; vnp = vnp->next) {
+    exon = vnp->data.ptrvalue;
+    if (exon == NULL || exon->location == NULL) {
+      continue;
+    }
+    if (exon->location->choice == SEQLOC_PNT) {
+      spp = (SeqPntPtr) exon->location->data.ptrvalue;
+      sint = SeqIntNew ();
+      sint->id = spp->id;
+      spp->id = NULL; /* id now belongs to sint; keep SeqPntFree from releasing it */
+      sint->strand = spp->strand;
+      sint->to = spp->point;
+      sint->from = spp->point;
+      spp = SeqPntFree (spp);
+      exon->location->data.ptrvalue = sint;
+      /* the payload is now a SeqInt, so the choice tag has to say so */
+      exon->location->choice = SEQLOC_INT;
+    } else if (exon->location->choice != SEQLOC_INT) {
+      /* payload is not a SeqInt; casting it below would corrupt it */
+      continue;
+    }
+    sint = (SeqIntPtr) exon->location->data.ptrvalue;
+    sint->from += from_diff;
+    sint->to += to_diff;
+  }
+}
+
+typedef struct exonloclist { /* exon features matched to one interval, plus their locations as printed before adjustment */
+ ValNodePtr feature_list; /* filled by FindExonForInterval in ExonLocListNew */
+ ValNodePtr orig_loc_list; /* filled by SaveOrigExonPositions; parallel to feature_list */
+} ExonLocListData, PNTR ExonLocListPtr;
+
+
+static ExonLocListPtr ExonLocListNew (BioseqPtr bsp, SeqLocPtr slp, Boolean match_from_exactly, Boolean match_to_exactly) /* collects the exons matching slp and snapshots their printed locations */
+{
+ ExonLocListPtr el = (ExonLocListPtr) MemNew (sizeof (ExonLocListData)); /* NOTE(review): result is used below without a NULL check */
+ el->feature_list = FindExonForInterval(bsp, slp, match_from_exactly, match_to_exactly);
+ if (el->feature_list == NULL) {
+ el = MemFree (el); /* no matching exon: the empty record is discarded and MemFree's result is returned */
+ } else {
+ el->orig_loc_list = SaveOrigExonPositions(el->feature_list);
+ }
+ return el;
+}
+
+
+static ExonLocListPtr ExonLocListFree (ExonLocListPtr el) /* releases el and both of its lists; accepts NULL */
+{
+ if (el != NULL) {
+ el->feature_list = ValNodeFree (el->feature_list); /* nodes only: the exon features are not owned by this list */
+ el->orig_loc_list = ValNodeFreeData (el->orig_loc_list); /* nodes and the saved location strings */
+ el = MemFree (el);
+ }
+ return el;
+}
+
+
+static void ReportExonLocationChanges (ExonLocListPtr el, LogInfoPtr lip) /* logs every exon in el whose printed location differs from the one saved in orig_loc_list */
+{
+ ValNodePtr exon_v, orig;
+ SeqFeatPtr exon;
+ CharPtr new_loc;
+
+ if (lip == NULL || el == NULL) {
+ return;
+ }
+ for (exon_v = el->feature_list, orig = el->orig_loc_list; exon_v != NULL && orig != NULL; exon_v = exon_v->next, orig = orig->next) { /* walks the two parallel lists in step */
+ exon = (SeqFeatPtr) exon_v->data.ptrvalue;
+ new_loc = SeqLocPrintUseBestID (exon->location); /* current location, printed the same way as the saved one */
+ if (StringCmp (orig->data.ptrvalue, new_loc) != 0) {
+ if (lip->fp != NULL) {
+ fprintf (lip->fp, "Adjusted location for splice consensus: %s became %s\n", orig->data.ptrvalue, new_loc);
+ }
+ lip->data_in_log = TRUE; /* set even when lip->fp is NULL */
+ }
+ new_loc = MemFree (new_loc);
+ }
+}
+
+
static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
{
SeqLocPtr slp, slp_last = NULL;
@@ -18593,6 +20030,7 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
Boolean match;
SeqIntPtr sint;
SeqPntPtr spp;
+ ExonLocListPtr last_exon_list = NULL, this_exon_list = NULL;
/* variables used for logging change */
CharPtr orig_loc = NULL, new_loc;
Boolean changed = FALSE;
@@ -18637,9 +20075,13 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
for (slp = sfp->location->data.ptrvalue; slp != NULL; slp = slp->next) {
CheckSeqLocForPartial (slp, &partial5, &partial3);
exon_len = SeqLocLen (slp);
+ /* record underlying exon features */
+ this_exon_list = ExonLocListNew (bsp, slp, TRUE, TRUE);
+
if (!first && !partial5 && !partial3_last
&& (slp_last->choice == SEQLOC_INT || slp_last->choice == SEQLOC_PNT)
- && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT)) {
+ && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT)) {
+
/* check for donor and acceptor pair */
/* maximum search space is beginning of previous exon to end of current exon */
exon_len_last = SeqLocLen (slp_last);
@@ -18710,7 +20152,7 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
&& buf[len - exon_len - 1 - diff] == 'G' && buf[len - exon_len - 2 - diff] == 'A') {
match = TRUE;
} else {
- diff--;
+ diff++;
}
}
if (match) {
@@ -18728,14 +20170,26 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
sint = (SeqIntPtr) slp->data.ptrvalue;
if (sint->strand == Seq_strand_minus) {
sint->to += diff;
+ if (this_exon_list != NULL) {
+ FixExonsForInterval (this_exon_list->feature_list, 0, diff);
+ }
} else {
sint->from -= diff;
+ if (this_exon_list != NULL) {
+ FixExonsForInterval (this_exon_list->feature_list, -diff, 0);
+ }
}
sint = (SeqIntPtr) slp_last->data.ptrvalue;
if (sint->strand == Seq_strand_minus) {
sint->from += diff;
+ if (last_exon_list != NULL) {
+ FixExonsForInterval (last_exon_list->feature_list, diff, 0);
+ }
} else {
sint->to -= diff;
+ if (last_exon_list != NULL) {
+ FixExonsForInterval (last_exon_list->feature_list, 0, -diff);
+ }
}
changed = TRUE;
}
@@ -18754,8 +20208,15 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
partial5_last = partial5;
partial3_last = partial3;
slp_last = slp;
+ ReportExonLocationChanges (last_exon_list, lip);
+ last_exon_list = ExonLocListFree (last_exon_list);
+ last_exon_list = this_exon_list;
first = FALSE;
}
+
+ ReportExonLocationChanges (last_exon_list, lip);
+ last_exon_list = ExonLocListFree (last_exon_list);
+
BioseqUnlock (bsp);
if (changed) {
@@ -18770,6 +20231,36 @@ static void AdjustForConsensusSpliceCallback (SeqFeatPtr sfp, Pointer data)
}
+static void AdjustSeqEntryForConsensusSpliceBioseqCallback (BioseqPtr bsp, Pointer data) /* runs AdjustForConsensusSpliceCallback on every feature of bsp, but only for Bioseqs that pass the source checks below */
+{
+ SeqDescPtr sdp;
+ SeqMgrDescContext dcontext;
+ BioSourcePtr biop;
+ SeqFeatPtr sfp;
+ SeqMgrFeatContext fcontext;
+
+ if (bsp == NULL || ISA_aa (bsp->mol)) { /* protein sequences are skipped */
+ return;
+ }
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext); /* first source descriptor found for bsp */
+ if (sdp == NULL || (biop = (BioSourcePtr)sdp->data.ptrvalue) == NULL
+ || (biop->genome != GENOME_genomic && biop->genome != GENOME_unknown) /* genome must be genomic or unknown */
+ || (biop->org != NULL && biop->org->orgname != NULL && StringISearch (biop->org->orgname->lineage, "viruses") != NULL) /* lineage must not contain "viruses" */
+ || !HasTaxonomyID(biop)) /* source must pass HasTaxonomyID */
+ {
+ return;
+ }
+
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext); /* 0, 0: no feature-type filter */
+ sfp != NULL;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext))
+ {
+ AdjustForConsensusSpliceCallback (sfp, data); /* data is forwarded untouched */
+ }
+}
+
+
NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *log_fp)
{
LogInfoData lid;
@@ -18780,7 +20271,7 @@ NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *lo
MemSet (&lid, 0, sizeof (LogInfoData));
lid.fp = log_fp;
- VisitFeaturesInSep (sep, &lid, AdjustForConsensusSpliceCallback);
+ VisitBioseqsInSep (sep, &lid, AdjustSeqEntryForConsensusSpliceBioseqCallback);
return lid.data_in_log;
}
@@ -18788,3 +20279,124 @@ NLM_EXTERN void AdjustSeqEntryForConsensusSplice (SeqEntryPtr sep)
{
AdjustSeqEntryForConsensusSpliceEx (sep, NULL);
}
+
+
+NLM_EXTERN CharPtr ValNodeSeqIdName (ValNodePtr vnp) /* returns a StringSave'd PRINTID_FASTA_SHORT rendering of the SeqId held in vnp, or NULL */
+{
+ Char buf[100]; /* longer renderings are limited by the size passed to SeqIdWrite below */
+
+ if (vnp == NULL || vnp->data.ptrvalue == NULL)
+ {
+ return NULL;
+ }
+ else
+ {
+ SeqIdWrite (vnp->data.ptrvalue, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
+ return StringSave (buf); /* copy of the stack buffer */
+ }
+}
+
+
+NLM_EXTERN void ValNodeSeqIdFree (ValNodePtr vnp) /* frees the SeqId held in vnp; the node itself is not freed */
+{
+ if (vnp != NULL && vnp->data.ptrvalue != NULL)
+ {
+ vnp->data.ptrvalue = SeqIdFree (vnp->data.ptrvalue); /* stores SeqIdFree's result back so no stale pointer remains */
+ }
+}
+
+
+NLM_EXTERN ValNodePtr ValNodeSeqIdCopy (ValNodePtr vnp) /* returns a new single node with vnp's choice and a SeqIdDup of its SeqId; NULL for NULL input */
+{
+ ValNodePtr vnp_copy = NULL;
+ if (vnp != NULL)
+ {
+ ValNodeAddPointer (&vnp_copy, vnp->choice, SeqIdDup (vnp->data.ptrvalue)); /* vnp->next is not followed */
+ }
+ return vnp_copy;
+}
+
+NLM_EXTERN Boolean ValNodeSeqIdMatch (ValNodePtr vnp1, ValNodePtr vnp2) /* TRUE when SeqIdComp reports SIC_YES for the two held SeqIds */
+{
+ if (vnp1 == NULL || vnp2 == NULL) /* a missing node never matches, even against another missing node */
+ {
+ return FALSE;
+ }
+ if (SeqIdComp (vnp1->data.ptrvalue, vnp2->data.ptrvalue) == SIC_YES) /* any other SeqIdComp result counts as no match */
+ {
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+}
+
+
+NLM_EXTERN ValNodePtr ValNodeSeqIdListFree (ValNodePtr list) /* frees every node of list together with the SeqId it holds; returns NULL */
+{
+ ValNodePtr list_next;
+
+ while (list != NULL) {
+ list_next = list->next; /* remember the successor before this node goes away */
+ list->next = NULL; /* detach so ValNodeFree below releases this node only */
+ list->data.ptrvalue = SeqIdFree (list->data.ptrvalue);
+ list = ValNodeFree (list);
+ list = list_next;
+ }
+ return list; /* always NULL once the loop has finished */
+}
+
+
+NLM_EXTERN ValNodePtr ValNodeSeqIdListCopy (ValNodePtr list) /* returns a new list, in the same order, holding SeqIdDup copies of the SeqIds in list */
+{
+ ValNodePtr vnp, list_copy = NULL, list_prev = NULL;
+
+ while (list != NULL) {
+ vnp = ValNodeNew (list_prev); /* new node created relative to the previous copy; result is not NULL-checked */
+ vnp->data.ptrvalue = SeqIdDup (list->data.ptrvalue); /* NOTE(review): choice is not copied here, unlike ValNodeSeqIdCopy -- confirm intended */
+ if (list_copy == NULL) {
+ list_copy = vnp; /* first node becomes the head that is returned */
+ }
+ list_prev = vnp;
+ list = list->next;
+ }
+ return list_copy;
+}
+
+
+NLM_EXTERN ValNodePtr SeqIdListToValNodeSeqIdList (SeqIdPtr sip_list) /* builds a ValNode list with one node per SeqId in sip_list, each holding a SeqIdDup copy */
+{
+ SeqIdPtr sip;
+ ValNodePtr list = NULL, vnp_p = NULL, vnp;
+
+ for (sip = sip_list; sip != NULL; sip = sip->next) {
+ vnp = ValNodeNew (vnp_p); /* result is not NULL-checked */
+ if (vnp_p == NULL) {
+ list = vnp; /* first node becomes the head that is returned */
+ }
+ vnp->data.ptrvalue = SeqIdDup (sip); /* the input chain is left untouched */
+ vnp_p = vnp;
+ }
+ return list;
+}
+
+
+NLM_EXTERN SeqIdPtr ValNodeSeqIdListToSeqIdList (ValNodePtr vnp_list) /* builds a SeqId chain from SeqIdDup copies of the SeqIds held in vnp_list */
+{
+ ValNodePtr vnp;
+ SeqIdPtr sip_list = NULL, sip_prev = NULL, sip;
+
+ for (vnp = vnp_list; vnp != NULL; vnp = vnp->next) {
+ sip = SeqIdDup (vnp->data.ptrvalue);
+ if (sip_prev == NULL) {
+ sip_list = sip; /* first copy becomes the head */
+ } else {
+ sip_prev->next = sip; /* later copies are chained through ->next */
+ }
+ sip_prev = sip; /* NOTE(review): if SeqIdDup returns NULL, sip_prev becomes NULL and the next copy replaces the head -- confirm inputs never hold NULL */
+ }
+ return sip_list;
+}
+
+
diff --git a/api/sqnutil3.c b/api/sqnutil3.c
index 37086009..f38e1df2 100644
--- a/api/sqnutil3.c
+++ b/api/sqnutil3.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 2/7/00
*
-* $Revision: 6.633 $
+* $Revision: 6.762 $
*
* File Description:
*
@@ -712,6 +712,166 @@ NLM_EXTERN void LinkCDSmRNAbyLabel (
VisitBioseqsInSep (sep, NULL, BspLinkCDSmRNAbyLabel);
}
+
+static void MakeOneLink ( /* makes f2 carry an xref holding f1's integer feature id (one direction only) */
+ SeqFeatPtr f1, /* feature whose id is read */
+ SeqFeatPtr f2 /* feature whose xref list is updated */
+)
+
+{
+ ObjectIdPtr oip;
+ SeqFeatXrefPtr xref;
+ Int4 id;
+
+ if (f1 == NULL || f2 == NULL || f1->id.choice != 3 || f2->id.choice != 3) { /* both features need an id of choice 3, whose payload is read as an ObjectId below */
+ return;
+ }
+
+ oip = (ObjectIdPtr) f1->id.value.ptrvalue;
+ if (oip != NULL && oip->str == NULL) { /* only integer ids are linked; string ids are ignored */
+ id = oip->id;
+ if (id > 0) {
+ for (xref = f2->xref; xref != NULL && xref->id.choice != 3; xref = xref->next) continue; /* find the first existing xref with an id of choice 3 */
+ if (xref != NULL) {
+ oip = (ObjectIdPtr) xref->id.value.ptrvalue;
+ if (oip != NULL) { /* reuse the existing xref: overwrite whatever id it held */
+ if (oip->str != NULL) {
+ oip->str = MemFree (oip->str);
+ }
+ oip->id = id;
+ }
+ } else {
+ xref = SeqFeatXrefNew (); /* no suitable xref yet: create one and prepend it */
+ if (xref != NULL) {
+ oip = ObjectIdNew ();
+ if (oip != NULL) { /* NOTE(review): if ObjectIdNew fails, the new xref is neither linked nor freed */
+ oip->id = id;
+ xref->id.choice = 3;
+ xref->id.value.ptrvalue = (Pointer) oip;
+ xref->next = f2->xref;
+ f2->xref = xref;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+static void CreateReciprocalLink ( /* cross-references two features in both directions via MakeOneLink */
+ SeqFeatPtr f1,
+ SeqFeatPtr f2
+)
+
+{
+ if (f1 == NULL || f2 == NULL || f1->id.choice != 3 || f2->id.choice != 3) { /* same precondition that MakeOneLink applies itself */
+ return;
+ }
+
+ MakeOneLink (f1, f2); /* f2 gets an xref holding f1's id */
+ MakeOneLink (f2, f1); /* f1 gets an xref holding f2's id */
+}
+
+
+static void LinkCDSmRNAbyLabelAndLocationCallback ( /* per Bioseq: pairs each unlinked CDS/mRNA with the best-located same-label partner and cross-links them */
+ BioseqPtr bsp,
+ Pointer userdata /* not used in this function */
+)
+
+{
+ SMFeatItemPtr PNTR array;
+ BioseqExtraPtr bspextra;
+ Uint2 entityID;
+ SMFeatItemPtr feat;
+ Int4 i, j, best_index, best_diff, diff; /* best_diff is only read after best_index has been set */
+ Int4 num;
+ ObjMgrDataPtr omdp;
+
+ if (bsp == NULL) return;
+
+ omdp = SeqMgrGetOmdpForBioseq (bsp);
+ if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;
+
+ bspextra = (BioseqExtraPtr) omdp->extradata;
+ if (bspextra == NULL) return;
+ array = bspextra->featsByLabel; /* per-Bioseq feature array; same labels are adjacent per the comment below */
+ num = bspextra->numfeats;
+ if (array == NULL || num < 1) return;
+
+ entityID = bsp->idx.entityID; /* NOTE(review): entityID is computed here but not used later in this function */
+ if (entityID < 1) {
+ entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
+ }
+
+ /* labels are all grouped together - for each cds/mRNA in group of identical labels,
+ * find match with best location.
+ */
+ for (i = 0; i < num - 1; i++) { /* the last entry has no later candidate to pair with */
+ feat = array [i];
+ if (feat->sfp == NULL) {
+ continue;
+ } else if (feat->sfp->xref != NULL) {
+ /* already assigned feat xref */
+ continue;
+ } else if (feat->sfp->idx.subtype != FEATDEF_CDS && feat->sfp->idx.subtype != FEATDEF_mRNA) {
+ /* not interested in these feature types */
+ } else {
+ best_index = -1; /* -1 means no acceptable partner found yet */
+ for (j = i + 1; j < num && StringCmp (feat->label, array[j]->label) == 0; j++) { /* scan only the following entries that share this label */
+ if (array[j]->sfp == NULL) {
+ /* bad */
+ } else if (array[j]->sfp->xref != NULL) {
+ /* already assigned feat xref */
+ } else if (feat->sfp->idx.subtype == FEATDEF_CDS) {
+ if (array[j]->sfp->idx.subtype != FEATDEF_mRNA) {
+ /* wrong feature type */
+ } else if ((diff = SeqLocAinB (feat->sfp->location, array[j]->sfp->location)) < 0) { /* CDS location is tested against the candidate mRNA */
+ /* locations don't match */
+ } else {
+ if (best_index == -1) {
+ /* don't have a best yet */
+ best_index = j;
+ best_diff = diff;
+ } else if (diff < best_diff) { /* strictly smaller wins, so ties keep the earlier candidate */
+ best_index = j;
+ best_diff = diff;
+ }
+ }
+ } else if (feat->sfp->idx.subtype == FEATDEF_mRNA) {
+ if (array[j]->sfp->idx.subtype != FEATDEF_CDS) {
+ /* wrong feature type */
+ } else if ((diff = SeqLocAinB (array[j]->sfp->location, feat->sfp->location)) < 0) { /* arguments swapped: candidate CDS is tested against this mRNA */
+ /* locations don't match */
+ } else {
+ if (best_index == -1) {
+ /* don't have a best yet */
+ best_index = j;
+ best_diff = diff;
+ } else if (diff < best_diff) {
+ best_index = j;
+ best_diff = diff;
+ }
+ }
+ }
+ }
+ if (best_index > -1) {
+ CreateReciprocalLink (feat->sfp, array[best_index]->sfp); /* both features then have xrefs, so later passes skip them */
+ }
+ }
+ }
+}
+
+
+NLM_EXTERN void LinkCDSmRNAbyLabelAndLocation ( /* assigns feature ids, then links CDS/mRNA pairs on every Bioseq in sep */
+ SeqEntryPtr sep
+)
+
+{
+ AssignFeatureIDs (sep); /* runs first: the linking step only handles features whose id has choice 3 */
+ VisitBioseqsInSep (sep, NULL, LinkCDSmRNAbyLabelAndLocationCallback);
+}
+
+
typedef struct ovpdata {
SeqFeatPtr sfp;
Char revstr [42];
@@ -1309,6 +1469,44 @@ NLM_EXTERN void StripGeneRnaPcrAsnFilter (
}
}
+/* Copies a Seq-entry ASN.1 stream from aip to aop.  Each feature-table
+ * element ("Seq-annot.data.ftable.E") is read as a SeqFeat and written back
+ * without its support field; every other value is copied through unchanged.
+ * A feature that cannot be read is skipped instead of being dereferenced.
+ */
+NLM_EXTERN void StripSeqFeatSupportAsnFilter (
+  AsnIoPtr aip,
+  AsnIoPtr aop
+)
+
+{
+  AsnModulePtr amp;
+  AsnTypePtr atp, atp_se, atp_sf;
+  DataVal dv;
+  SeqFeatPtr sfp;
+  SeqFeatSupportPtr support;
+
+  if (aip == NULL || aop == NULL) return;
+
+  amp = AsnAllModPtr ();
+  if (amp == NULL) return;
+  atp_se = AsnFind ("Seq-entry");
+  atp_sf = AsnFind ("Seq-annot.data.ftable.E");
+  if (atp_se == NULL || atp_sf == NULL) return;
+
+  atp = atp_se;
+
+  while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
+    if (atp == atp_sf) {
+      sfp = SeqFeatAsnRead (aip, atp);
+      if (sfp != NULL) {
+        /* hide the support field for the write, then restore it so that
+         * SeqFeatFree releases it together with the feature */
+        support = sfp->support;
+        sfp->support = NULL;
+        SeqFeatAsnWrite (sfp, aop, atp);
+        sfp->support = support;
+        SeqFeatFree (sfp);
+      }
+    } else {
+      AsnReadVal (aip, atp, &dv);
+      AsnWrite (aop, atp, &dv);
+      AsnKillValue (atp, &dv);
+    }
+  }
+}
+
/* CautiousSeqEntryCleanup section */
static Boolean EmptyOrNullString (CharPtr str)
@@ -2163,6 +2361,7 @@ static FeatdefNameData featdefWithName [] = {
{ FEATDEF_otherRNA , "misc_RNA" },
{ FEATDEF_misc_signal , "misc_signal" },
{ FEATDEF_misc_structure , "misc_structure" },
+ { FEATDEF_mobile_element , "mobile_element" },
{ FEATDEF_modified_base , "modified_base" },
{ FEATDEF_mRNA , "mRNA" },
{ FEATDEF_NON_STD_RESIDUE , "NonStdRes" },
@@ -2343,7 +2542,8 @@ static CharPtr featurekeys [] = {
"ncRNA",
"tmRNA",
"CloneRef",
- "VariationRef"
+ "VariationRef",
+ "mobile_element"
};
NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF)
@@ -2364,6 +2564,8 @@ NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF)
type == FEATDEF_BOND ||
type == FEATDEF_SITE) {
key = "misc_feature";
+ } else if (type == FEATDEF_VARIATIONREF) {
+ key = "variation";
}
}
@@ -3849,14 +4051,37 @@ static ValNodePtr ParsePCRColonString (
return head;
}
+static CharPtr FusePrimerNames ( /* joins two primer names as "first:second" */
+ CharPtr first,
+ CharPtr second
+)
+
+{
+ size_t len;
+ CharPtr str;
+
+ if (first == NULL) return second; /* NOTE: returns the caller's own pointer, not a copy */
+ if (second == NULL) return first; /* NOTE: same here -- only the joined result below is newly allocated */
+
+ len = StringLen (first) + StringLen (second) + 5; /* ':' and the terminator need 2; the rest is slack */
+ str = MemNew (len);
+ if (str == NULL) return NULL;
+
+ StringCpy (str, first);
+ StringCat (str, ":");
+ StringCat (str, second);
+
+ return str; /* newly allocated; the caller must free this one */
+}
+
static PCRPrimerPtr ModernizePCRPrimerHalf (
CharPtr seq,
CharPtr name
)
{
- CharPtr curr_name = NULL, curr_seq = NULL;
- PCRPrimerPtr curr_primer, last_primer = NULL, primer_set = NULL;
+ CharPtr curr_name = NULL, curr_seq = NULL, fused_name;
+ PCRPrimerPtr curr_primer = NULL, last_primer = NULL, primer_set = NULL;
ValNodePtr name_list, seq_list, name_vnp, seq_vnp;
seq_list = ParsePCRColonString (seq);
@@ -3865,7 +4090,7 @@ static PCRPrimerPtr ModernizePCRPrimerHalf (
seq_vnp = seq_list;
name_vnp = name_list;
- while (seq_vnp != NULL || name_vnp != NULL) {
+ while (seq_vnp != NULL /* || name_vnp != NULL */) {
if (seq_vnp != NULL) {
curr_seq = (CharPtr) seq_vnp->data.ptrvalue;
seq_vnp = seq_vnp->next;
@@ -3873,6 +4098,8 @@ static PCRPrimerPtr ModernizePCRPrimerHalf (
if (name_vnp != NULL) {
curr_name = (CharPtr) name_vnp->data.ptrvalue;
name_vnp = name_vnp->next;
+ } else {
+ curr_name = NULL;
}
curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
@@ -3890,6 +4117,31 @@ static PCRPrimerPtr ModernizePCRPrimerHalf (
}
}
+ while (name_vnp != NULL && last_primer != NULL) {
+   /* more names than sequences: fold each extra name into the last primer */
+   curr_name = (CharPtr) name_vnp->data.ptrvalue;
+   fused_name = FusePrimerNames (last_primer->name, curr_name);
+   if (fused_name != last_primer->name) {
+     /* FusePrimerNames may hand back one of its arguments; the old name is
+      * only replaced when the result is a different string */
+     MemFree (last_primer->name);
+     last_primer->name = StringSaveNoNull (fused_name);
+     if (fused_name != curr_name) {
+       /* freshly allocated join, already copied above: release it */
+       MemFree (fused_name);
+     }
+   }
+   name_vnp = name_vnp->next;
+ }
+
+ while (name_vnp != NULL && last_primer == NULL) {
+ curr_name = (CharPtr) name_vnp->data.ptrvalue;
+ curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
+ if (curr_primer != NULL) {
+ curr_primer->name = StringSaveNoNull (curr_name);
+
+ if (primer_set == NULL) {
+ primer_set = curr_primer;
+ }
+ if (last_primer != NULL) {
+ last_primer->next = curr_primer;
+ }
+ last_primer = curr_primer;
+ }
+ name_vnp = name_vnp->next;
+ }
+
ValNodeFreeData (seq_list);
ValNodeFreeData (name_list);
@@ -3911,7 +4163,7 @@ NLM_EXTERN void ModernizePCRPrimers (
Boolean unlink;
if (biop == NULL) return;
- if (biop->pcr_primers != NULL) return;
+ /* if (biop->pcr_primers != NULL) return; */
pset = ParsePCRSet (biop);
if (pset == NULL) return;
@@ -3944,6 +4196,10 @@ NLM_EXTERN void ModernizePCRPrimers (
FreePCRSet (pset);
if (reaction_set != NULL) {
+ if (last_reaction != NULL) {
+ /* merge with existing structured pcr_primers */
+ last_reaction->next = biop->pcr_primers;
+ }
biop->pcr_primers = reaction_set;
ssp = biop->subtype;
@@ -5576,6 +5832,13 @@ extern Boolean RemoveSequenceFromAlignments (SeqEntryPtr sep, SeqIdPtr sip)
return TRUE;
}
+static CharPtr evCategoryPrefix [] = { /* NULL-terminated table of category prefixes */
+ "", /* empty entry: matches any string with length 0 */
+ "COORDINATES: ",
+ "DESCRIPTION: ",
+ "EXISTENCE: ",
+ NULL /* sentinel that ends iteration over the table */
+};
static CharPtr inferencePrefix [] = {
"",
@@ -5657,6 +5920,23 @@ static Int2 ValidateInferenceAccession (CharPtr str, Char chr, Boolean fetchAccn
return rsult;
}
+static Char NextColonOrVerticalBar (CharPtr ptr) /* returns whichever of ':' or '|' occurs first in ptr, or '\0' when neither occurs or ptr is NULL */
+
+{
+ Char ch = '\0';
+
+ if (ptr == NULL) return ch;
+
+ ch = *ptr;
+ while (ch != '\0') { /* scan forward to the terminator */
+ if (ch == ':' || ch == '|') return ch;
+ ptr++;
+ ch = *ptr;
+ }
+
+ return ch; /* '\0' here: no separator was found */
+}
+
NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn)
{
@@ -5664,13 +5944,21 @@ NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn)
Char ch;
Boolean has_fetch_function, same_species;
size_t len;
- CharPtr nxt, ptr, rest, str;
+ CharPtr nxt, ptr, rest, skip, str;
ObjMgrProcPtr ompp = NULL;
if (StringHasNoText (val)) return EMPTY_INFERENCE_STRING;
- rest = NULL;
- best = -1;
+ skip = NULL;
+ for (j = 0; evCategoryPrefix [j] != NULL; j++) {
+ len = StringLen (evCategoryPrefix [j]);
+ if (StringNICmp (val, evCategoryPrefix [j], len) != 0) continue;
+ skip = val + len;
+ }
+ if (skip != NULL) {
+ val = skip;
+ }
+
for (j = 0; inferencePrefix [j] != NULL; j++) {
len = StringLen (inferencePrefix [j]);
if (StringNICmp (val, inferencePrefix [j], len) != 0) continue;
@@ -5715,27 +6003,38 @@ NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn)
str = StringSave (rest);
if (best >= 1 && best <= 7) {
- tmprsult = ValidateInferenceAccession (str, ':', fetchAccn, has_fetch_function);
- if (tmprsult != VALID_INFERENCE) {
- rsult = tmprsult;
+ ptr = str;
+ while (ptr != NULL) {
+ nxt = StringChr (ptr, ',');
+ if (nxt != NULL) {
+ *nxt = '\0';
+ nxt++;
+ }
+ tmprsult = ValidateInferenceAccession (ptr, ':', fetchAccn, has_fetch_function);
+ if (tmprsult != VALID_INFERENCE) {
+ rsult = tmprsult;
+ }
+ ptr = nxt;
}
} else if (best == 12) {
tmprsult = VALID_INFERENCE;
- if (StringChr (str, '|') != NULL) {
- ptr = StringRChr (str, ':');
- while (ptr != NULL) {
- *ptr = '\0';
- ptr++;
- nxt = StringChr (ptr, ',');
- if (nxt != NULL) {
- *nxt = '\0';
- }
- tmprsult = ValidateInferenceAccession (ptr, '|', fetchAccn, has_fetch_function);
- if (tmprsult != VALID_INFERENCE) {
- rsult = tmprsult;
- }
- ptr = nxt;
+ ptr = StringRChr (str, ':');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
+ }
+ while (ptr != NULL) {
+ nxt = StringChr (ptr, ',');
+ if (nxt != NULL) {
+ *nxt = '\0';
+ nxt++;
+ }
+ ch = NextColonOrVerticalBar (ptr);
+ tmprsult = ValidateInferenceAccession (ptr, ch, fetchAccn, has_fetch_function);
+ if (tmprsult != VALID_INFERENCE) {
+ rsult = tmprsult;
}
+ ptr = nxt;
}
}
@@ -5804,6 +6103,7 @@ extern void MergeFeatureIntervalsToParts (SeqFeatPtr sfp, Boolean ordered)
}
extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata)
+
{
MolInfoPtr mip;
SeqDescrPtr sdp;
@@ -5812,6 +6112,8 @@ extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata)
SeqFeatPtr sfp;
SeqMgrFeatContext context;
Int4 num_cds = 0;
+ Int4 num_mrna = 0;
+ SeqIdPtr sip;
SeqLocPtr slp;
Boolean partial5, partial3;
BioSourcePtr biop;
@@ -5866,12 +6168,27 @@ extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata)
}
} else if (sfp->data.choice == SEQFEAT_CDREGION) {
num_cds++;
- /* skip this seuqence if it has more than one coding region */
+ /* skip this sequence if it has more than one coding region */
if (num_cds > 1 && !is_master_seq) {
return;
}
+ } else if (sfp->idx.subtype == FEATDEF_mRNA) {
+ num_mrna++;
+ /* skip this sequence if it has more than one mRNA */
+ if (num_mrna > 1) return;
}
}
+
+ if (gene != NULL && gene->location != NULL) {
+ slp = gene->location;
+ if (slp->choice != SEQLOC_INT) {
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ /* skip this sequence if it is multi-interval and EMBL or DDBJ */
+ if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) return;
+ }
+ }
+ }
+
if (gene != NULL && BioseqFindFromSeqLoc (gene->location) == bsp) {
CheckSeqLocForPartial (gene->location, &partial5, &partial3);
has_nulls = LocationHasNullsBetween (gene->location);
@@ -5994,6 +6311,30 @@ NewClickableItem
}
+extern ClickableItemPtr
+NewClickableItemNoList
+(Uint4 clickable_item_type,
+ CharPtr description)
+{
+ ClickableItemPtr dip;
+
+ dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ if (dip != NULL)
+ {
+ dip->clickable_item_type = clickable_item_type;
+ dip->description = StringSave (description);
+ dip->callback_func = NULL;
+ dip->datafree_func = NULL;
+ dip->callback_data = NULL;
+ dip->item_list = NULL;
+ dip->subcategories = NULL;
+ dip->expanded = FALSE;
+ dip->level = 0;
+ }
+ return dip;
+}
+
+
extern ValNodePtr ClickableItemObjectListFree (ValNodePtr vnp)
{
ValNodePtr vnp_next;
@@ -7511,6 +7852,8 @@ static void GeneLocusTagDiscrepancyCallback (ValNodePtr item_list, Pointer userd
Message (MSG_OK, "I could launch the editor for the individual gene...");
}
+static Boolean IsBacterialBioSource (BioSourcePtr biop);
+
/* Not WGS, genome, or RefSeq */
static Boolean IsLocationDirSub (SeqLocPtr slp)
{
@@ -7571,7 +7914,7 @@ static Boolean IsLocationDirSub (SeqLocPtr slp)
sdp != NULL && rval;
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &context)) {
biop = (BioSourcePtr) sdp->data.ptrvalue;
- if (biop != NULL && biop->org != NULL && biop->org->orgname != NULL && StringICmp (biop->org->orgname->div, "BCT") == 0) {
+ if (IsBacterialBioSource(biop)) {
rval = FALSE;
}
}
@@ -8348,7 +8691,7 @@ static void FindShortIntronsCallback (SeqFeatPtr sfp, Pointer data)
Boolean found_short = FALSE, partial5, partial3;
Uint1 strand;
- if (sfp == NULL || data == NULL) {
+ if (sfp == NULL || data == NULL || IsPseudo (sfp)) {
return;
}
if (sfp->idx.subtype == FEATDEF_intron) {
@@ -8698,6 +9041,77 @@ CheckFeatureTypeForLocationDiscrepancies
}
+static Boolean HasLineage (BioSourcePtr biop, CharPtr lineage)
+{
+ CharPtr forced_lineage;
+
+ forced_lineage = GetAppProperty ("ReportLineage");
+ if (StringISearch (forced_lineage, lineage) != NULL)
+ {
+ return TRUE;
+ }
+ else if (StringHasNoText (forced_lineage)
+ && biop != NULL && biop->org != NULL && biop->org->orgname != NULL
+ && StringISearch (biop->org->orgname->lineage, lineage) != NULL)
+ {
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+}
+
+
+static Boolean BioseqHasLineage (BioseqPtr bsp, CharPtr lineage)
+{
+ SeqMgrDescContext context;
+ SeqDescrPtr sdp;
+ BioSourcePtr biop;
+ CharPtr forced_lineage;
+
+ forced_lineage = GetAppProperty ("ReportLineage");
+ if (!StringHasNoText (forced_lineage)) {
+ if (StringISearch (forced_lineage, lineage) != NULL)
+ {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+ } else if (bsp == NULL) {
+ return FALSE;
+ }
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+ if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL
+ || biop->org == NULL
+ || biop->org->orgname == NULL
+ || StringISearch (biop->org->orgname->lineage, lineage) == NULL) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+
+}
+
+
+static Boolean IsEukaryoticBioSource (BioSourcePtr biop)
+{
+ return HasLineage(biop, "Eukaryota");
+}
+
+
+static Boolean IsViralBioSource (BioSourcePtr biop)
+{
+ return HasLineage(biop, "Viruses");
+}
+
+
+static Boolean IsBacterialBioSource (BioSourcePtr biop)
+{
+ return HasLineage(biop, "Bacteria");
+}
+
+
static Boolean IsEukaryotic (BioseqPtr bsp)
{
SeqMgrDescContext context;
@@ -8713,8 +9127,7 @@ static Boolean IsEukaryotic (BioseqPtr bsp)
|| biop->genome == GENOME_chloroplast
|| biop->genome == GENOME_plastid
|| biop->genome == GENOME_apicoplast
- || biop->org == NULL || biop->org->orgname == NULL
- || StringSearch (biop->org->orgname->lineage, "Eukaryota") == NULL) {
+ || !IsEukaryoticBioSource(biop)) {
return FALSE;
} else {
return TRUE;
@@ -9174,6 +9587,20 @@ extern void FindPseudoDiscrepancies (ValNodePtr PNTR discrepancy_list, ValNodePt
}
+
+static Boolean IsProtRefEmpty (ProtRefPtr prp)
+{
+ if (prp == NULL) {
+ return TRUE;
+ } else if (prp->name != NULL || prp->desc != NULL || prp->ec != NULL
+ || prp->activity != NULL || prp->db != NULL || prp->processed != 0) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
+
NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
{
ValNodePtr vnp, entityIDList = NULL;
@@ -9181,6 +9608,10 @@ NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointe
SeqFeatPtr sfp, mrna;
CharPtr feat_txt;
SeqMgrFeatContext fcontext;
+ SeqFeatXrefPtr xref, prev_xref, next_xref;
+ ValNodePtr next_name;
+ ProtRefPtr prp;
+ RnaRefPtr rrp;
MemSet (&vn, 0, sizeof (ValNode));
vn.choice = OBJ_SEQFEAT;
@@ -9209,7 +9640,44 @@ NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointe
lip->data_in_log = TRUE;
}
mrna->pseudo = TRUE;
+ /* move mRNA product to comment */
+ if ((rrp = (RnaRefPtr) mrna->data.value.ptrvalue) != NULL
+ && rrp->ext.choice == 1) {
+ SetStringValue (&(mrna->comment), rrp->ext.value.ptrvalue, ExistingTextOption_append_semi);
+ rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
+ rrp->ext.choice = 0;
+ }
}
+
+ /* move CDS protein name to comment */
+ prev_xref = NULL;
+ for (xref = sfp->xref; xref != NULL; xref = next_xref) {
+ next_xref = xref->next;
+ if (xref->data.choice == SEQFEAT_PROT
+ && (prp = (ProtRefPtr) xref->data.value.ptrvalue) != NULL
+ && prp->name != NULL
+ && !StringHasNoText (prp->name->data.ptrvalue)) {
+ SetStringValue (&(sfp->comment), prp->name->data.ptrvalue, ExistingTextOption_append_semi);
+ prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue);
+ next_name = prp->name->next;
+ prp->name->next = NULL;
+ prp->name = ValNodeFreeData (prp->name);
+ prp->name = next_name;
+ if (IsProtRefEmpty(prp)) {
+ if (prev_xref == NULL) {
+ sfp->xref = next_xref;
+ } else {
+ prev_xref->next = next_xref;
+ }
+ xref->next = NULL;
+ xref = SeqFeatXrefFree (xref);
+ } else {
+ prev_xref = xref;
+ }
+ } else {
+ prev_xref = xref;
+ }
+ }
}
}
}
@@ -9600,7 +10068,7 @@ static void RemoveCodingRegionsWithSuppressionWords (ValNodePtr PNTR cds_list)
}
field = FeatureFieldNew ();
- field->type = Feature_type_cds;
+ field->type = Macro_feature_type_cds;
field->field = ValNodeNew (NULL);
field->field->choice = FeatQualChoice_legal_qual;
field->field->data.intvalue = Feat_qual_legal_product;
@@ -10155,6 +10623,52 @@ extern void FindShortContigs (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_l
}
}
+
+static void RemoveShortContigsWithoutAnnotation (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+ ValNodePtr vnp, entityIDList = NULL;
+ BioseqPtr bsp;
+ SeqFeatPtr sfp;
+ SeqMgrFeatContext context;
+ CharPtr txt;
+
+ if (Message (MSG_OKC, "Are you sure you want to remove short contigs without annotation?") == ANS_CANCEL) {
+ return;
+ }
+
+ for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
+ if (vnp->choice == OBJ_BIOSEQ) {
+ bsp = (BioseqPtr) vnp->data.ptrvalue;
+ if (bsp->annot == NULL) {
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
+ if (sfp == NULL) {
+ if (lip != NULL) {
+ lip->data_in_log = TRUE;
+ if (lip->fp != NULL) {
+ txt = GetDiscrepancyItemText (vnp);
+ fprintf (lip->fp, "Removed short contig without annotation: %s\n", txt);
+ txt = MemFree (txt);
+ }
+ }
+ bsp->idx.deleteme = TRUE;
+ ValNodeAddInt (&entityIDList, 0, bsp->idx.entityID);
+ }
+ }
+ }
+ }
+
+ entityIDList = ValNodeSort (entityIDList, SortByIntvalue);
+ ValNodeUnique (&entityIDList, SortByIntvalue, ValNodeFree);
+
+ for (vnp = entityIDList; vnp != NULL; vnp = vnp->next) {
+ DeleteMarkedObjects (vnp->data.intvalue, 0, NULL);
+ ObjMgrSetDirtyFlag (vnp->data.intvalue, TRUE);
+ ObjMgrSendMsg (OM_MSG_UPDATE, vnp->data.intvalue, 0, 0);
+ }
+ entityIDList = ValNodeFree (entityIDList);
+}
+
+
static void FindShortSequencesCallback (BioseqPtr bsp, Pointer userdata)
{
ValNodePtr PNTR bioseq_list;
@@ -10473,10 +10987,57 @@ static Boolean DoesStringContainPhrase (CharPtr str, CharPtr phrase, Boolean cas
}
typedef Boolean (*SuspectProductNameSearchFunc) PROTO ((CharPtr, CharPtr));
+typedef void (*SuspectProductNameReplaceFunc) PROTO ((CharPtr PNTR, CharPtr, CharPtr, SeqFeatPtr));
+
+typedef enum {
+ eSuspectNameType_None = 0,
+ eSuspectNameType_Typo = 1,
+ eSuspectNameType_QuickFix,
+ eSuspectNameType_NoOrganelleForProkaryote,
+ eSuspectNameType_MightBeNonfunctional,
+ eSuspectNameType_Database,
+ eSuspectNameType_RemoveOrganismName,
+ eSuspectNameType_InappropriateSymbol,
+ eSuspectNameType_EvolutionaryRelationship,
+ eSuspectNameType_UseProtein,
+ eSuspectNameType_Max
+} ESuspectNameType;
+
+static CharPtr suspect_name_category_names[] = {
+ "Unknown category",
+ "Typo",
+ "Quick fix",
+ "Organelles not appropriate in prokaryote",
+ "Suspicous phrase; should this be nonfunctional?",
+ "May contain database identifer more appropriate in note; remove from product name",
+ "Remove organism from product name",
+ "Possible parsing error or incorrect formatting; remove inappropriate symbols",
+ "Implies evolutionary relationship; change to -like protein",
+ "Use xxx protein or xxx-containing protein",
+ "Unknown category"
+};
+
+
+static Boolean CategoryOkForBioSource (BioSourcePtr biop, ESuspectNameType name_type)
+{
+ if (name_type != eSuspectNameType_NoOrganelleForProkaryote) {
+ return TRUE;
+ } else if (!HasTaxonomyID (biop)) {
+ return TRUE;
+ } else if (IsEukaryoticBioSource(biop)) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
typedef struct suspectproductname {
CharPtr pattern;
SuspectProductNameSearchFunc search_func;
+ ESuspectNameType fix_type;
+ CharPtr replace_phrase;
+ SuspectProductNameReplaceFunc replace_func;
} SuspectProductNameData, PNTR SuspectProductNamePtr;
@@ -10510,185 +11071,34 @@ static Boolean StartsWithPattern (CharPtr pattern, CharPtr search)
}
-static Boolean ProductContainsTerm (CharPtr pattern, CharPtr search)
-{
- CharPtr str;
-
- /* don't bother searching for c-term or n-term if product name contains "domain" */
- if (StringISearch (search, "domain") != NULL) {
- return FALSE;
- }
+static CharPtr s_putative_replacements[] = {
+ "possible",
+ "potential",
+ "predicted",
+ "probable",
+ NULL
+};
- str = StringISearch(search, pattern);
- /* c-term and n-term must be either first word or separated from other word by space, num, or punct */
- if (str != NULL && (str == search || !isalpha (*(str - 1)))) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-static Boolean MayContainPlural (CharPtr pattern, CharPtr search)
+static Boolean StartsWithPutativeReplacement (CharPtr pattern, CharPtr search)
{
- CharPtr cp;
- Char last_letter, second_to_last_letter, next_letter;
- Int4 word_len = 0;
- Boolean may_contain_plural = FALSE;
- CharPtr word_skip = " ,";
+ Int4 i;
- if (search == NULL) return FALSE;
- cp = search;
- while (*cp != 0 && !may_contain_plural) {
- word_len = StringCSpn (cp, word_skip);
- last_letter = *(cp + word_len - 1);
- if (last_letter == 's') {
- if (word_len >=5 && StringNCmp (cp + word_len - 5, "trans", 5) == 0) {
- /* not plural */
- cp = cp + word_len;
- cp += StringSpn (cp, word_skip);
- } else if (word_len > 2
- && (second_to_last_letter = *(cp + word_len - 2)) != 's'
- && second_to_last_letter != 'i'
- && second_to_last_letter != 'u'
- && ((next_letter = *(cp + word_len)) == ',' || next_letter == 0)) {
- may_contain_plural = TRUE;
- } else {
- cp = cp + word_len;
- cp += StringSpn (cp, word_skip);
- }
- } else {
- cp = cp + word_len;
- cp += StringSpn (cp, word_skip);
+ for (i = 0; s_putative_replacements[i] != NULL; i++) {
+ if (StartsWithPattern(s_putative_replacements[i], search)) {
+ return TRUE;
}
}
- return may_contain_plural;
-}
-
-
-static CharPtr FindFirstOpen (CharPtr cp)
-{
- CharPtr pa, ba;
-
- if (cp == NULL) {
- return NULL;
- }
- pa = StringChr (cp, '(');
- ba = StringChr (cp, '[');
- if (pa == NULL) {
- return ba;
- } else if (ba == NULL || ba > pa) {
- return pa;
- } else {
- return ba;
- }
+ return FALSE;
}
-static Char GetClose (Char ch)
+static Boolean MayContainPlural (CharPtr pattern, CharPtr search)
{
- if (ch == '(') {
- return ')';
- } else if (ch == '[') {
- return ']';
- } else if (ch == '{') {
- return '}';
- } else {
- return ch;
- }
+ return StringMayContainPlural (search);
}
-static Boolean SkipBracketOrParen (CharPtr bp, CharPtr start, CharPtr PNTR skip_to)
-{
- Boolean rval = FALSE;
- CharPtr ep, ns;
-
- if (bp - start > 2 && StringNCmp (bp - 3, "NAD(P)", 6) == 0) {
- rval = TRUE;
- *skip_to = bp + 6;
- } else if (StringNCmp (bp, "(NAD(P)H)", 9) == 0) {
- rval = TRUE;
- *skip_to = bp + 9;
- } else if (StringNCmp (bp, "(NAD(P))", 8) == 0) {
- rval = TRUE;
- *skip_to = bp + 8;
- } else if (StringNCmp (bp, "(I)", 3) == 0) {
- rval = TRUE;
- *skip_to = bp + 4;
- } else if (StringNCmp (bp, "(II)", 4) == 0) {
- rval = TRUE;
- *skip_to = bp + 5;
- } else if (StringNCmp (bp, "(III)", 5) == 0) {
- rval = TRUE;
- *skip_to = bp + 6;
- } else if (StringNCmp (bp, "(NADPH)", 7) == 0) {
- rval = TRUE;
- *skip_to = bp + 7;
- } else if (StringNCmp (bp, "(NAD+)", 6) == 0) {
- rval = TRUE;
- *skip_to = bp + 6;
- } else if (StringNCmp (bp, "(NAPPH/NADH)", 12) == 0) {
- rval = TRUE;
- *skip_to = bp + 12;
- } else if (StringNCmp (bp, "(NADP+)", 7) == 0) {
- rval = TRUE;
- *skip_to = bp + 7;
- } else if (StringNCmp (bp, "[acyl-carrier protein]", 22) == 0) {
- rval = TRUE;
- *skip_to = bp + 22;
- } else if (StringNCmp (bp, "[acyl-carrier-protein]", 22) == 0) {
- rval = TRUE;
- *skip_to = bp + 22;
- } else if (StringNCmp (bp, "(acyl carrier protein)", 22) == 0) {
- rval = TRUE;
- *skip_to = bp + 22;
- } else {
- ns = StringChr (bp + 1, *bp);
- ep = StringChr (bp + 1, GetClose(*bp));
- if (ep != NULL && (ns == NULL || ns > ep)) {
- if (ep - bp < 5) {
- rval = TRUE;
- *skip_to = ep + 1;
- } else if (ep - bp > 3 && StringNCmp (ep - 3, "ing", 3) == 0) {
- rval = TRUE;
- *skip_to = ep + 1;
- }
- }
- }
- return rval;
-}
-
-
-static Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n)
-{
- CharPtr cp, end;
- Int4 num_found = 0;
-
- if (search == NULL) {
- return FALSE;
- }
-
- cp = FindFirstOpen(search);
- while (num_found < n && cp != NULL && *cp != 0) {
- if (SkipBracketOrParen(cp, search, &cp)) {
- /* ignore it */
- cp = FindFirstOpen (cp);
- } else if ((end = StringChr (cp, GetClose (*cp))) == NULL) {
- /* skip, doesn't close the bracket */
- cp = FindFirstOpen (cp + 1);
- } else {
- cp = FindFirstOpen (end);
- num_found ++;
- }
- }
-
- if (num_found >= n) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
static Boolean ContainsBracketsOrParentheses (CharPtr pattern, CharPtr search)
{
@@ -10731,6 +11141,24 @@ static Boolean BeginsWithPunct (CharPtr pattern, CharPtr search)
}
+static Boolean BeginsOrEndsWithQuotes (CharPtr pattern, CharPtr search)
+{
+ Int4 len;
+
+ if (search == NULL) return FALSE;
+ if (search[0] == '\'' || search[0] == '"') {
+ return TRUE;
+ } else {
+ len = StringLen (search);
+ if (search[len - 1] == '\'' || search[len - 1] == '"') {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+ }
+}
+
+
static Boolean ContainsUnknownName (CharPtr pattern, CharPtr search)
{
if (StringISearch(search, pattern) != NULL
@@ -10810,150 +11238,21 @@ static Boolean NormalSearch (CharPtr pattern, CharPtr search)
}
-static Boolean FollowedByFamily (CharPtr PNTR str)
-{
- Int4 word_len;
-
- if (str == NULL || *str == NULL || **str == 0) {
- return FALSE;
- }
-
- word_len = StringCSpn (*str + 1, " ");
- if (*(*str + word_len + 1) != 0 && StringNCmp (*str + word_len + 2, "family", 6) == 0) {
- *str = *str + word_len + 7;
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
-static Boolean PrecededByPrefix (CharPtr search, CharPtr cp, CharPtr prefix)
-{
- Int4 len;
-
- if (search == NULL || cp == NULL || StringHasNoText (prefix)) {
- return FALSE;
- }
- len = StringLen (prefix);
- if (cp - search >= len && StringNCmp (cp - len, prefix, len) == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
-static Boolean InWordBeforeCytochromeOrCoenzyme (CharPtr cp, CharPtr start)
-{
- if (cp == NULL) {
- return FALSE;
- }
-
- while (cp > start && !isspace (*cp)) {
- cp--;
- }
- if (cp == start) {
- return FALSE;
- }
- while (cp > start && isspace (*cp)) {
- cp--;
- }
- if (cp - start >= 9 && StringNICmp (cp - 9, "cytochrome", 10) == 0) {
- return TRUE;
- } else if (cp - start >= 7 && StringNCmp (cp - 7, "coenzyme", 8) == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
static Boolean ThreeOrMoreNumbersTogether (CharPtr pattern, CharPtr search)
{
- CharPtr p;
- Int4 num_digits = 0;
-
- if (search == NULL) {
- return FALSE;
- }
-
- p = search;
- while (*p != 0) {
- if (isdigit (*p)) {
- if (PrecededByPrefix(search, p, "DUF")
- || PrecededByPrefix(search, p, "UPF")
- || PrecededByPrefix(search, p, "IS")
- || PrecededByPrefix(search, p, "TIGR")) {
- p += StrSpn (p, "0123456789") - 1;
- num_digits = 0;
- } else if (InWordBeforeCytochromeOrCoenzyme (p, search)) {
- p += StrSpn (p, "0123456789") - 1;
- num_digits = 0;
- } else {
- num_digits ++;
- if (num_digits == 3) {
- if (FollowedByFamily (&p)) {
- num_digits = 0;
- } else {
- return TRUE;
- }
- }
- }
- } else {
- num_digits = 0;
- }
- p++;
- }
- return FALSE;
+ return ContainsThreeOrMoreNumbersTogether (search);
}
+
static Boolean ContainsUnderscore (CharPtr pattern, CharPtr search)
{
- CharPtr cp;
-
- if (search == NULL) {
- return FALSE;
- }
-
- cp = StringChr (search, '_');
- while (cp != NULL) {
- if (FollowedByFamily (&cp)) {
- /* search again */
- cp = StringChr (cp, '_');
- } else if (cp - search < 3 || *(cp + 1) == 0) {
- return TRUE;
- } else if ((StringNCmp (cp - 3, "MFS", 3) == 0
- || StringNCmp (cp - 3, "TPR", 3) == 0
- || StringNCmp (cp - 3, "AAA", 3) == 0)
- && isdigit (*(cp + 1)) && !isdigit (*(cp + 2))) {
- cp = StringChr (cp + 1, '_');
- } else {
- return TRUE;
- }
- }
- return FALSE;
+ return StringContainsUnderscore (search);
}
static Boolean PrefixPlusNumbersOnly (CharPtr pattern, CharPtr search)
{
- Int4 pattern_len, digit_len;
-
- if (search == NULL) {
- return FALSE;
- }
- pattern_len = StringLen (pattern);
- if (pattern_len > 0 && StringNCmp (search, pattern, pattern_len) != 0) {
- return FALSE;
- }
-
- digit_len = StringSpn (search + pattern_len, "1234567890");
- if (digit_len > 0 && *(search + pattern_len + digit_len) == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
+ return IsPrefixPlusNumbers (pattern, search);
}
@@ -11004,45 +11303,20 @@ static Boolean AllCapitalLetters (CharPtr pattern, CharPtr search)
static Boolean ContainsUnbalancedParentheses (CharPtr pattern, CharPtr search)
{
- CharPtr buffer, cp_src;
- Int4 pos = 0;
- Boolean is_bad = FALSE;
-
- if (search == NULL) {
- return FALSE;
- }
+ return StringContainsUnbalancedParentheses (search);
+}
- /* note - don't need space for terminating character */
- buffer = MemNew (sizeof (Char) * StringLen (search));
- cp_src = search;
- while (*cp_src != 0 && !is_bad) {
- if (*cp_src == '(' || *cp_src == '[') {
- buffer[pos++] = *cp_src;
- } else if (*cp_src == ')') {
- if (pos < 1) {
- is_bad = TRUE;
- } else if (buffer[pos - 1] != '(') {
- is_bad = TRUE;
- } else {
- pos --;
- }
- } else if (*cp_src == ']') {
- if (pos < 1) {
- is_bad = TRUE;
- } else if (buffer[pos - 1] != '[') {
- is_bad = TRUE;
- } else {
- pos--;
- }
- }
- ++cp_src;
- }
- if (pos > 0) {
- is_bad = TRUE;
+static Boolean IsTooLong (CharPtr pattern, CharPtr search)
+{
+ if (StringISearch (search, "bifunctional") != NULL
+ || StringISearch (search, "multifunctional") != NULL) {
+ return FALSE;
+ } else if (StringLen (search) > 100) {
+ return TRUE;
+ } else {
+ return FALSE;
}
- buffer = MemFree (buffer);
- return is_bad;
}
@@ -11092,6 +11366,159 @@ static Boolean ContainsDoubleSpace (CharPtr pattern, CharPtr search)
}
+static void SimpleReplaceFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ FindReplaceString (orig, find, replace, FALSE, TRUE);
+}
+
+
+static void SimpleReplaceAnywhereFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ FindReplaceString (orig, find, replace, FALSE, FALSE);
+}
+
+
+static void ReplaceWholeNameFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ if (orig == NULL) {
+ return;
+ }
+ if (IsSingleWordOrWeaselPlusSingleWord(find, *orig)) {
+ *orig = MemFree (*orig);
+ *orig = StringSave (replace);
+ }
+}
+
+
+static void ReplaceWholeNameAddNoteFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ if (orig == NULL) {
+ return;
+ }
+ if (IsSingleWordOrWeaselPlusSingleWord(find, *orig)) {
+ SetStringValue (&(sfp->comment), *orig, ExistingTextOption_append_semi);
+ *orig = MemFree (*orig);
+ *orig = StringSave (replace);
+ }
+}
+
+
+static void ReplaceAtFront (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ Int4 orig_len, find_len, replace_len, new_len;
+ CharPtr new_str;
+
+ if (orig == NULL || find == NULL) {
+ return;
+ }
+
+ orig_len = StringLen (*orig);
+ find_len = StringLen (find);
+ if (find_len > orig_len || StringNICmp (*orig, find, find_len) != 0) {
+ return;
+ }
+ replace_len = StringLen (replace);
+
+ new_len = orig_len + replace_len - find_len;
+ new_str = (CharPtr) MemNew (sizeof (Char) * (new_len + 1));
+ if (replace_len > 0) {
+ StringCpy (new_str, replace);
+ }
+ StringCat (new_str, (*orig) + find_len);
+ *orig = MemFree (*orig);
+ *orig = new_str;
+}
+
+
+static void ReplaceAtEnd (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ Int4 orig_len, find_len, replace_len, new_len;
+ CharPtr new_str;
+
+ if (orig == NULL || find == NULL) {
+ return;
+ }
+
+ orig_len = StringLen (*orig);
+ find_len = StringLen (find);
+ if (find_len > orig_len || StringICmp ((*orig) + orig_len - find_len, find) != 0) {
+ return;
+ }
+ replace_len = StringLen (replace);
+
+ new_len = orig_len + replace_len - find_len;
+ new_str = (CharPtr) MemNew (sizeof (Char) * (new_len + 1));
+ StringNCpy (new_str, *orig, orig_len - find_len);
+ if (replace_len > 0) {
+ StringCat (new_str, replace);
+ }
+ *(new_str + new_len) = 0;
+ *orig = MemFree (*orig);
+ *orig = new_str;
+}
+
+
+static void UsePutative (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ Int4 i;
+ for (i = 0; s_putative_replacements[i] != NULL; i++) {
+ ReplaceAtFront (orig, s_putative_replacements[i], "putative", sfp);
+ }
+}
+
+
+static void RemoveBeginningAndEndingQuotes (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ CharPtr src, dst;
+ Int4 len;
+
+ if (orig == NULL || *orig == NULL || !BeginsOrEndsWithQuotes (NULL, *orig)) {
+ return;
+ }
+ src = *orig;
+ dst = *orig;
+ if (*src == '\'' || *src == '"') {
+ src++;
+ while (*src != 0) {
+ *dst = *src;
+ dst++;
+ src++;
+ }
+ *dst = 0;
+ }
+ len = StringLen (*orig);
+ if ((*orig)[len - 1] == '\'' || (*orig)[len - 1] == '"') {
+ (*orig)[len - 1] = 0;
+ }
+}
+
+
+static void FixLongProduct (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ Int4 len, keep_len;
+ if (orig == NULL || *orig == NULL || sfp == NULL || *orig == sfp->comment) {
+ return;
+ }
+ len = StringLen (*orig);
+ keep_len = StringCSpn (*orig, ",;(");
+ if (keep_len < len) {
+ SetStringValue (&(sfp->comment), *orig, ExistingTextOption_append_semi);
+ *((*orig) + keep_len) = 0;
+ }
+}
+
+
+static void HaemReplaceFunc (CharPtr PNTR orig, CharPtr find, CharPtr replace, SeqFeatPtr sfp)
+{
+ if (orig == NULL || *orig == NULL) {
+ return;
+ }
+
+ FindReplaceString (orig, find, "heme", FALSE, TRUE);
+ FindReplaceString (orig, find, "hem", FALSE, FALSE);
+}
+
+
static CharPtr SummarizeSuspectPhraseFunc (SuspectProductNameSearchFunc s)
{
if (s == NULL) {
@@ -11114,413 +11541,635 @@ static CharPtr SummarizeSuspectPhraseFunc (SuspectProductNameSearchFunc s)
return "contains double space";
} else if (s == PrefixPlusNumbersOnly) {
return "entire product is prefix followed by numbers";
+ } else if (s == IsTooLong) {
+ return "longer than 50 characters";
} else {
return "special rules";
}
}
+static CharPtr SummarizeSuspectReplacementPhrase (SuspectProductNameReplaceFunc s, CharPtr replace_phrase)
+{
+ CharPtr phrase = NULL;
+ CharPtr simple_fmt = "Replace with '%s' (whole word)";
+ CharPtr simple_anywhere_fmt = "Replace with '%s'";
+ CharPtr whole_fmt = "Replace entire product name with '%s'";
+ CharPtr whole_note_fmt = "Move product name to note, use '%s' for product name";
+
+
+ if (s == NULL) {
+ return StringSave ("No replacement");
+ } else if (s == SimpleReplaceFunc) {
+ phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (simple_fmt) + StringLen (replace_phrase)));
+ sprintf (phrase, simple_fmt, replace_phrase);
+ } else if (s == SimpleReplaceAnywhereFunc) {
+ phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (simple_anywhere_fmt) + StringLen (replace_phrase)));
+ sprintf (phrase, simple_anywhere_fmt, replace_phrase);
+ } else if (s == FixLongProduct) {
+ phrase = StringSave ("Truncate at first comma or semicolon");
+ } else if (s == UsePutative) {
+ phrase = StringSave ("Replace with 'putative'");
+ } else if (s == ReplaceWholeNameFunc) {
+ phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (whole_fmt) + StringLen (replace_phrase)));
+ sprintf (phrase, whole_fmt, replace_phrase);
+ } else if (s == ReplaceWholeNameAddNoteFunc) {
+ phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (whole_note_fmt) + StringLen (replace_phrase)));
+ sprintf (phrase, whole_note_fmt, replace_phrase);
+ } else if (s == ReplaceAtEnd || s == ReplaceAtFront) {
+ phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (simple_anywhere_fmt) + StringLen (replace_phrase)));
+ sprintf (phrase, simple_anywhere_fmt, replace_phrase);
+ } else {
+ phrase = StringSave ("Unknown replacement action");
+ }
+ return phrase;
+}
+
+
static SuspectProductNameData suspect_product_terms[] = {
- { "like", EndsWithPattern },
- { "repeat", EndsWithPattern },
- { "domain", EndsWithPattern },
- { "fold", EndsWithFold },
- { "motif", EndsWithPattern },
- { "related", EndsWithPattern },
- { "binding", EndsWithPattern },
- { "containing", EndsWithPattern },
- { "containing", StartsWithPattern },
- { "from", StartsWithPattern },
- { "N-term", ProductContainsTerm },
- { "N term", ProductContainsTerm },
- { "C-term", ProductContainsTerm },
- { "C term", ProductContainsTerm },
- { "may contain a plural", MayContainPlural },
- { "Brackets or parenthesis [] ()", ContainsBracketsOrParentheses },
- { "Two or more sets of brackets or parentheseis", ContainsTwoSetsOfBracketsOrParentheses },
- { "ending with period, comma, hyphen, underscore, colon, or forward slash", EndsWithPunct },
- { "beginning with period, comma, or hyphen", BeginsWithPunct },
- { "unknown", ContainsUnknownName },
- { "COG", ContainsWholeWordCaseSensitive },
- { "EST", ContainsWholeWordCaseSensitive },
- { "DUF", ContainsWholeWordCaseSensitive },
- { "UPF", ContainsWholeWordCaseSensitive },
- { "DUF", PrefixPlusNumbersOnly },
- { "UPF", PrefixPlusNumbersOnly },
- { "IS", PrefixPlusNumbersOnly },
- { "FOG", ContainsWholeWordCaseSensitive },
- { "Subtilis", ContainsWholeWord },
- { "coli", ContainsWholeWord },
- { "pseudo", ContainsWholeWord },
- { "gene", ContainsWholeWord },
- { "genes", ContainsWholeWord },
- { "homo", ContainsWholeWord },
- { "argininte", ContainsWholeWord },
- { "diacyglycerol", ContainsWholeWord },
- { "glycosy", ContainsWholeWord },
- { "hypothetica", ContainsWholeWord },
- { "ncharacterized", ContainsWholeWord },
- { "obalt", ContainsWholeWord },
- { "odule", ContainsWholeWord },
- { "protei", ContainsWholeWord },
- { "sigm", ContainsWholeWord },
- { "thiamin/thiamin", ContainsWholeWord },
- { "threonin", ContainsWholeWord },
- { "ypothetical", ContainsWholeWord },
- { "ytochrome", ContainsWholeWord },
- { "aminotransferasee", ContainsWholeWord },
- { "bioin", ContainsWholeWord },
- { "biosythesis", ContainsWholeWord },
- { "chelatin", ContainsWholeWord },
- { "componenet", ContainsWholeWord },
- { "familie", ContainsWholeWord },
- { "hexpeptide", ContainsWholeWord },
- { "homocystein", ContainsWholeWord },
- { "initation", ContainsWholeWord },
- { "mobilisation", ContainsWholeWord },
- { "mutatrotase", ContainsWholeWord },
- { "oxidoreductasee", ContainsWholeWord },
- { "periplasmc", ContainsWholeWord },
- { "puter", ContainsWholeWord },
- { "reductasee", ContainsWholeWord },
- { "thioderoxin", ContainsWholeWord },
- { "transferasee", ContainsWholeWord },
- { "protein", IsSingleWord },
- { "putative protein", IsSingleWord },
- { "probable protein", IsSingleWord },
- { "sodium", IsSingleWord },
- { "CHC2 zinc finger", IsSingleWord },
- { "SWIM zinc finger", IsSingleWord },
- { "putative", IsSingleWordOrWeaselPlusSingleWord },
- { "probable", IsSingleWordOrWeaselPlusSingleWord },
- { "protein-containing", IsSingleWordOrWeaselPlusSingleWord },
- { "protein containing", IsSingleWordOrWeaselPlusSingleWord },
- { "transposase of", IsSingleWordOrWeaselPlusSingleWord },
- { "hypothetical", IsSingleWordOrWeaselPlusSingleWord },
- { "conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord },
- { "conserved", IsSingleWordOrWeaselPlusSingleWord },
- { "purine", IsSingleWordOrWeaselPlusSingleWord },
- { "iron", IsSingleWordOrWeaselPlusSingleWord },
- { "phage", IsSingleWordOrWeaselPlusSingleWord },
- { "insertion sequence", IsSingleWordOrWeaselPlusSingleWord },
- { "transposon", IsSingleWordOrWeaselPlusSingleWord },
- { "signal peptide", IsSingleWordOrWeaselPlusSingleWord },
- { "NAD", IsSingleWordOrWeaselPlusSingleWord },
- { "p-loop", IsSingleWordOrWeaselPlusSingleWord },
- { "helix-turn-helix", IsSingleWordOrWeaselPlusSingleWord },
- { "domain family", IsSingleWordOrWeaselPlusSingleWord },
- { "PASTA", IsSingleWordOrWeaselPlusSingleWord },
- { "zinc finger", IsSingleWordOrWeaselPlusSingleWord },
- { "amino acid", IsSingleWordOrWeaselPlusSingleWord },
- { "peptide", IsSingleWordOrWeaselPlusSingleWord },
- { "citrate", IsSingleWordOrWeaselPlusSingleWord },
- { "PTS system", IsSingleWordOrWeaselPlusSingleWord },
- { "putative protein", IsSingleWordOrWeaselPlusSingleWord },
- { "Alanine", IsSingleWordOrWeaselPlusSingleWord },
- { "Arginine", IsSingleWordOrWeaselPlusSingleWord },
- { "Asparagine", IsSingleWordOrWeaselPlusSingleWord },
- { "Aspartic acid", IsSingleWordOrWeaselPlusSingleWord },
- { "Cysteine", IsSingleWordOrWeaselPlusSingleWord },
- { "DNA", IsSingleWordOrWeaselPlusSingleWord },
- { "Glutamic acid", IsSingleWordOrWeaselPlusSingleWord },
- { "Glutamine", IsSingleWordOrWeaselPlusSingleWord },
- { "Glycine", IsSingleWordOrWeaselPlusSingleWord },
- { "Histidine", IsSingleWordOrWeaselPlusSingleWord },
- { "Isoleucine", IsSingleWordOrWeaselPlusSingleWord },
- { "Leucine", IsSingleWordOrWeaselPlusSingleWord },
- { "Lysine", IsSingleWordOrWeaselPlusSingleWord },
- { "Methionine", IsSingleWordOrWeaselPlusSingleWord },
- { "ORF", IsSingleWordOrWeaselPlusSingleWord },
- { "Phenylalanine", IsSingleWordOrWeaselPlusSingleWord },
- { "Proline", IsSingleWordOrWeaselPlusSingleWord },
- { "RNA", IsSingleWordOrWeaselPlusSingleWord },
- { "Serine", IsSingleWordOrWeaselPlusSingleWord },
- { "Threonine", IsSingleWordOrWeaselPlusSingleWord },
- { "Tryptophan", IsSingleWordOrWeaselPlusSingleWord },
- { "Tyrosine", IsSingleWordOrWeaselPlusSingleWord },
- { "Valine", IsSingleWordOrWeaselPlusSingleWord },
- { "adenine", IsSingleWordOrWeaselPlusSingleWord },
- { "barrel", IsSingleWordOrWeaselPlusSingleWord },
- { "carbon", IsSingleWordOrWeaselPlusSingleWord },
- { "cytosine", IsSingleWordOrWeaselPlusSingleWord },
- { "domain", IsSingleWordOrWeaselPlusSingleWord },
- { "domain protein", IsSingleWordOrWeaselPlusSingleWord },
- { "factor", IsSingleWordOrWeaselPlusSingleWord },
- { "family protein", IsSingleWordOrWeaselPlusSingleWord },
- { "finger", IsSingleWordOrWeaselPlusSingleWord },
- { "ggdef", IsSingleWordOrWeaselPlusSingleWord },
- { "guanine", IsSingleWordOrWeaselPlusSingleWord },
- { "helium", IsSingleWordOrWeaselPlusSingleWord },
- { "helix", IsSingleWordOrWeaselPlusSingleWord },
- { "hydrogen", IsSingleWordOrWeaselPlusSingleWord },
- { "hypothetical ORF", IsSingleWordOrWeaselPlusSingleWord },
- { "mRNA", IsSingleWordOrWeaselPlusSingleWord },
- { "membrane", IsSingleWordOrWeaselPlusSingleWord },
- { "ncRNA", IsSingleWordOrWeaselPlusSingleWord },
- { "nitrogen", IsSingleWordOrWeaselPlusSingleWord },
- { "oxygen", IsSingleWordOrWeaselPlusSingleWord },
- { "plasmid", IsSingleWordOrWeaselPlusSingleWord },
- { "precursor", IsSingleWordOrWeaselPlusSingleWord },
- { "protein of unknown function", IsSingleWordOrWeaselPlusSingleWord },
- { "putative conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord },
- { "putative hypothetical", IsSingleWordOrWeaselPlusSingleWord },
- { "putative signal peptide", IsSingleWordOrWeaselPlusSingleWord },
- { "rRNA", IsSingleWordOrWeaselPlusSingleWord },
- { "repeat", IsSingleWordOrWeaselPlusSingleWord },
- { "secreted", IsSingleWordOrWeaselPlusSingleWord },
- { "signal", IsSingleWordOrWeaselPlusSingleWord },
- { "subunit", IsSingleWordOrWeaselPlusSingleWord },
- { "tRNA", IsSingleWordOrWeaselPlusSingleWord },
- { "thymine", IsSingleWordOrWeaselPlusSingleWord },
- { "uracil", IsSingleWordOrWeaselPlusSingleWord },
- { "zinc", IsSingleWordOrWeaselPlusSingleWord },
- { "transport-associated", IsSingleWordOrWeaselPlusSingleWord },
- { "Similar to", NormalSearch },
- { "Related to", NormalSearch },
- { "interrupt", NormalSearch },
- { "Homolog", NormalSearch },
- { "Homologue", NormalSearch },
- { "Fragment", NormalSearch },
- { "Frameshift", NormalSearch },
- { "Intein", NormalSearch },
- { "Chloroplast", NormalSearch },
- { "Mitochondrial", NormalSearch },
- { "puatative", NormalSearch },
- { "putaive", NormalSearch },
- { "putaitve", NormalSearch },
- { "putatitve", NormalSearch },
- { "putataive", NormalSearch },
- { "putatuve", NormalSearch },
- { "ortholog", NormalSearch },
- { "orthologue", NormalSearch },
- { "paralog", NormalSearch },
- { "paralogue", NormalSearch },
- { "bifunctional protein", NormalSearch },
- { "pseudogene", NormalSearch },
- { "frame shift", NormalSearch },
- { "protien", NormalSearch },
- { "partial", NormalSearch },
- { "sphaeroides", NormalSearch },
- { "or related", NormalSearch },
- { "authentic point mutation", NormalSearch },
- { "novel protein", NormalSearch },
- { "ttg start", NormalSearch },
- { "domain protein domain protein", NormalSearch },
- { "deletion", NormalSearch },
- { "truncat", NormalSearch },
- { "hypothteical", NormalSearch },
- { "hypotethical", NormalSearch },
- { "hypothetcial", NormalSearch },
- { "consevered", NormalSearch },
- { "cotaining", NormalSearch },
- { "gIycerol", NormalSearch },
- { "haemagglutination", NormalSearch },
- { "family family", NormalSearch },
- { "domain domain", NormalSearch },
- { "putative, putative", NormalSearch },
- { "putative putative", NormalSearch },
- { "putative probable", NormalSearch },
- { "probable putative", NormalSearch },
- { "similar", NormalSearch },
- { "characterisation", NormalSearch },
- { "uncharacterised", NormalSearch },
- { "putatvie", NormalSearch },
- { "putaitve", NormalSearch },
- { "simmilar", NormalSearch },
- { "ribosoml", NormalSearch },
- { "transcirbed", NormalSearch },
- { "recognised", NormalSearch },
- { "heam", NormalSearch },
- { "haem", NormalSearch },
- { "golgi", NormalSearch },
- { "active site", NormalSearch },
- { "human", NormalSearch },
- { "domian", NormalSearch },
- { "facotr", NormalSearch },
- { "proein", NormalSearch },
- { "trnasporter", NormalSearch },
- { "tranporter", NormalSearch },
- { "proteinn", NormalSearch },
- { "homo sapiens", NormalSearch },
- { "sapiens", NormalSearch },
- { "Transmebrane", NormalSearch },
- { "Transemembrane", NormalSearch },
- { "Intiation", NormalSearch },
- { "Portein", NormalSearch },
- { "protrein", NormalSearch },
- { "hypotehtical", NormalSearch },
- { "K potassium", NormalSearch },
- { "K+ potassium", NormalSearch },
- { "outers", NormalSearch },
- { "weakly conserved", NormalSearch },
- { "highly conserved", NormalSearch },
- { "narrowly conserved", NormalSearch },
- { "No definition line found", NormalSearch },
- { "ECOLI", NormalSearch },
- { "alternate protein name", NormalSearch },
- { "widely conserved", NormalSearch },
- { "putative orphan protein", NormalSearch },
- { "orphan protein", NormalSearch },
- { "Plasmodium", NormalSearch },
- { "bos taurus", NormalSearch },
- { "open reading frame", NormalSearch },
- { "?", NormalSearch },
- { "#", NormalSearch },
- { ". ", NormalSearch },
- { "|", NormalSearch },
- { "=", NormalSearch },
- { "\\-PA", NormalSearch },
- { "_", ContainsUnderscore },
- { "three or more numbers together, not after 'UPF' or 'DUF' or 'IS' and not followed by the word 'family' and not preceded by either 'cytochrome' or 'coenzyme'", ThreeOrMoreNumbersTogether },
- { "putaitive", NormalSearch },
- { "putatve", NormalSearch },
- { "hypothtical", NormalSearch },
- { "hypotheical", NormalSearch },
- { "meausure", NormalSearch },
- { "flageller", NormalSearch },
- { "tumour", NormalSearch },
- { "dimerising", NormalSearch },
- { "dimerisation", NormalSearch },
- { "nucelar", NormalSearch },
- { "nulcear", NormalSearch },
- { "proteine", NormalSearch },
- { "unkown", NormalSearch },
- { "periplsmic", NormalSearch },
- { "molybopterin", NormalSearch },
- { "molydopterin", NormalSearch },
- { "aluminium", NormalSearch },
- { "aminopetidase", NormalSearch },
- { "asparate", NormalSearch },
- { "aparaginase", NormalSearch },
- { "bifunctionnal", NormalSearch },
- { "biosyntesis", NormalSearch },
- { "bnding", NormalSearch },
- { "carboxilic", NormalSearch },
- { "cell divisionFtsK/SpoIIIE", NormalSearch },
- { "coantaining", NormalSearch },
- { "coenzye", NormalSearch },
- { "componnent", NormalSearch },
- { "degration", NormalSearch },
- { "dependant", NormalSearch },
- { "disulphide", NormalSearch },
- { "divison", NormalSearch },
- { "dyhydrogenase", NormalSearch },
- { "glcosyl", NormalSearch },
- { "glucosainyl", NormalSearch },
- { "glutaminne", NormalSearch },
- { "hemelysin", NormalSearch },
- { "hemoglobine", NormalSearch },
- { "histadine", NormalSearch },
- { "homeserine", NormalSearch },
- { "hyphotetical", NormalSearch },
- { "hypotetical", NormalSearch },
- { "hypotheitcal", NormalSearch },
- { "hpothetical", NormalSearch },
- { "inductible", NormalSearch },
- { "majour", NormalSearch },
- { "mambrane", NormalSearch },
- { "meausure", NormalSearch },
- { "membranne", NormalSearch },
- { "methlytransferase", NormalSearch },
- { "metylase", NormalSearch },
- { "monoxyde", NormalSearch },
- { "monoxygenase", NormalSearch },
- { "mulitdrug", NormalSearch },
- { "ndoribonuclease", ContainsWholeWord },
- { "nickle", NormalSearch },
- { "oxidoreductasse", NormalSearch },
- { "oxydase", NormalSearch },
- { "phophate", NormalSearch },
- { "phopho", NormalSearch },
- { "phophoserine", NormalSearch },
- { "phoshate", NormalSearch },
- { "phosphotase", NormalSearch },
- { "posible", NormalSearch },
- { "presursor", NormalSearch },
- { "prortein", NormalSearch },
- { "regulatot", NormalSearch },
- { "resistence", NormalSearch },
- { "serinr", NormalSearch },
- { "signalling", NormalSearch },
- { "spscific", NormalSearch },
- { "stabilisation", NormalSearch },
- { "subnit", NormalSearch },
- { "sulpho", NormalSearch },
- { "sulphur", NormalSearch },
- { "sythase", NormalSearch },
- { "threonin", ContainsWholeWord },
- { "tranferase", NormalSearch },
- { "transebrane", NormalSearch },
- { "transglycolase", NormalSearch },
- { "transorter", NormalSearch },
- { "transpoase", NormalSearch },
- { "transportor", NormalSearch },
- { "transproter", NormalSearch },
- { "transulfuration", NormalSearch },
- { "typr", NormalSearch },
- { "uncharaterized", NormalSearch },
- { "undecapaprenyl", NormalSearch },
- { "utilisation", ContainsWholeWord },
- { "contain", ContainsWholeWord },
- { "start codon", ContainsWholeWord },
- { "Includes:", ContainsWholeWord },
- { "inactivated derivative", ContainsWholeWord },
- { "double space", ContainsDoubleSpace },
- { "all capital letters", AllCapitalLetters },
- { "unbalanced brackets or parentheses", ContainsUnbalancedParentheses },
- /* organism names */
- { "aureus", ContainsWholeWord },
- { "Arabidopsis", ContainsWholeWord },
- { "Aspergillus", ContainsWholeWord },
- { "niger", ContainsWholeWord },
- { "Bacillus", ContainsWholeWord },
- { "Bacteroides", ContainsWholeWord },
- { "B.subtilis", ContainsWholeWord },
- { "Campylobacter", ContainsWholeWord },
- { "cerevisiae", ContainsWholeWord },
- { "Chlamydial", ContainsWholeWord },
- { "Chlamydomonas", ContainsWholeWord },
- { "Drosophila", ContainsWholeWord },
- { "enterica", ContainsWholeWord },
- { "Escherichia", ContainsWholeWord },
- { "E.coli", ContainsWholeWord },
- { "halophilus", ContainsWholeWord },
- { "Helicobacter", ContainsWholeWord },
- { "Jejuni", ContainsWholeWord },
- { "Leishmania", ContainsWholeWord },
- { "Marinococcus", ContainsWholeWord },
- { "mouse", ContainsWholeWord },
- { "Mus musculus", ContainsWholeWord },
- { "Mycobacterium", ContainsWholeWord },
- { "Pestis", ContainsWholeWord },
- { "pseudomonas", ContainsWholeWord },
- { "pombe", ContainsWholeWord },
- { "pylori", ContainsWholeWord },
- { "Tuberculosis", ContainsWholeWord },
- { "rat", ContainsWholeWord },
- { "Rhodobacter", ContainsWholeWord },
- { "Staphylococcus", ContainsWholeWord },
- { "subsp", ContainsWholeWord },
- { "serovar", ContainsWholeWord },
- { "thaliana", ContainsWholeWord },
- { "Typhimurium", ContainsWholeWord },
- { "Salmonella", ContainsWholeWord },
- { "Staphlococcal", ContainsWholeWord },
- { "Staphlococcus", ContainsWholeWord },
- { "staphylococcal", ContainsWholeWord },
- { "sreptomyces", ContainsWholeWord },
- { "Streptococcus", ContainsWholeWord },
- { "streptococcal", ContainsWholeWord },
- { "streptomyces", ContainsWholeWord },
- { "xenopus", ContainsWholeWord },
- { "yeast", ContainsWholeWord },
- { "Yersinia", ContainsWholeWord }
+ { "beginning with period, comma, or hyphen" , BeginsWithPunct, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "begins or ends with quotes", BeginsOrEndsWithQuotes, eSuspectNameType_QuickFix, NULL, RemoveBeginningAndEndingQuotes } ,
+ { "binding" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "domain", EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "like" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "motif" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "related" , EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "repeat", EndsWithPattern, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "fold" , EndsWithFold, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "Arabidopsis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Aspergillus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "B.subtilis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Bacillus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Bacteroides" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Campylobacter" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Chlamydial" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Chlamydomonas" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Drosophila" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "E.coli" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Escherichia" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Helicobacter" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Includes:" , ContainsWholeWord, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "Jejuni" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Leishmania" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Marinococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Mus musculus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Mycobacterium" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Pestis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Rhodobacter" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Salmonella" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Staphlococcal" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Staphlococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Staphylococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Streptococcus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Subtilis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Tuberculosis" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Typhimurium" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Yersinia" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "aminotransferasee" , ContainsWholeWord, eSuspectNameType_Typo , "aminotransferase", SimpleReplaceFunc } ,
+ { "arginin " , ContainsWholeWord, eSuspectNameType_Typo , "arginine ", SimpleReplaceFunc } ,
+ { "argininte" , ContainsWholeWord, eSuspectNameType_Typo , "arginine", SimpleReplaceFunc } ,
+ { "aureus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "bioin" , ContainsWholeWord, eSuspectNameType_Typo , "biotin", SimpleReplaceFunc } ,
+ { "biosythesis" , ContainsWholeWord, eSuspectNameType_Typo , "biosynthesis", SimpleReplaceFunc } ,
+ { "cerevisiae" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "chelatin" , ContainsWholeWord, eSuspectNameType_Typo , "chelating", SimpleReplaceFunc } ,
+ { "coli" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "contain" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } ,
+ { "deydrogenase" , ContainsWholeWord, eSuspectNameType_Typo, "dehydrogenase", SimpleReplaceFunc } ,
+ { "diacyglycerol" , ContainsWholeWord, eSuspectNameType_Typo, "diacylglycerol", SimpleReplaceFunc } ,
+ { "domainl", ContainsWholeWord, eSuspectNameType_Typo, "domain", SimpleReplaceFunc } ,
+ { "enterica" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "exporte" , ContainsWholeWord, eSuspectNameType_Typo, "exported", SimpleReplaceFunc } ,
+ { "familie" , ContainsWholeWord, eSuspectNameType_Typo, "family", SimpleReplaceFunc } ,
+ { "gene" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } ,
+ { "genes" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } ,
+ { "glycin" , ContainsWholeWord, eSuspectNameType_Typo, "glycine", SimpleReplaceFunc } ,
+ { "glycosy" , ContainsWholeWord, eSuspectNameType_Typo, "glucosyl", SimpleReplaceFunc } ,
+ { "halophilus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "hemaggltinin" , ContainsWholeWord, eSuspectNameType_Typo, "hemagglutinin", SimpleReplaceFunc } ,
+ { "hexpeptide" , ContainsWholeWord, eSuspectNameType_Typo, "hexapeptide", SimpleReplaceFunc } ,
+ { "histide" , ContainsWholeWord, eSuspectNameType_Typo, "histidine", SimpleReplaceFunc } ,
+ { "homo" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "homocystein" , ContainsWholeWord, eSuspectNameType_Typo, "homocysteine", SimpleReplaceFunc } ,
+ { "hyp domain protein" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", SimpleReplaceFunc },
+ { "hypot" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothe" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothet" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothetic" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothetica" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothetical domain protein" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", SimpleReplaceFunc },
+ { "inactivated derivative" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } ,
+ { "initation" , ContainsWholeWord, eSuspectNameType_Typo, "initiation", SimpleReplaceFunc } ,
+ { "invertion" , ContainsWholeWord, eSuspectNameType_Typo, "inversion", SimpleReplaceFunc } ,
+ { "isomaerase" , ContainsWholeWord, eSuspectNameType_Typo, "isomerase", SimpleReplaceFunc } ,
+ { "mobilisation" , ContainsWholeWord, eSuspectNameType_Typo, "mobilization", SimpleReplaceFunc } ,
+ { "mouse" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "mutatrotase" , ContainsWholeWord, eSuspectNameType_Typo, "mutarotase", SimpleReplaceFunc } ,
+ { "ncharacterized" , ContainsWholeWord, eSuspectNameType_Typo, "uncharacterized", SimpleReplaceFunc } ,
+ { "ndoribonuclease" , ContainsWholeWord, eSuspectNameType_Typo, "endoribonuclease", SimpleReplaceFunc } ,
+ { "niger" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "ntegral " , ContainsWholeWord, eSuspectNameType_Typo, "integral ", SimpleReplaceFunc } ,
+ { "obalt" , ContainsWholeWord, eSuspectNameType_Typo, "cobalt", SimpleReplaceFunc } ,
+ { "odule" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } ,
+ { "orf, hyp" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", SimpleReplaceFunc },
+ { "orf, hypothetical" , IsSingleWord, eSuspectNameType_Typo, "hypothetical protein", SimpleReplaceFunc },
+ { "oxidoreductasee" , ContainsWholeWord, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } ,
+ { "oxidoredutase" , ContainsWholeWord, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } ,
+ { "periplamic" , ContainsWholeWord, eSuspectNameType_Typo, "periplasmic", SimpleReplaceFunc } ,
+ { "periplasmc" , ContainsWholeWord, eSuspectNameType_Typo, "periplasmic", SimpleReplaceFunc } ,
+ { "phosphatidyltransferse" , ContainsWholeWord, eSuspectNameType_Typo, "phosphatidyltransferase", SimpleReplaceFunc } ,
+ { "phosphopantethiene" , ContainsWholeWord, eSuspectNameType_Typo, "phosphopantetheine", SimpleReplaceFunc } ,
+ { "pombe" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "portein" , ContainsWholeWord, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "protei" , ContainsWholeWord, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "protwin" , ContainsWholeWord, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "pseudo" , ContainsWholeWord, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "pseudomonas" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "puter" , ContainsWholeWord, eSuspectNameType_Typo, "outer", SimpleReplaceFunc } ,
+ { "pylori" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "rat" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "reductasee" , ContainsWholeWord, eSuspectNameType_Typo, "reductase", SimpleReplaceFunc } ,
+ { "rsponse" , ContainsWholeWord, eSuspectNameType_Typo, "response", SimpleReplaceFunc } ,
+ { "serovar" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "sigm" , ContainsWholeWord, eSuspectNameType_Typo, "sigma", NULL } ,
+ { "sreptomyces" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } ,
+ { "staphylococcal" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "start codon" , ContainsWholeWord, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "streptococcal" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "streptomyces" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "subsp" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "tetracenpmycin" , ContainsWholeWord, eSuspectNameType_Typo, "tetracenomycin", SimpleReplaceFunc } ,
+ { "thaliana" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "thiamin/thiamin" , ContainsWholeWord, eSuspectNameType_Typo, "thiamin/thiamine", SimpleReplaceFunc } ,
+ { "thioderoxin" , ContainsWholeWord, eSuspectNameType_Typo, "thioredoxin", SimpleReplaceFunc } ,
+ { "threonin" , ContainsWholeWord, eSuspectNameType_Typo, "threonine", SimpleReplaceFunc } ,
+ { "transcrIptional" , ContainsWholeWordCaseSensitive, eSuspectNameType_Typo, "transcriptional", SimpleReplaceFunc } ,
+ { "transemembrane" , ContainsWholeWord, eSuspectNameType_Typo, "transmembrane", SimpleReplaceFunc } ,
+ { "transferasee" , ContainsWholeWord, eSuspectNameType_Typo, "transferase", SimpleReplaceFunc } ,
+ { "transmebrane" , ContainsWholeWord, eSuspectNameType_Typo, "transmembrane", SimpleReplaceFunc } ,
+ { "unkn", IsSingleWord, eSuspectNameType_None, "hypothetical protein", SimpleReplaceFunc },
+ { "unnamed" , ContainsWholeWord, eSuspectNameType_None, NULL, NULL } ,
+ { "utilisation" , ContainsWholeWord, eSuspectNameType_Typo, "utilization", SimpleReplaceFunc } ,
+ { "xenopus" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "yeast" , ContainsWholeWord, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "ypothetical" , ContainsWholeWord, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "ytochrome" , ContainsWholeWord, eSuspectNameType_Typo, "cytochrome", SimpleReplaceFunc } ,
+ { "containing" , StartsWithPattern, eSuspectNameType_None, NULL, NULL } ,
+ { "from" , StartsWithPattern, eSuspectNameType_None, NULL, NULL } ,
+ { "CHC2 zinc finger" , IsSingleWord, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "SWIM zinc finger" , IsSingleWord, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "probable protein" , IsSingleWord, eSuspectNameType_None, NULL, NULL } ,
+ { "protein" , IsSingleWord, eSuspectNameType_None, NULL, NULL } ,
+ { "sodium" , IsSingleWord, eSuspectNameType_None, NULL, NULL } ,
+ { "IS" , PrefixPlusNumbersOnly, eSuspectNameType_None, NULL, NULL } ,
+ { "three or more numbers together, not after 'UPF' or 'DUF' or 'IS' and not followed by the word 'family' and not preceded by either 'cytochrome' or 'coenzyme'" , ThreeOrMoreNumbersTogether,
+ eSuspectNameType_Database, NULL, NULL } ,
+ { "all capital letters" , AllCapitalLetters, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "#" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { ". " , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "=" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "?" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "%" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Chloroplast" , NormalSearch, eSuspectNameType_NoOrganelleForProkaryote, NULL, NULL } ,
+ { "ECOLI" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Fragment" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "Frameshift" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "Homolog" , NormalSearch, eSuspectNameType_EvolutionaryRelationship, NULL, NULL } ,
+ { "Intein" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Intiation" , NormalSearch, eSuspectNameType_Typo, "initiation", SimpleReplaceFunc } ,
+ { "K potassium" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "K+ potassium" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Mitochondrial" , NormalSearch, eSuspectNameType_NoOrganelleForProkaryote, NULL, NULL } ,
+ { "No definition line found" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Plasmodium" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "Portein" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Related to" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Similar to" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Transemembrane" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "Transmebrane" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "\\-PA" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "accessroy" , NormalSearch, eSuspectNameType_Typo, "accessory", SimpleReplaceFunc } ,
+ { "aceytltranferase" , NormalSearch, eSuspectNameType_Typo, "acetyltransferase", SimpleReplaceFunc } ,
+ { "active site" , NormalSearch, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "adenylattransferase" , NormalSearch, eSuspectNameType_Typo, "adenylate transferase", SimpleReplaceFunc } ,
+ { "adenylytransferase" , NormalSearch, eSuspectNameType_Typo, "adenylyltransferase", SimpleReplaceFunc } ,
+ { "alternate protein name" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "aluminium" , NormalSearch, eSuspectNameType_Typo, "aluminum", SimpleReplaceFunc } ,
+ { "aminopetidase" , NormalSearch, eSuspectNameType_Typo, "aminopeptidase", SimpleReplaceFunc } ,
+ { "aparaginase" , NormalSearch, eSuspectNameType_Typo, "asparaginase", SimpleReplaceFunc } ,
+ { "asparate" , NormalSearch, eSuspectNameType_Typo, "aspartate", SimpleReplaceFunc } ,
+ { "authentic point mutation" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "bifunctional" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "bifunctionnal" , NormalSearch, eSuspectNameType_Typo, "bifunctional", SimpleReplaceFunc } ,
+ { "bigenesis" , NormalSearch, eSuspectNameType_Typo, "biogenesis", SimpleReplaceFunc } ,
+ { "biosyntesis" , NormalSearch, eSuspectNameType_Typo, "biosynthesis", SimpleReplaceFunc } ,
+ { "bnding" , NormalSearch, eSuspectNameType_Typo, "binding", SimpleReplaceFunc } ,
+ { "bos taurus" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "carboxilic" , NormalSearch, eSuspectNameType_Typo, "carboxylic", SimpleReplaceFunc } ,
+ { "cell divisionFtsK/SpoIIIE" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "characteris" , NormalSearch, eSuspectNameType_Typo, "characteriz", SimpleReplaceAnywhereFunc } ,
+ { "coantaining" , NormalSearch, eSuspectNameType_Typo, "containing", SimpleReplaceFunc } ,
+ { "coenzye" , NormalSearch, eSuspectNameType_Typo, "coenzyme", SimpleReplaceFunc } ,
+ { "componenet" , NormalSearch, eSuspectNameType_Typo, "component", SimpleReplaceFunc } ,
+ { "componnent" , NormalSearch, eSuspectNameType_Typo, "component", SimpleReplaceFunc } ,
+ { "consevered" , NormalSearch, eSuspectNameType_Typo, "conserved", SimpleReplaceFunc } ,
+ { "containg" , NormalSearch, eSuspectNameType_Typo, "containing", SimpleReplaceFunc } ,
+ { "cotaining" , NormalSearch, eSuspectNameType_Typo, "containing", SimpleReplaceFunc } ,
+ { "degration" , NormalSearch, eSuspectNameType_Typo, "degradation", SimpleReplaceFunc } ,
+ { "deletion" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "dependant" , NormalSearch, eSuspectNameType_Typo, "dependent", SimpleReplaceFunc } ,
+ { "dimerisation" , NormalSearch, eSuspectNameType_Typo, "dimerization", SimpleReplaceFunc } ,
+ { "dimerising" , NormalSearch, eSuspectNameType_Typo, "dimerizing", SimpleReplaceFunc } ,
+ { "dioxyenase" , NormalSearch, eSuspectNameType_Typo, "dioxygenase", SimpleReplaceFunc } ,
+ { "disulphide" , NormalSearch, eSuspectNameType_Typo, "disulfide", SimpleReplaceFunc } ,
+ { "divison" , NormalSearch, eSuspectNameType_Typo, "division", SimpleReplaceFunc } ,
+ { "domain domain" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "domain protein domain protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "domian" , NormalSearch, eSuspectNameType_Typo, "domain", SimpleReplaceFunc } ,
+ { "dyhydrogenase" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "dyhydrogenase" , NormalSearch, eSuspectNameType_Typo, "dehydrogenase", SimpleReplaceFunc } ,
+ { "enentioselective" , NormalSearch, eSuspectNameType_Typo, "enantioselective", SimpleReplaceFunc } ,
+ { "facotr" , NormalSearch, eSuspectNameType_Typo, "factor", SimpleReplaceFunc } ,
+ { "fagella", NormalSearch, eSuspectNameType_Typo, "flagella", SimpleReplaceFunc } ,
+ { "family family" , NormalSearch, eSuspectNameType_Typo, "family", SimpleReplaceFunc } ,
+ { "flageller" , NormalSearch, eSuspectNameType_Typo, "flagellar", SimpleReplaceFunc } ,
+ { "frame shift" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "gIycerol" , NormalSearch, eSuspectNameType_Typo, "glycerol", SimpleReplaceFunc } ,
+ { "glcosyl" , NormalSearch, eSuspectNameType_Typo, "glycosyl", SimpleReplaceFunc } ,
+ { "glucosainyl" , NormalSearch, eSuspectNameType_Typo, "glucosaminyl", SimpleReplaceFunc } ,
+ { "glutaminne" , NormalSearch, eSuspectNameType_Typo, "glutamine", SimpleReplaceFunc } ,
+ { "golgi" , NormalSearch, eSuspectNameType_NoOrganelleForProkaryote, NULL, NULL } ,
+ { "haem" , NormalSearch, eSuspectNameType_Typo, "heme", HaemReplaceFunc } ,
+ { "haemagglutination" , NormalSearch, eSuspectNameType_Typo, "hemagglutination", SimpleReplaceFunc } ,
+ { "heam" , NormalSearch, eSuspectNameType_Typo, "heme", HaemReplaceFunc } ,
+ { "hemelysin" , NormalSearch, eSuspectNameType_Typo, "hemolysin", SimpleReplaceFunc } ,
+ { "hemoglobine" , NormalSearch, eSuspectNameType_Typo, "hemoglobin", SimpleReplaceFunc } ,
+ { "hexapaptide" , NormalSearch, eSuspectNameType_Typo, "hexapeptide", SimpleReplaceFunc } ,
+ { "highly conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "histadine" , NormalSearch, eSuspectNameType_Typo, "histidine", SimpleReplaceFunc } ,
+ { "homeserine" , NormalSearch, eSuspectNameType_Typo, "homoserine", SimpleReplaceFunc } ,
+ { "homo sapiens" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "hpothetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "human" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "hyphotetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hyphotheical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypotehtical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypotethical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypotetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypotheical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypotheitcal" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothetcial" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothteical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypothtical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hypthetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "hyptothetical" , NormalSearch, eSuspectNameType_Typo, "hypothetical", SimpleReplaceFunc } ,
+ { "inductible" , NormalSearch, eSuspectNameType_Typo, "inducible", SimpleReplaceFunc } ,
+ { "interrupt" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "isomerse" , NormalSearch, eSuspectNameType_Typo, "isomerase", SimpleReplaceFunc } ,
+ { "majour" , NormalSearch, eSuspectNameType_Typo, "major", SimpleReplaceFunc } ,
+ { "mambrane" , NormalSearch, eSuspectNameType_Typo, "membrane", SimpleReplaceFunc } ,
+ { "meausure" , NormalSearch, eSuspectNameType_Typo, "measure", SimpleReplaceFunc } ,
+ { "membranne" , NormalSearch, eSuspectNameType_Typo, "membrane", SimpleReplaceFunc } ,
+ { "methlytransferase" , NormalSearch, eSuspectNameType_Typo, "methyltransferase", SimpleReplaceFunc } ,
+ { "metylase" , NormalSearch, eSuspectNameType_Typo, "methylase", SimpleReplaceFunc } ,
+ { "molibdenum" , NormalSearch, eSuspectNameType_Typo, "molybdenum", SimpleReplaceFunc } ,
+ { "molybopterin" , NormalSearch, eSuspectNameType_Typo, "molybdopterin", SimpleReplaceFunc } ,
+ { "molydopterin" , NormalSearch, eSuspectNameType_Typo, "molybdopterin", SimpleReplaceFunc } ,
+ { "monooxigenase" , NormalSearch, eSuspectNameType_Typo, "monooxygenase", SimpleReplaceFunc } ,
+ { "monoxyde" , NormalSearch, eSuspectNameType_Typo, "monoxide", SimpleReplaceFunc } ,
+ { "monoxygenase" , NormalSearch, eSuspectNameType_Typo, "monooxygenase", SimpleReplaceFunc } ,
+ { "mulitdrug" , NormalSearch, eSuspectNameType_Typo, "multidrug", SimpleReplaceFunc } ,
+ { "multifunctional", NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "narrowly conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "nickle" , NormalSearch, eSuspectNameType_Typo, "nickel", SimpleReplaceFunc } ,
+ { "novel protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "nucelar" , NormalSearch, eSuspectNameType_Typo, "nuclear", SimpleReplaceFunc } ,
+ { "nucleotydyl" , NormalSearch, eSuspectNameType_Typo, "nucleotidyl", SimpleReplaceFunc } ,
+ { "nulcear" , NormalSearch, eSuspectNameType_Typo, "nuclear", SimpleReplaceFunc } ,
+ { "open reading frame" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "or related" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "orphan protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "ortholog" , NormalSearch, eSuspectNameType_EvolutionaryRelationship, NULL, NULL } ,
+ { "outers" , NormalSearch, eSuspectNameType_Typo, "outer", SimpleReplaceFunc } ,
+ { "oxidoreducatse" , NormalSearch, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } ,
+ { "oxidoreductasse" , NormalSearch, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } ,
+ { "oxidoreduxtase" , NormalSearch, eSuspectNameType_Typo, "oxidoreductase", SimpleReplaceFunc } ,
+ { "oxydase" , NormalSearch, eSuspectNameType_Typo, "oxidase", SimpleReplaceFunc } ,
+ { "paralog" , NormalSearch, eSuspectNameType_EvolutionaryRelationship, NULL, NULL } ,
+ { "partial" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "peptidodoglycan" , NormalSearch, eSuspectNameType_Typo, "peptidoglycan", SimpleReplaceFunc } ,
+ { "periplsmic" , NormalSearch, eSuspectNameType_Typo, "periplasmic", SimpleReplaceFunc } ,
+ { "phophate" , NormalSearch, eSuspectNameType_Typo, "phosphate", SimpleReplaceFunc } ,
+ { "phopho" , NormalSearch, eSuspectNameType_Typo, "phospho", SimpleReplaceFunc } ,
+ { "phophoserine" , NormalSearch, eSuspectNameType_Typo, "phosphoserine", SimpleReplaceFunc } ,
+ { "phoshate" , NormalSearch, eSuspectNameType_Typo, "phosphate", SimpleReplaceFunc } ,
+ { "phosphatransferase" , NormalSearch, eSuspectNameType_Typo, "phosphotransferase", SimpleReplaceFunc } ,
+ { "phosphotase" , NormalSearch, eSuspectNameType_Typo, "phosphatase", SimpleReplaceFunc } ,
+ { "posible" , NormalSearch, eSuspectNameType_Typo, "possible", SimpleReplaceFunc } ,
+ { "presursor" , NormalSearch, eSuspectNameType_Typo, "precursor", SimpleReplaceFunc } ,
+ { "probable putative" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "proein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "prortein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "proteine" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "proteinn" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "protien" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "protrein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "prptein" , NormalSearch, eSuspectNameType_Typo, "protein", SimpleReplaceFunc } ,
+ { "pseudogene" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "puatative" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "puative" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putaitive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putaitve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putaive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putataive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putatitve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putative orphan protein" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "putative probable" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "putative putative" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "putative, putative" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "putatuve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putatve" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putatvie" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putayive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "putitive" , NormalSearch, eSuspectNameType_Typo, "putative", SimpleReplaceFunc } ,
+ { "qlcohol" , NormalSearch, eSuspectNameType_Typo, "alcohol", SimpleReplaceFunc } ,
+ { "recognised" , NormalSearch, eSuspectNameType_Typo, "recognized", SimpleReplaceFunc } ,
+ { "regulatot" , NormalSearch, eSuspectNameType_Typo, "regulator", SimpleReplaceFunc } ,
+ { "reponse" , NormalSearch, eSuspectNameType_Typo, "response", SimpleReplaceFunc } ,
+ { "resistence" , NormalSearch, eSuspectNameType_Typo, "resistance", SimpleReplaceFunc } ,
+ { "ribosimal" , NormalSearch, eSuspectNameType_Typo, "ribosomal", SimpleReplaceFunc } ,
+ { "ribosoml" , NormalSearch, eSuspectNameType_Typo, "ribosomal", SimpleReplaceFunc } ,
+ { "sapiens" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "serinr" , NormalSearch, eSuspectNameType_Typo, "serine", SimpleReplaceFunc } ,
+ { "signalling" , NormalSearch, eSuspectNameType_Typo, "signaling", SimpleReplaceFunc } ,
+ { "similar" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "simmilar" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "specfic" , NormalSearch, eSuspectNameType_Typo, "specific", SimpleReplaceFunc } ,
+ { "sphaeroides" , NormalSearch, eSuspectNameType_RemoveOrganismName, NULL, NULL } ,
+ { "spscific" , NormalSearch, eSuspectNameType_Typo, "specific", SimpleReplaceFunc } ,
+ { "stabilisation" , NormalSearch, eSuspectNameType_Typo, "stabilization", SimpleReplaceFunc } ,
+ { "subnit" , NormalSearch, eSuspectNameType_Typo, "subunit", SimpleReplaceFunc } ,
+ { "suger" , NormalSearch, eSuspectNameType_Typo, "sugar", SimpleReplaceFunc } ,
+ { "sulpho" , NormalSearch, eSuspectNameType_None, "sulfo", SimpleReplaceFunc } ,
+ { "sulphur" , NormalSearch, eSuspectNameType_Typo, "sulfur", SimpleReplaceFunc } ,
+ { "systhesis" , NormalSearch, eSuspectNameType_Typo, "synthesis", SimpleReplaceFunc } ,
+ { "sythase" , NormalSearch, eSuspectNameType_Typo, "synthase", SimpleReplaceFunc } ,
+ { "thiredoxin" , NormalSearch, eSuspectNameType_Typo, "thioredoxin", SimpleReplaceFunc } ,
+ { "trancsriptional" , NormalSearch, eSuspectNameType_Typo, "transcription", SimpleReplaceFunc } ,
+ { "tranferase" , NormalSearch, eSuspectNameType_Typo, "transferase", SimpleReplaceFunc } ,
+ { "tranporter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } ,
+ { "transcirbed" , NormalSearch, eSuspectNameType_Typo, "transcribed", SimpleReplaceFunc } ,
+ { "transcriptonal" , NormalSearch, eSuspectNameType_Typo, "transcriptional", SimpleReplaceFunc } ,
+ { "transcritional" , NormalSearch, eSuspectNameType_Typo, "transcriptional", SimpleReplaceFunc } ,
+ { "transebrane" , NormalSearch, eSuspectNameType_Typo, "transmembrane", SimpleReplaceFunc } ,
+ { "transglycolase" , NormalSearch, eSuspectNameType_Typo, "transglycosylase", SimpleReplaceFunc } ,
+ { "transorter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } ,
+ { "transpoase" , NormalSearch, eSuspectNameType_Typo, "transposase", SimpleReplaceFunc } ,
+ { "transportor" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } ,
+ { "transproter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } ,
+ { "transulfuration" , NormalSearch, eSuspectNameType_Typo, "transsulfuration", SimpleReplaceFunc } ,
+ { "trnasporter" , NormalSearch, eSuspectNameType_Typo, "transporter", SimpleReplaceFunc } ,
+ { "truncat" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "ttg start" , NormalSearch, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "tumour" , NormalSearch, eSuspectNameType_Typo, "tumor", SimpleReplaceFunc } ,
+ { "typr" , NormalSearch, eSuspectNameType_Typo, "type", SimpleReplaceFunc } ,
+ { "uncharacterized protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "uncharaterized" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "undecapaprenyl" , NormalSearch, eSuspectNameType_Typo, "undecaprenyl", SimpleReplaceFunc } ,
+ { "unkown" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "utilising" , NormalSearch, eSuspectNameType_Typo, "utilizing", SimpleReplaceFunc } ,
+ { "weakly conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "widely conserved" , NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "|" , NormalSearch, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "C term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "C-term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "N term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "N-term" , ProductContainsTerm, eSuspectNameType_MightBeNonfunctional, NULL, NULL } ,
+ { "Two or more sets of brackets or parentheseis" , ContainsTwoSetsOfBracketsOrParentheses, eSuspectNameType_None, NULL, NULL } ,
+ { "unknown" , ContainsUnknownName, eSuspectNameType_None, NULL, NULL } ,
+ { "double space" , ContainsDoubleSpace, eSuspectNameType_None, NULL, NULL } ,
+ { "COG" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } ,
+ { "DUF" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } ,
+ { "EST" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } ,
+ { "FOG" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } ,
+ { "UPF" , ContainsWholeWordCaseSensitive, eSuspectNameType_Database, NULL, NULL } ,
+ { "_" , ContainsUnderscore, eSuspectNameType_Database, NULL, NULL } ,
+ { "ending with period, comma, hyphen, underscore, colon, or forward slash" , EndsWithPunct, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "PTS system" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "helix-turn-helix" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "transposase of" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_None, NULL, NULL } ,
+ { "zinc finger" , IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, NULL, NULL } ,
+ { "may contain a plural" , MayContainPlural, eSuspectNameType_None, NULL, NULL } ,
+ { "unbalanced brackets or parentheses" , ContainsUnbalancedParentheses, eSuspectNameType_InappropriateSymbol, NULL, NULL } ,
+ { "long product name that may contain descriptive information more appropriate in a note", IsTooLong, eSuspectNameType_QuickFix, NULL, NULL } ,
+ { "Product name begins with possible, potential, predicted or probable. Please use putative.", StartsWithPutativeReplacement, eSuspectNameType_QuickFix, "putative", UsePutative } ,
+
+ { "CDS", NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "doubtful", NormalSearch, eSuspectNameType_None, NULL, NULL } ,
+ { "alternate protein name", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "conser", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "conserve", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "conserved hypothetical protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "conserved", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "domain family", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "domain of unknown function", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "domain protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "domain", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "doubtful CDS found within S. typhi pathogenicity island", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "factor", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "family protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "hypo", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "hypothetical ORF", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "hypothetical domain protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "No definition line found", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "orphan protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "ORF", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "orf, hyp", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "orf, hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "peptide", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "precursor", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "probable", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "predicted", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "predicted protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "probable protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "protein containing", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "protein of unknown function", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "protein-containing", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "pseudo", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "putative conserved hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "putative hypothetical", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "putative protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "putative", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "uncharacterized conserved protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "unnamed", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameFunc } ,
+ { "o252", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "o252 protein", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Alanine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Arginine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Asparagine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Aspartic acid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Cysteine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "DNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Glutamic acid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Glutamine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Glycine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Histidine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Isoleucine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Leucine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Lysine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Methionine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "NAD", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "PASTA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Phenylalanine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Proline", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "RNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Serine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Threonine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Tryptophan", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Tyrosine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "Valine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "adenine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "amino acid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "barrel", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "carbon", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "citrate", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "cytosine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "finger", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "ggdef", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "guanine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "helium", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "helix", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "hydrogen", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "insertion sequence", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "iron", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "mRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "membrane", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "ncRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "nitrogen", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "oxygen", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "p-loop", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "peptide", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "phage", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "plasmid", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "purine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "rRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "repeat", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "secreted", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "signal peptide", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_UseProtein, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "signal", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "subunit", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "tRNA", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "thymine", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "transport-associated", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "transposon", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "uracil", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc } ,
+ { "zinc", IsSingleWordOrWeaselPlusSingleWord, eSuspectNameType_QuickFix, "hypothetical protein", ReplaceWholeNameAddNoteFunc }
};
const int num_suspect_product_terms = sizeof (suspect_product_terms) / sizeof (SuspectProductNameData);
+static void FixSuspectProductNameTyposInOneFeature (SeqFeatPtr cds, LogInfoPtr lip, ESuspectNameType fix_type)
+{ /* Applies the replacements registered in suspect_product_terms for one
+   * fix_type to the protein name of a coding region.
+   *
+   * cds      - feature to fix; nothing happens unless it is a CDS with data,
+   *            a product, and a ProtRef returned by GetProtRefForFeature.
+   * lip      - optional log; when lip and lip->fp are both set, each change
+   *            is written to lip->fp and lip->data_in_log is set.
+   * fix_type - only table entries with this fix_type are applied.
+   */
+ Int4 k;
+ ProtRefPtr prp;
+ ValNodePtr vnp;
+ CharPtr tmp, desc;
+ ValNode vn;
+ SeqFeatPtr mrna;
+ SeqMgrFeatContext context;
+ RnaRefPtr rrp;
+ CharPtr extra;
+ CharPtr and_associated_mrna = " and associated mRNA";
+
+ if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION || cds->data.value.ptrvalue == NULL
+ || cds->product == NULL || (prp = GetProtRefForFeature(cds)) == NULL)
+ {
+ return;
+ }
+
+
+
+ for (k = 0; k < num_suspect_product_terms; k++)
+ {
+ for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
+ {
+ if (suspect_product_terms[k].fix_type == fix_type
+ && suspect_product_terms[k].replace_func != NULL
+ && suspect_product_terms[k].search_func != NULL
+ && (suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, vnp->data.ptrvalue))
+ {
+ if (lip != NULL && lip->fp != NULL) { /* logged path: work on a copy so old and new text can be compared and reported */
+ tmp = StringSave ((CharPtr) vnp->data.ptrvalue);
+ (suspect_product_terms[k].replace_func)(&tmp,
+ suspect_product_terms[k].pattern,
+ suspect_product_terms[k].replace_phrase,
+ cds);
+ if (StringCmp (tmp, vnp->data.ptrvalue) != 0) { /* the replacement actually changed the name */
+ extra = "";
+ mrna = SeqMgrGetOverlappingmRNA (cds->location, &context); /* an overlapping mRNA whose ext string equals the old protein name is rewritten too */
+ if (mrna != NULL && mrna->data.choice == SEQFEAT_RNA
+ && (rrp = mrna->data.value.ptrvalue) != NULL
+ && rrp->ext.choice == 1
+ && StringCmp (rrp->ext.value.ptrvalue, vnp->data.ptrvalue) == 0) {
+ rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
+ rrp->ext.value.ptrvalue = StringSave (tmp);
+ extra = and_associated_mrna;
+ }
+ MemSet (&vn, 0, sizeof (ValNode));
+ vn.choice = OBJ_SEQFEAT;
+ vn.data.ptrvalue = cds;
+ desc = GetDiscrepancyItemText (&vn);
+ fprintf (lip->fp, "Changed '%s' to '%s' for %s%s\n", (CharPtr) vnp->data.ptrvalue, tmp, desc, extra);
+ vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
+ vnp->data.ptrvalue = tmp;
+ tmp = NULL; /* the string is now referenced by vnp; clearing tmp keeps the MemFree below from releasing it */
+ desc = MemFree (desc);
+ lip->data_in_log = TRUE;
+ }
+ tmp = MemFree (tmp);
+ } else { /* no usable log: replace_func works directly on the stored name; the overlapping mRNA is not touched on this path */
+ tmp = (CharPtr) vnp->data.ptrvalue;
+ (suspect_product_terms[k].replace_func)(&tmp, suspect_product_terms[k].pattern, suspect_product_terms[k].replace_phrase, cds);
+ vnp->data.ptrvalue = tmp;
+ }
+ break; /* this term is done once one name matched */
+ }
+ /* only check the first name */
+ if (!StringHasNoText (vnp->data.ptrvalue)) {
+ break;
+ }
+ }
+ }
+}
+
+
+/* Runs the eSuspectNameType_Typo replacements on every OBJ_SEQFEAT entry of
+ * item_list.  The data argument is not used; lip is passed through to
+ * FixSuspectProductNameTyposInOneFeature.
+ */
+static void FixSuspectProductNameTypos (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr item = item_list;
+
+  while (item != NULL) {
+    if (item->choice == OBJ_SEQFEAT) {
+      FixSuspectProductNameTyposInOneFeature ((SeqFeatPtr) item->data.ptrvalue, lip, eSuspectNameType_Typo);
+    }
+    item = item->next;
+  }
+}
+
+
+/* Runs the eSuspectNameType_QuickFix replacements on every OBJ_SEQFEAT entry
+ * of item_list.  The data argument is not used; lip is passed through to
+ * FixSuspectProductNameTyposInOneFeature.
+ */
+static void FixSuspectProductNameQuickFixes (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr item;
+
+  for (item = item_list; item != NULL; item = item->next) {
+    if (item->choice != OBJ_SEQFEAT) {
+      continue;
+    }
+    FixSuspectProductNameTyposInOneFeature ((SeqFeatPtr) item->data.ptrvalue, lip, eSuspectNameType_QuickFix);
+  }
+}
+
static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata)
{
ValNodePtr PNTR feature_list;
@@ -11529,6 +12178,7 @@ static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata)
ValNodePtr vnp;
BioseqPtr bsp;
SeqFeatPtr cds;
+ BioSourcePtr biop = NULL;
if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || sfp->data.value.ptrvalue == NULL
|| userdata == NULL)
@@ -11547,11 +12197,16 @@ static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata)
if (cds != NULL) {
sfp = cds;
}
+ /* find BioSource, to check whether we want to run all categories */
+ biop = GetBiopForBsp (bsp);
}
}
for (k = 0; k < num_suspect_product_terms; k++)
- {
+ {
+ if (!CategoryOkForBioSource(biop, suspect_product_terms[k].fix_type)) {
+ continue;
+ }
for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
{
if (suspect_product_terms[k].search_func != NULL
@@ -11570,15 +12225,22 @@ static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata)
}
-static ClickableItemPtr SuspectPhrase (Uint4 clickable_item_type, CharPtr phrase, CharPtr feat_type, ValNodePtr feature_list)
+static ClickableItemPtr SuspectPhraseEx (Uint4 clickable_item_type, CharPtr phrase, Boolean quote_phrase, CharPtr feat_type, ValNodePtr feature_list)
{
ClickableItemPtr dip = NULL;
- CharPtr bad_fmt = "%d %ss contain '%s'";
+ CharPtr bad_fmt_quote = "%d %ss contain '%s'";
+ CharPtr bad_fmt_noquote = "%d %ss contain %s";
+ CharPtr bad_fmt;
if (feature_list == NULL || phrase == NULL || StringHasNoText (feat_type))
{
return NULL;
}
+ if (quote_phrase) {
+ bad_fmt = bad_fmt_quote;
+ } else {
+ bad_fmt = bad_fmt_noquote;
+ }
dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
if (dip != NULL)
@@ -11595,6 +12257,12 @@ static ClickableItemPtr SuspectPhrase (Uint4 clickable_item_type, CharPtr phrase
}
+/* Same as SuspectPhraseEx with quote_phrase fixed to TRUE. */
+static ClickableItemPtr SuspectPhrase (Uint4 clickable_item_type, CharPtr phrase, CharPtr feat_type, ValNodePtr feature_list)
+{
+  ClickableItemPtr item;
+
+  item = SuspectPhraseEx (clickable_item_type, phrase, TRUE, feat_type, feature_list);
+  return item;
+}
+
+
static ClickableItemPtr SuspectPhraseEnd (Uint4 clickable_item_type, CharPtr phrase, CharPtr feat_type, ValNodePtr feature_list)
{
ClickableItemPtr dip = NULL;
@@ -11645,18 +12313,201 @@ static ClickableItemPtr SuspectPhraseStart (Uint4 clickable_item_type, CharPtr p
}
-extern void FindSuspectProductNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+/* Maps a suspect-name category to the clickable item type reported for it:
+ * typo and quick-fix categories have their own types, everything else is
+ * reported as DISC_SUSPECT_PRODUCT_NAME.
+ */
+static Uint4 ClickableItemTypeForNameCat (Int4 k)
+{
+  switch (k) {
+    case eSuspectNameType_Typo:
+      return DISC_PRODUCT_NAME_TYPO;
+    case eSuspectNameType_QuickFix:
+      return DISC_PRODUCT_NAME_QUICKFIX;
+    default:
+      return DISC_SUSPECT_PRODUCT_NAME;
+  }
+}
+
+typedef struct suspectrulefeats { /* userdata handed to FindSuspectProductNamesWithRulesCallback */
+ SuspectRuleSetPtr rule_list; /* rules to test, walked through their next pointers */
+ ValNodePtr PNTR feature_list; /* array of feature lists, indexed by the position of the matching rule in rule_list */
+ Int4 num_rules; /* number of slots in feature_list */
+} SuspectRuleFeatsData, PNTR SuspectRuleFeatsPtr;
+
+
+/* Feature visitor: tests the first name of a protein feature against each
+ * rule in the SuspectRuleFeatsData passed as userdata and records the feature
+ * under the first rule that matches.  For FEATDEF_PROT features the coding
+ * region that produces the protein is tested and recorded instead, when one
+ * can be found.
+ */
+static void FindSuspectProductNamesWithRulesCallback (SeqFeatPtr sfp, Pointer userdata)
+{
+  SuspectRuleFeatsPtr rule_feats;
+  SuspectRulePtr      cur_rule;
+  ProtRefPtr          prot;
+  BioseqPtr           prot_bsp;
+  SeqFeatPtr          found_cds;
+  Int4                idx;
+
+  if (sfp == NULL) return;
+  if (sfp->data.choice != SEQFEAT_PROT) return;
+  if (sfp->data.value.ptrvalue == NULL) return;
+
+  rule_feats = (SuspectRuleFeatsPtr) userdata;
+  if (rule_feats == NULL) return;
+
+  prot = (ProtRefPtr) sfp->data.value.ptrvalue;
+  if (prot->name == NULL) return;
+
+  /* add coding region rather than protein */
+  if (sfp->idx.subtype == FEATDEF_PROT) {
+    prot_bsp = BioseqFindFromSeqLoc (sfp->location);
+    found_cds = (prot_bsp == NULL) ? NULL : SeqMgrGetCDSgivenProduct (prot_bsp, NULL);
+    if (found_cds != NULL) {
+      sfp = found_cds;
+    }
+  }
+
+  idx = 0;
+  cur_rule = rule_feats->rule_list;
+  while (idx < rule_feats->num_rules && cur_rule != NULL) {
+    if (DoesStringMatchSuspectRule (prot->name->data.ptrvalue, sfp, cur_rule)) {
+      /* a feature is listed under the first matching rule only */
+      ValNodeAddPointer (&(rule_feats->feature_list[idx]), OBJ_SEQFEAT, sfp);
+      break;
+    }
+    idx++;
+    cur_rule = cur_rule->next;
+  }
+}
+
+
+/* Applies the rule passed as userdata to every OBJ_SEQFEAT entry of
+ * item_list through ApplySuspectProductNameFixToFeature.  When a log is
+ * supplied its file pointer is handed to the fixer, and data_in_log is set
+ * once any fix reports success.
+ */
+static void AutoFixSuspectProductRules (ValNodePtr item_list, Pointer userdata, LogInfoPtr lip)
+{
+  SuspectRulePtr fix_rule = (SuspectRulePtr) userdata;
+  ValNodePtr     item;
+  FILE           *log_fp;
+
+  if (fix_rule == NULL || item_list == NULL) {
+    return;
+  }
+
+  log_fp = (lip != NULL) ? lip->fp : NULL;
+
+  for (item = item_list; item != NULL; item = item->next) {
+    if (item->choice != OBJ_SEQFEAT) {
+      continue;
+    }
+    if (!ApplySuspectProductNameFixToFeature (fix_rule, (SeqFeatPtr) item->data.ptrvalue, log_fp)) {
+      continue;
+    }
+    if (lip != NULL) {
+      lip->data_in_log = TRUE;
+    }
+  }
+}
+
+
+/* Reports product names that match a rule from rule_list.
+ *
+ * Every entry of sep_list is visited with
+ * FindSuspectProductNamesWithRulesCallback, which collects one feature list
+ * per rule.  Each non-empty list becomes a clickable item (with an autofix
+ * when the rule has a replacement); items are grouped by rule->rule_type and
+ * the groups are hung below one top-level item that is appended to
+ * discrepancy_list.
+ *
+ * Allocation results are checked before use, and rule_type is range-checked
+ * before it is used as an index into the category array.
+ */
+static void
+FindSuspectProductNamesWithRules
+(ValNodePtr PNTR discrepancy_list,
+ ValNodePtr sep_list,
+ SuspectRuleSetPtr rule_list)
+{
+  SuspectRuleFeatsData srdata;
+  SuspectRulePtr rule;
+  CharPtr summ;
+  CharPtr fmt = "%d features %s";
+  ValNodePtr PNTR name_cat;
+  ValNodePtr master_list = NULL, vnp;
+  Int4 k, cat;
+  ClickableItemPtr dip, tdip = NULL;
+  ValNodePtr subcategories = NULL;
+  Int4 num_cat = Fix_type_gene + 1;
+
+  if (discrepancy_list == NULL) return;
+
+  srdata.num_rules = CountSuspectRuleSet (rule_list);
+  if (srdata.num_rules == 0) {
+    return;
+  }
+
+  srdata.rule_list = rule_list;
+  srdata.feature_list = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * srdata.num_rules);
+  if (srdata.feature_list == NULL) return;
+
+  name_cat = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_cat);
+  if (name_cat == NULL) {
+    /* do not leak the per-rule array when the category array cannot be allocated */
+    MemFree (srdata.feature_list);
+    return;
+  }
+
+  /* initialize array for suspicious product names */
+  for (k = 0; k < srdata.num_rules; k++)
+  {
+    srdata.feature_list[k] = NULL;
+  }
+
+  /* initialize named categories */
+  for (k = 0; k < num_cat; k++) {
+    name_cat[k] = NULL;
+  }
+
+  for (vnp = sep_list; vnp != NULL; vnp = vnp->next)
+  {
+    VisitGenProdSetFeatures (vnp->data.ptrvalue, &srdata, FindSuspectProductNamesWithRulesCallback);
+  }
+
+  for (k = 0, rule = srdata.rule_list; k < srdata.num_rules && rule != NULL; k++, rule = rule->next)
+  {
+    if (srdata.feature_list[k] == NULL)
+    {
+      continue;
+    }
+    dip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+    if (dip == NULL)
+    {
+      /* the list nodes only reference features, so the nodes alone are released */
+      srdata.feature_list[k] = ValNodeFree (srdata.feature_list[k]);
+      continue;
+    }
+    summ = SummarizeSuspectRule(rule);
+    dip->clickable_item_type = DISC_SUSPECT_PRODUCT_NAME;
+    dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (summ) + 15));
+    if (dip->description != NULL)
+    {
+      sprintf (dip->description, fmt, ValNodeLen (srdata.feature_list[k]), summ == NULL ? "" : summ);
+    }
+    summ = MemFree (summ);
+    dip->callback_func = NULL;
+    dip->datafree_func = NULL;
+    dip->callback_data = NULL;
+    dip->item_list = srdata.feature_list[k];
+    if (rule->replace != NULL) {
+      dip->autofix_func = AutoFixSuspectProductRules;
+      dip->autofix_data = rule;
+    }
+    /* rule_type comes from the rule set and must not index past name_cat.
+     * NOTE(review): out-of-range values are filed under category 0 so the
+     * features are still reported - confirm this is the desired fallback.
+     */
+    cat = (Int4) rule->rule_type;
+    if (cat < 0 || cat >= num_cat) {
+      cat = 0;
+    }
+    ValNodeAddPointer (&name_cat[cat], 0, dip);
+    ValNodeLinkCopy (&master_list, srdata.feature_list[k]);
+  }
+  if (master_list != NULL)
+  {
+    for (k = 0; k < num_cat; k++) {
+      if (name_cat[k] == NULL) {
+        continue;
+      }
+      tdip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+      if (tdip == NULL) {
+        /* no grouping node available: keep the items by attaching them directly */
+        ValNodeLink (&subcategories, name_cat[k]);
+        continue;
+      }
+      MemSet (tdip, 0, sizeof (ClickableItemData));
+      tdip->description = StringSave (SummarizeFixType(k));
+      tdip->item_list = ItemListFromSubcategories (name_cat[k]);
+      tdip->clickable_item_type = DISC_SUSPECT_PRODUCT_NAME;
+      tdip->subcategories = name_cat[k];
+      tdip->expanded = TRUE;
+      ValNodeAddPointer (&subcategories, 0, tdip);
+    }
+    dip = SuspectPhraseEx (DISC_SUSPECT_PRODUCT_NAME, "suspect phrase or characters", FALSE, "product_name", master_list);
+    if (dip != NULL)
+    {
+      dip->subcategories = subcategories;
+      dip->expanded = TRUE;
+      ValNodeAddPointer (discrepancy_list, 0, dip);
+    }
+  }
+
+  MemFree (srdata.feature_list);
+  MemFree (name_cat);
+}
+
+
+
+static void FindSuspectProductNamesWithStaticList (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
ValNodePtr PNTR feature_list = NULL;
ValNodePtr master_list = NULL, vnp;
Int4 k;
- ClickableItemPtr dip;
+ ClickableItemPtr dip, tdip = NULL;
+ ValNodePtr name_cat[eSuspectNameType_Max];
ValNodePtr subcategories = NULL;
if (discrepancy_list == NULL) return;
feature_list = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_suspect_product_terms);
if (feature_list == NULL) return;
+
+ MemSet (&name_cat, 0, sizeof (name_cat));
/* initialize array for suspicious product names */
for (k = 0; k < num_suspect_product_terms; k++)
@@ -11675,30 +12526,42 @@ extern void FindSuspectProductNames (ValNodePtr PNTR discrepancy_list, ValNodePt
{
if (suspect_product_terms[k].search_func == EndsWithPattern)
{
- dip = SuspectPhraseEnd (DISC_SUSPECT_PRODUCT_NAME, suspect_product_terms[k].pattern, "product name", feature_list[k]);
+ dip = SuspectPhraseEnd (ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type), suspect_product_terms[k].pattern, "product name", feature_list[k]);
}
else if (suspect_product_terms[k].search_func == StartsWithPattern)
{
- dip = SuspectPhraseStart (DISC_SUSPECT_PRODUCT_NAME, suspect_product_terms[k].pattern, "product name", feature_list[k]);
+ dip = SuspectPhraseStart (ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type), suspect_product_terms[k].pattern, "product name", feature_list[k]);
}
else
{
- dip = SuspectPhrase (DISC_SUSPECT_PRODUCT_NAME, suspect_product_terms[k].pattern, "product name", feature_list[k]);
+ dip = SuspectPhrase (ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type), suspect_product_terms[k].pattern, "product name", feature_list[k]);
}
if (dip != NULL)
{
- ValNodeAddPointer (&subcategories, 0, dip);
+ ValNodeAddPointer (&name_cat[suspect_product_terms[k].fix_type], 0, dip);
}
ValNodeLinkCopy (&master_list, feature_list[k]);
}
}
-
if (master_list != NULL)
{
- dip = SuspectPhrase (DISC_SUSPECT_PRODUCT_NAME, "suspect phrase or characters", "product_name", master_list);
+ for (k = 0; k < eSuspectNameType_Max; k++) {
+ if (name_cat[k] != NULL) {
+ tdip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (tdip, 0, sizeof (ClickableItemData));
+ tdip->description = StringSave (suspect_name_category_names[k]);
+ tdip->item_list = ItemListFromSubcategories (name_cat[k]);
+ tdip->clickable_item_type = ClickableItemTypeForNameCat(suspect_product_terms[k].fix_type);
+ tdip->subcategories = name_cat[k];
+ tdip->expanded = TRUE;
+ ValNodeAddPointer (&subcategories, 0, tdip);
+ }
+ }
+ dip = SuspectPhraseEx (DISC_SUSPECT_PRODUCT_NAME, "suspect phrase or characters", FALSE, "product_name", master_list);
if (dip != NULL)
{
- dip->subcategories = subcategories;
+ dip->subcategories = subcategories;
+ dip->expanded = TRUE;
ValNodeAddPointer (discrepancy_list, 0, dip);
}
}
@@ -11707,6 +12570,40 @@ extern void FindSuspectProductNames (ValNodePtr PNTR discrepancy_list, ValNodePt
}
+/* Rule set read from the configured PRODUCT_RULES_LIST file; stays NULL
+ * until a read succeeds.
+ */
+static SuspectRuleSetPtr s_SuspectProductRuleList = NULL;
+
+/* Entry point for the suspect product name test.  While no rule set has been
+ * loaded, the PRODUCT_RULES_LIST setting is looked up (SEQUINCUSTOM first,
+ * then SEQUIN) and the named file is read.  With a rule set in hand the
+ * rule-based test runs; otherwise the built-in static list is used.
+ */
+extern void FindSuspectProductNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  Char     rule_file[PATH_MAX];
+  AsnIoPtr aip;
+  Boolean  have_path;
+
+  if (s_SuspectProductRuleList == NULL)
+  {
+    have_path = FALSE;
+    if (GetAppParam ("SEQUINCUSTOM", "SETTINGS", "PRODUCT_RULES_LIST", NULL, rule_file, sizeof (rule_file) - 1)) {
+      have_path = TRUE;
+    } else if (GetAppParam ("SEQUIN", "SETTINGS", "PRODUCT_RULES_LIST", NULL, rule_file, sizeof (rule_file) - 1)) {
+      have_path = TRUE;
+    }
+
+    if (have_path)
+    {
+      aip = AsnIoOpen (rule_file, "r");
+      if (aip == NULL) {
+        Message (MSG_ERROR, "Unable to read %s", rule_file);
+      } else {
+        s_SuspectProductRuleList = SuspectRuleSetAsnRead (aip, NULL);
+        if (s_SuspectProductRuleList == NULL) {
+          Message (MSG_ERROR, "Unable to read suspect product rules from %s", rule_file);
+        }
+        AsnIoClose (aip);
+      }
+    }
+  }
+
+  if (s_SuspectProductRuleList != NULL)
+  {
+    FindSuspectProductNamesWithRules(discrepancy_list, sep_list, s_SuspectProductRuleList);
+  }
+  else
+  {
+    FindSuspectProductNamesWithStaticList(discrepancy_list, sep_list);
+  }
+}
+
+
NLM_EXTERN Boolean IsProductNameOk (CharPtr product_name)
{
Int4 k;
@@ -11724,41 +12621,41 @@ NLM_EXTERN Boolean IsProductNameOk (CharPtr product_name)
}
-extern void FindSuspectProductNamesInNameList (FILE *input_file, FILE *output_file)
+NLM_EXTERN Boolean ReportProductNameProblems (CharPtr product_name, FILE *output_file, CharPtr prefix)
{
- ReadBufferData rbd;
- CharPtr line, func_name;
- Int4 k;
-
- rbd.fp = input_file;
- rbd.current_data = NULL;
+ Int4 k;
+ Boolean any_problems = FALSE;
+ CharPtr func_name;
- line = AbstractReadFunction (&rbd);
- while (line != NULL)
- {
- for (k = 0; k < num_suspect_product_terms; k++)
- {
- if (suspect_product_terms[k].search_func != NULL
- && (suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, line))
- {
- if (suspect_product_terms[k].search_func == EndsWithPattern) {
- func_name = "Ends with";
- } else if (suspect_product_terms[k].search_func == StartsWithPattern) {
- func_name = "Starts with";
+ for (k = 0; k < num_suspect_product_terms; k++)
+ {
+ if (suspect_product_terms[k].search_func != NULL
+ && (suspect_product_terms[k].search_func) (suspect_product_terms[k].pattern, product_name))
+ {
+ if (suspect_product_terms[k].search_func == EndsWithPattern) {
+ func_name = "Ends with";
+ } else if (suspect_product_terms[k].search_func == StartsWithPattern) {
+ func_name = "Starts with";
+ } else {
+ func_name = "Contains";
+ }
+ if (output_file) {
+ if (prefix == NULL) {
+ fprintf (output_file, "%s\t%s '%s'\n", product_name, func_name, suspect_product_terms[k].pattern);
} else {
- func_name = "Contains";
+ fprintf (output_file, "%s\t%s\t%s '%s'\n", prefix, product_name, func_name, suspect_product_terms[k].pattern);
}
- if (output_file) {
- fprintf (output_file, "%s\t%s '%s'\n", line, func_name, suspect_product_terms[k].pattern);
+ } else {
+ if (prefix == NULL) {
+ printf ("%s\t%s '%s'\n", product_name, func_name, suspect_product_terms[k].pattern);
} else {
- printf ("%s\t%s '%s'\n", line, func_name, suspect_product_terms[k].pattern);
+ printf ("%s\t%s\t%s '%s'\n", prefix, product_name, func_name, suspect_product_terms[k].pattern);
}
}
+ any_problems = TRUE;
}
-
- line = MemFree (line);
- line = AbstractReadFunction (&rbd);
}
+ return any_problems;
}
@@ -11840,7 +12737,7 @@ extern void FindSuspectPhrases (ValNodePtr PNTR discrepancy_list, ValNodePtr sep
if (subcat != NULL)
{
- dip = SuspectPhrase (DISC_SUSPECT_PRODUCT_NAME, "suspect phrases", "cds comments or protein description", ItemListFromSubcategories (subcat));
+ dip = SuspectPhraseEx (DISC_SUSPECT_PRODUCT_NAME, "suspect phrases", FALSE, "cds comments or protein description", ItemListFromSubcategories (subcat));
if (dip != NULL)
{
dip->subcategories = subcat;
@@ -11961,7 +12858,7 @@ static void FindSuspiciousPhraseInNoteText (ValNodePtr PNTR discrepancy_list, Va
if (subcat != NULL)
{
- dip = SuspectPhrase (DISC_SUSPICIOUS_NOTE_TEXT, "suspicious phrases", "note text", ItemListFromSubcategories (subcat));
+ dip = SuspectPhraseEx (DISC_SUSPICIOUS_NOTE_TEXT, "suspicious phrases", FALSE, "note text", ItemListFromSubcategories (subcat));
if (dip != NULL)
{
dip->subcategories = subcat;
@@ -12696,7 +13593,7 @@ static void FindRNAsWithoutProductsCallback (SeqFeatPtr sfp, Pointer data)
}
ff = FeatureFieldNew ();
- ff->type = Feature_type_any;
+ ff->type = Macro_feature_type_any;
ValNodeAddInt (&ff->field, FeatQualChoice_legal_qual, Feat_qual_legal_product);
field.choice = FieldType_feature_field;
field.data.ptrvalue = ff;
@@ -13237,7 +14134,7 @@ static void PercentNDiscrepancy (BioseqPtr bsp, Pointer userdata)
}
pct = PercentNInBioseq (bsp, FALSE);
- if (pct > 10.0)
+ if (pct > 5.0)
{
ValNodeAddPointer ((ValNodePtr PNTR)userdata, OBJ_BIOSEQ, bsp);
}
@@ -13655,9 +14552,7 @@ static void AddMissingViralQualsDiscrepancies (BioSourcePtr biop, Uint1 choice,
Boolean has_country = FALSE;
Boolean has_specific_host = FALSE;
- if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL
- || StringSearch (biop->org->orgname->lineage, "Viruses") == NULL
- || q == NULL) {
+ if (!IsViralBioSource(biop) || q == NULL) {
return;
}
@@ -14038,10 +14933,16 @@ static ClickableItemPtr FindMultipleSourceQuals (ValNodePtr qual, ValNodePtr ite
CharPtr str1, str2, qualname, fmt;
CharPtr has_multi_fmt = "%%d sources have multiple %s qualifiers";
ValNodePtr has_multi = NULL;
+ ValNodePtr src_choice;
if (qual == NULL || item_list == NULL) {
return NULL;
}
+ if (qual->choice == FieldType_source_qual
+ && (src_choice = qual->data.ptrvalue) != NULL
+ && src_choice->choice != SourceQualChoice_textqual) {
+ return NULL;
+ }
scp = StringConstraintNew ();
scp->not_present = TRUE;
@@ -14427,6 +15328,7 @@ static ValNodePtr RunBioSourceTest (SeqEntryPtr sep, BioSourceTestFunc func)
static Boolean HasAmplifiedWithSpeciesSpecificPrimerNote (BioSourcePtr biop)
{
SubSourcePtr ssp;
+ OrgModPtr mod;
Boolean rval = FALSE;
if (biop == NULL) {
@@ -14438,6 +15340,14 @@ static Boolean HasAmplifiedWithSpeciesSpecificPrimerNote (BioSourcePtr biop)
rval = TRUE;
}
}
+ if (!rval && biop->org != NULL && biop->org->orgname != NULL) {
+ for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) {
+ if (mod->subtype == ORGMOD_other
+ && StringCmp (mod->subname, "amplified with species-specific primers") == 0) {
+ rval = TRUE;
+ }
+ }
+ }
return rval;
}
@@ -14500,6 +15410,98 @@ static void FindRequiredClones (ValNodePtr PNTR discrepancy_list, ValNodePtr sep
}
+/* Returns TRUE when biop passes IsBacterialBioSource, has org and orgname
+ * data, and none of its org modifiers is a strain; FALSE otherwise.
+ */
+static Boolean IsMissingRequiredStrain (BioSourcePtr biop)
+{
+  OrgModPtr cur;
+  Boolean   strain_found = FALSE;
+
+  if (biop == NULL) return FALSE;
+  if (!IsBacterialBioSource(biop)) return FALSE;
+  if (biop->org == NULL || biop->org->orgname == NULL) return FALSE;
+
+  cur = biop->org->orgname->mod;
+  while (cur != NULL && !strain_found) {
+    if (cur->subtype == ORGMOD_strain) {
+      strain_found = TRUE;
+    }
+    cur = cur->next;
+  }
+  return strain_found ? FALSE : TRUE;
+}
+
+
+/* Collects, over all entries of sep_list, the biosources for which
+ * IsMissingRequiredStrain is TRUE and reports them as one
+ * DISC_REQUIRED_STRAIN item.
+ */
+static void FindRequiredStrains (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr       entry = sep_list;
+  ValNodePtr       flagged = NULL;
+  ClickableItemPtr report;
+
+  while (entry != NULL) {
+    ValNodeLink (&flagged, RunBioSourceTest (entry->data.ptrvalue, IsMissingRequiredStrain));
+    entry = entry->next;
+  }
+  if (flagged == NULL) {
+    return;
+  }
+  report = NewClickableItem (DISC_REQUIRED_STRAIN, "%d biosources are missing required strain value", flagged);
+  ValNodeAddPointer (discrepancy_list, 0, report);
+}
+
+
+/* Returns TRUE when biop passes IsBacterialBioSource and has a strain
+ * modifier whose value is not the trailing part of the taxname (either the
+ * strain is longer than the taxname or the taxname's tail differs from it).
+ */
+static Boolean BacterialTaxShouldEndWithStrain (BioSourcePtr biop)
+{
+  OrgModPtr cur;
+  Int4      name_len, strain_len;
+
+  if (biop == NULL) return FALSE;
+  if (!IsBacterialBioSource(biop)) return FALSE;
+  if (biop->org == NULL || biop->org->orgname == NULL) return FALSE;
+
+  name_len = StringLen (biop->org->taxname);
+  cur = biop->org->orgname->mod;
+  while (cur != NULL) {
+    if (cur->subtype == ORGMOD_strain) {
+      strain_len = StringLen (cur->subname);
+      if (strain_len > name_len) {
+        return TRUE;
+      }
+      /* compare the strain with the last strain_len characters of the taxname */
+      if (StringCmp (biop->org->taxname + name_len - strain_len, cur->subname) != 0) {
+        return TRUE;
+      }
+    }
+    cur = cur->next;
+  }
+  return FALSE;
+}
+
+
+/* Collects, over all entries of sep_list, the biosources for which
+ * BacterialTaxShouldEndWithStrain is TRUE and reports them as one
+ * DISC_BACTERIAL_TAX_STRAIN_MISMATCH item.
+ */
+static void FindBacterialTaxStrainMismatch (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr       entry = sep_list;
+  ValNodePtr       flagged = NULL;
+  ClickableItemPtr report;
+
+  while (entry != NULL) {
+    ValNodeLink (&flagged, RunBioSourceTest (entry->data.ptrvalue, BacterialTaxShouldEndWithStrain));
+    entry = entry->next;
+  }
+  if (flagged == NULL) {
+    return;
+  }
+  report = NewClickableItem (DISC_BACTERIAL_TAX_STRAIN_MISMATCH, "%d biosources have tax name/strain mismatch", flagged);
+  ValNodeAddPointer (discrepancy_list, 0, report);
+}
+
+
+/* Returns TRUE when the taxname ends with " sp." (case-sensitive) and does
+ * not begin with "uncultured " (case-insensitive); FALSE for everything
+ * else, including missing biosource or org data.
+ */
+static Boolean SpNotUncultured (BioSourcePtr biop)
+{
+  CharPtr taxname;
+  Int4    len;
+
+  if (biop == NULL || biop->org == NULL) {
+    return FALSE;
+  }
+  taxname = biop->org->taxname;
+  len = StringLen(taxname);
+  if (len < 4) {
+    return FALSE;
+  }
+  if (StringCmp (taxname + len - 4, " sp.") != 0) {
+    return FALSE;
+  }
+  if (StringNICmp (taxname, "uncultured ", 11) == 0) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* Collects, over all entries of sep_list, the biosources for which
+ * SpNotUncultured is TRUE and reports them as one TEST_SP_NOT_UNCULTURED
+ * item.
+ */
+static void FindSpNotUncultured (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr       entry = sep_list;
+  ValNodePtr       flagged = NULL;
+  ClickableItemPtr report;
+
+  while (entry != NULL) {
+    ValNodeLink (&flagged, RunBioSourceTest (entry->data.ptrvalue, SpNotUncultured));
+    entry = entry->next;
+  }
+  if (flagged == NULL) {
+    return;
+  }
+  report = NewClickableItem (TEST_SP_NOT_UNCULTURED, "%d biosources have taxnames that end with ' sp.' but do not start with 'uncultured'", flagged);
+  ValNodeAddPointer (discrepancy_list, 0, report);
+}
+
+
static void RetroviridaeDNACallback (BioseqPtr bsp, Pointer data)
{
SeqMgrDescContext context;
@@ -14511,9 +15513,8 @@ static void RetroviridaeDNACallback (BioseqPtr bsp, Pointer data)
}
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL
- || biop->org == NULL || biop->org->orgname == NULL
|| biop->genome == GENOME_proviral
- || StringSearch (biop->org->orgname->lineage, "Retroviridae") == NULL) {
+ || !HasLineage(biop, "Retroviridae")) {
return;
} else {
ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
@@ -15647,7 +16648,7 @@ static void ChangeMoltypeToGenomicDNA (ValNodePtr item_list, Pointer data, LogIn
const CharPtr kmRNAVariant = ", transcript variant ";
const CharPtr kCDSVariant = ", isoform ";
-static Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str)
+NLM_EXTERN Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str)
{
CharPtr join_mrna, join_cds;
Int4 len;
@@ -15680,9 +16681,92 @@ static Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str)
}
+/* Finds the mRNA that belongs to a coding region.
+ *
+ * Feature xrefs with an id are tried first; a referenced feature counts only
+ * when its subtype is FEATDEF_mRNA.  When no xref yields an mRNA the lookup
+ * falls back to location: first an mRNA that is a location superset of the
+ * CDS, then any overlapping mRNA.
+ *
+ * Returns the mRNA, or NULL when none is found or cds is NULL.
+ */
+NLM_EXTERN SeqFeatPtr GetmRNAforCDS (SeqFeatPtr cds)
+{
+  SeqFeatPtr mrna = NULL;
+  SeqFeatXrefPtr xref;
+  SeqMgrFeatContext mcontext;
+
+  /* exported helper: a missing feature yields "no mRNA" instead of a crash */
+  if (cds == NULL) {
+    return NULL;
+  }
+
+  /* first, check for mRNA identified by feature xref */
+  for (xref = cds->xref; xref != NULL && mrna == NULL; xref = xref->next) {
+    if (xref->id.choice != 0) {
+      mrna = SeqMgrGetFeatureByFeatID (cds->idx.entityID, NULL, NULL, xref, NULL);
+      if (mrna != NULL && mrna->idx.subtype != FEATDEF_mRNA) {
+        mrna = NULL;
+      }
+    }
+  }
+
+  /* try by location if not by xref */
+  if (mrna == NULL) {
+    mrna = SeqMgrGetLocationSupersetmRNA (cds->location, &mcontext);
+    if (mrna == NULL) {
+      mrna = SeqMgrGetOverlappingmRNA (cds->location, &mcontext);
+    }
+  }
+  return mrna;
+}
+
+typedef struct underlyingfeat { /* userdata read by FindUnderlyingCDS through context->userdata */
+ SeqFeatPtr orig_feat; /* feature each candidate is compared against with TestFeatOverlap */
+ ValNodePtr matching_features; /* OBJ_SEQFEAT list of candidates that passed the overlap test */
+} UnderlyingFeatData, PNTR UnderlyingFeatPtr;
+
+/* Overlap callback: appends sfp to the matching_features list of the
+ * UnderlyingFeatData found in context->userdata when
+ * TestFeatOverlap (orig_feat, sfp, CHECK_INTERVALS) is non-negative.
+ * Always returns TRUE.
+ */
+static Boolean LIBCALLBACK FindUnderlyingCDS (
+  SeqFeatPtr sfp,
+  SeqMgrFeatContextPtr context
+)
+
+{
+  UnderlyingFeatPtr search;
+
+  if (sfp != NULL && context != NULL) {
+    search = context->userdata;
+    if (search != NULL
+        && TestFeatOverlap(search->orig_feat, sfp, CHECK_INTERVALS) >= 0) {
+      ValNodeAddPointer (&(search->matching_features), OBJ_SEQFEAT, sfp);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/* Finds the coding region that belongs to an mRNA.
+ *
+ * Feature xrefs with an id are tried first; a referenced feature counts only
+ * when its subtype is FEATDEF_CDS.  Otherwise the CDS features overlapping
+ * the mRNA location are gathered with FindUnderlyingCDS and the first one
+ * collected is returned.
+ *
+ * Returns the CDS, or NULL when none is found or mrna is NULL.
+ */
+NLM_EXTERN SeqFeatPtr GetCDSformRNA (SeqFeatPtr mrna)
+{
+  SeqFeatPtr cds = NULL;
+  SeqFeatXrefPtr xref;
+  UnderlyingFeatData uf;
+
+  /* exported helper: a missing feature yields "no CDS" instead of a crash */
+  if (mrna == NULL) {
+    return NULL;
+  }
+
+  /* first, check for cds identified by feature xref */
+  for (xref = mrna->xref; xref != NULL && cds == NULL; xref = xref->next) {
+    if (xref->id.choice != 0) {
+      cds = SeqMgrGetFeatureByFeatID (mrna->idx.entityID, NULL, NULL, xref, NULL);
+      if (cds != NULL && cds->idx.subtype != FEATDEF_CDS) {
+        cds = NULL;
+      }
+    }
+  }
+
+  /* try by location if not by xref */
+  if (cds == NULL) {
+    MemSet (&uf, 0, sizeof (UnderlyingFeatData));
+    uf.orig_feat = mrna;
+    /* the result is read from uf.matching_features; the returned count is not needed */
+    SeqMgrGetAllOverlappingFeatures (mrna->location, FEATDEF_CDS, NULL, 0,
+                                     SIMPLE_OVERLAP, &uf, FindUnderlyingCDS);
+    if (uf.matching_features != NULL) {
+      cds = uf.matching_features->data.ptrvalue;
+      uf.matching_features = ValNodeFree (uf.matching_features);
+    }
+  }
+  return cds;
+}
+
+
static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data)
{
- SeqMgrFeatContext fcontext, mcontext;
+ SeqMgrFeatContext fcontext;
SeqMgrDescContext dcontext;
SeqFeatPtr sfp, mRNA;
SeqDescrPtr sdp;
@@ -15690,6 +16774,7 @@ static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data)
CharPtr feat_product, mrna_product;
ValNode field;
FeatureFieldPtr ff;
+ BioSourcePtr biop;
if (bsp == NULL || bsp->mol != Seq_mol_dna || data == NULL) {
return;
@@ -15698,6 +16783,11 @@ static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data)
if (!IsEukaryotic (bsp)) {
return;
}
+ biop = GetBiopForBsp(bsp);
+ if (biop != NULL && IsLocationOrganelle(biop->genome)) {
+ return;
+ }
+
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
if (sdp == NULL || sdp->data.ptrvalue == NULL) {
return;
@@ -15708,7 +16798,7 @@ static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data)
}
ff = FeatureFieldNew ();
- ff->type = Feature_type_any;
+ ff->type = Macro_feature_type_any;
ValNodeAddInt (&(ff->field), FeatQualChoice_legal_qual, Feat_qual_legal_product);
field.choice = FieldType_feature_field;
field.data.ptrvalue = ff;
@@ -15720,10 +16810,8 @@ static void ReportCDSWithoutmRNACallback (BioseqPtr bsp, Pointer data)
if (IsPseudo (sfp)) {
continue;
}
- mRNA = SeqMgrGetLocationSupersetmRNA (sfp->location, &mcontext);
- if (mRNA == NULL) {
- mRNA = SeqMgrGetOverlappingmRNA (sfp->location, &mcontext);
- }
+
+ mRNA = GetmRNAforCDS(sfp);
if (mRNA == NULL) {
ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
@@ -17497,9 +18585,17 @@ static CharPtr flatfile_find_list_oncaller[] = {
static CharPtr flatfile_find_list_oncaller_wholeword[] = {
- "chian",
+ "caputre",
"casette",
+ "chian",
+ "cytochome",
"diveristy",
+ "genone",
+ "muesum",
+ "musuem",
+ "nuclear shutting",
+ "reserach",
+ "transcirption",
"unversity",
"varent",
NULL
@@ -17678,7 +18774,7 @@ static void FindTextInCDSProduct (ValNodePtr PNTR discrepancy_list, ValNodePtr s
if (master_list != NULL)
{
- dip = SuspectPhrase (DISC_CDS_PRODUCT_FIND, "suspect phrase or characters", "coding region product", master_list);
+ dip = SuspectPhraseEx (DISC_CDS_PRODUCT_FIND, "suspect phrase or characters", FALSE, "coding region product", master_list);
if (dip != NULL)
{
dip->subcategories = subcategories;
@@ -18370,6 +19466,7 @@ NLM_EXTERN void RemoveExonsOnMrna (ValNodePtr item_list, Pointer data, LogInfoPt
ObjMgrSetDirtyFlag (vnp->data.intvalue, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, vnp->data.intvalue, 0, 0);
}
+ ValNodeFree (entityIDList);
}
@@ -18627,6 +19724,24 @@ static Boolean IsNameCapitalizationOk (CharPtr str)
return rval;
}
+/* Returns TRUE when init has no text or when every alphabetic character in
+ * it is upper case; FALSE as soon as an alphabetic character that is not
+ * upper case is seen.  Non-alphabetic characters are ignored.
+ */
+static Boolean IsAuthorInitialsCapitalizationOk (CharPtr init)
+{
+  CharPtr cp;
+  unsigned char ch;
+
+  if (StringHasNoText (init)) {
+    return TRUE;
+  }
+
+  for (cp = init; *cp != 0; cp++) {
+    /* the <ctype.h> classifiers are only defined for unsigned char values
+     * (or EOF); a plain char with the high bit set must be converted first */
+    ch = (unsigned char) *cp;
+    if (isalpha (ch) && !isupper (ch)) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
static void CheckAuthCapsAuthCallback (NameStdPtr nsp, Pointer userdata)
{
@@ -18642,7 +19757,7 @@ static void CheckAuthCapsAuthCallback (NameStdPtr nsp, Pointer userdata)
} else if(!IsNameCapitalizationOk (nsp->names[1])) {
/* first name bad */
*pIsBad = TRUE;
- } else if(!IsNameCapitalizationOk (nsp->names[4])) {
+ } else if(!IsAuthorInitialsCapitalizationOk (nsp->names[4])) {
/* initials bad */
*pIsBad = TRUE;
}
@@ -19959,9 +21074,7 @@ static void FindBacterialNonExtendablePartialsCallback (BioseqPtr bsp, Pointer u
/* only perform test if associated organism cannot be identified as eukaryote */
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || biop->org == NULL
- || biop->org->orgname == NULL
- || StringISearch (biop->org->orgname->lineage, "Eukaryota") == NULL) {
+ if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || !IsEukaryoticBioSource(biop)) {
for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
sfp != NULL;
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext)) {
@@ -20040,9 +21153,7 @@ static void FindBacterialNonExtendablePartialsWithExceptionsCallback (BioseqPtr
/* only perform test if associated organism cannot be identified as eukaryote */
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
- if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || biop->org == NULL
- || biop->org->orgname == NULL
- || StringISearch (biop->org->orgname->lineage, "Eukaryota") == NULL) {
+ if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || !IsEukaryoticBioSource(biop)) {
for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
sfp != NULL;
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext)) {
@@ -20215,13 +21326,76 @@ static void FindSuspectrRNAProductsCallback (SeqFeatPtr sfp, Pointer data)
}
+/* Builds a new string constraint whose match_text is a copy of search and
+ * whose whole_word flag is the one given.
+ */
+static StringConstraintPtr MakeSimpleSearchConstraint (CharPtr search, Boolean whole_word)
+{
+  StringConstraintPtr constraint = StringConstraintNew();
+
+  constraint->whole_word = whole_word;
+  constraint->match_text = StringSave (search);
+  return constraint;
+}
+
+
+/* Builds a new suspect rule whose find part is a string constraint made by
+ * MakeSimpleSearchConstraint (search, whole_word).
+ */
+static SuspectRulePtr MakeSimpleSearchRule (CharPtr search, Boolean whole_word)
+{
+  SuspectRulePtr new_rule = SuspectRuleNew();
+  ValNodePtr     finder;
+
+  new_rule->find = ValNodeNew (NULL);
+  finder = new_rule->find;
+  finder->choice = SearchFunc_string_constraint;
+  finder->data.ptrvalue = MakeSimpleSearchConstraint (search, whole_word);
+  return new_rule;
+}
+
+
+/* Builds the rule set used to flag suspect rRNA product names: one
+ * substring rule per entry of suspect_rrna_product_names, followed by a
+ * whole-word rule for "8S" that carries a whole-word "5.8S" exception.
+ * The caller owns the returned list (FindSuspectrRNAProducts releases it
+ * with SuspectRuleSetFree).
+ */
+static SuspectRuleSetPtr MakeSuspectrRNARules (void)
+{
+  SuspectRuleSetPtr rna_rules = NULL, last_rule = NULL, tmp;
+  Int4 i;
+
+  for (i = 0; i < num_suspect_rrna_product_names; i++) {
+    tmp = MakeSimpleSearchRule (suspect_rrna_product_names[i], FALSE);
+    if (tmp == NULL) {
+      /* never let a missing rule become the list tail */
+      continue;
+    }
+    if (last_rule == NULL) {
+      rna_rules = tmp;
+    } else {
+      last_rule->next = tmp;
+    }
+    last_rule = tmp;
+  }
+
+  tmp = MakeSimpleSearchRule("8S", TRUE);
+  if (tmp != NULL) {
+    tmp->except = ValNodeNew (NULL);
+    if (tmp->except == NULL) {
+      /* without its exception the rule would also flag "5.8S", so drop it */
+      tmp = SuspectRuleSetFree (tmp);
+    } else {
+      tmp->except->choice = SearchFunc_string_constraint;
+      tmp->except->data.ptrvalue = MakeSimpleSearchConstraint("5.8S", TRUE);
+      if (last_rule == NULL) {
+        rna_rules = tmp;
+      } else {
+        last_rule->next = tmp;
+      }
+      last_rule = tmp;
+    }
+  }
+
+  return rna_rules;
+}
+
+
+/* For each SeqEntry in sep_list, runs the suspect-rRNA rule set against
+ * rRNA features (FEATDEF_rRNA) and, when anything is reported, appends one
+ * DISC_SUSPECT_RRNA_PRODUCTS clickable item (with the per-rule results as
+ * subcategories) to discrepancy_list.  The rule set is built on entry and
+ * freed before returning.
+ */
static void FindSuspectrRNAProducts (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
- CheckForSuspectPhraseByList (discrepancy_list, sep_list,
- suspect_rrna_product_names, num_suspect_rrna_product_names,
- FindSuspectrRNAProductsCallback,
- DISC_SUSPECT_RRNA_PRODUCTS,
- "rRNA product name");
+  SuspectRuleSetPtr rna_rules;
+  ValNodePtr subcat;
+  ClickableItemPtr cip;
+
+  rna_rules = MakeSuspectrRNARules();
+
+  for (; sep_list != NULL; sep_list = sep_list->next) {
+    subcat = GetSuspectRuleDiscrepancies (sep_list->data.ptrvalue, rna_rules, FEATDEF_rRNA, DISC_SUSPECT_RRNA_PRODUCTS);
+    if (subcat == NULL) {
+      continue;
+    }
+    cip = SuspectPhraseEx (DISC_SUSPECT_RRNA_PRODUCTS, "suspect phrase", FALSE, "rRNA product name", ItemListFromSubcategories (subcat));
+    if (cip == NULL) {
+      /* same guard FindBadGeneNames applies to SuspectPhraseEx; without a
+       * parent item the subcategory list has no owner, so release it */
+      subcat = FreeClickableList (subcat);
+      continue;
+    }
+    cip->subcategories = subcat;
+    ValNodeAddPointer (discrepancy_list, 0, cip);
+  }
+  rna_rules = SuspectRuleSetFree (rna_rules);
}
@@ -20297,7 +21471,7 @@ static Boolean HasMissingBacteriaStrain (BioSourcePtr biop)
return FALSE;
}
- if (StringCmp (biop->org->orgname->div, "BCT") != 0) {
+ if (!IsBacterialBioSource(biop)) {
return FALSE;
}
@@ -20328,9 +21502,9 @@ static Boolean IsBacterialIsolate (BioSourcePtr biop)
Boolean has_bad_isolate = FALSE;
if (biop == NULL
+ || !IsBacterialBioSource(biop)
|| biop->org == NULL
|| biop->org->orgname == NULL
- || StringISearch (biop->org->orgname->lineage, "Bacteria") == NULL
|| biop->org->orgname->mod == NULL
|| HasAmplifiedWithSpeciesSpecificPrimerNote(biop)) {
return FALSE;
@@ -20339,6 +21513,7 @@ static Boolean IsBacterialIsolate (BioSourcePtr biop)
for (mod = biop->org->orgname->mod; mod != NULL && !has_bad_isolate; mod = mod->next) {
if (mod->subtype == ORGMOD_isolate
&& StringNICmp (mod->subname, "DGGE gel band", 13) != 0
+ && StringNICmp (mod->subname, "TGGE gel band", 13) != 0
&& StringNICmp (mod->subname, "SSCP gel band", 13) != 0) {
has_bad_isolate = TRUE;
}
@@ -20419,38 +21594,31 @@ static void FindMetagenomeSource (ValNodePtr PNTR discrepancy_list, ValNodePtr s
}
+/* Bioseq visitor: when bsp has "Bacteria" in its lineage (per
+ * BioseqHasLineage) and carries at least one mRNA feature, appends bsp to
+ * the ValNode list that `data` points to, tagged OBJ_BIOSEQ.
+ */
+static void FindBacteriamRNACallback (BioseqPtr bsp, Pointer data)
+{
+  SeqMgrFeatContext fcontext;
+
+  if (bsp == NULL || !BioseqHasLineage(bsp, "Bacteria") || data == NULL) {
+    return;
+  }
+
+  if (SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_mRNA, &fcontext) == NULL) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
+}
+
static void FindBacteriamRNA (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
{
- ValNodePtr vnp, item_list = NULL, constraint = NULL, src_list, vnp_s;
+ ValNodePtr vnp, item_list = NULL;
SeqEntryPtr sep;
- SourceConstraintPtr src;
- SequenceConstraintPtr seq;
-
- src = SourceConstraintNew ();
- src->field1 = ValNodeNew (NULL);
- src->field1->choice = SourceQualChoice_textqual;
- src->field1->data.intvalue = Source_qual_lineage;
- src->constraint = StringConstraintNew ();
- src->constraint->match_text = StringSave ("Bacteria");
- src->constraint->match_location = String_location_starts;
- ValNodeAddPointer (&constraint, ConstraintChoice_source, src);
-
- seq = SequenceConstraintNew ();
- seq->feature = Feature_type_mRNA;
- ValNodeAddPointer (&constraint, ConstraintChoice_sequence, seq);
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
sep = (SeqEntryPtr) vnp->data.ptrvalue;
- src_list = GetObjectListForFieldType (FieldType_molinfo_field, sep);
- for (vnp_s = src_list; vnp_s != NULL; vnp_s = vnp_s->next) {
- if (DoesObjectMatchConstraintChoiceSet (vnp_s->choice, vnp_s->data.ptrvalue, constraint)) {
- ValNodeAddPointer (&item_list, vnp_s->choice, vnp_s->data.ptrvalue);
- }
- }
- src_list = FreeObjectList (src_list);
+ VisitBioseqsInSep (sep, &item_list, FindBacteriamRNACallback);
}
- constraint = ConstraintChoiceSetFree (constraint);
if (item_list != NULL) {
ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (DISC_BACTERIA_SHOULD_NOT_HAVE_MRNA, "%d bacterial sequences have mRNA features", item_list));
@@ -20641,7 +21809,7 @@ static Boolean IsTrinomialWithoutQualifier (BioSourcePtr biop)
}
/* ignore viruses */
- if (biop->org->orgname != NULL && StringICmp (biop->org->orgname->div, "VRL") == 0) {
+ if (IsViralBioSource(biop)) {
return FALSE;
}
@@ -20734,6 +21902,38 @@ static void FindShortrRNAs (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_lis
}
+/* Feature visitor: appends sfp (tagged OBJ_SEQFEAT) to the ValNode list
+ * behind `data` if any of its GenBank qualifiers is named exactly
+ * "standard_name".  A feature is added at most once.
+ */
+static void FindStandardNameCallback (SeqFeatPtr sfp, Pointer data)
+{
+  GBQualPtr gbq;
+  Boolean   has_standard_name = FALSE;
+
+  if (sfp == NULL || data == NULL) {
+    return;
+  }
+
+  gbq = sfp->qual;
+  while (gbq != NULL && !has_standard_name) {
+    if (StringCmp (gbq->qual, "standard_name") == 0) {
+      has_standard_name = TRUE;
+    }
+    gbq = gbq->next;
+  }
+
+  if (has_standard_name) {
+    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+  }
+}
+
+
+/* Collects every feature carrying a standard_name qualifier across all
+ * entries of sep_list and, if any were found, appends one
+ * ONCALLER_HAS_STANDARD_NAME clickable item to discrepancy_list.
+ */
+static void FindStandardName (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr flagged = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    VisitFeaturesInSep (entry->data.ptrvalue, &flagged, FindStandardNameCallback);
+    entry = entry->next;
+  }
+
+  if (flagged == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (ONCALLER_HAS_STANDARD_NAME, "%d features have standard_name qualifier", flagged));
+}
+
+
static Boolean DoAuthorityAndTaxnameConflict (BioSourcePtr biop)
{
OrgModPtr mod;
@@ -21408,7 +22608,7 @@ static void FindBadBacterialGeneNamesCallback (BioseqPtr bsp, Pointer data)
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
if (sdp == NULL || (biop = sdp->data.ptrvalue) == NULL || biop->org == NULL
|| biop->org->orgname == NULL
- || StringISearch (biop->org->orgname->lineage, "Bacteria") == NULL) {
+ || !IsBacterialBioSource (biop)) {
return;
}
@@ -21423,17 +22623,154 @@ static void FindBadBacterialGeneNamesCallback (BioseqPtr bsp, Pointer data)
}
-static void FindBadBacterialGeneNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+/* Predicate used by the bad-gene-name rules.  FindBadGeneNameCallback calls
+ * it as func (pattern, gene locus, gene feature); a TRUE result marks the
+ * gene as matching the rule.
+ */
+typedef Boolean (*BadGeneNameTestFunc) PROTO ((CharPtr, CharPtr, SeqFeatPtr));
+
+/* One bad-gene-name rule: `pattern` is handed to `func` as its first
+ * argument and is also used as the phrase text when the rule is reported.
+ */
+typedef struct badgenename {
+ CharPtr pattern;
+ BadGeneNameTestFunc func;
+} BadGeneNameData, PNTR BadGeneNamePtr;
+
+/* Rule predicate: TRUE when `search` is longer than 10 characters.
+ * `pattern` and `sfp` are unused; they exist to match BadGeneNameTestFunc.
+ */
+static Boolean GeneNameLongerThanTenChars (CharPtr pattern, CharPtr search, SeqFeatPtr sfp)
+{
+  return (Boolean) ((StringLen (search) > 10) ? TRUE : FALSE);
+}
+
+/* Rule predicate: TRUE when StringISearch finds `pattern` inside `search`.
+ * `sfp` is unused; it exists to match BadGeneNameTestFunc.
+ */
+static Boolean GeneNameContainsPhrase (CharPtr pattern, CharPtr search, SeqFeatPtr sfp)
+{
+  CharPtr hit = StringISearch (search, pattern);
+
+  return (Boolean) ((hit != NULL) ? TRUE : FALSE);
+}
+
+
+/* Rule predicate: TRUE when `search` contains a run of four or more
+ * consecutive decimal digits.  Returns FALSE for a NULL `search`.
+ * `pattern` and `sfp` are unused; they exist to match BadGeneNameTestFunc.
+ */
+static Boolean GeneNameHas4Numbers (CharPtr pattern, CharPtr search, SeqFeatPtr sfp)
{
- ValNodePtr feature_list = NULL, vnp;
- CharPtr fmt = "%d genes do not start with lowercase letters";
+  CharPtr cp;
+  Int4    num_digits = 0;
+
+  if (search == NULL) {
+    return FALSE;
+  }
+
+  /* stop scanning as soon as a run of four digits has been seen */
+  for (cp = search; *cp != 0 && num_digits < 4; cp++) {
+    /* isdigit requires a value representable as unsigned char (or EOF);
+     * a plain char holding a byte >= 0x80 may be negative, so cast first */
+    if (isdigit ((unsigned char) *cp)) {
+      ++num_digits;
+    } else {
+      num_digits = 0;
+    }
+  }
+  if (num_digits >= 4) {
+    return TRUE;
+  } else {
+    return FALSE;
+  }
+}
+
+
+/* Table of bad-gene-name rules.  For the GeneNameContainsPhrase entries the
+ * first field is the phrase searched for; for the other two predicates the
+ * text is not consulted by the test and serves only as the reported label.
+ */
+static BadGeneNameData bad_gene_rules[] = {
+ { "more than 10 characters", GeneNameLongerThanTenChars },
+ { "putative", GeneNameContainsPhrase },
+ { "fragment", GeneNameContainsPhrase },
+ { "gene", GeneNameContainsPhrase },
+ { "orf", GeneNameContainsPhrase },
+ { "like", GeneNameContainsPhrase },
+ { "4 or more consecutive numbers", GeneNameHas4Numbers }
+};
+
+
+/* number of entries in bad_gene_rules */
+static const Int4 kNumBadGeneRules = sizeof (bad_gene_rules) / sizeof (BadGeneNameData);
+/* Feature visitor for gene features with a non-empty locus.  `data` is an
+ * array of kNumBadGeneRules ValNode list heads; the gene is appended
+ * (tagged OBJ_SEQFEAT) to list k for every rule k whose predicate accepts
+ * the locus.
+ */
+static void FindBadGeneNameCallback (SeqFeatPtr sfp, Pointer data)
+{
+  ValNodePtr PNTR per_rule_lists;
+  GeneRefPtr      gene_ref;
+  Int4            rule_idx;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) {
+    return;
+  }
+  gene_ref = (GeneRefPtr) sfp->data.value.ptrvalue;
+  if (gene_ref == NULL || StringHasNoText (gene_ref->locus)) {
+    return;
+  }
+  per_rule_lists = (ValNodePtr PNTR) data;
+  if (per_rule_lists == NULL) {
+    return;
+  }
+
+  for (rule_idx = 0; rule_idx < kNumBadGeneRules; rule_idx++) {
+    if (!bad_gene_rules[rule_idx].func(bad_gene_rules[rule_idx].pattern, gene_ref->locus, sfp)) {
+      continue;
+    }
+    ValNodeAddPointer (per_rule_lists + rule_idx, OBJ_SEQFEAT, sfp);
+  }
+}
+
+
+/* Runs both gene-name checks over every entry of sep_list: the per-rule
+ * tests in bad_gene_rules (one result list per rule) and the bacterial
+ * lowercase-start test (FindBadBacterialGeneNamesCallback).  Each non-empty
+ * result becomes a subcategory item.  A single subcategory is linked into
+ * discrepancy_list directly; two or more are wrapped in one
+ * TEST_BAD_GENE_NAME parent item.
+ */
+static void FindBadGeneNames (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr PNTR feature_lists, vnp;
+ ValNodePtr bad_bacterial_genes = NULL;
+ ValNodePtr subcat = NULL;
+ CharPtr fmt = "%d bacterial genes do not start with lowercase letters";
+ Int4 k;
+ ClickableItemPtr dip;
+
+ /* feature_lists[k] collects the genes matching bad_gene_rules[k] */
+ feature_lists = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * kNumBadGeneRules);
+ MemSet (feature_lists, 0, sizeof (ValNodePtr) * kNumBadGeneRules);
for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
- VisitBioseqsInSep (vnp->data.ptrvalue, &feature_list, FindBadBacterialGeneNamesCallback);
+ VisitFeaturesInSep (vnp->data.ptrvalue, feature_lists, FindBadGeneNameCallback);
+ VisitBioseqsInSep (vnp->data.ptrvalue, &bad_bacterial_genes, FindBadBacterialGeneNamesCallback);
}
- if (feature_list != NULL) {
- ValNodeAddPointer ((ValNodePtr PNTR) discrepancy_list, 0, NewClickableItem (DISC_BAD_BACTERIAL_GENE_NAME, fmt, feature_list));
+ if (bad_bacterial_genes != NULL) {
+ ValNodeAddPointer (&subcat, 0, NewClickableItem (DISC_BAD_BACTERIAL_GENE_NAME, fmt, bad_bacterial_genes));
+ }
+
+ /* one subcategory per rule that matched at least one gene */
+ for (k = 0; k < kNumBadGeneRules; k++) {
+ if (feature_lists[k] != NULL) {
+ ValNodeAddPointer (&subcat, 0, SuspectPhraseEx(TEST_BAD_GENE_NAME, bad_gene_rules[k].pattern, FALSE, "gene", feature_lists[k]));
+ }
+ }
+ /* only the array of list heads is freed here; the lists themselves were
+ * handed to SuspectPhraseEx above */
+ feature_lists = MemFree (feature_lists);
+
+ if (subcat == NULL) {
+ /* do nothing */
+ } else if (subcat->next == NULL) {
+ /* exactly one subcategory: report it directly, without a parent item */
+ ValNodeLink (discrepancy_list, subcat);
+ } else {
+ dip = SuspectPhraseEx (TEST_BAD_GENE_NAME, "suspect phrase or characters", FALSE, "gene", ItemListFromSubcategories (subcat));
+ /* NOTE(review): when dip is NULL the subcat list is neither reported nor
+ * freed here -- confirm whether SuspectPhraseEx can actually return NULL */
+ if (dip != NULL)
+ {
+ dip->subcategories = subcat;
+ ValNodeAddPointer (discrepancy_list, 0, dip);
+ }
+ }
+}
+
+
+/* For every gene feature in item_list that has a non-empty locus, appends
+ * the locus to the feature comment (ExistingTextOption_append_semi) and
+ * then clears the locus.  When at least one gene was changed and a log is
+ * supplied, marks the log as containing data and, if it has an open file,
+ * writes a one-line summary.  `data` is not used.
+ */
+static void MoveBadGeneNames (ValNodePtr item_list, Pointer data, LogInfoPtr lip)
+{
+  ValNodePtr node;
+  SeqFeatPtr gene;
+  GeneRefPtr gene_ref;
+  Int4       moved = 0;
+
+  for (node = item_list; node != NULL; node = node->next) {
+    if (node->choice != OBJ_SEQFEAT) {
+      continue;
+    }
+    gene = (SeqFeatPtr) node->data.ptrvalue;
+    if (gene == NULL || gene->data.choice != SEQFEAT_GENE) {
+      continue;
+    }
+    gene_ref = (GeneRefPtr) gene->data.value.ptrvalue;
+    if (gene_ref == NULL || StringHasNoText (gene_ref->locus)) {
+      continue;
+    }
+    SetStringValue (&(gene->comment), gene_ref->locus, ExistingTextOption_append_semi);
+    gene_ref->locus = MemFree (gene_ref->locus);
+    moved++;
+  }
+  if (moved > 0 && lip != NULL) {
+    lip->data_in_log = TRUE;
+    if (lip->fp != NULL) {
+      fprintf (lip->fp, "Moved %d bad gene names to gene comment.\n", moved);
+    }
}
}
@@ -21463,6 +22800,7 @@ static BioseqSetClassNameClassValData bioseqsetclassname_classval[] = {
{"Eco-set", BioseqseqSet_class_eco_set},
{"Gen-prod-set", BioseqseqSet_class_gen_prod_set},
{"WGS-set", BioseqseqSet_class_wgs_set},
+ {"Small-genome-set", BioseqseqSet_class_small_genome_set},
{"Other", BioseqseqSet_class_other}};
#define NUM_bioseqsetclassname_classval sizeof (bioseqsetclassname_classval) / sizeof (BioseqSetClassNameClassValData)
@@ -22118,6 +23456,1294 @@ static void FindProjectIdSequences (ValNodePtr PNTR discrepancy_list, ValNodePtr
}
+/* Bioseq visitor (non-protein sequences only): counts the user-object
+ * descriptors of type "StructuredComment" on bsp and appends bsp to the
+ * list behind `data`, storing the count in the node's choice field.
+ */
+static void FindSeqWithStructuredComments (BioseqPtr bsp, Pointer data)
+{
+  SeqMgrDescContext dcontext;
+  SeqDescrPtr       desc;
+  UserObjectPtr     user_obj;
+  Uint1             comment_count = 0;
+
+  if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) {
+    return;
+  }
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+  while (desc != NULL) {
+    user_obj = (UserObjectPtr) desc->data.ptrvalue;
+    if (user_obj != NULL
+        && user_obj->type != NULL
+        && StringICmp (user_obj->type->str, "StructuredComment") == 0) {
+      comment_count++;
+    }
+    desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_user, &dcontext);
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, comment_count, bsp);
+}
+
+
+/* Reports sequences that disagree on how many structured comments they
+ * carry.  Every non-protein Bioseq is collected with its comment count in
+ * the node's choice field; the list is then split into one group per
+ * count.  If all sequences share one count nothing is reported; otherwise
+ * one subcategory per count is created under a single
+ * ONCALLER_MISSING_STRUCTURED_COMMENTS parent item.
+ */
+static void FindMissingStructuredComments (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+ ValNodePtr count_list = NULL;
+ ValNodePtr tmp_list = NULL;
+ ValNodePtr vnp;
+ CharPtr fmt;
+ /* "%%d" survives the sprintf below as "%d", leaving a format string for
+ * NewClickableItem with the comment count already filled in */
+ CharPtr num_fmt = "%%d sequences have %d structured comments";
+ ClickableItemPtr cip;
+ ValNodePtr subcat = NULL;
+ Uint1 orig_choice;
+
+ for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+ VisitBioseqsInSep (vnp->data.ptrvalue, &count_list, FindSeqWithStructuredComments);
+ }
+
+ if (count_list == NULL) {
+ return;
+ }
+
+ /* presumably ValNodeExtractList detaches and returns the nodes whose
+ * choice equals the given value -- confirm against its definition */
+ tmp_list = ValNodeExtractList (&count_list, 0);
+ if (tmp_list == NULL) {
+ /* no sequences have 0 */
+ tmp_list = ValNodeExtractList (&count_list, count_list->choice);
+ }
+ if (count_list == NULL) {
+ /* all sequences have same number of structured comments, no report */
+ tmp_list = ValNodeFree (tmp_list);
+ } else {
+ while (tmp_list != NULL) {
+ /* remember the count before the choice field is reused as object type */
+ orig_choice = tmp_list->choice;
+ for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
+ vnp->choice = OBJ_BIOSEQ;
+ }
+ fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_fmt) + 15));
+ sprintf (fmt, num_fmt, orig_choice);
+ cip = NewClickableItem (ONCALLER_MISSING_STRUCTURED_COMMENTS, fmt, tmp_list);
+ fmt = MemFree (fmt);
+ ValNodeAddPointer (&subcat, 0, cip);
+ /* move on to the group for the next remaining count, if any */
+ if (count_list == NULL) {
+ tmp_list = NULL;
+ } else {
+ tmp_list = ValNodeExtractList (&count_list, count_list->choice);
+ }
+ }
+ if (subcat->next == NULL) {
+ subcat = FreeClickableList (subcat);
+ } else {
+ /* parent item carries only the description and the subcategories */
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (cip, 0, sizeof (ClickableItemData));
+ cip->clickable_item_type = ONCALLER_MISSING_STRUCTURED_COMMENTS;
+ cip->subcategories = subcat;
+ cip->description = StringSave ("Sequences have different numbers of structured comments");
+ ValNodeAddPointer (discrepancy_list, 0, cip);
+ }
+ }
+}
+
+
+/* Bioseq visitor (non-protein sequences only): appends bsp (tagged
+ * OBJ_BIOSEQ) to the list behind `data` unless one of its
+ * "StructuredComment" user objects has a StructuredCommentPrefix field
+ * (choice 1) equal to "##Genome-Assembly-Data-START##".
+ * Only the first StructuredCommentPrefix field of each user object is
+ * examined.
+ */
+static void MissingGenomeAssemblyStructuredCommentCallback (BioseqPtr bsp, Pointer data)
+{
+  SeqDescrPtr sdp;
+  SeqMgrDescContext dcontext;
+  Boolean found = FALSE;
+  UserObjectPtr uop;
+  UserFieldPtr ufp;
+
+  if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) {
+    return;
+  }
+  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+       sdp != NULL && !found;
+       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext)) {
+    if ((uop = (UserObjectPtr) sdp->data.ptrvalue) != NULL
+        && uop->type != NULL
+        && StringICmp (uop->type->str, "StructuredComment") == 0) {
+      for (ufp = uop->data; ufp != NULL && !found; ufp = ufp->next) {
+        /* a field without a label cannot be the prefix field; skip it
+         * instead of dereferencing a NULL label */
+        if (ufp->label != NULL
+            && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0) {
+          if (ufp->choice == 1 && StringICmp (ufp->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) {
+            found = TRUE;
+          }
+          break;
+        }
+      }
+    }
+  }
+  if (!found) {
+    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
+  }
+}
+
+
+/* Gathers the Bioseqs flagged by
+ * MissingGenomeAssemblyStructuredCommentCallback across sep_list and, if
+ * there are any, appends one MISSING_GENOMEASSEMBLY_COMMENTS item to
+ * discrepancy_list.
+ */
+static void FindMissingGenomeAssemblyStructuredComments (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr       missing = NULL;
+  ValNodePtr       entry = sep_list;
+  ClickableItemPtr report;
+
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &missing, MissingGenomeAssemblyStructuredCommentCallback);
+    entry = entry->next;
+  }
+  if (missing == NULL) {
+    return;
+  }
+  report = NewClickableItem (MISSING_GENOMEASSEMBLY_COMMENTS, "%d bioseqs are missing GenomeAssembly structured comments", missing);
+  ValNodeAddPointer (discrepancy_list, 0, report);
+}
+
+
+/* Feature visitor: appends a coding region (tagged OBJ_SEQFEAT) to the
+ * list behind `data` when any of its dbxrefs names the database "CDD"
+ * (case-insensitive).  A feature is added at most once.
+ */
+static void FindCDSWithCDDXrefCallback (SeqFeatPtr sfp, Pointer data)
+{
+  ValNodePtr xref;
+  DbtagPtr   tag;
+
+  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || data == NULL) {
+    return;
+  }
+
+  for (xref = sfp->dbxref; xref != NULL; xref = xref->next) {
+    tag = (DbtagPtr) xref->data.ptrvalue;
+    if (tag != NULL && StringICmp (tag->db, "CDD") == 0) {
+      ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+      return;
+    }
+  }
+}
+
+
+/* Gathers coding regions with CDD dbxrefs across sep_list and, if there
+ * are any, appends one TEST_CDS_HAS_CDD_XREF item to discrepancy_list.
+ */
+static void FindCDSWithCDDXref (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr with_cdd = NULL;
+  ValNodePtr entry;
+
+  for (entry = sep_list; entry != NULL; entry = entry->next) {
+    VisitFeaturesInSep (entry->data.ptrvalue, &with_cdd, FindCDSWithCDDXrefCallback);
+  }
+  if (with_cdd == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_CDS_HAS_CDD_XREF, "%d features have CDD Xrefs", with_cdd));
+}
+
+
+/* Sequence-stream callback: adds to the Int4 counter behind `userdata`
+ * the number of characters in `sequence` that are not one of the
+ * uppercase letters A, C, G, T or N.
+ */
+static void LIBCALLBACK CountUnusualNTProc (CharPtr sequence, Pointer userdata)
+{
+  Int4Ptr counter = (Int4Ptr) userdata;
+  CharPtr residue;
+
+  if (sequence == NULL || counter == NULL) return;
+
+  for (residue = sequence; *residue != 0; residue++) {
+    switch (*residue) {
+      case 'A':
+      case 'C':
+      case 'G':
+      case 'T':
+      case 'N':
+        break;
+      default:
+        (*counter)++;
+        break;
+    }
+  }
+}
+
+
+/* Bioseq visitor (non-protein sequences only): streams the sequence
+ * through CountUnusualNTProc and appends bsp (tagged OBJ_BIOSEQ) to the
+ * list behind `data` when at least one unusual character was counted.
+ */
+static void FindUnusualNTCallback (BioseqPtr bsp, Pointer data)
+{
+  Int4 stream_flags = 0;
+  Int4 unusual_count = 0;
+
+  if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) {
+    return;
+  }
+
+  SeqPortStream (bsp, stream_flags, (Pointer) &unusual_count, CountUnusualNTProc);
+  if (unusual_count <= 0) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
+}
+
+
+/* Gathers sequences containing characters other than A, T, C, G or N
+ * across sep_list and, if there are any, appends one TEST_UNUSUAL_NT item
+ * to discrepancy_list.
+ */
+static void FindUnusualNT (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr unusual = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &unusual, FindUnusualNTCallback);
+    entry = entry->next;
+  }
+  if (unusual == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNUSUAL_NT, "%d sequences contain nucleotides that are not ATCG or N", unusual));
+}
+
+
+/* Scan state shared between FindLowQualityRegionsCallback and
+ * FindLowQualityIntervalProc while a sequence is streamed.
+ */
+typedef struct qualityinterval {
+ Int4 start; /* position where the current candidate interval began, -1 if none */
+ Int4 pos; /* number of characters consumed so far */
+ Int4 num_ns; /* non-ATGC characters seen in the current interval */
+ FloatLo min_pct; /* minimum num_ns / length ratio for an interval to continue (a fraction; the caller uses 0.25) */
+ Int4 min_length; /* minimum interval length that counts as a hit */
+ Boolean found_interval; /* set once a qualifying interval has been seen */
+} QualityIntervalData, PNTR QualityIntervalPtr;
+
+
+/* Sequence-stream callback that tracks low-quality intervals in the
+ * QualityIntervalData behind `userdata`.  A character other than A, T, G
+ * or C opens an interval (or is counted into the open one).  At each
+ * A/T/G/C inside an open interval the ratio num_ns / length is compared
+ * with min_pct: if it has dropped below, the interval is closed, and it is
+ * recorded in found_interval when its length is at least min_length.
+ * An interval still open when the data ends is left for the caller.
+ */
+static void LIBCALLBACK FindLowQualityIntervalProc (CharPtr sequence, Pointer userdata)
+{
+  QualityIntervalPtr state = (QualityIntervalPtr) userdata;
+  CharPtr            residue;
+  Int4               span;
+  Boolean            is_plain_base;
+
+  if (sequence == NULL || state == NULL) return;
+
+  /* the position counter advances once per character, on every path */
+  for (residue = sequence; *residue != 0; residue++, state->pos++) {
+    is_plain_base = (Boolean) (*residue == 'A' || *residue == 'T'
+                               || *residue == 'G' || *residue == 'C');
+    if (!is_plain_base) {
+      if (state->start == -1) {
+        /* not inside an interval yet: open one here */
+        state->start = state->pos;
+        state->num_ns = 1;
+      } else {
+        state->num_ns++;
+      }
+      continue;
+    }
+    if (!(state->start > -1)) {
+      /* ordinary base outside any interval */
+      continue;
+    }
+    span = state->pos - state->start;
+    if ((FloatLo) state->num_ns / (FloatLo) span >= state->min_pct) {
+      /* interval is still dense enough; keep it open */
+      continue;
+    }
+    /* interval ends here; was it long enough to report? */
+    if (span >= state->min_length) {
+      state->found_interval = TRUE;
+    }
+    state->start = -1;
+    state->num_ns = 0;
+  }
+}
+
+
+/* Bioseq visitor (non-protein sequences only): streams the sequence
+ * through FindLowQualityIntervalProc with a minimum ratio of 0.25 and a
+ * minimum length of 30, and appends bsp (tagged OBJ_BIOSEQ) to the list
+ * behind `data` when a qualifying interval was found, including one that
+ * is still open at the end of the sequence.
+ */
+static void FindLowQualityRegionsCallback (BioseqPtr bsp, Pointer data)
+{
+  QualityIntervalData scan;
+  Int4                stream_flags = 0;
+
+  if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) {
+    return;
+  }
+  MemSet (&scan, 0, sizeof (QualityIntervalData));
+  scan.min_length = 30;
+  scan.min_pct = 0.25;
+  scan.start = -1;
+
+  SeqPortStream (bsp, stream_flags, (Pointer) &scan, FindLowQualityIntervalProc);
+
+  /* an interval running to the end of the sequence is never closed by the
+   * stream callback, so evaluate it here */
+  if (scan.start > -1 && scan.pos - scan.start >= scan.min_length) {
+    scan.found_interval = TRUE;
+  }
+
+  if (!scan.found_interval) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
+}
+
+
+/* Gathers sequences with a low-quality region across sep_list and, if
+ * there are any, appends one TEST_LOW_QUALITY_REGION item to
+ * discrepancy_list.
+ */
+static void FindLowQualityRegions (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr low_quality = NULL;
+  ValNodePtr entry;
+
+  for (entry = sep_list; entry != NULL; entry = entry->next) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &low_quality, FindLowQualityRegionsCallback);
+  }
+  if (low_quality == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_LOW_QUALITY_REGION, "%d sequences contains low quality region", low_quality));
+}
+
+
+/* Returns TRUE when `genome` is one of the GENOME_ location codes listed
+ * below, FALSE for any other value.
+ */
+NLM_EXTERN Boolean IsLocationOrganelle (Uint1 genome)
+{
+  switch (genome) {
+    case GENOME_chloroplast:
+    case GENOME_chromoplast:
+    case GENOME_kinetoplast:
+    case GENOME_mitochondrion:
+    case GENOME_cyanelle:
+    case GENOME_nucleomorph:
+    case GENOME_apicoplast:
+    case GENOME_leucoplast:
+    case GENOME_proplastid:
+    case GENOME_hydrogenosome:
+    case GENOME_plastid:
+    case GENOME_chromatophore:
+      return TRUE;
+    default:
+      return FALSE;
+  }
+}
+
+/* Bioseq visitor (non-protein sequences only).  Sequences without a
+ * MolInfo descriptor, and DNA sequences whose biomol is genomic or unset,
+ * are ignored.  For the rest, the source descriptor is appended (tagged
+ * OBJ_SEQDESC) to the list behind `data` when its BioSource location is an
+ * organelle according to IsLocationOrganelle.
+ */
+static void FindOrganelleNotGenomicCallback(BioseqPtr bsp, Pointer data)
+{
+  SeqMgrDescContext dcontext;
+  SeqDescPtr        desc;
+  MolInfoPtr        molinfo;
+  BioSourcePtr      source;
+  Boolean           genomic_or_unset;
+
+  if (bsp == NULL || ISA_aa(bsp->mol) || data == NULL) {
+    return;
+  }
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
+  if (desc == NULL) {
+    return;
+  }
+  molinfo = (MolInfoPtr) desc->data.ptrvalue;
+  if (molinfo == NULL) {
+    return;
+  }
+  genomic_or_unset = (Boolean) (molinfo->biomol == MOLECULE_TYPE_GENOMIC || molinfo->biomol == 0);
+  if (genomic_or_unset && bsp->mol == Seq_mol_dna) {
+    return;
+  }
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+  if (desc == NULL) {
+    return;
+  }
+  source = (BioSourcePtr) desc->data.ptrvalue;
+  if (source != NULL && IsLocationOrganelle(source->genome)) {
+    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, desc);
+  }
+}
+
+
+/* Gathers the source descriptors flagged by
+ * FindOrganelleNotGenomicCallback across sep_list and, if there are any,
+ * appends one TEST_ORGANELLE_NOT_GENOMIC item to discrepancy_list.
+ */
+static void FindOrganelleNotGenomic (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr flagged = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &flagged, FindOrganelleNotGenomicCallback);
+    entry = entry->next;
+  }
+  if (flagged == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_ORGANELLE_NOT_GENOMIC, "%d non-genomic sequences are organelles", flagged));
+}
+
+
+/* Returns TRUE when `taxname` is exactly (case-sensitively) one of the
+ * four generic "uncultured ..." names listed below.
+ */
+static Boolean HasUnculturedNonOrganelleName (CharPtr taxname)
+{
+  static CharPtr uncultured_names[] = {
+    "uncultured organism",
+    "uncultured microorganism",
+    "uncultured bacterium",
+    "uncultured archaeon"
+  };
+  Int4 idx;
+  Int4 count = sizeof (uncultured_names) / sizeof (uncultured_names[0]);
+
+  for (idx = 0; idx < count; idx++) {
+    if (StringCmp (taxname, uncultured_names[idx]) == 0) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/* NULL-terminated list of intergenic spacer phrases that
+ * HasIntergenicSpacerName searches for; each gene pair appears in both
+ * orders.
+ */
+static CharPtr kIntergenicSpacerNames[] = {
+ "trnL-trnF intergenic spacer",
+ "trnH-psbA intergenic spacer",
+ "trnS-trnG intergenic spacer",
+ "trnF-trnL intergenic spacer",
+ "psbA-trnH intergenic spacer",
+ "trnG-trnS intergenic spacer",
+ NULL};
+
+/* Returns TRUE when StringISearch finds any entry of
+ * kIntergenicSpacerNames inside `str`.
+ */
+static Boolean HasIntergenicSpacerName(CharPtr str)
+{
+  CharPtr PNTR name;
+
+  for (name = kIntergenicSpacerNames; *name != NULL; name++) {
+    if (StringISearch (str, *name) != NULL) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/* Bioseq visitor.  Sequences with no BioSource, with a chloroplast or
+ * plastid location, or with one of the generic "uncultured ..." taxnames
+ * are ignored.  For the rest, every misc_feature whose comment contains a
+ * known intergenic spacer phrase is appended (tagged OBJ_SEQFEAT) to the
+ * list behind `data`.
+ */
+static void FindUnwantedSpacersCallback(BioseqPtr bsp, Pointer data)
+{
+  SeqMgrDescContext dcontext;
+  SeqMgrFeatContext fcontext;
+  SeqDescPtr        desc;
+  BioSourcePtr      source;
+  SeqFeatPtr        feat;
+
+  if (bsp == NULL || data == NULL) {
+    return;
+  }
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+  if (desc == NULL) {
+    return;
+  }
+  source = (BioSourcePtr) desc->data.ptrvalue;
+  if (source == NULL) {
+    return;
+  }
+  if (source->genome == GENOME_chloroplast || source->genome == GENOME_plastid) {
+    return;
+  }
+  /* shouldn't be uncultured non-organelle */
+  if (source->org != NULL && HasUnculturedNonOrganelleName(source->org->taxname)) {
+    return;
+  }
+
+  feat = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_misc_feature, &fcontext);
+  while (feat != NULL) {
+    if (HasIntergenicSpacerName(feat->comment)) {
+      ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, feat);
+    }
+    feat = SeqMgrGetNextFeature (bsp, feat, 0, FEATDEF_misc_feature, &fcontext);
+  }
+}
+
+
+/* Gathers the misc_features flagged by FindUnwantedSpacersCallback across
+ * sep_list and, if there are any, appends one TEST_UNWANTED_SPACER item to
+ * discrepancy_list.
+ */
+static void FindUnwantedSpacers (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr spacers = NULL;
+  ValNodePtr entry;
+
+  for (entry = sep_list; entry != NULL; entry = entry->next) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &spacers, FindUnwantedSpacersCallback);
+  }
+  if (spacers == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNWANTED_SPACER, "%d suspect intergenic spacer notes not organelle", spacers));
+}
+
+
+/* Cached organelle product rules.  FindOrganelleProducts loads them at
+ * most once: the flag is set after the first attempt, successful or not.
+ */
+static SuspectRuleSetPtr OrganelleRules = NULL;
+static Boolean OrganelleRuleReadAttempted = FALSE;
+
+/* Reads the organelle product rule set from "organelle_products.prt" in
+ * the directory reported by FindPath ("ncbi", "ncbi", "data", ...).
+ * Returns the rule list, or NULL (after posting an error message) when the
+ * directory cannot be found, the resulting path does not fit the buffer,
+ * the file cannot be opened, or it cannot be parsed.
+ */
+static SuspectRuleSetPtr ReadOrganelleRules(void)
+{
+  static const char rule_file[] = "organelle_products.prt";
+  AsnIoPtr aip;
+  Char buf [PATH_MAX];
+  SuspectRuleSetPtr rule_list;
+
+  if (! FindPath("ncbi", "ncbi", "data", buf, sizeof (buf)))
+  {
+    Message (MSG_POSTERR, "Failed to find organelle product rules");
+    return NULL;
+  }
+
+  /* sizeof (rule_file) includes the terminating NUL, so this rejects any
+   * directory path that would make the StringCat below overrun buf */
+  if (StringLen (buf) + sizeof (rule_file) > sizeof (buf)) {
+    Message (MSG_POSTERR, "Path to organelle product rules is too long");
+    return NULL;
+  }
+  StringCat(buf, rule_file);
+
+  aip = AsnIoOpen (buf, "r");
+  if (aip == NULL) {
+    Message (MSG_POSTERR, "Unable to open %s", buf);
+    return NULL;
+  }
+
+  rule_list = SuspectRuleSetAsnRead (aip, NULL);
+  if (rule_list == NULL) {
+    Message (MSG_POSTERR, "Unable to read organelle product rule list from %s.", buf);
+  }
+
+  AsnIoClose (aip);
+  return rule_list;
+}
+
+
+/* Context passed to FindOrganelleProductsCallback: the rules to test
+ * against (read only by the callback) and the list of matching features
+ * it accumulates.
+ */
+typedef struct findorganelleproducts {
+ SuspectRuleSetPtr rule_list;
+ ValNodePtr item_list;
+} FindOrganelleProductsData, PNTR FindOrganelleProductsPtr;
+
+/* Bioseq visitor; `data` is a FindOrganelleProductsData.  Sequences are
+ * ignored when they have no BioSource, when the location is
+ * mitochondrion, chloroplast or plastid, when the source is bacterial or
+ * viral, or when the taxname is one of the generic "uncultured ..." names.
+ * Otherwise two kinds of features are tested against the rule list and
+ * appended (tagged OBJ_SEQFEAT) to item_list on the first matching rule:
+ *   - misc_features whose comment starts with "contains " (the comment is
+ *     the tested string);
+ *   - coding regions, using the first name of the protein feature found on
+ *     the product Bioseq.
+ */
+static void FindOrganelleProductsCallback(BioseqPtr bsp, Pointer data)
+{
+  FindOrganelleProductsPtr ctx;
+  SeqMgrDescContext        dcontext;
+  SeqMgrFeatContext        fcontext, prot_context;
+  SeqDescPtr               desc;
+  BioSourcePtr             source;
+  SeqFeatPtr               feat, prot_feat;
+  BioseqPtr                prot_bsp;
+  ProtRefPtr               prot_ref;
+  SuspectRulePtr           rule;
+  Boolean                  matched;
+
+  if (bsp == NULL) {
+    return;
+  }
+  ctx = (FindOrganelleProductsPtr) data;
+  if (ctx == NULL) {
+    return;
+  }
+
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+  if (desc == NULL) {
+    return;
+  }
+  source = (BioSourcePtr) desc->data.ptrvalue;
+  if (source == NULL) {
+    return;
+  }
+  switch (source->genome) {
+    case GENOME_mitochondrion:
+    case GENOME_chloroplast:
+    case GENOME_plastid:
+      return;
+    default:
+      break;
+  }
+
+  if (source->org != NULL) {
+    /* source should not be bacterial or viral */
+    if (source->org->orgname != NULL
+        && (IsBacterialBioSource (source) || IsViralBioSource(source))) {
+      return;
+    }
+    /* shouldn't be uncultured non-organelle */
+    if (HasUnculturedNonOrganelleName(source->org->taxname)) {
+      return;
+    }
+  }
+
+  /* look for misc_features */
+  feat = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_misc_feature, &fcontext);
+  while (feat != NULL) {
+    if (StringNICmp (feat->comment, "contains ", 9) == 0) {
+      matched = FALSE;
+      rule = ctx->rule_list;
+      while (rule != NULL && !matched) {
+        matched = DoesStringMatchSuspectRule (feat->comment, feat, rule);
+        rule = rule->next;
+      }
+      if (matched) {
+        ValNodeAddPointer (&(ctx->item_list), OBJ_SEQFEAT, feat);
+      }
+    }
+    feat = SeqMgrGetNextFeature (bsp, feat, 0, FEATDEF_misc_feature, &fcontext);
+  }
+
+  /* also look for coding regions */
+  feat = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &fcontext);
+  while (feat != NULL) {
+    prot_bsp = BioseqFindFromSeqLoc (feat->product);
+    prot_feat = SeqMgrGetNextFeature (prot_bsp, NULL, 0, FEATDEF_PROT, &prot_context);
+    if (prot_feat != NULL
+        && (prot_ref = (ProtRefPtr) prot_feat->data.value.ptrvalue) != NULL
+        && prot_ref->name != NULL) {
+      matched = FALSE;
+      rule = ctx->rule_list;
+      while (rule != NULL && !matched) {
+        matched = DoesStringMatchSuspectRule (prot_ref->name->data.ptrvalue, feat, rule);
+        rule = rule->next;
+      }
+      if (matched) {
+        ValNodeAddPointer (&(ctx->item_list), OBJ_SEQFEAT, feat);
+      }
+    }
+    feat = SeqMgrGetNextFeature (bsp, feat, 0, FEATDEF_CDS, &fcontext);
+  }
+}
+
+
+/* Loads the organelle product rules on first use (a failed load is not
+ * retried), then gathers the features flagged by
+ * FindOrganelleProductsCallback across sep_list and, if there are any,
+ * appends one TEST_ORGANELLE_PRODUCTS item to discrepancy_list.  Does
+ * nothing when no rules are available.
+ */
+static void FindOrganelleProducts(ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  FindOrganelleProductsData ctx;
+  ValNodePtr                entry;
+
+  if (!OrganelleRuleReadAttempted) {
+    OrganelleRules = ReadOrganelleRules();
+    OrganelleRuleReadAttempted = TRUE;
+  }
+  if (OrganelleRules == NULL) {
+    return;
+  }
+
+  MemSet (&ctx, 0, sizeof (FindOrganelleProductsData));
+  ctx.rule_list = OrganelleRules;
+
+  entry = sep_list;
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &ctx, FindOrganelleProductsCallback);
+    entry = entry->next;
+  }
+  if (ctx.item_list == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_ORGANELLE_PRODUCTS, "%d suspect products not organelle", ctx.item_list));
+}
+
+
+/* Bioseq visitor for mRNA sequences (per IsMrnaSequence): appends the
+ * source descriptor (tagged OBJ_SEQDESC) to the list behind `data` when
+ * the BioSource has a germline or rearranged subsource.
+ */
+static void FindBadMrnaQualCallback (BioseqPtr bsp, Pointer data)
+{
+  SeqMgrDescContext dcontext;
+  SeqDescPtr        desc;
+  BioSourcePtr      source;
+  SubSourcePtr      sub;
+
+  if (!IsMrnaSequence(bsp) || data == NULL) {
+    return;
+  }
+  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
+  if (desc == NULL) {
+    return;
+  }
+  source = (BioSourcePtr) desc->data.ptrvalue;
+  if (source == NULL) {
+    return;
+  }
+
+  for (sub = source->subtype; sub != NULL; sub = sub->next) {
+    if (sub->subtype == SUBSRC_germline || sub->subtype == SUBSRC_rearranged) {
+      ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, desc);
+      return;
+    }
+  }
+}
+
+
+/* Gathers the source descriptors flagged by FindBadMrnaQualCallback
+ * across sep_list and, if there are any, appends one TEST_BAD_MRNA_QUAL
+ * item to discrepancy_list.
+ */
+static void FindBadMrnaQual (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr flagged = NULL;
+  ValNodePtr entry;
+
+  for (entry = sep_list; entry != NULL; entry = entry->next) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &flagged, FindBadMrnaQualCallback);
+  }
+
+  if (flagged == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_BAD_MRNA_QUAL, "%d mRNA sequences have germline or rearranged qualifier", flagged));
+}
+
+
+/* Returns TRUE when the BioSource carries an environmental-sample
+ * subsource that looks unnecessary, i.e. when none of the following holds:
+ *   - a metagenomic subsource is present;
+ *   - a subsource or orgmod note contains
+ *     'amplified with species-specific primers';
+ *   - the taxname contains 'uncultured', 'enrichment culture',
+ *     'metagenome' or 'environmental sample'.
+ * Returns FALSE for a NULL biop or when no environmental-sample subsource
+ * is present.
+ */
+static Boolean HasUnnecessaryEnvironmental(BioSourcePtr biop)
+{
+  static CharPtr primer_note = "amplified with species-specific primers";
+  SubSourcePtr sub;
+  OrgModPtr    orgmod;
+  CharPtr      taxname;
+  Boolean      is_environmental = FALSE;
+
+  if (biop == NULL) {
+    return FALSE;
+  }
+
+  for (sub = biop->subtype; sub != NULL; sub = sub->next) {
+    if (sub->subtype == SUBSRC_environmental_sample) {
+      is_environmental = TRUE;
+    } else if (sub->subtype == SUBSRC_other && StringISearch (sub->name, primer_note) != NULL) {
+      /* explanatory note present: nothing to report */
+      return FALSE;
+    } else if (sub->subtype == SUBSRC_metagenomic) {
+      return FALSE;
+    }
+  }
+  if (!is_environmental) {
+    return FALSE;
+  }
+
+  if (biop->org == NULL) {
+    return TRUE;
+  }
+
+  taxname = biop->org->taxname;
+  if (StringISearch (taxname, "uncultured") != NULL
+      || StringISearch (taxname, "enrichment culture") != NULL
+      || StringISearch (taxname, "metagenome") != NULL
+      || StringISearch (taxname, "environmental sample") != NULL) {
+    return FALSE;
+  }
+
+  if (biop->org->orgname != NULL) {
+    for (orgmod = biop->org->orgname->mod; orgmod != NULL; orgmod = orgmod->next) {
+      if (orgmod->subtype == ORGMOD_other && StringISearch (orgmod->subname, primer_note) != NULL) {
+        return FALSE;
+      }
+    }
+  }
+  return TRUE;
+}
+
+
+/* Runs HasUnnecessaryEnvironmental over every entry of sep_list through
+ * RunBioSourceTest, concatenates the results and, if there are any,
+ * appends one TEST_UNNECESSARY_ENVIRONMENTAL item to discrepancy_list.
+ */
+static void FindUnnecessaryEnvironmental (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr flagged = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    ValNodeLink (&flagged, RunBioSourceTest (entry->data.ptrvalue, HasUnnecessaryEnvironmental));
+    entry = entry->next;
+  }
+
+  if (flagged == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNNECESSARY_ENVIRONMENTAL, "%d biosources have unnecessary environmental qualifier", flagged));
+}
+
+
+/* Bioseq visitor (non-protein sequences only): when the BioSource lineage
+ * matches Picornaviridae, Potyviridae, Flaviviridae or Togaviridae (per
+ * HasLineage), appends every gene feature on bsp (tagged OBJ_SEQFEAT) to
+ * the list behind `data`.
+ */
+static void FindUnnecessaryVirusGeneCallback(BioseqPtr bsp, Pointer data)
+{
+  static CharPtr virus_families[] = {
+    "Picornaviridae",
+    "Potyviridae",
+    "Flaviviridae",
+    "Togaviridae"
+  };
+  BioSourcePtr      source;
+  SeqMgrFeatContext fcontext;
+  SeqFeatPtr        gene;
+  Boolean           in_family = FALSE;
+  Int4              idx;
+  Int4              num_families = sizeof (virus_families) / sizeof (virus_families[0]);
+
+  if (bsp == NULL || data == NULL || ISA_aa(bsp->mol)) {
+    return;
+  }
+
+  source = GetBiopForBsp(bsp);
+  if (source == NULL || source->org == NULL || source->org->orgname == NULL) {
+    return;
+  }
+
+  for (idx = 0; idx < num_families && !in_family; idx++) {
+    if (HasLineage (source, virus_families[idx])) {
+      in_family = TRUE;
+    }
+  }
+  if (!in_family) {
+    return;
+  }
+
+  gene = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
+  while (gene != NULL) {
+    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, gene);
+    gene = SeqMgrGetNextFeature (bsp, gene, SEQFEAT_GENE, 0, &fcontext);
+  }
+}
+
+
+/* Gathers the genes flagged by FindUnnecessaryVirusGeneCallback across
+ * sep_list and, if there are any, appends one TEST_UNNECESSARY_VIRUS_GENE
+ * item to discrepancy_list.
+ */
+static void FindUnnecessaryVirusGene (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr genes = NULL;
+  ValNodePtr entry;
+
+  for (entry = sep_list; entry != NULL; entry = entry->next) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &genes, FindUnnecessaryVirusGeneCallback);
+  }
+
+  if (genes == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNNECESSARY_VIRUS_GENE, "%d unnecessary virus genes", genes));
+}
+
+
+/* Flags accumulated by FindUnwantedSetWrappersCallback over the Bioseqs of
+ * one set.
+ */
+typedef struct isunwanted {
+ Boolean has_sat_feat; /* a microsatellite repeat_region feature was seen */
+ Boolean has_non_sat_feat; /* any other feature was seen */
+ Boolean has_rearranged; /* a BioSource has a rearranged subsource */
+} IsUnwantedData, PNTR IsUnwantedPtr;
+
+
+/* Returns TRUE when sfp is a repeat_region feature with a "satellite"
+ * qualifier whose value starts with "microsatellite" (both comparisons
+ * are case-insensitive).
+ */
+static Boolean IsMicrosatelliteRepeatRegion (SeqFeatPtr sfp)
+{
+  GBQualPtr gbq;
+
+  if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) {
+    return FALSE;
+  }
+  for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+    if (StringICmp (gbq->qual, "satellite") != 0) {
+      continue;
+    }
+    if (StringNICmp (gbq->val, "microsatellite", 14) == 0) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/* Bioseq visitor that updates the IsUnwantedData flags passed through data.
+ * Protein Bioseqs are ignored.  Sets has_rearranged when the Bioseq's
+ * BioSource has a SUBSRC_rearranged subsource, and sets has_sat_feat /
+ * has_non_sat_feat according to IsMicrosatelliteRepeatRegion for each feature,
+ * stopping the feature walk once both feature flags are set.
+ */
+static void FindUnwantedSetWrappersCallback(BioseqPtr bsp, Pointer data)
+{
+  IsUnwantedPtr     flags = (IsUnwantedPtr) data;
+  SeqFeatPtr        feat;
+  SeqMgrFeatContext fcontext;
+  BioSourcePtr      src;
+  SubSourcePtr      sub;
+
+  if (bsp == NULL || flags == NULL || ISA_aa(bsp->mol)) {
+    return;
+  }
+
+  src = GetBiopForBsp(bsp);
+  if (src != NULL && !flags->has_rearranged) {
+    for (sub = src->subtype; sub != NULL; sub = sub->next) {
+      if (sub->subtype == SUBSRC_rearranged) {
+        flags->has_rearranged = TRUE;
+        break;
+      }
+    }
+  }
+
+  feat = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+  while (feat != NULL && !(flags->has_sat_feat && flags->has_non_sat_feat)) {
+    if (IsMicrosatelliteRepeatRegion(feat)) {
+      flags->has_sat_feat = TRUE;
+    } else {
+      flags->has_non_sat_feat = TRUE;
+    }
+    feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &fcontext);
+  }
+}
+
+
+/* Examines the BioseqSet in sep.  An eco/mut/phy/pop set is appended to pList
+ * (as OBJ_BIOSEQSET) when its Bioseqs include a rearranged source, or have
+ * microsatellite repeat_region features and no other features.  For any
+ * other set class the function recurses into the member entries.
+ */
+static void FindUnwantedSetWrappersInSep(SeqEntryPtr sep, ValNodePtr PNTR pList)
+{
+  BioseqSetPtr   bssp;
+  SeqEntryPtr    child;
+  IsUnwantedData flags;
+
+  if (sep == NULL || !IS_Bioseq_set(sep)) {
+    return;
+  }
+  bssp = (BioseqSetPtr) sep->data.ptrvalue;
+  if (bssp == NULL || pList == NULL) {
+    return;
+  }
+
+  switch (bssp->_class) {
+    case BioseqseqSet_class_eco_set:
+    case BioseqseqSet_class_mut_set:
+    case BioseqseqSet_class_phy_set:
+    case BioseqseqSet_class_pop_set:
+      MemSet (&flags, 0, sizeof (IsUnwantedData));
+      VisitBioseqsInSep (sep, &flags, FindUnwantedSetWrappersCallback);
+      if (flags.has_rearranged || (flags.has_sat_feat && !flags.has_non_sat_feat)) {
+        ValNodeAddPointer (pList, OBJ_BIOSEQSET, bssp);
+      }
+      break;
+    default:
+      for (child = bssp->seq_set; child != NULL; child = child->next) {
+        FindUnwantedSetWrappersInSep (child, pList);
+      }
+      break;
+  }
+}
+
+
+/* Collects unwanted set wrappers from every SeqEntry in sep_list via
+ * FindUnwantedSetWrappersInSep and, if any were found, adds one
+ * TEST_UNWANTED_SET_WRAPPER clickable item to discrepancy_list.
+ */
+static void FindUnwantedSetWrappers (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr wrappers = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    FindUnwantedSetWrappersInSep (entry->data.ptrvalue, &wrappers);
+    entry = entry->next;
+  }
+
+  if (wrappers == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNWANTED_SET_WRAPPER, "%d unwanted set wrappers", wrappers));
+}
+
+
+/* Returns TRUE when any PCR reaction set on biop pairs a forward and reverse
+ * primer where exactly one of the two names, or exactly one of the two
+ * sequences, has no text, or when the forward and reverse primer lists of a
+ * set differ in length.  Returns FALSE for a NULL biop.
+ */
+static Boolean IsMissingPrimerValue (BioSourcePtr biop)
+{
+  PCRReactionSetPtr reaction;
+  PCRPrimerPtr      fwd, rev;
+
+  if (biop == NULL) {
+    return FALSE;
+  }
+
+  for (reaction = biop->pcr_primers; reaction != NULL; reaction = reaction->next) {
+    fwd = reaction->forward;
+    rev = reaction->reverse;
+    while (fwd != NULL && rev != NULL) {
+      /* one side blank and the other not, for either name or sequence */
+      if ((!StringHasNoText (fwd->name)) != (!StringHasNoText (rev->name))
+          || (!StringHasNoText (fwd->seq)) != (!StringHasNoText (rev->seq))) {
+        return TRUE;
+      }
+      fwd = fwd->next;
+      rev = rev->next;
+    }
+    /* leftover primers on one side only */
+    if (fwd != NULL || rev != NULL) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+/* Applies IsMissingPrimerValue to the BioSources of every SeqEntry in sep_list
+ * (through RunBioSourceTest) and, if any match, adds one TEST_MISSING_PRIMER
+ * clickable item to discrepancy_list.
+ */
+static void FindMissingPrimerValues (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr sources = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    ValNodeLink (&sources, RunBioSourceTest (entry->data.ptrvalue, IsMissingPrimerValue));
+    entry = entry->next;
+  }
+
+  if (sources == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_MISSING_PRIMER, "%d biosources have primer sets with missing values", sources));
+}
+
+
+/* Bioseq visitor.  Appends to the ValNode list at data every FEATDEF_otherRNA
+ * feature whose product string contains neither "ITS" nor
+ * "internal transcribed spacer".
+ */
+static void FindUnexpectedMiscRNABioseq (BioseqPtr bsp, Pointer data)
+{
+  ValNodePtr PNTR   rna_list = (ValNodePtr PNTR) data;
+  SeqFeatPtr        rna;
+  SeqMgrFeatContext fcontext;
+  CharPtr           product;
+  Boolean           is_spacer;
+
+  if (bsp == NULL || rna_list == NULL) {
+    return;
+  }
+
+  rna = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_otherRNA, &fcontext);
+  while (rna != NULL) {
+    product = GetRNARefProductString(rna->data.value.ptrvalue, NULL);
+    is_spacer = (Boolean) (StringSearch (product, "ITS") != NULL
+                           || StringSearch (product, "internal transcribed spacer") != NULL);
+    if (!is_spacer) {
+      ValNodeAddPointer (rna_list, OBJ_SEQFEAT, rna);
+    }
+    product = MemFree (product);
+    rna = SeqMgrGetNextFeature (bsp, rna, 0, FEATDEF_otherRNA, &fcontext);
+  }
+}
+
+
+/* Runs FindUnexpectedMiscRNABioseq over every SeqEntry in sep_list and, if any
+ * features were collected, adds one TEST_UNUSUAL_MISC_RNA clickable item to
+ * discrepancy_list.
+ */
+static void FindUnexpectedMiscRNA (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr rnas = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &rnas, FindUnexpectedMiscRNABioseq);
+    entry = entry->next;
+  }
+
+  if (rnas == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_UNUSUAL_MISC_RNA, "%d unexpected misc_RNA features found. misc_RNAs are unusual in a genome, consider using ncRNA, misc_binding, or misc_feature as appropriate.", rnas));
+}
+
+
+/* Returns TRUE when biop carries an "other" note - either a SubSource of
+ * subtype SUBSRC_other or an OrgMod of subtype ORGMOD_other - whose text
+ * contains "amplified with species-specific primers", and biop has no
+ * SUBSRC_environmental_sample subsource.  Returns FALSE for a NULL biop.
+ */
+static Boolean AmpPrimersNoEnvSample (BioSourcePtr biop)
+{
+  CharPtr      note_text = "amplified with species-specific primers";
+  OrgModPtr    mod;
+  SubSourcePtr ssp;
+  Boolean      has_note = FALSE;
+
+  if (biop == NULL) {
+    return FALSE;
+  }
+
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+    if (ssp->subtype == SUBSRC_environmental_sample) {
+      /* the qualifier is present, so there is nothing to report */
+      return FALSE;
+    }
+    if (ssp->subtype == SUBSRC_other
+        && StringISearch (ssp->name, note_text) != NULL) {
+      has_note = TRUE;
+    }
+  }
+
+  if (!has_note && biop->org != NULL && biop->org->orgname != NULL) {
+    for (mod = biop->org->orgname->mod; mod != NULL && !has_note; mod = mod->next) {
+      /* OrgMod subtypes use the ORGMOD_ constants, not the SUBSRC_ ones */
+      if (mod->subtype == ORGMOD_other
+          && StringISearch (mod->subname, note_text) != NULL) {
+        has_note = TRUE;
+      }
+    }
+  }
+
+  return has_note;
+}
+
+
+/* Applies AmpPrimersNoEnvSample to the BioSources of every SeqEntry in
+ * sep_list (through RunBioSourceTest) and, if any match, adds one
+ * TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE clickable item to
+ * discrepancy_list.
+ */
+static void FindAmpPrimersNoEnvSample (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr sources = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    ValNodeLink (&sources, RunBioSourceTest (entry->data.ptrvalue, AmpPrimersNoEnvSample));
+    entry = entry->next;
+  }
+
+  if (sources == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE, "%d biosources have 'amplified with species-specific primers' note but no environmental-sample qualifier.", sources));
+}
+
+
+/* Bioseq visitor.  Walks the gene features of a nucleotide Bioseq in the order
+ * SeqMgrGetNextFeature returns them and compares each gene with the one
+ * immediately before it.  When SeqLocCompare reports SLC_A_EQ_B for the two
+ * locations and SeqLocStrand differs, both genes are appended to the ValNode
+ * list at data as OBJ_SEQFEAT entries.  Only adjacent genes are compared.
+ */
+static void FindDuplicateGenesOnOppositeStrandsCallback (BioseqPtr bsp, Pointer data)
+{
+ SeqFeatPtr sfp, sfp_prev = NULL;
+ SeqMgrFeatContext context;
+ Boolean sfp_prev_listed = FALSE; /* TRUE when sfp_prev is already in the list */
+
+ if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) {
+ return;
+ }
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &context);
+ sfp != NULL;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, 0, &context)) {
+ if (sfp_prev != NULL) {
+ if (SeqLocCompare (sfp_prev->location, sfp->location) == SLC_A_EQ_B
+ && SeqLocStrand (sfp_prev->location) != SeqLocStrand (sfp->location)) {
+ if (!sfp_prev_listed) { /* avoid listing the same gene twice in a run of matches */
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp_prev);
+ }
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
+ sfp_prev_listed = TRUE; /* sfp becomes sfp_prev below and is now listed */
+ } else {
+ sfp_prev_listed = FALSE;
+ }
+ }
+ sfp_prev = sfp;
+ }
+}
+
+
+/* Runs FindDuplicateGenesOnOppositeStrandsCallback over every SeqEntry in
+ * sep_list and, if any genes were collected, adds one
+ * TEST_DUP_GENES_OPPOSITE_STRANDS clickable item to discrepancy_list.
+ */
+static void FindDuplicateGenesOnOppositeStrands (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr genes = NULL;
+  ValNodePtr entry = sep_list;
+
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &genes, FindDuplicateGenesOnOppositeStrandsCallback);
+    entry = entry->next;
+  }
+
+  if (genes == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_DUP_GENES_OPPOSITE_STRANDS, "%d genes match other genes in the same location, but on the opposite strand", genes));
+}
+
+
+/* BioseqSet visitor: stores TRUE in the Boolean that data points to when bssp
+ * is of class small_genome_set.  The Boolean is never reset to FALSE here.
+ */
+static void FindSmallGenomeSetCallback (BioseqSetPtr bssp, Pointer data)
+{
+  BoolPtr found = (BoolPtr) data;
+
+  if (bssp == NULL || found == NULL) {
+    return;
+  }
+  if (bssp->_class == BioseqseqSet_class_small_genome_set) {
+    *found = TRUE;
+  }
+}
+
+
+/* Descriptor visitor: appends each source descriptor to the ValNode list that
+ * data points to, tagged OBJ_SEQDESC.  Other descriptor types are ignored.
+ */
+static void ListBioSources(SeqDescrPtr sdp, Pointer data)
+{
+  if (sdp == NULL) {
+    return;
+  }
+  if (sdp->choice != Seq_descr_source) {
+    return;
+  }
+  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+}
+
+
+/* Allocates a SourceQualChoice_textqual node for the given Source_qual_* value.
+ * The caller releases it with ValNodeFree.  Returns NULL if allocation fails.
+ */
+static ValNodePtr NewSmallGenomeSetTextQual (Int4 qual)
+{
+  ValNodePtr vnp;
+
+  vnp = ValNodeNew (NULL);
+  if (vnp != NULL) {
+    vnp->choice = SourceQualChoice_textqual;
+    vnp->data.intvalue = qual;
+  }
+  return vnp;
+}
+
+
+/* Compares one source qualifier of biop against a reference value.
+ * Does nothing once *all_same is FALSE or when biop has no value for qual.
+ * For the first source of an entry (is_first) the value becomes the new
+ * reference, and the previous reference string is freed; otherwise a value
+ * that differs from the reference clears *all_same.
+ */
+static void CheckSmallGenomeSetQualSame
+(BioSourcePtr biop,
+ ValNodePtr   qual,
+ Boolean      is_first,
+ CharPtr PNTR reference,
+ BoolPtr      all_same)
+{
+  CharPtr val;
+
+  if (!*all_same) {
+    return;
+  }
+  val = GetSourceQualFromBioSource (biop, qual, NULL);
+  if (val == NULL) {
+    return;
+  }
+  if (is_first) {
+    /* release the reference kept from an earlier entry before replacing it */
+    *reference = MemFree (*reference);
+    *reference = val;
+    val = NULL;
+  } else if (StringCmp (*reference, val) != 0) {
+    *all_same = FALSE;
+  }
+  val = MemFree (val);
+}
+
+
+/* Checks every SeqEntry in sep_list that contains a small_genome_set.
+ * Reports (as TEST_SMALL_GENOME_SET_PROBLEM items added to discrepancy_list):
+ *   - source descriptors accepted by IsViralBioSource that have no segment
+ *     qualifier;
+ *   - whether taxname, isolate or strain values differ from the value found
+ *     on the first source descriptor of the entry.
+ * Does nothing when discrepancy_list is NULL.
+ */
+static void FindSmallGenomeSetProblems (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  ValNodePtr   vnp, src_list = NULL, s;
+  CharPtr      taxname = NULL, strain = NULL, isolate = NULL;
+  CharPtr      tmp;
+  Boolean      has_small_genome_set;
+  Boolean      is_first;
+  BioSourcePtr biop;
+  ValNodePtr   tax_qual, strain_qual, isolate_qual, segment_qual;
+  ValNodePtr   missing_segment = NULL;
+  Boolean      all_taxnames_same = TRUE;
+  Boolean      all_isolates_same = TRUE;
+  Boolean      all_strains_same = TRUE;
+
+  if (discrepancy_list == NULL) {
+    return;
+  }
+
+  tax_qual = NewSmallGenomeSetTextQual (Source_qual_taxname);
+  strain_qual = NewSmallGenomeSetTextQual (Source_qual_strain);
+  isolate_qual = NewSmallGenomeSetTextQual (Source_qual_isolate);
+  segment_qual = NewSmallGenomeSetTextQual (Source_qual_segment);
+
+  for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
+    has_small_genome_set = FALSE;
+    VisitSetsInSep (vnp->data.ptrvalue, &has_small_genome_set, FindSmallGenomeSetCallback);
+    if (!has_small_genome_set) {
+      continue;
+    }
+
+    VisitDescriptorsInSep (vnp->data.ptrvalue, &src_list, ListBioSources);
+    for (s = src_list; s != NULL; s = s->next) {
+      biop = GetBioSourceFromObject(s->choice, s->data.ptrvalue);
+      if (biop == NULL) {
+        continue;
+      }
+      /* look for segment when required */
+      if (IsViralBioSource(biop)) {
+        tmp = GetSourceQualFromBioSource(biop, segment_qual, NULL);
+        if (tmp == NULL) {
+          ValNodeAddPointer (&missing_segment, OBJ_SEQDESC, s->data.ptrvalue);
+        }
+        tmp = MemFree (tmp);
+      }
+
+      /* the first descriptor of the entry supplies the reference values */
+      is_first = (Boolean) (s == src_list);
+      CheckSmallGenomeSetQualSame (biop, tax_qual, is_first, &taxname, &all_taxnames_same);
+      CheckSmallGenomeSetQualSame (biop, isolate_qual, is_first, &isolate, &all_isolates_same);
+      CheckSmallGenomeSetQualSame (biop, strain_qual, is_first, &strain, &all_strains_same);
+    }
+
+    src_list = FreeObjectList (src_list);
+  }
+
+  taxname = MemFree (taxname);
+  isolate = MemFree (isolate);
+  strain = MemFree (strain);
+
+  /* the qualifier selectors are only needed during the scan */
+  tax_qual = ValNodeFree (tax_qual);
+  strain_qual = ValNodeFree (strain_qual);
+  isolate_qual = ValNodeFree (isolate_qual);
+  segment_qual = ValNodeFree (segment_qual);
+
+  if (missing_segment != NULL) {
+    ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_SMALL_GENOME_SET_PROBLEM, "%d biosources should have segment qualifier but do not", missing_segment));
+  }
+  if (!all_taxnames_same) {
+    ValNodeAddPointer (discrepancy_list, 0, NewClickableItemNoList (TEST_SMALL_GENOME_SET_PROBLEM, "Not all biosources have same taxname"));
+  }
+  if (!all_isolates_same) {
+    ValNodeAddPointer (discrepancy_list, 0, NewClickableItemNoList (TEST_SMALL_GENOME_SET_PROBLEM, "Not all biosources have same isolate"));
+  }
+  if (!all_strains_same) {
+    ValNodeAddPointer (discrepancy_list, 0, NewClickableItemNoList (TEST_SMALL_GENOME_SET_PROBLEM, "Not all biosources have same strain"));
+  }
+}
+
+
+/* Bioseq visitor.  Collects the rRNA features of bsp, compares every pair with
+ * SeqLocCompare, and appends those involved in any result other than
+ * SLC_NO_MATCH to the ValNode list that userdata points to (as OBJ_SEQFEAT).
+ * The ValNode choice field doubles as the "overlaps" marker: nodes start at 0
+ * and are set to OBJ_SEQFEAT once an overlap is found.
+ */
+static void FindOverlappingrRNAs (BioseqPtr bsp, Pointer userdata)
+{
+ SeqFeatPtr sfp, sfp_compare;
+ SeqMgrFeatContext context;
+ ValNodePtr PNTR overlapping_rrnas = NULL, non_overlap;
+ ValNodePtr rrna_list = NULL, vnp, vnp_next;
+
+ if (bsp == NULL || userdata == NULL)
+ {
+ return;
+ }
+
+ overlapping_rrnas = (ValNodePtr PNTR) userdata;
+
+ /* gather all rRNA features, each initially marked with choice 0 */
+ for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_rRNA, &context);
+ sfp != NULL;
+ sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_rRNA, &context))
+ {
+ ValNodeAddPointer (&rrna_list, 0, sfp);
+ }
+
+ /* pairwise comparison of each feature with every later one */
+ for (vnp = rrna_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next)
+ {
+ sfp = (SeqFeatPtr) vnp->data.ptrvalue;
+ for (vnp_next = vnp->next; vnp_next != NULL; vnp_next = vnp_next->next)
+ {
+ sfp_compare = (SeqFeatPtr) vnp_next->data.ptrvalue;
+
+ if (SeqLocCompare (sfp->location, sfp_compare->location) != SLC_NO_MATCH)
+ {
+ vnp->choice = OBJ_SEQFEAT;
+ vnp_next->choice = OBJ_SEQFEAT;
+ }
+ }
+ }
+
+ /* drop the nodes still marked 0 and hand the rest to the caller's list */
+ non_overlap = ValNodeExtractList (&rrna_list, 0);
+ non_overlap = ValNodeFree (non_overlap);
+ ValNodeLink (overlapping_rrnas, rrna_list);
+
+}
+
+
+/* Runs FindOverlappingrRNAs over every SeqEntry in sep_list and, if any
+ * features were collected, adds one TEST_OVERLAPPING_RRNAS clickable item to
+ * discrepancy_list.  Does nothing when discrepancy_list is NULL.
+ */
+extern void AddOverlappingrRNADiscrepancies (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  CharPtr    bad_fmt = "%d rRNA features overlap another rRNA feature.";
+  ValNodePtr rrnas = NULL;
+  ValNodePtr entry;
+
+  if (discrepancy_list == NULL) {
+    return;
+  }
+
+  entry = sep_list;
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &rrnas, FindOverlappingrRNAs);
+    entry = entry->next;
+  }
+
+  if (rrnas == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_OVERLAPPING_RRNAS, bad_fmt, rrnas));
+}
+
+
+/* Bioseq visitor.  When bsp is accepted by IsMrnaSequence and at least one of
+ * its features has a minus-strand feature context, appends bsp once to the
+ * ValNode list at data as OBJ_BIOSEQ.
+ */
+static void FindMrnaSequencesWithMinusStrandFeaturesCallback (BioseqPtr bsp, Pointer data)
+{
+  SeqMgrFeatContext fcontext;
+  SeqFeatPtr        feat;
+
+  if (bsp == NULL || !IsMrnaSequence(bsp) || data == NULL) {
+    return;
+  }
+
+  feat = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
+  while (feat != NULL) {
+    if (fcontext.strand == Seq_strand_minus) {
+      /* one hit is enough: the sequence is reported, not the feature */
+      ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
+      return;
+    }
+    feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &fcontext);
+  }
+}
+
+
+/* Runs FindMrnaSequencesWithMinusStrandFeaturesCallback over every SeqEntry in
+ * sep_list and, if any sequences were collected, adds one
+ * TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES clickable item to discrepancy_list.
+ * Does nothing when discrepancy_list is NULL.
+ */
+static void FindMrnaSequencesWithMinusStrandFeatures (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  CharPtr    bad_fmt = "%d mRNA sequences have features on the complement strand.";
+  ValNodePtr mrna_seqs = NULL;
+  ValNodePtr entry;
+
+  if (discrepancy_list == NULL) {
+    return;
+  }
+
+  entry = sep_list;
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &mrna_seqs, FindMrnaSequencesWithMinusStrandFeaturesCallback);
+    entry = entry->next;
+  }
+
+  if (mrna_seqs == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES, bad_fmt, mrna_seqs));
+}
+
+
+/* Bioseq visitor.  For a nucleotide Bioseq with a source descriptor that has a
+ * non-blank taxname, looks for that taxname in the title descriptor and
+ * appends the title descriptor to the ValNode list at data (as OBJ_SEQDESC)
+ * when the taxname is absent or its capitalization differs after the first
+ * letter.  A Bioseq without a title descriptor is not reported.
+ */
+static void FindTaxnameMissingFromDeflineCallback (BioseqPtr bsp, Pointer data)
+{
+ SeqMgrDescContext context;
+ SeqDescPtr sdp;
+ BioSourcePtr biop;
+ CharPtr cp;
+ Int4 len;
+ CharPtr lookfor;
+
+ if (bsp == NULL || ISA_aa(bsp->mol) || data == NULL) {
+ return;
+ }
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+ if (sdp == NULL || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL
+ || biop->org == NULL
+ || StringHasNoText (biop->org->taxname)) {
+ return;
+ }
+
+ /* the two HIV taxnames are searched for by their abbreviations instead */
+ lookfor = biop->org->taxname;
+ if (StringICmp (lookfor, "Human immunodeficiency virus 1") == 0) {
+ lookfor = "HIV-1";
+ } else if (StringICmp (lookfor, "Human immunodeficiency virus 2") == 0) {
+ lookfor = "HIV-2";
+ }
+
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);
+ if (sdp != NULL) {
+ /* NOTE(review): only the match StringISearch returns is checked below */
+ cp = StringISearch (sdp->data.ptrvalue, lookfor);
+ if (cp == NULL) {
+ /* taxname not in defline at all */
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ } else {
+ /* capitalization must match for all but the first letter */
+ len = StringLen (lookfor);
+ if (StringNCmp (cp + 1, lookfor + 1, len - 1) != 0) {
+ ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
+ }
+ }
+ }
+}
+
+
+/* Runs FindTaxnameMissingFromDeflineCallback over every SeqEntry in sep_list
+ * and, if any title descriptors were collected, adds one
+ * TEST_TAXNAME_NOT_IN_DEFLINE clickable item to discrepancy_list.
+ * Does nothing when discrepancy_list is NULL.
+ */
+static void FindTaxnameMissingFromDefline (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
+{
+  CharPtr    bad_fmt = "%d deflines do not contain the complete taxname.";
+  ValNodePtr deflines = NULL;
+  ValNodePtr entry;
+
+  if (discrepancy_list == NULL) {
+    return;
+  }
+
+  entry = sep_list;
+  while (entry != NULL) {
+    VisitBioseqsInSep (entry->data.ptrvalue, &deflines, FindTaxnameMissingFromDeflineCallback);
+    entry = entry->next;
+  }
+
+  if (deflines == NULL) {
+    return;
+  }
+  ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (TEST_TAXNAME_NOT_IN_DEFLINE, bad_fmt, deflines));
+}
+
+
static void
RemoveUnwantedDiscrepancyItems
(ValNodePtr PNTR discrepancy_list,
@@ -22174,8 +24800,6 @@ extern void SetDiscrepancyLevels (ValNodePtr discrepancy_list, Int4 level)
}
-typedef void (*AutofixCallback) (ValNodePtr item_list, Pointer userdata, LogInfoPtr lip);
-
typedef struct discrepancyinfo
{
CharPtr conf_name;
@@ -22207,9 +24831,11 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Overlapping CDS", "OVERLAPPING_CDS", AddOverlappingCodingRegionDiscrepancies, MarkOverlappingCDSs },
{ "Contained CDS", "CONTAINED_CDS", AddContainedCodingRegionDiscrepancies, NULL },
{ "CDS RNA Overlap", "RNA_CDS_OVERLAP", AddRNACDSOverlapDiscrepancies, NULL },
- { "Short Contig", "SHORT_CONTIG", FindShortContigs, NULL },
+ { "Short Contig", "SHORT_CONTIG", FindShortContigs, RemoveShortContigsWithoutAnnotation },
{ "Inconsistent BioSource", "INCONSISTENT_BIOSOURCE", FindNonmatchingContigSources, NULL },
{ "Suspect Product Name", "SUSPECT_PRODUCT_NAMES", FindSuspectProductNames, NULL },
+ { "Suspect Product Name Typo", "DISC_PRODUCT_NAME_TYPO", FindSuspectProductNames, FixSuspectProductNameTypos },
+ { "Suspect Product Name QuickFix", "DISC_PRODUCT_NAME_QUICKFIX", FindSuspectProductNames, FixSuspectProductNameQuickFixes },
{ "Inconsistent Source And Definition Line", "INCONSISTENT_SOURCE_DEFLINE", FindInconsistentSourceAndDefline, NULL },
{ "Partial CDSs in Complete Sequences", "PARTIAL_CDS_COMPLETE_SEQUENCE", FindParticalCDSsInCompleteSequences, NULL },
{ "Hypothetical or Unknown Protein with EC Number", "EC_NUMBER_ON_UNKNOWN_PROTEIN", FindUnknownProteinsWithECNumbers, NULL },
@@ -22235,7 +24861,7 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Multiple CDS on GenProdSet, same protein", "DUP_GENPRODSET_PROTEIN", CheckListForGenProdSets, NULL},
{ "mRNA on GenProdSet without transcript ID", "MISSING_GENPRODSET_TRANSCRIPT_ID", CheckListForGenProdSets, NULL},
{ "mRNA on GenProdSet with duplicate ID", "DISC_DUP_GENPRODSET_TRANSCRIPT_ID", CheckListForGenProdSets, NULL},
- { "Greater than 10 percent Ns", "DISC_PERCENT_N", PercentNDiscrepanciesForSeqEntry, NULL},
+ { "Greater than 5 percent Ns", "DISC_PERCENT_N", PercentNDiscrepanciesForSeqEntry, NULL},
{ "Runs of 20 or more Ns", "N_RUNS", BaseCountAndNRunDiscrepancies, NULL},
{ "Zero Base Counts", "ZERO_BASECOUNT", BaseCountAndNRunDiscrepancies, NULL},
{ "Adjacent PseudoGenes with Identical Text", "ADJACENT_PSEUDOGENES", FindAdjacentPseudoGenes, NULL},
@@ -22307,14 +24933,39 @@ static DiscrepancyInfoData discrepancy_info_list[] =
{ "Mismatched Comments", "DISC_MISMATCHED_COMMENTS", FindMismatchedComments, FixMismatchedComments},
{ "BioSources with the same strain should have the same taxname", "DISC_STRAIN_TAXNAME_MISMATCH", CollectStrainTaxnameDiscrepancies, NULL},
{ "'Human' in host should be 'Homo sapiens'", "DISC_HUMAN_HOST", FindHumanHosts, FixHumanHosts},
- { "Genes on bacterial sequences should start with lowercase letters", "DISC_BAD_BACTERIAL_GENE_NAME", FindBadBacterialGeneNames, NULL},
+ { "Genes on bacterial sequences should start with lowercase letters", "DISC_BAD_BACTERIAL_GENE_NAME", FindBadGeneNames, MoveBadGeneNames},
+ { "Bad gene names", "TEST_BAD_GENE_NAME", FindBadGeneNames, MoveBadGeneNames },
{ "Location is ordered (intervals interspersed with gaps)", "ONCALLER_ORDERED_LOCATION", FindOrderedLocations, FixOrderedLocations},
{ "Comment descriptor present", "ONCALLER_COMMENT_PRESENT", FindCommentDescriptors, NULL },
{ "Titles on sets", "ONCALLER_DEFLINE_ON_SET", FindTitlesOnSets, NULL },
{ "HIV RNA location or molecule type inconsistent", "ONCALLER_HIV_RNA_INCONSISTENT", FindInconsistentHIVRNA, NULL },
{ "Protein sequences should be at least 50 aa, unless they are partial", "SHORT_PROT_SEQUENCES", FindShortProtSequences, NULL },
{ "mRNA sequences should not have exons", "TEST_EXON_ON_MRNA", FindExonsOnMrna, RemoveExonsOnMrna },
- { "Sequences with project IDs", "TEST_HAS_PROJECT_ID", FindProjectIdSequences, NULL }
+ { "Sequences with project IDs", "TEST_HAS_PROJECT_ID", FindProjectIdSequences, NULL },
+ { "Feature has standard_name qualifier", "ONCALLER_HAS_STANDARD_NAME", FindStandardName, NULL },
+ { "Missing structured comments", "ONCALLER_MISSING_STRUCTURED_COMMENTS", FindMissingStructuredComments, NULL },
+ { "Bacteria should have strain", "DISC_REQUIRED_STRAIN", FindRequiredStrains, NULL},
+ { "Bioseqs should have GenomeAssembly structured comments", "MISSING_GENOMEASSEMBLY_COMMENTS", FindMissingGenomeAssemblyStructuredComments, NULL },
+ { "Bacterial taxnames should end with strain", "DISC_BACTERIAL_TAX_STRAIN_MISMATCH", FindBacterialTaxStrainMismatch, NULL },
+ { "CDS has CDD Xref", "TEST_CDS_HAS_CDD_XREF", FindCDSWithCDDXref, NULL },
+ { "Sequence contains unusual nucleotides", "TEST_UNUSUAL_NT", FindUnusualNT, NULL },
+ { "Sequence contains regions of low quality", "TEST_LOW_QUALITY_REGION", FindLowQualityRegions, NULL },
+ { "Organelle location should have genomic moltype", "TEST_ORGANELLE_NOT_GENOMIC", FindOrganelleNotGenomic, NULL },
+ { "Intergenic spacer without plastid location", "TEST_UNWANTED_SPACER", FindUnwantedSpacers, NULL },
+ { "Organelle products on non-organelle sequence", "TEST_ORGANELLE_PRODUCTS", FindOrganelleProducts, NULL },
+ { "Organism ending in sp. needs tax consult", "TEST_SP_NOT_UNCULTURED", FindSpNotUncultured, NULL },
+ { "mRNA sequence contains rearranged or germline", "TEST_BAD_MRNA_QUAL", FindBadMrnaQual, NULL },
+ { "Unnecessary environmental qualifier present", "TEST_UNNECESSARY_ENVIRONMENTAL", FindUnnecessaryEnvironmental, NULL },
+ { "Unnecessary gene features on virus", "TEST_UNNECESSARY_VIRUS_GENE", FindUnnecessaryVirusGene, NULL },
+ { "Set wrapper on microsatellites or rearranged genes", "TEST_UNWANTED_SET_WRAPPER", FindUnwantedSetWrappers, NULL},
+ { "Missing values in primer set", "TEST_MISSING_PRIMER", FindMissingPrimerValues, NULL},
+ { "Unexpected misc_RNA features", "TEST_UNUSUAL_MISC_RNA", FindUnexpectedMiscRNA, NULL},
+ { "Species-specific primers, no environmental sample", "TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE", FindAmpPrimersNoEnvSample, NULL},
+ { "Duplicate genes on opposite strands", "TEST_DUP_GENES_OPPOSITE_STRANDS", FindDuplicateGenesOnOppositeStrands, NULL},
+ { "Problems with small genome sets", "TEST_SMALL_GENOME_SET_PROBLEM", FindSmallGenomeSetProblems, NULL},
+ { "Overlapping rRNA features", "TEST_OVERLAPPING_RRNAS", AddOverlappingrRNADiscrepancies, NULL},
+ { "mRNA sequences have CDS/gene on the complement strand", "TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES", FindMrnaSequencesWithMinusStrandFeatures, NULL},
+ { "Complete taxname should be present in definition line", "TEST_TAXNAME_NOT_IN_DEFLINE", FindTaxnameMissingFromDefline, NULL}
};
@@ -22384,7 +25035,20 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
|| test_type == ONCALLER_DEFLINE_ON_SET
|| test_type == ONCALLER_HIV_RNA_INCONSISTENT
|| test_type == TEST_EXON_ON_MRNA
- || test_type == TEST_HAS_PROJECT_ID) {
+ || test_type == TEST_HAS_PROJECT_ID
+ || test_type == ONCALLER_HAS_STANDARD_NAME
+ || test_type == ONCALLER_MISSING_STRUCTURED_COMMENTS
+ || test_type == TEST_ORGANELLE_PRODUCTS
+ || test_type == TEST_SP_NOT_UNCULTURED
+ || test_type == TEST_BAD_MRNA_QUAL
+ || test_type == TEST_UNNECESSARY_ENVIRONMENTAL
+ || test_type == TEST_UNNECESSARY_VIRUS_GENE
+ || test_type == TEST_UNWANTED_SET_WRAPPER
+ || test_type == TEST_MISSING_PRIMER
+ || test_type == TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE
+ || test_type == TEST_SMALL_GENOME_SET_PROBLEM
+ || test_type == TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES
+ || test_type == TEST_TAXNAME_NOT_IN_DEFLINE) {
rval = FALSE;
} else {
rval = TRUE;
@@ -22456,7 +25120,22 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
|| test_type == ONCALLER_DEFLINE_ON_SET
|| test_type == ONCALLER_HIV_RNA_INCONSISTENT
|| test_type == TEST_EXON_ON_MRNA
- || test_type == TEST_HAS_PROJECT_ID) {
+ || test_type == TEST_HAS_PROJECT_ID
+ || test_type == ONCALLER_HAS_STANDARD_NAME
+ || test_type == ONCALLER_MISSING_STRUCTURED_COMMENTS
+ || test_type == TEST_ORGANELLE_NOT_GENOMIC
+ || test_type == TEST_UNWANTED_SPACER
+ || test_type == TEST_ORGANELLE_PRODUCTS
+ || test_type == TEST_SP_NOT_UNCULTURED
+ || test_type == TEST_BAD_MRNA_QUAL
+ || test_type == TEST_UNNECESSARY_ENVIRONMENTAL
+ || test_type == TEST_UNNECESSARY_VIRUS_GENE
+ || test_type == TEST_UNWANTED_SET_WRAPPER
+ || test_type == TEST_MISSING_PRIMER
+ || test_type == TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE
+ || test_type == TEST_SMALL_GENOME_SET_PROBLEM
+ || test_type == TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES
+ || test_type == TEST_TAXNAME_NOT_IN_DEFLINE) {
rval = TRUE;
}
break;
@@ -22471,6 +25150,7 @@ extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyR
extern void PrintDiscrepancyTestList (FILE *fp)
{
Int4 i;
+ CharPtr tmp;
/* discrepancy report */
fprintf (fp, "Discrepancy Report Tests\n");
@@ -22496,9 +25176,35 @@ extern void PrintDiscrepancyTestList (FILE *fp)
fprintf (fp, "Terms searched for by SUSPECT_PRODUCT_NAMES:\n");
for (i = 0; i < num_suspect_product_terms; i++) {
- fprintf (fp, "'%s':%s\n",
+ fprintf (fp, "'%s':%s (Category: %s)\n",
suspect_product_terms[i].pattern,
- SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func));
+ SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func),
+ suspect_name_category_names[suspect_product_terms[i].fix_type]);
+ }
+ fprintf (fp, "\n");
+
+ fprintf (fp, "Replacements for SUSPECT_PRODUCT_NAMES:\n");
+ fprintf (fp, "Typos:\n");
+ for (i = 0; i < num_suspect_product_terms; i++) {
+ if (suspect_product_terms[i].replace_func != NULL && suspect_product_terms[i].fix_type == eSuspectNameType_Typo) {
+ tmp = SummarizeSuspectReplacementPhrase (suspect_product_terms[i].replace_func, suspect_product_terms[i].replace_phrase);
+ fprintf (fp, "'%s':%s (%s)\n",
+ suspect_product_terms[i].pattern,
+ SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func),
+ tmp);
+ tmp = MemFree (tmp);
+ }
+ }
+ fprintf (fp, "QuickFixes:\n");
+ for (i = 0; i < num_suspect_product_terms; i++) {
+ if (suspect_product_terms[i].replace_func != NULL && suspect_product_terms[i].fix_type == eSuspectNameType_QuickFix) {
+ tmp = SummarizeSuspectReplacementPhrase (suspect_product_terms[i].replace_func, suspect_product_terms[i].replace_phrase);
+ fprintf (fp, "'%s':%s (%s)\n",
+ suspect_product_terms[i].pattern,
+ SummarizeSuspectPhraseFunc(suspect_product_terms[i].search_func),
+ tmp);
+ tmp = MemFree (tmp);
+ }
}
fprintf (fp, "\n");
@@ -22627,6 +25333,7 @@ extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp)
dcp->conf_list[ONCALLER_SUPERFLUOUS_GENE] = FALSE;
dcp->conf_list[ONCALLER_CONSORTIUM] = FALSE;
dcp->conf_list[DISC_FEATURE_LIST] = FALSE;
+ dcp->conf_list[TEST_ORGANELLE_PRODUCTS] = FALSE;
/* mitochondrial tests */
dcp->conf_list[DISC_DUP_TRNA] = FALSE;
@@ -22639,6 +25346,21 @@ extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp)
/* on-caller specific tests */
dcp->conf_list[DISC_SRC_QUAL_PROBLEM] = FALSE;
dcp->conf_list[DISC_CATEGORY_HEADER] = FALSE;
+ dcp->conf_list[TEST_TAXNAME_NOT_IN_DEFLINE] = FALSE;
+}
+
+
+/* Sets every entry of dcp->conf_list (indices 0 .. MAX_DISC_TYPE - 1) to the
+ * result of IsTestTypeAppropriateForReportType for report_type.
+ * Does nothing when dcp is NULL.
+ */
+extern void ConfigureForReportType (DiscrepancyConfigPtr dcp, EDiscrepancyReportType report_type)
+{
+  Int4 test_type;
+
+  if (dcp != NULL) {
+    for (test_type = 0; test_type < MAX_DISC_TYPE; test_type++) {
+      dcp->conf_list[test_type] = IsTestTypeAppropriateForReportType (test_type, report_type);
+    }
+  }
 }
@@ -22685,6 +25407,9 @@ extern void AutofixDiscrepancies (ValNodePtr vnp, Boolean fix_all, LogInfoPtr li
if (discrepancy_info_list[cip->clickable_item_type].autofix_func != NULL) {
(discrepancy_info_list[cip->clickable_item_type].autofix_func) (cip->item_list, NULL, lip);
}
+ if (cip->autofix_func != NULL) {
+ (cip->autofix_func)(cip->item_list, cip->autofix_data, lip);
+ }
}
AutofixDiscrepancies (cip->subcategories, fix_all || cip->chosen, lip);
}
@@ -22699,7 +25424,8 @@ extern void ChooseFixableDiscrepancies (ValNodePtr vnp)
while (vnp != NULL) {
cip = (ClickableItemPtr) vnp->data.ptrvalue;
if (cip != NULL && !cip->chosen) {
- if (discrepancy_info_list[cip->clickable_item_type].autofix_func != NULL) {
+ if (discrepancy_info_list[cip->clickable_item_type].autofix_func != NULL
+ || cip->autofix_func != NULL) {
cip->chosen = TRUE;
} else {
ChooseFixableDiscrepancies (cip->subcategories);
@@ -22718,7 +25444,7 @@ static CharPtr GetLocusTagForFeature (SeqFeatPtr sfp)
if (sfp == NULL) {
return NULL;
}
- if (sfp->idx.subtype == FEATDEF_GENE) {
+ if (sfp->data.choice == SEQFEAT_GENE) {
grp = sfp->data.value.ptrvalue;
} else {
grp = SeqMgrGetGeneXref (sfp);
@@ -22788,10 +25514,28 @@ extern CharPtr GetBioseqSetLabel (BioseqSetPtr bssp)
}
+/* Stream callback: adds to the Int4 counter that userdata points to the number
+ * of characters in sequence other than upper-case 'A', 'T', 'G' and 'C'.
+ * Does nothing when either argument is NULL.
+ */
+static void LIBCALLBACK CountNonATGCNTProc (CharPtr sequence, Pointer userdata)
+{
+  Int4Ptr count = (Int4Ptr) userdata;
+  CharPtr residue;
+
+  if (sequence == NULL || count == NULL) {
+    return;
+  }
+
+  for (residue = sequence; *residue != 0; residue++) {
+    switch (*residue) {
+      case 'A':
+      case 'C':
+      case 'G':
+      case 'T':
+        break;
+      default:
+        (*count)++;
+        break;
+    }
+  }
+}
+
+
extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename)
{
CharPtr row_text = NULL, tmp, fmt = "%s:%s";
- SeqFeatPtr sfp, cds, sfp_index;
+ SeqFeatPtr sfp, cds, sfp_index = NULL;
BioseqPtr bsp;
SeqMgrFeatContext context;
CharPtr location;
@@ -22799,13 +25543,14 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename)
SeqDescrPtr sdp;
CharPtr locus_tag = "";
CharPtr bsp_fmt = "%s (length %d)\n";
+ CharPtr bsp_unusual_fmt = "%s (length %d, %d other)\n";
ObjValNodePtr ovn;
SeqEntryPtr sep;
SeqSubmitPtr ssp;
Boolean special_flag = FALSE;
Uint1 data_choice;
ValNodePtr extra_fields = NULL, field, field_strings = NULL, field_values, val_vnp;
- Int4 field_len = 0, label_len;
+ Int4 field_len = 0, label_len, num_bad;
if (vnp == NULL)
{
@@ -22865,9 +25610,9 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename)
+ StringLen (location)
+ StringLen (locus_tag)
+ 6));
- sprintf (row_text, "%s\t%s\t%s\t%s\n", label == NULL ? "unknown label" : label,
- context.label == NULL ? "unknown context label" : context.label,
- location == NULL ? "unknown location" : location,
+ sprintf (row_text, "%s\t%s\t%s\t%s\n", label,
+ context.label,
+ location,
locus_tag == NULL ? "" : locus_tag);
location = MemFree (location);
}
@@ -22879,8 +25624,15 @@ extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename)
if (bsp != NULL)
{
tmp = GetBioseqLabel (vnp->data.ptrvalue);
- row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_fmt) + StringLen (tmp) + 32));
- sprintf (row_text, bsp_fmt, tmp, bsp->length);
+ num_bad = 0;
+ SeqPortStream (bsp, 0, (Pointer) &num_bad, CountNonATGCNTProc);
+ if (num_bad > 0) {
+ row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_unusual_fmt) + StringLen (tmp) + 47));
+ sprintf (row_text, bsp_unusual_fmt, tmp, bsp->length, num_bad);
+ } else {
+ row_text = (CharPtr) MemNew (sizeof(Char) * (StringLen (bsp_fmt) + StringLen (tmp) + 32));
+ sprintf (row_text, bsp_fmt, tmp, bsp->length);
+ }
tmp = MemFree (tmp);
}
}
@@ -24721,9 +27473,12 @@ NLM_EXTERN void WriteGlobalDiscrepancyReport (GlobalDiscrepReportPtr g, FILE *fp
/* create report for feature counts */
ValNodeLink (&local_list, CreateGlobalFeatureCountReports (&(g->feature_count_list)));
+ /* data collected for some tests with global components should not be displayed */
+ RemoveUnwantedDiscrepancyItems (&local_list, g->test_config);
+
/* group discrepany reports from separate files */
CollateDiscrepancyReports (&(g->discrepancy_list));
-
+
fprintf (fp, "Discrepancy Report Results\n\n");
fprintf (fp, "Summary\n");
WriteDiscrepancyReportSummary (local_list, fp);
@@ -24855,18 +27610,37 @@ extern ValNodePtr BarcodeTestResultsListFree (ValNodePtr res_list)
{
ValNodePtr vnp;
- if (res_list != NULL)
+ while (res_list != NULL)
{
vnp = res_list->next;
res_list->next = NULL;
res_list->data.ptrvalue = BarcodeTestResultsFree (res_list->data.ptrvalue);
- ValNodeFree (res_list);
- BarcodeTestResultsListFree (vnp);
+ res_list = ValNodeFree (res_list);
+ res_list = vnp;
}
return res_list;
}
+extern ValNodePtr BarcodeTestResultsExtractPass (ValNodePtr PNTR res_list)
+{
+ ValNodePtr vnp, pass_list = NULL;
+ BarcodeTestResultsPtr res;
+
+ if (res_list == NULL || *res_list == NULL) {
+ return NULL;
+ }
+ for (vnp = *res_list; vnp != NULL; vnp = vnp->next) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ if (PassBarcodeTests(res)) {
+ vnp->choice = 1;
+ }
+ }
+ pass_list = ValNodeExtractList (res_list, 1);
+ return pass_list;
+}
+
+
/* determines whether barcode tests should be performed on a sequence -
* no barcode keyword, no barcode tests needed.
*/
@@ -24894,7 +27668,7 @@ extern Boolean HasBARCODETech (BioseqPtr bsp)
* Finds the MolInfo descriptor for the Bioseq and removes the BARCODE technique.
* Returns true if the BARCODE technique was present before it was removed.
*/
-static Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp)
+NLM_EXTERN Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp)
{
SeqDescrPtr sdp;
SeqMgrDescContext dcontext;
@@ -24916,7 +27690,7 @@ static Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp)
}
-static Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp)
+NLM_EXTERN Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp)
{
SeqDescrPtr sdp;
SeqMgrDescContext dcontext;
@@ -24925,6 +27699,7 @@ static Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp)
StringConstraint sc;
ObjValNodePtr ovn;
+ MemSet (&sc, 0, sizeof (StringConstraint));
sc.case_sensitive = FALSE;
sc.match_location = String_location_equals;
sc.match_text = "BARCODE";
@@ -24994,6 +27769,51 @@ static void ApplyBarcodeTechToBioseq (BioseqPtr bsp)
}
+NLM_EXTERN Boolean BioseqHasKeyword (BioseqPtr bsp, CharPtr keyword)
+{
+ SeqDescrPtr sdp;
+ SeqMgrDescContext dcontext;
+ Boolean found = FALSE;
+ GBBlockPtr gb;
+ ValNodePtr vnp;
+ UserObjectPtr uop;
+
+ if (StringICmp (keyword, "UNVERIFIED") == 0)
+ {
+ /* special case for unverified */
+ for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ sdp != NULL && !found;
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext))
+ {
+ if ((uop = (UserObjectPtr) sdp->data.ptrvalue) != NULL
+ && uop->type != NULL
+ && StringICmp (uop->type->str, "Unverified") == 0)
+ {
+ found = TRUE;
+ }
+ }
+ }
+
+ for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
+ sdp != NULL && !found;
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext))
+ {
+ gb = (GBBlockPtr) sdp->data.ptrvalue;
+ if (gb != NULL)
+ {
+ for (vnp = gb->keywords; vnp != NULL && !found; vnp = vnp->next)
+ {
+ if (StringICmp (vnp->data.ptrvalue, keyword) == 0)
+ {
+ found = TRUE;
+ }
+ }
+ }
+ }
+ return found;
+}
+
+
NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp)
{
SeqDescrPtr sdp;
@@ -25002,9 +27822,14 @@ NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp)
GBBlockPtr gb;
SeqEntryPtr sep;
+ if (BioseqHasKeyword (bsp, "UNVERIFIED"))
+ {
+ return;
+ }
+
for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
sdp != NULL && !found;
- sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext))
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext))
{
gb = (GBBlockPtr) sdp->data.ptrvalue;
if (gb == NULL)
@@ -25028,80 +27853,7 @@ NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp)
NLM_EXTERN Boolean BioseqHasBarcodeKeyword (BioseqPtr bsp)
{
- SeqDescrPtr sdp;
- SeqMgrDescContext context;
- GBBlockPtr gb;
- ValNodePtr vnp;
- Boolean rval = FALSE;
-
- for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context);
- sdp != NULL && !rval;
- sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &context)) {
- gb = (GBBlockPtr) sdp->data.ptrvalue;
- if (gb != NULL) {
- for (vnp = gb->keywords; vnp != NULL && !rval; vnp = vnp->next) {
- if (StringCmp (vnp->data.ptrvalue, "BARCODE") == 0) {
- rval = TRUE;
- }
- }
- }
- }
- return rval;
-}
-
-
-NLM_EXTERN Boolean HasLowTrace (BioseqPtr bsp)
-{
- SeqDescrPtr sdp;
- SeqMgrDescContext context;
- Boolean rval = TRUE;
- UserObjectPtr uop;
- UserFieldPtr ufp;
- ObjectIdPtr oip;
- int num_trace = 0;
-
- if (bsp == NULL) {
- return FALSE;
- }
- for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
- sdp != NULL && rval;
- sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
- uop = (UserObjectPtr) sdp->data.ptrvalue;
- if (uop != NULL && uop->type != NULL && StringICmp (uop->type->str, "Submission") == 0) {
- for (ufp = uop->data; ufp != NULL && rval; ufp = ufp->next) {
- oip = ufp->label;
- if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0
- && sscanf (ufp->data.ptrvalue, "Traces: %d", &num_trace) == 1
- && num_trace > 1) {
- rval = FALSE;
- }
- }
- }
- }
- return rval;
-}
-
-
-static void GetBarcodeLowTraceListCallback (BioseqPtr bsp, Pointer data)
-{
- if (bsp == NULL || data == NULL
- || ISA_aa (bsp->mol)
- || !BioseqHasBarcodeKeyword(bsp)
- || !HasLowTrace(bsp)) {
- return;
- } else {
- ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
- }
-}
-
-
-NLM_EXTERN ValNodePtr GetBarcodeLowTraceList (SeqEntryPtr sep)
-{
- ValNodePtr list = NULL;
-
- VisitBioseqsInSep (sep, &list, GetBarcodeLowTraceListCallback);
-
- return list;
+ return BioseqHasKeyword (bsp, "BARCODE");
}
@@ -25136,25 +27888,6 @@ typedef struct barcodesearch {
BarcodeTestConfigPtr cfg;
} BarcodeSearchData, PNTR BarcodeSearchPtr;
-static void FindShortBarcodeSequencesCallback (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeSearchPtr bsd;
-
- if (bsp == NULL || ISA_aa (bsp->mol)
- || (bsd = (BarcodeSearchPtr) userdata) == NULL
- || bsd->cfg == NULL
- || (bsd->cfg->require_keyword && !HasBARCODETech(bsp)))
- {
- return;
- }
-
- if (bsp->length < bsd->cfg->min_length)
- {
- ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp);
- }
-}
-
-
NLM_EXTERN Boolean IsIBOL (BioseqPtr bsp)
{
Boolean is_ibol = FALSE;
@@ -25231,44 +27964,6 @@ static Boolean HasOrderAssignment (BioseqPtr bsp)
}
-static void FindMissingOrderAssignment (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeSearchPtr bsd;
-
- if (bsp == NULL || ISA_aa (bsp->mol)
- || (bsd = (BarcodeSearchPtr) userdata) == NULL
- || bsd->cfg == NULL
- || (bsd->cfg->require_keyword && !HasBARCODETech(bsp)))
- {
- return;
- }
-
- if (!HasOrderAssignment (bsp))
- {
- ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp);
- }
-}
-
-
-static void FindLowTrace (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeSearchPtr bsd;
-
- if (bsp == NULL || ISA_aa (bsp->mol)
- || (bsd = (BarcodeSearchPtr) userdata) == NULL
- || bsd->cfg == NULL
- || (bsd->cfg->require_keyword && !HasBARCODETech(bsp)))
- {
- return;
- }
-
- if (HasLowTrace (bsp))
- {
- ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp);
- }
-}
-
-
static Boolean HasFrameShift (BioseqPtr bsp)
{
SeqDescrPtr sdp;
@@ -25300,48 +27995,15 @@ static Boolean HasFrameShift (BioseqPtr bsp)
}
-static void FindFrameShift (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeSearchPtr bsd;
-
- if (bsp == NULL || ISA_aa (bsp->mol)
- || (bsd = (BarcodeSearchPtr) userdata) == NULL
- || bsd->cfg == NULL
- || (bsd->cfg->require_keyword && !HasBARCODETech(bsp)))
- {
- return;
- }
-
- if (IsIBOL (bsp) && HasFrameShift (bsp))
- {
- ValNodeAddPointer (&(bsd->bioseq_list), OBJ_BIOSEQ, bsp);
- }
-}
-
-
typedef Boolean (*BarcodeBioSourceTestFunc) PROTO ((BioSourcePtr));
static Boolean HasForwardAndReversePrimers (BioSourcePtr biop)
{
- Boolean found_fwd_seq = FALSE;
- Boolean found_rev_seq = FALSE;
- SubSourcePtr ssp;
+ if (biop == NULL || biop->pcr_primers == NULL) return FALSE;
- if (biop == NULL || biop->subtype == NULL) return FALSE;
-
- for (ssp = biop->subtype; ssp != NULL && (!found_fwd_seq || !found_rev_seq); ssp = ssp->next)
- {
- if (ssp->subtype == SUBSRC_fwd_primer_seq)
- {
- found_fwd_seq = TRUE;
- }
- else if (ssp->subtype == SUBSRC_rev_primer_seq)
- {
- found_rev_seq = TRUE;
- }
- }
+ if (biop->pcr_primers->forward == NULL || biop->pcr_primers->reverse == NULL) return FALSE;
+ return TRUE;
- return found_fwd_seq && found_rev_seq;
}
@@ -25770,30 +28432,6 @@ static void BarcodeBioSourceTestCallback (BioseqPtr bsp, Pointer userdata, Barco
}
-static void FindMissingForwardAndReversePrimers (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeBioSourceTestCallback (bsp, userdata, HasForwardAndReversePrimers);
-}
-
-
-static void FindMissingCountryAndLatLon (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeBioSourceTestCallback (bsp, userdata, HasCountry);
-}
-
-
-static void FindMissingSpecimenVoucher (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeBioSourceTestCallback (bsp, userdata, HasVoucher);
-}
-
-
-static void FindBadCollectionDate (BioseqPtr bsp, Pointer userdata)
-{
- BarcodeBioSourceTestCallback (bsp, userdata, HasCollectionDate);
-}
-
-
static void FindBadGPS (BioseqPtr bsp, Pointer userdata)
{
BarcodeBioSourceTestCallback (bsp, userdata, BarcodeGPSOkay);
@@ -25830,114 +28468,87 @@ BarcodeTestForSeqEntry
}
-static void BarcodePercentNDiscrepancy (BioseqPtr bsp, Pointer userdata)
+static void BarcodePercentNDiscrepanciesForSeqEntry (ValNodePtr results, ValNodePtr PNTR discrepancy_list, FloatLo min_n_percent)
{
- FloatLo pct;
- BarcodeSearchPtr bs;
-
- if (bsp == NULL || ISA_aa (bsp->mol)
- || (bs = (BarcodeSearchPtr) userdata) == NULL
- || bs->cfg == NULL
- || (bs->cfg->require_keyword && !HasBARCODETech (bsp)))
- {
- return;
- }
-
- bs = (BarcodeSearchPtr) userdata;
-
- pct = PercentNInBioseq (bsp, TRUE);
- if (pct > bs->cfg->min_n_percent)
- {
- ValNodeAddPointer (&(bs->bioseq_list), OBJ_BIOSEQ, bsp);
- }
-}
-
-
-static void BarcodePercentNDiscrepanciesForSeqEntry (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list, BarcodeTestConfigPtr cfg)
-{
- BarcodeSearchData bsd;
- ValNodePtr subcategories = NULL, vnp;
+ BarcodeTestResultsPtr res;
+ ValNodePtr subcategories = NULL, bioseq_list = NULL, vnp;
ClickableItemPtr cip;
CharPtr fmt = "Sequence has %.1f%% percent Ns";
CharPtr top_fmt = "%d sequences have > %.1f%% Ns";
- FloatLo pct;
- bsd.bioseq_list = NULL;
- bsd.cfg = cfg;
- if (bsd.cfg == NULL)
+ for (vnp = results; vnp != NULL; vnp = vnp->next)
{
- bsd.cfg = BarcodeTestConfigNew ();
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ if (res->n_percent < min_n_percent) {
+ cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
+ MemSet (cip, 0, sizeof (ClickableItemData));
+ cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 5));
+ sprintf (cip->description, fmt, res->n_percent);
+ ValNodeAddPointer (&bioseq_list, OBJ_BIOSEQ, res->bsp);
+ ValNodeAddPointer (&(cip->item_list), OBJ_BIOSEQ, res->bsp);
+ ValNodeAddPointer (&subcategories, 0, cip);
+ }
}
- VisitBioseqsInSep (sep, &bsd, BarcodePercentNDiscrepancy);
- if (bsd.bioseq_list == NULL) return;
-
- for (vnp = bsd.bioseq_list; vnp != NULL; vnp = vnp->next)
- {
+ if (bioseq_list != NULL) {
cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
MemSet (cip, 0, sizeof (ClickableItemData));
- cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 5));
- pct = PercentNInBioseq (vnp->data.ptrvalue, TRUE);
- sprintf (cip->description, fmt, pct);
- ValNodeAddPointer (&(cip->item_list), OBJ_BIOSEQ, vnp->data.ptrvalue);
- ValNodeAddPointer (&subcategories, 0, cip);
- }
-
- cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
- MemSet (cip, 0, sizeof (ClickableItemData));
- cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (top_fmt) + 10));
- sprintf (cip->description, fmt, ValNodeLen (bsd.bioseq_list), bsd.cfg->min_n_percent);
- cip->item_list = bsd.bioseq_list;
- cip->subcategories = subcategories;
- ValNodeAddPointer (discrepancy_list, 0, cip);
-
- if (bsd.cfg != cfg)
- {
- bsd.cfg = BarcodeTestConfigFree (bsd.cfg);
+ cip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (top_fmt) + 10));
+ sprintf (cip->description, fmt, ValNodeLen (bioseq_list), min_n_percent);
+ cip->item_list = bioseq_list;
+ cip->subcategories = subcategories;
+ ValNodeAddPointer (discrepancy_list, 0, cip);
}
}
static void GetBarcodeDiscrepanciesForSeqEntry (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list, BarcodeTestConfigPtr cfg)
{
+ ValNodePtr results, vnp;
+ ValNodePtr PNTR lists;
+ BarcodeTestResultsPtr res;
+ Int4 i;
+ CharPtr fmts[] = {"%d sequences are shorter than 500 nucleotides",
+ "%d sequences are missing forward and/or reverse primers",
+ "%d sequences are missing country",
+ "%d sequences are missing specimen voucher",
+ NULL,
+ "%d sequences have invalid collection date",
+ "%d sequences are missing order assignment",
+ "%d sequences have low trace",
+ "%d sequences have frameshift" };
+
+
+
if (cfg == NULL) return;
- if (cfg->conf_list[eBarcodeTest_Length])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindShortBarcodeSequencesCallback, "%d sequences are shorter than 500 nucleotides", cfg);
- }
- if (cfg->conf_list[eBarcodeTest_Primers])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingForwardAndReversePrimers, "%d sequences are missing forward and/or reverse primers", cfg);
- }
- if (cfg->conf_list[eBarcodeTest_Country])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingCountryAndLatLon, "%d sequences are missing country", cfg);
+ results = GetBarcodePassFail(sep, cfg);
+
+ lists = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * eBarcodeTest_LAST);
+ MemSet (lists, 0, sizeof (ValNodePtr) * eBarcodeTest_LAST);
+ for (vnp = results; vnp != NULL; vnp = vnp->next) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ for (i = 0; i < eBarcodeTest_LAST; i++) {
+ if (cfg->conf_list[i] && res->failed_tests[i] && fmts[i] != NULL) {
+ ValNodeAddPointer (&(lists[i]), OBJ_BIOSEQ, res->bsp);
+ }
+ }
}
- if (cfg->conf_list[eBarcodeTest_SpecimenVoucher])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingSpecimenVoucher, "%d sequences are missing specimen voucher", cfg);
+ for (i = 0; i < eBarcodeTest_LAST; i++) {
+ if (cfg->conf_list[i] && lists[i] != NULL) {
+ if (fmts[i] != NULL) {
+ ValNodeAddPointer (discrepancy_list, 0, NewClickableItem (0, fmts[i], lists[i]));
+ }
+ }
}
+ lists = MemFree(lists);
+
if (cfg->conf_list[eBarcodeTest_PercentN])
{
- BarcodePercentNDiscrepanciesForSeqEntry (sep, discrepancy_list, cfg);
- }
- if (cfg->conf_list[eBarcodeTest_CollectionDate])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindBadCollectionDate, "%d sequences have invalid collection date", cfg);
- }
- if (cfg->conf_list[eBarcodeTest_OrderAssignment])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindMissingOrderAssignment, "%d sequences are missing order assignment", cfg);
- }
- if (cfg->conf_list[eBarcodeTest_LowTrace])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindLowTrace, "%d sequences have low trace", cfg);
- }
- if (cfg->conf_list[eBarcodeTest_FrameShift])
- {
- BarcodeTestForSeqEntry (sep, discrepancy_list, FindFrameShift, "%d sequences have frameshift", cfg);
+ BarcodePercentNDiscrepanciesForSeqEntry (sep, discrepancy_list, cfg->min_n_percent);
}
+
+ results = BarcodeTestResultsListFree(results);
}
@@ -26184,7 +28795,8 @@ extern Boolean PassBarcodeTests (BarcodeTestResultsPtr res)
}
-NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, BarcodeTestConfigPtr cfg)
+/* NOTE - this no longer performs the low trace test - that test needs to be done for the seq-entry as a whole */
+static BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, BarcodeTestConfigPtr cfg)
{
BarcodeTestResultsPtr res = NULL;
@@ -26222,10 +28834,6 @@ NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, Bar
{
res->failed_tests[eBarcodeTest_OrderAssignment] = !HasOrderAssignment (bsp);
}
- if (cfg->conf_list[eBarcodeTest_LowTrace])
- {
- res->failed_tests[eBarcodeTest_LowTrace] = HasLowTrace (bsp);
- }
if (cfg->conf_list[eBarcodeTest_FrameShift])
{
res->failed_tests[eBarcodeTest_FrameShift] = IsIBOL(bsp) && HasFrameShift (bsp);
@@ -26241,7 +28849,7 @@ NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, Bar
}
-static void FailedBarcodeTests (BioseqPtr bsp, Pointer userdata)
+static void DoBarcodeTestsExceptLowTrace (BioseqPtr bsp, Pointer userdata)
{
BarcodeBioseqSearchPtr sp;
BarcodeTestResultsPtr res = NULL;
@@ -26257,47 +28865,170 @@ static void FailedBarcodeTests (BioseqPtr bsp, Pointer userdata)
res = BarcodeTestResultsForBioseq (bsp, sp->cfg);
if (res == NULL) return;
- if (sp->collect_positives
- || !PassBarcodeTests(res))
- {
- ValNodeAddPointer (&(sp->results_list), 0, res);
- }
- else
- {
- res = BarcodeTestResultsFree (res);
- }
+ ValNodeAddPointer (&(sp->results_list), 0, res);
}
-extern ValNodePtr GetBarcodeFailedAccessionList (SeqEntryPtr sep, BarcodeTestConfigPtr cfg)
-{
- BarcodeBioseqSearchData sd;
+#ifdef OS_MSWIN
+#include <undefwin.h>
+#include <windows.h>
- if (cfg == NULL)
- {
- sd.cfg = BarcodeTestConfigNew();
- }
- else
- {
- sd.cfg = cfg;
- }
+NLM_EXTERN Int4 RunSilent(const char *cmdline) {
+ int status = -1;
- sd.results_list = NULL;
- sd.collect_positives = FALSE;
+ STARTUPINFO StartupInfo;
+ PROCESS_INFORMATION ProcessInfo;
- VisitBioseqsInSep (sep, &sd, FailedBarcodeTests);
+ DWORD dwCreateFlags;
- if (sd.cfg != cfg)
- {
- sd.cfg = BarcodeTestConfigFree (sd.cfg);
+#ifndef COMP_METRO
+ /* code warrior headers do not have this, so comment out to allow compilation */
+ _flushall();
+#endif
+
+ /* Set startup info */
+ memset(&StartupInfo, 0, sizeof(StartupInfo));
+ StartupInfo.cb = sizeof(STARTUPINFO);
+ StartupInfo.dwFlags = STARTF_USESHOWWINDOW;
+ StartupInfo.wShowWindow = SW_HIDE;
+ dwCreateFlags = CREATE_NEW_CONSOLE;
+
+ /* Run program */
+ if (CreateProcess(NULL, (LPSTR)cmdline, NULL, NULL, FALSE,
+ dwCreateFlags, NULL, NULL, &StartupInfo, &ProcessInfo))
+ {
+ /* wait running process */
+ DWORD exitcode = -1;
+ WaitForSingleObject(ProcessInfo.hProcess, INFINITE);
+ GetExitCodeProcess(ProcessInfo.hProcess, &exitcode);
+ status = exitcode;
+ CloseHandle(ProcessInfo.hProcess);
+ CloseHandle(ProcessInfo.hThread);
+ }
+ else
+ {
+ DWORD dw = GetLastError();
+ /* check for common errors first */
+ if(dw == ERROR_FILE_NOT_FOUND)
+ Message(MSG_ERROR, "CreateProcess() failed: file not found.");
+ else
+ /* generic error message */
+ Message(MSG_ERROR, "CreateProcess() failed, error code %d.",
+ (int)dw);
+ }
+
+ return status;
+}
+#endif
+
+static CharPtr tracefetchcmd = NULL;
+
+static void FillInMissingTraces (ValNodePtr trace_check_list)
+{
+ Char path_in [PATH_MAX];
+ Char path_out [PATH_MAX];
+ FILE *fp;
+ Char id_txt[255];
+ Char cmmd [256];
+ ValNodePtr vnp;
+ BarcodeTestResultsPtr res;
+ ReadBufferData rbd;
+ CharPtr line, cp;
+
+ if (tracefetchcmd == NULL) {
+ if (GetAppParam ("SEQUIN", "TRACECOUNT", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
+ tracefetchcmd = StringSaveNoNull (cmmd);
+ }
+ }
+ if (tracefetchcmd == NULL) return;
+
+ TmpNam (path_in);
+ fp = FileOpen (path_in, "w");
+ if (fp == NULL) {
+ Message (MSG_ERROR, "Unable to open temporary file %s, unable to get trace results", path_in);
+ } else {
+ /* make list of accessions to check */
+ for (vnp = trace_check_list; vnp != NULL; vnp = vnp->next) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ if (res != NULL) {
+ SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
+ fprintf (fp, "%s\n", id_txt);
+ }
+ }
+ FileClose (fp);
+ TmpNam (path_out);
+ /* launch script */
+#ifdef OS_UNIX
+ sprintf (cmmd, "%s -i %s -o %s", tracefetchcmd, path_in, path_out);
+ system (cmmd);
+#endif
+#ifdef OS_MSWIN
+ sprintf (cmmd, "%s -i %s -o %s", tracefetchcmd, path_in, path_out);
+ RunSilent (cmmd);
+#endif
+ /* read results */
+ fp = FileOpen (path_out, "r");
+ if (fp == NULL) {
+ Message (MSG_ERROR, "Unable to open temporary file %s for results", path_out);
+ } else {
+ rbd.current_data = NULL;
+ rbd.fp = fp;
+
+ line = AbstractReadFunction (&rbd);
+ vnp = trace_check_list;
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
+
+ while (line != NULL && line[0] != EOF && vnp != NULL) {
+ if (!StringHasNoText (line)) {
+ cp = StringChr (line, '\t');
+ if (cp != NULL) {
+ *cp = 0;
+ while (StringCmp (id_txt, line) != 0 && vnp != NULL) {
+ if (res->num_trace < 2) {
+ res->failed_tests[eBarcodeTest_LowTrace] = TRUE;
+ }
+ vnp = vnp->next;
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ SeqIdWrite (SeqIdFindBest (res->bsp->id, SEQID_GENBANK), id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
+ }
+ if (vnp != NULL) {
+ res->num_trace++;
+ }
+ }
+ }
+ line = MemFree (line);
+ line = AbstractReadFunction (&rbd);
+ }
+ while (vnp != NULL) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ if (res->num_trace < 2) {
+ res->failed_tests[eBarcodeTest_LowTrace] = TRUE;
+ }
+ vnp = vnp->next;
+ }
+
+ FileClose (fp);
+ FileRemove (path_out);
+ }
+ FileRemove (path_in);
}
- return sd.results_list;
}
extern ValNodePtr GetBarcodePassFail (SeqEntryPtr sep, BarcodeTestConfigPtr cfg)
{
BarcodeBioseqSearchData sd;
+ ValNodePtr vnp;
+ BarcodeTestResultsPtr res;
+ SeqDescPtr sdp;
+ SeqMgrDescContext context;
+ UserObjectPtr uop;
+ UserFieldPtr ufp;
+ ObjectIdPtr oip;
+ Boolean has_low_trace, has_object;
+ int num_trace = 0;
+ ValNodePtr trace_check_list = NULL;
if (cfg == NULL)
{
@@ -26309,9 +29040,51 @@ extern ValNodePtr GetBarcodePassFail (SeqEntryPtr sep, BarcodeTestConfigPtr cfg)
}
sd.results_list = NULL;
- sd.collect_positives = TRUE;
- VisitBioseqsInSep (sep, &sd, FailedBarcodeTests);
+ VisitBioseqsInSep (sep, &sd, DoBarcodeTestsExceptLowTrace);
+
+ /* now do low trace test */
+ /* first, loop through list - if bioseq has submission object with trace statement,
+ * get result from that. otherwise add to list. */
+ for (vnp = sd.results_list; vnp != NULL; vnp = vnp->next) {
+ res = (BarcodeTestResultsPtr) vnp->data.ptrvalue;
+ if (res != NULL) {
+ /* look for user object */
+ has_low_trace = FALSE;
+ has_object = FALSE;
+ for (sdp = SeqMgrGetNextDescriptor (res->bsp, NULL, Seq_descr_user, &context);
+ sdp != NULL && !has_low_trace;
+ sdp = SeqMgrGetNextDescriptor (res->bsp, sdp, Seq_descr_user, &context)) {
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop != NULL && uop->type != NULL && StringICmp (uop->type->str, "Submission") == 0) {
+ for (ufp = uop->data; ufp != NULL && !has_low_trace; ufp = ufp->next) {
+ oip = ufp->label;
+ if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) {
+ if ( sscanf (ufp->data.ptrvalue, "Traces: %d", &num_trace) == 1) {
+ res->num_trace = num_trace;
+ if (num_trace < 2) {
+ has_low_trace = TRUE;
+ }
+ has_object = TRUE;
+ }
+ }
+ }
+ }
+ }
+ if (has_low_trace) {
+ res->failed_tests[eBarcodeTest_LowTrace] = TRUE;
+ } else if (!has_object) {
+ ValNodeAddPointer (&trace_check_list, 0, res);
+ }
+ }
+ }
+
+ /* then put IDs in list, use script to collect from trace, add to results. */
+ if (trace_check_list != NULL) {
+ FillInMissingTraces (trace_check_list);
+ /* NOTE - do NOT free barcode result data, since this list points to data in sd.results list */
+ trace_check_list = ValNodeFree (trace_check_list);
+ }
if (sd.cfg != cfg)
{
@@ -26339,9 +29112,6 @@ extern void WriteBarcodeTestComplianceEx (FILE *fp, ValNodePtr results_list, Boo
barcode_id = BarcodeTestBarcodeIdString (res->bsp);
genbank_id = BarcodeTestGenbankIdString (res->bsp);
pass = PassBarcodeTests (res);
- if (pass && low_trace_fail && HasLowTrace (res->bsp)) {
- pass = FALSE;
- }
fprintf (fp, "%s\t%s\t%s\n", barcode_id, genbank_id, pass ? "PASS" : "FAIL");
barcode_id = MemFree (barcode_id);
genbank_id = MemFree (genbank_id);
@@ -27318,6 +30088,8 @@ static ReplacePairData latlon_replace_list[] = {
{ "DEG.", " " },
{ "DEG", " " },
{ "MIN.", "'" },
+ { "MINUTES", "'" },
+ { "MINUTE", "'" },
{ "MIN", "'" },
{ "SEC.", "''" },
{ "SEC", "''" },
@@ -27327,6 +30099,7 @@ static ReplacePairData latlon_replace_list[] = {
{ "WEST", "W" },
};
+
static Int4 num_latlon_replace = sizeof (latlon_replace_list) / sizeof (ReplacePairData);
@@ -27461,7 +30234,7 @@ extern CharPtr FixLatLonFormat (CharPtr orig_lat_lon)
bad_letter_found = TRUE;
}
}
- else if (i >= 13)
+ else if (i >= 15)
{
if (dtoken1 == NULL)
{
@@ -27818,14 +30591,15 @@ static CharPtr StringFromObjectID (ObjectIdPtr oip)
return str;
}
-extern void ApplyBarcodeDbxrefToBioSource (BioSourcePtr biop, ObjectIdPtr oip)
+static Boolean ApplyBarcodeDbxrefToBioSource (BioSourcePtr biop, ObjectIdPtr oip)
{
ValNodePtr vnp;
DbtagPtr dbt;
CharPtr str, cmp;
Boolean found = FALSE;
+ Boolean rval = FALSE;
- if (biop == NULL || oip == NULL) return;
+ if (biop == NULL || oip == NULL) return FALSE;
if (biop->org == NULL)
{
@@ -27854,7 +30628,9 @@ extern void ApplyBarcodeDbxrefToBioSource (BioSourcePtr biop, ObjectIdPtr oip)
dbt->tag = ObjectIdNew();
dbt->tag->str = str;
ValNodeAddPointer (&(biop->org->db), 0, dbt);
+ rval = TRUE;
}
+ return rval;
}
@@ -27864,6 +30640,7 @@ extern void ApplyBarcodeDbxrefsToBioseq (BioseqPtr bsp, Pointer data)
SeqMgrDescContext context;
SeqIdPtr sip;
DbtagPtr dbt;
+ Int4Ptr p_num;
if (bsp == NULL) return;
for (sip = bsp->id; sip != NULL; sip = sip->next)
@@ -27875,7 +30652,11 @@ extern void ApplyBarcodeDbxrefsToBioseq (BioseqPtr bsp, Pointer data)
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
if (sdp != NULL)
{
- ApplyBarcodeDbxrefToBioSource ((BioSourcePtr) sdp->data.ptrvalue, dbt->tag);
+ if (ApplyBarcodeDbxrefToBioSource ((BioSourcePtr) sdp->data.ptrvalue, dbt->tag)) {
+ if ((p_num = (Int4Ptr) data) != NULL) {
+ (*p_num)++;
+ }
+ }
}
}
}
@@ -27883,50 +30664,6 @@ extern void ApplyBarcodeDbxrefsToBioseq (BioseqPtr bsp, Pointer data)
/* Code for Country Fixup */
-typedef struct countrystatelist {
- CharPtr PNTR state_list;
- CharPtr country_name;
-} CountryStateListData, PNTR CountryStateListPtr;
-
-static Boolean IsMatchInSecondChoiceLists (CharPtr find_str, Int4 match_len, CountryStateListPtr second_choice_lists, CharPtr whole_string)
-{
- Int4 i, j;
- Boolean in_lists = FALSE;
- CharPtr cp;
- Int4 len_second_choice;
-
- if (StringHasNoText (find_str) || match_len < 1 || second_choice_lists == NULL) return FALSE;
-
- for (i = 0; second_choice_lists[i].state_list != NULL && !in_lists; i++)
- {
- for (j = 0; second_choice_lists[i].state_list[j] != NULL && !in_lists; j++)
- {
- len_second_choice = StringLen (second_choice_lists[i].state_list[j]);
- if (len_second_choice == match_len
- &&StringNCmp (find_str, second_choice_lists[i].state_list[j], match_len) == 0)
- {
- in_lists = TRUE;
- }
- else if ((cp = StringSearch (second_choice_lists[i].state_list[j], find_str)) != NULL
- && StringSearch (whole_string, second_choice_lists[i].state_list[j]) != NULL)
- {
- in_lists = TRUE;
- }
- }
- }
- return in_lists;
-}
-
-
-static Boolean IsBodyOfWater (CharPtr str)
-{
- if (StringHasNoText (str)) return FALSE;
- if (StringSearch (str, "Ocean") != NULL) return TRUE;
- if (StringSearch (str, "Gulf") != NULL) return TRUE;
- if (StringSearch (str, "Sea") != NULL) return TRUE;
- return FALSE;
-}
-
static Boolean IsSubstringOfStringInList (CharPtr whole_str, CharPtr match_p, CharPtr match_str, CharPtr PNTR list)
{
@@ -27958,481 +30695,21 @@ static Boolean IsSubstringOfStringInList (CharPtr whole_str, CharPtr match_p, Ch
}
-static CharPtr bad_context_names[] = {
- "Gibraltar Range National Park",
- "Western Australia",
- "WSW Chihuahua",
- "Mississippi River",
- NULL };
-
-static Boolean IsBadContextName (CharPtr whole_str, CharPtr match_p, CharPtr match_str)
-{
- Boolean rval;
- Int4 len;
-
- rval = IsSubstringOfStringInList (whole_str, match_p, match_str, bad_context_names);
- if (!rval) {
- len = StringLen (match_str);
- if (StringCmp (match_p + len, " River") == 0) {
- rval = TRUE;
- } else if (StringCmp (match_p + len, " State University") == 0) {
- rval = TRUE;
- }
- }
- return rval;
-}
-
-
-static Boolean IsPartOfStateName (CharPtr whole_str, CharPtr match_p, CharPtr match_str, CountryStateListPtr second_choice_lists)
-{
- Boolean rval = FALSE;
- Int4 i;
-
- if (second_choice_lists == NULL) return FALSE;
-
- for (i = 0; second_choice_lists[i].state_list != NULL && !rval; i++) {
- rval = IsSubstringOfStringInList (whole_str, match_p, match_str, second_choice_lists[i].state_list);
- }
- return rval;
-}
-
-
-static CharPtr
-FindStringInStringWithContext
-(CharPtr search_str, CharPtr look_for, CharPtr PNTR list, CountryStateListPtr second_choice_lists)
-{
- Int4 len_match;
- CharPtr cp;
-
- if (StringHasNoText (search_str) || StringHasNoText (look_for)) {
- return NULL;
- }
-
- cp = StringISearch (search_str, look_for);
- len_match = StringLen (look_for);
- while (cp != NULL) {
- /* if character after match is alpha, continue */
- if (isalpha ((Int4)(cp [len_match]))
- /* if character before match is alpha, continue */
- || (cp > search_str && isalpha ((Int4)(*(cp - 1))))
- /* if match is part of a known "bad context", continue */
- || IsBadContextName (search_str, cp, look_for)
- /* if is shorter match for other item, continue */
- || IsSubstringOfStringInList (search_str, cp, look_for, list)
- || IsPartOfStateName (search_str, cp, look_for, second_choice_lists)) {
- cp = StringSearch (cp + len_match, look_for);
- } else {
- return cp;
- }
- }
- return cp;
-}
-
-
-static ValNodePtr FindBestStringMatch (CharPtr PNTR list, CharPtr find_str, CountryStateListPtr second_choice_lists)
-{
- CharPtr PNTR ptr;
- Int4 len_find;
- CharPtr cp;
- Boolean ocean_best, ocean_this;
- Boolean best_in_second, this_in_second;
- ValNodePtr match_list = NULL, vnp, best_vnp;
-
- if (list == NULL || find_str == NULL) return NULL;
-
- len_find = StringLen (find_str);
-
- /* first, find all matches */
- for (ptr = list; ptr != NULL && *ptr != NULL; ptr++)
- {
- cp = FindStringInStringWithContext (find_str, *ptr, list, second_choice_lists);
- if (cp != NULL) {
- ValNodeAddPointer (&match_list, 1, *ptr);
- }
- }
-
- if (match_list == NULL) return NULL;
-
- /* now eliminate matches where we know we have a preference */
- best_vnp = match_list;
- for (vnp = match_list->next; vnp != NULL; vnp = vnp->next)
- {
- if (StringSearch (vnp->data.ptrvalue, best_vnp->data.ptrvalue) != NULL)
- {
- /* best is inside this one */
- best_vnp->choice = 0;
- best_vnp = vnp;
- } else if (StringSearch (best_vnp->data.ptrvalue, vnp->data.ptrvalue) != NULL) {
- /* this is inside best */
- vnp->choice = 0;
- } else {
- /* prefer non-ocean to ocean */
- ocean_best = IsBodyOfWater (best_vnp->data.ptrvalue);
- ocean_this = IsBodyOfWater (vnp->data.ptrvalue);
- if (ocean_this && !ocean_best)
- {
- /* disregard this one */
- vnp->choice = 0;
- continue;
- }
- else if (!ocean_this && ocean_best)
- {
- /* definitely take this to replace best */
- best_vnp->choice = 0;
- best_vnp = vnp;
- }
- else if (second_choice_lists != NULL)
- {
- best_in_second = IsMatchInSecondChoiceLists (best_vnp->data.ptrvalue, StringLen (best_vnp->data.ptrvalue), second_choice_lists, find_str);
- this_in_second = IsMatchInSecondChoiceLists (vnp->data.ptrvalue, StringLen (vnp->data.ptrvalue), second_choice_lists, find_str);
- /* if this choice is a second choice, but the previous best wasn't, don't bother with this */
- if (this_in_second && !best_in_second) {
- vnp->choice = 0;
- } else if (!this_in_second && best_in_second) {
- /* if previous choice was in the secondary lists, prefer this and ignore previous */
- best_vnp->choice = 0;
- best_vnp = vnp;
- }
- }
- }
- }
- vnp = ValNodeExtract (&match_list, 0);
- vnp = ValNodeFree (vnp);
- return match_list;
-}
-
-static CharPtr usa_state_list[] =
-{
- "Alabama",
- "Alaska",
- "Arizona",
- "Arkansas",
- "California",
- "Colorado",
- "Connecticut",
- "Delaware",
- "Florida",
- "Georgia",
- "Hawaii",
- "Idaho",
- "Illinois",
- "Indiana",
- "Iowa",
- "Kansas",
- "Kentucky",
- "Louisiana",
- "Maine",
- "Maryland",
- "Massachusetts",
- "Michigan",
- "Minnesota",
- "Mississippi",
- "Missouri",
- "Montana",
- "Nebraska",
- "Nevada",
- "New Hampshire",
- "New Jersey",
- "New Mexico",
- "New York",
- "North Carolina",
- "North Dakota",
- "Ohio",
- "Oklahoma",
- "Oregon",
- "Pennsylvania",
- "Rhode Island",
- "South Carolina",
- "South Dakota",
- "Tennessee",
- "Texas",
- "Utah",
- "Vermont",
- "Virginia",
- "Washington",
- "Washington, DC",
- "West Virginia",
- "Wisconsin",
- "Wyoming",
- NULL
-};
-
-static CharPtr uk_state_list[] = {
- "England",
- "Scotland",
- NULL
-};
-
-static CharPtr canada_province_list[] = {
- "Alberta",
- "British Columbia",
- "Manitoba",
- "New Brunswick",
- "Newfoundland and Labrador",
- "Northwest Territories",
- "Nova Scotia",
- "Nunavut",
- "Ontario",
- "Prince Edward Island",
- "Quebec",
- "Saskatchewan",
- "Yukon",
- NULL
-};
-
-static CharPtr australia_state_list[] = {
- "Australian Capital Territory",
- "Jervis Bay Territory",
- "New South Wales",
- "Northern Territory",
- "Queensland",
- "South Australia",
- "Tasmania",
- "Victoria",
- "Western Australia",
- NULL
-};
-
-static CharPtr mx_state_list[] =
-{
- "Aguascalientes",
- "Baja California",
- "Baja California Sur",
- "Campeche",
- "Chiapas",
- "Chihuahua",
- "Coahuila",
- "Colima",
- "Distrito Federal",
- "Durango",
- "Estado de Mexico",
- "Guanajuato",
- "Guerrero",
- "Hidalgo",
- "Jalisco",
- "Michoacan",
- "Morelos",
- "Nayarit",
- "Nuevo Leon",
- "Oaxaca",
- "Puebla",
- "Queretaro",
- "Quintana Roo",
- "San Luis Potosi",
- "Sinaloa",
- "Sonora",
- "Tabasco",
- "Tamaulipas",
- "Tlaxcala",
- "Veracruz",
- "Yucatan",
- "Zacatecas",
- NULL
-};
-
-static CharPtr portugal_state_list[] = {
- "Azores",
- NULL
-};
-
-static CharPtr ecuador_state_list[] = {
- "Galapagos",
- NULL
-};
-
-
-static CountryStateListData country_state_list[] = {
-{ usa_state_list, "USA" },
-{ uk_state_list, "United Kingdom"},
-{ canada_province_list, "Canada"},
-{ australia_state_list, "Australia"},
-{ mx_state_list, "Mexico"},
-{ portugal_state_list, "Portugal"},
-{ ecuador_state_list, "Ecuador"},
-{ NULL, NULL}
-};
-
-
-static CharPtr FindStateMatch (CharPtr search, CharPtr PNTR country, Int4Ptr state_len, BoolPtr pMulti)
-{
- CharPtr best_match = NULL;
- Int4 i;
- ValNodePtr state_matches;
-
- if (StringHasNoText (search)) return NULL;
- if (country != NULL) {
- *country = NULL;
- }
-
- for (i = 0; country_state_list[i].state_list != NULL; i++) {
- state_matches = FindBestStringMatch (country_state_list[i].state_list, search, NULL);
- if (state_matches != NULL) {
- if (state_matches->next == NULL && best_match == NULL) {
- best_match = state_matches->data.ptrvalue;
- if (country != NULL) {
- *country = country_state_list[i].country_name;
- }
- } else {
- *pMulti = TRUE;
- return NULL;
- }
- state_matches = ValNodeFree (state_matches);
- }
- }
- if (best_match != NULL && state_len != NULL) {
- *state_len = StringLen (best_match);
- }
- return best_match;
-}
-
-
-static CharPtr FindStateMatchForCountry (CharPtr search, CharPtr country, BoolPtr pMulti)
-{
- ValNodePtr state_matches;
- CharPtr state_match = NULL;
- Int4 i;
-
- if (StringHasNoText (search) || StringHasNoText (country)) return NULL;
-
- for (i = 0; country_state_list[i].state_list != NULL; i++) {
- if (StringCmp (country, country_state_list[i].country_name) == 0) {
- state_matches = FindBestStringMatch (country_state_list[i].state_list, search, NULL);
- if (state_matches != NULL) {
- if (state_matches->next == NULL) {
- state_match = state_matches->data.ptrvalue;
- } else {
- *pMulti = TRUE;
- }
- state_matches = ValNodeFree (state_matches);
- return state_match;
- }
- }
- }
- return NULL;
-}
-
-
-static void FixCountryStringForStateName (CharPtr PNTR pCountry, CharPtr state_name, CharPtr country_name)
-{
- CharPtr cp;
- Int4 len_state, len_country, len_qual, len_name, len_after;
- CharPtr before, newname;
-
- if (pCountry == NULL
- || StringHasNoText (*pCountry)
- || StringHasNoText (state_name)
- || StringHasNoText (country_name))
- {
- return;
- }
-
- cp = StringStr (*pCountry, state_name);
- if (cp == NULL)
- {
- return;
- }
- len_state = StringLen (state_name);
- if (isalpha ((Int4)(cp [len_state])))
- {
- return;
- }
-
- len_country = StringLen (country_name);
-
- len_qual = StringLen (*pCountry);
- if (cp == *pCountry)
- {
- len_after = len_qual - len_state;
- newname = (CharPtr) MemNew ((5 + len_country + len_state + len_after) * sizeof (Char));
- sprintf (newname, "%s: %s", country_name, state_name);
- if (len_after > 0)
- {
- StringCat (newname, ", ");
- StringCat (newname, *pCountry + len_state);
- }
- *pCountry = MemFree (*pCountry);
- *pCountry = newname;
- }
- else
- {
- newname = (CharPtr) MemNew (len_qual + 5 + len_country);
- *(cp - 1) = 0;
- before = StringSave (*pCountry);
- sprintf (newname, "%s: %s, ", country_name, state_name);
- StringNCpy (newname + 4 + len_country + len_state, before, StringLen (before));
- StringCpy (newname + 4 + len_country + len_state + StringLen (before), cp + len_state);
- len_name = StringLen (newname);
- while (isspace ((Int4)(newname[len_name - 1])) || ispunct ((Int4)(newname [len_name - 1])))
- {
- newname [len_name - 1] = 0;
- len_name --;
- }
- /* get rid of trailing comma if necessary */
- if (len_name == StringLen (country_name) + 3 + StringLen (state_name)
- && newname [len_name - 1] == ',')
- {
- newname [len_name - 1] = 0;
- len_name --;
- }
- before = MemFree (before);
- MemFree (*pCountry);
- *pCountry = newname;
- }
-}
-
-
-static CharPtr FindCountryMatch (CharPtr search_str, CharPtr PNTR country_list, BoolPtr isMulti)
-{
- ValNodePtr match_list;
- CharPtr best_match = NULL;
-
- if (StringSearch (search_str, "Yugoslavia")) {
- *isMulti = TRUE;
- return NULL;
- }
-
- match_list = FindBestStringMatch (country_list, search_str, country_state_list);
- if (match_list != NULL) {
- if (match_list->next == NULL) {
- best_match = match_list->data.ptrvalue;
- } else {
- *isMulti = TRUE;
- }
- match_list = ValNodeFree (match_list);
- }
- return best_match;
-}
-
-
static ReplacePairData country_name_fixes[] = {
{"Vietnam", "Viet Nam"},
{"Ivory Coast", "Cote d'Ivoire"},
{"United States of America", "USA"},
{"U.S.A.", "USA"},
{"The Netherlands", "Netherlands"},
+ {"People's Republic of China", "China"},
+ {"Pr China", "China" },
+ {"Prchina", "China" },
+ {"P.R.China", "China" },
+ {"P.R. China", "China" },
+ {"P, R, China", "China" },
{NULL, NULL}
};
-static void FixCountryNames (CharPtr PNTR pCountry)
-{
- ReplacePairPtr fix;
-
- if (pCountry == NULL || StringHasNoText (*pCountry))
- {
- return;
- }
-
- fix = country_name_fixes;
- while (fix->find != NULL)
- {
- if (StringStr (*pCountry, fix->replace) == NULL || StringSearch (fix->find, fix->replace) != NULL) {
- FindReplaceString (pCountry, fix->find, fix->replace, FALSE, TRUE);
- }
- fix++;
- }
-}
-
-
NLM_EXTERN CharPtr GetStateAbbreviation (CharPtr state)
{
ReplacePairPtr fix;
@@ -28449,381 +30726,6 @@ NLM_EXTERN CharPtr GetStateAbbreviation (CharPtr state)
}
-static void FixUSStateAbbreviations (CharPtr PNTR pCountry)
-{
- ReplacePairPtr fix;
-
- if (pCountry == NULL || StringHasNoText (*pCountry))
- {
- return;
- }
-
- fix = us_state_abbrev_fixes;
- while (fix->find != NULL)
- {
- FindReplaceString (pCountry, fix->find, fix->replace, TRUE, TRUE);
- fix++;
- }
-}
-
-
-static CharPtr MoveStateAndAddComma (CharPtr cntry_str, CharPtr state_match, Int4 len_cntry)
-{
- CharPtr newname = NULL, cp;
- Int4 len_state, len_qual, len_after, len_before;
-
- if (StringHasNoText (cntry_str) || StringHasNoText (state_match) || len_cntry < 1)
- {
- return cntry_str;
- }
-
- cp = StringISearch (cntry_str + len_cntry + 2, state_match);
- if (cp != NULL)
- {
- len_state = StringLen (state_match);
- len_qual = StringLen (cntry_str);
-
- if (cp == cntry_str + len_cntry + 2)
- {
- /* state is at beginning of string */
- len_after = len_qual - len_cntry - 2 - len_state;
- if (len_after == 0 || cntry_str [len_cntry + 2 + len_state] == ',')
- {
- /* already in correct format, nothing after state name */
- /* just copy in state name, in case we are correcting case */
- StringNCpy (cp, state_match, len_state);
- return cntry_str;
- }
- else
- {
- /* insert comma */
- newname = (CharPtr) MemNew (StringLen (cntry_str) + 3);
- StringNCpy (newname, cntry_str, len_cntry + 2 + len_state);
- newname [len_cntry + 2 + len_state] = 0;
- StringCat (newname, ",");
- StringCat (newname, cntry_str + len_cntry + 2 + len_state);
- cntry_str = MemFree (cntry_str);
- cntry_str = newname;
- }
- }
- else
- {
- newname = (CharPtr) MemNew (StringLen (cntry_str) + 3);
- StringNCpy (newname, cntry_str, len_cntry + 2);
- newname [len_cntry + 2] = 0;
- StringCat (newname, state_match);
- StringCat (newname, ", ");
- len_before = cp - cntry_str - 3 - len_cntry;
- StringNCpy (newname + len_cntry + 2 + len_state + 2,
- cntry_str + len_cntry + 2,
- len_before);
- newname [len_cntry + 2 + len_state + 2 + len_before] = 0;
- StringCat (newname, cp + len_state);
- cntry_str = MemFree (cntry_str);
- cntry_str = newname;
- }
- }
- return cntry_str;
-}
-
-typedef struct namedregion {
- CharPtr country;
- CharPtr state;
- CharPtr region;
-} NamedRegionData, PNTR NamedRegionPtr;
-
-
-static NamedRegionData named_regions[] = {
-{ "USA", "Alaska", "Aleutian Islands" }
-};
-
-static Int4 num_named_regions = sizeof (named_regions) / sizeof (NamedRegionData);
-
-static void TrimInternalSpacesAndLeadingPunct (CharPtr str)
-{
- CharPtr src, dst;
-
- src = str;
- dst = str;
-
- while (*src != 0) {
- if (isspace (*src)) {
- if (dst > str && !isspace (*(dst - 1))) {
- *dst = ' ';
- dst++;
- }
- } else if (ispunct (*src)) {
- if (dst > str) {
- *dst = *src;
- dst++;
- }
- } else {
- *dst = *src;
- dst++;
- }
- src++;
- }
- if (dst > src && (isspace (*(dst - 1)))) {
- *(dst - 1) = 0;
- } else {
- *dst = 0;
- }
-}
-
-static void FixForNamedRegions (CharPtr PNTR pCountry)
-{
- Int4 i, country_len, state_len, region_len;
- CharPtr region = NULL, country, state, new_str;
-
- if (pCountry == NULL || StringHasNoText (*pCountry)) return;
-
- for (i = 0; i < num_named_regions && region == NULL; i++) {
- region = StringSearch (*pCountry, named_regions[i].region);
- if (region != NULL) {
- country_len = StringLen (named_regions[i].country);
- state_len = StringLen (named_regions[i].state);
- country = StringSearch (*pCountry, named_regions[i].country);
- region_len = StringLen (named_regions[i].region);
- if (country != NULL) {
- MemSet (country, ' ', country_len);
- if (*(country + country_len) == ':') {
- *(country + country_len) = ' ';
- }
- }
- state = StringSearch (*pCountry, named_regions[i].state);
- if (state != NULL) {
- MemSet (state, ' ', state_len);
- if (*(state + state_len) == ',') {
- *(state + state_len) = ' ';
- }
- }
- MemSet (region, ' ', region_len);
- if (ispunct (*(region + region_len))) {
- *(region + region_len) = ' ';
- }
- TrimInternalSpacesAndLeadingPunct (*pCountry);
- new_str = (CharPtr) MemNew (sizeof (Char) * (country_len + state_len + region_len + 7 + StringLen (*pCountry)));
- sprintf (new_str, "%s: %s, %s", named_regions[i].country, named_regions[i].state, named_regions[i].region);
- if (!StringHasNoText (*pCountry)) {
- StringCat (new_str, ", ");
- StringCat (new_str, *pCountry);
- }
- *pCountry = MemFree (*pCountry);
- *pCountry = new_str;
- }
- }
-}
-
-
-static void FindCountryName (CharPtr PNTR pCountry, CharPtr PNTR country_list)
-{
- CharPtr best_match = NULL, state_match, state_country = NULL;
- CharPtr cp, before, newname, after;
- Int4 len_cntry = 0, len_state = 0, len_qual, len_name;
- Boolean state_multi = FALSE, country_multi = FALSE;
-
- if (pCountry == NULL || StringHasNoText (*pCountry))
- {
- return;
- }
-
- best_match = FindCountryMatch (*pCountry, country_list, &country_multi);
- if (country_multi) {
- *pCountry = MemFree (*pCountry);
- return;
- }
- state_match = FindStateMatch (*pCountry, &state_country, &len_state, &state_multi);
-
- if ((best_match == NULL && state_match == NULL) || (best_match == NULL && state_multi)) {
- *pCountry = MemFree (*pCountry);
- return;
- } else if (best_match != NULL && state_match != NULL && StringCmp (best_match, state_country) != 0) {
- state_match = NULL;
- }
-
- /* if match could be a country or a state, treat it as a country */
- if (StringCmp (best_match, state_match) == 0) {
- state_match = NULL;
- }
-
- if (IsBodyOfWater (best_match) && state_match != NULL)
- {
- /* prefer state to body of water */
- best_match = NULL;
- }
-
- /* if we have a country and a state, but the state is for a different country, drop the state */
- if (state_match != NULL && best_match != NULL && StringNCmp (state_country, best_match, len_cntry) != 0)
- {
- state_multi = FALSE;
- state_match = FindStateMatchForCountry (*pCountry, best_match, &state_multi);
- if (state_multi) {
- *pCountry = MemFree (*pCountry);
- return;
- }
- }
-
- if (best_match != NULL && StringCmp (best_match, "USA") == 0 && StringLen (*pCountry) > 3 && state_match == NULL)
- {
- FixUSStateAbbreviations (pCountry);
- state_multi = FALSE;
- state_match = FindStateMatchForCountry (*pCountry, best_match, &state_multi);
- if (state_multi)
- {
- *pCountry = MemFree (*pCountry);
- return;
- }
- if (state_match != NULL) {
- FindReplaceString (pCountry, "USA:", "", TRUE, TRUE);
- FindReplaceString (pCountry, "USA", "", TRUE, TRUE);
- best_match = NULL;
- state_country = "USA";
- }
- }
-
- if (best_match == NULL && state_match == NULL) {
- *pCountry = MemFree (*pCountry);
- return;
- }
- else if (best_match == NULL && state_match != NULL)
- {
- FixCountryStringForStateName (pCountry, state_match, state_country);
- }
- else
- {
- cp = StringISearch (*pCountry, best_match);
- len_cntry = StringLen (best_match);
- after = cp + len_cntry;
- while (isspace (*after) || ispunct(*after))
- {
- after++;
- }
-
- if (cp != NULL && !isalpha ((Int4)(cp [len_cntry])))
- {
- len_qual = StringLen (*pCountry);
- if (cp == *pCountry)
- {
- newname = (CharPtr) MemNew (len_cntry + StringLen (after) + 3);
- sprintf (newname, "%s: %s", best_match, after);
- }
- else
- {
- /* strip spaces and punctuation from before */
- *(cp - 1) = 0;
- before = cp - 2;
- while (before >= *pCountry
- && (isspace (*before) || ispunct (*before)))
- {
- *before = 0;
- before--;
- }
- before = *pCountry;
- while (isspace (*before) || ispunct(*before))
- {
- before++;
- }
-
- newname = (CharPtr) MemNew (len_cntry + StringLen (before) + StringLen (after) + 4);
- sprintf (newname, "%s: %s%s%s", best_match, before,
- StringHasNoText (before) || StringHasNoText(after) ? "" : " ",
- after);
- }
- if (state_match != NULL)
- {
- newname = MoveStateAndAddComma (newname, state_match, len_cntry);
- }
-
- /* remove trailing spaces and punctuation */
- len_name = StringLen (newname);
- while (isspace ((Int4)(newname[len_name - 1]))
- || newname [len_name - 1] == ','
- || newname [len_name - 1] == ':'
- || newname [len_name - 1] == ';')
- {
- newname [len_name - 1] = 0;
- len_name --;
- }
- MemFree (*pCountry);
- *pCountry = newname;
- }
- }
-}
-
-
-static void CountryColonToComma (CharPtr PNTR country_str)
-{
- CharPtr cp, cp1, cp2, new_name;
- Int4 pre_len;
-
- if (country_str == NULL || *country_str == NULL) {
- return;
- }
-
- cp = StringChr (*country_str, ':');
- if (cp == NULL) return;
- cp = StringChr (cp + 1, ':');
- while (cp != NULL) {
- cp1 = cp;
- while (cp1 > *country_str && (isspace (*(cp1 - 1)) || *(cp1 - 1) == ',')) {
- cp1--;
- }
- pre_len = cp1 - *country_str;
- cp2 = cp + 1;
- while (isspace (*cp2) || *cp2 == ',') {
- cp2++;
- }
- new_name = (CharPtr) MemNew ((pre_len + StringLen (cp2) + 3) * sizeof (Char));
- StringNCpy (new_name, *country_str, pre_len);
- StringCat (new_name, ", ");
- StringCat (new_name, cp2);
- *country_str = MemFree (*country_str);
- *country_str = new_name;
- cp = StringChr ((*country_str) + pre_len, ':');
- }
-}
-
-static void RemoveDoubleCommas (CharPtr PNTR country_str)
-{
- CharPtr cp, cp1, cp2, new_name;
- Int4 pre_len;
- Boolean found_second_comma;
-
- if (country_str == NULL || *country_str == NULL) {
- return;
- }
-
- cp = StringChr (*country_str, ',');
- while (cp != NULL) {
- cp1 = cp;
- while (cp1 > *country_str && (isspace (*(cp1 - 1)) || *(cp1 - 1) == ',')) {
- cp1--;
- }
- pre_len = cp1 - *country_str;
- cp2 = cp + 1;
- found_second_comma = FALSE;
- while (isspace (*cp2) || *cp2 == ',') {
- if (*cp2 == ',') {
- found_second_comma = TRUE;
- }
- cp2++;
- }
-
- if (cp1 < cp || found_second_comma || cp2 > cp + 2) {
- new_name = (CharPtr) MemNew ((pre_len + StringLen (cp2) + 3) * sizeof (Char));
- StringNCpy (new_name, *country_str, pre_len);
- StringCat (new_name, ", ");
- StringCat (new_name, cp2);
- *country_str = MemFree (*country_str);
- *country_str = new_name;
- cp = StringChr ((*country_str) + pre_len + 1, ',');
- } else {
- cp = StringChr (cp2, ',');
- }
- }
-}
-
-
static Boolean ContainsMultipleCountryNames (CharPtr PNTR list, CharPtr search_str)
{
CharPtr PNTR ptr;
@@ -28932,7 +30834,15 @@ static CharPtr NewFixCountry (CharPtr country, CharPtr PNTR country_list)
too_many_countries = ContainsMultipleCountryNames (country_list, country);
}
- if (valid_country != NULL && !too_many_countries) {
+ if (valid_country != NULL && too_many_countries && valid_country == country) {
+ len_country = StringCSpn (valid_country, separator_list);
+ if (country[len_country] == ':' && !isspace (country[len_country + 1])) {
+ new_country = MemNew (sizeof (Char) * (StringLen (country) + 2));
+ StringNCpy (new_country, country, len_country + 1);
+ StringCat (new_country, " ");
+ StringCat (new_country, country + len_country + 1);
+ }
+ } else if (valid_country != NULL && !too_many_countries) {
len_country = StringCSpn (valid_country, separator_list);
len_before = valid_country - country;
@@ -28989,20 +30899,528 @@ extern CharPtr GetCountryFix (CharPtr country, CharPtr PNTR country_list)
CharPtr new_country;
if (StringHasNoText (country)) return NULL;
-#if 1
new_country = NewFixCountry (country, country_list);
-#else
- new_country = StringSave (country);
- FixCountryNames (&new_country);
- FindCountryName (&new_country, country_list);
- CountryColonToComma (&new_country);
- RemoveDoubleCommas (&new_country);
- FixForNamedRegions (&new_country);
-#endif
return new_country;
}
+/* State shared by the country-fixup visitor callbacks (passed to them as userdata). */
+typedef struct countryfixup {
+ CharPtr PNTR country_list; /* list handed to GetCountryFix for every country value */
+ ValNodePtr warning_list; /* (object type, object) entries for which GetCountryFix returned NULL */
+ Boolean capitalize_after_colon; /* when TRUE, the text after the first ':' is re-capitalized word by word */
+ Boolean any_changed; /* set to TRUE once any country value has been replaced */
+ FILE *log_fp; /* if not NULL, receives one "Changed ... to ..." line per replacement */
+} CountryFixupData, PNTR CountryFixupPtr;
+
+
+/*
+ * CapitalizeFirstLetterOfEveryWord
+ *
+ * Rewrites pString in place so that every whitespace-delimited word starts
+ * with an upper-case letter and all other letters of the word are lower
+ * case.  A word whose first character is not a letter (e.g. "(abc") gets no
+ * capital; its letters are only lower-cased.  NULL and empty strings are
+ * left untouched.
+ *
+ * Characters are converted to unsigned char before being handed to the
+ * <ctype.h> functions: passing a negative plain char (any byte >= 0x80 on
+ * platforms where char is signed) to them is undefined behavior.
+ */
+static void CapitalizeFirstLetterOfEveryWord (CharPtr pString)
+{
+  CharPtr pCh = pString;
+
+  if (pCh == NULL) return;
+
+  while (*pCh != 0)
+  {
+    /* skip the white space in front of the next word */
+    while (isspace ((unsigned char) *pCh))
+    {
+      pCh++;
+    }
+
+    /* upper-case the first character of the word if it is a letter */
+    if (isalpha ((unsigned char) *pCh))
+    {
+      *pCh = (Char) toupper ((unsigned char) *pCh);
+      pCh++;
+    }
+
+    /* lower-case the remaining letters of the word */
+    while (*pCh != 0 && !isspace ((unsigned char) *pCh))
+    {
+      if (isalpha ((unsigned char) *pCh)) {
+        *pCh = (Char) tolower ((unsigned char) *pCh);
+      }
+      pCh++;
+    }
+  }
+}
+
+
+/*
+ * CountryFixupItem
+ *
+ * Normalizes every non-empty country subsource of the BioSource that
+ * GetBioSourceFromObject returns for (choice, data).
+ *
+ *   choice, data  object type and pointer identifying the descriptor/feature
+ *   c             fixup state (country list, options, results)
+ *
+ * For each country value GetCountryFix is asked for a corrected string:
+ *   - NULL result: (choice, data) is appended to c->warning_list and the
+ *     stored value is left alone.
+ *   - otherwise, if the result contains ':', the text after the colon is
+ *     re-capitalized when c->capitalize_after_colon is set, and a single
+ *     space is inserted after the colon when the colon is directly
+ *     followed by text.  If the final string differs from the stored
+ *     value it replaces it, c->any_changed is set, and the change is
+ *     written to c->log_fp when that is not NULL.
+ */
+static void CountryFixupItem (Uint1 choice, Pointer data, CountryFixupPtr c)
+{
+  BioSourcePtr biop;
+  SubSourcePtr ssp;
+  CharPtr      new_country;
+  CharPtr      cp;
+  CharPtr      tmp;
+  Int4         country_len;
+
+  if (data == NULL || c == NULL) return;
+
+  biop = GetBioSourceFromObject (choice, data);
+  if (biop == NULL) return;
+
+  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
+  {
+    if (ssp->subtype != SUBSRC_country || StringHasNoText (ssp->name)) {
+      continue;
+    }
+
+    new_country = GetCountryFix (ssp->name, c->country_list);
+    if (new_country == NULL) {
+      /* no fix could be determined: report the object to the caller */
+      ValNodeAddPointer (&c->warning_list, choice, data);
+      continue;
+    }
+
+    cp = StringChr (new_country, ':');
+    if (cp != NULL) {
+      country_len = (Int4) (cp - new_country);
+      /* skip colon */
+      cp++;
+      /* skip over space after colon */
+      cp += StringSpn (cp, " \t");
+      if (c->capitalize_after_colon) {
+        /* reset capitalization */
+        CapitalizeFirstLetterOfEveryWord (cp);
+      }
+      if (*(new_country + country_len + 1) != 0
+          && !isspace ((unsigned char) *(new_country + country_len + 1))) {
+        /* The colon is directly followed by text, so nothing was skipped
+         * above and cp points at the first character after the colon.
+         * Rebuild as "<country>: <rest>".  The rest must be copied from
+         * cp itself; copying from cp + 1 would drop its first character
+         * ("USA:Texas" -> "USA: exas").  The buffer holds the original
+         * length plus the inserted space plus the terminator.
+         */
+        tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (new_country) + 2));
+        StringNCpy (tmp, new_country, country_len + 1);
+        StringCat (tmp, " ");
+        StringCat (tmp, cp);
+        new_country = MemFree (new_country);
+        new_country = tmp;
+      }
+    }
+
+    if (StringCmp (ssp->name, new_country) == 0) {
+      /* already in the preferred form */
+      new_country = MemFree (new_country);
+    } else {
+      c->any_changed = TRUE;
+      if (c->log_fp != NULL) {
+        fprintf (c->log_fp, "Changed '%s' to '%s'\n", ssp->name, new_country);
+      }
+      /* the subsource takes ownership of the corrected string */
+      ssp->name = MemFree (ssp->name);
+      ssp->name = new_country;
+    }
+  }
+}
+
+
+/* Descriptor visitor: hands source descriptors to CountryFixupItem;
+ * userdata is the CountryFixupPtr state.  Anything else is ignored.
+ */
+static void CountryFixupDesc (SeqDescrPtr sdp, Pointer userdata)
+{
+  if (sdp == NULL || userdata == NULL) return;
+  if (sdp->choice != Seq_descr_source) return;
+
+  CountryFixupItem (OBJ_SEQDESC, sdp, (CountryFixupPtr) userdata);
+}
+
+
+/* Feature visitor: hands BioSource features to CountryFixupItem;
+ * userdata is the CountryFixupPtr state.  Anything else is ignored.
+ */
+static void CountryFixupFeat (SeqFeatPtr sfp, Pointer userdata)
+{
+  if (sfp == NULL || userdata == NULL) return;
+  if (sfp->data.choice != SEQFEAT_BIOSRC) return;
+
+  CountryFixupItem (OBJ_SEQFEAT, sfp, (CountryFixupPtr) userdata);
+}
+
+
+/*
+ * FixupCountryQuals
+ *
+ * Runs the country fixup over every source descriptor and source feature
+ * visited in sep.
+ *
+ *   sep              entry whose descriptors and features are visited
+ *   fix_after_colon  when TRUE, text after the colon is re-capitalized
+ *
+ * Returns the list of (object type, object) entries for which no fix could
+ * be determined, or NULL when there are none or when GetValidCountryList
+ * yields no list.
+ */
+NLM_EXTERN ValNodePtr FixupCountryQuals (SeqEntryPtr sep, Boolean fix_after_colon)
+{
+  CountryFixupData fixup;
+
+  MemSet (&fixup, 0, sizeof (fixup));
+  fixup.country_list = GetValidCountryList ();
+  if (fixup.country_list != NULL) {
+    fixup.capitalize_after_colon = fix_after_colon;
+    VisitDescriptorsInSep (sep, &fixup, CountryFixupDesc);
+    VisitFeaturesInSep (sep, &fixup, CountryFixupFeat);
+  }
+  /* still NULL from the MemSet when nothing was visited or reported */
+  return fixup.warning_list;
+}
+
+
+/*
+ * FixupCountryQualsWithLog
+ *
+ * Same traversal as FixupCountryQuals, but each replacement is written to
+ * log_fp (when not NULL) and the list of unfixable objects is discarded.
+ *
+ * Returns TRUE if at least one country value was changed; FALSE otherwise,
+ * including when GetValidCountryList yields no list.
+ */
+NLM_EXTERN Boolean FixupCountryQualsWithLog (SeqEntryPtr sep, Boolean fix_after_colon, FILE *log_fp)
+{
+  CountryFixupData fixup;
+
+  MemSet (&fixup, 0, sizeof (fixup));
+  fixup.country_list = GetValidCountryList ();
+  if (fixup.country_list == NULL) return FALSE;
+
+  fixup.log_fp = log_fp;
+  fixup.capitalize_after_colon = fix_after_colon;
+  fixup.warning_list = NULL;
+
+  VisitDescriptorsInSep (sep, &fixup, CountryFixupDesc);
+  VisitFeaturesInSep (sep, &fixup, CountryFixupFeat);
+
+  /* this entry point only reports whether anything changed */
+  fixup.warning_list = ValNodeFree (fixup.warning_list);
+  return fixup.any_changed;
+}
+
+
+/* Parameters and results for FixupBioSourceQuals (passed to it as the visitor's data pointer). */
+typedef struct qualfixup {
+ SourceConstraintPtr scp; /* only BioSources accepted by DoesBiosourceMatchConstraint with this constraint are processed */
+ ReplacePairPtr fix_list; /* find/replace pairs; the list ends at the entry whose find is NULL */
+ Boolean case_counts; /* passed through to FindReplaceString (fourth argument) */
+ Boolean whole_word; /* passed through to FindReplaceString (fifth argument) */
+ Boolean is_orgmod; /* TRUE: fix OrgMod subnames; FALSE: fix SubSource names */
+ Uint1 subtype; /* only modifiers/subsources with this subtype are fixed */
+ Boolean any_changed; /* set to TRUE when any value was altered */
+ FILE *log_fp; /* if not NULL, receives one "Changed ... to ..." line per altered value */
+} QualFixupData, PNTR QualFixupPtr;
+
+/* Applies every pair of qf->fix_list, in order, to the string at *p_val.
+ * Each pair that alters the text sets qf->any_changed and, when a log file
+ * is present, writes the before/after values to it.
+ */
+static void ApplyFixListToQualValue (CharPtr PNTR p_val, QualFixupPtr qf)
+{
+  ReplacePairPtr pair;
+  CharPtr        before;
+
+  for (pair = qf->fix_list; pair->find != NULL; pair++) {
+    /* keep a copy so a change made by this pair can be detected */
+    before = StringSave (*p_val);
+    FindReplaceString (p_val, pair->find, pair->replace, qf->case_counts, qf->whole_word);
+    if (StringCmp (before, *p_val) != 0) {
+      qf->any_changed = TRUE;
+      if (qf->log_fp != NULL) {
+        fprintf (qf->log_fp, "Changed '%s' to '%s'\n", before, *p_val);
+      }
+    }
+    before = MemFree (before);
+  }
+}
+
+
+/*
+ * FixupBioSourceQuals
+ *
+ * BioSource visitor; data is a QualFixupPtr.  When the BioSource satisfies
+ * the source constraint, the fix list is applied to every OrgMod subname
+ * (is_orgmod TRUE) or SubSource name (is_orgmod FALSE) whose subtype
+ * equals qf->subtype.
+ */
+static void FixupBioSourceQuals (BioSourcePtr biop, Pointer data)
+{
+  QualFixupPtr qf;
+  OrgModPtr    mod;
+  SubSourcePtr ssp;
+
+  if (biop == NULL) return;
+  qf = (QualFixupPtr) data;
+  if (qf == NULL || qf->fix_list == NULL) return;
+  if (!DoesBiosourceMatchConstraint (biop, qf->scp)) return;
+
+  if (!qf->is_orgmod) {
+    for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+      if (ssp->subtype == qf->subtype) {
+        ApplyFixListToQualValue (&(ssp->name), qf);
+      }
+    }
+    return;
+  }
+
+  /* OrgMods hang off the organism name, which may be absent */
+  if (biop->org == NULL || biop->org->orgname == NULL) return;
+
+  for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
+    if (mod->subtype == qf->subtype) {
+      ApplyFixListToQualValue (&(mod->subname), qf);
+    }
+  }
+}
+
+
+/* Preferred spellings of mouse strain names, used as the fix list of
+ * FixupMouseStrains.  Find and replace text are identical in every pair;
+ * FixupMouseStrains applies them with case_counts FALSE and whole_word TRUE.
+ * NOTE(review): presumably the effect is to reset the capitalization of a
+ * matching strain -- FindReplaceString is defined elsewhere, confirm.
+ * "C57BL/6" and "C57BL/6J" appear twice in the list.
+ */
+static ReplacePairData mouse_strain_fixes[] = {
+ {"129/Sv", "129/Sv"} ,
+ {"129/SvJ", "129/SvJ"} ,
+ {"BALB/c", "BALB/c"} ,
+ {"C57BL/6", "C57BL/6"} ,
+ {"C57BL/6J", "C57BL/6J"} ,
+ {"CD-1", "CD-1"} ,
+ {"CZECHII", "CZECHII"} ,
+ {"FVB/N", "FVB/N"} ,
+ {"FVB/N-3", "FVB/N-3"} ,
+ {"ICR", "ICR"} ,
+ {"NMRI", "NMRI"} ,
+ {"NOD", "NOD"} ,
+ {"C3H", "C3H"} ,
+ {"C57BL", "C57BL"} ,
+ {"C57BL/6", "C57BL/6"} ,
+ {"C57BL/6J", "C57BL/6J" } ,
+ {"DBA/2", "DBA/2"} ,
+ {NULL, NULL}};
+
+/*
+ * FixupMouseStrains
+ *
+ * Applies mouse_strain_fixes (case-insensitive, whole word) to the strain
+ * OrgMods of every BioSource in sep whose taxname starts with
+ * "Mus musculus".  Changes are written to log_fp when it is not NULL.
+ *
+ * Returns TRUE if any strain value was altered.
+ */
+NLM_EXTERN Boolean FixupMouseStrains (SeqEntryPtr sep, FILE *log_fp)
+{
+  QualFixupData       qd;
+  SourceConstraintPtr scp;
+
+  /* constraint: taxname starts with "Mus musculus" */
+  scp = SourceConstraintNew ();
+  scp->constraint = StringConstraintNew ();
+  scp->constraint->match_text = StringSave ("Mus musculus");
+  scp->constraint->match_location = String_location_starts;
+  scp->field1 = ValNodeNew (NULL);
+  scp->field1->choice = SourceQualValChoice_textqual;
+  scp->field1->data.intvalue = Source_qual_taxname;
+
+  MemSet (&qd, 0, sizeof (qd));
+  qd.scp = scp;
+  qd.fix_list = mouse_strain_fixes;
+  qd.is_orgmod = TRUE;
+  qd.subtype = ORGMOD_strain;
+  qd.case_counts = FALSE;
+  qd.whole_word = TRUE;
+  qd.log_fp = log_fp;
+
+  VisitBioSourcesInSep (sep, &qd, FixupBioSourceQuals);
+
+  qd.scp = SourceConstraintFree (qd.scp);
+  return qd.any_changed;
+}
+
+
+/* Associates one source qualifier with its list of preferred spellings. */
+typedef struct srcqualfixlist {
+ Int4 src_qual; /* Source_qual_* value the list applies to */
+ CharPtr PNTR fix_list; /* NULL-terminated array of preferred spellings */
+} SrcQualFixListData, PNTR SrcQualFixListPtr;
+
+
+/* Preferred spellings for the sex qualifier (see src_qual_fixes): a value
+ * equal to an entry ignoring case is replaced by the entry's exact text.
+ */
+static CharPtr src_qual_sex_words[] = {
+ "male",
+ "female",
+ NULL };
+
+/* Preferred spellings for the nat-host qualifier (see src_qual_fixes): a
+ * value equal to an entry ignoring case is replaced by the entry's exact
+ * text.  "yak" is listed twice.
+ */
+static CharPtr src_qual_host_words[] = {
+ "porcine",
+ "caprine",
+ "ovine",
+ "cattle",
+ "canine",
+ "feline",
+ "bovine",
+ "tomato",
+ "pepper",
+ "yak",
+ "horse",
+ "pig",
+ "cow",
+ "rice",
+ "turkey",
+ "chicken",
+ "sheep",
+ "yak",
+ "salmon",
+ "wolf",
+ "nematode",
+ "fox",
+ "swine",
+ "fish",
+ "maize",
+ "soybean",
+ "wheat",
+ NULL };
+
+/* Preferred spellings for the lab-host qualifier (see src_qual_fixes): a
+ * value equal to an entry ignoring case is replaced by the entry's exact
+ * text.  The entries are the same as in src_qual_host_words.
+ */
+ static CharPtr src_qual_lab_host_words[] = {
+ "porcine",
+ "caprine",
+ "ovine",
+ "cattle",
+ "canine",
+ "feline",
+ "bovine",
+ "tomato",
+ "pepper",
+ "yak",
+ "horse",
+ "pig",
+ "cow",
+ "rice",
+ "turkey",
+ "chicken",
+ "sheep",
+ "yak",
+ "salmon",
+ "wolf",
+ "nematode",
+ "fox",
+ "swine",
+ "fish",
+ "maize",
+ "soybean",
+ "wheat",
+ NULL };
+
+/* Preferred spellings for the isolation-source qualifier (see
+ * src_qual_fixes): a value equal to an entry ignoring case is replaced by
+ * the entry's exact text.  Because the comparison covers the whole value,
+ * an entry must be spelled correctly to ever match real data; the former
+ * "cerbrospinal fluid" entry is therefore corrected to
+ * "cerebrospinal fluid".  Some entries occur more than once; that is
+ * harmless because every occurrence yields the same replacement.
+ */
+static CharPtr src_qual_isolation_source_words[] = {
+  "porcine",
+  "caprine",
+  "ovine",
+  "cattle",
+  "canine",
+  "feline",
+  "bovine",
+  "tomato",
+  "pepper",
+  "yak",
+  "horse",
+  "pig",
+  "cow",
+  "rice",
+  "turkey",
+  "chicken",
+  "rhizosphere soil",
+  "soil",
+  "agricultural soil",
+  "seedling",
+  "fruit",
+  "leaf",
+  "leaves",
+  "stem",
+  "flower",
+  "root",
+  "root tip",
+  "mammary gland",
+  "skin",
+  "serum",
+  "testis",
+  "cerebrospinal fluid",
+  "placenta",
+  "blood",
+  "head",
+  "ovary",
+  "heart",
+  "rumen",
+  "plasma",
+  "wound",
+  "sera",
+  "lymph node",
+  "lung",
+  "swab",
+  "patient",
+  "feces",
+  "forest",
+  "clinical",
+  "milk",
+  "leaves",
+  "oviduct",
+  "whole blood",
+  "salivary gland",
+  "oviduct",
+  "ovary",
+  "testes",
+  "skin",
+  "brain",
+  "nasal swab",
+  "urine",
+  "intestines",
+  "stomach",
+  "muscle",
+  "muscle tissue",
+  "kidney",
+  "epithelium",
+  "acne",
+  "cornea",
+  NULL };
+
+/* Preferred spellings for the tissue-type qualifier (see src_qual_fixes):
+ * a value equal to an entry ignoring case is replaced by the entry's exact
+ * text.  "mammary gland" and "skin" are listed twice.
+ */
+static CharPtr src_qual_tissue_type_words[] = {
+ "blood",
+ "whole blood",
+ "salivary gland",
+ "oviduct",
+ "mammary gland",
+ "testis",
+ "placenta",
+ "heart",
+ "ovary",
+ "testes",
+ "skin",
+ "brain",
+ "intestines",
+ "stomach",
+ "muscle",
+ "kidney",
+ "muscle tissue",
+ "epithelium",
+ "lymph node",
+ "lung",
+ "mammary gland",
+ "skin",
+ "cornea",
+ "fruit",
+ "leaf",
+ "leaves",
+ "stem",
+ "flower",
+ "root",
+ "root tip",
+ NULL };
+
+/* Preferred spellings for the dev-stage qualifier (see src_qual_fixes): a
+ * value equal to an entry ignoring case is replaced by the entry's exact
+ * text.
+ */
+static CharPtr src_qual_dev_stage_words[] = {
+ "adult",
+ NULL };
+
+/* Lookup table scanned by FixSrcQualCaps: maps a source qualifier to the
+ * word list used to fix its values.  The scan stops at the entry whose
+ * fix_list is NULL.
+ */
+static SrcQualFixListData src_qual_fixes[] = {
+ {Source_qual_sex, src_qual_sex_words} ,
+ {Source_qual_nat_host, src_qual_host_words},
+ {Source_qual_isolation_source, src_qual_isolation_source_words},
+ {Source_qual_lab_host, src_qual_lab_host_words},
+ {Source_qual_tissue_type, src_qual_tissue_type_words},
+ {Source_qual_dev_stage, src_qual_dev_stage_words},
+ {0, NULL}
+};
+
+/* State for FixSourceQualCaps (passed to it as the visitor's data pointer). */
+typedef struct srcqualfix {
+ Boolean any_change; /* set to TRUE when a qualifier value was replaced */
+ FILE *log_fp; /* if not NULL, receives one "Changed ... to ..." line per replacement */
+ CharPtr PNTR fix_list; /* NULL-terminated list of preferred spellings */
+ ValNode vn; /* identifies the qualifier; its address is passed to GetSourceQualFromBioSource and SetSourceQualInBioSource */
+} SrcQualFixData, PNTR SrcQualFixPtr;
+
+
+/*
+ * FixSourceQualCaps
+ *
+ * BioSource visitor; data is a SrcQualFixPtr.  Reads the qualifier
+ * identified by sq->vn and, when the value equals an entry of
+ * sq->fix_list ignoring case but not exactly, writes the entry's spelling
+ * back.  A successful write sets sq->any_change and is logged to
+ * sq->log_fp when that is not NULL.
+ */
+static void FixSourceQualCaps (BioSourcePtr biop, Pointer data)
+{
+  SrcQualFixPtr    sq;
+  CharPtr          current, fixed;
+  CharPtr PNTR     entry;
+  StringConstraint only_current;
+
+  if (biop == NULL) return;
+  sq = (SrcQualFixPtr) data;
+  if (sq == NULL || sq->fix_list == NULL) return;
+
+  current = GetSourceQualFromBioSource (biop, &(sq->vn), NULL);
+  if (current == NULL) return;
+
+  /* start from a copy of the current value; every entry that matches
+   * ignoring case swaps in the list's spelling (the last match wins)
+   */
+  fixed = StringSave (current);
+  for (entry = sq->fix_list; *entry != NULL; entry++) {
+    if (StringICmp (fixed, *entry) == 0) {
+      fixed = MemFree (fixed);
+      fixed = StringSave (*entry);
+    }
+  }
+
+  if (StringCmp (current, fixed) != 0) {
+    /* restrict the replacement to values equal to the one just read */
+    MemSet (&only_current, 0, sizeof (StringConstraint));
+    only_current.match_text = current;
+    only_current.match_location = String_location_equals;
+    if (SetSourceQualInBioSource (biop, &(sq->vn), &only_current, fixed, ExistingTextOption_replace_old)) {
+      sq->any_change = TRUE;
+      if (sq->log_fp != NULL) {
+        fprintf (sq->log_fp, "Changed '%s' to '%s'\n", current, fixed);
+      }
+    }
+  }
+
+  current = MemFree (current);
+  fixed = MemFree (fixed);
+}
+
+
+/*
+ * FixSrcQualCaps
+ *
+ * Fixes the spelling case of the text qualifier src_qual on every
+ * BioSource in sep, using the word list registered for that qualifier in
+ * src_qual_fixes.  Qualifiers without a registered list are left alone.
+ * Changes are written to log_fp when it is not NULL.
+ *
+ * Returns TRUE if any value was changed.
+ */
+NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp)
+{
+  SrcQualFixData    sd;
+  SrcQualFixListPtr entry;
+
+  /* zeroing leaves any_change FALSE and the embedded ValNode cleared */
+  MemSet (&sd, 0, sizeof (sd));
+  sd.log_fp = log_fp;
+  sd.vn.choice = SourceQualChoice_textqual;
+
+  /* find the word list registered for this qualifier */
+  for (entry = src_qual_fixes; entry->fix_list != NULL; entry++) {
+    if (entry->src_qual != src_qual) {
+      continue;
+    }
+    sd.fix_list = entry->fix_list;
+    sd.vn.data.intvalue = src_qual;
+    VisitBioSourcesInSep (sep, &sd, FixSourceQualCaps);
+  }
+
+  return sd.any_change;
+}
+
+
extern ValNodePtr ListFeaturesInLocation (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice)
{
ValNodePtr feat_list = NULL;
@@ -29099,7 +31517,7 @@ extern ValNodePtr ListCodingRegionsContainedInSourceFeatures (SeqEntryPtr sep)
extern void CountNsInSequence (BioseqPtr bsp, Int4Ptr p_total, Int4Ptr p_max_stretch, Boolean expand_gaps)
{
- Int2 ctr, pos, i;
+ Int4 ctr, pos, i;
Char buf1[51];
Int4 len = 50, total = 0, max_stretch = 0, this_stretch = 0;
StreamFlgType flags = STREAM_CORRECT_INVAL;
@@ -29223,6 +31641,11 @@ NLM_EXTERN void ParseTaxNameToQuals (OrgRefPtr org, TextFsaPtr tags)
Int4 val_len, match_len;
if (tags == NULL || org == NULL || StringHasNoText (org->taxname)) return;
+
+ if (StringSearch (org->taxname, " x ") != NULL) {
+ /* ignore cross, applies only to one parent, do not parse */
+ return;
+ }
state = 0;
ptr = org->taxname;
ch = *ptr;
@@ -29276,3 +31699,238 @@ NLM_EXTERN ValNodePtr GetLocusTagPrefixList (SeqEntryPtr sep)
return list;
}
+/* Note phrases that RemoveCultureNotesFromText deletes wherever they occur
+ * (searched with case ignored).  NULL-terminated.
+ */
+static CharPtr RemovableCultureNotes[] = {
+ "[uncultured (using universal primers)]",
+ "[uncultured (using universal primers) bacterial source]",
+ "[cultured bacterial source]",
+ "[enrichment culture bacterial source]",
+ "[mixed bacterial source (cultured and uncultured)]",
+ "[uncultured]; [universal primers]",
+ "[mixed bacterial source]",
+ NULL
+};
+/* Notes that RemoveCultureNotesFromText replaces with
+ * "amplified with species-specific primers" when one of them is the whole
+ * text (compared with case ignored).  NULL-terminated.
+ */
+static CharPtr ReplaceableCultureNotes[] = {
+ "[uncultured (with species-specific primers)]",
+ "[uncultured]; [amplified with species-specific primers]",
+ "[uncultured (using species-specific primers) bacterial source]",
+ NULL
+};
+
+
+/* Clean boilerplate culture notes out of *p_txt.
+ *
+ * Every occurrence of a RemovableCultureNotes phrase, together with the spaces
+ * and semicolons directly after it, is cut out of the string in place.  If
+ * the remaining text is exactly one of the ReplaceableCultureNotes (case
+ * ignored) it is replaced by "amplified with species-specific primers".
+ * A text left with no content is freed and *p_txt becomes NULL.
+ *
+ * Returns TRUE if the text was shortened, replaced, or released.
+ */
+static Boolean RemoveCultureNotesFromText (CharPtr PNTR p_txt)
+{
+  CharPtr text, hit, from, to;
+  Int4    n, note_len, sep_len;
+  Boolean changed = FALSE;
+
+  if (p_txt == NULL) {
+    return FALSE;
+  }
+  text = *p_txt;
+  if (text == NULL) {
+    return FALSE;
+  }
+
+  for (n = 0; RemovableCultureNotes[n] != NULL; n++) {
+    note_len = StringLen (RemovableCultureNotes[n]);
+    for (hit = StringISearch (text, RemovableCultureNotes[n]);
+         hit != NULL;
+         hit = StringISearch (text, RemovableCultureNotes[n])) {
+      /* the separators trailing the note go away with it */
+      sep_len = StringSpn (hit + note_len, " ;");
+      from = hit + note_len + sep_len;
+      /* slide the tail of the string down over the removed span */
+      for (to = hit; *from != 0; ++to, ++from) {
+        *to = *from;
+      }
+      *to = 0;
+      changed = TRUE;
+    }
+  }
+
+  /* find the first replaceable note equal to the whole remaining text */
+  n = 0;
+  while (ReplaceableCultureNotes[n] != NULL
+         && StringICmp (text, ReplaceableCultureNotes[n]) != 0) {
+    n++;
+  }
+  if (ReplaceableCultureNotes[n] != NULL) {
+    *p_txt = MemFree (*p_txt);
+    *p_txt = StringSave ("amplified with species-specific primers");
+    text = *p_txt;
+    changed = TRUE;
+  }
+
+  if (StringHasNoText (text)) {
+    *p_txt = MemFree (*p_txt);
+    changed = TRUE;
+  }
+  return changed;
+}
+
+
+/* BioSource visitor callback: run RemoveCultureNotesFromText on every
+ * SubSource with subtype 255 and unlink and free those whose name ends up
+ * without text.  'data' may point to a Boolean that is OR-ed with TRUE when
+ * any text was changed.
+ * NOTE(review): 255 is presumably the "other"/note SubSource subtype - confirm.
+ */
+static void RemoveCultureNotesBioSourceCallback (BioSourcePtr biop, Pointer data)
+{
+  BoolPtr           p_changed;
+  Boolean           changed = FALSE;
+  SubSourcePtr      cur;
+  SubSourcePtr PNTR link;
+
+  if (biop == NULL) {
+    return;
+  }
+  p_changed = (BoolPtr) data;
+
+  /* 'link' always addresses the pointer that leads to the current node, so
+   * unlinking needs no special case for the head of the list */
+  link = &(biop->subtype);
+  while ((cur = *link) != NULL) {
+    if (cur->subtype == 255) {
+      changed |= RemoveCultureNotesFromText(&(cur->name));
+      if (StringHasNoText (cur->name)) {
+        *link = cur->next;
+        cur->next = NULL;
+        cur = SubSourceFree (cur);
+        continue;
+      }
+    }
+    link = &(cur->next);
+  }
+
+  if (p_changed != NULL) {
+    *p_changed |= changed;
+  }
+}
+
+/* Apply RemoveCultureNotesBioSourceCallback to every BioSource under sep.
+ * Returns TRUE if the callback reported a change for any of them.
+ */
+NLM_EXTERN Boolean RemoveCultureNotes (SeqEntryPtr sep)
+{
+ Boolean rval = FALSE;
+
+ VisitBioSourcesInSep (sep, &rval, RemoveCultureNotesBioSourceCallback);
+ return rval;
+}
+
+/* Words and phrases, spelled with the capitalization that
+ * FixProductWordCapitalization imposes on product names.  NULL-terminated.
+ */
+static CharPtr s_CorrectProductCaps[] = {
+ "ABC",
+ "AAA",
+ "ATP",
+ "ATPase",
+ "A/G",
+ "AMP",
+ "CDP",
+ "coproporphyrinogen III",
+ "cytochrome BD",
+ "cytochrome C",
+ "cytochrome C2",
+ "cytochrome C550",
+ "cytochrome D",
+ "cytochrome O",
+ "cytochrome P450",
+ "cytochrome P460",
+ "D-alanine",
+ "D-alanyl",
+ "D-amino",
+ "D-beta",
+ "D-cysteine",
+ "D-lactate",
+ "D-ribulose",
+ "D-xylulose",
+ "endonuclease I",
+ "endonuclease II",
+ "endonuclease III",
+ "endonuclease V",
+ "EPS I",
+ "Fe-S",
+ "ferredoxin I",
+ "ferredoxin II",
+ "GTP",
+ "GTPase",
+ "H+",
+ "hemolysin I",
+ "hemolysin II",
+ "hemolysin III",
+ "L-allo",
+ "L-arabinose",
+ "L-asparaginase",
+ "L-aspartate",
+ "L-carnitine",
+ "L-fuculose",
+ "L-glutamine",
+ "L-histidinol",
+ "L-isoaspartate",
+ "L-serine",
+ "MFS",
+ "FAD/NAD(P)",
+ "MCP",
+ "Mg+",
+ "Mg chelatase",
+ "Mg-protoporphyrin IX",
+ "N(5)",
+ "N,N-",
+ "N-(",
+ "N-acetyl",
+ "N-acyl",
+ "N-carb",
+ "N-form",
+ "N-iso",
+ "N-succ",
+ "NADP",
+ "Na+/H+",
+ "NAD",
+ "NAD(P)",
+ "NADPH",
+ "O-sial",
+ "O-succ",
+ "pH",
+ "ribonuclease BN",
+ "ribonuclease D",
+ "ribonuclease E",
+ "ribonuclease G",
+ "ribonuclease H",
+ "ribonuclease I",
+ "ribonuclease II",
+ "ribonuclease III",
+ "ribonuclease P",
+ "ribonuclease PH",
+ "ribonuclease R",
+ "RNAse",
+ "S-adeno",
+ "type I",
+ "type II",
+ "type III",
+ "type IV",
+ "type V",
+ "type VI",
+ "UDP",
+ "UDP-N",
+ "Zn",
+ NULL};
+
+/* Normalize the capitalization of known terms in *pProduct: each
+ * s_CorrectProductCaps entry is passed to FindReplaceString as both the search
+ * text and its replacement, so a match is rewritten with the table's spelling.
+ * NOTE(review): the FALSE/TRUE flags presumably request case-insensitive,
+ * whole-word matching - confirm against the FindReplaceString prototype.
+ * Does nothing when pProduct or *pProduct is NULL.
+ */
+NLM_EXTERN void FixProductWordCapitalization (CharPtr PNTR pProduct)
+{
+  CharPtr PNTR term;
+
+  if (pProduct == NULL || *pProduct == NULL) {
+    return;
+  }
+
+  /* walk the NULL-terminated table by pointer instead of by index */
+  for (term = s_CorrectProductCaps; *term != NULL; term++) {
+    FindReplaceString (pProduct, *term, *term, FALSE, TRUE);
+  }
+}
+
+
+/* Returns TRUE when sip is a general Seq-id (SEQID_GENERAL) whose Dbtag names
+ * the database "NCBIFILE" (exact, case-sensitive comparison); FALSE otherwise,
+ * including for a NULL id or a missing Dbtag.
+ */
+NLM_EXTERN Boolean IsNCBIFileID (SeqIdPtr sip)
+{
+  DbtagPtr tag = NULL;
+
+  if (sip != NULL && sip->choice == SEQID_GENERAL) {
+    tag = (DbtagPtr) sip->data.ptrvalue;
+  }
+  if (tag == NULL) {
+    return FALSE;
+  }
+  return (StringCmp (tag->db, "NCBIFILE") == 0) ? TRUE : FALSE;
+}
+
+
diff --git a/api/sqnutil4.c b/api/sqnutil4.c
index 6690de04..bcdebf0f 100755
--- a/api/sqnutil4.c
+++ b/api/sqnutil4.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 12/27/2007
*
-* $Revision: 1.111 $
+* $Revision: 1.153 $
*
* File Description:
* This file contains functions for automatically generating definition lines.
@@ -53,6 +53,8 @@
#include <gbftdef.h>
#include <gbfeat.h>
#include <findrepl.h>
+#include <salpacc.h>
+#include <salpedit.h>
#define NLM_GENERATED_CODE_PROTO
#include <objmacro.h>
#include <macroapi.h>
@@ -207,7 +209,8 @@ static void ListClauses (
ValNodePtr clauselist,
ValNodePtr PNTR strings,
Boolean allow_semicolons,
- Boolean suppress_final_and
+ Boolean suppress_final_and,
+ Boolean suppress_allele
);
static void LabelClauses
@@ -480,7 +483,7 @@ static void AddSubtypeFields (ValNodePtr PNTR sq_list, SourceQualDescPtr orig)
}
-static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, Boolean is_orgmod, Boolean use_alternate_note_name)
+static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, Boolean is_orgmod, Boolean use_alternate_note_name, Boolean get_subfields)
{
Int4 k;
SourceQualDescPtr sqdp;
@@ -509,7 +512,9 @@ static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, B
sqdp->subfield = 0;
ValNodeAddPointer (list, 0, sqdp);
}
- AddSubtypeFields (list, sqdp);
+ if (get_subfields) {
+ AddSubtypeFields (list, sqdp);
+ }
}
}
@@ -545,7 +550,7 @@ static void AddNoteQual (ValNodePtr PNTR list, Boolean is_orgmod, Boolean use_al
}
-static int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2)
+NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2)
{
SourceQualDescPtr str1;
@@ -569,27 +574,27 @@ static int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2)
}
-extern ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued)
+extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields)
{
ValNodePtr source_qual_list = NULL;
if (get_orgmod) {
- AddQualList (&source_qual_list, current_orgmod_subtype_alist, TRUE, get_subsrc);
+ AddQualList (&source_qual_list, current_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields);
if (get_discouraged) {
- AddQualList (&source_qual_list, discouraged_orgmod_subtype_alist, TRUE, get_subsrc);
+ AddQualList (&source_qual_list, discouraged_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields);
}
if (get_discontinued) {
- AddQualList (&source_qual_list, discontinued_orgmod_subtype_alist, TRUE, get_subsrc);
+ AddQualList (&source_qual_list, discontinued_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields);
}
AddNoteQual (&source_qual_list, TRUE, get_subsrc);
}
if (get_subsrc) {
- AddQualList (&source_qual_list, current_subsource_subtype_alist, FALSE, get_orgmod);
+ AddQualList (&source_qual_list, current_subsource_subtype_alist, FALSE, get_orgmod, get_subfields);
if (get_discouraged) {
- AddQualList (&source_qual_list, discouraged_subsource_subtype_alist, FALSE, get_orgmod);
+ AddQualList (&source_qual_list, discouraged_subsource_subtype_alist, FALSE, get_orgmod, get_subfields);
}
if (get_discontinued) {
- AddQualList (&source_qual_list, discontinued_subsource_subtype_alist, FALSE, get_orgmod);
+ AddQualList (&source_qual_list, discontinued_subsource_subtype_alist, FALSE, get_orgmod, get_subfields);
}
AddNoteQual (&source_qual_list, FALSE, get_orgmod);
}
@@ -598,6 +603,11 @@ extern ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod,
return source_qual_list;
}
+extern ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued)
+{ /* four-argument entry point: forwards to GetSourceQualDescListEx with get_subfields set to TRUE */
+ return GetSourceQualDescListEx (get_subsrc, get_orgmod, get_discouraged, get_discontinued, TRUE);
+}
+
/*
* The CountModifiersProc is used as the callback function for
* VisitBioSourcesInSep when we are getting a list of all the modifiers
@@ -3475,11 +3485,12 @@ static Int4 IsMobileElementGBQual (GBQualPtr gbqual)
{
Int4 keyword_idx;
if (gbqual == NULL || gbqual->qual == NULL || gbqual->val == NULL) return -1;
- if (StringCmp (gbqual->qual, "mobile_element") != 0) return -1;
+ if (StringCmp (gbqual->qual, "mobile_element") != 0 && StringCmp (gbqual->qual, "mobile_element_type") != 0) return -1;
keyword_idx = StartsWithMobileElementKeyword (gbqual->val);
if (keyword_idx < 0) return -1;
if (keyword_idx == eMobileElementOther
- && StringStr (gbqual->val, "transposable element") == NULL) {
+ && StringStr (gbqual->val, "transposable element") == NULL
+ && StringStr (gbqual->val, "P element") == NULL) {
return -1;
} else {
return keyword_idx;
@@ -3508,7 +3519,7 @@ static Boolean FeatureDoesNotGetPartialComplete (SeqFeatPtr sfp)
NLM_EXTERN Boolean LIBCALLBACK IsMobileElement (SeqFeatPtr sfp)
{
GBQualPtr gbqual;
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE;
+ if (sfp == NULL || (sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element)) return FALSE;
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
{
@@ -3523,7 +3534,7 @@ static Boolean LIBCALLBACK IsRemovableMobileElement (SeqFeatPtr sfp)
{
GBQualPtr gbqual;
Int4 keyword_idx;
- if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE;
+ if (sfp == NULL || (sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element)) return FALSE;
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
{
@@ -6524,7 +6535,7 @@ static CharPtr GetGenericInterval
prev_feat->next = featlist;
}
- ListClauses (featlist, &strings, FALSE, suppress_final_and);
+ ListClauses (featlist, &strings, FALSE, suppress_final_and, rp->suppress_allele);
subfeatlist = MergeValNodeStrings (strings, FALSE);
ValNodeFreeData (strings);
len += StringLen (subfeatlist) + 7;
@@ -8148,7 +8159,8 @@ static void ListClauses (
ValNodePtr clauselist,
ValNodePtr PNTR strings,
Boolean allow_semicolons,
- Boolean suppress_final_and
+ Boolean suppress_final_and,
+ Boolean suppress_allele
)
{
FeatureClausePtr thisclause, onebefore, twobefore, oneafter, twoafter;
@@ -8198,8 +8210,8 @@ static void ListClauses (
onebefore_has_typeword_change = TRUE;
}
if (onebefore_has_typeword_change || onebefore_has_interval_change
- || (DisplayAlleleName (onebefore) && StringLen (onebefore->allelename) != 0)
- || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0))
+ || (!suppress_allele && DisplayAlleleName (onebefore) && StringLen (onebefore->allelename) != 0)
+ || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0))
{
onebefore_has_detail_change = TRUE;
}
@@ -8233,8 +8245,8 @@ static void ListClauses (
oneafter_has_typeword_change = TRUE;
}
if (oneafter_has_typeword_change || oneafter_has_interval_change
- || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0)
- || (DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) != 0))
+ || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0)
+ || (!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) != 0))
{
oneafter_has_detail_change = TRUE;
}
@@ -8397,15 +8409,15 @@ static void ListClauses (
}
else if (oneafter != NULL && twoafter != NULL
&& ! oneafter_has_interval_change && StringCmp (thisclause->interval, twoafter->interval) == 0
- && ((DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0)
- || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0)))
+ && ((!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0)
+ || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0)))
{
print_comma = TRUE;
}
else if (oneafter != NULL && onebefore != NULL
&& ! oneafter_has_interval_change && ! onebefore_has_interval_change
- && ((DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0)
- || (DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0)))
+ && ((!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0)
+ || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0)))
{
print_comma = TRUE;
}
@@ -8483,7 +8495,7 @@ static void ListClauses (
clause_len += 4;
if (print_comma)
clause_len += 2;
- if (DisplayAlleleName (thisclause))
+ if (!suppress_allele && DisplayAlleleName (thisclause))
{
clause_len += StringLen (thisclause->allelename) + 10;
if (StringLen (thisclause->allelename) > 0)
@@ -8525,7 +8537,7 @@ static void ListClauses (
StringCat (clause_string, thisclause->feature_label_data.typeword);
if (typeword_is_plural)
StringCat (clause_string, "s");
- if (DisplayAlleleName (thisclause)
+ if (!suppress_allele && DisplayAlleleName (thisclause)
&& thisclause->allelename != NULL)
{
StringCat (clause_string, ", ");
@@ -9014,6 +9026,7 @@ NLM_EXTERN void InitFeatureRequests (
feature_requests->suppress_locus_tags = FALSE;
feature_requests->suppressed_feature_list = NULL;
feature_requests->use_ncrna_note = FALSE;
+ feature_requests->suppress_allele = FALSE;
}
@@ -10487,6 +10500,10 @@ NLM_EXTERN CharPtr BuildNonFeatureListClause (BioseqPtr bsp, DefLineType feature
{
str = StringSave (", complete sequence.");
}
+ else if (feature_list_type == DEFLINE_PARTIAL_SEQUENCE)
+ {
+ str = StringSave (", partial sequence.");
+ }
else if (feature_list_type == DEFLINE_COMPLETE_GENOME)
{
ending_str [0] = 0;
@@ -10705,7 +10722,7 @@ static CharPtr BuildFeatureClauses (
/* SmashTallClauses (feature_list, TRUE); */
clause = *feature_list;
- ListClauses (clause, &strings, TRUE, FALSE);
+ ListClauses (clause, &strings, TRUE, FALSE, feature_requests->suppress_allele);
AutoDef_AddEnding (clause, &strings, bsp,
product_flag, alternate_splice_flag);
@@ -10810,6 +10827,49 @@ static void BuildFeatClauseListForSegSet (
FreeListElement (sdld.parent_feature_list);
}
+
+/* Decide whether a clause list consists solely of 5S rRNA gene region pieces.
+ *
+ * Returns TRUE only when the list has at least two nodes and every node is a
+ * DEFLINE_CLAUSEPLUS clause whose first feature-list entry is either
+ *   - an rRNA feature named exactly "5S ribosomal RNA" (RnaRef ext choice 1), or
+ *   - a misc_feature whose comment is "nontranscribed spacer" or
+ *     "contains 5S ribosomal RNA and nontranscribed spacer".
+ * Any other node, a missing clause, or a clause without a feature list makes
+ * the result FALSE.
+ */
+static Boolean Is5SList (ValNodePtr feature_list)
+{
+  ValNodePtr       vnp;
+  FeatureClausePtr fcp;
+  SeqFeatPtr       sfp;
+  RnaRefPtr        rrp;
+
+  /* an empty or single-clause list is never treated as a 5S region */
+  if (feature_list == NULL || feature_list->next == NULL) {
+    return FALSE;
+  }
+
+  for (vnp = feature_list; vnp != NULL; vnp = vnp->next) {
+    if (vnp->choice != DEFLINE_CLAUSEPLUS) {
+      return FALSE;
+    }
+    fcp = (FeatureClausePtr) vnp->data.ptrvalue;
+    /* featlist must be checked too: it is dereferenced just below */
+    if (fcp == NULL || fcp->featlist == NULL) {
+      return FALSE;
+    }
+    if (fcp->featlist->choice != DEFLINE_FEATLIST) {
+      return FALSE;
+    }
+    sfp = (SeqFeatPtr) fcp->featlist->data.ptrvalue;
+    if (sfp == NULL) {
+      return FALSE;
+    }
+
+    if (sfp->idx.subtype == FEATDEF_rRNA) {
+      rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
+      if (rrp == NULL
+          || rrp->ext.choice != 1
+          || StringCmp (rrp->ext.value.ptrvalue, "5S ribosomal RNA") != 0) {
+        return FALSE;
+      }
+    } else if (sfp->idx.subtype == FEATDEF_misc_feature) {
+      if (StringCmp (sfp->comment, "nontranscribed spacer") != 0
+          && StringCmp (sfp->comment, "contains 5S ribosomal RNA and nontranscribed spacer") != 0) {
+        return FALSE;
+      }
+    } else {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+
static void BuildOneFeatClauseList (
SeqEntryPtr sep,
Uint2 entityID,
@@ -10867,16 +10927,21 @@ static void BuildOneFeatClauseList (
if (deflist == NULL) return;
deflist->sep = SeqMgrGetSeqEntryForData (bsp),
deflist->bsp = bsp;
- deflist->clauselist = BuildFeatureClauses (bsp,
- molecule_type,
- SeqMgrGetSeqEntryForData (bsp),
- &head,
- FALSE,
- NULL,
- product_flag,
- alternate_splice_flag,
- gene_cluster_opp_strand,
- feature_requests);
+ if (Is5SList(head)) {
+ deflist->clauselist = StringSave ("5S ribosomal RNA gene region");
+ } else {
+ deflist->clauselist = BuildFeatureClauses (bsp,
+ molecule_type,
+ SeqMgrGetSeqEntryForData (bsp),
+ &head,
+ FALSE,
+ NULL,
+ product_flag,
+ alternate_splice_flag,
+ gene_cluster_opp_strand,
+ feature_requests);
+ }
+
vnp = ValNodeNew (*list);
if (vnp == NULL) return;
if (*list == NULL) *list = vnp;
@@ -11351,7 +11416,7 @@ static Boolean UseHaplotype (OrganismDescriptionModifiersPtr odmp, ValNodePtr PN
static Boolean UseAutoDefId (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
{
- Int4 index;
+ size_t index;
ValNodePtr vnp;
Boolean found, changed = FALSE;
ModifierItemLocalPtr cpy;
@@ -11822,7 +11887,8 @@ NLM_EXTERN DefLineClauseOptionsPtr MakeFeatureRequestsMatchExpectedTitle (Bioseq
{
SeqEntryPtr sep;
ValNodePtr defline_clauses = NULL;
- Int4 index, mod_index;
+ size_t index;
+ Int4 mod_index;
ValNodePtr best_modifier_indices, default_modifier_indices, modifier_indices = NULL, tmp_mod_list;
ValNodePtr vnp;
ModifierItemLocalPtr modList;
@@ -12222,6 +12288,33 @@ NLM_EXTERN void BuildDefinitionLinesFromFeatureClauseLists (
}
}
+/* For each DefLineFeatClause in 'list' that belongs to 'bsp', build its
+ * definition line from the stored clause list and the organism modifiers and
+ * install it with ReplaceDefinitionLine.  Entries for other Bioseqs and empty
+ * nodes are skipped.
+ */
+NLM_EXTERN void BuildDefLinesFromFeatClauseListsForOneBsp (
+  ValNodePtr list,
+  ModifierItemLocalPtr modList,
+  ValNodePtr modifier_indices,
+  OrganismDescriptionModifiersPtr odmp,
+  BioseqPtr bsp
+)
+{
+  ValNodePtr           node;
+  DefLineFeatClausePtr entry;
+  CharPtr              title;
+
+  node = list;
+  while (node != NULL)
+  {
+    entry = (DefLineFeatClausePtr) node->data.ptrvalue;
+    if (entry != NULL && entry->bsp == bsp)
+    {
+      title = BuildOneDefinitionLine (entry->sep, entry->bsp,
+                                      entry->clauselist,
+                                      modList, modifier_indices, odmp);
+      ReplaceDefinitionLine (entry->sep, title);
+    }
+    node = node->next;
+  }
+}
+
/* This removes redundant titles on nuc-prot sets, which will not be
* visible in the flat file if all sequences in the nuc-prot set have
@@ -12300,6 +12393,40 @@ NLM_EXTERN void RemoveProteinTitles (SeqEntryPtr sep)
DeleteMarkedObjects (entityID, 0, NULL);
}
+/* Bioseq visitor callback: on a nucleotide Bioseq whose MolInfo biomol is
+ * MOLECULE_TYPE_MRNA, mark every extended title descriptor for deletion by
+ * setting its idx.deleteme flag.  userdata is not used.
+ * NOTE(review): 'extended' presumably means the descriptor was allocated as an
+ * ObjValNode, which is what makes the cast below valid - confirm.
+ */
+static void MRnaTitleRemoveProc (BioseqPtr bsp, Pointer userdata)
+
+{
+  SeqDescrPtr desc;
+  MolInfoPtr  info;
+
+  if (bsp == NULL || ! ISA_na (bsp->mol)) return;
+
+  desc = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
+  if (desc == NULL) return;
+  info = (MolInfoPtr) desc->data.ptrvalue;
+  if (info == NULL || info->biomol != MOLECULE_TYPE_MRNA) return;
+
+  desc = bsp->descr;
+  while (desc != NULL) {
+    if (desc->choice == Seq_descr_title && desc->extended) {
+      ((ObjValNodePtr) desc)->idx.deleteme = TRUE;
+    }
+    desc = desc->next;
+  }
+}
+/* Remove the title descriptors of the mRNA Bioseqs under sep: the titles are
+ * flagged by MRnaTitleRemoveProc and then deleted with DeleteMarkedObjects.
+ */
+NLM_EXTERN void RemoveMRnaTitles (SeqEntryPtr sep)
+
+{
+ Uint2 entityID;
+
+ if (sep == NULL) return;
+ VisitBioseqsInSep (sep, NULL, MRnaTitleRemoveProc);
+ entityID = ObjMgrGetEntityIDForChoice (sep);
+ DeleteMarkedObjects (entityID, 0, NULL);
+}
+
typedef struct popsetdefline {
DeflineFeatureRequestListPtr feature_requests;
@@ -12448,6 +12575,8 @@ NLM_EXTERN void AddPopsetTitles
PopsetDeflineData pop;
pop.feature_requests = feature_requests;
+ /* forcibly suppress alleles in popset titles */
+ pop.feature_requests->suppress_allele = TRUE;
pop.product_flag = product_flag;
pop.alternate_splice_flag = alternate_splice_flag;
pop.gene_cluster_opp_strand = gene_cluster_opp_strand;
@@ -14598,6 +14727,36 @@ NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
}
+/* Convert a misc_feature into a gene feature in place.
+ *
+ * The ImpFeat payload is replaced by a new GeneRef.  When the feature has a
+ * non-blank comment, the text up to the first ';' becomes the gene locus and
+ * whatever follows the ';' is kept as the comment (the comment becomes NULL
+ * when there is no ';').
+ *
+ * Returns FALSE, leaving the feature untouched, when sfp is NULL, is not a
+ * misc_feature, or the GeneRef cannot be allocated; TRUE after conversion.
+ */
+NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp)
+{
+  GeneRefPtr grp;
+  CharPtr    cp;
+  CharPtr    remainder = NULL;
+
+  if (sfp == NULL || sfp->idx.subtype != FEATDEF_misc_feature) {
+    return FALSE;
+  }
+
+  /* allocate the replacement before discarding the ImpFeat, so a failed
+   * allocation neither dereferences NULL nor leaves sfp half converted */
+  grp = GeneRefNew ();
+  if (grp == NULL) {
+    return FALSE;
+  }
+
+  sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue);
+  sfp->data.value.ptrvalue = grp;
+  sfp->data.choice = SEQFEAT_GENE;
+  /* the stored subtype no longer applies to the converted feature */
+  sfp->idx.subtype = 0;
+
+  if (!StringHasNoText (sfp->comment)) {
+    cp = StringChr (sfp->comment, ';');
+    if (cp != NULL) {
+      /* cut the comment at the ';' and keep a copy of the tail */
+      *cp = 0;
+      remainder = StringSave (cp + 1);
+    }
+    grp->locus = StringSave (sfp->comment);
+    sfp->comment = MemFree (sfp->comment);
+    sfp->comment = remainder;
+  }
+  return TRUE;
+}
+
+
NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp)
{
BioseqPtr bsp, prot_bsp;
@@ -16776,6 +16935,154 @@ NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first,
}
+NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date)
+{ /* Re-express a date that contains exactly one alphabetic (month) token plus
+ * one or two numeric tokens.  The result is a newly allocated string written
+ * as "DD-month-year", "month-year", or just the year, using the month text
+ * returned by GetMonthFromToken.  Returns NULL when the input is blank, has
+ * no or more than one alphabetic token, has fewer than two tokens, has
+ * anything left after the third token, or when day and year cannot be chosen.
+ */
+ CharPtr reformatted_date = NULL, cp;
+ Int4 year = 0, day = 0;
+ CharPtr month = NULL;
+ CharPtr token_list[3];
+ Int4 token_lens[3];
+ CharPtr numbers = "0123456789";
+ CharPtr letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+ Int4 num_tokens = 0;
+ Int4 token_len;
+ Int4 month_token = -1; /* index of the alphabetic token, -1 until one is seen */
+ Boolean is_num;
+ Int4 num_1, num_2;
+
+ if (StringHasNoText (orig_date))
+ {
+ return NULL;
+ }
+
+ /* divide our original date into tokens */
+ /* characters that are neither digits nor letters are skipped inside the loop */
+ cp = orig_date;
+ while (*cp != 0 && num_tokens < 3)
+ {
+ is_num = FALSE;
+ token_len = StringSpn (cp, numbers);
+ if (token_len == 0)
+ {
+ token_len = StringSpn (cp, letters);
+ }
+ else
+ {
+ is_num = TRUE;
+ }
+ if (token_len == 0)
+ {
+ cp++; /* separator character: step over it */
+ }
+ else
+ {
+ if (!is_num)
+ {
+ if (month_token == -1)
+ {
+ month_token = num_tokens;
+ }
+ else
+ {
+ /* already found a month string */
+ return NULL;
+ }
+ }
+ token_list [num_tokens] = cp;
+ token_lens [num_tokens] = token_len;
+ num_tokens ++;
+ cp += token_len;
+ }
+ }
+
+ if (num_tokens == 0 || *cp != 0 || month_token == -1 || num_tokens < 2) /* *cp != 0: text remains after the third token */
+ {
+ return NULL;
+ }
+
+ if (num_tokens == 2)
+ {
+ if (month_token == 0)
+ {
+ month = GetMonthFromToken (token_list [0], token_lens [0]);
+ year = GetYearFromToken (token_list [1], token_lens [1]);
+ }
+ else if (month_token == 1)
+ {
+ month = GetMonthFromToken (token_list [1], token_lens [1]);
+ year = GetYearFromToken (token_list [0], token_lens [0]);
+ }
+ else
+ {
+ return NULL;
+ }
+ }
+ else if (num_tokens == 3)
+ {
+ if (month_token == 0)
+ {
+ month = GetMonthFromToken (token_list [0], token_lens [0]);
+ num_1 = ReadNumberFromToken (token_list [1], token_lens [1]);
+ num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
+ if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
+ {
+ return NULL;
+ }
+ }
+ else if (month_token == 1)
+ {
+ month = GetMonthFromToken (token_list [1], token_lens [1]);
+ num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
+ num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
+ if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
+ {
+ return NULL;
+ }
+ }
+ else if (month_token == 2)
+ {
+ month = GetMonthFromToken (token_list [2], token_lens [2]);
+ num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
+ num_2 = ReadNumberFromToken (token_list [1], token_lens [1]);
+ if (!ChooseDayAndYear (num_1, num_2, month, FALSE, &day, &year))
+ {
+ return NULL;
+ }
+ }
+ else
+ {
+ return NULL;
+ }
+ year = GetYearFromNumber(year);
+ }
+
+ if (month == NULL && day > 0)
+ {
+ return NULL;
+ }
+
+ reformatted_date = (CharPtr) MemNew (sizeof (Char) * 12); /* NOTE(review): 12 bytes hold "DD-Mon-YYYY" plus the terminator only if the month text is 3 characters and the year at most 4 digits - confirm */
+ if (reformatted_date == NULL)
+ {
+ return NULL;
+ }
+
+ if (month == NULL)
+ {
+ sprintf (reformatted_date, "%d", year); /* no month text was obtained, so only the year is written */
+ }
+ else if (day == 0)
+ {
+ sprintf (reformatted_date, "%s-%d", month, year);
+ }
+ else
+ {
+ sprintf (reformatted_date, "%02d-%s-%d", day, month, year);
+ }
+ return reformatted_date;
+}
+
+
NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp)
{
SeqFeatPtr orig_prot, new_prot;
@@ -16817,6 +17124,7 @@ NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqF
ProtRefPtr prp;
Int4 frame;
Boolean rval = FALSE;
+ Boolean partial5, partial3;
if (sfp == NULL || top_cds == NULL || sfp->data.choice != SEQFEAT_CDREGION || top_cds->data.choice != SEQFEAT_CDREGION) {
return FALSE;
@@ -16827,7 +17135,8 @@ NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqF
{
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
- prot_loc = dnaLoc_to_aaLoc(top_cds, sfp->location, TRUE, &frame, TRUE);
+ CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+ prot_loc = dnaLoc_to_aaLoc(top_cds, sfp->location, TRUE, &frame, !partial3);
if (prot_loc != NULL)
{
/* Create new feature on prot_bsp */
@@ -17016,82 +17325,105 @@ NLM_EXTERN void AddNewUniqueAnnotations (SeqAnnotPtr PNTR new_set, SeqAnnotPtr p
}
}
-static void AddCategorySeqEntriesToSet (BioseqSetPtr newset, ClickableItemPtr category)
+
+static void AddItemListToSet (ValNodePtr item_list, BioseqSetPtr newset, Boolean for_segregate)
{
ValNodePtr vnp_item;
SeqEntryPtr sep, last_sep, prev_sep, remove_sep;
BioseqSetPtr bssp, orig_parent;
BioseqPtr bsp;
- if (newset == NULL || category == NULL || category->item_list == NULL) return;
+ if (newset == NULL || item_list == NULL) return;
- if (category->chosen) {
- last_sep = newset->seq_set;
- while (last_sep != NULL && last_sep->next != NULL) {
- last_sep = last_sep->next;
- }
-
- for (vnp_item = category->item_list; vnp_item != NULL; vnp_item = vnp_item->next) {
- sep = GetBestSeqEntryForItem (vnp_item);
- if (sep == NULL || sep->data.ptrvalue == NULL) continue;
- orig_parent = NULL;
- if (IS_Bioseq (sep)) {
- bsp = sep->data.ptrvalue;
- if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
- orig_parent = bsp->idx.parentptr;
- bsp->idx.parentptr = NULL;
- }
- } else if (IS_Bioseq_set (sep)) {
- bssp = sep->data.ptrvalue;
- if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
- orig_parent = bssp->idx.parentptr;
- bssp->idx.parentptr = NULL;
- }
- } else {
- continue;
+ last_sep = newset->seq_set;
+ while (last_sep != NULL && last_sep->next != NULL) {
+ last_sep = last_sep->next;
+ }
+
+ for (vnp_item = item_list; vnp_item != NULL; vnp_item = vnp_item->next) {
+ sep = GetBestSeqEntryForItem (vnp_item);
+ if (sep == NULL || sep->data.ptrvalue == NULL) continue;
+ orig_parent = NULL;
+ bsp = NULL;
+ bssp = NULL;
+ if (IS_Bioseq (sep)) {
+ bsp = sep->data.ptrvalue;
+ if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
+ orig_parent = bsp->idx.parentptr;
+ bsp->idx.parentptr = NULL;
}
-
- if (orig_parent != NULL) {
- /* remove this seq-entry from the original parent */
- prev_sep = NULL;
- for (remove_sep = orig_parent->seq_set;
- remove_sep != NULL && remove_sep != sep;
- remove_sep = remove_sep->next) {
- prev_sep = remove_sep;
- }
- if (remove_sep == sep) {
- if (prev_sep == NULL) {
- orig_parent->seq_set = orig_parent->seq_set->next;
- if (orig_parent->seq_set == NULL) {
- orig_parent->idx.deleteme = TRUE;
- }
- } else {
- prev_sep->next = sep->next;
+ } else if (IS_Bioseq_set (sep)) {
+ bssp = sep->data.ptrvalue;
+ if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
+ orig_parent = bssp->idx.parentptr;
+ bssp->idx.parentptr = NULL;
+ }
+ } else {
+ continue;
+ }
+
+ if (orig_parent != NULL) {
+ /* remove this seq-entry from the original parent */
+ prev_sep = NULL;
+ for (remove_sep = orig_parent->seq_set;
+ remove_sep != NULL && remove_sep != sep;
+ remove_sep = remove_sep->next) {
+ prev_sep = remove_sep;
+ }
+ if (remove_sep == sep) {
+ if (prev_sep == NULL) {
+ orig_parent->seq_set = orig_parent->seq_set->next;
+ if (orig_parent->seq_set == NULL) {
+ orig_parent->idx.deleteme = TRUE;
}
+ } else {
+ prev_sep->next = sep->next;
}
- /* set class type if not already set */
- if (newset->_class == BioseqseqSet_class_genbank) {
- newset->_class = orig_parent->_class;
- }
}
- if (orig_parent != NULL) {
+ /* set class type if not already set */
+ if (newset->_class == BioseqseqSet_class_genbank && for_segregate) {
+ newset->_class = orig_parent->_class;
+ }
+ }
+ if (orig_parent != NULL) {
+ if (for_segregate) {
/* add descriptors from the orig_parent to the new parent */
AddNewUniqueDescriptors (&(newset->descr), orig_parent->descr);
/* add annotations from the orig_parent to the new parent */
AddNewUniqueAnnotations (&(newset->annot), orig_parent->annot);
- }
-
- /* add to new parent */
- sep->next = NULL;
- if (last_sep == NULL) {
- newset->seq_set = sep;
} else {
- last_sep->next = sep;
+ /* add descriptors from the orig_parent to the bioseq itself (or nuc-prot-set if that's what moved) */
+ if (bsp != NULL) {
+ AddNewUniqueDescriptors (&(bsp->descr), orig_parent->descr);
+ } else if (bssp != NULL) {
+ AddNewUniqueDescriptors (&(bssp->descr), orig_parent->descr);
+ }
}
- last_sep = sep;
- SeqMgrLinkSeqEntry (sep, OBJ_BIOSEQSET, newset);
}
+
+ /* add to new parent */
+ sep->next = NULL;
+ if (last_sep == NULL) {
+ newset->seq_set = sep;
+ } else {
+ last_sep->next = sep;
+ }
+ last_sep = sep;
+ SeqMgrLinkSeqEntry (sep, OBJ_BIOSEQSET, newset);
+ }
+
+}
+
+
+static void AddCategorySeqEntriesToSet (BioseqSetPtr newset, ClickableItemPtr category)
+{
+ ValNodePtr vnp_item;
+
+ if (newset == NULL || category == NULL || category->item_list == NULL) return;
+
+ if (category->chosen) {
+ AddItemListToSet (category->item_list, newset, TRUE);
} else {
for (vnp_item = category->subcategories; vnp_item != NULL; vnp_item = vnp_item->next) {
AddCategorySeqEntriesToSet (newset, vnp_item->data.ptrvalue);
@@ -17118,6 +17450,113 @@ static Boolean NeedsNewSet (SeqEntryPtr sep)
return FALSE;
}
+
+/* Returns TRUE when sep is a Bioseq-set that contains exactly one member and
+ * carries no alignment annotation (a SeqAnnot of type 2); FALSE otherwise.
+ */
+static Boolean IsSingletonSet (SeqEntryPtr sep)
+{
+  BioseqSetPtr set;
+  SeqAnnotPtr  annot;
+
+  if (sep == NULL || !IS_Bioseq_set(sep)) {
+    return FALSE;
+  }
+  set = (BioseqSetPtr) sep->data.ptrvalue;
+  if (set == NULL) {
+    return FALSE;
+  }
+  /* exactly one member: the list is non-empty and has no second node */
+  if (set->seq_set == NULL || set->seq_set->next != NULL) {
+    return FALSE;
+  }
+
+  /* stop at the first alignment annotation, if there is one */
+  annot = set->annot;
+  while (annot != NULL && annot->type != 2) {
+    annot = annot->next;
+  }
+  return (annot == NULL) ? TRUE : FALSE;
+}
+
+
+/* Append the annotation chain 'sap' to the end of the annotation list of the
+ * Bioseq or Bioseq-set held by sep.  Does nothing when sep is NULL or holds
+ * neither kind of object.
+ */
+static void AddAnnotsToSeqEntry (SeqEntryPtr sep, SeqAnnotPtr sap)
+{
+  SeqAnnotPtr PNTR tail;
+
+  if (sep == NULL) {
+    return;
+  }
+
+  /* pick the annotation list head that matches the entry type */
+  if (IS_Bioseq(sep)) {
+    tail = &(((BioseqPtr) sep->data.ptrvalue)->annot);
+  } else if (IS_Bioseq_set (sep)) {
+    tail = &(((BioseqSetPtr) sep->data.ptrvalue)->annot);
+  } else {
+    return;
+  }
+
+  /* advance to the terminating NULL link and hang the new chain there */
+  while (*tail != NULL) {
+    tail = &((*tail)->next);
+  }
+  *tail = sap;
+}
+
+
+/* Replace every singleton child set of the Bioseq-set in sep by its only
+ * member.
+ *
+ * For each child accepted by IsSingletonSet: the set's title descriptors are
+ * discarded, its other descriptors are propagated with
+ * SetDescriptorPropagate, its annotations are handed to the member, the
+ * member takes the child's place in the parent's seq_set list, and the empty
+ * wrapper set is freed.  The object-manager data and parent link of sep are
+ * saved beforehand and restored afterwards.
+ *
+ * Does nothing when sep is NULL, is not a Bioseq-set, or has no data.
+ */
+static void PromoteSingletonSetsInSet (SeqEntryPtr sep)
+{
+  ObjMgrDataPtr omdptop;
+  ObjMgrData    omdata;
+  BioseqSetPtr  bssp, child_bssp;
+  SeqEntryPtr   sep_child, child_next, child_prev = NULL, member;
+  ValNodePtr    titles;
+  Uint2         top_parenttype;
+  Pointer       top_parentptr;
+
+  /* the data pointer is only a BioseqSet when the entry really is a set */
+  if (sep == NULL || !IS_Bioseq_set (sep)
+      || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
+    return;
+  }
+
+  SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
+  GetSeqEntryParent (sep, &top_parentptr, &top_parenttype);
+
+  for (sep_child = bssp->seq_set; sep_child != NULL; sep_child = child_next) {
+    child_next = sep_child->next;
+    if (!IsSingletonSet(sep_child)) {
+      child_prev = sep_child;
+      continue;
+    }
+
+    child_bssp = (BioseqSetPtr) sep_child->data.ptrvalue;
+    member = child_bssp->seq_set;
+
+    /* remove set title if any */
+    titles = ValNodeExtractList (&(child_bssp->descr), Seq_descr_title);
+    titles = SeqDescrFree (titles);
+    /* propagate remaining descriptors */
+    SetDescriptorPropagate (child_bssp);
+    /* push down annotation */
+    AddAnnotsToSeqEntry (member, child_bssp->annot);
+    /* the member owns the annotations now; clear the set's pointer so that
+     * freeing the wrapper below cannot release them a second time */
+    child_bssp->annot = NULL;
+
+    /* replace in list */
+    if (child_prev == NULL) {
+      bssp->seq_set = member;
+    } else {
+      child_prev->next = member;
+    }
+    member->next = child_next;
+    child_prev = member;
+
+    /* detach the member before the wrapper set is freed */
+    child_bssp->seq_set = NULL;
+    sep_child = SeqEntryFree (sep_child);
+  }
+
+  SeqMgrLinkSeqEntry (sep, top_parenttype, top_parentptr);
+  RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
+}
+
+
NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues
(BioseqSetPtr bssp,
ValNodePtr value_lists)
@@ -17230,6 +17669,9 @@ NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues
parent_set->descr = SeqDescrFree (parent_set->descr);
}
+ sep = SeqMgrGetSeqEntryForData (parent_set);
+ PromoteSingletonSetsInSet (sep);
+
ObjMgrSetDirtyFlag (entityID, TRUE);
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
@@ -17237,6 +17679,123 @@ NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues
}
+/* Annotation-visitor callback that removes one Bioseq from the alignments
+ * stored in an annotation.
+ *
+ * sap:  annotation to edit; only annotations with type 2, whose data is
+ *       used here as a SeqAlignPtr chain, are processed.
+ * data: the BioseqPtr to remove.
+ *
+ * For each alignment in the chain that mentions one of the Bioseq's ids:
+ *   - a two-row alignment, or any alignment when the head has segtype 1,
+ *     is unlinked from the chain and freed once the walk is finished;
+ *   - otherwise the matching id is passed to SeqAlignBioseqDeleteById.
+ * If no alignment is left, the annotation is flagged with idx.deleteme.
+ */
+static void RemoveBioseqFromAlignmentsCallback (SeqAnnotPtr sap, Pointer data)
+{
+  BioseqPtr   bsp;
+  SeqAlignPtr salphead, salp, salp_next, prev_salp, remove_salp, last_remove;
+  SeqIdPtr    sip, tmpsip, match_sip;
+
+  if (sap == NULL || sap->type != 2
+      || (bsp = (BioseqPtr) data) == NULL
+      || (salphead = (SeqAlignPtr) sap->data) == NULL) {
+    return;
+  }
+
+  salp = salphead;
+  prev_salp = NULL;
+  remove_salp = NULL;
+  last_remove = NULL;
+  while (salp != NULL)
+  {
+    salp_next = salp->next;
+    tmpsip = SeqIdPtrFromSeqAlign (salp);
+    /* Record the id that matched.  The loop variable itself has already
+     * been advanced past the match by the time the loop exits, so it must
+     * not be used afterwards.
+     */
+    match_sip = NULL;
+    for (sip = bsp->id; sip != NULL && match_sip == NULL; sip = sip->next) {
+      if (SeqIdOrderInBioseqIdList (sip, tmpsip) != 0) {
+        match_sip = sip;
+      }
+    }
+    if (match_sip == NULL)
+    {
+      /* do nothing for this subalignment */
+      prev_salp = salp;
+    }
+    else if (salp->dim == 2 || salphead->segtype == 1)
+    {
+      /* This is for a pairwise alignment or a DENDIAG alignment */
+      if (prev_salp == NULL)
+      {
+        salphead = salp->next;
+      }
+      else
+      {
+        prev_salp->next = salp->next;
+      }
+      /* save the alignments that we want to free in a list and get rid of them
+       * at the end - freeing them beforehand causes problems with listing the
+       * IDs in the alignment.
+       */
+      salp->next = NULL;
+      if (remove_salp == NULL)
+      {
+        remove_salp = salp;
+      }
+      else
+      {
+        last_remove->next = salp;
+      }
+      last_remove = salp;
+    }
+    else
+    {
+      SeqAlignBioseqDeleteById (salphead, match_sip);
+      prev_salp = salp;
+    }
+    salp = salp_next;
+  }
+  /* Now we can free the alignment */
+  SeqAlignFree (remove_salp);
+
+  sap->data = salphead;
+  if (sap->data == NULL) {
+    sap->idx.deleteme = TRUE;
+  }
+}
+
+
+/* Moves the listed sequences into the top-level set of an entity and then
+ * strips them from the alignments found under that entity.
+ *
+ * list:     ValNode chain; each node is expected to have choice OBJ_BIOSEQ
+ *           and data.ptrvalue pointing at a Bioseq.  NULL is a no-op.
+ * entityID: entity whose top entry must be a set with class
+ *           BioseqseqSet_class_genbank; anything else is a no-op.
+ *
+ * The entity is marked dirty and an update message is sent on completion.
+ */
+NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID)
+{
+  ObjMgrDataPtr saved_omdp;
+  ObjMgrData    saved_omd;
+  Uint2         wrapper_parent_type;
+  Pointer       wrapper_parent;
+  BioseqSetPtr  wrapper;
+  SeqEntryPtr   top;
+  ValNodePtr    item;
+
+  if (list == NULL) {
+    return;
+  }
+
+  top = GetTopSeqEntryForEntityID (entityID);
+  if (top == NULL || !IS_Bioseq_set (top)) {
+    return;
+  }
+  wrapper = top->data.ptrvalue;
+  if (wrapper == NULL || wrapper->_class != BioseqseqSet_class_genbank) {
+    return;
+  }
+
+  /* descriptors go down first, before any sequence changes position */
+  SetDescriptorPropagate (wrapper);
+
+  /* relocate the sequences into the wrapper while the object manager
+   * data of the top entry is held aside */
+  SaveSeqEntryObjMgrData (top, &saved_omdp, &saved_omd);
+  GetSeqEntryParent (top, &wrapper_parent, &wrapper_parent_type);
+
+  AddItemListToSet (list, wrapper, FALSE);
+
+  RestoreSeqEntryObjMgrData (top, saved_omdp, &saved_omd);
+
+  /* drop each moved sequence from every alignment under the top entry */
+  item = list;
+  while (item != NULL) {
+    VisitAnnotsInSep (top, item->data.ptrvalue, RemoveBioseqFromAlignmentsCallback);
+    item = item->next;
+  }
+  DeleteMarkedObjects (entityID, 0, NULL);
+
+  ObjMgrSetDirtyFlag (entityID, TRUE);
+  ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
+}
+
+
static void GetBioseqListCallback (BioseqPtr bsp, Pointer userdata)
{
if (bsp != NULL && userdata != NULL && ! ISA_aa (bsp->mol))
@@ -17348,55 +17907,7096 @@ NLM_EXTERN void SegregateSetsByNumberPerSet (SeqEntryPtr sep, Int4 num_per_set)
}
-NLM_EXTERN CharPtr CompressSpaces (CharPtr str)
+typedef Boolean (*Nlm_ParseProc) PROTO ((CharPtr, Pointer)); /* per-token callback for ParseLineOfTokens:
+ * receives the token text (NULL when the line has no column left for it) and
+ * the caller's data pointer; returning FALSE stops the parse. */
+
+
+/* Token callback for columns that carry nothing of interest: the token
+ * and the data pointer are ignored and the column is always accepted.
+ */
+static Boolean SkipToken (CharPtr cp, Pointer data)
+{
+  Boolean accepted = TRUE;
+
+  return accepted;
+}
+/* Splits a tab-separated line and feeds each column to the callback at the
+ * same position in token_funcs (a NULL-terminated array).
+ *
+ * line:        text to split; each tab is overwritten with NUL while its
+ *              column is being handled and then put back.
+ * token_funcs: callbacks, one per column, ending with a NULL entry.
+ * data:        passed through unchanged to every callback.
+ *
+ * Returns FALSE when line has no text, token_funcs is NULL, or any callback
+ * returns FALSE (no further callbacks run after that).  The text after the
+ * last tab goes to the next callback; callbacks left over after that are
+ * each called with a NULL token.  Columns beyond the end of the callback
+ * table are ignored.
+ */
+static Boolean ParseLineOfTokens (CharPtr line, Nlm_ParseProc PNTR token_funcs, Pointer data)
{
- Char ch;
- CharPtr dst;
- Char last;
- CharPtr ptr;
+ CharPtr cp, cp_next;
+ Char ch_was;
+ Int4 token_num = 0;
+ Boolean rval = TRUE;
- if (str != NULL && str [0] != '\0') {
- dst = str;
- ptr = str;
- ch = *ptr;
- while (ch != '\0' && ch <= ' ') {
- ptr++;
- ch = *ptr;
- }
- while (ch != '\0') {
- *dst = ch;
- dst++;
- ptr++;
- last = ch;
- ch = *ptr;
- if (ch != '\0' && ch < ' ') {
- *ptr = ' ';
- ch = *ptr;
- }
- while (ch != '\0' && last <= ' ' && ch <= ' ') {
- ptr++;
- ch = *ptr;
- }
- }
- *dst = '\0';
- dst = NULL;
- ptr = str;
- ch = *ptr;
- while (ch != '\0') {
- if (ch != ' ') {
- dst = NULL;
- } else if (dst == NULL) {
- dst = ptr;
+ if (StringHasNoText (line) || token_funcs == NULL) {
+ return FALSE;
+ }
+
+ cp = line;
+ cp_next = StringChr (cp, '\t');
+ while (cp_next != NULL && rval && token_funcs[token_num] != NULL) {
+ ch_was = *cp_next;
+ *cp_next = 0; /* end the current column so the callback sees only it */
+ rval = token_funcs[token_num] (cp, data);
+ *cp_next = ch_was; /* put the separator back */
+ cp = cp_next + 1;
+ cp_next = StringChr (cp, '\t');
+ token_num++;
+ }
+
+ if (rval && token_funcs[token_num] != NULL) {
+ /* last token_func for end of line */
+ rval = token_funcs[token_num](cp, data);
+ token_num++;
+ while (token_funcs[token_num] != NULL && rval) {
+ rval = token_funcs[token_num](NULL, data); /* no column left: callback gets NULL */
+ token_num++;
+ }
+ }
+ return rval;
+}
+
+
+/* Output from Fungal ITS sequence extractor, one tab-separated row per sequence:
+ * first column is "(position in set)" followed by the ID and the length as "N bp."
+ * next column is ITS1 or ---- (---- means it's not there)
+ * next column is ITS2 or ---- (---- means it's not there)
+ * next two columns are skipped by the token_parsers table (contents not used here)
+ * next column is range for ITS1, written "ITS1: <range>" (or ----- when absent)
+ * next column is range for ITS2, written "ITS2: <range>" (or ----- when absent)
+ * next column (if present) begins with "Reverse complementary" to indicate
+ * that the row describes the reverse complement.
+ */
+
+typedef struct extractorinfo {
+ CharPtr id; /* copy of the ID text from the first column; freed by ExtractorInfoFree */
+ Int4 length; /* number that precedes " bp." in the first column */
+ Boolean has_its1; /* TRUE when the second column reads "ITS1" */
+ Boolean has_its2; /* TRUE when the third column reads "ITS2" */
+ CharPtr its1_range; /* copy of the text after "ITS1: "; freed by ExtractorInfoFree */
+ CharPtr its2_range; /* copy of the text after "ITS2: "; freed by ExtractorInfoFree */
+ Boolean is_complement; /* TRUE when the last column starts with "Reverse complementary" */
+} ExtractorInfoData, PNTR ExtractorInfoPtr;
+
+
+/* Allocates an ExtractorInfoData record with every field cleared.
+ *
+ * Returns the new record, or NULL when the allocation fails.  Release the
+ * record with ExtractorInfoFree.
+ */
+static ExtractorInfoPtr ExtractorInfoNew (void)
+{
+  ExtractorInfoPtr ep = (ExtractorInfoPtr) MemNew (sizeof (ExtractorInfoData));
+
+  /* do not write through a failed allocation */
+  if (ep != NULL) {
+    MemSet (ep, 0, sizeof (ExtractorInfoData));
+  }
+  return ep;
+}
+
+
+/* Releases an extractor record together with the strings it owns.
+ * Accepts NULL.  Returns the result of freeing the record so that callers
+ * can clear their pointer in the same statement.
+ */
+static ExtractorInfoPtr ExtractorInfoFree (ExtractorInfoPtr ep)
+{
+  if (ep == NULL) {
+    return ep;
+  }
+
+  ep->id = MemFree (ep->id);
+  ep->its1_range = MemFree (ep->its1_range);
+  ep->its2_range = MemFree (ep->its2_range);
+
+  ep = MemFree (ep);
+  return ep;
+}
+
+
+/* Token callback for the first extractor column, which has the shape
+ * "(position) ID ... N bp.".
+ *
+ * cp:   column text; must end in " bp." preceded by at least one digit and
+ *       must contain a ')' ahead of the ID.
+ * data: ExtractorInfoPtr that receives length (the number before " bp.")
+ *       and id (a newly allocated copy of the ID with trailing white space
+ *       removed).  When the ID part contains two or more '|' characters,
+ *       only the text before the second one is kept.
+ *
+ * Returns TRUE on success; FALSE when the column does not have the expected
+ * shape, data is NULL, or the ID copy cannot be allocated.
+ */
+static Boolean ParseExtractorIdAndLength (CharPtr cp, Pointer data)
+{
+  ExtractorInfoPtr ep;
+  CharPtr          div, id_start, id_end;
+  Int4             len;
+  Char             ch_was;
+
+  if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) {
+    return FALSE;
+  }
+  /* separate length and ID */
+  len = StringLen (cp);
+  /* " bp." needs at least one digit in front of it; requiring five
+   * characters also keeps the backwards scan below inside the string */
+  if (len < 5) {
+    return FALSE;
+  }
+  if (StringCmp (cp + len - 4, " bp.") != 0) {
+    return FALSE;
+  }
+
+  /* walk back over the digits of the length; the casts keep the <ctype.h>
+   * calls defined for characters with negative values */
+  div = cp + len - 5;
+  while (div > cp && isdigit ((unsigned char) *div)) {
+    div--;
+  }
+  if (!isdigit ((unsigned char) *(div + 1))) {
+    return FALSE;
+  }
+  ep->length = atoi (div + 1);
+
+  /* skip over the part that indicates the position of the sequence (1 of N, 2 of N, etc.) */
+  id_start = StringChr (cp, ')');
+  if (id_start == NULL) {
+    return FALSE;
+  }
+  id_start++;
+  while (isspace ((unsigned char) *id_start)) {
+    id_start++;
+  }
+
+  if (id_start >= div) {
+    return FALSE;
+  }
+
+  /* if we have a list of IDs, truncate after just the first one */
+  id_end = StringChr (id_start, '|');
+  if (id_end != NULL && id_end < div) {
+    id_end = StringChr (id_end + 1, '|');
+    if (id_end != NULL && id_end < div) {
+      div = id_end;
+    }
+  }
+
+  /* copy the ID by ending the string at div for the duration of the copy */
+  ch_was = *div;
+  *div = 0;
+  ep->id = StringSave (id_start);
+  *div = ch_was;
+  if (ep->id == NULL) {
+    return FALSE;
+  }
+  /* trim spaces from end of ID */
+  cp = ep->id + StringLen (ep->id) - 1;
+  while (cp > ep->id && isspace ((unsigned char) *cp)) {
+    cp--;
+  }
+  *(cp + 1) = 0;
+  return TRUE;
+}
+
+
+/* Token callback for the ITS1 presence column.
+ * "ITS1" sets has_its1, "----" clears it; any other text, an empty or NULL
+ * token, or a NULL data pointer makes the callback return FALSE.
+ */
+static Boolean ParseHasITS1 (CharPtr cp, Pointer data)
+{
+  ExtractorInfoPtr info;
+
+  if (StringHasNoText (cp)) {
+    return FALSE;
+  }
+  info = (ExtractorInfoPtr) data;
+  if (info == NULL) {
+    return FALSE;
+  }
+
+  if (StringCmp (cp, "ITS1") == 0) {
+    info->has_its1 = TRUE;
+    return TRUE;
+  }
+  if (StringCmp (cp, "----") == 0) {
+    info->has_its1 = FALSE;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Token callback for the ITS2 presence column.
+ * "ITS2" sets has_its2, "----" clears it; any other text, an empty or NULL
+ * token, or a NULL data pointer makes the callback return FALSE.
+ */
+static Boolean ParseHasITS2 (CharPtr cp, Pointer data)
+{
+  ExtractorInfoPtr info;
+
+  if (StringHasNoText (cp)) {
+    return FALSE;
+  }
+  info = (ExtractorInfoPtr) data;
+  if (info == NULL) {
+    return FALSE;
+  }
+
+  if (StringCmp (cp, "ITS2") == 0) {
+    info->has_its2 = TRUE;
+    return TRUE;
+  }
+  if (StringCmp (cp, "----") == 0) {
+    info->has_its2 = FALSE;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Token callback for the ITS1 range column.
+ *
+ * A token starting with '-' is accepted only when has_its1 is not set.
+ * A token starting with "ITS1: " is accepted only when has_its1 is set; the
+ * text after that prefix is then copied into its1_range.
+ * Everything else (including an empty or NULL token, or NULL data) fails.
+ */
+static Boolean ParseITS1Range (CharPtr cp, Pointer data)
+{
+  ExtractorInfoPtr info;
+
+  if (StringHasNoText (cp)) {
+    return FALSE;
+  }
+  info = (ExtractorInfoPtr) data;
+  if (info == NULL) {
+    return FALSE;
+  }
+
+  /* a dash placeholder must agree with the presence column */
+  if (*cp == '-') {
+    if (info->has_its1) {
+      return FALSE;
+    }
+    return TRUE;
+  }
+
+  if (StringNCmp (cp, "ITS1: ", 6) != 0 || !info->has_its1) {
+    return FALSE;
+  }
+  info->its1_range = StringSave (cp + 6);
+  return TRUE;
+}
+
+
+/* Token callback for the ITS2 range column.
+ *
+ * A token starting with '-' is accepted only when has_its2 is not set.
+ * A token starting with "ITS2: " is accepted only when has_its2 is set; the
+ * text after that prefix is then copied into its2_range.
+ * Everything else (including an empty or NULL token, or NULL data) fails.
+ */
+static Boolean ParseITS2Range (CharPtr cp, Pointer data)
+{
+  ExtractorInfoPtr info;
+
+  if (StringHasNoText (cp)) {
+    return FALSE;
+  }
+  info = (ExtractorInfoPtr) data;
+  if (info == NULL) {
+    return FALSE;
+  }
+
+  /* a dash placeholder must agree with the presence column */
+  if (*cp == '-') {
+    if (info->has_its2) {
+      return FALSE;
+    }
+    return TRUE;
+  }
+
+  if (StringNCmp (cp, "ITS2: ", 6) != 0 || !info->has_its2) {
+    return FALSE;
+  }
+  info->its2_range = StringSave (cp + 6);
+  return TRUE;
+}
+
+
+/* Token callback for the optional orientation column.
+ * An empty or NULL token clears is_complement; a token that starts with
+ * "Reverse complementary" sets it.  Any other text, or a NULL data pointer,
+ * makes the callback return FALSE.
+ */
+static Boolean ParseIsComplement (CharPtr cp, Pointer data)
+{
+  ExtractorInfoPtr info = (ExtractorInfoPtr) data;
+
+  if (info == NULL) {
+    return FALSE;
+  }
+
+  if (StringHasNoText (cp)) {
+    info->is_complement = FALSE;
+    return TRUE;
+  }
+  if (StringNCmp (cp, "Reverse complementary", 21) == 0) {
+    info->is_complement = TRUE;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/* Column-by-column callbacks for one extractor result row, in column order;
+ * handed to ParseLineOfTokens with an ExtractorInfoPtr as the data pointer.
+ * The NULL entry terminates the table.
+ */
+static Nlm_ParseProc token_parsers[] = {
+ ParseExtractorIdAndLength,
+ ParseHasITS1,
+ ParseHasITS2,
+ SkipToken, /* column not used */
+ SkipToken, /* column not used */
+ ParseITS1Range,
+ ParseITS2Range,
+ ParseIsComplement,
+ NULL};
+
+/* Features that MakeLabelFromExtractorInfo can report.  The values are used
+ * as indices into its feat_present array, which is walked with the same
+ * index as extractor_feature_labels, so the order here must match that table.
+ */
+typedef enum {
+ eExtractorFeat18S = 0,
+ eExtractorFeatITS1,
+ eExtractorFeat58S,
+ eExtractorFeatITS2,
+ eExtractorFeat28S
+} EExtractorFeat;
+/* Display text for each EExtractorFeat value, indexed by that value. */
+CharPtr extractor_feature_labels[] = {
+ "18S ribosomal RNA",
+ "internal transcribed spacer 1",
+ "5.8S ribosomal RNA",
+ "internal transcribed spacer 2",
+ "28S ribosomal RNA"
+};
+
+/* Builds the comment text "contains <feature list>" for one extractor row.
+ *
+ * Which features are listed is worked out from the ITS flags and ranges:
+ *   - 18S is listed when ITS1 is present and its range does not start "1-";
+ *   - 5.8S is listed when both spacers are present, when only ITS1 is
+ *     present and the text after the first '-' of its range is not "end",
+ *     or when only ITS2 is present and its range does not start "1-";
+ *   - 28S is listed when ITS2 is present and the text after the first '-'
+ *     of its range is not "end".
+ * Only the first unbroken run of listed features is written; they are
+ * joined with ", ", " and " (exactly two features) or ", and ".
+ *
+ * Returns a string allocated with MemNew, or NULL when ep is NULL.
+ */
+static CharPtr MakeLabelFromExtractorInfo (ExtractorInfoPtr ep)
+{
+ Boolean feat_present[5];
+ CharPtr cp, label;
+ Int4 len, i, num_feat = 0, feat_num = 0;
+
+ if (ep == NULL) {
+ return NULL;
+ }
+
+ MemSet (feat_present, 0, sizeof (feat_present));
+ if (ep->has_its1) {
+ feat_present[eExtractorFeatITS1] = TRUE;
+ if (StringNCmp (ep->its1_range, "1-", 2) == 0) {
+ feat_present[eExtractorFeat18S] = FALSE;
+ } else {
+ feat_present[eExtractorFeat18S] = TRUE;
+ }
+ if (ep->has_its2) {
+ feat_present[eExtractorFeat58S] = TRUE;
+ feat_present[eExtractorFeatITS2] = TRUE;
+ cp = StringChr (ep->its2_range, '-');
+ if (cp != NULL && StringCmp (cp + 1, "end") == 0) {
+ feat_present[eExtractorFeat28S] = FALSE;
+ } else {
+ feat_present[eExtractorFeat28S] = TRUE;
}
- ptr++;
- ch = *ptr;
+ } else {
+ cp = StringChr (ep->its1_range, '-');
+ if (cp != NULL && StringCmp (cp + 1, "end") == 0) {
+ feat_present[eExtractorFeat58S] = FALSE;
+ } else {
+ feat_present[eExtractorFeat58S] = TRUE;
+ }
+ feat_present[eExtractorFeatITS2] = FALSE;
+ feat_present[eExtractorFeat28S] = FALSE;
+ }
+ } else {
+ feat_present[eExtractorFeat18S] = FALSE;
+ feat_present[eExtractorFeatITS1] = FALSE;
+ if (StringNCmp (ep->its2_range, "1-", 2) == 0) {
+ feat_present[eExtractorFeat58S] = FALSE;
+ } else {
+ feat_present[eExtractorFeat58S] = TRUE;
}
- if (dst != NULL) {
- *dst = '\0';
+ feat_present[eExtractorFeatITS2] = TRUE;
+ cp = StringChr (ep->its2_range, '-');
+ if (cp != NULL && StringCmp (cp + 1, "end") == 0) {
+ feat_present[eExtractorFeat28S] = FALSE;
+ } else {
+ feat_present[eExtractorFeat28S] = TRUE;
}
}
- return str;
+
+ len = 15; /* room for "contains ", the terminator and the longer " and " joiners */
+ for (i = 0; i < 5; i++) {
+ if (feat_present[i]) {
+ len += StringLen (extractor_feature_labels[i]) + 2;
+ num_feat++;
+ } else if (num_feat > 0) {
+ break; /* stop at the first gap after a listed feature */
+ }
+ }
+ label = (CharPtr) MemNew (sizeof (Char) * len);
+ sprintf (label, "contains ");
+ for (i = 0; i < 5; i++) {
+ if (feat_present[i]) {
+ if (feat_num > 0) {
+ if (feat_num == num_feat - 1) { /* joiner in front of the last feature */
+ if (num_feat == 2) {
+ StringCat (label, " and ");
+ } else {
+ StringCat (label, ", and ");
+ }
+ } else {
+ StringCat (label, ", ");
+ }
+ }
+ StringCat (label, extractor_feature_labels[i]);
+ feat_num++;
+ } else if (feat_num > 0) {
+ break; /* same stopping rule as the sizing loop above */
+ }
+ }
+ return label;
}
+/* Entry-explorer callback that reverse-complements, for one Bioseq, the
+ * features stored directly on an entry.
+ *
+ * sep:    entry being visited; its annotation chain is taken from the
+ *         Bioseq or Bioseq-set it holds.
+ * mydata: the BioseqPtr whose features are to be adjusted; nothing happens
+ *         when it is NULL or when ISA_na rejects its mol value.
+ * index, indent: not used.
+ *
+ * Every feature in each type-1 annotation is passed to
+ * RevCompOneFeatForBioseq together with that Bioseq.
+ */
+NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
+
+{
+  BioseqPtr    target;
+  SeqAnnotPtr  annot;
+  SeqFeatPtr   feat;
+
+  if (mydata == NULL) return;
+  if (sep == NULL || sep->data.ptrvalue == NULL) return;
+
+  if (IS_Bioseq (sep)) {
+    annot = ((BioseqPtr) sep->data.ptrvalue)->annot;
+  } else if (IS_Bioseq_set (sep)) {
+    annot = ((BioseqSetPtr) sep->data.ptrvalue)->annot;
+  } else {
+    return;
+  }
+
+  target = (BioseqPtr) mydata;
+  if (target == NULL) return;
+  if (! ISA_na (target->mol)) return;
+
+  for (; annot != NULL; annot = annot->next) {
+    if (annot->type != 1) {
+      continue;
+    }
+    for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next) {
+      RevCompOneFeatForBioseq (feat, target);
+    }
+  }
+}
+
+
+/* Turns one row of extractor output into an RNA feature on the sequence
+ * the row names.
+ *
+ * line: one tab-separated result row.
+ * sep:  entry used to resolve the row's ID and, for reverse-complement
+ *       rows, explored so existing features can be adjusted.
+ *
+ * Returns the new feature, or NULL when the line is empty or when an error
+ * was posted: the row cannot be parsed, names neither spacer, has an ID
+ * ending in "...", or names a sequence that cannot be found.
+ *
+ * For rows marked as reverse complement the sequence itself is reverse-
+ * complemented before the feature is created.  The feature's comment is
+ * the label from MakeLabelFromExtractorInfo and its location is marked
+ * partial at both ends.
+ */
+static SeqFeatPtr ParseExtractorResultRowToFeatures (CharPtr line, SeqEntryPtr sep)
+{
+  ExtractorInfoPtr info;
+  SeqFeatPtr       feat = NULL;
+  CharPtr          comment;
+  Int4             id_len;
+  SeqIdPtr         id;
+  BioseqPtr        target;
+  RnaRefPtr        rna;
+  RNAGenPtr        rna_gen;
+  Boolean          id_truncated;
+
+  if (StringHasNoText (line)) {
+    return NULL;
+  }
+
+  info = ExtractorInfoNew ();
+  if (!ParseLineOfTokens(line, token_parsers, info)) {
+    info = ExtractorInfoFree (info);
+    Message (MSG_POSTERR, "Unable to parse extractor line %s", line);
+    return NULL;
+  }
+  if (!(info->has_its1 || info->has_its2)) {
+    info = ExtractorInfoFree (info);
+    Message (MSG_POSTERR, "Unable to determine feature list for line %s", line);
+    return NULL;
+  }
+
+  /* an ID that ends in three dots cannot be matched to a sequence */
+  id_len = StringLen (info->id);
+  id_truncated = (Boolean) (id_len > 3
+                            && info->id[id_len - 3] == '.'
+                            && info->id[id_len - 2] == '.'
+                            && info->id[id_len - 1] == '.');
+  if (id_truncated) {
+    info = ExtractorInfoFree (info);
+    Message (MSG_POSTERR, "ID was truncated for line %s", line);
+    return NULL;
+  }
+
+  /* resolve the ID to a sequence in the record */
+  id = CreateSeqIdFromText (info->id, sep);
+  target = BioseqFind (id);
+  id = SeqIdFree (id);
+  if (target == NULL) {
+    info = ExtractorInfoFree (info);
+    Message (MSG_POSTERR, "ID for sequence not present in record in line %s", line);
+    return NULL;
+  }
+
+  /* the label is computed from the row as reported */
+  comment = MakeLabelFromExtractorInfo(info);
+
+  if (info->is_complement) {
+    BioseqRevComp (target);
+    SeqEntryExplore (sep, (Pointer) target, RevCompFeats);
+  }
+
+  /* build the RNA feature and hang the label on it */
+  feat = CreateNewFeatureOnBioseq (target, SEQFEAT_RNA, NULL);
+  rna = RnaRefNew ();
+  feat->data.value.ptrvalue = rna;
+  rna->type = 255;
+  rna_gen = RNAGenNew ();
+  rna->ext.choice = 3;
+  rna->ext.value.ptrvalue = rna_gen;
+  feat->comment = comment;
+  SetSeqLocPartial (feat->location, TRUE, TRUE);
+
+  info = ExtractorInfoFree (info);
+  return feat;
+}
+
+/* Reads extractor output line by line and creates a feature for every row
+ * that ParseExtractorResultRowToFeatures accepts.
+ *
+ * fp:  open stream positioned at the first row; NULL is a no-op.
+ * sep: entry in which the sequences named by the rows are looked up.
+ *
+ * Reading stops at the first NULL line or at a line whose first character
+ * is EOF.  Rows that fail are reported by the row parser and skipped.
+ */
+NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep)
+{
+  ReadBufferData rbd;
+  CharPtr        line;
+
+  if (fp == NULL) {
+    return;
+  }
+
+  rbd.fp = fp;
+  rbd.current_data = NULL;
+  line = AbstractReadFunction (&rbd);
+  while (line != NULL && line[0] != EOF) {
+    /* TODO: skip intro lines */
+    ParseExtractorResultRowToFeatures(line, sep);
+    line = MemFree (line);
+    line = AbstractReadFunction (&rbd);
+  }
+  /* the loop can end on a non-NULL line that starts with EOF; release it
+   * like every other line (a NULL pointer is passed through harmlessly,
+   * as elsewhere in this file) */
+  line = MemFree (line);
+}
+
+extern CharPtr latlon_onedegree [];
+CharPtr latlon_onedegree [] = {
+ "1",
+ "Afghanistan",
+ "\t39\t69\t72",
+ "\t38\t63\t75",
+ "\t37\t62\t75",
+ "\t36\t60\t75",
+ "\t35\t59\t75",
+ "\t34\t59\t72",
+ "\t33\t59\t72",
+ "\t32\t59\t71",
+ "\t31\t59\t70",
+ "\t30\t59\t70",
+ "\t29\t59\t67",
+ "\t28\t59\t67",
+ "Albania",
+ "\t43\t18\t21",
+ "\t42\t18\t21",
+ "\t41\t18\t22",
+ "\t40\t18\t22",
+ "\t39\t18\t22",
+ "\t38\t18\t21",
+ "Algeria",
+ "\t38\t5\t8",
+ "\t37\t-1\t9",
+ "\t36\t-3\t9",
+ "\t35\t-3\t9",
+ "\t34\t-3\t9",
+ "\t33\t-3\t10",
+ "\t32\t-4\t10",
+ "\t31\t-6\t10",
+ "\t30\t-9\t10",
+ "\t29\t-9\t10",
+ "\t28\t-9\t10",
+ "\t27\t-9\t10",
+ "\t26\t-9\t11",
+ "\t25\t-9\t12",
+ "\t24\t-7\t12",
+ "\t23\t-5\t12",
+ "\t22\t-4\t12",
+ "\t21\t-2\t12",
+ "\t20\t-1\t10",
+ "\t19\t0\t8",
+ "\t18\t1\t7",
+ "\t17\t2\t4",
+ "American Samoa",
+ "\t-10\t-172\t-170",
+ "\t-11\t-172\t-170",
+ "\t-12\t-172\t-170",
+ "\t-13\t-171\t-167",
+ "\t-14\t-171\t-167",
+ "\t-15\t-171\t-167",
+ "Andorra",
+ "\t43\t0\t2",
+ "\t42\t0\t2",
+ "\t41\t0\t2",
+ "Angola",
+ "\t-3\t11\t14",
+ "\t-4\t11\t17",
+ "\t-5\t11\t17\t19\t21",
+ "\t-6\t11\t22",
+ "\t-7\t11\t22",
+ "\t-8\t11\t23",
+ "\t-9\t11\t25",
+ "\t-10\t11\t25",
+ "\t-11\t11\t25",
+ "\t-12\t11\t25",
+ "\t-13\t11\t25",
+ "\t-14\t10\t25",
+ "\t-15\t10\t23",
+ "\t-16\t10\t24",
+ "\t-17\t10\t24",
+ "\t-18\t10\t24",
+ "\t-19\t19\t22",
+ "Anguilla",
+ "\t19\t-64\t-61",
+ "\t18\t-64\t-61",
+ "\t17\t-64\t-61",
+ "Antarctica",
+ "\t-59\t-47\t-43",
+ "\t-60\t-59\t-53\t-47\t-43",
+ "\t-61\t-62\t-53\t-47\t-43",
+ "\t-62\t-62\t-53",
+ "\t-63\t-65\t-54",
+ "\t-64\t-66\t-54\t51\t56\t99\t104\t110\t114",
+ "\t-65\t-69\t-56\t47\t58\t86\t117\t119\t144",
+ "\t-66\t-70\t-59\t42\t70\t79\t147",
+ "\t-67\t-91\t-89\t-73\t-59\t31\t35\t38\t71\t76\t156",
+ "\t-68\t-91\t-89\t-76\t-60\t31\t161",
+ "\t-69\t-91\t-89\t-77\t-60\t-11\t168",
+ "\t-70\t-103\t-95\t-77\t-59\t-13\t171",
+ "\t-71\t-106\t-87\t-81\t-79\t-77\t-58\t-15\t171",
+ "\t-72\t-128\t-117\t-115\t-112\t-106\t-58\t-22\t-19\t-17\t171",
+ "\t-73\t-137\t-109\t-106\t-58\t-23\t171",
+ "\t-74\t-147\t-58\t-27\t170",
+ "\t-75\t-150\t-59\t-32\t166",
+ "\t-76\t-159\t-62\t-48\t-44\t-36\t170",
+ "\t-77\t-165\t-65\t-51\t-42\t-37\t170",
+ "\t-78\t-165\t-64\t-62\t-58\t-52\t-41\t-37\t170",
+ "\t-79\t-165\t-58\t-55\t168",
+ "\t-80\t-165\t-58\t-55\t164",
+ "\t-81\t-175\t-170\t-164\t169",
+ "\t-82\t-175\t177",
+ "\t-83\t-180\t180",
+ "\t-84\t-180\t180",
+ "\t-85\t-180\t180",
+ "\t-86\t-180\t180",
+ "\t-87\t-180\t180",
+ "\t-88\t-180\t180",
+ "\t-89\t-180\t180",
+ "\t-90\t-180\t180",
+ "\t-90\t-180\t180",
+ "Antigua and Barbuda",
+ "\t18\t-62\t-60",
+ "\t17\t-62\t-60",
+ "\t16\t-62\t-60",
+ "\t15\t-62\t-60",
+ "Argentina",
+ "\t-20\t-67\t-61",
+ "\t-21\t-68\t-60",
+ "\t-22\t-68\t-59",
+ "\t-23\t-69\t-57",
+ "\t-24\t-69\t-52",
+ "\t-25\t-69\t-52",
+ "\t-26\t-70\t-52",
+ "\t-27\t-70\t-52",
+ "\t-28\t-71\t-52",
+ "\t-29\t-71\t-54",
+ "\t-30\t-71\t-55",
+ "\t-31\t-71\t-56",
+ "\t-32\t-71\t-56",
+ "\t-33\t-71\t-56",
+ "\t-34\t-71\t-56",
+ "\t-35\t-72\t-55",
+ "\t-36\t-72\t-55",
+ "\t-37\t-72\t-55",
+ "\t-38\t-72\t-55",
+ "\t-39\t-72\t-56",
+ "\t-40\t-72\t-60",
+ "\t-41\t-73\t-61",
+ "\t-42\t-73\t-61",
+ "\t-43\t-73\t-62",
+ "\t-44\t-73\t-63",
+ "\t-45\t-73\t-64",
+ "\t-46\t-73\t-64",
+ "\t-47\t-74\t-64",
+ "\t-48\t-74\t-64",
+ "\t-49\t-74\t-64",
+ "\t-50\t-74\t-66",
+ "\t-51\t-74\t-66",
+ "\t-52\t-73\t-66",
+ "\t-53\t-71\t-62",
+ "\t-54\t-69\t-62",
+ "\t-55\t-69\t-62",
+ "\t-56\t-67\t-65",
+ "Armenia",
+ "\t42\t42\t46",
+ "\t41\t42\t46",
+ "\t40\t42\t47",
+ "\t39\t42\t47",
+ "\t38\t43\t47",
+ "\t37\t45\t47",
+ "Aruba",
+ "\t13\t-71\t-68",
+ "\t12\t-71\t-68",
+ "\t11\t-71\t-68",
+ "Ashmore and Cartier Islands",
+ "\t-11\t122\t124",
+ "\t-12\t122\t124",
+ "\t-13\t122\t124",
+ "Australia",
+ "\t-8\t141\t143",
+ "\t-9\t131\t133\t141\t143",
+ "\t-10\t129\t137\t140\t144",
+ "\t-11\t129\t137\t140\t144",
+ "\t-12\t124\t137\t140\t144",
+ "\t-13\t123\t137\t140\t146",
+ "\t-14\t123\t138\t140\t146",
+ "\t-15\t121\t146",
+ "\t-16\t121\t147",
+ "\t-17\t120\t147",
+ "\t-18\t118\t149",
+ "\t-19\t114\t150",
+ "\t-20\t112\t151",
+ "\t-21\t112\t151",
+ "\t-22\t112\t152",
+ "\t-23\t112\t154",
+ "\t-24\t111\t154",
+ "\t-25\t111\t154",
+ "\t-26\t111\t154",
+ "\t-27\t112\t154",
+ "\t-28\t112\t154",
+ "\t-29\t113\t154",
+ "\t-30\t113\t154\t158\t160",
+ "\t-31\t113\t154\t158\t160",
+ "\t-32\t113\t154\t158\t160",
+ "\t-33\t113\t129\t131\t153",
+ "\t-34\t113\t125\t133\t152",
+ "\t-35\t113\t124\t134\t152",
+ "\t-36\t115\t119\t134\t151",
+ "\t-37\t135\t151",
+ "\t-38\t138\t151",
+ "\t-39\t139\t149",
+ "\t-40\t142\t149",
+ "\t-41\t142\t149",
+ "\t-42\t143\t149",
+ "\t-43\t144\t149",
+ "\t-44\t144\t149",
+ "\t-53\t157\t159",
+ "\t-54\t157\t159",
+ "\t-55\t157\t159",
+ "Australia: Australian Capital Territory",
+ "\t-34\t147\t150",
+ "\t-35\t147\t150",
+ "\t-36\t147\t150",
+ "Australia: Jervis Bay Territory",
+ "\t-34\t149\t151",
+ "\t-35\t149\t151",
+ "\t-36\t149\t151",
+ "Australia: New South Wales",
+ "\t-27\t147\t154",
+ "\t-28\t140\t154",
+ "\t-29\t140\t154",
+ "\t-30\t140\t154",
+ "\t-31\t140\t154",
+ "\t-32\t140\t154",
+ "\t-33\t140\t153",
+ "\t-34\t140\t152",
+ "\t-35\t140\t152",
+ "\t-36\t142\t151",
+ "\t-37\t143\t151",
+ "\t-38\t147\t151",
+ "Australia: Northern Territory",
+ "\t-9\t131\t133",
+ "\t-10\t129\t137",
+ "\t-11\t129\t137",
+ "\t-12\t128\t137",
+ "\t-13\t128\t137",
+ "\t-14\t128\t138",
+ "\t-15\t128\t139",
+ "\t-16\t128\t139",
+ "\t-17\t128\t139",
+ "\t-18\t128\t139",
+ "\t-19\t128\t139",
+ "\t-20\t128\t139",
+ "\t-21\t128\t139",
+ "\t-22\t128\t139",
+ "\t-23\t128\t139",
+ "\t-24\t128\t139",
+ "\t-25\t128\t139",
+ "\t-26\t128\t139",
+ "\t-27\t128\t139",
+ "Australia: Queensland",
+ "\t-9\t141\t143",
+ "\t-10\t140\t144",
+ "\t-11\t140\t144",
+ "\t-12\t140\t144",
+ "\t-13\t140\t146",
+ "\t-14\t140\t146",
+ "\t-15\t137\t146",
+ "\t-16\t137\t147",
+ "\t-17\t137\t147",
+ "\t-18\t137\t149",
+ "\t-19\t137\t150",
+ "\t-20\t137\t151",
+ "\t-21\t137\t151",
+ "\t-22\t137\t152",
+ "\t-23\t137\t154",
+ "\t-24\t137\t154",
+ "\t-25\t137\t154",
+ "\t-26\t137\t154",
+ "\t-27\t137\t154",
+ "\t-28\t140\t154",
+ "\t-29\t140\t154",
+ "\t-30\t140\t152",
+ "Australia: South Australia",
+ "\t-25\t128\t142",
+ "\t-26\t128\t142",
+ "\t-27\t128\t142",
+ "\t-28\t128\t142",
+ "\t-29\t128\t142",
+ "\t-30\t128\t142",
+ "\t-31\t128\t142",
+ "\t-32\t128\t142",
+ "\t-33\t131\t142",
+ "\t-34\t133\t142",
+ "\t-35\t134\t142",
+ "\t-36\t134\t141",
+ "\t-37\t135\t141",
+ "\t-38\t138\t141",
+ "\t-39\t139\t141",
+ "Australia: Tasmania",
+ "\t-38\t142\t149",
+ "\t-39\t142\t149",
+ "\t-40\t142\t149",
+ "\t-41\t142\t149",
+ "\t-42\t143\t149",
+ "\t-43\t144\t149",
+ "\t-44\t144\t149",
+ "Australia: Victoria",
+ "\t-32\t139\t141",
+ "\t-33\t139\t144",
+ "\t-34\t139\t148",
+ "\t-35\t139\t149",
+ "\t-36\t139\t150",
+ "\t-37\t139\t150",
+ "\t-38\t139\t150",
+ "\t-39\t139\t148",
+ "\t-40\t145\t147",
+ "Australia: Western Australia",
+ "\t-12\t124\t128",
+ "\t-13\t123\t130",
+ "\t-14\t123\t130",
+ "\t-15\t121\t130",
+ "\t-16\t121\t130",
+ "\t-17\t120\t130",
+ "\t-18\t118\t130",
+ "\t-19\t114\t130",
+ "\t-20\t112\t130",
+ "\t-21\t112\t130",
+ "\t-22\t112\t130",
+ "\t-23\t112\t130",
+ "\t-24\t111\t130",
+ "\t-25\t111\t130",
+ "\t-26\t111\t130",
+ "\t-27\t112\t130",
+ "\t-28\t112\t130",
+ "\t-29\t113\t130",
+ "\t-30\t113\t130",
+ "\t-31\t113\t130",
+ "\t-32\t113\t130",
+ "\t-33\t113\t129",
+ "\t-34\t113\t125",
+ "\t-35\t113\t124",
+ "\t-36\t115\t119",
+ "Austria",
+ "\t50\t13\t16",
+ "\t49\t11\t18",
+ "\t48\t8\t18",
+ "\t47\t8\t18",
+ "\t46\t8\t18",
+ "\t45\t8\t17",
+ "Azerbaijan",
+ "\t42\t43\t50",
+ "\t41\t43\t51",
+ "\t40\t43\t51",
+ "\t39\t43\t51",
+ "\t38\t43\t50",
+ "\t37\t44\t50",
+ "Bahamas",
+ "\t27\t-79\t-76",
+ "\t26\t-80\t-75",
+ "\t25\t-80\t-73",
+ "\t24\t-80\t-72",
+ "\t23\t-80\t-71",
+ "\t22\t-80\t-71",
+ "\t21\t-76\t-71",
+ "\t20\t-74\t-71",
+ "\t19\t-74\t-72",
+ "Bahrain",
+ "\t27\t49\t51",
+ "\t26\t49\t51",
+ "\t25\t49\t51",
+ "\t24\t49\t51",
+ "Baker Island",
+ "\t1\t-177\t-175",
+ "\t0\t-177\t-175",
+ "\t-1\t-177\t-175",
+ "Bangladesh",
+ "\t27\t87\t90",
+ "\t26\t87\t93",
+ "\t25\t87\t93",
+ "\t24\t87\t93",
+ "\t23\t87\t93",
+ "\t22\t87\t93",
+ "\t21\t87\t93",
+ "\t20\t88\t93",
+ "\t19\t91\t93",
+ "Barbados",
+ "\t14\t-60\t-58",
+ "\t13\t-60\t-58",
+ "\t12\t-60\t-58",
+ "Bassas da India",
+ "\t-20\t38\t40",
+ "\t-21\t38\t40",
+ "\t-22\t38\t40",
+ "Belarus",
+ "\t57\t26\t30",
+ "\t56\t25\t32",
+ "\t55\t23\t32",
+ "\t54\t22\t33",
+ "\t53\t22\t33",
+ "\t52\t22\t33",
+ "\t51\t22\t32",
+ "\t50\t22\t31",
+ "Belgium",
+ "\t52\t1\t6",
+ "\t51\t1\t7",
+ "\t50\t1\t7",
+ "\t49\t1\t7",
+ "\t48\t3\t6",
+ "Belize",
+ "\t19\t-90\t-86",
+ "\t18\t-90\t-86",
+ "\t17\t-90\t-86",
+ "\t16\t-90\t-86",
+ "\t15\t-90\t-87",
+ "\t14\t-90\t-87",
+ "Benin",
+ "\t13\t1\t4",
+ "\t12\t-1\t4",
+ "\t11\t-1\t4",
+ "\t10\t-1\t4",
+ "\t9\t-1\t4",
+ "\t8\t0\t4",
+ "\t7\t0\t3",
+ "\t6\t0\t3",
+ "\t5\t0\t3",
+ "Bermuda",
+ "\t33\t-65\t-63",
+ "\t32\t-65\t-63",
+ "\t31\t-65\t-63",
+ "Bhutan",
+ "\t29\t88\t92",
+ "\t28\t87\t93",
+ "\t27\t87\t93",
+ "\t26\t87\t93",
+ "\t25\t87\t93",
+ "Bolivia",
+ "\t-8\t-67\t-64",
+ "\t-9\t-70\t-64",
+ "\t-10\t-70\t-63",
+ "\t-11\t-70\t-61",
+ "\t-12\t-70\t-59",
+ "\t-13\t-70\t-59",
+ "\t-14\t-70\t-59",
+ "\t-15\t-70\t-57",
+ "\t-16\t-70\t-56",
+ "\t-17\t-70\t-56",
+ "\t-18\t-70\t-56",
+ "\t-19\t-70\t-56",
+ "\t-20\t-69\t-56",
+ "\t-21\t-69\t-56",
+ "\t-22\t-69\t-61",
+ "\t-23\t-69\t-61",
+ "Borneo",
+ "\t6\t113\t116",
+ "\t5\t113\t116",
+ "\t4\t113\t116",
+ "\t3\t113\t116",
+ "Borneo",
+ "\t5\t114\t118",
+ "\t4\t107\t109\t114\t118",
+ "\t3\t107\t110\t113\t119",
+ "\t2\t107\t120",
+ "\t1\t107\t120",
+ "\t0\t107\t120",
+ "\t-1\t107\t120",
+ "\t-2\t107\t118",
+ "\t-3\t109\t117",
+ "\t-4\t109\t117",
+ "\t-5\t113\t117",
+ "Borneo",
+ "\t8\t115\t118",
+ "\t7\t115\t119",
+ "\t6\t114\t120",
+ "\t5\t112\t120",
+ "\t4\t111\t120",
+ "\t3\t108\t119",
+ "\t2\t108\t116",
+ "\t1\t108\t116",
+ "\t0\t108\t115",
+ "\t-1\t109\t112",
+ "Bosnia and Herzegovina",
+ "\t46\t14\t19",
+ "\t45\t14\t20",
+ "\t44\t14\t20",
+ "\t43\t14\t20",
+ "\t42\t15\t20",
+ "\t41\t16\t19",
+ "Botswana",
+ "\t-16\t22\t26",
+ "\t-17\t19\t26",
+ "\t-18\t19\t27",
+ "\t-19\t19\t28",
+ "\t-20\t19\t30",
+ "\t-21\t18\t30",
+ "\t-22\t18\t30",
+ "\t-23\t18\t30",
+ "\t-24\t18\t28",
+ "\t-25\t18\t27",
+ "\t-26\t19\t26",
+ "\t-27\t19\t23",
+ "Bouvet Island",
+ "\t-53\t2\t4",
+ "\t-54\t2\t4",
+ "\t-55\t2\t4",
+ "Brazil",
+ "\t6\t-61\t-58",
+ "\t5\t-65\t-58\t-52\t-50",
+ "\t4\t-65\t-58\t-53\t-49",
+ "\t3\t-69\t-49",
+ "\t2\t-70\t-48",
+ "\t1\t-71\t-45\t-30\t-28",
+ "\t0\t-71\t-43\t-30\t-28",
+ "\t-1\t-71\t-38\t-30\t-28",
+ "\t-2\t-70\t-37\t-33\t-31",
+ "\t-3\t-73\t-35\t-33\t-31",
+ "\t-4\t-74\t-31",
+ "\t-5\t-74\t-33",
+ "\t-6\t-75\t-33",
+ "\t-7\t-75\t-33",
+ "\t-8\t-75\t-33",
+ "\t-9\t-74\t-33",
+ "\t-10\t-74\t-34",
+ "\t-11\t-73\t-35",
+ "\t-12\t-71\t-36",
+ "\t-13\t-65\t-36",
+ "\t-14\t-63\t-37",
+ "\t-15\t-61\t-37",
+ "\t-16\t-61\t-37",
+ "\t-17\t-61\t-37",
+ "\t-18\t-59\t-38",
+ "\t-19\t-59\t-38\t-30\t-27",
+ "\t-20\t-59\t-38\t-30\t-27",
+ "\t-21\t-59\t-39\t-30\t-27",
+ "\t-22\t-58\t-39",
+ "\t-23\t-58\t-39",
+ "\t-24\t-56\t-42",
+ "\t-25\t-56\t-45",
+ "\t-26\t-56\t-46",
+ "\t-27\t-57\t-47",
+ "\t-28\t-58\t-47",
+ "\t-29\t-58\t-47",
+ "\t-30\t-58\t-48",
+ "\t-31\t-58\t-49",
+ "\t-32\t-57\t-49",
+ "\t-33\t-54\t-51",
+ "\t-34\t-54\t-51",
+ "British Indian Ocean Territory",
+ "\t-4\t70\t73",
+ "\t-5\t70\t73",
+ "\t-6\t70\t73",
+ "\t-7\t70\t73",
+ "\t-8\t71\t73",
+ "British Virgin Islands",
+ "\t19\t-65\t-63",
+ "\t18\t-65\t-63",
+ "\t17\t-65\t-63",
+ "Brunei",
+ "\t6\t113\t116",
+ "\t5\t113\t116",
+ "\t4\t113\t116",
+ "\t3\t113\t116",
+ "Bulgaria",
+ "\t45\t21\t28",
+ "\t44\t21\t29",
+ "\t43\t21\t29",
+ "\t42\t21\t29",
+ "\t41\t21\t29",
+ "\t40\t21\t29",
+ "Burkina Faso",
+ "\t16\t-1\t1",
+ "\t15\t-3\t1",
+ "\t14\t-5\t2",
+ "\t13\t-5\t3",
+ "\t12\t-6\t3",
+ "\t11\t-6\t3",
+ "\t10\t-6\t3",
+ "\t9\t-6\t1",
+ "\t8\t-5\t-1",
+ "Burundi",
+ "\t-1\t27\t31",
+ "\t-2\t27\t31",
+ "\t-3\t27\t31",
+ "\t-4\t28\t31",
+ "\t-5\t28\t31",
+ "Cambodia",
+ "\t15\t101\t108",
+ "\t14\t101\t108",
+ "\t13\t101\t108",
+ "\t12\t101\t108",
+ "\t11\t101\t108",
+ "\t10\t101\t107",
+ "\t9\t102\t107",
+ "Cameroon",
+ "\t14\t13\t15",
+ "\t13\t13\t16",
+ "\t12\t12\t16",
+ "\t11\t12\t16",
+ "\t10\t11\t16",
+ "\t9\t11\t16",
+ "\t8\t9\t16",
+ "\t7\t8\t16",
+ "\t6\t7\t16",
+ "\t5\t7\t16",
+ "\t4\t7\t16",
+ "\t3\t7\t17",
+ "\t2\t8\t17",
+ "\t1\t8\t17",
+ "\t0\t14\t17",
+ "Canada",
+ "\t84\t-78\t-67",
+ "\t83\t-90\t-60",
+ "\t82\t-96\t-60",
+ "\t81\t-101\t-60",
+ "\t80\t-106\t-61",
+ "\t79\t-115\t-108\t-106\t-66",
+ "\t78\t-121\t-69",
+ "\t77\t-124\t-73",
+ "\t76\t-124\t-74",
+ "\t75\t-125\t-76",
+ "\t74\t-125\t-75",
+ "\t73\t-126\t-73",
+ "\t72\t-126\t-69",
+ "\t71\t-132\t-66",
+ "\t70\t-142\t-65",
+ "\t69\t-142\t-63",
+ "\t68\t-142\t-61",
+ "\t67\t-142\t-60",
+ "\t66\t-142\t-60",
+ "\t65\t-142\t-60",
+ "\t64\t-142\t-61",
+ "\t63\t-142\t-62",
+ "\t62\t-142\t-63",
+ "\t61\t-142\t-89\t-84\t-63",
+ "\t60\t-142\t-91\t-81\t-62",
+ "\t59\t-142\t-91\t-81\t-61",
+ "\t58\t-140\t-88\t-81\t-60",
+ "\t57\t-138\t-86\t-81\t-59",
+ "\t56\t-133\t-57",
+ "\t55\t-134\t-56",
+ "\t54\t-134\t-54",
+ "\t53\t-134\t-54",
+ "\t52\t-134\t-54",
+ "\t51\t-133\t-54",
+ "\t50\t-132\t-52",
+ "\t49\t-129\t-51",
+ "\t48\t-128\t-51",
+ "\t47\t-126\t-51",
+ "\t46\t-90\t-51",
+ "\t45\t-86\t-58\t-56\t-51",
+ "\t44\t-84\t-58",
+ "\t43\t-84\t-73\t-67\t-58",
+ "\t42\t-84\t-75\t-67\t-63\t-61\t-58",
+ "\t41\t-84\t-77",
+ "\t40\t-84\t-80",
+ "Canada: Alberta",
+ "\t61\t-121\t-109",
+ "\t60\t-121\t-109",
+ "\t59\t-121\t-109",
+ "\t58\t-121\t-109",
+ "\t57\t-121\t-109",
+ "\t56\t-121\t-109",
+ "\t55\t-121\t-109",
+ "\t54\t-121\t-109",
+ "\t53\t-121\t-109",
+ "\t52\t-121\t-109",
+ "\t51\t-119\t-109",
+ "\t50\t-118\t-109",
+ "\t49\t-116\t-109",
+ "\t48\t-115\t-109",
+ "\t47\t-115\t-109",
+ "Canada: British Columbia",
+ "\t61\t-140\t-119",
+ "\t60\t-140\t-119",
+ "\t59\t-140\t-119",
+ "\t58\t-140\t-119",
+ "\t57\t-138\t-119",
+ "\t56\t-134\t-119",
+ "\t55\t-134\t-119",
+ "\t54\t-134\t-117",
+ "\t53\t-134\t-116",
+ "\t52\t-134\t-114",
+ "\t51\t-133\t-113",
+ "\t50\t-132\t-113",
+ "\t49\t-129\t-113",
+ "\t48\t-128\t-113",
+ "\t47\t-126\t-113",
+ "Canada: Manitoba",
+ "\t61\t-103\t-93",
+ "\t60\t-103\t-93",
+ "\t59\t-103\t-91",
+ "\t58\t-103\t-88",
+ "\t57\t-103\t-87",
+ "\t56\t-103\t-87",
+ "\t55\t-103\t-87",
+ "\t54\t-103\t-89",
+ "\t53\t-102\t-90",
+ "\t52\t-102\t-92",
+ "\t51\t-102\t-93",
+ "\t50\t-102\t-94",
+ "\t49\t-102\t-94",
+ "\t48\t-102\t-94",
+ "\t47\t-102\t-94",
+ "Canada: New Brunswick",
+ "\t49\t-67\t-63",
+ "\t48\t-70\t-63",
+ "\t47\t-70\t-62",
+ "\t46\t-70\t-62",
+ "\t45\t-68\t-62",
+ "\t44\t-68\t-63",
+ "\t43\t-67\t-65",
+ "Canada: Newfoundland and Labrador",
+ "\t61\t-65\t-63",
+ "\t60\t-65\t-62",
+ "\t59\t-65\t-61",
+ "\t58\t-65\t-60",
+ "\t57\t-65\t-59",
+ "\t56\t-68\t-57",
+ "\t55\t-68\t-56",
+ "\t54\t-68\t-54",
+ "\t53\t-68\t-54",
+ "\t52\t-68\t-54",
+ "\t51\t-68\t-54",
+ "\t50\t-65\t-63\t-59\t-52",
+ "\t49\t-60\t-51",
+ "\t48\t-60\t-51",
+ "\t47\t-60\t-51",
+ "\t46\t-60\t-51",
+ "\t45\t-56\t-51",
+ "Canada: Northwest Territories",
+ "\t79\t-115\t-109",
+ "\t78\t-121\t-109",
+ "\t77\t-124\t-109",
+ "\t76\t-124\t-109",
+ "\t75\t-125\t-109",
+ "\t74\t-125\t-109",
+ "\t73\t-126\t-109",
+ "\t72\t-126\t-109",
+ "\t71\t-132\t-109",
+ "\t70\t-136\t-109",
+ "\t69\t-137\t-109",
+ "\t68\t-137\t-115\t-113\t-111",
+ "\t67\t-137\t-113",
+ "\t66\t-137\t-108",
+ "\t65\t-135\t-100",
+ "\t64\t-134\t-100",
+ "\t63\t-133\t-100",
+ "\t62\t-131\t-100",
+ "\t61\t-130\t-101",
+ "\t60\t-129\t-101",
+ "\t59\t-127\t-101",
+ "Canada: Nova Scotia",
+ "\t48\t-61\t-59",
+ "\t47\t-65\t-58",
+ "\t46\t-66\t-58",
+ "\t45\t-67\t-58",
+ "\t44\t-67\t-58",
+ "\t43\t-67\t-58",
+ "\t42\t-67\t-63\t-61\t-58",
+ "Canada: Nunavut",
+ "\t84\t-78\t-67",
+ "\t83\t-90\t-60",
+ "\t82\t-96\t-60",
+ "\t81\t-101\t-60",
+ "\t80\t-106\t-61",
+ "\t79\t-111\t-108\t-106\t-66",
+ "\t78\t-111\t-69",
+ "\t77\t-111\t-73",
+ "\t76\t-111\t-74",
+ "\t75\t-111\t-76",
+ "\t74\t-111\t-75",
+ "\t73\t-111\t-73",
+ "\t72\t-111\t-69",
+ "\t71\t-118\t-66",
+ "\t70\t-121\t-65",
+ "\t69\t-121\t-63",
+ "\t68\t-121\t-61",
+ "\t67\t-121\t-60",
+ "\t66\t-121\t-60",
+ "\t65\t-118\t-60",
+ "\t64\t-114\t-61",
+ "\t63\t-110\t-62",
+ "\t62\t-103\t-63",
+ "\t61\t-103\t-89\t-84\t-63",
+ "\t60\t-103\t-91\t-81\t-77\t-69\t-63",
+ "\t59\t-103\t-93\t-81\t-76\t-69\t-63",
+ "\t58\t-81\t-75",
+ "\t57\t-81\t-75",
+ "\t56\t-81\t-75",
+ "\t55\t-82\t-75",
+ "\t54\t-83\t-76",
+ "\t53\t-83\t-77",
+ "\t52\t-83\t-77",
+ "\t51\t-82\t-77",
+ "\t50\t-80\t-78",
+ "Canada: Ontario",
+ "\t57\t-90\t-86",
+ "\t56\t-92\t-81",
+ "\t55\t-94\t-81",
+ "\t54\t-95\t-81",
+ "\t53\t-96\t-79",
+ "\t52\t-96\t-78",
+ "\t51\t-96\t-78",
+ "\t50\t-96\t-78",
+ "\t49\t-96\t-78",
+ "\t48\t-96\t-78",
+ "\t47\t-95\t-76",
+ "\t46\t-90\t-73",
+ "\t45\t-86\t-73",
+ "\t44\t-84\t-73",
+ "\t43\t-84\t-73",
+ "\t42\t-84\t-75",
+ "\t41\t-84\t-77",
+ "\t40\t-84\t-80",
+ "Canada: Prince Edward Island",
+ "\t48\t-65\t-62",
+ "\t47\t-65\t-60",
+ "\t46\t-65\t-60",
+ "\t45\t-65\t-60",
+ "\t44\t-63\t-61",
+ "Canada: Quebec",
+ "\t63\t-79\t-71",
+ "\t62\t-79\t-68",
+ "\t61\t-79\t-68\t-66\t-63",
+ "\t60\t-79\t-68\t-66\t-63",
+ "\t59\t-79\t-62",
+ "\t58\t-79\t-62",
+ "\t57\t-79\t-62",
+ "\t56\t-79\t-62",
+ "\t55\t-80\t-62",
+ "\t54\t-80\t-62",
+ "\t53\t-80\t-56",
+ "\t52\t-80\t-56",
+ "\t51\t-80\t-56",
+ "\t50\t-80\t-56",
+ "\t49\t-80\t-57",
+ "\t48\t-80\t-60",
+ "\t47\t-80\t-60",
+ "\t46\t-80\t-65\t-63\t-60",
+ "\t45\t-80\t-68",
+ "\t44\t-78\t-69",
+ "\t43\t-75\t-73",
+ "Canada: Saskatchewan",
+ "\t61\t-111\t-101",
+ "\t60\t-111\t-101",
+ "\t59\t-111\t-101",
+ "\t58\t-111\t-101",
+ "\t57\t-111\t-101",
+ "\t56\t-111\t-100",
+ "\t55\t-111\t-100",
+ "\t54\t-111\t-100",
+ "\t53\t-111\t-100",
+ "\t52\t-111\t-100",
+ "\t51\t-111\t-100",
+ "\t50\t-111\t-100",
+ "\t49\t-111\t-100",
+ "\t48\t-111\t-100",
+ "\t47\t-111\t-100",
+ "Canada: Yukon",
+ "\t70\t-142\t-136",
+ "\t69\t-142\t-135",
+ "\t68\t-142\t-132",
+ "\t67\t-142\t-131",
+ "\t66\t-142\t-131",
+ "\t65\t-142\t-129",
+ "\t64\t-142\t-128",
+ "\t63\t-142\t-127",
+ "\t62\t-142\t-125",
+ "\t61\t-142\t-122",
+ "\t60\t-142\t-122",
+ "\t59\t-142\t-122",
+ "Cape Verde",
+ "\t18\t-26\t-23",
+ "\t17\t-26\t-21",
+ "\t16\t-26\t-21",
+ "\t15\t-26\t-21",
+ "\t14\t-25\t-21",
+ "\t13\t-25\t-22",
+ "Cayman Islands",
+ "\t20\t-82\t-78",
+ "\t19\t-82\t-78",
+ "\t18\t-82\t-78",
+ "Central African Republic",
+ "\t12\t21\t23",
+ "\t11\t20\t24",
+ "\t10\t18\t24",
+ "\t9\t17\t25",
+ "\t8\t14\t26",
+ "\t7\t13\t27",
+ "\t6\t13\t28",
+ "\t5\t13\t28",
+ "\t4\t13\t28",
+ "\t3\t13\t26",
+ "\t2\t14\t19",
+ "\t1\t14\t17",
+ "Chad",
+ "\t24\t13\t17",
+ "\t23\t13\t19",
+ "\t22\t13\t21",
+ "\t21\t13\t23",
+ "\t20\t14\t24",
+ "\t19\t14\t24",
+ "\t18\t14\t24",
+ "\t17\t13\t24",
+ "\t16\t12\t24",
+ "\t15\t12\t24",
+ "\t14\t12\t24",
+ "\t13\t12\t23",
+ "\t12\t12\t23",
+ "\t11\t13\t23",
+ "\t10\t12\t23",
+ "\t9\t12\t23",
+ "\t8\t12\t22",
+ "\t7\t13\t20",
+ "\t6\t14\t18",
+ "Chile",
+ "\t-16\t-70\t-68",
+ "\t-17\t-71\t-67",
+ "\t-18\t-71\t-67",
+ "\t-19\t-71\t-67",
+ "\t-20\t-71\t-67",
+ "\t-21\t-71\t-66",
+ "\t-22\t-71\t-66",
+ "\t-23\t-71\t-66",
+ "\t-24\t-71\t-66",
+ "\t-25\t-106\t-104\t-81\t-78\t-71\t-66",
+ "\t-26\t-110\t-108\t-106\t-104\t-81\t-78\t-72\t-67",
+ "\t-27\t-110\t-108\t-106\t-104\t-81\t-78\t-72\t-67",
+ "\t-28\t-110\t-108\t-72\t-67",
+ "\t-29\t-72\t-68",
+ "\t-30\t-72\t-68",
+ "\t-31\t-72\t-68",
+ "\t-32\t-81\t-77\t-72\t-68",
+ "\t-33\t-81\t-77\t-73\t-68",
+ "\t-34\t-81\t-77\t-73\t-68",
+ "\t-35\t-74\t-68",
+ "\t-36\t-74\t-69",
+ "\t-37\t-74\t-69",
+ "\t-38\t-74\t-69",
+ "\t-39\t-74\t-69",
+ "\t-40\t-75\t-70",
+ "\t-41\t-75\t-70",
+ "\t-42\t-75\t-70",
+ "\t-43\t-76\t-70",
+ "\t-44\t-76\t-70",
+ "\t-45\t-76\t-70",
+ "\t-46\t-76\t-70",
+ "\t-47\t-76\t-70",
+ "\t-48\t-76\t-70",
+ "\t-49\t-76\t-71",
+ "\t-50\t-76\t-69",
+ "\t-51\t-76\t-67",
+ "\t-52\t-76\t-67",
+ "\t-53\t-76\t-66",
+ "\t-54\t-75\t-65",
+ "\t-55\t-74\t-65",
+ "\t-56\t-72\t-65",
+ "China",
+ "\t54\t119\t126",
+ "\t53\t119\t127",
+ "\t52\t118\t127",
+ "\t51\t118\t128",
+ "\t50\t85\t88\t115\t130",
+ "\t49\t84\t90\t114\t135",
+ "\t48\t81\t91\t114\t135",
+ "\t47\t81\t92\t114\t135",
+ "\t46\t79\t94\t110\t135",
+ "\t45\t78\t96\t110\t135",
+ "\t44\t78\t96\t109\t134",
+ "\t43\t78\t132",
+ "\t42\t75\t132",
+ "\t41\t72\t132",
+ "\t40\t72\t129",
+ "\t39\t72\t127",
+ "\t38\t72\t125",
+ "\t37\t72\t123",
+ "\t36\t73\t123",
+ "\t35\t73\t123",
+ "\t34\t74\t121",
+ "\t33\t77\t122",
+ "\t32\t77\t122",
+ "\t31\t77\t123",
+ "\t30\t77\t123",
+ "\t29\t78\t123",
+ "\t28\t81\t123",
+ "\t27\t83\t122",
+ "\t26\t84\t93\t96\t122",
+ "\t25\t96\t121",
+ "\t24\t96\t120",
+ "\t23\t96\t120",
+ "\t22\t96\t118",
+ "\t21\t98\t117",
+ "\t20\t98\t102\t105\t114",
+ "\t19\t107\t112",
+ "\t18\t107\t112",
+ "\t17\t107\t111",
+ "China",
+ "\t25\t117\t119",
+ "\t24\t117\t119",
+ "\t23\t117\t119",
+ "China: Hainan",
+ "\t21\t108\t111",
+ "\t20\t107\t112",
+ "\t19\t107\t112",
+ "\t18\t107\t112",
+ "\t17\t107\t111",
+ "Christmas Island",
+ "\t-9\t104\t106",
+ "\t-10\t104\t106",
+ "\t-11\t104\t106",
+ "Clipperton Island",
+ "\t11\t-110\t-108",
+ "\t10\t-110\t-108",
+ "\t9\t-110\t-108",
+ "Cocos Islands",
+ "\t-11\t95\t97",
+ "\t-12\t95\t97",
+ "\t-13\t95\t97",
+ "Colombia",
+ "\t14\t-82\t-80",
+ "\t13\t-82\t-80\t-73\t-70",
+ "\t12\t-82\t-80\t-75\t-70",
+ "\t11\t-82\t-80\t-76\t-70",
+ "\t10\t-77\t-70",
+ "\t9\t-78\t-71",
+ "\t8\t-78\t-69",
+ "\t7\t-78\t-66",
+ "\t6\t-78\t-66",
+ "\t5\t-78\t-66",
+ "\t4\t-78\t-66",
+ "\t3\t-79\t-66",
+ "\t2\t-80\t-65",
+ "\t1\t-80\t-65",
+ "\t0\t-80\t-65",
+ "\t-1\t-79\t-68",
+ "\t-2\t-75\t-68",
+ "\t-3\t-74\t-68",
+ "\t-4\t-71\t-68",
+ "\t-5\t-71\t-68",
+ "Comoros",
+ "\t-10\t42\t44",
+ "\t-11\t42\t45",
+ "\t-12\t42\t45",
+ "\t-13\t42\t45",
+ "Cook Islands",
+ "\t-7\t-159\t-156",
+ "\t-8\t-159\t-156",
+ "\t-9\t-166\t-164\t-162\t-156",
+ "\t-10\t-166\t-164\t-162\t-159",
+ "\t-11\t-166\t-164\t-162\t-159",
+ "\t-17\t-160\t-158",
+ "\t-18\t-160\t-156",
+ "\t-19\t-160\t-156",
+ "\t-20\t-160\t-156",
+ "\t-21\t-160\t-156",
+ "\t-22\t-160\t-156",
+ "Coral Sea Islands",
+ "\t-15\t146\t151",
+ "\t-16\t146\t151",
+ "\t-17\t146\t151",
+ "\t-18\t147\t149",
+ "\t-20\t152\t156",
+ "\t-21\t152\t156",
+ "\t-22\t152\t156",
+ "\t-23\t154\t156",
+ "Costa Rica",
+ "\t12\t-86\t-83",
+ "\t11\t-86\t-82",
+ "\t10\t-86\t-81",
+ "\t9\t-86\t-81",
+ "\t8\t-86\t-81",
+ "\t7\t-84\t-81",
+ "\t6\t-88\t-86",
+ "\t5\t-88\t-86",
+ "\t4\t-88\t-86",
+ "Cote d'Ivoire",
+ "\t11\t-9\t-3",
+ "\t10\t-9\t-1",
+ "\t9\t-9\t-1",
+ "\t8\t-9\t-1",
+ "\t7\t-9\t-1",
+ "\t6\t-9\t-1",
+ "\t5\t-9\t-1",
+ "\t4\t-8\t-1",
+ "\t3\t-8\t-4",
+ "Croatia",
+ "\t47\t14\t18",
+ "\t46\t12\t20",
+ "\t45\t12\t20",
+ "\t44\t12\t20",
+ "\t43\t12\t20",
+ "\t42\t14\t19",
+ "\t41\t15\t19",
+ "Cuba",
+ "\t24\t-84\t-79",
+ "\t23\t-85\t-76",
+ "\t22\t-85\t-74",
+ "\t21\t-85\t-73",
+ "\t20\t-85\t-73",
+ "\t19\t-80\t-73",
+ "\t18\t-78\t-73",
+ "Curacao",
+ "\t13\t-70\t-67",
+ "\t12\t-70\t-67",
+ "\t11\t-70\t-67",
+ "Cyprus",
+ "\t36\t31\t35",
+ "\t35\t31\t35",
+ "\t34\t31\t35",
+ "\t33\t31\t35",
+ "Cyprus",
+ "\t36\t31\t35",
+ "\t35\t31\t35",
+ "\t34\t31\t35",
+ "Cyprus",
+ "\t35\t31\t34",
+ "\t34\t31\t34",
+ "\t33\t31\t34",
+ "Cyprus",
+ "\t36\t32\t34",
+ "\t35\t32\t34",
+ "\t34\t32\t34",
+ "\t33\t32\t34",
+ "Czech Republic",
+ "\t52\t13\t16",
+ "\t51\t11\t19",
+ "\t50\t11\t19",
+ "\t49\t11\t19",
+ "\t48\t11\t19",
+ "\t47\t12\t18",
+ "Democratic Republic of the Congo",
+ "\t6\t18\t20\t23\t28",
+ "\t5\t17\t31",
+ "\t4\t17\t31",
+ "\t3\t17\t32",
+ "\t2\t16\t32",
+ "\t1\t16\t32",
+ "\t0\t15\t32",
+ "\t-1\t15\t31",
+ "\t-2\t14\t30",
+ "\t-3\t11\t30",
+ "\t-4\t11\t30",
+ "\t-5\t11\t31",
+ "\t-6\t11\t31",
+ "\t-7\t11\t13\t15\t31",
+ "\t-8\t15\t31",
+ "\t-9\t16\t31",
+ "\t-10\t20\t29",
+ "\t-11\t21\t30",
+ "\t-12\t21\t30",
+ "\t-13\t25\t30",
+ "\t-14\t27\t30",
+ "Denmark",
+ "\t58\t7\t12",
+ "\t57\t7\t13",
+ "\t56\t7\t16",
+ "\t55\t7\t16",
+ "\t54\t7\t16",
+ "\t53\t7\t13",
+ "Djibouti",
+ "\t13\t41\t44",
+ "\t12\t40\t44",
+ "\t11\t40\t44",
+ "\t10\t40\t44",
+ "\t9\t40\t43",
+ "Dominica",
+ "\t16\t-62\t-60",
+ "\t15\t-62\t-60",
+ "\t14\t-62\t-60",
+ "Dominican Republic",
+ "\t20\t-72\t-67",
+ "\t19\t-73\t-67",
+ "\t18\t-73\t-67",
+ "\t17\t-73\t-67",
+ "\t16\t-72\t-70",
+ "East Timor",
+ "\t-7\t123\t128",
+ "\t-8\t123\t128",
+ "\t-9\t123\t128",
+ "\t-10\t123\t127",
+ "Ecuador",
+ "\t2\t-80\t-77",
+ "\t1\t-81\t-74",
+ "\t0\t-81\t-74",
+ "\t-1\t-82\t-74",
+ "\t-2\t-82\t-74",
+ "\t-3\t-82\t-74",
+ "\t-4\t-81\t-76",
+ "\t-5\t-81\t-77",
+ "\t-6\t-80\t-78",
+ "Ecuador: Galapagos",
+ "\t2\t-93\t-90",
+ "\t1\t-93\t-88",
+ "\t0\t-93\t-88",
+ "\t-1\t-92\t-88",
+ "\t-2\t-92\t-88",
+ "Egypt",
+ "\t32\t23\t35",
+ "\t31\t23\t35",
+ "\t30\t23\t35",
+ "\t29\t23\t35",
+ "\t28\t23\t35",
+ "\t27\t23\t35",
+ "\t26\t23\t35",
+ "\t25\t23\t36",
+ "\t24\t23\t36",
+ "\t23\t23\t37",
+ "\t22\t23\t37",
+ "\t21\t23\t37",
+ "\t20\t23\t37",
+ "El Salvador",
+ "\t15\t-90\t-87",
+ "\t14\t-91\t-86",
+ "\t13\t-91\t-86",
+ "\t12\t-91\t-86",
+ "Equatorial Guinea",
+ "\t4\t7\t9",
+ "\t3\t7\t12",
+ "\t2\t7\t12",
+ "\t1\t8\t12",
+ "\t0\t4\t6\t8\t12",
+ "\t-1\t4\t6\t8\t10",
+ "\t-2\t4\t6",
+ "Eritrea",
+ "\t19\t37\t39",
+ "\t18\t35\t40",
+ "\t17\t35\t41",
+ "\t16\t35\t41",
+ "\t15\t35\t42",
+ "\t14\t35\t43",
+ "\t13\t35\t44",
+ "\t12\t39\t44",
+ "\t11\t40\t44",
+ "Estonia",
+ "\t60\t21\t29",
+ "\t59\t20\t29",
+ "\t58\t20\t29",
+ "\t57\t20\t28",
+ "\t56\t20\t28",
+ "Ethiopia",
+ "\t15\t35\t41",
+ "\t14\t35\t42",
+ "\t13\t34\t43",
+ "\t12\t33\t43",
+ "\t11\t33\t44",
+ "\t10\t33\t44",
+ "\t9\t32\t47",
+ "\t8\t31\t48",
+ "\t7\t31\t48",
+ "\t6\t31\t48",
+ "\t5\t33\t47",
+ "\t4\t33\t46",
+ "\t3\t34\t46",
+ "\t2\t36\t42",
+ "Europa Island",
+ "\t-21\t39\t41",
+ "\t-22\t39\t41",
+ "\t-23\t39\t41",
+ "Falkland Islands (Islas Malvinas)",
+ "\t-50\t-62\t-56",
+ "\t-51\t-62\t-56",
+ "\t-52\t-62\t-56",
+ "\t-53\t-62\t-57",
+ "Faroe Islands",
+ "\t63\t-8\t-5",
+ "\t62\t-8\t-5",
+ "\t61\t-8\t-5",
+ "\t60\t-7\t-5",
+ "Fiji",
+ "\t-11\t176\t178",
+ "\t-12\t176\t178",
+ "\t-13\t176\t178",
+ "\t-15\t-180\t-178\t176\t180",
+ "\t-16\t-180\t-177\t176\t180",
+ "\t-17\t-180\t-177\t176\t180",
+ "\t-18\t-180\t-177\t176\t180",
+ "\t-19\t-180\t-177\t176\t180",
+ "\t-20\t-180\t-177\t173\t180",
+ "\t-21\t173\t175",
+ "\t-22\t173\t175",
+ "Finland",
+ "\t71\t26\t28",
+ "\t70\t19\t30",
+ "\t69\t19\t30",
+ "\t68\t19\t31",
+ "\t67\t19\t31",
+ "\t66\t22\t31",
+ "\t65\t22\t31",
+ "\t64\t20\t32",
+ "\t63\t20\t32",
+ "\t62\t20\t32",
+ "\t61\t20\t32",
+ "\t60\t20\t31",
+ "\t59\t20\t29",
+ "\t58\t21\t25",
+ "Finland",
+ "\t61\t18\t22",
+ "\t60\t18\t22",
+ "\t59\t18\t22",
+ "\t58\t19\t21",
+ "France",
+ "\t52\t0\t3",
+ "\t51\t0\t5",
+ "\t50\t-2\t8",
+ "\t49\t-6\t9",
+ "\t48\t-6\t9",
+ "\t47\t-6\t9",
+ "\t46\t-5\t8",
+ "\t45\t-3\t8",
+ "\t44\t-2\t8",
+ "\t43\t-2\t8",
+ "\t42\t-2\t8",
+ "\t41\t-2\t7",
+ "France: Corsica",
+ "\t44\t8\t10",
+ "\t43\t7\t10",
+ "\t42\t7\t10",
+ "\t41\t7\t10",
+ "\t40\t7\t10",
+ "France: Saint Barthelemy",
+ "\t18\t-63\t-61",
+ "\t17\t-63\t-61",
+ "\t16\t-63\t-61",
+ "France: Saint Martin",
+ "\t19\t-64\t-62",
+ "\t18\t-64\t-62",
+ "\t17\t-64\t-62",
+ "French Guiana",
+ "\t6\t-55\t-51",
+ "\t5\t-55\t-50",
+ "\t4\t-55\t-50",
+ "\t3\t-55\t-50",
+ "\t2\t-55\t-50",
+ "\t1\t-55\t-51",
+ "French Polynesia",
+ "\t-6\t-141\t-139",
+ "\t-7\t-141\t-138",
+ "\t-8\t-141\t-137",
+ "\t-9\t-141\t-137",
+ "\t-10\t-141\t-137",
+ "\t-11\t-140\t-137",
+ "\t-13\t-149\t-140",
+ "\t-14\t-149\t-139",
+ "\t-15\t-152\t-139",
+ "\t-16\t-152\t-137",
+ "\t-17\t-152\t-135",
+ "\t-18\t-150\t-148\t-146\t-135",
+ "\t-19\t-142\t-135",
+ "\t-20\t-142\t-134",
+ "\t-21\t-152\t-150\t-141\t-134",
+ "\t-22\t-152\t-146\t-141\t-133",
+ "\t-23\t-152\t-146\t-136\t-133",
+ "\t-24\t-150\t-146\t-136\t-133",
+ "\t-26\t-145\t-143",
+ "\t-27\t-145\t-143",
+ "\t-28\t-145\t-143",
+ "French Southern and Antarctic Lands",
+ "\t-10\t46\t48",
+ "\t-11\t46\t48",
+ "\t-12\t46\t48",
+ "\t-14\t53\t55",
+ "\t-15\t53\t55",
+ "\t-16\t41\t43\t53\t55",
+ "\t-17\t41\t43",
+ "\t-18\t41\t43",
+ "\t-20\t38\t40",
+ "\t-21\t38\t41",
+ "\t-22\t38\t41",
+ "\t-23\t39\t41",
+ "\t-36\t76\t78",
+ "\t-37\t76\t78",
+ "\t-38\t76\t78",
+ "\t-39\t76\t78",
+ "\t-45\t49\t52",
+ "\t-46\t49\t52",
+ "\t-47\t49\t52\t67\t70",
+ "\t-48\t67\t71",
+ "\t-49\t67\t71",
+ "\t-50\t67\t71",
+ "Gabon",
+ "\t3\t10\t14",
+ "\t2\t8\t15",
+ "\t1\t7\t15",
+ "\t0\t7\t15",
+ "\t-1\t7\t15",
+ "\t-2\t7\t15",
+ "\t-3\t8\t15",
+ "\t-4\t9\t12",
+ "Gambia",
+ "\t14\t-17\t-12",
+ "\t13\t-17\t-12",
+ "\t12\t-17\t-12",
+ "Gaza Strip",
+ "\t32\t33\t35",
+ "\t31\t33\t35",
+ "\t30\t33\t35",
+ "Georgia",
+ "\t44\t38\t44",
+ "\t43\t38\t47",
+ "\t42\t38\t47",
+ "\t41\t39\t47",
+ "\t40\t40\t47",
+ "Germany",
+ "\t56\t7\t9",
+ "\t55\t7\t15",
+ "\t54\t5\t15",
+ "\t53\t5\t15",
+ "\t52\t4\t16",
+ "\t51\t4\t16",
+ "\t50\t4\t16",
+ "\t49\t4\t15",
+ "\t48\t5\t14",
+ "\t47\t6\t14",
+ "\t46\t6\t14",
+ "Ghana",
+ "\t12\t-2\t1",
+ "\t11\t-3\t1",
+ "\t10\t-3\t1",
+ "\t9\t-3\t1",
+ "\t8\t-4\t1",
+ "\t7\t-4\t2",
+ "\t6\t-4\t2",
+ "\t5\t-4\t2",
+ "\t4\t-4\t2",
+ "\t3\t-3\t0",
+ "Gibraltar",
+ "\t37\t-6\t-4",
+ "\t36\t-6\t-4",
+ "\t35\t-6\t-4",
+ "Glorioso Islands",
+ "\t-10\t46\t48",
+ "\t-11\t46\t48",
+ "\t-12\t46\t48",
+ "Greece",
+ "\t42\t20\t27",
+ "\t41\t19\t27",
+ "\t40\t18\t27",
+ "\t39\t18\t27",
+ "\t38\t18\t28",
+ "\t37\t19\t29",
+ "\t36\t19\t29",
+ "\t35\t20\t29",
+ "\t34\t22\t28",
+ "\t33\t23\t26",
+ "Greenland",
+ "\t84\t-47\t-23",
+ "\t83\t-60\t-18",
+ "\t82\t-65\t-10",
+ "\t81\t-68\t-10",
+ "\t80\t-69\t-10",
+ "\t79\t-74\t-13",
+ "\t78\t-74\t-16",
+ "\t77\t-74\t-16",
+ "\t76\t-73\t-16",
+ "\t75\t-72\t-16",
+ "\t74\t-68\t-65\t-61\t-16",
+ "\t73\t-58\t-16",
+ "\t72\t-57\t-19",
+ "\t71\t-57\t-20",
+ "\t70\t-56\t-20",
+ "\t69\t-56\t-20",
+ "\t68\t-55\t-21",
+ "\t67\t-54\t-24",
+ "\t66\t-54\t-31",
+ "\t65\t-54\t-32",
+ "\t64\t-54\t-34",
+ "\t63\t-53\t-39",
+ "\t62\t-52\t-39",
+ "\t61\t-51\t-40",
+ "\t60\t-50\t-41",
+ "\t59\t-49\t-41",
+ "\t58\t-45\t-42",
+ "Grenada",
+ "\t13\t-62\t-60",
+ "\t12\t-62\t-60",
+ "\t11\t-62\t-60",
+ "Guadeloupe",
+ "\t17\t-62\t-59",
+ "\t16\t-62\t-59",
+ "\t15\t-62\t-59",
+ "\t14\t-62\t-60",
+ "Guam",
+ "\t14\t143\t145",
+ "\t13\t143\t145",
+ "\t12\t143\t145",
+ "Guatemala",
+ "\t18\t-92\t-88",
+ "\t17\t-92\t-88",
+ "\t16\t-93\t-87",
+ "\t15\t-93\t-87",
+ "\t14\t-93\t-87",
+ "\t13\t-93\t-88",
+ "\t12\t-92\t-89",
+ "Guernsey",
+ "\t50\t-3\t-1",
+ "\t49\t-3\t-1",
+ "\t48\t-3\t-1",
+ "Guinea",
+ "\t13\t-14\t-7",
+ "\t12\t-15\t-7",
+ "\t11\t-16\t-6",
+ "\t10\t-16\t-6",
+ "\t9\t-16\t-6",
+ "\t8\t-14\t-6",
+ "\t7\t-11\t-6",
+ "\t6\t-10\t-7",
+ "Guinea-Bissau",
+ "\t13\t-17\t-12",
+ "\t12\t-17\t-12",
+ "\t11\t-17\t-12",
+ "\t10\t-17\t-12",
+ "\t9\t-16\t-13",
+ "Guyana",
+ "\t9\t-61\t-58",
+ "\t8\t-61\t-57",
+ "\t7\t-62\t-56",
+ "\t6\t-62\t-56",
+ "\t5\t-62\t-56",
+ "\t4\t-62\t-56",
+ "\t3\t-61\t-55",
+ "\t2\t-61\t-55",
+ "\t1\t-61\t-55",
+ "\t0\t-60\t-55",
+ "Haiti",
+ "\t21\t-73\t-71",
+ "\t20\t-74\t-70",
+ "\t19\t-75\t-70",
+ "\t18\t-75\t-70",
+ "\t17\t-75\t-70",
+ "Heard Island and McDonald Islands",
+ "\t-51\t72\t74",
+ "\t-52\t72\t74",
+ "\t-53\t72\t74",
+ "\t-54\t72\t74",
+ "Honduras",
+ "\t18\t-84\t-82",
+ "\t17\t-87\t-82",
+ "\t16\t-90\t-82",
+ "\t15\t-90\t-82",
+ "\t14\t-90\t-82",
+ "\t13\t-90\t-82",
+ "\t12\t-89\t-84",
+ "\t11\t-88\t-86",
+ "Hong Kong",
+ "\t23\t112\t115",
+ "\t22\t112\t115",
+ "\t21\t112\t115",
+ "Howland Island",
+ "\t1\t-177\t-175",
+ "\t0\t-177\t-175",
+ "\t-1\t-177\t-175",
+ "Hungary",
+ "\t49\t16\t23",
+ "\t48\t15\t23",
+ "\t47\t15\t23",
+ "\t46\t15\t23",
+ "\t45\t15\t22",
+ "\t44\t16\t20",
+ "Iceland",
+ "\t67\t-24\t-13",
+ "\t66\t-25\t-12",
+ "\t65\t-25\t-12",
+ "\t64\t-25\t-12",
+ "\t63\t-25\t-12",
+ "\t62\t-23\t-15",
+ "India",
+ "\t36\t76\t79",
+ "\t35\t72\t79",
+ "\t34\t72\t80",
+ "\t33\t72\t80",
+ "\t32\t72\t80",
+ "\t31\t72\t82",
+ "\t30\t71\t82\t93\t97",
+ "\t29\t69\t82\t87\t89\t91\t98",
+ "\t28\t68\t98",
+ "\t27\t68\t98",
+ "\t26\t68\t98",
+ "\t25\t67\t96",
+ "\t24\t67\t96",
+ "\t23\t67\t95",
+ "\t22\t67\t95",
+ "\t21\t67\t94",
+ "\t20\t68\t93",
+ "\t19\t69\t88",
+ "\t18\t71\t87",
+ "\t17\t71\t85",
+ "\t16\t72\t84",
+ "\t15\t72\t83",
+ "\t14\t72\t82\t91\t95",
+ "\t13\t73\t81\t91\t95",
+ "\t12\t71\t81\t91\t95",
+ "\t11\t71\t81\t91\t94",
+ "\t10\t71\t80\t91\t94",
+ "\t9\t71\t80\t91\t94",
+ "\t8\t72\t80\t91\t94",
+ "\t7\t72\t79\t92\t94",
+ "\t6\t92\t94",
+ "\t5\t92\t94",
+ "Indonesia",
+ "\t6\t94\t98\t125\t127",
+ "\t5\t94\t99\t106\t109\t114\t118\t125\t128",
+ "\t4\t94\t100\t104\t109\t114\t118\t124\t128",
+ "\t3\t94\t102\t104\t110\t113\t119\t124\t129",
+ "\t2\t94\t132",
+ "\t1\t94\t137",
+ "\t0\t96\t139",
+ "\t-1\t96\t141",
+ "\t-2\t97\t141",
+ "\t-3\t98\t141",
+ "\t-4\t99\t141",
+ "\t-5\t101\t141",
+ "\t-6\t101\t116\t118\t141",
+ "\t-7\t104\t141",
+ "\t-8\t105\t132\t136\t141",
+ "\t-9\t109\t132\t136\t141",
+ "\t-10\t115\t126\t139\t141",
+ "\t-11\t119\t125",
+ "Iran",
+ "\t40\t43\t49",
+ "\t39\t43\t49\t54\t58",
+ "\t38\t43\t61",
+ "\t37\t43\t62",
+ "\t36\t43\t62",
+ "\t35\t43\t62",
+ "\t34\t44\t62",
+ "\t33\t44\t62",
+ "\t32\t44\t62",
+ "\t31\t45\t62",
+ "\t30\t46\t62",
+ "\t29\t46\t63",
+ "\t28\t47\t64",
+ "\t27\t49\t64",
+ "\t26\t50\t64",
+ "\t25\t52\t64",
+ "\t24\t53\t62",
+ "Iraq",
+ "\t38\t41\t45",
+ "\t37\t40\t46",
+ "\t36\t40\t47",
+ "\t35\t39\t47",
+ "\t34\t37\t47",
+ "\t33\t37\t48",
+ "\t32\t37\t48",
+ "\t31\t37\t49",
+ "\t30\t39\t49",
+ "\t29\t41\t49",
+ "\t28\t42\t49",
+ "Ireland",
+ "\t56\t-9\t-5",
+ "\t55\t-11\t-5",
+ "\t54\t-11\t-5",
+ "\t53\t-11\t-4",
+ "\t52\t-11\t-4",
+ "\t51\t-11\t-4",
+ "\t50\t-11\t-6",
+ "Isle of Man",
+ "\t55\t-5\t-3",
+ "\t54\t-5\t-3",
+ "\t53\t-5\t-3",
+ "Israel",
+ "\t34\t34\t36",
+ "\t33\t33\t36",
+ "\t32\t33\t36",
+ "\t31\t33\t36",
+ "\t30\t33\t36",
+ "\t29\t33\t36",
+ "\t28\t33\t36",
+ "Italy",
+ "\t48\t10\t13",
+ "\t47\t6\t14",
+ "\t46\t5\t14",
+ "\t45\t5\t14",
+ "\t44\t5\t14",
+ "\t43\t5\t16",
+ "\t42\t6\t18",
+ "\t41\t7\t19",
+ "\t40\t7\t19",
+ "\t39\t7\t19",
+ "\t38\t7\t19",
+ "\t37\t7\t18",
+ "\t36\t10\t17",
+ "\t35\t10\t16",
+ "\t34\t11\t13",
+ "Jamaica",
+ "\t19\t-79\t-75",
+ "\t18\t-79\t-75",
+ "\t17\t-79\t-75",
+ "\t16\t-78\t-75",
+ "Jan Mayen",
+ "\t72\t-9\t-6",
+ "\t71\t-10\t-6",
+ "\t70\t-10\t-6",
+ "\t69\t-10\t-7",
+ "Japan",
+ "\t46\t139\t143",
+ "\t45\t139\t146",
+ "\t44\t139\t146",
+ "\t43\t138\t146",
+ "\t42\t138\t146",
+ "\t41\t138\t146",
+ "\t40\t138\t144",
+ "\t39\t137\t143",
+ "\t38\t135\t143",
+ "\t37\t131\t142",
+ "\t36\t131\t142",
+ "\t35\t128\t141",
+ "\t34\t128\t141",
+ "\t33\t127\t140",
+ "\t32\t127\t141",
+ "\t31\t127\t134\t138\t141",
+ "\t30\t128\t132\t139\t141",
+ "\t29\t128\t132\t139\t141",
+ "\t28\t126\t131\t139\t143",
+ "\t27\t125\t131\t139\t143",
+ "\t26\t122\t132\t139\t143",
+ "\t25\t121\t132\t140\t143\t152\t154",
+ "\t24\t121\t126\t130\t132\t140\t142\t152\t154",
+ "\t23\t121\t126\t140\t142\t152\t154",
+ "Jarvis Island",
+ "\t1\t-161\t-159",
+ "\t0\t-161\t-159",
+ "\t-1\t-161\t-159",
+ "Jersey",
+ "\t50\t-3\t-1",
+ "\t49\t-3\t-1",
+ "\t48\t-3\t-1",
+ "Johnston Atoll",
+ "\t17\t-170\t-168",
+ "\t16\t-170\t-168",
+ "\t15\t-170\t-168",
+ "Jordan",
+ "\t34\t37\t39",
+ "\t33\t34\t40",
+ "\t32\t34\t40",
+ "\t31\t34\t40",
+ "\t30\t33\t39",
+ "\t29\t33\t38",
+ "\t28\t33\t38",
+ "Juan de Nova Island",
+ "\t-16\t41\t43",
+ "\t-17\t41\t43",
+ "\t-18\t41\t43",
+ "Kazakhstan",
+ "\t56\t67\t71",
+ "\t55\t60\t77",
+ "\t54\t59\t79",
+ "\t53\t59\t79",
+ "\t52\t48\t84",
+ "\t51\t46\t86",
+ "\t50\t45\t88",
+ "\t49\t45\t88",
+ "\t48\t45\t88",
+ "\t47\t45\t87",
+ "\t46\t46\t86",
+ "\t45\t47\t86",
+ "\t44\t48\t83",
+ "\t43\t48\t56\t58\t81",
+ "\t42\t49\t56\t60\t81",
+ "\t41\t50\t56\t64\t81",
+ "\t40\t51\t56\t65\t71",
+ "\t39\t66\t69",
+ "Kenya",
+ "\t6\t33\t36",
+ "\t5\t32\t42",
+ "\t4\t32\t42",
+ "\t3\t32\t42",
+ "\t2\t33\t42",
+ "\t1\t32\t42",
+ "\t0\t32\t42",
+ "\t-1\t32\t42",
+ "\t-2\t32\t42",
+ "\t-3\t34\t42",
+ "\t-4\t36\t41",
+ "\t-5\t37\t40",
+ "Kerguelen Archipelago",
+ "\t-47\t67\t70",
+ "\t-48\t67\t71",
+ "\t-49\t67\t71",
+ "\t-50\t67\t71",
+ "Kingman Reef",
+ "\t7\t-163\t-161",
+ "\t6\t-163\t-161",
+ "\t5\t-163\t-161",
+ "Kiribati",
+ "\t5\t-161\t-159",
+ "\t4\t-161\t-158\t171\t173",
+ "\t3\t-161\t-156\t171\t173",
+ "\t2\t-160\t-156\t171\t174",
+ "\t1\t-158\t-156\t171\t175",
+ "\t0\t-158\t-156\t171\t177",
+ "\t-1\t-172\t-170\t171\t177",
+ "\t-2\t-172\t-170\t173\t177",
+ "\t-3\t-173\t-170\t-156\t-153\t174\t177",
+ "\t-4\t-173\t-171\t-156\t-153",
+ "\t-5\t-173\t-171\t-156\t-153",
+ "\t-10\t-152\t-150",
+ "\t-11\t-152\t-150",
+ "\t-12\t-152\t-150",
+ "Kosovo",
+ "\t44\t19\t22",
+ "\t43\t19\t22",
+ "\t42\t19\t22",
+ "\t41\t19\t22",
+ "\t40\t19\t21",
+ "Kuwait",
+ "\t31\t46\t49",
+ "\t30\t45\t49",
+ "\t29\t45\t49",
+ "\t28\t45\t49",
+ "\t27\t46\t49",
+ "Kyrgyzstan",
+ "\t44\t72\t75",
+ "\t43\t69\t81",
+ "\t42\t69\t81",
+ "\t41\t68\t81",
+ "\t40\t68\t80",
+ "\t39\t68\t78",
+ "\t38\t68\t74",
+ "Laos",
+ "\t23\t100\t103",
+ "\t22\t99\t104",
+ "\t21\t99\t105",
+ "\t20\t99\t105",
+ "\t19\t99\t106",
+ "\t18\t99\t107",
+ "\t17\t99\t108",
+ "\t16\t99\t108",
+ "\t15\t103\t108",
+ "\t14\t104\t108",
+ "\t13\t104\t108",
+ "\t12\t104\t107",
+ "Latvia",
+ "\t59\t23\t26",
+ "\t58\t20\t28",
+ "\t57\t19\t29",
+ "\t56\t19\t29",
+ "\t55\t19\t29",
+ "\t54\t24\t28",
+ "Lebanon",
+ "\t35\t34\t37",
+ "\t34\t34\t37",
+ "\t33\t34\t37",
+ "\t32\t34\t37",
+ "Lesotho",
+ "\t-27\t26\t30",
+ "\t-28\t26\t30",
+ "\t-29\t26\t30",
+ "\t-30\t26\t30",
+ "\t-31\t26\t29",
+ "Liberia",
+ "\t9\t-11\t-8",
+ "\t8\t-12\t-7",
+ "\t7\t-12\t-6",
+ "\t6\t-12\t-6",
+ "\t5\t-12\t-6",
+ "\t4\t-11\t-6",
+ "\t3\t-10\t-6",
+ "Libya",
+ "\t34\t10\t12",
+ "\t33\t9\t16\t19\t25",
+ "\t32\t9\t26",
+ "\t31\t8\t26",
+ "\t30\t8\t26",
+ "\t29\t8\t25",
+ "\t28\t8\t25",
+ "\t27\t8\t25",
+ "\t26\t8\t25",
+ "\t25\t8\t25",
+ "\t24\t8\t25",
+ "\t23\t9\t25",
+ "\t22\t10\t25",
+ "\t21\t12\t25",
+ "\t20\t17\t25",
+ "\t19\t20\t25",
+ "\t18\t21\t25",
+ "Liechtenstein",
+ "\t48\t8\t10",
+ "\t47\t8\t10",
+ "\t46\t8\t10",
+ "Lithuania",
+ "\t57\t20\t26",
+ "\t56\t19\t27",
+ "\t55\t19\t27",
+ "\t54\t19\t27",
+ "\t53\t21\t27",
+ "\t52\t22\t25",
+ "Luxembourg",
+ "\t51\t4\t7",
+ "\t50\t4\t7",
+ "\t49\t4\t7",
+ "\t48\t4\t7",
+ "Macau",
+ "\t23\t112\t114",
+ "\t22\t112\t114",
+ "\t21\t112\t114",
+ "Macedonia",
+ "\t43\t19\t23",
+ "\t42\t19\t24",
+ "\t41\t19\t24",
+ "\t40\t19\t24",
+ "\t39\t19\t22",
+ "Madagascar",
+ "\t-10\t48\t50",
+ "\t-11\t47\t50",
+ "\t-12\t46\t51",
+ "\t-13\t46\t51",
+ "\t-14\t44\t51",
+ "\t-15\t43\t51",
+ "\t-16\t42\t51",
+ "\t-17\t42\t51",
+ "\t-18\t42\t50",
+ "\t-19\t42\t50",
+ "\t-20\t42\t50",
+ "\t-21\t42\t49",
+ "\t-22\t42\t49",
+ "\t-23\t42\t48",
+ "\t-24\t42\t48",
+ "\t-25\t42\t48",
+ "\t-26\t43\t48",
+ "Malawi",
+ "\t-8\t31\t35",
+ "\t-9\t31\t35",
+ "\t-10\t31\t35",
+ "\t-11\t31\t35",
+ "\t-12\t31\t36",
+ "\t-13\t31\t36",
+ "\t-14\t31\t36",
+ "\t-15\t31\t36",
+ "\t-16\t33\t36",
+ "\t-17\t33\t36",
+ "\t-18\t34\t36",
+ "Malaysia",
+ "\t8\t115\t118",
+ "\t7\t98\t103\t115\t119",
+ "\t6\t98\t104\t114\t120",
+ "\t5\t98\t104\t112\t120",
+ "\t4\t99\t104\t111\t120",
+ "\t3\t99\t105\t108\t119",
+ "\t2\t99\t105\t108\t116",
+ "\t1\t100\t105\t108\t116",
+ "\t0\t101\t105\t108\t115",
+ "\t-1\t109\t112",
+ "Maldives",
+ "\t8\t71\t73",
+ "\t7\t71\t74",
+ "\t6\t71\t74",
+ "\t5\t71\t74",
+ "\t4\t71\t74",
+ "\t3\t71\t74",
+ "\t2\t71\t74",
+ "\t1\t71\t74",
+ "\t0\t71\t74",
+ "\t-1\t71\t74",
+ "Mali",
+ "\t25\t-7\t-2",
+ "\t24\t-7\t0",
+ "\t23\t-7\t1",
+ "\t22\t-7\t2",
+ "\t21\t-7\t3",
+ "\t20\t-7\t5",
+ "\t19\t-7\t5",
+ "\t18\t-7\t5",
+ "\t17\t-6\t5",
+ "\t16\t-12\t5",
+ "\t15\t-13\t5",
+ "\t14\t-13\t4",
+ "\t13\t-13\t1",
+ "\t12\t-13\t-1",
+ "\t11\t-12\t-3",
+ "\t10\t-12\t-3",
+ "\t9\t-9\t-4",
+ "Malta",
+ "\t37\t13\t15",
+ "\t36\t13\t15",
+ "\t35\t13\t15",
+ "\t34\t13\t15",
+ "Marshall Islands",
+ "\t15\t167\t170",
+ "\t14\t167\t170",
+ "\t13\t167\t170",
+ "\t12\t164\t167",
+ "\t11\t164\t167\t169\t171",
+ "\t10\t164\t167\t169\t171",
+ "\t9\t166\t171",
+ "\t8\t166\t172",
+ "\t7\t166\t173",
+ "\t6\t167\t173",
+ "\t5\t167\t173",
+ "\t4\t167\t170",
+ "\t3\t167\t169",
+ "Martinique",
+ "\t15\t-62\t-59",
+ "\t14\t-62\t-59",
+ "\t13\t-62\t-59",
+ "Mauritania",
+ "\t28\t-9\t-7",
+ "\t27\t-9\t-5",
+ "\t26\t-13\t-3",
+ "\t25\t-13\t-3",
+ "\t24\t-14\t-3",
+ "\t23\t-14\t-3",
+ "\t22\t-18\t-5",
+ "\t21\t-18\t-5",
+ "\t20\t-18\t-4",
+ "\t19\t-18\t-4",
+ "\t18\t-17\t-4",
+ "\t17\t-17\t-4",
+ "\t16\t-17\t-4",
+ "\t15\t-17\t-4",
+ "\t14\t-17\t-4",
+ "\t13\t-13\t-10",
+ "Mauritius",
+ "\t-9\t55\t57",
+ "\t-10\t55\t57",
+ "\t-11\t55\t57",
+ "\t-18\t56\t58\t62\t64",
+ "\t-19\t56\t58\t62\t64",
+ "\t-20\t56\t58\t62\t64",
+ "\t-21\t56\t58",
+ "Mayotte",
+ "\t-11\t44\t46",
+ "\t-12\t44\t46",
+ "\t-13\t44\t46",
+ "Mexico",
+ "\t33\t-118\t-112",
+ "\t32\t-118\t-104",
+ "\t31\t-118\t-103",
+ "\t30\t-119\t-99",
+ "\t29\t-119\t-98",
+ "\t28\t-119\t-98",
+ "\t27\t-119\t-96",
+ "\t26\t-116\t-96",
+ "\t25\t-115\t-96",
+ "\t24\t-113\t-96",
+ "\t23\t-113\t-96",
+ "\t22\t-111\t-96\t-91\t-85",
+ "\t21\t-111\t-95\t-91\t-85",
+ "\t20\t-111\t-109\t-107\t-94\t-92\t-85",
+ "\t19\t-115\t-109\t-106\t-85",
+ "\t18\t-115\t-109\t-106\t-86",
+ "\t17\t-115\t-109\t-105\t-86",
+ "\t16\t-103\t-87",
+ "\t15\t-101\t-89",
+ "\t14\t-98\t-90",
+ "\t13\t-93\t-91",
+ "Micronesia",
+ "\t10\t137\t139",
+ "\t9\t137\t139\t148\t151\t153\t155",
+ "\t8\t137\t139\t148\t155",
+ "\t7\t148\t159",
+ "\t6\t148\t154\t156\t159\t161\t164",
+ "\t5\t148\t150\t152\t154\t156\t159\t161\t164",
+ "\t4\t152\t154\t156\t158\t161\t164",
+ "Midway Islands",
+ "\t29\t-178\t-176",
+ "\t28\t-178\t-176",
+ "\t27\t-178\t-176",
+ "Moldova",
+ "\t49\t25\t29",
+ "\t48\t25\t30",
+ "\t47\t25\t31",
+ "\t46\t26\t31",
+ "\t45\t27\t31",
+ "\t44\t27\t29",
+ "Monaco",
+ "\t44\t6\t8",
+ "\t43\t6\t8",
+ "\t42\t6\t8",
+ "Mongolia",
+ "\t53\t97\t100",
+ "\t52\t96\t103",
+ "\t51\t88\t108\t112\t117",
+ "\t50\t86\t117",
+ "\t49\t86\t119",
+ "\t48\t86\t120",
+ "\t47\t86\t120",
+ "\t46\t88\t120",
+ "\t45\t89\t120",
+ "\t44\t89\t117",
+ "\t43\t91\t115",
+ "\t42\t94\t112",
+ "\t41\t95\t111",
+ "\t40\t102\t106",
+ "Montenegro",
+ "\t44\t17\t21",
+ "\t43\t17\t21",
+ "\t42\t17\t21",
+ "\t41\t17\t21",
+ "\t40\t18\t20",
+ "Montserrat",
+ "\t17\t-63\t-61",
+ "\t16\t-63\t-61",
+ "\t15\t-63\t-61",
+ "Morocco",
+ "\t36\t-7\t-1",
+ "\t35\t-7\t0",
+ "\t34\t-9\t0",
+ "\t33\t-10\t0",
+ "\t32\t-10\t0",
+ "\t31\t-10\t0",
+ "\t30\t-11\t-1",
+ "\t29\t-13\t-2",
+ "\t28\t-14\t-4",
+ "\t27\t-15\t-7",
+ "\t26\t-15\t-7",
+ "\t25\t-16\t-8",
+ "\t24\t-17\t-11",
+ "\t23\t-17\t-11",
+ "\t22\t-18\t-12",
+ "\t21\t-18\t-13",
+ "\t20\t-18\t-13",
+ "Mozambique",
+ "\t-9\t38\t41",
+ "\t-10\t33\t41",
+ "\t-11\t33\t41",
+ "\t-12\t33\t41",
+ "\t-13\t29\t41",
+ "\t-14\t29\t41",
+ "\t-15\t29\t41",
+ "\t-16\t29\t41",
+ "\t-17\t29\t41",
+ "\t-18\t31\t39",
+ "\t-19\t31\t37",
+ "\t-20\t30\t36",
+ "\t-21\t30\t36",
+ "\t-22\t30\t36",
+ "\t-23\t30\t36",
+ "\t-24\t30\t36",
+ "\t-25\t30\t36",
+ "\t-26\t30\t34",
+ "\t-27\t31\t33",
+ "Myanmar",
+ "\t29\t96\t99",
+ "\t28\t94\t99",
+ "\t27\t94\t99",
+ "\t26\t93\t99",
+ "\t25\t92\t99",
+ "\t24\t92\t100",
+ "\t23\t91\t100",
+ "\t22\t91\t102",
+ "\t21\t91\t102",
+ "\t20\t91\t102",
+ "\t19\t91\t101",
+ "\t18\t91\t100",
+ "\t17\t92\t99",
+ "\t16\t93\t99",
+ "\t15\t92\t99",
+ "\t14\t92\t100",
+ "\t13\t92\t94\t96\t100",
+ "\t12\t96\t100",
+ "\t11\t96\t100",
+ "\t10\t96\t100",
+ "\t9\t96\t100",
+ "\t8\t97\t99",
+ "Namibia",
+ "\t-15\t12\t14",
+ "\t-16\t10\t26",
+ "\t-17\t10\t26",
+ "\t-18\t10\t26",
+ "\t-19\t10\t25",
+ "\t-20\t11\t21",
+ "\t-21\t12\t21",
+ "\t-22\t12\t21",
+ "\t-23\t13\t21",
+ "\t-24\t13\t20",
+ "\t-25\t13\t20",
+ "\t-26\t13\t20",
+ "\t-27\t13\t20",
+ "\t-28\t14\t20",
+ "\t-29\t14\t20",
+ "Nauru",
+ "\t1\t165\t167",
+ "\t0\t165\t167",
+ "\t-1\t165\t167",
+ "Navassa Island",
+ "\t19\t-76\t-74",
+ "\t18\t-76\t-74",
+ "\t17\t-76\t-74",
+ "Nepal",
+ "\t31\t79\t83",
+ "\t30\t79\t85",
+ "\t29\t79\t87",
+ "\t28\t79\t89",
+ "\t27\t79\t89",
+ "\t26\t80\t89",
+ "\t25\t83\t89",
+ "Netherlands",
+ "\t54\t3\t8",
+ "\t53\t3\t8",
+ "\t52\t2\t8",
+ "\t51\t2\t8",
+ "\t50\t2\t7",
+ "\t49\t4\t7",
+ "\t19\t-64\t-62",
+ "\t18\t-64\t-61",
+ "\t17\t-64\t-61",
+ "\t16\t-64\t-61",
+ "\t13\t-69\t-67",
+ "\t12\t-69\t-67",
+ "\t11\t-69\t-67",
+ "Netherlands Antilles",
+ "\t13\t-70\t-67",
+ "\t12\t-70\t-67",
+ "\t11\t-70\t-67",
+ "Netherlands Antilles",
+ "\t19\t-64\t-62",
+ "\t18\t-64\t-61",
+ "\t17\t-64\t-61",
+ "\t16\t-64\t-61",
+ "New Caledonia",
+ "\t-18\t158\t160\t162\t164",
+ "\t-19\t158\t160\t162\t168",
+ "\t-20\t158\t160\t162\t169",
+ "\t-21\t162\t169",
+ "\t-22\t163\t169",
+ "\t-23\t165\t168",
+ "New Zealand",
+ "\t-7\t-173\t-171",
+ "\t-8\t-173\t-170",
+ "\t-9\t-173\t-170",
+ "\t-10\t-172\t-170",
+ "\t-28\t-178\t-176",
+ "\t-29\t-178\t-176",
+ "\t-30\t-178\t-176",
+ "\t-33\t171\t174",
+ "\t-34\t171\t175",
+ "\t-35\t171\t176",
+ "\t-36\t172\t179",
+ "\t-37\t172\t179",
+ "\t-38\t172\t179",
+ "\t-39\t171\t179",
+ "\t-40\t170\t179",
+ "\t-41\t169\t177",
+ "\t-42\t-177\t-175\t167\t177",
+ "\t-43\t-177\t-175\t166\t175",
+ "\t-44\t-177\t-175\t165\t174",
+ "\t-45\t-177\t-175\t165\t172",
+ "\t-46\t165\t172",
+ "\t-47\t165\t171",
+ "\t-48\t165\t169\t177\t179",
+ "\t-49\t164\t167\t177\t179",
+ "\t-50\t164\t167\t177\t179",
+ "\t-51\t164\t170",
+ "\t-52\t168\t170",
+ "\t-53\t168\t170",
+ "Nicaragua",
+ "\t16\t-84\t-82",
+ "\t15\t-87\t-81",
+ "\t14\t-88\t-81",
+ "\t13\t-88\t-81",
+ "\t12\t-88\t-82",
+ "\t11\t-88\t-82",
+ "\t10\t-87\t-82",
+ "\t9\t-85\t-82",
+ "Niger",
+ "\t24\t10\t14",
+ "\t23\t8\t16",
+ "\t22\t6\t16",
+ "\t21\t5\t16",
+ "\t20\t3\t16",
+ "\t19\t3\t16",
+ "\t18\t3\t16",
+ "\t17\t2\t16",
+ "\t16\t0\t16",
+ "\t15\t-1\t16",
+ "\t14\t-1\t15",
+ "\t13\t-1\t14",
+ "\t12\t-1\t14",
+ "\t11\t0\t10",
+ "\t10\t1\t4",
+ "Nigeria",
+ "\t14\t3\t15",
+ "\t13\t2\t15",
+ "\t12\t2\t15",
+ "\t11\t2\t15",
+ "\t10\t1\t15",
+ "\t9\t1\t14",
+ "\t8\t1\t14",
+ "\t7\t1\t13",
+ "\t6\t1\t13",
+ "\t5\t1\t12",
+ "\t4\t4\t10",
+ "\t3\t4\t9",
+ "Niue",
+ "\t-17\t-170\t-168",
+ "\t-18\t-170\t-168",
+ "\t-19\t-170\t-168",
+ "\t-20\t-170\t-168",
+ "Norfolk Island",
+ "\t-27\t166\t168",
+ "\t-28\t166\t168",
+ "\t-29\t166\t168",
+ "\t-30\t166\t168",
+ "North Korea",
+ "\t44\t128\t130",
+ "\t43\t127\t131",
+ "\t42\t125\t131",
+ "\t41\t123\t131",
+ "\t40\t123\t131",
+ "\t39\t123\t130",
+ "\t38\t123\t129",
+ "\t37\t123\t129",
+ "\t36\t123\t127",
+ "Northern Mariana Islands",
+ "\t21\t143\t146",
+ "\t20\t143\t146",
+ "\t19\t143\t146",
+ "\t18\t144\t146",
+ "\t17\t144\t146",
+ "\t16\t144\t146",
+ "\t15\t144\t146",
+ "\t14\t144\t146",
+ "\t13\t144\t146",
+ "Norway",
+ "\t72\t22\t29",
+ "\t71\t17\t32",
+ "\t70\t14\t32",
+ "\t69\t11\t32",
+ "\t68\t11\t31",
+ "\t67\t11\t26",
+ "\t66\t10\t18",
+ "\t65\t8\t17",
+ "\t64\t6\t15",
+ "\t63\t3\t15",
+ "\t62\t3\t13",
+ "\t61\t3\t13",
+ "\t60\t3\t13",
+ "\t59\t3\t13",
+ "\t58\t4\t13",
+ "\t57\t4\t12",
+ "\t56\t5\t8",
+ "Oman",
+ "\t27\t55\t57",
+ "\t26\t55\t57",
+ "\t25\t54\t58",
+ "\t24\t54\t60",
+ "\t23\t54\t60",
+ "\t22\t54\t60",
+ "\t21\t54\t60",
+ "\t20\t51\t60",
+ "\t19\t50\t59",
+ "\t18\t50\t58",
+ "\t17\t50\t58",
+ "\t16\t51\t57",
+ "\t15\t51\t55",
+ "Pakistan",
+ "\t38\t73\t75",
+ "\t37\t70\t77",
+ "\t36\t70\t78",
+ "\t35\t68\t78",
+ "\t34\t68\t78",
+ "\t33\t68\t78",
+ "\t32\t65\t76",
+ "\t31\t65\t76",
+ "\t30\t59\t75",
+ "\t29\t59\t75",
+ "\t28\t59\t74",
+ "\t27\t60\t73",
+ "\t26\t60\t72",
+ "\t25\t60\t72",
+ "\t24\t60\t72",
+ "\t23\t65\t72",
+ "\t22\t66\t69",
+ "Palau",
+ "\t8\t133\t135",
+ "\t7\t133\t135",
+ "\t6\t131\t135",
+ "\t5\t131\t135",
+ "\t4\t130\t133",
+ "\t3\t130\t132",
+ "\t2\t130\t132",
+ "\t1\t130\t132",
+ "Palmyra Atoll",
+ "\t6\t-163\t-161",
+ "\t5\t-163\t-161",
+ "\t4\t-163\t-161",
+ "Panama",
+ "\t10\t-83\t-76",
+ "\t9\t-84\t-76",
+ "\t8\t-84\t-76",
+ "\t7\t-84\t-76",
+ "\t6\t-82\t-76",
+ "Papua New Guinea",
+ "\t0\t141\t143\t145\t151",
+ "\t-1\t139\t143\t145\t153",
+ "\t-2\t139\t155",
+ "\t-3\t139\t155",
+ "\t-4\t139\t156",
+ "\t-5\t139\t156",
+ "\t-6\t139\t156",
+ "\t-7\t139\t156",
+ "\t-8\t139\t154",
+ "\t-9\t139\t154",
+ "\t-10\t139\t155",
+ "\t-11\t146\t155",
+ "\t-12\t152\t155",
+ "Paracel Islands",
+ "\t18\t110\t112",
+ "\t17\t110\t113",
+ "\t16\t110\t113",
+ "\t15\t110\t113",
+ "Paraguay",
+ "\t-18\t-62\t-57",
+ "\t-19\t-63\t-56",
+ "\t-20\t-63\t-56",
+ "\t-21\t-63\t-54",
+ "\t-22\t-63\t-53",
+ "\t-23\t-63\t-53",
+ "\t-24\t-62\t-53",
+ "\t-25\t-61\t-53",
+ "\t-26\t-59\t-53",
+ "\t-27\t-59\t-53",
+ "\t-28\t-59\t-54",
+ "Peru",
+ "\t1\t-76\t-73",
+ "\t0\t-76\t-72",
+ "\t-1\t-78\t-69",
+ "\t-2\t-81\t-69",
+ "\t-3\t-82\t-68",
+ "\t-4\t-82\t-68",
+ "\t-5\t-82\t-68",
+ "\t-6\t-82\t-71",
+ "\t-7\t-82\t-71",
+ "\t-8\t-80\t-69",
+ "\t-9\t-80\t-68",
+ "\t-10\t-79\t-68",
+ "\t-11\t-79\t-67",
+ "\t-12\t-78\t-67",
+ "\t-13\t-78\t-67",
+ "\t-14\t-77\t-67",
+ "\t-15\t-77\t-67",
+ "\t-16\t-76\t-67",
+ "\t-17\t-75\t-67",
+ "\t-18\t-73\t-68",
+ "\t-19\t-71\t-68",
+ "Philippines",
+ "\t22\t120\t122",
+ "\t21\t120\t123",
+ "\t20\t120\t123",
+ "\t19\t119\t123",
+ "\t18\t119\t123",
+ "\t17\t118\t123",
+ "\t16\t118\t123",
+ "\t15\t118\t125",
+ "\t14\t118\t125",
+ "\t13\t118\t126",
+ "\t12\t118\t126",
+ "\t11\t117\t127",
+ "\t10\t116\t127",
+ "\t9\t115\t127",
+ "\t8\t115\t127",
+ "\t7\t115\t127",
+ "\t6\t115\t127",
+ "\t5\t117\t127",
+ "\t4\t118\t126",
+ "\t3\t118\t120",
+ "Pitcairn Islands",
+ "\t-22\t-131\t-129",
+ "\t-23\t-131\t-127\t-125\t-123",
+ "\t-24\t-131\t-127\t-125\t-123",
+ "\t-25\t-131\t-127\t-125\t-123",
+ "\t-26\t-131\t-129",
+ "Poland",
+ "\t55\t13\t24",
+ "\t54\t13\t24",
+ "\t53\t13\t24",
+ "\t52\t13\t24",
+ "\t51\t13\t25",
+ "\t50\t13\t25",
+ "\t49\t13\t25",
+ "\t48\t16\t24",
+ "\t47\t21\t23",
+ "Portugal",
+ "\t43\t-9\t-7",
+ "\t42\t-9\t-5",
+ "\t41\t-9\t-5",
+ "\t40\t-10\t-5",
+ "\t39\t-10\t-5",
+ "\t38\t-10\t-5",
+ "\t37\t-10\t-5",
+ "\t36\t-9\t-6",
+ "\t35\t-8\t-6",
+ "Portugal: Azores",
+ "\t40\t-32\t-26",
+ "\t39\t-32\t-26",
+ "\t38\t-32\t-24",
+ "\t37\t-29\t-24",
+ "\t36\t-26\t-24",
+ "\t35\t-26\t-24",
+ "Portugal: Madeira",
+ "\t34\t-17\t-15",
+ "\t33\t-18\t-15",
+ "\t32\t-18\t-15",
+ "\t31\t-18\t-14",
+ "\t30\t-17\t-14",
+ "\t29\t-17\t-14",
+ "Puerto Rico",
+ "\t19\t-68\t-64",
+ "\t18\t-68\t-64",
+ "\t17\t-68\t-64",
+ "\t16\t-68\t-64",
+ "Qatar",
+ "\t27\t50\t52",
+ "\t26\t49\t52",
+ "\t25\t49\t52",
+ "\t24\t49\t52",
+ "\t23\t49\t52",
+ "Republic of the Congo",
+ "\t4\t15\t19",
+ "\t3\t12\t19",
+ "\t2\t12\t19",
+ "\t1\t12\t19",
+ "\t0\t11\t19",
+ "\t-1\t10\t18",
+ "\t-2\t10\t18",
+ "\t-3\t10\t17",
+ "\t-4\t10\t17",
+ "\t-5\t10\t16",
+ "\t-6\t10\t13",
+ "Reunion",
+ "\t-19\t54\t56",
+ "\t-20\t54\t56",
+ "\t-21\t54\t56",
+ "\t-22\t54\t56",
+ "Romania",
+ "\t49\t21\t28",
+ "\t48\t20\t29",
+ "\t47\t19\t29",
+ "\t46\t19\t30",
+ "\t45\t19\t30",
+ "\t44\t19\t30",
+ "\t43\t20\t30",
+ "\t42\t21\t29",
+ "Russia",
+ "\t82\t49\t51\t53\t66\t88\t97",
+ "\t81\t35\t37\t43\t66\t77\t81\t88\t100",
+ "\t80\t35\t37\t43\t66\t75\t81\t88\t105",
+ "\t79\t35\t37\t43\t66\t75\t81\t89\t108",
+ "\t78\t49\t52\t57\t60\t66\t68\t75\t78\t88\t108\t155\t157",
+ "\t77\t59\t70\t88\t114\t136\t143\t147\t153\t155\t157",
+ "\t76\t54\t70\t80\t114\t134\t153\t155\t157",
+ "\t75\t53\t70\t78\t117\t134\t153",
+ "\t74\t52\t130\t134\t151",
+ "\t73\t50\t61\t67\t130\t134\t151",
+ "\t72\t-180\t-174\t50\t59\t65\t159\t177\t180",
+ "\t71\t-180\t-174\t50\t61\t65\t163\t167\t172\t177\t180",
+ "\t70\t-180\t-174\t27\t37\t47\t180",
+ "\t69\t-180\t-175\t27\t180",
+ "\t68\t-180\t-171\t27\t180",
+ "\t67\t-180\t-168\t27\t180",
+ "\t66\t-180\t-167\t28\t180",
+ "\t65\t-180\t-167\t28\t180",
+ "\t64\t-180\t-167\t28\t180",
+ "\t63\t-176\t-171\t28\t180",
+ "\t62\t27\t180",
+ "\t61\t25\t180",
+ "\t60\t25\t175",
+ "\t59\t25\t173",
+ "\t58\t26\t167\t169\t171",
+ "\t57\t26\t143\t150\t165",
+ "\t56\t19\t23\t26\t141\t154\t167",
+ "\t55\t18\t23\t26\t144\t154\t169",
+ "\t54\t18\t23\t27\t144\t154\t169",
+ "\t53\t18\t23\t29\t144\t154\t163\t165\t169",
+ "\t52\t30\t63\t71\t144\t154\t161",
+ "\t51\t30\t63\t77\t121\t124\t145\t154\t159",
+ "\t50\t33\t62\t78\t121\t125\t145\t153\t159",
+ "\t49\t34\t50\t53\t62\t78\t99\t101\t120\t126\t145\t152\t157",
+ "\t48\t36\t49\t83\t90\t94\t98\t106\t120\t126\t145\t151\t156",
+ "\t47\t36\t50\t129\t145\t149\t155",
+ "\t46\t35\t50\t129\t144\t146\t154",
+ "\t45\t35\t50\t129\t153",
+ "\t44\t35\t49\t129\t138\t141\t151",
+ "\t43\t36\t49\t129\t137\t144\t148",
+ "\t42\t38\t49\t129\t136\t144\t147",
+ "\t41\t42\t49\t129\t135",
+ "\t40\t45\t49",
+ "Rwanda",
+ "\t0\t28\t31",
+ "\t-1\t27\t31",
+ "\t-2\t27\t31",
+ "\t-3\t27\t31",
+ "Saint Helena",
+ "\t-6\t-15\t-13",
+ "\t-7\t-15\t-13",
+ "\t-8\t-15\t-13",
+ "\t-14\t-6\t-4",
+ "\t-15\t-6\t-4",
+ "\t-16\t-6\t-4",
+ "\t-17\t-6\t-4",
+ "\t-36\t-13\t-11",
+ "\t-37\t-13\t-11",
+ "\t-38\t-13\t-11",
+ "\t-39\t-11\t-8",
+ "\t-40\t-11\t-8",
+ "\t-41\t-11\t-8",
+ "Saint Kitts and Nevis",
+ "\t18\t-63\t-61",
+ "\t17\t-63\t-61",
+ "\t16\t-63\t-61",
+ "Saint Lucia",
+ "\t15\t-62\t-59",
+ "\t14\t-62\t-59",
+ "\t13\t-62\t-59",
+ "\t12\t-62\t-59",
+ "Saint Pierre and Miquelon",
+ "\t48\t-57\t-55",
+ "\t47\t-57\t-55",
+ "\t46\t-57\t-55",
+ "\t45\t-57\t-55",
+ "Saint Vincent and the Grenadines",
+ "\t14\t-62\t-60",
+ "\t13\t-62\t-60",
+ "\t12\t-62\t-60",
+ "\t11\t-62\t-60",
+ "Samoa",
+ "\t-12\t-173\t-170",
+ "\t-13\t-173\t-170",
+ "\t-14\t-173\t-170",
+ "\t-15\t-172\t-170",
+ "San Marino",
+ "\t44\t11\t13",
+ "\t43\t11\t13",
+ "\t42\t11\t13",
+ "Sao Tome and Principe",
+ "\t2\t6\t8",
+ "\t1\t5\t8",
+ "\t0\t5\t8",
+ "\t-1\t5\t7",
+ "Saudi Arabia",
+ "\t33\t37\t40",
+ "\t32\t35\t43",
+ "\t31\t35\t44",
+ "\t30\t33\t48",
+ "\t29\t33\t49",
+ "\t28\t33\t50",
+ "\t27\t33\t51",
+ "\t26\t33\t51",
+ "\t25\t34\t52",
+ "\t24\t35\t53",
+ "\t23\t36\t56",
+ "\t22\t37\t56",
+ "\t21\t37\t56",
+ "\t20\t37\t56",
+ "\t19\t38\t56",
+ "\t18\t39\t55",
+ "\t17\t40\t52",
+ "\t16\t40\t48",
+ "\t15\t40\t48",
+ "Senegal",
+ "\t17\t-17\t-12",
+ "\t16\t-17\t-11",
+ "\t15\t-18\t-10",
+ "\t14\t-18\t-10",
+ "\t13\t-18\t-10",
+ "\t12\t-17\t-10",
+ "\t11\t-17\t-10",
+ "Serbia",
+ "\t47\t18\t21",
+ "\t46\t17\t22",
+ "\t45\t17\t23",
+ "\t44\t17\t23",
+ "\t43\t17\t23",
+ "\t42\t18\t23",
+ "\t41\t19\t23",
+ "Seychelles",
+ "\t-2\t54\t56",
+ "\t-3\t54\t56",
+ "\t-4\t52\t56",
+ "\t-5\t51\t56",
+ "\t-6\t51\t57",
+ "\t-7\t51\t53\t55\t57",
+ "\t-8\t45\t48\t51\t53\t55\t57",
+ "\t-9\t45\t48",
+ "\t-10\t45\t48",
+ "Sierra Leone",
+ "\t10\t-14\t-9",
+ "\t9\t-14\t-9",
+ "\t8\t-14\t-9",
+ "\t7\t-14\t-9",
+ "\t6\t-13\t-9",
+ "\t5\t-12\t-10",
+ "Singapore",
+ "\t2\t102\t105",
+ "\t1\t102\t105",
+ "\t0\t102\t105",
+ "Sint Maarten",
+ "\t19\t-64\t-62",
+ "\t18\t-64\t-62",
+ "\t17\t-64\t-62",
+ "Slovakia",
+ "\t50\t16\t23",
+ "\t49\t15\t23",
+ "\t48\t15\t23",
+ "\t47\t15\t23",
+ "\t46\t16\t19",
+ "Slovenia",
+ "\t47\t12\t17",
+ "\t46\t12\t17",
+ "\t45\t12\t17",
+ "\t44\t12\t16",
+ "Solomon Islands",
+ "\t-5\t154\t158",
+ "\t-6\t154\t161",
+ "\t-7\t154\t163",
+ "\t-8\t154\t163\t166\t168",
+ "\t-9\t155\t168",
+ "\t-10\t158\t168",
+ "\t-11\t158\t169",
+ "\t-12\t158\t161\t165\t169",
+ "\t-13\t167\t169",
+ "Somalia",
+ "\t12\t47\t52",
+ "\t11\t47\t52",
+ "\t10\t47\t52",
+ "\t9\t47\t52",
+ "\t8\t45\t51",
+ "\t7\t44\t51",
+ "\t6\t44\t50",
+ "\t5\t40\t50",
+ "\t4\t40\t49",
+ "\t3\t39\t49",
+ "\t2\t39\t48",
+ "\t1\t39\t47",
+ "\t0\t39\t46",
+ "\t-1\t39\t44",
+ "\t-2\t40\t42",
+ "Somalia",
+ "\t12\t42\t44\t46\t49",
+ "\t11\t41\t49",
+ "\t10\t41\t49",
+ "\t9\t41\t49",
+ "\t8\t42\t49",
+ "\t7\t43\t49",
+ "\t6\t45\t48",
+ "South Africa",
+ "\t-21\t26\t32",
+ "\t-22\t25\t32",
+ "\t-23\t18\t21\t24\t32",
+ "\t-24\t18\t32",
+ "\t-25\t18\t33",
+ "\t-26\t18\t33",
+ "\t-27\t15\t33",
+ "\t-28\t15\t33",
+ "\t-29\t15\t33",
+ "\t-30\t15\t32",
+ "\t-31\t16\t31",
+ "\t-32\t16\t31",
+ "\t-33\t16\t30",
+ "\t-34\t16\t28",
+ "\t-35\t17\t26",
+ "\t-45\t36\t38",
+ "\t-46\t36\t38",
+ "\t-47\t36\t38",
+ "South Georgia and the South Sandwich Islands",
+ "\t-52\t-43\t-36",
+ "\t-53\t-43\t-33",
+ "\t-54\t-43\t-33",
+ "\t-55\t-39\t-33\t-29\t-26",
+ "\t-56\t-35\t-33\t-29\t-25",
+ "\t-57\t-29\t-25",
+ "\t-58\t-28\t-25",
+ "\t-59\t-28\t-25",
+ "\t-60\t-28\t-25",
+ "South Korea",
+ "\t39\t125\t129",
+ "\t38\t123\t131",
+ "\t37\t123\t131",
+ "\t36\t123\t131",
+ "\t35\t124\t130",
+ "\t34\t124\t130",
+ "\t33\t124\t129",
+ "\t32\t125\t127",
+ "Spain",
+ "\t44\t-10\t0",
+ "\t43\t-10\t4",
+ "\t42\t-10\t4",
+ "\t41\t-10\t5",
+ "\t40\t-9\t5",
+ "\t39\t-8\t5",
+ "\t38\t-8\t5",
+ "\t37\t-8\t2",
+ "\t36\t-8\t1",
+ "\t35\t-7\t0",
+ "\t34\t-6\t-1",
+ "Spain: Canary Islands",
+ "\t30\t-14\t-12",
+ "\t29\t-19\t-12",
+ "\t28\t-19\t-12",
+ "\t27\t-19\t-12",
+ "\t26\t-19\t-14",
+ "Spratly Islands",
+ "\t12\t113\t115",
+ "\t11\t112\t115",
+ "\t10\t112\t116",
+ "\t9\t110\t116",
+ "\t8\t110\t116",
+ "\t7\t110\t116",
+ "\t6\t112\t114",
+ "\t5\t112\t114",
+ "Sri Lanka",
+ "\t10\t78\t81",
+ "\t9\t78\t82",
+ "\t8\t78\t82",
+ "\t7\t78\t82",
+ "\t6\t78\t82",
+ "\t5\t78\t82",
+ "\t4\t79\t81",
+ "Sudan",
+ "\t23\t30\t32",
+ "\t22\t23\t38",
+ "\t21\t23\t38",
+ "\t20\t22\t38",
+ "\t19\t22\t39",
+ "\t18\t22\t39",
+ "\t17\t22\t39",
+ "\t16\t21\t39",
+ "\t15\t21\t38",
+ "\t14\t20\t37",
+ "\t13\t20\t37",
+ "\t12\t20\t37",
+ "\t11\t20\t37",
+ "\t10\t21\t36",
+ "\t9\t21\t35",
+ "\t8\t22\t35",
+ "\t7\t22\t35",
+ "\t6\t23\t36",
+ "\t5\t25\t36",
+ "\t4\t25\t36",
+ "\t3\t26\t35",
+ "\t2\t29\t34",
+ "Suriname",
+ "\t7\t-56\t-54",
+ "\t6\t-58\t-52",
+ "\t5\t-59\t-52",
+ "\t4\t-59\t-52",
+ "\t3\t-59\t-52",
+ "\t2\t-59\t-52",
+ "\t1\t-58\t-53",
+ "\t0\t-57\t-54",
+ "Svalbard",
+ "\t81\t15\t28\t30\t34",
+ "\t80\t9\t34",
+ "\t79\t9\t34",
+ "\t78\t9\t31",
+ "\t77\t9\t31",
+ "\t76\t12\t26",
+ "\t75\t14\t20\t23\t26",
+ "\t74\t17\t20",
+ "\t73\t17\t20",
+ "Swaziland",
+ "\t-24\t30\t33",
+ "\t-25\t29\t33",
+ "\t-26\t29\t33",
+ "\t-27\t29\t33",
+ "\t-28\t29\t32",
+ "Sweden",
+ "\t70\t19\t21",
+ "\t69\t16\t24",
+ "\t68\t15\t24",
+ "\t67\t13\t25",
+ "\t66\t13\t25",
+ "\t65\t11\t25",
+ "\t64\t10\t25",
+ "\t63\t10\t22",
+ "\t62\t10\t21",
+ "\t61\t11\t19",
+ "\t60\t10\t20",
+ "\t59\t10\t20",
+ "\t58\t10\t20",
+ "\t57\t10\t20",
+ "\t56\t10\t20",
+ "\t55\t11\t19",
+ "\t54\t11\t15",
+ "Switzerland",
+ "\t48\t5\t10",
+ "\t47\t4\t11",
+ "\t46\t4\t11",
+ "\t45\t4\t11",
+ "\t44\t5\t10",
+ "Syria",
+ "\t38\t39\t43",
+ "\t37\t35\t43",
+ "\t36\t34\t43",
+ "\t35\t34\t43",
+ "\t34\t34\t42",
+ "\t33\t34\t42",
+ "\t32\t34\t40",
+ "\t31\t34\t39",
+ "Taiwan",
+ "\t26\t120\t123",
+ "\t25\t119\t123",
+ "\t24\t118\t123",
+ "\t23\t118\t122",
+ "\t22\t118\t122",
+ "\t21\t119\t122",
+ "\t20\t119\t121",
+ "Tajikistan",
+ "\t42\t69\t71",
+ "\t41\t67\t71",
+ "\t40\t66\t74",
+ "\t39\t66\t75",
+ "\t38\t66\t76",
+ "\t37\t66\t76",
+ "\t36\t66\t76",
+ "\t35\t66\t73",
+ "Tanzania",
+ "\t1\t29\t31",
+ "\t0\t29\t36",
+ "\t-1\t29\t38",
+ "\t-2\t29\t39",
+ "\t-3\t28\t40",
+ "\t-4\t28\t40",
+ "\t-5\t28\t40",
+ "\t-6\t28\t40",
+ "\t-7\t28\t40",
+ "\t-8\t29\t40",
+ "\t-9\t29\t41",
+ "\t-10\t30\t41",
+ "\t-11\t33\t41",
+ "\t-12\t33\t40",
+ "Thailand",
+ "\t21\t98\t101",
+ "\t20\t96\t102",
+ "\t19\t96\t105",
+ "\t18\t96\t105",
+ "\t17\t96\t106",
+ "\t16\t96\t106",
+ "\t15\t97\t106",
+ "\t14\t97\t106",
+ "\t13\t97\t106",
+ "\t12\t97\t103",
+ "\t11\t97\t103",
+ "\t10\t96\t103",
+ "\t9\t96\t101",
+ "\t8\t96\t101",
+ "\t7\t97\t103",
+ "\t6\t97\t103",
+ "\t5\t98\t103",
+ "\t4\t99\t102",
+ "Togo",
+ "\t12\t-1\t1",
+ "\t11\t-1\t2",
+ "\t10\t-1\t2",
+ "\t9\t-1\t2",
+ "\t8\t-1\t2",
+ "\t7\t-1\t2",
+ "\t6\t-1\t2",
+ "\t5\t-1\t2",
+ "Tokelau",
+ "\t-8\t-172\t-170",
+ "\t-9\t-172\t-170",
+ "\t-10\t-172\t-170",
+ "Tonga",
+ "\t-14\t-176\t-172",
+ "\t-15\t-176\t-172",
+ "\t-16\t-176\t-172",
+ "\t-17\t-175\t-172",
+ "\t-18\t-176\t-172",
+ "\t-19\t-176\t-172",
+ "\t-20\t-176\t-173",
+ "\t-21\t-177\t-173",
+ "\t-22\t-177\t-173",
+ "\t-23\t-177\t-175",
+ "Trinidad and Tobago",
+ "\t12\t-61\t-59",
+ "\t11\t-62\t-59",
+ "\t10\t-62\t-59",
+ "\t9\t-62\t-59",
+ "Tromelin Island",
+ "\t-14\t53\t55",
+ "\t-15\t53\t55",
+ "\t-16\t53\t55",
+ "Tunisia",
+ "\t38\t7\t12",
+ "\t37\t7\t12",
+ "\t36\t7\t12",
+ "\t35\t6\t12",
+ "\t34\t6\t12",
+ "\t33\t6\t12",
+ "\t32\t6\t12",
+ "\t31\t7\t12",
+ "\t30\t8\t11",
+ "\t29\t8\t11",
+ "Turkey",
+ "\t43\t25\t28\t32\t36",
+ "\t42\t25\t44",
+ "\t41\t24\t45",
+ "\t40\t24\t45",
+ "\t39\t24\t45",
+ "\t38\t24\t45",
+ "\t37\t25\t45",
+ "\t36\t26\t45",
+ "\t35\t26\t41\t43\t45",
+ "\t34\t34\t37",
+ "Turkmenistan",
+ "\t43\t51\t61",
+ "\t42\t51\t62",
+ "\t41\t51\t63",
+ "\t40\t51\t64",
+ "\t39\t51\t67",
+ "\t38\t51\t67",
+ "\t37\t52\t67",
+ "\t36\t52\t67",
+ "\t35\t59\t65",
+ "\t34\t60\t65",
+ "Turks and Caicos Islands",
+ "\t22\t-73\t-70",
+ "\t21\t-73\t-70",
+ "\t20\t-73\t-70",
+ "Tuvalu",
+ "\t-4\t175\t177",
+ "\t-5\t175\t178",
+ "\t-6\t175\t179",
+ "\t-7\t175\t180",
+ "\t-8\t176\t180",
+ "\t-9\t177\t180",
+ "\t-10\t178\t180",
+ "Uganda",
+ "\t5\t32\t35",
+ "\t4\t29\t35",
+ "\t3\t29\t35",
+ "\t2\t29\t36",
+ "\t1\t28\t36",
+ "\t0\t28\t36",
+ "\t-1\t28\t35",
+ "\t-2\t28\t34",
+ "Ukraine",
+ "\t53\t29\t35",
+ "\t52\t22\t36",
+ "\t51\t22\t39",
+ "\t50\t21\t41",
+ "\t49\t21\t41",
+ "\t48\t21\t41",
+ "\t47\t21\t41",
+ "\t46\t21\t40",
+ "\t45\t27\t38",
+ "\t44\t27\t37",
+ "\t43\t32\t36",
+ "United Arab Emirates",
+ "\t27\t55\t57",
+ "\t26\t53\t57",
+ "\t25\t50\t57",
+ "\t24\t50\t57",
+ "\t23\t50\t57",
+ "\t22\t50\t56",
+ "\t21\t51\t56",
+ "United Kingdom",
+ "\t61\t-3\t1",
+ "\t60\t-4\t1",
+ "\t59\t-8\t1",
+ "\t58\t-14\t-12\t-9\t0",
+ "\t57\t-14\t-12\t-9\t0",
+ "\t56\t-14\t-12\t-9\t0",
+ "\t55\t-9\t1",
+ "\t54\t-9\t1",
+ "\t53\t-9\t2",
+ "\t52\t-6\t2",
+ "\t51\t-6\t2",
+ "\t50\t-7\t2",
+ "\t49\t-7\t1",
+ "\t48\t-7\t-4",
+ "Uruguay",
+ "\t-29\t-58\t-54",
+ "\t-30\t-59\t-52",
+ "\t-31\t-59\t-52",
+ "\t-32\t-59\t-52",
+ "\t-33\t-59\t-52",
+ "\t-34\t-59\t-52",
+ "\t-35\t-59\t-52",
+ "USA",
+ "\t72\t-158\t-153",
+ "\t71\t-163\t-141",
+ "\t70\t-164\t-140",
+ "\t69\t-167\t-140",
+ "\t68\t-167\t-140",
+ "\t67\t-167\t-140",
+ "\t66\t-169\t-140",
+ "\t65\t-169\t-140",
+ "\t64\t-172\t-140",
+ "\t63\t-172\t-140",
+ "\t62\t-172\t-140",
+ "\t61\t-174\t-138",
+ "\t60\t-174\t-171\t-168\t-133",
+ "\t59\t-174\t-171\t-168\t-132",
+ "\t58\t-171\t-131",
+ "\t57\t-171\t-168\t-163\t-150\t-138\t-129",
+ "\t56\t-171\t-168\t-164\t-151\t-137\t-128",
+ "\t55\t-170\t-152\t-136\t-128",
+ "\t54\t-170\t-154\t-135\t-128\t171\t173",
+ "\t53\t-177\t-158\t-134\t-129\t171\t180",
+ "\t52\t-180\t-165\t171\t180",
+ "\t51\t-180\t-167\t171\t180",
+ "\t50\t-180\t-174\t-96\t-93\t176\t180",
+ "\t49\t-125\t-86",
+ "\t48\t-125\t-84\t-70\t-66",
+ "\t47\t-125\t-82\t-71\t-66",
+ "\t46\t-125\t-81\t-75\t-66",
+ "\t45\t-125\t-81\t-77\t-65",
+ "\t44\t-125\t-65",
+ "\t43\t-125\t-65",
+ "\t42\t-125\t-68",
+ "\t41\t-125\t-68",
+ "\t40\t-125\t-68",
+ "\t39\t-125\t-71",
+ "\t38\t-124\t-73",
+ "\t37\t-124\t-73",
+ "\t36\t-124\t-74",
+ "\t35\t-123\t-74",
+ "\t34\t-122\t-74",
+ "\t33\t-121\t-75",
+ "\t32\t-121\t-76",
+ "\t31\t-119\t-78",
+ "\t30\t-114\t-79",
+ "\t29\t-106\t-79",
+ "\t28\t-105\t-79",
+ "\t27\t-174\t-172\t-104\t-94\t-90\t-88\t-83\t-79",
+ "\t26\t-174\t-166\t-100\t-95\t-83\t-79",
+ "\t25\t-174\t-166\t-100\t-96\t-83\t-79",
+ "\t24\t-172\t-160\t-98\t-96\t-83\t-79",
+ "\t23\t-165\t-158\t-83\t-79",
+ "\t22\t-165\t-155",
+ "\t21\t-161\t-154",
+ "\t20\t-161\t-153",
+ "\t19\t-158\t-153",
+ "\t18\t-157\t-153",
+ "\t17\t-156\t-154",
+ "USA: Alabama",
+ "\t36\t-89\t-84",
+ "\t35\t-89\t-84",
+ "\t34\t-89\t-84",
+ "\t33\t-89\t-83",
+ "\t32\t-89\t-83",
+ "\t31\t-89\t-83",
+ "\t30\t-89\t-83",
+ "\t29\t-89\t-86",
+ "USA: Alaska",
+ "\t72\t-158\t-153",
+ "\t71\t-163\t-141",
+ "\t70\t-164\t-140",
+ "\t69\t-167\t-140",
+ "\t68\t-167\t-140",
+ "\t67\t-167\t-140",
+ "\t66\t-169\t-140",
+ "\t65\t-169\t-140",
+ "\t64\t-172\t-140",
+ "\t63\t-172\t-140",
+ "\t62\t-172\t-140",
+ "\t61\t-174\t-138",
+ "\t60\t-174\t-171\t-168\t-133",
+ "\t59\t-174\t-171\t-168\t-132",
+ "\t58\t-171\t-131",
+ "\t57\t-171\t-168\t-163\t-150\t-138\t-129",
+ "\t56\t-171\t-168\t-164\t-151\t-137\t-128",
+ "\t55\t-170\t-152\t-136\t-128",
+ "\t54\t-170\t-154\t-135\t-128\t171\t173",
+ "\t53\t-177\t-158\t-134\t-129\t171\t180",
+ "\t52\t-180\t-165\t171\t180",
+ "\t51\t-180\t-167\t171\t180",
+ "\t50\t-180\t-174\t176\t180",
+ "USA: Alaska, Aleutian Islands",
+ "\t60\t-154\t-149\t-147\t-145",
+ "\t59\t-162\t-159\t-154\t-149\t-147\t-145",
+ "\t58\t-171\t-169\t-162\t-159\t-155\t-149\t-147\t-145",
+ "\t57\t-171\t-168\t-162\t-150",
+ "\t56\t-171\t-168\t-164\t-151",
+ "\t55\t-170\t-152",
+ "\t54\t-170\t-154",
+ "\t53\t-177\t-158",
+ "\t52\t-180\t-165",
+ "\t51\t-180\t-167",
+ "\t50\t-180\t-174",
+ "USA: Arizona",
+ "\t38\t-115\t-108",
+ "\t37\t-115\t-108",
+ "\t36\t-115\t-108",
+ "\t35\t-115\t-108",
+ "\t34\t-115\t-108",
+ "\t33\t-115\t-108",
+ "\t32\t-115\t-108",
+ "\t31\t-115\t-108",
+ "\t30\t-114\t-108",
+ "USA: Arkansas",
+ "\t37\t-95\t-88",
+ "\t36\t-95\t-88",
+ "\t35\t-95\t-88",
+ "\t34\t-95\t-88",
+ "\t33\t-95\t-89",
+ "\t32\t-95\t-90",
+ "USA: California",
+ "\t43\t-125\t-119",
+ "\t42\t-125\t-119",
+ "\t41\t-125\t-119",
+ "\t40\t-125\t-119",
+ "\t39\t-125\t-117",
+ "\t38\t-124\t-116",
+ "\t37\t-124\t-115",
+ "\t36\t-124\t-113",
+ "\t35\t-123\t-113",
+ "\t34\t-122\t-113",
+ "\t33\t-121\t-113",
+ "\t32\t-121\t-113",
+ "\t31\t-119\t-113",
+ "USA: Colorado",
+ "\t42\t-110\t-101",
+ "\t41\t-110\t-101",
+ "\t40\t-110\t-101",
+ "\t39\t-110\t-101",
+ "\t38\t-110\t-101",
+ "\t37\t-110\t-101",
+ "\t36\t-110\t-101",
+ "USA: Connecticut",
+ "\t43\t-74\t-70",
+ "\t42\t-74\t-70",
+ "\t41\t-74\t-70",
+ "\t40\t-74\t-70",
+ "USA: Delaware",
+ "\t40\t-76\t-74",
+ "\t39\t-76\t-74",
+ "\t38\t-76\t-74",
+ "\t37\t-76\t-74",
+ "USA: District of Columbia",
+ "\t40\t-78\t-76",
+ "\t39\t-78\t-75",
+ "\t38\t-78\t-75",
+ "\t37\t-78\t-75",
+ "USA: Florida",
+ "\t32\t-88\t-84",
+ "\t31\t-88\t-80",
+ "\t30\t-88\t-79",
+ "\t29\t-88\t-79",
+ "\t28\t-86\t-79",
+ "\t27\t-83\t-79",
+ "\t26\t-83\t-79",
+ "\t25\t-83\t-79",
+ "\t24\t-83\t-79",
+ "\t23\t-83\t-79",
+ "USA: Georgia",
+ "\t36\t-86\t-82",
+ "\t35\t-86\t-81",
+ "\t34\t-86\t-80",
+ "\t33\t-86\t-79",
+ "\t32\t-86\t-79",
+ "\t31\t-86\t-79",
+ "\t30\t-86\t-79",
+ "\t29\t-86\t-80",
+ "USA: Hawaii",
+ "\t27\t-174\t-172",
+ "\t26\t-174\t-166",
+ "\t25\t-174\t-166",
+ "\t24\t-172\t-160",
+ "\t23\t-165\t-158",
+ "\t22\t-165\t-155",
+ "\t21\t-161\t-154",
+ "\t20\t-161\t-153",
+ "\t19\t-158\t-153",
+ "\t18\t-157\t-153",
+ "\t17\t-156\t-154",
+ "USA: Idaho",
+ "\t49\t-118\t-115",
+ "\t48\t-118\t-114",
+ "\t47\t-118\t-113",
+ "\t46\t-118\t-112",
+ "\t45\t-118\t-110",
+ "\t44\t-118\t-110",
+ "\t43\t-118\t-110",
+ "\t42\t-118\t-110",
+ "\t41\t-118\t-110",
+ "USA: Illinois",
+ "\t43\t-91\t-86",
+ "\t42\t-92\t-86",
+ "\t41\t-92\t-86",
+ "\t40\t-92\t-86",
+ "\t39\t-92\t-86",
+ "\t38\t-92\t-86",
+ "\t37\t-91\t-86",
+ "\t36\t-91\t-87",
+ "\t35\t-90\t-88",
+ "USA: Indiana",
+ "\t42\t-88\t-83",
+ "\t41\t-88\t-83",
+ "\t40\t-88\t-83",
+ "\t39\t-89\t-83",
+ "\t38\t-89\t-83",
+ "\t37\t-89\t-83",
+ "\t36\t-89\t-85",
+ "USA: Iowa",
+ "\t44\t-97\t-90",
+ "\t43\t-97\t-89",
+ "\t42\t-97\t-89",
+ "\t41\t-97\t-89",
+ "\t40\t-97\t-89",
+ "\t39\t-96\t-89",
+ "USA: Kansas",
+ "\t41\t-103\t-94",
+ "\t40\t-103\t-93",
+ "\t39\t-103\t-93",
+ "\t38\t-103\t-93",
+ "\t37\t-103\t-93",
+ "\t36\t-103\t-93",
+ "USA: Kentucky",
+ "\t40\t-85\t-83",
+ "\t39\t-87\t-81",
+ "\t38\t-90\t-80",
+ "\t37\t-90\t-80",
+ "\t36\t-90\t-80",
+ "\t35\t-90\t-81",
+ "USA: Louisiana",
+ "\t34\t-95\t-90",
+ "\t33\t-95\t-89",
+ "\t32\t-95\t-88",
+ "\t31\t-95\t-87",
+ "\t30\t-95\t-87",
+ "\t29\t-94\t-87",
+ "\t28\t-94\t-87",
+ "\t27\t-90\t-88",
+ "USA: Maine",
+ "\t48\t-70\t-66",
+ "\t47\t-71\t-66",
+ "\t46\t-72\t-66",
+ "\t45\t-72\t-65",
+ "\t44\t-72\t-65",
+ "\t43\t-72\t-65",
+ "\t42\t-71\t-68",
+ "USA: Maryland",
+ "\t40\t-80\t-74",
+ "\t39\t-80\t-74",
+ "\t38\t-80\t-74",
+ "\t37\t-78\t-74",
+ "\t36\t-77\t-74",
+ "USA: Massachusetts",
+ "\t43\t-74\t-69",
+ "\t42\t-74\t-68",
+ "\t41\t-74\t-68",
+ "\t40\t-72\t-68",
+ "USA: Michigan",
+ "\t49\t-90\t-86",
+ "\t48\t-91\t-84",
+ "\t47\t-91\t-82",
+ "\t46\t-91\t-81",
+ "\t45\t-91\t-81",
+ "\t44\t-89\t-81",
+ "\t43\t-88\t-81",
+ "\t42\t-88\t-81",
+ "\t41\t-88\t-81",
+ "\t40\t-88\t-82",
+ "USA: Minnesota",
+ "\t50\t-96\t-93",
+ "\t49\t-98\t-88",
+ "\t48\t-98\t-88",
+ "\t47\t-98\t-88",
+ "\t46\t-98\t-88",
+ "\t45\t-97\t-90",
+ "\t44\t-97\t-90",
+ "\t43\t-97\t-90",
+ "\t42\t-97\t-90",
+ "USA: Mississippi",
+ "\t36\t-91\t-88",
+ "\t35\t-92\t-87",
+ "\t34\t-92\t-87",
+ "\t33\t-92\t-87",
+ "\t32\t-92\t-87",
+ "\t31\t-92\t-87",
+ "\t30\t-92\t-87",
+ "\t29\t-90\t-87",
+ "USA: Missouri",
+ "\t41\t-96\t-90",
+ "\t40\t-96\t-89",
+ "\t39\t-96\t-89",
+ "\t38\t-96\t-88",
+ "\t37\t-95\t-88",
+ "\t36\t-95\t-88",
+ "\t35\t-95\t-88",
+ "\t34\t-91\t-88",
+ "USA: Montana",
+ "\t49\t-117\t-103",
+ "\t48\t-117\t-103",
+ "\t47\t-117\t-103",
+ "\t46\t-117\t-103",
+ "\t45\t-116\t-103",
+ "\t44\t-115\t-103",
+ "\t43\t-114\t-110",
+ "USA: Nebraska",
+ "\t44\t-105\t-97",
+ "\t43\t-105\t-95",
+ "\t42\t-105\t-94",
+ "\t41\t-105\t-94",
+ "\t40\t-105\t-94",
+ "\t39\t-103\t-94",
+ "USA: Nevada",
+ "\t43\t-121\t-113",
+ "\t42\t-121\t-113",
+ "\t41\t-121\t-113",
+ "\t40\t-121\t-113",
+ "\t39\t-121\t-113",
+ "\t38\t-121\t-113",
+ "\t37\t-120\t-113",
+ "\t36\t-119\t-113",
+ "\t35\t-118\t-113",
+ "\t34\t-116\t-113",
+ "USA: New Hampshire",
+ "\t46\t-72\t-70",
+ "\t45\t-73\t-69",
+ "\t44\t-73\t-69",
+ "\t43\t-73\t-69",
+ "\t42\t-73\t-69",
+ "\t41\t-73\t-69",
+ "USA: New Jersey",
+ "\t42\t-76\t-73",
+ "\t41\t-76\t-72",
+ "\t40\t-76\t-72",
+ "\t39\t-76\t-72",
+ "\t38\t-76\t-73",
+ "\t37\t-75\t-73",
+ "USA: New Mexico",
+ "\t38\t-110\t-102",
+ "\t37\t-110\t-102",
+ "\t36\t-110\t-102",
+ "\t35\t-110\t-102",
+ "\t34\t-110\t-102",
+ "\t33\t-110\t-102",
+ "\t32\t-110\t-102",
+ "\t31\t-110\t-102",
+ "\t30\t-110\t-105",
+ "USA: New York",
+ "\t46\t-75\t-72",
+ "\t45\t-77\t-72",
+ "\t44\t-80\t-72",
+ "\t43\t-80\t-72",
+ "\t42\t-80\t-70",
+ "\t41\t-80\t-70",
+ "\t40\t-79\t-70",
+ "\t39\t-75\t-71",
+ "USA: North Carolina",
+ "\t37\t-83\t-74",
+ "\t36\t-85\t-74",
+ "\t35\t-85\t-74",
+ "\t34\t-85\t-74",
+ "\t33\t-85\t-75",
+ "\t32\t-79\t-76",
+ "USA: North Dakota",
+ "\t49\t-105\t-96",
+ "\t48\t-105\t-95",
+ "\t47\t-105\t-95",
+ "\t46\t-105\t-95",
+ "\t45\t-105\t-95",
+ "\t44\t-105\t-95",
+ "USA: Ohio",
+ "\t43\t-82\t-79",
+ "\t42\t-85\t-79",
+ "\t41\t-85\t-79",
+ "\t40\t-85\t-79",
+ "\t39\t-85\t-79",
+ "\t38\t-85\t-79",
+ "\t37\t-85\t-80",
+ "USA: Oklahoma",
+ "\t38\t-104\t-93",
+ "\t37\t-104\t-93",
+ "\t36\t-104\t-93",
+ "\t35\t-104\t-93",
+ "\t34\t-101\t-93",
+ "\t33\t-101\t-93",
+ "\t32\t-98\t-93",
+ "USA: Oregon",
+ "\t47\t-124\t-115",
+ "\t46\t-125\t-115",
+ "\t45\t-125\t-115",
+ "\t44\t-125\t-115",
+ "\t43\t-125\t-115",
+ "\t42\t-125\t-115",
+ "\t41\t-125\t-116",
+ "USA: Pennsylvania",
+ "\t43\t-81\t-77",
+ "\t42\t-81\t-73",
+ "\t41\t-81\t-73",
+ "\t40\t-81\t-73",
+ "\t39\t-81\t-73",
+ "\t38\t-81\t-74",
+ "USA: Rhode Island",
+ "\t43\t-72\t-70",
+ "\t42\t-72\t-70",
+ "\t41\t-72\t-70",
+ "\t40\t-72\t-70",
+ "USA: South Carolina",
+ "\t36\t-84\t-79",
+ "\t35\t-84\t-77",
+ "\t34\t-84\t-77",
+ "\t33\t-84\t-77",
+ "\t32\t-83\t-77",
+ "\t31\t-82\t-78",
+ "USA: South Dakota",
+ "\t46\t-105\t-95",
+ "\t45\t-105\t-95",
+ "\t44\t-105\t-95",
+ "\t43\t-105\t-95",
+ "\t42\t-105\t-95",
+ "\t41\t-99\t-95",
+ "USA: Tennessee",
+ "\t37\t-90\t-80",
+ "\t36\t-91\t-80",
+ "\t35\t-91\t-80",
+ "\t34\t-91\t-81",
+ "\t33\t-90\t-87\t-85\t-83",
+ "USA: Texas",
+ "\t37\t-104\t-99",
+ "\t36\t-104\t-99",
+ "\t35\t-104\t-97",
+ "\t34\t-104\t-93",
+ "\t33\t-107\t-93",
+ "\t32\t-107\t-92",
+ "\t31\t-107\t-92",
+ "\t30\t-107\t-92",
+ "\t29\t-106\t-92",
+ "\t28\t-105\t-92",
+ "\t27\t-104\t-94",
+ "\t26\t-100\t-95",
+ "\t25\t-100\t-96",
+ "\t24\t-98\t-96",
+ "USA: Utah",
+ "\t43\t-115\t-110",
+ "\t42\t-115\t-108",
+ "\t41\t-115\t-108",
+ "\t40\t-115\t-108",
+ "\t39\t-115\t-108",
+ "\t38\t-115\t-108",
+ "\t37\t-115\t-108",
+ "\t36\t-115\t-108",
+ "USA: Vermont",
+ "\t46\t-74\t-70",
+ "\t45\t-74\t-70",
+ "\t44\t-74\t-70",
+ "\t43\t-74\t-70",
+ "\t42\t-74\t-71",
+ "\t41\t-74\t-71",
+ "USA: Virginia",
+ "\t40\t-79\t-76",
+ "\t39\t-81\t-74",
+ "\t38\t-83\t-74",
+ "\t37\t-84\t-74",
+ "\t36\t-84\t-74",
+ "\t35\t-84\t-74",
+ "USA: Washington",
+ "\t49\t-125\t-116",
+ "\t48\t-125\t-116",
+ "\t47\t-125\t-115",
+ "\t46\t-125\t-115",
+ "\t45\t-125\t-115",
+ "\t44\t-123\t-118",
+ "USA: West Virginia",
+ "\t41\t-81\t-79",
+ "\t40\t-83\t-76",
+ "\t39\t-83\t-76",
+ "\t38\t-83\t-76",
+ "\t37\t-83\t-77",
+ "\t36\t-83\t-79",
+ "USA: Wisconsin",
+ "\t48\t-92\t-88",
+ "\t47\t-93\t-87",
+ "\t46\t-93\t-85",
+ "\t45\t-93\t-85",
+ "\t44\t-93\t-85",
+ "\t43\t-93\t-85",
+ "\t42\t-92\t-86",
+ "\t41\t-92\t-86",
+ "USA: Wyoming",
+ "\t46\t-112\t-103",
+ "\t45\t-112\t-103",
+ "\t44\t-112\t-103",
+ "\t43\t-112\t-103",
+ "\t42\t-112\t-103",
+ "\t41\t-112\t-103",
+ "\t40\t-112\t-103",
+ "Uzbekistan",
+ "\t46\t55\t60",
+ "\t45\t54\t62",
+ "\t44\t54\t66",
+ "\t43\t54\t67\t69\t72",
+ "\t42\t54\t73",
+ "\t41\t54\t74",
+ "\t40\t54\t74",
+ "\t39\t60\t74",
+ "\t38\t61\t72",
+ "\t37\t63\t69",
+ "\t36\t65\t69",
+ "Vanuatu",
+ "\t-12\t165\t168",
+ "\t-13\t165\t169",
+ "\t-14\t165\t169",
+ "\t-15\t165\t169",
+ "\t-16\t165\t169",
+ "\t-17\t166\t170",
+ "\t-18\t167\t170",
+ "\t-19\t167\t170",
+ "\t-20\t168\t170",
+ "\t-21\t168\t170",
+ "Venezuela",
+ "\t13\t-71\t-66",
+ "\t12\t-73\t-62",
+ "\t11\t-73\t-60",
+ "\t10\t-74\t-59",
+ "\t9\t-74\t-58",
+ "\t8\t-74\t-58",
+ "\t7\t-73\t-58",
+ "\t6\t-73\t-59",
+ "\t5\t-72\t-59",
+ "\t4\t-68\t-59",
+ "\t3\t-68\t-59",
+ "\t2\t-68\t-61",
+ "\t1\t-68\t-62",
+ "\t0\t-68\t-62",
+ "\t-1\t-67\t-64",
+ "Viet Nam",
+ "\t24\t103\t106",
+ "\t23\t101\t107",
+ "\t22\t101\t108",
+ "\t21\t101\t108",
+ "\t20\t101\t108",
+ "\t19\t102\t108",
+ "\t18\t102\t108",
+ "\t17\t103\t109",
+ "\t16\t104\t109",
+ "\t15\t105\t110",
+ "\t14\t106\t110",
+ "\t13\t105\t110",
+ "\t12\t104\t110",
+ "\t11\t102\t110",
+ "\t10\t102\t110",
+ "\t9\t102\t109",
+ "\t8\t103\t107",
+ "\t7\t103\t107",
+ "Virgin Islands",
+ "\t19\t-66\t-63",
+ "\t18\t-66\t-63",
+ "\t17\t-66\t-63",
+ "\t16\t-65\t-63",
+ "Wake Island",
+ "\t20\t165\t167",
+ "\t19\t165\t167",
+ "\t18\t165\t167",
+ "Wallis and Futuna",
+ "\t-12\t-177\t-175",
+ "\t-13\t-179\t-175",
+ "\t-14\t-179\t-175",
+ "\t-15\t-179\t-176",
+ "West Bank",
+ "\t33\t33\t36",
+ "\t32\t33\t36",
+ "\t31\t33\t36",
+ "\t30\t33\t36",
+ "Western Sahara",
+ "\t28\t-14\t-7",
+ "\t27\t-15\t-7",
+ "\t26\t-15\t-7",
+ "\t25\t-16\t-7",
+ "\t24\t-17\t-7",
+ "\t23\t-17\t-11",
+ "\t22\t-18\t-11",
+ "\t21\t-18\t-12",
+ "\t20\t-18\t-12",
+ "\t19\t-18\t-16",
+ "Yemen",
+ "\t19\t47\t53",
+ "\t18\t42\t53",
+ "\t17\t41\t54",
+ "\t16\t41\t54",
+ "\t15\t41\t54",
+ "\t14\t41\t53",
+ "\t13\t41\t55",
+ "\t12\t41\t49\t51\t55",
+ "\t11\t42\t46\t51\t55",
+ "Zambia",
+ "\t-7\t27\t32",
+ "\t-8\t27\t34",
+ "\t-9\t22\t25\t27\t34",
+ "\t-10\t22\t34",
+ "\t-11\t22\t34",
+ "\t-12\t20\t34",
+ "\t-13\t20\t34",
+ "\t-14\t20\t34",
+ "\t-15\t20\t34",
+ "\t-16\t20\t31",
+ "\t-17\t20\t29",
+ "\t-18\t21\t28",
+ "\t-19\t24\t27",
+ "Zimbabwe",
+ "\t-14\t27\t32",
+ "\t-15\t26\t33",
+ "\t-16\t24\t34",
+ "\t-17\t24\t34",
+ "\t-18\t24\t34",
+ "\t-19\t24\t34",
+ "\t-20\t24\t34",
+ "\t-21\t25\t34",
+ "\t-22\t26\t33",
+ "\t-23\t28\t32",
+ NULL
+};
+
+extern CharPtr water_onedegree [];
+CharPtr water_onedegree [] = {
+ "1",
+ "Adriatic Sea",
+ "\t46\t11\t15",
+ "\t45\t11\t16",
+ "\t44\t11\t18",
+ "\t43\t11\t20",
+ "\t42\t11\t20",
+ "\t41\t12\t20",
+ "\t40\t14\t20",
+ "\t39\t16\t20",
+ "\t38\t17\t20",
+ "Aegean Sea",
+ "\t41\t21\t27",
+ "\t40\t21\t27",
+ "\t39\t21\t28",
+ "\t38\t21\t29",
+ "\t37\t21\t29",
+ "\t36\t23\t29",
+ "\t35\t23\t29",
+ "Albemarle Sound",
+ "\t37\t-77\t-74",
+ "\t36\t-77\t-74",
+ "\t35\t-77\t-74",
+ "\t34\t-77\t-74",
+ "Alboran Sea",
+ "\t37\t-6\t-1",
+ "\t36\t-6\t0",
+ "\t35\t-6\t0",
+ "\t34\t-6\t0",
+ "Amundsen Gulf",
+ "\t72\t-126\t-117",
+ "\t71\t-128\t-116",
+ "\t70\t-128\t-116",
+ "\t69\t-128\t-116",
+ "\t68\t-127\t-117",
+ "Amundsen Sea",
+ "\t-71\t-108\t-101",
+ "\t-72\t-115\t-97",
+ "\t-73\t-115\t-97",
+ "\t-74\t-115\t-97",
+ "\t-75\t-115\t-97",
+ "\t-76\t-112\t-97",
+ "Andaman Sea",
+ "\t17\t93\t96",
+ "\t16\t92\t97",
+ "\t15\t91\t99",
+ "\t14\t91\t99",
+ "\t13\t91\t99",
+ "\t12\t91\t99",
+ "\t11\t91\t99",
+ "\t10\t91\t99",
+ "\t9\t91\t99",
+ "\t8\t91\t99",
+ "\t7\t91\t99",
+ "\t6\t92\t99",
+ "\t5\t92\t98",
+ "\t4\t94\t96",
+ "Arabian Sea",
+ "\t26\t60\t67",
+ "\t25\t59\t68",
+ "\t24\t59\t69",
+ "\t23\t58\t70",
+ "\t22\t57\t71",
+ "\t21\t57\t72",
+ "\t20\t56\t74",
+ "\t19\t55\t74",
+ "\t18\t53\t74",
+ "\t17\t51\t74",
+ "\t16\t50\t74",
+ "\t15\t50\t75",
+ "\t14\t50\t75",
+ "\t13\t50\t75",
+ "\t12\t50\t74",
+ "\t11\t50\t72",
+ "\t10\t50\t72",
+ "\t9\t50\t72",
+ "\t8\t52\t72",
+ "\t7\t54\t73",
+ "\t6\t56\t73",
+ "\t5\t58\t73",
+ "\t4\t60\t73",
+ "\t3\t63\t73",
+ "\t2\t65\t73",
+ "\t1\t67\t74",
+ "\t0\t69\t74",
+ "\t-1\t71\t74",
+ "Arafura Sea",
+ "\t-2\t132\t135",
+ "\t-3\t132\t138",
+ "\t-4\t131\t139",
+ "\t-5\t131\t139",
+ "\t-6\t130\t141",
+ "\t-7\t129\t141",
+ "\t-8\t129\t142",
+ "\t-9\t129\t143",
+ "\t-10\t129\t143",
+ "\t-11\t130\t143",
+ "\t-12\t130\t143",
+ "\t-13\t133\t142",
+ "Aral Sea",
+ "\t47\t58\t62",
+ "\t46\t57\t62",
+ "\t45\t57\t62",
+ "\t44\t57\t61",
+ "\t43\t57\t61",
+ "Arctic Ocean",
+ "\t90\t-180\t180",
+ "\t89\t-180\t180",
+ "\t88\t-180\t180",
+ "\t87\t-180\t180",
+ "\t86\t-180\t180",
+ "\t85\t-180\t180",
+ "\t84\t-180\t180",
+ "\t83\t-180\t180",
+ "\t82\t-180\t180",
+ "\t81\t-180\t-69\t-18\t180",
+ "\t80\t-180\t-75\t-4\t180",
+ "\t79\t-180\t-75\t10\t50\t100\t180",
+ "\t78\t-180\t-99\t-88\t-79\t108\t180",
+ "\t77\t-180\t-107\t117\t180",
+ "\t76\t-180\t-112\t125\t180",
+ "\t75\t-180\t-119\t133\t148\t157\t180",
+ "\t74\t-180\t-125\t161\t180",
+ "\t73\t-180\t-132\t165\t180",
+ "\t72\t-180\t-138\t169\t180",
+ "\t71\t-180\t-145\t173\t180",
+ "\t70\t-180\t-152\t177\t180",
+ "Atlantic Ocean",
+ "\t69\t-33\t-29",
+ "\t68\t-34\t-27",
+ "\t67\t-39\t-26",
+ "\t66\t-42\t-24",
+ "\t65\t-42\t-20\t-17\t-11",
+ "\t64\t-43\t-8",
+ "\t63\t-43\t-6",
+ "\t62\t-43\t-3",
+ "\t61\t-44\t0",
+ "\t60\t-44\t0",
+ "\t59\t-45\t0",
+ "\t58\t-45\t0",
+ "\t57\t-46\t-1",
+ "\t56\t-47\t-5",
+ "\t55\t-48\t-6",
+ "\t54\t-48\t-7",
+ "\t53\t-49\t-5",
+ "\t52\t-50\t-5",
+ "\t51\t-51\t-4",
+ "\t50\t-51\t-4",
+ "\t49\t-52\t-4",
+ "\t48\t-56\t-4",
+ "\t47\t-60\t-4",
+ "\t46\t-62\t-5",
+ "\t45\t-65\t-5",
+ "\t44\t-66\t-6",
+ "\t43\t-68\t-6",
+ "\t42\t-74\t-6",
+ "\t41\t-75\t-7",
+ "\t40\t-75\t-7",
+ "\t39\t-76\t-7",
+ "\t38\t-76\t-5",
+ "\t37\t-77\t-4",
+ "\t36\t-77\t-4",
+ "\t35\t-78\t-4",
+ "\t34\t-80\t-4",
+ "\t33\t-81\t-5",
+ "\t32\t-82\t-5",
+ "\t31\t-82\t-7",
+ "\t30\t-82\t-8",
+ "\t29\t-82\t-8",
+ "\t28\t-82\t-8",
+ "\t27\t-81\t-9",
+ "\t26\t-81\t-11",
+ "\t25\t-81\t-12",
+ "\t24\t-81\t-13",
+ "\t23\t-81\t-13",
+ "\t22\t-81\t-14",
+ "\t21\t-81\t-15",
+ "\t20\t-78\t-15",
+ "\t19\t-76\t-15",
+ "\t18\t-74\t-15",
+ "\t17\t-69\t-15",
+ "\t16\t-62\t-15",
+ "\t15\t-61\t-15",
+ "\t14\t-61\t-14",
+ "\t13\t-60\t-14",
+ "\t12\t-61\t-14",
+ "\t11\t-61\t-13",
+ "\t10\t-62\t-12",
+ "\t9\t-62\t-11",
+ "\t8\t-62\t-10",
+ "\t7\t-61\t-9",
+ "\t6\t-59\t-8",
+ "\t5\t-59\t-5",
+ "\t4\t-58\t-2",
+ "\t3\t-53\t2",
+ "\t2\t-52\t5",
+ "\t1\t-51\t7",
+ "\t0\t-51\t7",
+ "\t-1\t-51\t7",
+ "Atlantic Ocean",
+ "\t1\t-50\t9",
+ "\t0\t-50\t10",
+ "\t-1\t-50\t11",
+ "\t-2\t-47\t12",
+ "\t-3\t-44\t13",
+ "\t-4\t-40\t14",
+ "\t-5\t-39\t14",
+ "\t-6\t-37\t14",
+ "\t-7\t-36\t14",
+ "\t-8\t-36\t14",
+ "\t-9\t-37\t14",
+ "\t-10\t-38\t14",
+ "\t-11\t-39\t14",
+ "\t-12\t-40\t14",
+ "\t-13\t-40\t14",
+ "\t-14\t-40\t13",
+ "\t-15\t-40\t13",
+ "\t-16\t-40\t13",
+ "\t-17\t-40\t13",
+ "\t-18\t-41\t13",
+ "\t-19\t-41\t14",
+ "\t-20\t-42\t14",
+ "\t-21\t-45\t15",
+ "\t-22\t-46\t15",
+ "\t-23\t-48\t15",
+ "\t-24\t-49\t15",
+ "\t-25\t-49\t16",
+ "\t-26\t-49\t16",
+ "\t-27\t-50\t17",
+ "\t-28\t-51\t17",
+ "\t-29\t-51\t18",
+ "\t-30\t-52\t19",
+ "\t-31\t-53\t19",
+ "\t-32\t-54\t19",
+ "\t-33\t-55\t20",
+ "\t-34\t-56\t20",
+ "\t-35\t-57\t20",
+ "\t-36\t-58\t20",
+ "\t-37\t-62\t20",
+ "\t-38\t-63\t20",
+ "\t-39\t-63\t20",
+ "\t-40\t-64\t20",
+ "\t-41\t-66\t20",
+ "\t-42\t-66\t20",
+ "\t-43\t-66\t20",
+ "\t-44\t-66\t20",
+ "\t-45\t-66\t20",
+ "\t-46\t-67\t20",
+ "\t-47\t-68\t20",
+ "\t-48\t-68\t20",
+ "\t-49\t-68\t20",
+ "\t-50\t-69\t20",
+ "\t-51\t-69\t20",
+ "\t-52\t-69\t20",
+ "\t-53\t-70\t20",
+ "\t-54\t-70\t20",
+ "\t-55\t-70\t20",
+ "\t-56\t-69\t20",
+ "\t-57\t-69\t20",
+ "\t-58\t-69\t20",
+ "\t-59\t-69\t20",
+ "\t-60\t-69\t20",
+ "\t-61\t-69\t20",
+ "Bab el Mandeb",
+ "\t14\t42\t44",
+ "\t13\t42\t44",
+ "\t12\t42\t44",
+ "\t11\t42\t44",
+ "Baffin Bay",
+ "\t79\t-77\t-71",
+ "\t78\t-83\t-70",
+ "\t77\t-83\t-66",
+ "\t76\t-83\t-61",
+ "\t75\t-81\t-55",
+ "\t74\t-81\t-54",
+ "\t73\t-81\t-53",
+ "\t72\t-79\t-53",
+ "\t71\t-78\t-53",
+ "\t70\t-76\t-53",
+ "\t69\t-73\t-53",
+ "\t68\t-70\t-53",
+ "Bahia Blanca",
+ "\t-37\t-63\t-60",
+ "\t-38\t-63\t-60",
+ "\t-39\t-63\t-60",
+ "\t-40\t-63\t-60",
+ "Bahia de Campeche",
+ "\t22\t-94\t-89",
+ "\t21\t-98\t-89",
+ "\t20\t-98\t-89",
+ "\t19\t-98\t-89",
+ "\t18\t-97\t-89",
+ "\t17\t-96\t-90",
+ "Bahia Grande",
+ "\t-48\t-69\t-66",
+ "\t-49\t-70\t-66",
+ "\t-50\t-70\t-66",
+ "\t-51\t-70\t-66",
+ "\t-52\t-70\t-67",
+ "\t-53\t-69\t-67",
+ "Bahia Inutil",
+ "\t-52\t-71\t-68",
+ "\t-53\t-71\t-68",
+ "\t-54\t-71\t-68",
+ "\t-55\t-71\t-68",
+ "Baia de Maputo",
+ "\t-24\t31\t33",
+ "\t-25\t31\t33",
+ "\t-26\t31\t33",
+ "\t-27\t31\t33",
+ "Baia de Marajo",
+ "\t1\t-49\t-47",
+ "\t0\t-50\t-47",
+ "\t-1\t-50\t-47",
+ "\t-2\t-50\t-47",
+ "\t-3\t-50\t-48",
+ "Baia de Sao Marcos",
+ "\t0\t-45\t-43",
+ "\t-1\t-45\t-42",
+ "\t-2\t-45\t-42",
+ "\t-3\t-45\t-42",
+ "\t-4\t-45\t-43",
+ "Baird Inlet",
+ "\t61\t-165\t-162",
+ "\t60\t-165\t-162",
+ "\t59\t-165\t-162",
+ "Balearic Sea",
+ "\t42\t0\t4",
+ "\t41\t-1\t5",
+ "\t40\t-1\t5",
+ "\t39\t-1\t5",
+ "\t38\t-1\t5",
+ "\t37\t-1\t3",
+ "Bali Sea",
+ "\t-5\t114\t117",
+ "\t-6\t113\t118",
+ "\t-7\t113\t118",
+ "\t-8\t113\t118",
+ "\t-9\t113\t118",
+ "\t-10\t115\t117",
+ "Baltic Sea",
+ "\t60\t16\t24",
+ "\t59\t15\t24",
+ "\t58\t15\t24",
+ "\t57\t13\t23",
+ "\t56\t11\t23",
+ "\t55\t11\t22",
+ "\t54\t11\t22",
+ "\t53\t11\t21",
+ "\t52\t13\t15",
+ "Banda Sea",
+ "\t1\t121\t124",
+ "\t0\t120\t126",
+ "\t-1\t119\t129",
+ "\t-2\t119\t131",
+ "\t-3\t119\t133",
+ "\t-4\t119\t134",
+ "\t-5\t119\t134",
+ "\t-6\t119\t134",
+ "\t-7\t119\t133",
+ "\t-8\t119\t132",
+ "\t-9\t121\t132",
+ "Barents Sea",
+ "\t82\t49\t66",
+ "\t81\t16\t19\t26\t66",
+ "\t80\t16\t67",
+ "\t79\t16\t67",
+ "\t78\t16\t68",
+ "\t77\t16\t69",
+ "\t76\t16\t69",
+ "\t75\t16\t69",
+ "\t74\t18\t61",
+ "\t73\t20\t57",
+ "\t72\t22\t55",
+ "\t71\t24\t59",
+ "\t70\t26\t61",
+ "\t69\t26\t61",
+ "\t68\t28\t61",
+ "\t67\t36\t61",
+ "\t66\t43\t50",
+ "\t65\t44\t48",
+ "Bass Strait",
+ "\t-36\t143\t150",
+ "\t-37\t142\t150",
+ "\t-38\t142\t150",
+ "\t-39\t142\t149",
+ "\t-40\t142\t149",
+ "\t-41\t142\t149",
+ "\t-42\t144\t148",
+ "Bathurst Inlet",
+ "\t68\t-109\t-106",
+ "\t67\t-109\t-106",
+ "\t66\t-109\t-106",
+ "\t65\t-109\t-106",
+ "Bay of Bengal",
+ "\t24\t89\t91",
+ "\t23\t86\t92",
+ "\t22\t85\t93",
+ "\t21\t85\t94",
+ "\t20\t83\t95",
+ "\t19\t82\t95",
+ "\t18\t81\t95",
+ "\t17\t80\t95",
+ "\t16\t79\t95",
+ "\t15\t79\t95",
+ "\t14\t79\t94",
+ "\t13\t78\t93",
+ "\t12\t78\t93",
+ "\t11\t78\t93",
+ "\t10\t78\t93",
+ "\t9\t78\t93",
+ "\t8\t79\t94",
+ "\t7\t79\t95",
+ "\t6\t80\t96",
+ "\t5\t84\t96",
+ "\t4\t91\t96",
+ "Bay of Biscay",
+ "\t49\t-6\t-3",
+ "\t48\t-7\t0",
+ "\t47\t-7\t0",
+ "\t46\t-8\t1",
+ "\t45\t-8\t1",
+ "\t44\t-9\t1",
+ "\t43\t-9\t0",
+ "\t42\t-9\t0",
+ "Bay of Fundy",
+ "\t46\t-68\t-62",
+ "\t45\t-68\t-62",
+ "\t44\t-68\t-62",
+ "\t43\t-68\t-64",
+ "Bay of Plenty",
+ "\t-35\t174\t177",
+ "\t-36\t174\t179",
+ "\t-37\t174\t179",
+ "\t-38\t174\t179",
+ "Beaufort Sea",
+ "\t77\t-126\t-121",
+ "\t76\t-133\t-121",
+ "\t75\t-139\t-121",
+ "\t74\t-146\t-122",
+ "\t73\t-153\t-122",
+ "\t72\t-157\t-122",
+ "\t71\t-157\t-123",
+ "\t70\t-157\t-124",
+ "\t69\t-157\t-125",
+ "\t68\t-145\t-127",
+ "Bellingshausen Sea",
+ "\t-67\t-74\t-70",
+ "\t-68\t-80\t-70",
+ "\t-69\t-86\t-68",
+ "\t-70\t-92\t-68",
+ "\t-71\t-96\t-68",
+ "\t-72\t-96\t-68",
+ "\t-73\t-96\t-73",
+ "\t-74\t-96\t-73",
+ "Bering Sea",
+ "\t67\t-171\t-168",
+ "\t66\t-173\t-165",
+ "\t65\t-175\t-163",
+ "\t64\t-177\t-163",
+ "\t63\t-179\t-163\t174\t180",
+ "\t62\t-180\t-163\t171\t180",
+ "\t61\t-180\t-160\t165\t180",
+ "\t60\t-180\t-160\t165\t180",
+ "\t59\t-180\t-160\t163\t180",
+ "\t58\t-180\t-160\t161\t180",
+ "\t57\t-180\t-160\t161\t180",
+ "\t56\t-180\t-160\t161\t180",
+ "\t55\t-180\t-160\t161\t180",
+ "\t54\t-180\t-160\t163\t180",
+ "\t53\t-180\t-161\t165\t180",
+ "\t52\t-180\t-163\t167\t180",
+ "\t51\t-180\t-166\t169\t180",
+ "\t50\t-180\t-171\t171\t180",
+ "\t49\t178\t180",
+ "Bering Strait",
+ "\t67\t-171\t-168",
+ "\t66\t-171\t-166",
+ "\t65\t-171\t-166",
+ "\t64\t-171\t-166",
+ "Bight of Benin",
+ "\t7\t0\t5",
+ "\t6\t-1\t6",
+ "\t5\t-1\t6",
+ "\t4\t-1\t6",
+ "\t3\t2\t6",
+ "Bight of Biafra",
+ "\t5\t5\t10",
+ "\t4\t5\t10",
+ "\t3\t5\t10",
+ "\t2\t7\t10",
+ "\t1\t8\t10",
+ "Bismarck Sea",
+ "\t0\t141\t148",
+ "\t-1\t140\t152",
+ "\t-2\t140\t153",
+ "\t-3\t140\t153",
+ "\t-4\t141\t153",
+ "\t-5\t143\t153",
+ "\t-6\t144\t152",
+ "Black Sea",
+ "\t48\t30\t32",
+ "\t47\t29\t34",
+ "\t46\t28\t37",
+ "\t45\t27\t39",
+ "\t44\t26\t41",
+ "\t43\t26\t42",
+ "\t42\t26\t42",
+ "\t41\t26\t42",
+ "\t40\t26\t42",
+ "\t39\t37\t41",
+ "Bo Hai",
+ "\t41\t119\t123",
+ "\t40\t116\t123",
+ "\t39\t116\t123",
+ "\t38\t116\t122",
+ "\t37\t116\t122",
+ "\t36\t117\t121",
+ "Boca Grande",
+ "\t10\t-62\t-59",
+ "\t9\t-62\t-59",
+ "\t8\t-62\t-59",
+ "\t7\t-62\t-59",
+ "Bohol Sea",
+ "\t11\t122\t126",
+ "\t10\t122\t126",
+ "\t9\t122\t126",
+ "\t8\t122\t126",
+ "\t7\t122\t126",
+ "Boknafjorden",
+ "\t60\t4\t7",
+ "\t59\t4\t7",
+ "\t58\t4\t7",
+ "\t57\t4\t7",
+ "Bosporus",
+ "\t42\t27\t30",
+ "\t41\t27\t30",
+ "\t40\t27\t30",
+ "Bransfield Strait",
+ "\t-60\t-58\t-53",
+ "\t-61\t-63\t-53",
+ "\t-62\t-63\t-53",
+ "\t-63\t-64\t-53",
+ "\t-64\t-64\t-54",
+ "\t-65\t-64\t-59",
+ "\t-66\t-64\t-62",
+ "Bristol Bay",
+ "\t60\t-161\t-155",
+ "\t59\t-163\t-155",
+ "\t58\t-163\t-155",
+ "\t57\t-163\t-155",
+ "\t56\t-163\t-156",
+ "\t55\t-162\t-157",
+ "\t54\t-162\t-159",
+ "Bristol Channel",
+ "\t52\t-7\t-1",
+ "\t51\t-7\t-1",
+ "\t50\t-7\t-1",
+ "\t49\t-6\t-3",
+ "Caribbean Sea",
+ "\t23\t-84\t-79",
+ "\t22\t-88\t-77",
+ "\t21\t-88\t-73",
+ "\t20\t-88\t-71",
+ "\t19\t-89\t-60",
+ "\t18\t-89\t-60",
+ "\t17\t-89\t-59",
+ "\t16\t-89\t-59",
+ "\t15\t-88\t-58",
+ "\t14\t-87\t-58",
+ "\t13\t-84\t-58",
+ "\t12\t-84\t-58",
+ "\t11\t-84\t-58",
+ "\t10\t-84\t-59",
+ "\t9\t-84\t-59",
+ "\t8\t-84\t-74\t-63\t-59",
+ "\t7\t-83\t-75",
+ "Caspian Sea",
+ "\t48\t49\t52",
+ "\t47\t47\t54",
+ "\t46\t46\t54",
+ "\t45\t45\t54",
+ "\t44\t45\t54",
+ "\t43\t45\t53",
+ "\t42\t46\t53",
+ "\t41\t46\t53",
+ "\t40\t47\t54",
+ "\t39\t47\t54",
+ "\t38\t47\t54",
+ "\t37\t47\t55",
+ "\t36\t47\t55",
+ "\t35\t49\t55",
+ "Celebes Sea",
+ "\t8\t121\t125",
+ "\t7\t120\t126",
+ "\t6\t117\t126",
+ "\t5\t116\t126",
+ "\t4\t116\t126",
+ "\t3\t116\t126",
+ "\t2\t116\t126",
+ "\t1\t116\t126",
+ "\t0\t116\t126",
+ "\t-1\t117\t124",
+ "Ceram Sea",
+ "\t0\t124\t133",
+ "\t-1\t124\t134",
+ "\t-2\t124\t134",
+ "\t-3\t124\t134",
+ "\t-4\t124\t126\t129\t134",
+ "\t-5\t130\t134",
+ "\t-6\t132\t134",
+ "Chaun Bay",
+ "\t70\t167\t171",
+ "\t69\t167\t171",
+ "\t68\t167\t171",
+ "\t67\t168\t171",
+ "Chesapeake Bay",
+ "\t40\t-77\t-74",
+ "\t39\t-78\t-74",
+ "\t38\t-78\t-74",
+ "\t37\t-78\t-74",
+ "\t36\t-78\t-74",
+ "\t35\t-77\t-75",
+ "Chukchi Sea",
+ "\t72\t-179\t-155\t177\t179",
+ "\t71\t-180\t-155\t175\t180",
+ "\t70\t-180\t-155\t174\t180",
+ "\t69\t-180\t-156\t174\t180",
+ "\t68\t-180\t-161\t174\t180",
+ "\t67\t-180\t-162\t179\t180",
+ "\t66\t-176\t-162",
+ "\t65\t-175\t-163",
+ "\t64\t-169\t-165",
+ "Cook Inlet",
+ "\t62\t-152\t-148",
+ "\t61\t-154\t-148",
+ "\t60\t-155\t-148",
+ "\t59\t-155\t-148",
+ "\t58\t-155\t-150",
+ "\t57\t-154\t-151",
+ "Cook Strait",
+ "\t-39\t173\t176",
+ "\t-40\t173\t176",
+ "\t-41\t173\t176",
+ "\t-42\t173\t176",
+ "Coral Sea",
+ "\t-7\t142\t147",
+ "\t-8\t141\t148\t164\t168",
+ "\t-9\t141\t153\t161\t168",
+ "\t-10\t141\t168",
+ "\t-11\t141\t168",
+ "\t-12\t141\t168",
+ "\t-13\t142\t169",
+ "\t-14\t142\t169",
+ "\t-15\t142\t169",
+ "\t-16\t144\t169",
+ "\t-17\t144\t170",
+ "\t-18\t144\t170",
+ "\t-19\t145\t170",
+ "\t-20\t145\t170",
+ "\t-21\t147\t170",
+ "\t-22\t148\t169",
+ "\t-23\t148\t168",
+ "\t-24\t149\t167",
+ "\t-25\t150\t166",
+ "\t-26\t151\t165",
+ "\t-27\t152\t164",
+ "\t-28\t152\t162",
+ "\t-29\t152\t161",
+ "\t-30\t152\t160",
+ "Cordova Bay",
+ "\t56\t-134\t-131",
+ "\t55\t-134\t-131",
+ "\t54\t-134\t-131",
+ "\t53\t-133\t-131",
+ "Cumberland Sound",
+ "\t67\t-69\t-63",
+ "\t66\t-69\t-62",
+ "\t65\t-69\t-62",
+ "\t64\t-69\t-62",
+ "\t63\t-67\t-62",
+ "\t62\t-65\t-63",
+ "Dardanelles",
+ "\t41\t25\t27",
+ "\t40\t25\t27",
+ "\t39\t25\t27",
+ "\t38\t25\t27",
+ "Darnley Bay",
+ "\t70\t-125\t-122",
+ "\t69\t-125\t-122",
+ "\t68\t-125\t-122",
+ "Davao Gulf",
+ "\t8\t124\t126",
+ "\t7\t124\t127",
+ "\t6\t124\t127",
+ "\t5\t124\t127",
+ "\t4\t124\t126",
+ "Davis Sea",
+ "\t-62\t90\t104",
+ "\t-63\t86\t111",
+ "\t-64\t84\t113",
+ "\t-65\t83\t113",
+ "\t-66\t82\t113",
+ "\t-67\t82\t111",
+ "\t-68\t82\t87",
+ "Davis Strait",
+ "\t70\t-70\t-52",
+ "\t69\t-70\t-50",
+ "\t68\t-70\t-49",
+ "\t67\t-70\t-49",
+ "\t66\t-67\t-49",
+ "\t65\t-64\t-49",
+ "\t64\t-66\t-48",
+ "\t63\t-66\t-47",
+ "\t62\t-66\t-44",
+ "\t61\t-66\t-43",
+ "\t60\t-65\t-43",
+ "\t59\t-65\t-43",
+ "Delaware Bay",
+ "\t40\t-76\t-73",
+ "\t39\t-76\t-73",
+ "\t38\t-76\t-73",
+ "\t37\t-76\t-73",
+ "Denmark Strait",
+ "\t71\t-23\t-21",
+ "\t70\t-26\t-19",
+ "\t69\t-31\t-18",
+ "\t68\t-31\t-16",
+ "\t67\t-31\t-15",
+ "\t66\t-30\t-15",
+ "\t65\t-28\t-15",
+ "\t64\t-27\t-16",
+ "\t63\t-25\t-22",
+ "Disko Bay",
+ "\t71\t-55\t-49",
+ "\t70\t-55\t-49",
+ "\t69\t-55\t-49",
+ "\t68\t-54\t-49",
+ "\t67\t-54\t-49",
+ "Dixon Entrance",
+ "\t55\t-134\t-130",
+ "\t54\t-134\t-130",
+ "\t53\t-134\t-130",
+ "Dmitriy Laptev Strait",
+ "\t74\t138\t144",
+ "\t73\t138\t144",
+ "\t72\t138\t144",
+ "\t71\t139\t144",
+ "Drake Passage",
+ "\t-53\t-67\t-62",
+ "\t-54\t-69\t-61",
+ "\t-55\t-69\t-60",
+ "\t-56\t-69\t-58",
+ "\t-57\t-69\t-57",
+ "\t-58\t-69\t-56",
+ "\t-59\t-69\t-55",
+ "\t-60\t-69\t-54",
+ "\t-61\t-69\t-54",
+ "\t-62\t-69\t-54",
+ "\t-63\t-69\t-57",
+ "\t-64\t-69\t-61",
+ "\t-65\t-69\t-62",
+ "\t-66\t-69\t-64",
+ "\t-67\t-69\t-65",
+ "East China Sea",
+ "\t34\t124\t127\t129\t131",
+ "\t33\t122\t131",
+ "\t32\t120\t131",
+ "\t31\t120\t131",
+ "\t30\t120\t131",
+ "\t29\t119\t131",
+ "\t28\t119\t131",
+ "\t27\t118\t130",
+ "\t26\t118\t129",
+ "\t25\t118\t129",
+ "\t24\t118\t128",
+ "\t23\t120\t127",
+ "\t22\t122\t125",
+ "East Korea Bay",
+ "\t41\t127\t129",
+ "\t40\t126\t129",
+ "\t39\t126\t129",
+ "\t38\t126\t129",
+ "\t37\t126\t129",
+ "East Siberian Sea",
+ "\t78\t147\t158",
+ "\t77\t137\t162",
+ "\t76\t137\t166",
+ "\t75\t137\t170",
+ "\t74\t138\t174",
+ "\t73\t138\t178",
+ "\t72\t138\t180",
+ "\t71\t142\t180",
+ "\t70\t147\t180",
+ "\t69\t150\t155\t157\t178",
+ "\t68\t158\t176",
+ "\t67\t159\t162",
+ "Eclipse Sound",
+ "\t74\t-81\t-79",
+ "\t73\t-82\t-76",
+ "\t72\t-82\t-76",
+ "\t71\t-82\t-76",
+ "\t70\t-81\t-77",
+ "English Channel",
+ "\t52\t0\t2",
+ "\t51\t-6\t2",
+ "\t50\t-7\t2",
+ "\t49\t-7\t2",
+ "\t48\t-7\t2",
+ "\t47\t-6\t0",
+ "Eskimo Lakes",
+ "\t70\t-134\t-130",
+ "\t69\t-134\t-130",
+ "\t68\t-134\t-130",
+ "\t67\t-134\t-131",
+ "Estrecho de Magellanes",
+ "\t-51\t-75\t-67",
+ "\t-52\t-75\t-67",
+ "\t-53\t-75\t-67",
+ "\t-54\t-74\t-69",
+ "\t-55\t-72\t-69",
+ "Finger Lakes",
+ "\t43\t-78\t-75",
+ "\t42\t-78\t-75",
+ "\t41\t-78\t-75",
+ "Flores Sea",
+ "\t-4\t118\t121",
+ "\t-5\t117\t121",
+ "\t-6\t116\t122",
+ "\t-7\t116\t123",
+ "\t-8\t116\t123",
+ "\t-9\t116\t123",
+ "Foxe Basin",
+ "\t71\t-80\t-76",
+ "\t70\t-83\t-73",
+ "\t69\t-83\t-72",
+ "\t68\t-85\t-71",
+ "\t67\t-87\t-71",
+ "\t66\t-87\t-71",
+ "\t65\t-87\t-71",
+ "\t64\t-86\t-72",
+ "\t63\t-84\t-74",
+ "\t62\t-81\t-78",
+ "Franklin Bay",
+ "\t70\t-126\t-124",
+ "\t69\t-126\t-124",
+ "\t68\t-126\t-124",
+ "Frobisher Bay",
+ "\t64\t-69\t-64",
+ "\t63\t-69\t-64",
+ "\t62\t-69\t-64",
+ "\t61\t-68\t-64",
+ "Fury and Hecla Strait",
+ "\t71\t-86\t-82",
+ "\t70\t-86\t-81",
+ "\t69\t-86\t-81",
+ "\t68\t-86\t-81",
+ "Garabogaz Bay",
+ "\t43\t52\t54",
+ "\t42\t51\t55",
+ "\t41\t51\t55",
+ "\t40\t51\t55",
+ "\t39\t51\t55",
+ "Geographe Bay",
+ "\t-29\t114\t116",
+ "\t-30\t114\t116",
+ "\t-31\t114\t116",
+ "\t-32\t114\t116",
+ "\t-33\t114\t116",
+ "\t-34\t114\t116",
+ "George VI Sound",
+ "\t-68\t-70\t-67",
+ "\t-69\t-70\t-66",
+ "\t-70\t-74\t-65",
+ "\t-71\t-75\t-65",
+ "\t-72\t-75\t-65",
+ "\t-73\t-75\t-65",
+ "\t-74\t-75\t-67",
+ "Goldsmith Channel",
+ "\t74\t-108\t-104",
+ "\t73\t-108\t-104",
+ "\t72\t-108\t-104",
+ "\t71\t-106\t-104",
+ "Golfe du Lion",
+ "\t44\t2\t6",
+ "\t43\t2\t6",
+ "\t42\t2\t6",
+ "\t41\t2\t5",
+ "\t40\t2\t4",
+ "Golfo Corcovado",
+ "\t-40\t-74\t-71",
+ "\t-41\t-74\t-71",
+ "\t-42\t-74\t-71",
+ "\t-43\t-75\t-71",
+ "\t-44\t-75\t-71",
+ "\t-45\t-75\t-71",
+ "\t-46\t-74\t-71",
+ "Golfo de California",
+ "\t32\t-115\t-112",
+ "\t31\t-115\t-111",
+ "\t30\t-115\t-111",
+ "\t29\t-115\t-110",
+ "\t28\t-115\t-108",
+ "\t27\t-114\t-108",
+ "\t26\t-113\t-107",
+ "\t25\t-113\t-106",
+ "\t24\t-112\t-105",
+ "\t23\t-111\t-105",
+ "\t22\t-110\t-105",
+ "Golfo de Guayaquil",
+ "\t-1\t-81\t-78",
+ "\t-2\t-81\t-78",
+ "\t-3\t-81\t-78",
+ "\t-4\t-81\t-78",
+ "Golfo de Panama",
+ "\t10\t-80\t-78",
+ "\t9\t-81\t-76",
+ "\t8\t-81\t-76",
+ "\t7\t-81\t-76",
+ "\t6\t-81\t-77",
+ "Golfo de Penas",
+ "\t-45\t-76\t-73",
+ "\t-46\t-76\t-73",
+ "\t-47\t-76\t-73",
+ "\t-48\t-76\t-73",
+ "Golfo de Tehuantepec",
+ "\t17\t-96\t-92",
+ "\t16\t-97\t-92",
+ "\t15\t-97\t-92",
+ "\t14\t-97\t-92",
+ "Golfo de Uraba",
+ "\t9\t-78\t-75",
+ "\t8\t-78\t-75",
+ "\t7\t-78\t-75",
+ "\t6\t-77\t-75",
+ "Golfo San Jorge",
+ "\t-43\t-67\t-65",
+ "\t-44\t-68\t-64",
+ "\t-45\t-68\t-64",
+ "\t-46\t-68\t-64",
+ "\t-47\t-68\t-64",
+ "\t-48\t-67\t-64",
+ "Golfo San Matias",
+ "\t-39\t-66\t-63",
+ "\t-40\t-66\t-62",
+ "\t-41\t-66\t-62",
+ "\t-42\t-66\t-62",
+ "\t-43\t-65\t-62",
+ "Great Australian Bight",
+ "\t-30\t127\t133",
+ "\t-31\t123\t135",
+ "\t-32\t118\t136",
+ "\t-33\t117\t136",
+ "\t-34\t117\t140",
+ "\t-35\t117\t140",
+ "\t-36\t117\t141",
+ "\t-37\t119\t144",
+ "\t-38\t123\t144",
+ "\t-39\t126\t145",
+ "\t-40\t129\t146",
+ "\t-41\t133\t146",
+ "\t-42\t136\t147",
+ "\t-43\t139\t147",
+ "\t-44\t143\t147",
+ "Great Barrier Reef",
+ "\t-8\t141\t146",
+ "\t-9\t141\t146",
+ "\t-10\t141\t146",
+ "\t-11\t141\t146",
+ "\t-12\t141\t147",
+ "\t-13\t142\t148",
+ "\t-14\t142\t148",
+ "\t-15\t142\t148",
+ "\t-16\t144\t149",
+ "\t-17\t144\t150",
+ "\t-18\t144\t151",
+ "\t-19\t145\t151",
+ "\t-20\t145\t152",
+ "\t-21\t147\t154",
+ "\t-22\t148\t154",
+ "\t-23\t148\t154",
+ "\t-24\t149\t154",
+ "\t-25\t150\t154",
+ "\t-26\t151\t154",
+ "Great Bear Lake",
+ "\t68\t-121\t-118",
+ "\t67\t-126\t-116",
+ "\t66\t-126\t-116",
+ "\t65\t-126\t-116",
+ "\t64\t-125\t-116",
+ "\t63\t-123\t-119",
+ "Great Salt Lake",
+ "\t42\t-114\t-110",
+ "\t41\t-114\t-110",
+ "\t40\t-114\t-110",
+ "\t39\t-113\t-110",
+ "Great Slave Lake",
+ "\t63\t-117\t-108",
+ "\t62\t-118\t-108",
+ "\t61\t-118\t-108",
+ "\t60\t-118\t-110",
+ "\t59\t-117\t-113",
+ "Greenland Sea",
+ "\t84\t-32\t-17",
+ "\t83\t-33\t-3",
+ "\t82\t-33\t11",
+ "\t81\t-33\t18",
+ "\t80\t-30\t-27\t-25\t18",
+ "\t79\t-24\t18",
+ "\t78\t-22\t18",
+ "\t77\t-23\t18",
+ "\t76\t-23\t18",
+ "\t75\t-23\t17",
+ "\t74\t-28\t14",
+ "\t73\t-28\t10",
+ "\t72\t-28\t5",
+ "\t71\t-27\t0",
+ "\t70\t-26\t-4",
+ "\t69\t-29\t-7",
+ "\t68\t-29\t-9",
+ "\t67\t-29\t-10",
+ "\t66\t-27\t-10",
+ "\t65\t-25\t-11",
+ "\t64\t-24\t-12",
+ "Guba Gusinaya",
+ "\t73\t144\t148",
+ "\t72\t143\t148",
+ "\t71\t143\t148",
+ "\t70\t143\t147",
+ "Gulf of Aden",
+ "\t16\t49\t52",
+ "\t15\t46\t52",
+ "\t14\t44\t52",
+ "\t13\t42\t52",
+ "\t12\t41\t52",
+ "\t11\t41\t52",
+ "\t10\t41\t52",
+ "\t9\t42\t47",
+ "Gulf of Alaska",
+ "\t61\t-150\t-138",
+ "\t60\t-152\t-137",
+ "\t59\t-156\t-135",
+ "\t58\t-157\t-135",
+ "\t57\t-159\t-135",
+ "\t56\t-164\t-139",
+ "\t55\t-164\t-145",
+ "\t54\t-164\t-152",
+ "\t53\t-164\t-158",
+ "Gulf of Anadyr",
+ "\t67\t-180\t-177",
+ "\t66\t-180\t-174",
+ "\t65\t-180\t-172",
+ "\t64\t-180\t-172",
+ "\t63\t-180\t-172",
+ "\t62\t-180\t-174",
+ "\t61\t-180\t-176",
+ "\t60\t-180\t-178",
+ "Gulf of Anadyr",
+ "\t66\t175\t180",
+ "\t65\t173\t180",
+ "\t64\t173\t180",
+ "\t63\t173\t180",
+ "\t62\t177\t180",
+ "\t61\t178\t180",
+ "Gulf of Aqaba",
+ "\t30\t33\t35",
+ "\t29\t33\t35",
+ "\t28\t33\t35",
+ "\t27\t33\t35",
+ "\t26\t33\t35",
+ "Gulf of Boothia",
+ "\t72\t-90\t-88",
+ "\t71\t-93\t-84",
+ "\t70\t-93\t-83",
+ "\t69\t-93\t-83",
+ "\t68\t-93\t-83",
+ "\t67\t-91\t-83",
+ "\t66\t-89\t-85",
+ "Gulf of Bothnia",
+ "\t66\t20\t26",
+ "\t65\t20\t26",
+ "\t64\t17\t26",
+ "\t63\t16\t26",
+ "\t62\t16\t24",
+ "\t61\t16\t24",
+ "\t60\t16\t24",
+ "\t59\t16\t24",
+ "\t58\t17\t24",
+ "Gulf of Buli",
+ "\t2\t127\t130",
+ "\t1\t127\t130",
+ "\t0\t127\t130",
+ "\t-1\t127\t130",
+ "Gulf of Carpentaria",
+ "\t-11\t135\t142",
+ "\t-12\t134\t142",
+ "\t-13\t134\t142",
+ "\t-14\t134\t142",
+ "\t-15\t134\t142",
+ "\t-16\t134\t142",
+ "\t-17\t136\t142",
+ "\t-18\t138\t141",
+ "Gulf of Finland",
+ "\t61\t23\t31",
+ "\t60\t21\t31",
+ "\t59\t21\t31",
+ "\t58\t21\t31",
+ "Gulf of Gabes",
+ "\t36\t9\t12",
+ "\t35\t9\t12",
+ "\t34\t9\t12",
+ "\t33\t9\t12",
+ "\t32\t9\t11",
+ "Gulf of Guinea",
+ "\t6\t-6\t3",
+ "\t5\t-8\t8",
+ "\t4\t-8\t9",
+ "\t3\t-8\t10",
+ "\t2\t-6\t10",
+ "\t1\t-3\t11",
+ "\t0\t1\t11",
+ "\t-1\t4\t11",
+ "Gulf of Honduras",
+ "\t18\t-89\t-87",
+ "\t17\t-89\t-86",
+ "\t16\t-89\t-85",
+ "\t15\t-89\t-85",
+ "\t14\t-89\t-85",
+ "Gulf of Kamchatka",
+ "\t57\t161\t164",
+ "\t56\t160\t164",
+ "\t55\t160\t164",
+ "\t54\t160\t164",
+ "\t53\t160\t163",
+ "Gulf of Kau",
+ "\t3\t127\t129",
+ "\t2\t126\t130",
+ "\t1\t126\t130",
+ "\t0\t126\t130",
+ "\t-1\t126\t128",
+ "Gulf of Khambhat",
+ "\t23\t71\t73",
+ "\t22\t71\t74",
+ "\t21\t69\t74",
+ "\t20\t69\t74",
+ "\t19\t69\t73",
+ "\t18\t71\t73",
+ "Gulf of Kutch",
+ "\t24\t67\t71",
+ "\t23\t67\t71",
+ "\t22\t67\t71",
+ "\t21\t67\t71",
+ "Gulf of Maine",
+ "\t45\t-70\t-65",
+ "\t44\t-71\t-64",
+ "\t43\t-71\t-64",
+ "\t42\t-71\t-64",
+ "\t41\t-71\t-65",
+ "\t40\t-70\t-67",
+ "Gulf of Mannar",
+ "\t10\t77\t80",
+ "\t9\t76\t80",
+ "\t8\t76\t80",
+ "\t7\t76\t80",
+ "\t6\t78\t80",
+ "Gulf of Martaban",
+ "\t18\t95\t98",
+ "\t17\t94\t98",
+ "\t16\t94\t98",
+ "\t15\t94\t98",
+ "\t14\t94\t98",
+ "\t13\t96\t98",
+ "Gulf of Masira",
+ "\t21\t56\t59",
+ "\t20\t56\t59",
+ "\t19\t56\t59",
+ "\t18\t56\t58",
+ "Gulf of Mexico",
+ "\t31\t-90\t-83",
+ "\t30\t-96\t-81",
+ "\t29\t-98\t-81",
+ "\t28\t-98\t-81",
+ "\t27\t-98\t-80",
+ "\t26\t-98\t-79",
+ "\t25\t-98\t-78",
+ "\t24\t-98\t-78",
+ "\t23\t-98\t-78",
+ "\t22\t-98\t-82",
+ "\t21\t-98\t-82",
+ "\t20\t-98\t-83",
+ "\t19\t-98\t-93",
+ "Gulf of Ob",
+ "\t73\t71\t76",
+ "\t72\t70\t76",
+ "\t71\t70\t76",
+ "\t70\t70\t77",
+ "\t69\t71\t78",
+ "\t68\t70\t79",
+ "\t67\t68\t79",
+ "\t66\t68\t79",
+ "\t65\t68\t74",
+ "Gulf of Olenek",
+ "\t74\t117\t124",
+ "\t73\t117\t124",
+ "\t72\t117\t124",
+ "\t71\t118\t124",
+ "Gulf of Oman",
+ "\t27\t55\t58",
+ "\t26\t55\t62",
+ "\t25\t55\t62",
+ "\t24\t55\t62",
+ "\t23\t55\t61",
+ "\t22\t56\t61",
+ "\t21\t58\t60",
+ "Gulf of Papua",
+ "\t-6\t142\t146",
+ "\t-7\t141\t147",
+ "\t-8\t141\t147",
+ "\t-9\t141\t147",
+ "Gulf of Riga",
+ "\t60\t22\t24",
+ "\t59\t21\t25",
+ "\t58\t20\t25",
+ "\t57\t20\t25",
+ "\t56\t20\t25",
+ "\t55\t22\t24",
+ "Gulf of Sakhalin",
+ "\t55\t138\t143",
+ "\t54\t138\t143",
+ "\t53\t138\t143",
+ "\t52\t139\t143",
+ "Gulf of Sidra",
+ "\t33\t14\t20",
+ "\t32\t14\t21",
+ "\t31\t14\t21",
+ "\t30\t14\t21",
+ "\t29\t16\t21",
+ "Gulf of St. Lawrence",
+ "\t52\t-59\t-55",
+ "\t51\t-65\t-55",
+ "\t50\t-65\t-55",
+ "\t49\t-67\t-56",
+ "\t48\t-67\t-53",
+ "\t47\t-67\t-53",
+ "\t46\t-67\t-53",
+ "\t45\t-65\t-54",
+ "\t44\t-64\t-60",
+ "Gulf of Suez",
+ "\t30\t31\t34",
+ "\t29\t31\t34",
+ "\t28\t31\t35",
+ "\t27\t31\t35",
+ "\t26\t32\t35",
+ "Gulf of Thailand",
+ "\t14\t98\t101",
+ "\t13\t98\t103",
+ "\t12\t98\t104",
+ "\t11\t98\t106",
+ "\t10\t98\t106",
+ "\t9\t98\t106",
+ "\t8\t98\t106",
+ "\t7\t98\t105",
+ "\t6\t99\t104",
+ "\t5\t99\t103",
+ "Gulf of Tomini",
+ "\t1\t119\t124",
+ "\t0\t119\t124",
+ "\t-1\t119\t124",
+ "\t-2\t119\t122",
+ "Gulf of Tonkin",
+ "\t22\t105\t110",
+ "\t21\t105\t111",
+ "\t20\t104\t111",
+ "\t19\t104\t111",
+ "\t18\t104\t111",
+ "\t17\t104\t109",
+ "\t16\t105\t108",
+ "Gulf of Yana",
+ "\t76\t135\t138",
+ "\t75\t135\t141",
+ "\t74\t135\t141",
+ "\t73\t133\t142",
+ "\t72\t131\t142",
+ "\t71\t131\t142",
+ "\t70\t131\t140",
+ "Gulf St. Vincent",
+ "\t-31\t136\t138",
+ "\t-32\t135\t138",
+ "\t-33\t134\t139",
+ "\t-34\t134\t139",
+ "\t-35\t134\t139",
+ "\t-36\t135\t139",
+ "Hadley Bay",
+ "\t74\t-109\t-107",
+ "\t73\t-109\t-106",
+ "\t72\t-109\t-106",
+ "\t71\t-109\t-106",
+ "\t70\t-109\t-106",
+ "Hall Basin",
+ "\t83\t-63\t-61",
+ "\t82\t-69\t-60",
+ "\t81\t-69\t-60",
+ "\t80\t-69\t-60",
+ "\t79\t-64\t-62",
+ "Halmahera Sea",
+ "\t1\t126\t131",
+ "\t0\t126\t132",
+ "\t-1\t126\t132",
+ "\t-2\t126\t132",
+ "Hamilton Inlet",
+ "\t55\t-59\t-56",
+ "\t54\t-61\t-56",
+ "\t53\t-61\t-56",
+ "\t52\t-61\t-57",
+ "Hangzhou Bay",
+ "\t31\t119\t123",
+ "\t30\t119\t123",
+ "\t29\t119\t123",
+ "\t28\t120\t123",
+ "Hecate Straight",
+ "\t56\t-133\t-129",
+ "\t55\t-133\t-128",
+ "\t54\t-133\t-128",
+ "\t53\t-133\t-128",
+ "\t52\t-133\t-128",
+ "\t51\t-132\t-129",
+ "Helodranon' Antongila",
+ "\t-14\t48\t51",
+ "\t-15\t48\t51",
+ "\t-16\t48\t51",
+ "\t-17\t48\t50",
+ "Hudson Bay",
+ "\t67\t-87\t-84",
+ "\t66\t-88\t-84",
+ "\t65\t-94\t-81",
+ "\t64\t-94\t-78",
+ "\t63\t-94\t-77",
+ "\t62\t-95\t-76",
+ "\t61\t-95\t-76",
+ "\t60\t-95\t-76",
+ "\t59\t-95\t-76",
+ "\t58\t-95\t-75",
+ "\t57\t-95\t-75",
+ "\t56\t-93\t-75",
+ "\t55\t-93\t-75",
+ "\t54\t-88\t-75",
+ "\t53\t-83\t-77",
+ "Hudson Strait",
+ "\t65\t-79\t-71",
+ "\t64\t-81\t-69",
+ "\t63\t-81\t-64",
+ "\t62\t-81\t-63",
+ "\t61\t-79\t-63",
+ "\t60\t-73\t-63",
+ "\t59\t-71\t-63",
+ "IJsselmeer",
+ "\t54\t4\t6",
+ "\t53\t3\t6",
+ "\t52\t3\t6",
+ "\t51\t3\t6",
+ "Indian Ocean",
+ "\t11\t49\t53",
+ "\t10\t49\t55",
+ "\t9\t49\t57",
+ "\t8\t48\t59\t80\t85",
+ "\t7\t48\t61\t79\t92",
+ "\t6\t47\t64\t78\t96",
+ "\t5\t46\t66\t77\t97",
+ "\t4\t45\t68\t75\t98",
+ "\t3\t44\t70\t74\t99",
+ "\t2\t43\t99",
+ "\t1\t40\t101",
+ "\t0\t39\t101",
+ "\t-1\t39\t102",
+ "\t-2\t38\t103",
+ "\t-3\t38\t104",
+ "\t-4\t37\t105",
+ "\t-5\t37\t107",
+ "\t-6\t37\t111",
+ "\t-7\t37\t119",
+ "\t-8\t38\t120",
+ "\t-9\t38\t123",
+ "\t-10\t38\t125",
+ "\t-11\t38\t126",
+ "\t-12\t43\t127",
+ "\t-13\t48\t127",
+ "\t-14\t48\t127",
+ "\t-15\t48\t127",
+ "\t-16\t48\t126",
+ "\t-17\t48\t125",
+ "\t-18\t47\t124",
+ "\t-19\t47\t123",
+ "\t-20\t47\t122",
+ "\t-21\t46\t120",
+ "\t-22\t46\t117",
+ "\t-23\t46\t115",
+ "\t-24\t38\t114",
+ "\t-25\t31\t114",
+ "\t-26\t31\t115",
+ "\t-27\t30\t115",
+ "\t-28\t30\t115",
+ "\t-29\t29\t116",
+ "\t-30\t28\t116",
+ "\t-31\t27\t116",
+ "\t-32\t22\t116",
+ "\t-33\t18\t117",
+ "\t-34\t18\t120",
+ "\t-35\t18\t124",
+ "\t-36\t18\t127",
+ "\t-37\t18\t130",
+ "\t-38\t18\t134",
+ "\t-39\t18\t137",
+ "\t-40\t18\t140",
+ "\t-41\t18\t144",
+ "\t-42\t18\t148",
+ "\t-43\t18\t151",
+ "\t-44\t18\t153",
+ "\t-45\t18\t156",
+ "\t-46\t18\t159",
+ "\t-47\t18\t161",
+ "\t-48\t18\t164",
+ "\t-49\t18\t167",
+ "\t-50\t18\t167",
+ "\t-51\t18\t167",
+ "\t-52\t18\t167",
+ "\t-53\t18\t167",
+ "\t-54\t18\t167",
+ "\t-55\t18\t167",
+ "\t-56\t18\t167",
+ "\t-57\t18\t167",
+ "\t-58\t18\t167",
+ "\t-59\t18\t167",
+ "\t-60\t18\t167",
+ "\t-61\t18\t167",
+ "Inner Sea",
+ "\t35\t129\t136",
+ "\t34\t129\t136",
+ "\t33\t129\t136",
+ "\t32\t129\t136",
+ "\t31\t130\t133",
+ "Inner Seas",
+ "\t59\t-7\t-4",
+ "\t58\t-8\t-4",
+ "\t57\t-8\t-3",
+ "\t56\t-9\t-3",
+ "\t55\t-9\t-3",
+ "\t54\t-9\t-3",
+ "\t53\t-8\t-4",
+ "Internal Canada (B.C.) Waters",
+ "\t54\t-130\t-126",
+ "\t53\t-130\t-126",
+ "\t52\t-130\t-126",
+ "\t51\t-130\t-126",
+ "Internal Canada (B.C.) Waters",
+ "\t55\t-133\t-131",
+ "\t54\t-133\t-131",
+ "\t53\t-133\t-131",
+ "\t52\t-133\t-131",
+ "Internal Canada (B.C.) Waters",
+ "\t56\t-131\t-128",
+ "\t55\t-131\t-128",
+ "\t54\t-131\t-128",
+ "\t53\t-131\t-129",
+ "Internal Canada Arctic Waters",
+ "\t72\t-119\t-116",
+ "\t71\t-119\t-116",
+ "\t70\t-119\t-116",
+ "Internal Canada Arctic Waters",
+ "\t69\t-108\t-104",
+ "\t68\t-108\t-104",
+ "\t67\t-108\t-104",
+ "Internal Canada Arctic Waters",
+ "\t71\t-82\t-79",
+ "\t70\t-82\t-78",
+ "\t69\t-82\t-78",
+ "\t68\t-82\t-78",
+ "Internal Denmark Waters",
+ "\t57\t9\t11",
+ "\t56\t8\t12",
+ "\t55\t8\t12",
+ "\t54\t8\t12",
+ "\t53\t8\t12",
+ "Internal Philippines Waters",
+ "\t11\t124\t127",
+ "\t10\t124\t127",
+ "\t9\t124\t127",
+ "\t8\t124\t127",
+ "Internal Philippines Waters",
+ "\t11\t122\t124",
+ "\t10\t122\t124",
+ "\t9\t122\t124",
+ "\t8\t122\t124",
+ "Internal U.S. (Alaska) Waters",
+ "\t60\t-138\t-134",
+ "\t59\t-138\t-132",
+ "\t58\t-138\t-131",
+ "\t57\t-138\t-130",
+ "\t56\t-136\t-129",
+ "\t55\t-135\t-129",
+ "\t54\t-134\t-129",
+ "Ionian Sea",
+ "\t41\t15\t18",
+ "\t40\t15\t22",
+ "\t39\t14\t24",
+ "\t38\t14\t24",
+ "\t37\t14\t24",
+ "\t36\t14\t23",
+ "\t35\t14\t23",
+ "Irish Sea",
+ "\t55\t-7\t-1",
+ "\t54\t-7\t-1",
+ "\t53\t-7\t-1",
+ "\t52\t-7\t-1",
+ "\t51\t-7\t-2",
+ "\t50\t-7\t-4",
+ "James Bay",
+ "\t55\t-83\t-77",
+ "\t54\t-83\t-77",
+ "\t53\t-83\t-77",
+ "\t52\t-83\t-77",
+ "\t51\t-83\t-77",
+ "\t50\t-81\t-77",
+ "\t49\t-80\t-78",
+ "Java Sea",
+ "\t-1\t105\t114",
+ "\t-2\t104\t117",
+ "\t-3\t104\t119",
+ "\t-4\t103\t120",
+ "\t-5\t103\t120",
+ "\t-6\t103\t120",
+ "\t-7\t104\t119",
+ "\t-8\t111\t118",
+ "Jones Sound",
+ "\t77\t-92\t-77",
+ "\t76\t-92\t-77",
+ "\t75\t-92\t-77",
+ "\t74\t-91\t-78",
+ "Joseph Bonaparte Gulf",
+ "\t-12\t126\t130",
+ "\t-13\t126\t130",
+ "\t-14\t126\t130",
+ "\t-15\t126\t130",
+ "\t-16\t127\t130",
+ "Kaliningrad",
+ "\t56\t19\t22",
+ "\t55\t19\t22",
+ "\t54\t19\t22",
+ "\t53\t19\t22",
+ "Kane Basin",
+ "\t81\t-73\t-63",
+ "\t80\t-79\t-63",
+ "\t79\t-79\t-63",
+ "\t78\t-79\t-63",
+ "\t77\t-79\t-67",
+ "Kangertittivaq",
+ "\t72\t-29\t-23",
+ "\t71\t-30\t-20",
+ "\t70\t-30\t-20",
+ "\t69\t-30\t-20",
+ "\t68\t-28\t-26",
+ "Kara Sea",
+ "\t82\t64\t96",
+ "\t81\t64\t98",
+ "\t80\t64\t103",
+ "\t79\t64\t103",
+ "\t78\t65\t103",
+ "\t77\t65\t102",
+ "\t76\t59\t102",
+ "\t75\t56\t102",
+ "\t74\t55\t100",
+ "\t73\t54\t88",
+ "\t72\t54\t88",
+ "\t71\t54\t80",
+ "\t70\t54\t69\t74\t80",
+ "\t69\t55\t70\t77\t80",
+ "\t68\t59\t70",
+ "\t67\t65\t70",
+ "Karaginskiy Gulf",
+ "\t61\t162\t167",
+ "\t60\t161\t167",
+ "\t59\t160\t167",
+ "\t58\t160\t167",
+ "\t57\t160\t166",
+ "\t56\t161\t164",
+ "Karskiye Vorota Strait",
+ "\t71\t56\t60",
+ "\t70\t56\t60",
+ "\t69\t56\t60",
+ "Kattegat",
+ "\t59\t10\t12",
+ "\t58\t9\t13",
+ "\t57\t9\t13",
+ "\t56\t9\t13",
+ "\t55\t9\t13",
+ "\t54\t10\t12",
+ "Kennedy Channel",
+ "\t82\t-67\t-63",
+ "\t81\t-68\t-63",
+ "\t80\t-68\t-63",
+ "\t79\t-68\t-63",
+ "Khatanga Gulf",
+ "\t76\t111\t114",
+ "\t75\t108\t114",
+ "\t74\t105\t114",
+ "\t73\t104\t114",
+ "\t72\t104\t113",
+ "\t71\t104\t107",
+ "Korea Strait",
+ "\t37\t128\t131",
+ "\t36\t126\t133",
+ "\t35\t125\t133",
+ "\t34\t125\t133",
+ "\t33\t125\t133",
+ "\t32\t125\t131",
+ "\t31\t126\t130",
+ "Kotzebue Sound",
+ "\t68\t-164\t-160",
+ "\t67\t-165\t-159",
+ "\t66\t-165\t-159",
+ "\t65\t-165\t-159",
+ "Kronotskiy Gulf",
+ "\t55\t158\t162",
+ "\t54\t158\t162",
+ "\t53\t158\t162",
+ "\t52\t158\t161",
+ "La Perouse Strait",
+ "\t47\t140\t142",
+ "\t46\t140\t143",
+ "\t45\t140\t143",
+ "\t44\t140\t143",
+ "Labrador Sea",
+ "\t61\t-65\t-43",
+ "\t60\t-65\t-42",
+ "\t59\t-65\t-42",
+ "\t58\t-65\t-42",
+ "\t57\t-64\t-43",
+ "\t56\t-63\t-43",
+ "\t55\t-63\t-44",
+ "\t54\t-62\t-45",
+ "\t53\t-60\t-46",
+ "\t52\t-58\t-46",
+ "\t51\t-57\t-47",
+ "\t50\t-57\t-48",
+ "\t49\t-57\t-49",
+ "\t48\t-57\t-49",
+ "\t47\t-55\t-50",
+ "\t46\t-54\t-51",
+ "Laccadive Sea",
+ "\t15\t73\t75",
+ "\t14\t70\t75",
+ "\t13\t70\t76",
+ "\t12\t70\t76",
+ "\t11\t70\t77",
+ "\t10\t70\t77",
+ "\t9\t70\t79",
+ "\t8\t70\t80",
+ "\t7\t70\t81",
+ "\t6\t70\t81",
+ "\t5\t71\t81",
+ "\t4\t71\t80",
+ "\t3\t71\t79",
+ "\t2\t71\t78",
+ "\t1\t71\t76",
+ "\t0\t71\t75",
+ "\t-1\t71\t74",
+ "Lago de Maracaibo",
+ "\t11\t-72\t-70",
+ "\t10\t-73\t-70",
+ "\t9\t-73\t-70",
+ "\t8\t-73\t-70",
+ "Lake Baikal",
+ "\t56\t107\t110",
+ "\t55\t107\t110",
+ "\t54\t105\t110",
+ "\t53\t104\t110",
+ "\t52\t102\t110",
+ "\t51\t102\t109",
+ "\t50\t102\t107",
+ "Lake Chad",
+ "\t14\t13\t15",
+ "\t13\t13\t15",
+ "\t12\t13\t15",
+ "\t11\t13\t15",
+ "Lake Champlain",
+ "\t46\t-74\t-72",
+ "\t45\t-74\t-72",
+ "\t44\t-74\t-72",
+ "\t43\t-74\t-72",
+ "\t42\t-74\t-72",
+ "Lake Erie",
+ "\t44\t-80\t-77",
+ "\t43\t-84\t-77",
+ "\t42\t-84\t-77",
+ "\t41\t-84\t-77",
+ "\t40\t-84\t-79",
+ "Lake Huron",
+ "\t47\t-82\t-80",
+ "\t46\t-82\t-78",
+ "\t45\t-82\t-78",
+ "\t44\t-82\t-78",
+ "\t43\t-82\t-78",
+ "Lake Huron",
+ "\t47\t-85\t-80",
+ "\t46\t-85\t-78",
+ "\t45\t-85\t-78",
+ "\t44\t-85\t-78",
+ "\t43\t-84\t-78",
+ "\t42\t-84\t-80",
+ "Lake Huron",
+ "\t47\t-84\t-80",
+ "\t46\t-84\t-80",
+ "\t45\t-84\t-80",
+ "\t44\t-84\t-80",
+ "Lake Huron",
+ "\t45\t-84\t-82",
+ "\t44\t-84\t-82",
+ "\t43\t-84\t-82",
+ "\t42\t-84\t-82",
+ "Lake Malawi",
+ "\t-8\t32\t35",
+ "\t-9\t32\t35",
+ "\t-10\t32\t35",
+ "\t-11\t32\t35",
+ "\t-12\t33\t36",
+ "\t-13\t33\t36",
+ "\t-14\t33\t36",
+ "\t-15\t33\t36",
+ "Lake Michigan",
+ "\t47\t-86\t-84",
+ "\t46\t-88\t-83",
+ "\t45\t-89\t-83",
+ "\t44\t-89\t-83",
+ "\t43\t-89\t-84",
+ "\t42\t-88\t-85",
+ "\t41\t-88\t-85",
+ "\t40\t-88\t-85",
+ "Lake Okeechobee",
+ "\t28\t-82\t-79",
+ "\t27\t-82\t-79",
+ "\t26\t-82\t-79",
+ "\t25\t-82\t-79",
+ "Lake Ontario",
+ "\t45\t-78\t-74",
+ "\t44\t-80\t-74",
+ "\t43\t-80\t-74",
+ "\t42\t-80\t-75",
+ "Lake Pontchartrain",
+ "\t31\t-91\t-88",
+ "\t30\t-91\t-88",
+ "\t29\t-91\t-88",
+ "Lake Saint Clair",
+ "\t43\t-84\t-81",
+ "\t42\t-84\t-81",
+ "\t41\t-84\t-81",
+ "Lake Shasta",
+ "\t41\t-123\t-121",
+ "\t40\t-123\t-121",
+ "\t39\t-123\t-121",
+ "Lake Superior",
+ "\t50\t-89\t-87",
+ "\t49\t-90\t-84",
+ "\t48\t-92\t-83",
+ "\t47\t-93\t-83",
+ "\t46\t-93\t-83",
+ "\t45\t-93\t-83",
+ "Lake Superior",
+ "\t48\t-85\t-83",
+ "\t47\t-86\t-83",
+ "\t46\t-86\t-83",
+ "\t45\t-86\t-83",
+ "Lake Tahoe",
+ "\t40\t-121\t-118",
+ "\t39\t-121\t-118",
+ "\t38\t-121\t-118",
+ "\t37\t-121\t-118",
+ "Lake Tanganyika",
+ "\t-2\t28\t30",
+ "\t-3\t28\t30",
+ "\t-4\t28\t30",
+ "\t-5\t28\t31",
+ "\t-6\t28\t31",
+ "\t-7\t28\t32",
+ "\t-8\t28\t32",
+ "\t-9\t29\t32",
+ "Lake Victoria",
+ "\t1\t30\t35",
+ "\t0\t30\t35",
+ "\t-1\t30\t35",
+ "\t-2\t30\t35",
+ "\t-3\t30\t34",
+ "\t-4\t31\t33",
+ "Lake Winnipeg",
+ "\t55\t-99\t-96",
+ "\t54\t-100\t-96",
+ "\t53\t-100\t-95",
+ "\t52\t-100\t-95",
+ "\t51\t-99\t-95",
+ "\t50\t-99\t-95",
+ "\t49\t-97\t-95",
+ "Laptev Sea",
+ "\t82\t95\t101",
+ "\t81\t95\t109",
+ "\t80\t95\t118",
+ "\t79\t95\t126",
+ "\t78\t96\t134",
+ "\t77\t101\t139",
+ "\t76\t103\t139",
+ "\t75\t104\t139",
+ "\t74\t111\t138",
+ "\t73\t111\t137",
+ "\t72\t111\t137",
+ "\t71\t112\t114\t126\t136",
+ "\t70\t127\t134",
+ "\t69\t129\t132",
+ "Leyte Gulf",
+ "\t12\t124\t126",
+ "\t11\t124\t126",
+ "\t10\t124\t126",
+ "\t9\t124\t126",
+ "Liddon Gulf",
+ "\t76\t-115\t-110",
+ "\t75\t-116\t-110",
+ "\t74\t-116\t-110",
+ "\t73\t-116\t-111",
+ "Ligurian Sea",
+ "\t45\t7\t10",
+ "\t44\t6\t10",
+ "\t43\t6\t10",
+ "\t42\t6\t10",
+ "Lincoln Sea",
+ "\t84\t-70\t-36",
+ "\t83\t-70\t-36",
+ "\t82\t-70\t-36",
+ "\t81\t-69\t-37",
+ "\t80\t-54\t-48\t-46\t-43",
+ "Long Island Sound",
+ "\t42\t-74\t-71",
+ "\t41\t-74\t-71",
+ "\t40\t-74\t-71",
+ "\t39\t-74\t-71",
+ "Lutzow-Holm Bay",
+ "\t-67\t32\t41",
+ "\t-68\t32\t41",
+ "\t-69\t32\t41",
+ "\t-70\t32\t40",
+ "\t-71\t37\t39",
+ "Luzon Strait",
+ "\t23\t119\t121",
+ "\t22\t119\t122",
+ "\t21\t119\t123",
+ "\t20\t119\t123",
+ "\t19\t119\t123",
+ "\t18\t119\t123",
+ "\t17\t119\t123",
+ "M'Clure Strait",
+ "\t77\t-123\t-119",
+ "\t76\t-124\t-114",
+ "\t75\t-125\t-113",
+ "\t74\t-125\t-113",
+ "\t73\t-125\t-113",
+ "\t72\t-116\t-114",
+ "Mackenzie Bay",
+ "\t70\t-140\t-133",
+ "\t69\t-140\t-133",
+ "\t68\t-140\t-133",
+ "\t67\t-138\t-134",
+ "Makassar Strait",
+ "\t2\t116\t122",
+ "\t1\t116\t122",
+ "\t0\t115\t122",
+ "\t-1\t115\t121",
+ "\t-2\t115\t120",
+ "\t-3\t115\t120",
+ "\t-4\t115\t120",
+ "\t-5\t116\t120",
+ "\t-6\t118\t120",
+ "Marguerite Bay",
+ "\t-66\t-70\t-65",
+ "\t-67\t-71\t-65",
+ "\t-68\t-71\t-65",
+ "\t-69\t-71\t-65",
+ "\t-70\t-71\t-65",
+ "Massachusetts Bay",
+ "\t43\t-72\t-69",
+ "\t42\t-72\t-69",
+ "\t41\t-72\t-69",
+ "\t40\t-71\t-69",
+ "Matochkin Shar Strait",
+ "\t74\t53\t57",
+ "\t73\t53\t57",
+ "\t72\t53\t57",
+ "McMurdo Sound",
+ "\t-71\t165\t167",
+ "\t-72\t163\t170",
+ "\t-73\t161\t170",
+ "\t-74\t159\t170",
+ "\t-75\t159\t170",
+ "\t-76\t159\t170",
+ "\t-77\t161\t170",
+ "\t-78\t161\t170",
+ "\t-79\t162\t166",
+ "Mecklenburger Bucht",
+ "\t55\t9\t13",
+ "\t54\t9\t13",
+ "\t53\t9\t13",
+ "\t52\t9\t12",
+ "Mediterranean Sea",
+ "\t38\t10\t15",
+ "\t37\t9\t24\t26\t37",
+ "\t36\t9\t37",
+ "\t35\t9\t37",
+ "\t34\t9\t36",
+ "\t33\t9\t36",
+ "\t32\t9\t36",
+ "\t31\t11\t36",
+ "\t30\t23\t35",
+ "\t29\t27\t30",
+ "Mediterranean Sea",
+ "\t44\t4\t10",
+ "\t43\t3\t10",
+ "\t42\t2\t10",
+ "\t41\t2\t10",
+ "\t40\t2\t10",
+ "\t39\t-1\t11",
+ "\t38\t-2\t13",
+ "\t37\t-3\t13",
+ "\t36\t-3\t13",
+ "\t35\t-3\t11",
+ "\t34\t-2\t1",
+ "Melville Bay",
+ "\t77\t-68\t-59",
+ "\t76\t-68\t-56",
+ "\t75\t-68\t-55",
+ "\t74\t-67\t-55",
+ "\t73\t-62\t-55",
+ "Minto Inlet",
+ "\t72\t-119\t-114",
+ "\t71\t-119\t-114",
+ "\t70\t-119\t-114",
+ "Molucca Sea",
+ "\t5\t125\t127",
+ "\t4\t124\t128",
+ "\t3\t124\t129",
+ "\t2\t123\t129",
+ "\t1\t122\t129",
+ "\t0\t122\t129",
+ "\t-1\t122\t129",
+ "\t-2\t122\t128",
+ "Monterey Bay",
+ "\t37\t-123\t-120",
+ "\t36\t-123\t-120",
+ "\t35\t-123\t-120",
+ "Mozambique Channel",
+ "\t-9\t39\t44",
+ "\t-10\t39\t49",
+ "\t-11\t39\t50",
+ "\t-12\t39\t50",
+ "\t-13\t39\t50",
+ "\t-14\t39\t49",
+ "\t-15\t38\t48",
+ "\t-16\t35\t48",
+ "\t-17\t34\t46",
+ "\t-18\t33\t45",
+ "\t-19\t33\t45",
+ "\t-20\t33\t45",
+ "\t-21\t33\t45",
+ "\t-22\t34\t44",
+ "\t-23\t33\t45",
+ "\t-24\t31\t46",
+ "\t-25\t31\t46",
+ "\t-26\t31\t46",
+ "\t-27\t31\t39",
+ "Murchison Sound",
+ "\t79\t-73\t-71",
+ "\t78\t-73\t-65",
+ "\t77\t-73\t-65",
+ "\t76\t-73\t-65",
+ "North Sea",
+ "\t61\t-2\t7",
+ "\t60\t-3\t7",
+ "\t59\t-4\t8",
+ "\t58\t-5\t10",
+ "\t57\t-5\t10",
+ "\t56\t-5\t10",
+ "\t55\t-4\t10",
+ "\t54\t-4\t10",
+ "\t53\t-2\t10",
+ "\t52\t-1\t10",
+ "\t51\t-1\t5",
+ "\t50\t-1\t5",
+ "\t49\t0\t2",
+ "Norton Sound",
+ "\t65\t-165\t-159",
+ "\t64\t-165\t-159",
+ "\t63\t-165\t-159",
+ "\t62\t-165\t-159",
+ "Norwegian Sea",
+ "\t77\t13\t19",
+ "\t76\t9\t21",
+ "\t75\t4\t23",
+ "\t74\t-1\t25",
+ "\t73\t-5\t27",
+ "\t72\t-9\t28",
+ "\t71\t-10\t28",
+ "\t70\t-11\t28",
+ "\t69\t-12\t28",
+ "\t68\t-12\t24",
+ "\t67\t-13\t18",
+ "\t66\t-14\t15",
+ "\t65\t-14\t15",
+ "\t64\t-14\t14",
+ "\t63\t-14\t12",
+ "\t62\t-12\t10",
+ "\t61\t-9\t9",
+ "\t60\t-7\t7",
+ "\t59\t-4\t6",
+ "Oresund",
+ "\t57\t11\t13",
+ "\t56\t11\t13",
+ "\t55\t11\t13",
+ "\t54\t11\t13",
+ "Ozero Mogotoyevo",
+ "\t73\t143\t147",
+ "\t72\t143\t147",
+ "\t71\t143\t147",
+ "Pacific Ocean",
+ "\t59\t-140\t-135",
+ "\t58\t-146\t-134",
+ "\t57\t-153\t-132\t161\t164",
+ "\t56\t-159\t-132\t161\t166",
+ "\t55\t-164\t-131\t160\t168",
+ "\t54\t-167\t-131\t157\t170",
+ "\t53\t-172\t-130\t157\t172",
+ "\t52\t-180\t-129\t156\t179",
+ "\t51\t-180\t-126\t155\t180",
+ "\t50\t-180\t-123\t154\t180",
+ "\t49\t-180\t-123\t153\t180",
+ "\t48\t-180\t-123\t152\t180",
+ "\t47\t-180\t-122\t150\t180",
+ "\t46\t-180\t-122\t148\t180",
+ "\t45\t-180\t-122\t147\t180",
+ "\t44\t-180\t-122\t143\t180",
+ "\t43\t-180\t-123\t142\t180",
+ "\t42\t-180\t-123\t141\t180",
+ "\t41\t-180\t-123\t140\t180",
+ "\t40\t-180\t-122\t140\t180",
+ "\t39\t-180\t-121\t139\t180",
+ "\t38\t-180\t-121\t139\t180",
+ "\t37\t-180\t-120\t139\t180",
+ "\t36\t-180\t-119\t138\t180",
+ "\t35\t-180\t-117\t138\t180",
+ "\t34\t-180\t-116\t138\t180",
+ "\t33\t-180\t-115\t138\t180",
+ "\t32\t-180\t-115\t138\t180",
+ "\t31\t-180\t-114\t139\t180",
+ "\t30\t-180\t-113\t139\t180",
+ "\t29\t-180\t-113\t139\t180",
+ "\t28\t-180\t-113\t140\t180",
+ "\t27\t-180\t-111\t141\t180",
+ "\t26\t-180\t-111\t141\t180",
+ "\t25\t-180\t-110\t141\t180",
+ "\t24\t-180\t-105\t140\t180",
+ "\t23\t-180\t-104\t140\t180",
+ "\t22\t-180\t-104\t140\t180",
+ "\t21\t-180\t-104\t140\t180",
+ "\t20\t-180\t-103\t140\t180",
+ "\t19\t-180\t-101\t143\t180",
+ "\t18\t-180\t-99\t144\t180",
+ "\t17\t-180\t-97\t145\t180",
+ "\t16\t-180\t-91\t145\t180",
+ "\t15\t-180\t-90\t144\t180",
+ "\t14\t-180\t-86\t144\t180",
+ "\t13\t-180\t-85\t143\t180",
+ "\t12\t-180\t-84\t141\t180",
+ "\t11\t-180\t-84\t140\t180",
+ "\t10\t-180\t-82\t138\t180",
+ "\t9\t-180\t-80\t136\t180",
+ "\t8\t-180\t-76\t135\t180",
+ "\t7\t-180\t-76\t133\t180",
+ "\t6\t-180\t-76\t132\t180",
+ "\t5\t-180\t-76\t130\t180",
+ "\t4\t-180\t-76\t128\t180",
+ "\t3\t-180\t-76\t127\t180",
+ "\t2\t-180\t-76\t127\t180",
+ "\t1\t-180\t-76\t127\t180",
+ "\t0\t-180\t-77\t128\t180",
+ "\t-1\t-180\t-78\t128\t180",
+ "Pacific Ocean",
+ "\t4\t171\t173",
+ "\t3\t-93\t-90\t170\t174",
+ "\t2\t-93\t-89\t170\t174",
+ "\t1\t-180\t-79\t130\t180",
+ "\t0\t-180\t-79\t130\t180",
+ "\t-1\t-180\t-79\t130\t180",
+ "\t-2\t-180\t-79\t133\t180",
+ "\t-3\t-180\t-79\t133\t142\t145\t180",
+ "\t-4\t-180\t-79\t133\t136\t150\t180",
+ "\t-5\t-180\t-78\t152\t180",
+ "\t-6\t-180\t-78\t153\t180",
+ "\t-7\t-180\t-77\t154\t180",
+ "\t-8\t-180\t-77\t156\t180",
+ "\t-9\t-180\t-76\t158\t180",
+ "\t-10\t-180\t-76\t160\t180",
+ "\t-11\t-180\t-75\t160\t180",
+ "\t-12\t-180\t-75\t166\t180",
+ "\t-13\t-180\t-74\t166\t180",
+ "\t-14\t-180\t-73\t166\t180",
+ "\t-15\t-180\t-71\t166\t180",
+ "\t-16\t-180\t-69\t167\t180",
+ "\t-17\t-180\t-69\t167\t180",
+ "\t-18\t-180\t-69\t167\t180",
+ "\t-19\t-180\t-69\t168\t180",
+ "\t-20\t-180\t-69\t167\t180",
+ "\t-21\t-180\t-69\t166\t180",
+ "\t-22\t-180\t-69\t165\t180",
+ "\t-23\t-180\t-69\t164\t180",
+ "\t-24\t-180\t-69\t163\t180",
+ "\t-25\t-180\t-69\t161\t180",
+ "\t-26\t-180\t-69\t160\t180",
+ "\t-27\t-180\t-69\t159\t180",
+ "\t-28\t-180\t-69\t158\t180",
+ "\t-29\t-180\t-70\t158\t180",
+ "\t-30\t-180\t-70\t158\t180",
+ "\t-31\t-180\t-70\t158\t180",
+ "\t-32\t-180\t-70\t158\t180",
+ "\t-33\t-180\t-70\t161\t180",
+ "\t-34\t-180\t-70\t165\t180",
+ "\t-35\t-180\t-70\t169\t180",
+ "\t-36\t-180\t-71\t172\t180",
+ "\t-37\t-180\t-71\t173\t180",
+ "\t-38\t-180\t-72\t174\t180",
+ "\t-39\t-180\t-72\t175\t180",
+ "\t-40\t-180\t-72\t173\t180",
+ "\t-41\t-180\t-72\t172\t180",
+ "\t-42\t-180\t-72\t170\t180",
+ "\t-43\t-180\t-72\t169\t180",
+ "\t-44\t-180\t-72\t169\t180",
+ "\t-45\t-180\t-72\t166\t180",
+ "\t-46\t-180\t-72\t166\t180",
+ "\t-47\t-180\t-72\t165\t180",
+ "\t-48\t-180\t-72\t165\t180",
+ "\t-49\t-180\t-72\t165\t180",
+ "\t-50\t-180\t-71\t165\t180",
+ "\t-51\t-180\t-71\t165\t180",
+ "\t-52\t-180\t-71\t165\t180",
+ "\t-53\t-180\t-68\t165\t180",
+ "\t-54\t-180\t-67\t165\t180",
+ "\t-55\t-180\t-67\t165\t180",
+ "\t-56\t-180\t-67\t165\t180",
+ "\t-57\t-180\t-67\t165\t180",
+ "\t-58\t-180\t-67\t165\t180",
+ "\t-59\t-180\t-67\t165\t180",
+ "\t-60\t-180\t-67\t165\t180",
+ "\t-61\t-180\t-67\t165\t180",
+ "Palk Strait",
+ "\t11\t78\t80",
+ "\t10\t77\t81",
+ "\t9\t77\t81",
+ "\t8\t77\t81",
+ "\t7\t78\t80",
+ "Pamlico Sound",
+ "\t36\t-78\t-74",
+ "\t35\t-78\t-74",
+ "\t34\t-78\t-74",
+ "\t33\t-77\t-75",
+ "Peacock Sound",
+ "\t-71\t-103\t-94",
+ "\t-72\t-103\t-94",
+ "\t-73\t-103\t-94",
+ "\t-74\t-102\t-94",
+ "Persian Gulf",
+ "\t31\t46\t51",
+ "\t30\t46\t51",
+ "\t29\t46\t52",
+ "\t28\t46\t57",
+ "\t27\t47\t58",
+ "\t26\t47\t58",
+ "\t25\t48\t58",
+ "\t24\t49\t57",
+ "\t23\t49\t55",
+ "\t22\t50\t53",
+ "Philippine Sea",
+ "\t36\t135\t139",
+ "\t35\t135\t140",
+ "\t34\t132\t141",
+ "\t33\t130\t141",
+ "\t32\t129\t141",
+ "\t31\t129\t141",
+ "\t30\t129\t142",
+ "\t29\t128\t143",
+ "\t28\t127\t143",
+ "\t27\t126\t143",
+ "\t26\t126\t143",
+ "\t25\t120\t143",
+ "\t24\t120\t143",
+ "\t23\t119\t143",
+ "\t22\t119\t144",
+ "\t21\t119\t146",
+ "\t20\t119\t146",
+ "\t19\t120\t147",
+ "\t18\t121\t147",
+ "\t17\t120\t147",
+ "\t16\t120\t147",
+ "\t15\t120\t147",
+ "\t14\t120\t147",
+ "\t13\t120\t146",
+ "\t12\t121\t146",
+ "\t11\t123\t145",
+ "\t10\t124\t144",
+ "\t9\t124\t142",
+ "\t8\t125\t141",
+ "\t7\t125\t139",
+ "\t6\t124\t137",
+ "\t5\t124\t136",
+ "\t4\t124\t134",
+ "\t3\t124\t133",
+ "\t2\t124\t131",
+ "\t1\t127\t129",
+ "Porpoise Bay",
+ "\t-65\t125\t131",
+ "\t-66\t125\t131",
+ "\t-67\t125\t131",
+ "\t-68\t126\t130",
+ "Prince ALbert Sound",
+ "\t71\t-118\t-110",
+ "\t70\t-118\t-110",
+ "\t69\t-118\t-110",
+ "Prince of Wales Strait",
+ "\t74\t-118\t-113",
+ "\t73\t-121\t-113",
+ "\t72\t-121\t-113",
+ "\t71\t-121\t-115",
+ "\t70\t-121\t-117",
+ "Prince William Sound",
+ "\t62\t-149\t-145",
+ "\t61\t-149\t-144",
+ "\t60\t-149\t-144",
+ "\t59\t-149\t-144",
+ "\t58\t-148\t-146",
+ "Prydz Bay",
+ "\t-66\t68\t75",
+ "\t-67\t68\t80",
+ "\t-68\t66\t80",
+ "\t-69\t66\t80",
+ "\t-70\t66\t78",
+ "\t-71\t65\t74",
+ "\t-72\t65\t72",
+ "\t-73\t65\t71",
+ "\t-74\t65\t68",
+ "Puget Sound",
+ "\t49\t-123\t-121",
+ "\t48\t-124\t-121",
+ "\t47\t-124\t-121",
+ "\t46\t-124\t-121",
+ "Qiongzhou Strait",
+ "\t21\t108\t111",
+ "\t20\t108\t111",
+ "\t19\t108\t111",
+ "\t18\t108\t110",
+ "Queen Charlotte Sound",
+ "\t54\t-130\t-128",
+ "\t53\t-132\t-127",
+ "\t52\t-132\t-126",
+ "\t51\t-132\t-126",
+ "\t50\t-132\t-126",
+ "\t49\t-130\t-126",
+ "Queen Charlotte Straight",
+ "\t52\t-128\t-125",
+ "\t51\t-128\t-123",
+ "\t50\t-128\t-123",
+ "\t49\t-128\t-123",
+ "Ragay Gulf",
+ "\t14\t121\t124",
+ "\t13\t121\t124",
+ "\t12\t121\t124",
+ "\t11\t122\t124",
+ "Red Sea",
+ "\t29\t33\t36",
+ "\t28\t32\t36",
+ "\t27\t32\t37",
+ "\t26\t32\t38",
+ "\t25\t32\t39",
+ "\t24\t33\t39",
+ "\t23\t34\t40",
+ "\t22\t34\t40",
+ "\t21\t34\t41",
+ "\t20\t35\t42",
+ "\t19\t36\t42",
+ "\t18\t36\t43",
+ "\t17\t36\t43",
+ "\t16\t37\t43",
+ "\t15\t38\t44",
+ "\t14\t38\t44",
+ "\t13\t39\t44",
+ "\t12\t40\t44",
+ "\t11\t41\t44",
+ "Richard Collinson Inlet",
+ "\t74\t-115\t-113",
+ "\t73\t-115\t-112",
+ "\t72\t-115\t-112",
+ "\t71\t-115\t-112",
+ "Rio de la Plata",
+ "\t-31\t-59\t-57",
+ "\t-32\t-59\t-57",
+ "\t-33\t-59\t-53",
+ "\t-34\t-59\t-53",
+ "\t-35\t-59\t-53",
+ "\t-36\t-58\t-54",
+ "\t-37\t-58\t-55",
+ "Robeson Channel",
+ "\t83\t-63\t-56",
+ "\t82\t-63\t-55",
+ "\t81\t-63\t-55",
+ "\t80\t-62\t-55",
+ "Ronne Entrance",
+ "\t-70\t-76\t-74",
+ "\t-71\t-76\t-72",
+ "\t-72\t-76\t-72",
+ "\t-73\t-76\t-72",
+ "Ross Sea",
+ "\t-70\t169\t180",
+ "\t-71\t167\t180",
+ "\t-72\t167\t180",
+ "\t-73\t167\t180",
+ "\t-74\t168\t180",
+ "\t-75\t168\t180",
+ "\t-76\t165\t180",
+ "\t-77\t160\t180",
+ "\t-78\t158\t180",
+ "\t-79\t157\t180",
+ "\t-80\t157\t180",
+ "\t-81\t157\t180",
+ "\t-82\t159\t180",
+ "\t-83\t160\t180",
+ "\t-84\t166\t180",
+ "\t-85\t176\t180",
+ "Ross Sea",
+ "\t-70\t-180\t-101",
+ "\t-71\t-180\t-101",
+ "\t-72\t-180\t-101",
+ "\t-73\t-180\t-101",
+ "\t-74\t-180\t-107",
+ "\t-75\t-180\t-130\t-125\t-113",
+ "\t-76\t-180\t-135",
+ "\t-77\t-180\t-144",
+ "\t-78\t-180\t-147",
+ "\t-79\t-180\t-147",
+ "\t-80\t-180\t-147",
+ "\t-81\t-180\t-147",
+ "\t-82\t-180\t-149",
+ "\t-83\t-180\t-152",
+ "\t-84\t-180\t-155",
+ "\t-85\t-180\t-155",
+ "\t-86\t-159\t-155",
+ "Salton Sea",
+ "\t34\t-117\t-114",
+ "\t33\t-117\t-114",
+ "\t32\t-117\t-114",
+ "Samar Sea",
+ "\t14\t122\t124",
+ "\t13\t122\t125",
+ "\t12\t122\t126",
+ "\t11\t122\t126",
+ "\t10\t123\t126",
+ "San Francisco Bay",
+ "\t39\t-123\t-120",
+ "\t38\t-123\t-120",
+ "\t37\t-123\t-120",
+ "\t36\t-123\t-121",
+ "Sargasso Sea",
+ "\t36\t-68\t-51",
+ "\t35\t-69\t-50",
+ "\t34\t-70\t-49",
+ "\t33\t-70\t-49",
+ "\t32\t-71\t-49",
+ "\t31\t-71\t-49",
+ "\t30\t-71\t-49",
+ "\t29\t-71\t-49",
+ "\t28\t-71\t-49",
+ "\t27\t-71\t-49",
+ "\t26\t-71\t-49",
+ "\t25\t-71\t-49",
+ "\t24\t-71\t-49",
+ "\t23\t-70\t-49",
+ "\t22\t-69\t-49",
+ "\t21\t-68\t-49",
+ "\t20\t-67\t-49",
+ "\t19\t-64\t-50",
+ "Savu Sea",
+ "\t-7\t117\t126",
+ "\t-8\t117\t126",
+ "\t-9\t117\t126",
+ "\t-10\t117\t125",
+ "\t-11\t119\t124",
+ "Scotia Sea",
+ "\t-50\t-59\t-53",
+ "\t-51\t-60\t-46",
+ "\t-52\t-60\t-40",
+ "\t-53\t-60\t-35",
+ "\t-54\t-60\t-35",
+ "\t-55\t-59\t-35",
+ "\t-56\t-59\t-36",
+ "\t-57\t-58\t-37",
+ "\t-58\t-58\t-39",
+ "\t-59\t-57\t-40",
+ "\t-60\t-57\t-41",
+ "\t-61\t-56\t-43",
+ "\t-62\t-56\t-49",
+ "Sea of Azov",
+ "\t48\t36\t40",
+ "\t47\t33\t40",
+ "\t46\t33\t40",
+ "\t45\t33\t39",
+ "\t44\t33\t39",
+ "Sea of Crete",
+ "\t39\t22\t24",
+ "\t38\t21\t25",
+ "\t37\t21\t29",
+ "\t36\t21\t29",
+ "\t35\t22\t29",
+ "\t34\t22\t28",
+ "Sea of Japan",
+ "\t52\t139\t143",
+ "\t51\t139\t143",
+ "\t50\t139\t143",
+ "\t49\t138\t143",
+ "\t48\t137\t143",
+ "\t47\t137\t143",
+ "\t46\t135\t143",
+ "\t45\t134\t143",
+ "\t44\t130\t143",
+ "\t43\t129\t142",
+ "\t42\t128\t142",
+ "\t41\t127\t141",
+ "\t40\t127\t141",
+ "\t39\t127\t141",
+ "\t38\t127\t141",
+ "\t37\t127\t140",
+ "\t36\t126\t139",
+ "\t35\t125\t138",
+ "\t34\t125\t137",
+ "\t33\t125\t133",
+ "\t32\t125\t131",
+ "\t31\t126\t130",
+ "Sea of Marmara",
+ "\t42\t26\t30",
+ "\t41\t25\t30",
+ "\t40\t25\t30",
+ "\t39\t25\t30",
+ "Sea of Okhotsk",
+ "\t60\t141\t156",
+ "\t59\t139\t156",
+ "\t58\t137\t157",
+ "\t57\t137\t157",
+ "\t56\t137\t157",
+ "\t55\t136\t157",
+ "\t54\t136\t157",
+ "\t53\t136\t157",
+ "\t52\t136\t139\t141\t158",
+ "\t51\t142\t158",
+ "\t50\t142\t158",
+ "\t49\t141\t157",
+ "\t48\t141\t156",
+ "\t47\t141\t155",
+ "\t46\t140\t154",
+ "\t45\t140\t153",
+ "\t44\t140\t151",
+ "\t43\t141\t149",
+ "\t42\t143\t148",
+ "Selat Bali",
+ "\t-7\t113\t116",
+ "\t-8\t113\t116",
+ "\t-9\t113\t116",
+ "Selat Dampier",
+ "\t1\t128\t132",
+ "\t0\t128\t132",
+ "\t-1\t128\t132",
+ "\t-2\t130\t132",
+ "Seno de Skyring",
+ "\t-51\t-74\t-70",
+ "\t-52\t-74\t-70",
+ "\t-53\t-74\t-70",
+ "\t-54\t-74\t-71",
+ "Seno Otway",
+ "\t-51\t-72\t-70",
+ "\t-52\t-73\t-70",
+ "\t-53\t-73\t-70",
+ "\t-54\t-73\t-70",
+ "Shark Bay",
+ "\t-23\t112\t114",
+ "\t-24\t112\t115",
+ "\t-25\t112\t115",
+ "\t-26\t112\t115",
+ "\t-27\t112\t115",
+ "Shelikhova Gulf",
+ "\t63\t162\t166",
+ "\t62\t155\t166",
+ "\t61\t153\t166",
+ "\t60\t153\t165",
+ "\t59\t153\t164",
+ "\t58\t153\t162",
+ "\t57\t154\t160",
+ "\t56\t155\t158",
+ "Sherman Basin",
+ "\t69\t-99\t-97",
+ "\t68\t-99\t-96",
+ "\t67\t-99\t-96",
+ "\t66\t-99\t-96",
+ "Sibuyan Sea",
+ "\t14\t120\t123",
+ "\t13\t120\t124",
+ "\t12\t120\t124",
+ "\t11\t120\t124",
+ "\t10\t120\t124",
+ "Skagerrak",
+ "\t60\t8\t12",
+ "\t59\t6\t12",
+ "\t58\t6\t12",
+ "\t57\t6\t12",
+ "\t56\t6\t11",
+ "\t55\t7\t9",
+ "Smith Sound",
+ "\t53\t-128\t-125",
+ "\t52\t-129\t-125",
+ "\t51\t-129\t-125",
+ "\t50\t-129\t-125",
+ "Sognefjorden",
+ "\t62\t3\t8",
+ "\t61\t3\t8",
+ "\t60\t3\t8",
+ "\t59\t4\t8",
+ "Solomon Sea",
+ "\t-3\t151\t155",
+ "\t-4\t146\t155",
+ "\t-5\t145\t157",
+ "\t-6\t145\t160",
+ "\t-7\t145\t161",
+ "\t-8\t146\t162",
+ "\t-9\t147\t163",
+ "\t-10\t147\t163",
+ "\t-11\t148\t163",
+ "\t-12\t152\t162",
+ "South China Sea",
+ "\t24\t112\t121",
+ "\t23\t112\t121",
+ "\t22\t109\t122",
+ "\t21\t108\t123",
+ "\t20\t108\t123",
+ "\t19\t107\t123",
+ "\t18\t105\t123",
+ "\t17\t105\t123",
+ "\t16\t105\t121",
+ "\t15\t106\t121",
+ "\t14\t107\t121",
+ "\t13\t108\t121",
+ "\t12\t107\t121",
+ "\t11\t104\t121",
+ "\t10\t104\t120",
+ "\t9\t103\t120",
+ "\t8\t102\t119",
+ "\t7\t101\t118",
+ "\t6\t101\t117",
+ "\t5\t101\t117",
+ "\t4\t101\t117",
+ "\t3\t102\t116",
+ "\t2\t102\t114",
+ "\t1\t101\t113",
+ "\t0\t101\t112",
+ "\t-1\t101\t111",
+ "\t-2\t103\t111",
+ "\t-3\t103\t111",
+ "\t-4\t105\t107",
+ "Southern Ocean",
+ "\t-59\t-180\t180",
+ "\t-60\t-180\t180",
+ "\t-61\t-180\t180",
+ "\t-62\t-180\t180",
+ "\t-63\t-180\t180",
+ "\t-64\t-180\t180",
+ "\t-65\t-180\t91\t103\t180",
+ "\t-66\t-180\t87\t110\t180",
+ "\t-67\t-180\t52\t54\t85\t112\t180",
+ "\t-68\t-180\t51\t54\t84\t112\t122\t141\t180",
+ "\t-69\t-180\t-69\t-66\t44\t74\t80\t145\t180",
+ "\t-70\t-180\t-73\t-64\t33\t154\t180",
+ "\t-71\t-180\t-79\t-63\t33\t159\t180",
+ "\t-72\t-180\t-85\t-62\t2\t24\t27\t161\t163\t166\t180",
+ "\t-73\t-100\t-91",
+ "St. Helena Bay",
+ "\t-30\t16\t19",
+ "\t-31\t16\t19",
+ "\t-32\t16\t19",
+ "\t-33\t16\t19",
+ "St. Lawrence River",
+ "\t51\t-67\t-63",
+ "\t50\t-69\t-63",
+ "\t49\t-72\t-63",
+ "\t48\t-72\t-63",
+ "\t47\t-74\t-66",
+ "\t46\t-75\t-68",
+ "\t45\t-75\t-69",
+ "\t44\t-75\t-72",
+ "Stettiner Haff",
+ "\t55\t12\t14",
+ "\t54\t12\t15",
+ "\t53\t12\t15",
+ "\t52\t12\t15",
+ "Storfjorden",
+ "\t79\t17\t22",
+ "\t78\t16\t22",
+ "\t77\t15\t22",
+ "\t76\t15\t22",
+ "\t75\t15\t19",
+ "Strait of Belle Isle",
+ "\t53\t-56\t-54",
+ "\t52\t-58\t-54",
+ "\t51\t-58\t-54",
+ "\t50\t-58\t-54",
+ "Strait of Georgia",
+ "\t51\t-126\t-122",
+ "\t50\t-126\t-121",
+ "\t49\t-126\t-121",
+ "\t48\t-126\t-121",
+ "\t47\t-124\t-121",
+ "Strait of Gibraltar",
+ "\t37\t-7\t-4",
+ "\t36\t-7\t-4",
+ "\t35\t-7\t-4",
+ "\t34\t-6\t-4",
+ "Strait of Juan de Fuca",
+ "\t49\t-125\t-121",
+ "\t48\t-125\t-121",
+ "\t47\t-125\t-121",
+ "Strait of Malacca",
+ "\t9\t97\t99",
+ "\t8\t97\t100",
+ "\t7\t95\t101",
+ "\t6\t94\t101",
+ "\t5\t94\t101",
+ "\t4\t94\t102",
+ "\t3\t96\t103",
+ "\t2\t97\t104",
+ "\t1\t98\t104",
+ "\t0\t99\t104",
+ "\t-1\t101\t104",
+ "Strait of Singapore",
+ "\t2\t102\t105",
+ "\t1\t102\t105",
+ "\t0\t102\t105",
+ "Straits of Florida",
+ "\t27\t-81\t-77",
+ "\t26\t-82\t-77",
+ "\t25\t-84\t-77",
+ "\t24\t-84\t-77",
+ "\t23\t-84\t-77",
+ "\t22\t-84\t-78",
+ "Sulu Sea",
+ "\t13\t118\t122",
+ "\t12\t118\t123",
+ "\t11\t118\t123",
+ "\t10\t117\t124",
+ "\t9\t116\t124",
+ "\t8\t115\t124",
+ "\t7\t115\t124",
+ "\t6\t115\t123",
+ "\t5\t115\t123",
+ "\t4\t116\t121",
+ "Sulzberger Bay",
+ "\t-75\t-153\t-144",
+ "\t-76\t-159\t-144",
+ "\t-77\t-159\t-144",
+ "\t-78\t-159\t-144",
+ "Surigao Strait",
+ "\t11\t124\t126",
+ "\t10\t124\t126",
+ "\t9\t124\t126",
+ "\t8\t124\t126",
+ "Taiwan Strait",
+ "\t26\t117\t122",
+ "\t25\t116\t122",
+ "\t24\t116\t122",
+ "\t23\t116\t121",
+ "\t22\t116\t121",
+ "Tasman Sea",
+ "\t-28\t152\t160",
+ "\t-29\t152\t160",
+ "\t-30\t151\t162",
+ "\t-31\t150\t166",
+ "\t-32\t150\t170",
+ "\t-33\t149\t174",
+ "\t-34\t149\t174",
+ "\t-35\t148\t175",
+ "\t-36\t148\t175",
+ "\t-37\t147\t175",
+ "\t-38\t146\t176",
+ "\t-39\t146\t176",
+ "\t-40\t146\t176",
+ "\t-41\t146\t176",
+ "\t-42\t145\t175",
+ "\t-43\t145\t172",
+ "\t-44\t145\t171",
+ "\t-45\t147\t169",
+ "\t-46\t150\t168",
+ "\t-47\t152\t168",
+ "\t-48\t155\t168",
+ "\t-49\t158\t167",
+ "\t-50\t160\t167",
+ "\t-51\t163\t167",
+ "Tatar Strait",
+ "\t54\t139\t142",
+ "\t53\t139\t142",
+ "\t52\t139\t142",
+ "\t51\t140\t142",
+ "\t50\t140\t142",
+ "Tayabas Bay",
+ "\t14\t119\t123",
+ "\t13\t119\t123",
+ "\t12\t119\t123",
+ "The North Western Passages",
+ "\t81\t-101\t-95",
+ "\t80\t-108\t-90\t-88\t-82",
+ "\t79\t-114\t-80",
+ "\t78\t-117\t-80",
+ "\t77\t-120\t-80",
+ "\t76\t-120\t-81",
+ "\t75\t-120\t-78",
+ "\t74\t-120\t-76",
+ "\t73\t-106\t-76",
+ "\t72\t-106\t-76",
+ "\t71\t-118\t-116\t-106\t-83",
+ "\t70\t-119\t-112\t-108\t-83",
+ "\t69\t-119\t-85",
+ "\t68\t-119\t-92",
+ "\t67\t-118\t-92",
+ "\t66\t-116\t-106\t-104\t-94",
+ "\t65\t-97\t-94",
+ "Timor Sea",
+ "\t-7\t125\t131",
+ "\t-8\t123\t131",
+ "\t-9\t121\t132",
+ "\t-10\t121\t133",
+ "\t-11\t121\t133",
+ "\t-12\t122\t133",
+ "\t-13\t124\t133",
+ "\t-14\t125\t131",
+ "Torres Strait",
+ "\t-8\t140\t144",
+ "\t-9\t140\t144",
+ "\t-10\t140\t144",
+ "\t-11\t141\t143",
+ "\t-12\t141\t143",
+ "Trondheimsfjorden",
+ "\t65\t10\t12",
+ "\t64\t7\t12",
+ "\t63\t7\t12",
+ "\t62\t7\t12",
+ "Tsugaru Strait",
+ "\t42\t139\t142",
+ "\t41\t139\t142",
+ "\t40\t139\t142",
+ "\t39\t139\t142",
+ "Tyrrhenian Sea",
+ "\t45\t8\t11",
+ "\t44\t8\t11",
+ "\t43\t8\t12",
+ "\t42\t8\t14",
+ "\t41\t8\t16",
+ "\t40\t8\t17",
+ "\t39\t7\t17",
+ "\t38\t7\t17",
+ "\t37\t7\t17",
+ "\t36\t10\t14",
+ "Uchiura Bay",
+ "\t43\t139\t144",
+ "\t42\t139\t144",
+ "\t41\t139\t144",
+ "\t40\t139\t143",
+ "\t39\t140\t142",
+ "Uda Bay",
+ "\t57\t136\t139",
+ "\t56\t134\t139",
+ "\t55\t134\t139",
+ "\t54\t134\t139",
+ "\t53\t134\t139",
+ "\t52\t135\t138",
+ "Ungava Bay",
+ "\t61\t-71\t-63",
+ "\t60\t-71\t-63",
+ "\t59\t-71\t-63",
+ "\t58\t-71\t-64",
+ "\t57\t-71\t-64",
+ "\t56\t-70\t-66",
+ "Uummannaq Fjord",
+ "\t73\t-54\t-52",
+ "\t72\t-55\t-50",
+ "\t71\t-55\t-49",
+ "\t70\t-55\t-49",
+ "\t69\t-55\t-49",
+ "Vestfjorden",
+ "\t69\t12\t18",
+ "\t68\t11\t18",
+ "\t67\t11\t18",
+ "\t66\t11\t17",
+ "\t65\t12\t14",
+ "Vil'kitskogo Strait",
+ "\t79\t99\t106",
+ "\t78\t99\t106",
+ "\t77\t99\t106",
+ "\t76\t99\t106",
+ "\t75\t99\t101",
+ "Vincennes Bay",
+ "\t-65\t103\t111",
+ "\t-66\t103\t111",
+ "\t-67\t103\t111",
+ "Visayan Sea",
+ "\t13\t122\t124",
+ "\t12\t121\t125",
+ "\t11\t121\t125",
+ "\t10\t121\t125",
+ "\t9\t121\t125",
+ "Viscount Melville Sound",
+ "\t76\t-110\t-103",
+ "\t75\t-115\t-103",
+ "\t74\t-116\t-103",
+ "\t73\t-116\t-103",
+ "\t72\t-116\t-104",
+ "\t71\t-114\t-107",
+ "Waddenzee",
+ "\t54\t3\t7",
+ "\t53\t3\t7",
+ "\t52\t3\t7",
+ "\t51\t3\t6",
+ "Wager Bay",
+ "\t66\t-92\t-86",
+ "\t65\t-92\t-86",
+ "\t64\t-92\t-86",
+ "Weddell Sea",
+ "\t-70\t-62\t-9",
+ "\t-71\t-63\t-9",
+ "\t-72\t-63\t-9",
+ "\t-73\t-64\t-10",
+ "\t-74\t-66\t-13",
+ "\t-75\t-78\t-14",
+ "\t-76\t-84\t-17",
+ "\t-77\t-84\t-25",
+ "\t-78\t-84\t-22",
+ "\t-79\t-84\t-22",
+ "\t-80\t-82\t-22",
+ "\t-81\t-79\t-23",
+ "\t-82\t-70\t-36",
+ "\t-83\t-66\t-50\t-48\t-42",
+ "\t-84\t-62\t-57",
+ "White Sea",
+ "\t69\t37\t45",
+ "\t68\t30\t33\t37\t45",
+ "\t67\t30\t45",
+ "\t66\t30\t45",
+ "\t65\t31\t45",
+ "\t64\t33\t41",
+ "\t63\t33\t41",
+ "\t62\t35\t38",
+ "Wrigley Gulf",
+ "\t-72\t-131\t-124",
+ "\t-73\t-135\t-123",
+ "\t-74\t-135\t-123",
+ "\t-75\t-135\t-123",
+ "Wynniat Bay",
+ "\t73\t-112\t-109",
+ "\t72\t-112\t-109",
+ "\t71\t-112\t-109",
+ "Yellow Sea",
+ "\t41\t123\t125",
+ "\t40\t120\t126",
+ "\t39\t120\t126",
+ "\t38\t119\t127",
+ "\t37\t119\t127",
+ "\t36\t118\t127",
+ "\t35\t118\t127",
+ "\t34\t118\t127",
+ "\t33\t118\t127",
+ "\t32\t119\t127",
+ "\t31\t119\t125",
+ "\t30\t120\t123",
+ "Yellowstone Lake",
+ "\t45\t-111\t-109",
+ "\t44\t-111\t-109",
+ "\t43\t-111\t-109",
+ "Yenisey Gulf",
+ "\t74\t77\t81",
+ "\t73\t77\t83",
+ "\t72\t77\t84",
+ "\t71\t77\t84",
+ "\t70\t79\t84",
+ "\t69\t81\t84",
+ "Yucatan Channel",
+ "\t23\t-86\t-84",
+ "\t22\t-88\t-83",
+ "\t21\t-88\t-83",
+ "\t20\t-88\t-83",
+ NULL
+};
+
diff --git a/api/sqnutils.h b/api/sqnutils.h
index d0c01c53..51206637 100644
--- a/api/sqnutils.h
+++ b/api/sqnutils.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 9/2/97
*
-* $Revision: 6.483 $
+* $Revision: 6.559 $
*
* File Description:
*
@@ -192,10 +192,14 @@ NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, Boo
NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3);
NLM_EXTERN void FreeAllFuzz (SeqLocPtr location);
NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location);
+NLM_EXTERN void NormalizeNullsBetween (SeqLocPtr location);
NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location);
NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp);
NLM_EXTERN Boolean SeqLocBadSortOrder (BioseqPtr bsp, SeqLocPtr slp);
NLM_EXTERN Boolean SeqLocMixedStrands (BioseqPtr bsp, SeqLocPtr slp);
+/* Check/SetSeqLocPartialEx take lim argument - 3 is tr, 4 is tl */
+NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr);
+NLM_EXTERN void SetSeqLocPartialEx (SeqLocPtr location, Boolean partial5, Boolean partial3, Int4 lim);
/* GetBioseqGivenSeqLoc returns a segmented bioseq if the SeqLoc is to the parts */
@@ -252,6 +256,7 @@ NLM_EXTERN void ReassignFeatureIDs (SeqEntryPtr sep);
NLM_EXTERN void LinkCDSmRNAbyOverlap (SeqEntryPtr sep);
NLM_EXTERN void LinkCDSmRNAbyProduct (SeqEntryPtr sep);
NLM_EXTERN void LinkCDSmRNAbyLabel (SeqEntryPtr sep);
+NLM_EXTERN void LinkCDSmRNAbyLabelAndLocation (SeqEntryPtr sep);
NLM_EXTERN void StripFeatIDXrefAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
NLM_EXTERN void StripSeqDataGapAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
@@ -259,7 +264,7 @@ NLM_EXTERN void StripNewFeatMolInfoFieldsAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
NLM_EXTERN void StripPCRPrimerAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
NLM_EXTERN void StripOrgNamePgcodeAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
NLM_EXTERN void StripGeneRnaPcrAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
-
+NLM_EXTERN void StripSeqFeatSupportAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
/* functions to parse [org=Drosophila melanogaster] and [gene=lacZ] from titles */
/* for example, passing "gene" to SqnTagFind returns "lacZ" */
@@ -566,11 +571,29 @@ NLM_EXTERN BioseqPtr ReadDeltaFastaWithEmptyDefline (FILE *fp, Uint2Ptr entityID
feature table with ReadAsnFastaOrFlatFile) to stand-alone gene features or protein features
and protein bioseqs. It processes ALL features in the list - you give it the FIRST sfp. */
-NLM_EXTERN void PromoteXrefs (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID);
-NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, Boolean include_stop,
- Boolean remove_trailingX, Boolean gen_prod_set);
-NLM_EXTERN void PromoteXrefsExEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, Boolean include_stop,
- Boolean remove_trailingX, Boolean gen_prod_set, Boolean force_local_id);
+NLM_EXTERN void PromoteXrefs (
+ SeqFeatPtr sfp,
+ BioseqPtr bsp,
+ Uint2 entityID
+);
+NLM_EXTERN void PromoteXrefsEx (
+ SeqFeatPtr sfp,
+ BioseqPtr bsp,
+ Uint2 entityID,
+ Boolean include_stop,
+ Boolean remove_trailingX,
+ Boolean gen_prod_set
+);
+NLM_EXTERN void PromoteXrefsExEx (
+ SeqFeatPtr sfp,
+ BioseqPtr bsp,
+ Uint2 entityID,
+ Boolean include_stop,
+ Boolean remove_trailingX,
+ Boolean gen_prod_set,
+ Boolean force_local_id,
+ BoolPtr seq_fetch_failP
+);
/* SetEmptyGeneticCodes imposes genetic code on all coding regions within a feature table */
@@ -620,15 +643,22 @@ NLM_EXTERN void CleanUpSeqFeat (SeqFeatPtr sfp, Boolean isEmblOrDdbj, Boolean is
NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp);
+NLM_EXTERN void CleanupSubSourceOrgModOtherFeat (SeqFeatPtr sfp, Pointer userdata);
+NLM_EXTERN void CleanupSubSourceOrgModOtherDesc (SeqDescrPtr sdp, Pointer userdata);
+
NLM_EXTERN void CleanUpPubdescAuthors (PubdescPtr pdp);
NLM_EXTERN void CleanUpPubdescBody (PubdescPtr pdp, Boolean stripSerial);
+NLM_EXTERN void CleanStructuredComment (UserObjectPtr uop);
+
NLM_EXTERN void SortSeqEntryQualifiers (SeqEntryPtr sep);
/* BasicSeqAnnotCleanup is for cleaning up contents of separate named Seq-annot objects */
NLM_EXTERN void BasicSeqAnnotCleanup (SeqAnnotPtr sap);
+NLM_EXTERN void RemoveUnnecessaryGeneXrefs (SeqFeatPtr sfp, Pointer userdata);
+
/* CautiousSeqEntryCleanup is a gradual consolidation and replacement of functions in SeriousSeqEntryCleanup,
which does change the itemID structure, and is intended to be safe for a retrofit of the ID database */
@@ -766,10 +796,12 @@ typedef void (*VisitUserFieldsFunc) (UserFieldPtr ufp, Pointer userdata);
NLM_EXTERN Int4 VisitUserFieldsInUfp (UserFieldPtr ufp, Pointer userdata, VisitUserFieldsFunc callback);
NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, VisitUserFieldsFunc callback);
-/* visits all sub UserObjects if the data type is 12 - needed to pack multiple user objects on a single feature */
+/* visits all sub UserObjects if the data type is 12 - needed to pack multiple user objects on a single feature. Does not visit user objects which contain other user objects. */
typedef void (*VisitUserObjectFunc) (UserObjectPtr uop, Pointer userdata);
NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback);
+/* Visits all user objects, even if they contain other user objects */
+NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback);
/* explores sub UserObjects including "CombinedFeatureUserObjects" and finds by label */
@@ -914,6 +946,7 @@ NLM_EXTERN void FixNonWGSSets (ValNodePtr item_list, Pointer data, LogInfoPtr li
/* structures and functions for the Discrepancy Report */
typedef void (*ClickableCallback) (ValNodePtr item_list, Pointer userdata);
typedef void (*ClickableCallbackDataFree) (Pointer userdata);
+typedef void (*AutofixCallback) (ValNodePtr item_list, Pointer userdata, LogInfoPtr lip);
typedef struct clickableitem
{
@@ -927,6 +960,12 @@ typedef struct clickableitem
ValNodePtr subcategories;
Boolean expanded;
Int4 level;
+ AutofixCallback autofix_func; /* note - autofix functions can be set for an
+ * entire category or for an individual clickable
+ * item. Don't set autofix functions in both
+ * places or they will both be called.
+ */
+ Pointer autofix_data; /* data for item-specific autofixes */
} ClickableItemData, PNTR ClickableItemPtr;
extern ClickableItemPtr
@@ -935,6 +974,11 @@ NewClickableItem
CharPtr description_fmt,
ValNodePtr item_list);
+extern ClickableItemPtr
+NewClickableItemNoList
+(Uint4 clickable_item_type,
+ CharPtr description);
+
extern ValNodePtr ClickableItemObjectListFree (ValNodePtr vnp);
extern ValNodePtr ClickableItemObjectListCopy (ValNodePtr orig);
extern ClickableItemPtr ClickableItemFree (ClickableItemPtr cip);
@@ -982,6 +1026,8 @@ typedef enum {
DISC_SHORT_CONTIG,
DISC_INCONSISTENT_BIOSRC,
DISC_SUSPECT_PRODUCT_NAME,
+ DISC_PRODUCT_NAME_TYPO,
+ DISC_PRODUCT_NAME_QUICKFIX,
DISC_INCONSISTENT_BIOSRC_DEFLINE,
DISC_PARTIAL_CDS_IN_COMPLETE_SEQUENCE,
DISC_EC_NUMBER_ON_HYPOTHETICAL_PROTEIN,
@@ -1080,6 +1126,7 @@ typedef enum {
DISC_STRAIN_TAXNAME_MISMATCH,
DISC_HUMAN_HOST,
DISC_BAD_BACTERIAL_GENE_NAME,
+ TEST_BAD_GENE_NAME,
ONCALLER_ORDERED_LOCATION,
ONCALLER_COMMENT_PRESENT,
ONCALLER_DEFLINE_ON_SET,
@@ -1087,6 +1134,30 @@ typedef enum {
SHORT_PROT_SEQUENCES,
TEST_EXON_ON_MRNA,
TEST_HAS_PROJECT_ID,
+ ONCALLER_HAS_STANDARD_NAME,
+ ONCALLER_MISSING_STRUCTURED_COMMENTS,
+ DISC_REQUIRED_STRAIN,
+ MISSING_GENOMEASSEMBLY_COMMENTS,
+ DISC_BACTERIAL_TAX_STRAIN_MISMATCH,
+ TEST_CDS_HAS_CDD_XREF,
+ TEST_UNUSUAL_NT,
+ TEST_LOW_QUALITY_REGION,
+ TEST_ORGANELLE_NOT_GENOMIC,
+ TEST_UNWANTED_SPACER,
+ TEST_ORGANELLE_PRODUCTS,
+ TEST_SP_NOT_UNCULTURED,
+ TEST_BAD_MRNA_QUAL,
+ TEST_UNNECESSARY_ENVIRONMENTAL,
+ TEST_UNNECESSARY_VIRUS_GENE,
+ TEST_UNWANTED_SET_WRAPPER,
+ TEST_MISSING_PRIMER,
+ TEST_UNUSUAL_MISC_RNA,
+ TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE,
+ TEST_DUP_GENES_OPPOSITE_STRANDS,
+ TEST_SMALL_GENOME_SET_PROBLEM,
+ TEST_OVERLAPPING_RRNAS,
+ TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES,
+ TEST_TAXNAME_NOT_IN_DEFLINE,
MAX_DISC_TYPE
} DiscrepancyType;
@@ -1124,6 +1195,7 @@ extern void DisableTRNATests (DiscrepancyConfigPtr dcp);
extern CharPtr SetDiscrepancyReportTestsFromString (CharPtr list, Boolean enable, DiscrepancyConfigPtr dcp);
extern void ConfigureForBigSequence (DiscrepancyConfigPtr dcp);
extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp);
+extern void ConfigureForReportType (DiscrepancyConfigPtr dcp, EDiscrepancyReportType report_type);
typedef void (*PerformDiscrepancyTest) PROTO ((ValNodePtr PNTR, ValNodePtr));
@@ -1316,12 +1388,14 @@ typedef struct barcodetestresults
Boolean failed_tests[eBarcodeTest_LAST];
BioseqPtr bsp;
FloatLo n_percent;
+ Int4 num_trace;
} BarcodeTestResultsData, PNTR BarcodeTestResultsPtr;
extern BarcodeTestResultsPtr BarcodeTestResultsNew ();
extern BarcodeTestResultsPtr BarcodeTestResultsFree (BarcodeTestResultsPtr res);
extern BarcodeTestResultsPtr BarcodeTestResultsCopy (BarcodeTestResultsPtr res);
extern ValNodePtr BarcodeTestResultsListFree (ValNodePtr res_list);
+extern ValNodePtr BarcodeTestResultsExtractPass (ValNodePtr PNTR res_list);
extern Boolean IsBarcodeID (SeqIdPtr sip);
@@ -1330,11 +1404,8 @@ extern CharPtr BarcodeTestGenbankIdString (BioseqPtr bsp);
/* This one gets discrepancies by category */
extern ValNodePtr GetBarcodeDiscrepancies (ValNodePtr sep_list, BarcodeTestConfigPtr cfg);
-/* This one lists accessions that fail */
-extern ValNodePtr GetBarcodeFailedAccessionList (SeqEntryPtr sep, BarcodeTestConfigPtr cfg);
extern ValNodePtr GetBarcodePassFail (SeqEntryPtr sep, BarcodeTestConfigPtr cfg);
NLM_EXTERN CharPtr GetBarcodeTestFailureReasons (BarcodeTestResultsPtr res);
-NLM_EXTERN BarcodeTestResultsPtr BarcodeTestResultsForBioseq (BioseqPtr bsp, BarcodeTestConfigPtr cfg);
/* This one lists passes and failures, with reasons for failures */
extern void WriteBarcodeTestComprehensive (FILE *fp, ValNodePtr results_list);
extern void WriteBarcodeDiscrepancies (FILE *fp, ValNodePtr results_list);
@@ -1342,7 +1413,6 @@ extern void WriteBarcodeFailureReport (FILE *fp, ValNodePtr results_list);
extern void WriteBarcodeTestCompliance (FILE *fp, ValNodePtr results_list);
extern void WriteBarcodeTestComplianceEx (FILE *fp, ValNodePtr results_list, Boolean low_trace_fail);
extern void WriteBarcodeTagTable (FILE *fp, ValNodePtr results_list);
-NLM_EXTERN Boolean HasLowTrace (BioseqPtr bsp);
NLM_EXTERN Boolean IsIBOL (BioseqPtr bsp);
NLM_EXTERN Boolean
@@ -1361,9 +1431,11 @@ extern Boolean PassBarcodeTests (BarcodeTestResultsPtr res);
extern Boolean HasBARCODETech (BioseqPtr bsp);
NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp);
NLM_EXTERN Boolean BioseqHasBarcodeKeyword (BioseqPtr bsp);
-NLM_EXTERN ValNodePtr GetBarcodeLowTraceList (SeqEntryPtr sep);
+NLM_EXTERN Boolean BioseqHasKeyword (BioseqPtr bsp, CharPtr keyword);
NLM_EXTERN void RemoveBarcodeKeywordsFromObjectList (FILE *fp, ValNodePtr object_list);
+NLM_EXTERN Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp);
extern Int4 CountPolymorphismsInBioseq (BioseqPtr bsp);
+NLM_EXTERN Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp);
extern CharPtr ExpandDiscrepancyReportTestsFromString (CharPtr list, Boolean expand, DiscReportOutputConfigPtr dcp);
@@ -1429,8 +1501,13 @@ FixCapitalizationInElement
NLM_EXTERN void FixCapitalizationInAuthor (AuthorPtr pAuthor);
NLM_EXTERN void FixCapsInPubAffil (AffilPtr affil);
+NLM_EXTERN void FixCapsInPubAffilEx (AffilPtr affil, Boolean punct_only);
+NLM_EXTERN void FixCapitalizationInCountryString (CharPtr PNTR pCountry);
+NLM_EXTERN void FixCapitalizationInCountryStringEx (CharPtr PNTR pCountry, Boolean punct_only);
+NLM_EXTERN void FixStateAbbreviationsInAffil (AffilPtr affil, LogInfoPtr lip);
NLM_EXTERN void FixAffiliationShortWordsInElement (CharPtr PNTR pEl);
+NLM_EXTERN void FixKnownAbbreviationsInElement (CharPtr PNTR pEl);
NLM_EXTERN void FixAbbreviationsInElement (CharPtr PNTR pEl);
NLM_EXTERN void FixOrgNamesInString (CharPtr str, ValNodePtr org_names);
@@ -1537,6 +1614,8 @@ NLM_EXTERN void ConvertLocalIdsToBarcodeIds (SeqEntryPtr sep);
NLM_EXTERN ValNodePtr MakeTokensFromLine (CharPtr line);
NLM_EXTERN SeqFeatPtr GetGeneForFeature (SeqFeatPtr sfp);
+NLM_EXTERN SeqFeatPtr GetmRNAforCDS (SeqFeatPtr cds);
+NLM_EXTERN SeqFeatPtr GetCDSformRNA (SeqFeatPtr mrna);
NLM_EXTERN Boolean IsStringInSpanInList (CharPtr str, CharPtr list);
@@ -1563,6 +1642,7 @@ NLM_EXTERN CharPtr GetRemovableItemName (Int4 i);
typedef enum {
DEFLINE_USE_FEATURES = 1,
DEFLINE_COMPLETE_SEQUENCE,
+ DEFLINE_PARTIAL_SEQUENCE,
DEFLINE_COMPLETE_GENOME,
DEFLINE_PARTIAL_GENOME,
DEFLINE_SEQUENCE
@@ -1578,6 +1658,7 @@ typedef struct deflinefeaturerequestlist {
Boolean suppress_locus_tags;
ValNodePtr suppressed_feature_list;
Boolean use_ncrna_note;
+ Boolean suppress_allele;
} DeflineFeatureRequestList, PNTR DeflineFeatureRequestListPtr;
NLM_EXTERN void InitFeatureRequests (DeflineFeatureRequestListPtr feature_requests);
@@ -1729,6 +1810,8 @@ typedef struct sourcequaldesc
Uint1 subfield;
} SourceQualDescData, PNTR SourceQualDescPtr;
+NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2);
+
NLM_EXTERN void SetRequiredModifiers (ModifierItemLocalPtr modList);
NLM_EXTERN void CountModifiers (ModifierItemLocalPtr ItemList, SeqEntryPtr sep);
NLM_EXTERN ValNodePtr FindBestModifiersEx(SeqEntryPtr sep, ModifierItemLocalPtr ItemList, Boolean use_new);
@@ -1763,6 +1846,14 @@ BuildDefinitionLinesFromFeatureClauseLists
OrganismDescriptionModifiersPtr odmp);
NLM_EXTERN void
+BuildDefLinesFromFeatClauseListsForOneBsp
+(ValNodePtr list,
+ ModifierItemLocalPtr modList,
+ ValNodePtr modifier_indices,
+ OrganismDescriptionModifiersPtr odmp,
+ BioseqPtr bsp);
+
+NLM_EXTERN void
AutoDefForSeqEntry
(SeqEntryPtr sep,
Uint2 entityID,
@@ -1806,6 +1897,7 @@ NLM_EXTERN void AddModifierLabel
CharPtr modifier_text);
NLM_EXTERN Boolean LIBCALLBACK IsMobileElement (SeqFeatPtr sfp);
NLM_EXTERN void RemoveNucProtSetTitles (SeqEntryPtr sep);
+NLM_EXTERN void RemoveMRnaTitles (SeqEntryPtr sep);
NLM_EXTERN void RemoveProteinTitles (SeqEntryPtr sep);
NLM_EXTERN void SetAutoDefIDModifiers (ModifierItemLocalPtr modList);
@@ -1813,19 +1905,36 @@ NLM_EXTERN void SetAutoDefIDModifiers (ModifierItemLocalPtr modList);
NLM_EXTERN ValNodePtr ReadTabTableFromFile (FILE *fp);
NLM_EXTERN ValNodePtr FlipTabTableAxes (ValNodePtr row_list);
NLM_EXTERN ValNodePtr FreeTabTable (ValNodePtr row_list);
+NLM_EXTERN ValNodePtr CopyTabTable (ValNodePtr row_list);
NLM_EXTERN void WriteTabTableToFile (ValNodePtr table, FILE *fp);
NLM_EXTERN ValNodePtr CountTabTableBlanks (ValNodePtr row_list);
NLM_EXTERN ValNodePtr ScanTabTableForSpecialCharacters (ValNodePtr row_list);
+NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInText (CharPtr PNTR text);
+NLM_EXTERN void AutoReplaceSpecialCharactersWithMessage (CharPtr PNTR text);
+NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInTabTable (ValNodePtr row_list);
+NLM_EXTERN void AutoFixSpecialCharactersInEntity (Uint2 entityID);
+
NLM_EXTERN void RemoveQuotesFromTabTable (ValNodePtr row_list);
NLM_EXTERN void ReparseTabTableConvertFirstSpaceToTab (ValNodePtr row_list);
NLM_EXTERN void ReparseTabTableConvertMultiSpaceToTab (ValNodePtr row_list);
NLM_EXTERN void CombineTabTableColumns (ValNodePtr row_list, ValNodePtr column_pos, CharPtr delimiter);
+NLM_EXTERN void ReparseTabTableSeparateColumnAtDelimiter (ValNodePtr row_list, Char delimiter, Int4 col, Boolean stop_after_first);
NLM_EXTERN void AddTextToTabTableColumn (ValNodePtr row_list, Int4 col, CharPtr text, Uint2 existing_text);
NLM_EXTERN ValNodePtr ReadOneColumnList (CharPtr line);
+NLM_EXTERN ValNodePtr SortTableRowByAnyColumn (ValNodePtr table, Int4 column);
NLM_EXTERN void SpecialCharFindWithContext (CharPtr PNTR strp, Pointer userdata, BoolPtr did_find, BoolPtr did_change);
NLM_EXTERN ValNodePtr FreeContextList (ValNodePtr context_list);
+typedef struct twostringhash { /* lookup table used by the TwoStringHash functions declared below */
+ CharPtr PNTR table; /* pointer to CharPtr entries; presumably key/value strings from two table columns - TODO confirm layout in the .c file */
+ Int4 num_lines; /* presumably the number of table rows represented in table - TODO confirm */
+} TwoStringHashData, PNTR TwoStringHashPtr;
+
+NLM_EXTERN TwoStringHashPtr TwoStringHashFree (TwoStringHashPtr tsh);
+NLM_EXTERN TwoStringHashPtr MakeTwoStringHashFromTabTable (ValNodePtr line_list, Int4 column1, Int4 column2);
+NLM_EXTERN CharPtr GetValueFromTwoStringHash (CharPtr key, TwoStringHashPtr tsh);
+
NLM_EXTERN Int4 ExtendSeqLocToEnd (SeqLocPtr slp, BioseqPtr bsp, Boolean end5);
/* functions for converting features */
@@ -1866,6 +1975,7 @@ NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertRegionToRNAFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertGeneToMiscFeatFunc (SeqFeatPtr sfp, Uint2 featdef_to);
NLM_EXTERN Boolean ConvertProtToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to);
+NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp);
NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp);
NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep);
NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp);
@@ -1877,12 +1987,14 @@ NLM_EXTERN Boolean CodingRegionHasTranslExcept (SeqFeatPtr sfp);
NLM_EXTERN SeqEntryPtr SequenceStringToSeqEntry (CharPtr str, SeqIdPtr sip, Uint1 mol_type);
NLM_EXTERN void RevCompOneFeatForBioseq (SeqFeatPtr sfp, BioseqPtr bsp);
+NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent);
NLM_EXTERN ValNodePtr SplitPubListFree (ValNodePtr list);
NLM_EXTERN ValNodePtr MakeSplitPubListFromTabList (ValNodePtr PNTR tab_table, SeqEntryPtr sep, ValNodePtr PNTR err_list);
NLM_EXTERN void SplitPubsByList (ValNodePtr split_list);
/* for parsing collection dates */
NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first, BoolPtr month_ambiguous);
+NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date);
NLM_EXTERN Int4 GetYearFromToken (CharPtr token, Int4 token_len);
NLM_EXTERN Int4 ReadNumberFromToken (CharPtr token, Int4 token_len);
NLM_EXTERN CharPtr GetMonthFromToken (CharPtr token, Int4 token_len);
@@ -1890,6 +2002,7 @@ NLM_EXTERN Int4 GetMonthNumFromAbbrev (CharPtr month_abbrev);
NLM_EXTERN CharPtr GetMonthAbbrev (Int4 n);
NLM_EXTERN Int4 GetDaysInMonth (Int4 n);
+NLM_EXTERN void CreateStructuredCommentsForAllFromTable (SeqEntryPtr sep, ValNodePtr header, ValNodePtr line, ValNodePtr PNTR err_list);
NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr sep, Boolean apply_to_all);
NLM_EXTERN void AddDatabaseNameToStructuredComment (UserObjectPtr uop, CharPtr dbname);
NLM_EXTERN ValNodePtr CreateStructuredCommentTableFromSeqEntry (SeqEntryPtr sep);
@@ -1932,6 +2045,7 @@ NLM_EXTERN SeqFeatPtr FindBestProtein (Uint2 entityID, SeqLocPtr product);
NLM_EXTERN void AddNonExtendableException (SeqFeatPtr sfp);
NLM_EXTERN SeqLocPtr GetmRNALocationFromCDSLocation (SeqLocPtr slp, Uint2 entityID);
NLM_EXTERN void AddmRNAForCDS (SeqFeatPtr sfp);
+NLM_EXTERN Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str);
NLM_EXTERN SeqSubmitPtr FindSeqSubmitForSeqEntry (SeqEntryPtr sep);
NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp);
NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqFeatPtr top_cds, Boolean remove_original);
@@ -1954,9 +2068,9 @@ NLM_EXTERN void SegregateSetsByNumber (SeqEntryPtr sep, Int4 num_sets);
NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberPerSet (Int4 num_per_set, SeqEntryPtr sep);
NLM_EXTERN void SegregateSetsByNumberPerSet (SeqEntryPtr sep, Int4 num_per_set);
-NLM_EXTERN ValNodePtr CreateStructuredCommentsFromRow (ValNodePtr header, ValNodePtr values, CharPtr id_str, ValNodePtr PNTR err_list);
+NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID);
-NLM_EXTERN CharPtr CompressSpaces (CharPtr str);
+NLM_EXTERN ValNodePtr CreateStructuredCommentsFromRow (ValNodePtr header, ValNodePtr values, CharPtr id_str, ValNodePtr PNTR err_list);
NLM_EXTERN void MergeAdjacentAnnotsInList (SeqAnnotPtr sap);
@@ -1976,7 +2090,7 @@ NLM_EXTERN void ParseTaxNameToQuals (OrgRefPtr org, TextFsaPtr tags);
NLM_EXTERN ValNodePtr GetLocusTagPrefixList (SeqEntryPtr sep);
NLM_EXTERN Boolean IsProductNameOk (CharPtr product_name);
-extern void FindSuspectProductNamesInNameList (FILE *input_file, FILE *output_file);
+NLM_EXTERN Boolean ReportProductNameProblems (CharPtr product_name, FILE *output_file, CharPtr prefix);
NLM_EXTERN SeqEntryPtr ReadFilteredAsn (FILE *fp, Boolean is_binary, CharPtr accn_list, Uint2Ptr entityIDptr);
NLM_EXTERN void ReintegrateFilteredAsn (SeqEntryPtr sep, FILE *orig_file, FILE *output, Boolean is_binary);
@@ -1985,6 +2099,8 @@ typedef struct descstream {
SeqDescPtr orig;
SeqDescPtr replace;
SeqIdPtr owners;
+ SeqIdPtr last_owner;
+ Boolean on_all;
CharPtr text;
Int4 num_dependent;
} DescStreamData, PNTR DescStreamPtr;
@@ -1995,6 +2111,8 @@ NLM_EXTERN ValNodePtr DescStreamListFree (ValNodePtr vnp);
NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Boolean is_batch, Boolean is_submit, SeqIdPtr PNTR sip_list);
NLM_EXTERN void WriteAsnWithReplacedDescriptors (ValNodePtr desc_stream_list, FILE *orig_file, FILE *output, Boolean is_binary, Boolean is_batch, Boolean is_submit);
+NLM_EXTERN Boolean IdListsMatch (SeqIdPtr sip_list, ValNodePtr all_sip);
+NLM_EXTERN void SetOnAllValsForDescStreamList (ValNodePtr desc_list, ValNodePtr all_sip);
extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset);
@@ -2020,6 +2138,52 @@ FixCapitalizationInTitle
NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEntryPtr sep);
+NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep);
+
+#ifdef OS_MSWIN
+NLM_EXTERN Int4 RunSilent(const char *cmdline);
+#endif
+
+
+NLM_EXTERN CharPtr ValNodeSeqIdName (ValNodePtr vnp);
+NLM_EXTERN void ValNodeSeqIdFree (ValNodePtr vnp);
+NLM_EXTERN ValNodePtr ValNodeSeqIdCopy (ValNodePtr vnp);
+NLM_EXTERN Boolean ValNodeSeqIdMatch (ValNodePtr vnp1, ValNodePtr vnp2);
+NLM_EXTERN ValNodePtr ValNodeSeqIdListFree (ValNodePtr list);
+NLM_EXTERN ValNodePtr ValNodeSeqIdListCopy (ValNodePtr list);
+NLM_EXTERN ValNodePtr SeqIdListToValNodeSeqIdList (SeqIdPtr sip_list);
+NLM_EXTERN SeqIdPtr ValNodeSeqIdListToSeqIdList (ValNodePtr vnp_list);
+
+NLM_EXTERN void StringToLower (CharPtr str);
+
+NLM_EXTERN ValNodePtr FixupCountryQuals (SeqEntryPtr sep, Boolean fix_after_colon);
+NLM_EXTERN Boolean FixupCountryQualsWithLog (SeqEntryPtr sep, Boolean fix_after_colon, FILE *log_fp);
+NLM_EXTERN Boolean FixupMouseStrains (SeqEntryPtr sep, FILE *log_fp);
+
+NLM_EXTERN CharPtr StructuredCommentDbnameFromString (CharPtr string);
+NLM_EXTERN ValNodePtr GetStructuredCommentPrefixList (void);
+extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields);
+
+NLM_EXTERN Boolean RemoveCultureNotes (SeqEntryPtr sep);
+
+NLM_EXTERN AuthListPtr GetAuthorListForPub (PubPtr the_pub);
+
+NLM_EXTERN void FixProductWordCapitalization (CharPtr PNTR pProduct);
+NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp);
+NLM_EXTERN Boolean IsNCBIFileID (SeqIdPtr sip);
+
+NLM_EXTERN Boolean IsLocationOrganelle (Uint1 genome);
+
+NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2);
+NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp);
+NLM_EXTERN void MakeFeatureXrefsFromProteinIdQuals (SeqEntryPtr sep);
+NLM_EXTERN void MakeFeatureXrefsFromTranscriptIdQuals (SeqEntryPtr sep);
+NLM_EXTERN void FinishHalfXrefs (SeqEntryPtr sep);
+NLM_EXTERN void FlipCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
+NLM_EXTERN void RemoveBadCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
+NLM_EXTERN Uint1 GetAaFromtRNA (tRNAPtr trp);
+NLM_EXTERN CharPtr GetCodesFortRNA (SeqFeatPtr sfp, Int2 *pCode);
+
#ifdef __cplusplus
}
diff --git a/api/subutil.c b/api/subutil.c
index 572c23bf..2536d82f 100644
--- a/api/subutil.c
+++ b/api/subutil.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.87 $
+* $Revision: 6.93 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -56,6 +56,7 @@ static char *this_file = __FILE__;
#include <seqport.h>
#include <utilpars.h>
#include <sqnutils.h>
+#include <explore.h>
/*****************************************************************************
*
@@ -644,7 +645,8 @@ NLM_EXTERN SeqEntryPtr AddSeqOnlyToSubmission (
targetbssp = (BioseqSetPtr) tmp->data.ptrvalue;
if (targetbssp->_class == 7 ||
(targetbssp->_class >= 13 && targetbssp->_class <= 16) ||
- targetbssp->_class == BioseqseqSet_class_wgs_set) {
+ targetbssp->_class == BioseqseqSet_class_wgs_set ||
+ targetbssp->_class == BioseqseqSet_class_small_genome_set) {
tmp = targetbssp->seq_set;
}
}
@@ -886,7 +888,8 @@ NLM_EXTERN SeqEntryPtr AddSegmentedSeqToSubmission (
targetbssp = (BioseqSetPtr) tmp->data.ptrvalue;
if (targetbssp->_class == 7 ||
(targetbssp->_class >= 13 && targetbssp->_class <= 16) ||
- targetbssp->_class == BioseqseqSet_class_wgs_set) {
+ targetbssp->_class == BioseqseqSet_class_wgs_set ||
+ targetbssp->_class == BioseqseqSet_class_small_genome_set) {
tmp = targetbssp->seq_set;
}
}
@@ -1317,7 +1320,8 @@ NLM_EXTERN SeqEntryPtr AddNucProtToSubmission (
targetbssp = (BioseqSetPtr) tmp->data.ptrvalue;
if (targetbssp->_class == 7 ||
(targetbssp->_class >= 13 && targetbssp->_class <= 16) ||
- targetbssp->_class == BioseqseqSet_class_wgs_set) {
+ targetbssp->_class == BioseqseqSet_class_wgs_set ||
+ targetbssp->_class == BioseqseqSet_class_small_genome_set) {
tmp = targetbssp->seq_set;
}
}
@@ -1652,22 +1656,23 @@ NLM_EXTERN Boolean AddBasesToByteStore (ByteStorePtr bsp, CharPtr the_bases)
Uint1 residue;
Uint1Ptr dnaconv;
CharPtr tmp;
+ Char ch;
dnaconv = GetDNAConv();
buf = MemNew(StringLen(the_bases) + 1);
bu = buf;
for (tmp = the_bases; *tmp != '\0'; tmp++)
{
- *tmp = TO_UPPER(*tmp);
- if (*tmp == 'U') *tmp = 'T';
- if (*tmp == 'X') *tmp = 'N';
- residue = dnaconv[*tmp];
+ ch = TO_UPPER(*tmp);
+ if (ch == 'U') ch = 'T';
+ if (ch == 'X') ch = 'N';
+ residue = dnaconv[ch];
if (residue > 2) {
*bu++ = residue;
} else if (residue == 1 && IS_ALPHA(*tmp)) {
*bu++ = 'N';
} else {
- ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", *tmp);
+ ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", ch);
}
}
BSWrite(bsp, buf, (Int4) (bu - buf));
@@ -1684,20 +1689,21 @@ NLM_EXTERN Boolean AddAAsToByteStore (ByteStorePtr bsp, CharPtr the_aas)
Uint1 residue;
Uint1Ptr aaconv;
CharPtr tmp;
+ Char ch;
aaconv = GetProteinConv();
buf = MemNew(StringLen(the_aas) + 1);
bu = buf;
for (tmp = the_aas; *tmp != '\0'; tmp++)
{
- *tmp = TO_UPPER(*tmp);
- residue = aaconv[*tmp];
+ ch = TO_UPPER(*tmp);
+ residue = aaconv[ch];
if (residue > 2) {
*bu++ = residue;
- } else if (residue == 1 && IS_ALPHA(*tmp)) {
+ } else if (residue == 1 && IS_ALPHA(ch)) {
*bu++ = 'X';
} else {
- ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", *tmp);
+ ErrPostEx(SEV_ERROR, 0,0, "Illegal character in Bioseq [%c]", ch);
}
}
@@ -5811,3 +5817,205 @@ NLM_EXTERN void RemoveAllSeqAnnotCleanupUserObjs (
}
}
+
+/* Descriptor-visitor callback: when sdp is a user descriptor whose type string
+ * matches "NcbiAutofix" under StringICmp, stores its user object through data,
+ * which is treated as a UserObjectPtr PNTR; each later match overwrites. */
+static void GetNcbiAutofixDescr(SeqDescrPtr sdp, Pointer data)
+{
+  UserObjectPtr PNTR result = (UserObjectPtr PNTR) data;
+  UserObjectPtr      candidate;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_user) return;
+  candidate = (UserObjectPtr) sdp->data.ptrvalue;
+  if (candidate == NULL || candidate->type == NULL) return;
+  if (StringICmp (candidate->type->str, "NcbiAutofix") != 0) return;
+  if (result != NULL) *result = candidate;
+}
+
+
+/* Returns an "NcbiAutofix" user object found among the descriptors visited
+ * under sep (the last one visited, if several), or NULL if none / sep is NULL. */
+NLM_EXTERN UserObjectPtr FindNcbiAutofixUserObject (
+  SeqEntryPtr sep
+)
+{
+  UserObjectPtr found = NULL;
+
+  if (sep != NULL) {
+    VisitDescriptorsInSep (sep, (Pointer) &found, GetNcbiAutofixDescr);
+  }
+  return found;
+}
+
+
+/* Attaches a new, otherwise empty "NcbiAutofix" user-object descriptor to sep.
+ * Does nothing when sep is NULL or no descriptor could be created for it. */
+NLM_EXTERN void AddNcbiAutofixUserObject (SeqEntryPtr sep)
+{
+  SeqDescrPtr   sdp;
+  UserObjectPtr uop;
+  if (sep == NULL) return;   /* same guard the Find/Remove siblings apply */
+  sdp = CreateNewDescriptor (sep, Seq_descr_user);
+  if (sdp == NULL) return;   /* was dereferenced unconditionally before */
+  uop = UserObjectNew ();
+  uop->type = ObjectIdNew ();
+  uop->type->str = StringSave ("NcbiAutofix");
+  sdp->data.ptrvalue = uop;
+}
+
+
+/* Descriptor-visitor callback: flags "NcbiAutofix" user descriptors for
+ * deletion by setting idx.deleteme.  Only descriptors with a nonzero
+ * extended field are cast to ObjValNodePtr and marked; data is unused. */
+static void RemoveNcbiAutofixDescr(SeqDescrPtr sdp, Pointer data)
+{
+  UserObjectPtr uop;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_user) return;
+  uop = (UserObjectPtr) sdp->data.ptrvalue;
+  if (uop == NULL || uop->type == NULL) return;
+  if (StringICmp (uop->type->str, "NcbiAutofix") != 0) return;
+  if (sdp->extended == 0) return;
+
+  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
+}
+
+
+/* Marks every "NcbiAutofix" user descriptor visited under sep via
+ * RemoveNcbiAutofixDescr, then calls DeleteMarkedObjects on sep.
+ * A NULL sep is ignored. */
+NLM_EXTERN void RemoveNcbiAutofixUserObjects (SeqEntryPtr sep)
+{
+  if (sep != NULL) {
+    VisitDescriptorsInSep (sep, (Pointer) NULL, RemoveNcbiAutofixDescr);
+    DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
+  }
+}
+
+/* Allocates and returns a user object whose type ObjectId carries the
+ * string "Unverified".  No other content of the object is set here. */
+NLM_EXTERN UserObjectPtr CreateUnverifiedUserObject (
+  void
+)
+
+{
+  UserObjectPtr unverified = UserObjectNew ();
+  ObjectIdPtr   type_id = ObjectIdNew ();
+
+  type_id->str = StringSave ("Unverified");
+  unverified->type = type_id;
+
+  return unverified;
+}
+
+/* Descriptor-visitor callback: when sdp is a user descriptor accepted by
+ * IsUnverifiedUserObject, stores its user object through data (treated as
+ * a UserObjectPtr PNTR); a later match overwrites an earlier one. */
+static void GetUnverifiedDescr(SeqDescrPtr sdp, Pointer data)
+{
+  UserObjectPtr PNTR result = (UserObjectPtr PNTR) data;
+  UserObjectPtr      candidate;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_user) return;
+  candidate = (UserObjectPtr) sdp->data.ptrvalue;
+  if (!IsUnverifiedUserObject (candidate)) return;  /* also rejects NULL */
+  if (result != NULL) *result = candidate;
+}
+
+
+/* Returns an "Unverified" user object found among the descriptors visited
+ * under sep (the last one visited, if several), or NULL if none / sep is NULL. */
+NLM_EXTERN UserObjectPtr FindUnverifiedUserObject (
+  SeqEntryPtr sep
+)
+{
+  UserObjectPtr found = NULL;
+
+  if (sep != NULL) {
+    VisitDescriptorsInSep (sep, (Pointer) &found, GetUnverifiedDescr);
+  }
+  return found;
+}
+
+
+/* Attaches a new "Unverified" user-object descriptor to sep.  Does nothing
+ * when sep is NULL or the descriptor cannot be created; no check is made
+ * for an Unverified descriptor that is already present. */
+NLM_EXTERN void AddUnverifiedUserObject (
+  SeqEntryPtr sep
+)
+
+{
+  SeqDescrPtr sdp;
+
+  if (sep == NULL) return;
+  sdp = CreateNewDescriptor (sep, Seq_descr_user);
+  if (sdp != NULL) sdp->data.ptrvalue = CreateUnverifiedUserObject ();
+}
+
+
+/* Ensures a Bioseq carries an "Unverified" user descriptor.
+ * Returns without change when bsp is NULL, when ISA_aa(bsp->mol) holds,
+ * or when a user descriptor accepted by IsUnverifiedUserObject is already
+ * returned by SeqMgrGetNextDescriptor; otherwise adds a new one. */
+NLM_EXTERN void AddUnverifiedUserObjectToBioseq (
+  BioseqPtr bsp
+)
+
+{
+  SeqDescPtr sdp;
+  SeqMgrDescContext context;
+
+  if (bsp == NULL || ISA_aa(bsp->mol)) return;
+
+  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
+       sdp != NULL;
+       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
+    /* stop at the first hit; no extra iterator step is taken after it */
+    if (IsUnverifiedUserObject ((UserObjectPtr) sdp->data.ptrvalue)) return;
+  }
+
+  sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
+  if (sdp != NULL) sdp->data.ptrvalue = CreateUnverifiedUserObject ();
+}
+
+
+/* Descriptor-visitor callback: flags "Unverified" user descriptors for
+ * deletion by setting idx.deleteme.  Only descriptors with a nonzero
+ * extended field are cast to ObjValNodePtr and marked; data is unused. */
+static void RemoveUnverifiedDescr(SeqDescrPtr sdp, Pointer data)
+{
+  UserObjectPtr uop;
+
+  if (sdp == NULL || sdp->choice != Seq_descr_user) return;
+  uop = (UserObjectPtr) sdp->data.ptrvalue;
+  if (uop == NULL || uop->type == NULL) return;
+  if (StringICmp (uop->type->str, "Unverified") != 0) return;
+  if (sdp->extended == 0) return;
+
+  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
+}
+
+
+/* Marks every "Unverified" user descriptor visited under sep via
+ * RemoveUnverifiedDescr, then calls DeleteMarkedObjects on sep.
+ * A NULL sep is ignored. */
+NLM_EXTERN void RemoveUnverifiedUserObjects (SeqEntryPtr sep)
+{
+  if (sep != NULL) {
+    VisitDescriptorsInSep (sep, (Pointer) NULL, RemoveUnverifiedDescr);
+    DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
+  }
+}
+
+
+/* TRUE when uop is non-NULL, has a type ObjectId, and StringICmp reports
+ * that type's str equal to "Unverified"; FALSE otherwise. */
+NLM_EXTERN Boolean IsUnverifiedUserObject (UserObjectPtr uop)
+{
+  if (uop == NULL) return FALSE;
+  if (uop->type == NULL) return FALSE;
+  return (Boolean) (StringICmp (uop->type->str, "Unverified") == 0);
+}
+
diff --git a/api/subutil.h b/api/subutil.h
index 7dbad98e..07d08126 100644
--- a/api/subutil.h
+++ b/api/subutil.h
@@ -31,7 +31,7 @@
*
* Version Creation Date: 11/3/93
*
-* $Revision: 6.78 $
+* $Revision: 6.83 $
*
* File Description: Utilities for creating ASN.1 submissions
*
@@ -692,6 +692,8 @@ NLM_EXTERN Boolean AddGenomeToEntry (
#define SUBSRC_mating_type 38
#define SUBSRC_linkage_group 39
#define SUBSRC_haplogroup 40
+#define SUBSRC_whole_replicon 41
+#define SUBSRC_phenotype 42
#define SUBSRC_other 255
/*********************************************
@@ -739,6 +741,8 @@ NLM_EXTERN Boolean AddGenomeToEntry (
mating-type (38) ,
linkage-group (39) ,
haplogroup (40) ,
+ whole-replicon (41) ,
+ phenotype (42) ,
other (255) } ,
* value is an optional string to give the name (eg. of the
@@ -1623,6 +1627,44 @@ NLM_EXTERN void RemoveAllSeqAnnotCleanupUserObjs (
SeqAnnotPtr sap
);
+NLM_EXTERN UserObjectPtr FindNcbiAutofixUserObject (
+ SeqEntryPtr sep
+);
+
+NLM_EXTERN void AddNcbiAutofixUserObject (
+ SeqEntryPtr sep
+);
+
+NLM_EXTERN void RemoveNcbiAutofixUserObjects (
+ SeqEntryPtr sep
+);
+
+/* Mark unverified sequences */
+
+NLM_EXTERN UserObjectPtr CreateUnverifiedUserObject (
+ void
+);
+
+NLM_EXTERN UserObjectPtr FindUnverifiedUserObject (
+ SeqEntryPtr sep
+);
+
+NLM_EXTERN void AddUnverifiedUserObject (
+ SeqEntryPtr sep
+);
+
+NLM_EXTERN void AddUnverifiedUserObjectToBioseq (
+ BioseqPtr bsp
+);
+
+NLM_EXTERN void RemoveUnverifiedUserObjects (
+ SeqEntryPtr sep
+);
+
+NLM_EXTERN Boolean IsUnverifiedUserObject (
+ UserObjectPtr uop
+);
+
#ifdef __cplusplus
}
diff --git a/api/tofasta.c b/api/tofasta.c
index 58797c41..38c1280e 100644
--- a/api/tofasta.c
+++ b/api/tofasta.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.219 $
+* $Revision: 6.230 $
*
* File Description: various sequence objects to fasta output
*
@@ -876,6 +876,7 @@ static SeqIdPtr ChooseFastaID (BioseqPtr bsp, Boolean allow_mult)
static Int4 BioseqFastaStreamInternal (
BioseqPtr bsp,
SeqLocPtr slp,
+ SeqLitPtr lit,
CharPtr str,
FILE *fp,
ByteStorePtr bs,
@@ -890,16 +891,22 @@ static Int4 BioseqFastaStreamInternal (
)
{
+ Char acc [41];
+ SeqIdPtr accn = NULL;
Char buf [4096];
- Char ch;
+ Char ch, ch1, ch2, ch3;
Int4 count = 0;
+ Int4 gi = -1;
+ SeqIdPtr gpp = NULL;
Char id [128];
+ Uint1 id_format = PRINTID_FASTA_LONG;
+ CharPtr ptr;
StreamFsa sf;
SeqIdPtr sip = NULL;
Char spn [64];
CharPtr tmp;
- if (bsp == NULL && slp == NULL && str == NULL) return 0;
+ if (bsp == NULL && slp == NULL && lit == NULL && str == NULL) return 0;
if (fp == NULL && bs == NULL) return 0;
if (bsp != NULL && bsp->repr == Seq_repr_virtual) return 0;
if (linelen > 128) {
@@ -920,6 +927,7 @@ static Int4 BioseqFastaStreamInternal (
if (grouplen < 1) {
grouplen = 0;
}
+ acc [0] = '\0';
MemSet ((Pointer) &sf, 0, sizeof (StreamFsa));
sf.fp = fp;
sf.bs = bs;
@@ -932,15 +940,108 @@ static Int4 BioseqFastaStreamInternal (
sf.grouplen = grouplen;
sf.skip = skip;
sf.gi = 0;
- if (bsp != NULL) {
- for (sip = bsp->id; sip != NULL; sip = sip->next) {
- if (sip->choice != SEQID_GI) continue;
- sf.gi = sip->data.intvalue;
- }
- }
sf.start = 0;
sf.seqpos = 0;
sf.seqspans = (Boolean) ((flags & STREAM_HTML_SPANS) != 0);
+ if (sf.seqspans) {
+ if (bsp != NULL) {
+ for (sip = bsp->id; sip != NULL; sip = sip->next) {
+ switch (sip->choice) {
+ case SEQID_GI :
+ gi = sip->data.intvalue;
+ break;
+ case SEQID_GENBANK :
+ case SEQID_EMBL :
+ case SEQID_DDBJ :
+ case SEQID_OTHER :
+ accn = sip;
+ break;
+ case SEQID_PIR :
+ case SEQID_SWISSPROT :
+ case SEQID_PRF :
+ case SEQID_PDB :
+ accn = sip;
+ break;
+ case SEQID_TPG :
+ case SEQID_TPE :
+ case SEQID_TPD :
+ accn = sip;
+ break;
+ case SEQID_GPIPE :
+ /* should not override better accession */
+ gpp = sip;
+ break;
+ default :
+ break;
+ }
+ }
+ } else if (slp != NULL) {
+ /* PUBSEQ_OS will send a SeqInt with a chain of Seq-ids */
+ for (sip = SeqLocId (slp); sip != NULL; sip = sip->next) {
+ switch (sip->choice) {
+ case SEQID_GI :
+ gi = sip->data.intvalue;
+ break;
+ case SEQID_GENBANK :
+ case SEQID_EMBL :
+ case SEQID_DDBJ :
+ case SEQID_OTHER :
+ accn = sip;
+ break;
+ case SEQID_PIR :
+ case SEQID_SWISSPROT :
+ case SEQID_PRF :
+ case SEQID_PDB :
+ accn = sip;
+ break;
+ case SEQID_TPG :
+ case SEQID_TPE :
+ case SEQID_TPD :
+ accn = sip;
+ break;
+ case SEQID_GPIPE :
+ /* should not override better accession */
+ gpp = sip;
+ break;
+ default :
+ break;
+ }
+ }
+ if (sip != NULL && sip->choice == SEQID_GI) {
+ sf.gi = sip->data.intvalue;
+ }
+ }
+ if (gi > 0) {
+ sf.gi = gi;
+ }
+ if (accn == NULL) {
+ accn = gpp;
+ }
+ if (accn != NULL) {
+ SeqIdWrite (accn, acc, PRINTID_TEXTID_ACC_ONLY, sizeof (acc) - 1);
+
+ if (accn->choice == SEQID_PDB) {
+ ptr = StringChr (acc, '_');
+ if (ptr != NULL) {
+ ch1 = ptr [1];
+ if (ch1 != '\0') {
+ ch2 = ptr [2];
+ if (ch2 != '\0') {
+ ch3 = ptr [3];
+ if (ch3 == '\0') {
+ if (ch1 == ch2) {
+ if (IS_UPPER (ch1)) {
+ ptr [1] = TO_LOWER (ch1);
+ ptr [2] = '\0';
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
if (do_defline) {
id [0] = '\0';
if (substitute_ids) {
@@ -948,7 +1049,10 @@ static Int4 BioseqFastaStreamInternal (
} else if (bsp != NULL) {
sip = bsp->id;
}
- SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
+ if ((flags & STREAM_ALL_FASTA_IDS) != 0) {
+ id_format = PRINTID_FASTA_ALL;
+ }
+ SeqIdWrite (sip, id, id_format, sizeof (id) - 1);
/* no longer need to do feature indexing if title not present to speed up creation */
/*
sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_title, NULL);
@@ -984,6 +1088,8 @@ static Int4 BioseqFastaStreamInternal (
count = SeqPortStream (bsp, flags, (Pointer) &sf, FsaStreamProc);
} else if (slp != NULL) {
count = SeqPortStreamLoc (slp, flags, (Pointer) &sf, FsaStreamProc);
+ } else if (lit != NULL) {
+ count = SeqPortStreamLit (lit, flags, (Pointer) &sf, FsaStreamProc);
} else if (str != NULL) {
count = StringLen (str);
FsaStreamProc (str, (Pointer) &sf);
@@ -1007,6 +1113,12 @@ static Int4 BioseqFastaStreamInternal (
fprintf (sf.fp, "</span>");
}
fprintf (sf.fp, "\n");
+ if (sf.seqspans) {
+ fprintf (sf.fp, "<script type=\"text/javascript\">");
+ fprintf (sf.fp, "if (typeof(oData) == \"undefined\") oData = []; ");
+ fprintf (sf.fp, "oData.push({gi:%ld,acc:\"%s\"})", (long) sf.gi, acc);
+ fprintf (sf.fp, "</script>\n");
+ }
} else if (sf.bs != NULL) {
if (sf.seqspans) {
sprintf (spn, "<span class=\"ff_line\" id=\"gi_%ld_%ld\">", (long) sf.gi, (long) (sf.start + 1));
@@ -1017,6 +1129,16 @@ static Int4 BioseqFastaStreamInternal (
BSWrite (sf.bs, "</span>", sizeof ("</span>"));
}
BSWrite (sf.bs, "\n", sizeof ("\n"));
+ if (sf.seqspans) {
+ sprintf (spn, "<script type=\"text/javascript\">");
+ BSWrite (sf.bs, spn, StringLen (spn));
+ sprintf (spn, "if (typeof(oData) == \"undefined\") oData = []; ");
+ BSWrite (sf.bs, spn, StringLen (spn));
+ sprintf (spn, "oData.push({gi:%ld,acc:\"%s\"})", (long) sf.gi, acc);
+ BSWrite (sf.bs, spn, StringLen (spn));
+ sprintf (spn, "</script>\n");
+ BSWrite (sf.bs, spn, StringLen (spn));
+ }
}
}
return count;
@@ -1033,7 +1155,7 @@ NLM_EXTERN Int4 BioseqFastaStream (
)
{
- return BioseqFastaStreamInternal (bsp, NULL, NULL, fp, NULL, flags,
+ return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, fp, NULL, flags,
linelen, blocklen, grouplen,
do_defline, FALSE, FALSE, 0);
}
@@ -1051,7 +1173,7 @@ NLM_EXTERN Int4 BioseqFastaStreamEx (
)
{
- return BioseqFastaStreamInternal (bsp, NULL, NULL, fp, NULL, flags,
+ return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, fp, NULL, flags,
linelen, blocklen, grouplen,
do_defline, substitute_ids, sorted_protein, 0);
}
@@ -1067,7 +1189,7 @@ NLM_EXTERN Int4 BioseqFastaMemStream (
)
{
- return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, bs, flags,
+ return BioseqFastaStreamInternal (bsp, NULL, NULL, NULL, NULL, bs, flags,
linelen, blocklen, grouplen,
do_defline, FALSE, FALSE, 0);
}
@@ -1084,7 +1206,24 @@ NLM_EXTERN Int4 SeqLocFastaStream (
{
if (slp == NULL || fp == NULL) return 0;
- return BioseqFastaStreamInternal (NULL, slp, NULL, fp, NULL, flags,
+ return BioseqFastaStreamInternal (NULL, slp, NULL, NULL, fp, NULL, flags,
+ linelen, blocklen, grouplen,
+ FALSE, FALSE, FALSE, 0);
+}
+
+NLM_EXTERN Int4 SeqLitFastaStream (
+ SeqLitPtr lit,
+ FILE *fp,
+ StreamFlgType flags,
+ Int2 linelen,
+ Int2 blocklen,
+ Int2 grouplen
+)
+/* Streams lit to fp through BioseqFastaStreamInternal and returns its count; returns 0 when lit or fp is NULL. */
+{
+ if (lit == NULL || fp == NULL) return 0;
+ /* only the lit slot is filled (bsp, slp, str and bs are NULL); do_defline, substitute_ids, sorted_protein are FALSE and skip is 0 */
+ return BioseqFastaStreamInternal (NULL, NULL, lit, NULL, fp, NULL, flags,
linelen, blocklen, grouplen,
FALSE, FALSE, FALSE, 0);
}
@@ -1275,7 +1414,7 @@ NLM_EXTERN Int4 CdRegionFastaStream (
skip = 2;
}
- return BioseqFastaStreamInternal (NULL, sfp->location, NULL, fp, NULL, flags,
+ return BioseqFastaStreamInternal (NULL, sfp->location, NULL, NULL, fp, NULL, flags,
linelen, blocklen, grouplen,
FALSE, FALSE, FALSE, skip);
}
@@ -1330,7 +1469,7 @@ NLM_EXTERN Int4 TranslationFastaStream (
}
}
- count = BioseqFastaStreamInternal (NULL, NULL, str, fp, NULL, flags,
+ count = BioseqFastaStreamInternal (NULL, NULL, NULL, str, fp, NULL, flags,
linelen, blocklen, grouplen,
FALSE, FALSE, FALSE, 0);
@@ -1339,6 +1478,153 @@ NLM_EXTERN Int4 TranslationFastaStream (
return count;
}
+/* Writes the FASTA ID line and defline for gene feature sfp to fp. The defline
+   is "?" when no Bioseq is found or SeqMgrGetDesiredFeature does not return sfp. */
+static void DoGeneDefline (
+  SeqFeatPtr sfp,
+  FILE *fp,
+  GeneRefPtr grp,
+  CharPtr idSuffix
+)
+{
+  BioseqPtr bsp = NULL;
+  Char buf [512];
+  Boolean do_defline = TRUE;
+  Uint2 entityID;
+  SeqMgrFeatContext genecontext;
+  IntAsn2gbJob iaj;
+  Boolean partial5;
+  Boolean partial3;
+  SeqIdPtr sip;
+  CharPtr str;
+  Char tmp [64];
+
+  if (sfp == NULL || fp == NULL || grp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
+  grp = (GeneRefPtr) sfp->data.value.ptrvalue;
+  if (grp == NULL) return;
+
+  if (do_defline) {
+    bsp = BioseqFindFromSeqLoc (sfp->location);
+    if (bsp == NULL) {
+      do_defline = FALSE;
+      StringCpy (buf, "lcl|");
+      sip = SeqLocId (sfp->location);
+      if (sip != NULL) {
+        SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp) - 1);
+        StringCat (buf, tmp);
+      }
+      if (StringDoesHaveText (idSuffix) && StringLen (idSuffix) < 200) {
+        StringCat (buf, idSuffix);
+      }
+      FastaFileFunc (bsp, FASTA_ID, buf, sizeof (buf), (Pointer) fp);
+      StringCpy (buf, "?");
+      FastaFileFunc (bsp, FASTA_DEFLINE, buf, sizeof (buf), (Pointer) fp);
+      fflush (fp);
+    }
+  }
+
+  if (do_defline && bsp != NULL) {
+    if (sfp != SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &genecontext)) {
+      do_defline = FALSE;
+      StringCpy (buf, "lcl|");
+      sip = SeqIdFindWorst (bsp->id);
+      if (sip != NULL) {
+        SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp) - 1);
+        StringCat (buf, tmp);
+      }
+      if (StringDoesHaveText (idSuffix) && StringLen (idSuffix) < 200) {
+        StringCat (buf, idSuffix);
+      }
+      FastaFileFunc (bsp, FASTA_ID, buf, sizeof (buf), (Pointer) fp);
+      StringCpy (buf, "?");
+      FastaFileFunc (bsp, FASTA_DEFLINE, buf, sizeof (buf), (Pointer) fp);
+      fflush (fp);
+    }
+  }
+
+  if (do_defline) {
+    entityID = ObjMgrGetEntityIDForPointer (bsp);
+    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
+      SeqMgrIndexFeatures (entityID, NULL);
+    }
+
+    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+
+    MemSet ((Pointer) &iaj, 0, sizeof (IntAsn2gbJob));
+    iaj.flags.iupacaaOnly = FALSE;
+    iaj.relModeError = FALSE;
+
+    StringCpy (buf, "lcl|");
+    sip = SeqIdFindWorst (bsp->id);
+    if (sip != NULL) {
+      SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp) - 1);
+      StringCat (buf, tmp);
+    }
+    if (StringDoesHaveText (idSuffix) && StringLen (idSuffix) < 200) {
+      StringCat (buf, idSuffix);
+    }
+
+    FastaFileFunc (bsp, FASTA_ID, buf, sizeof (buf), (Pointer) fp);
+    /* gene labels are capped at 200 characters, like idSuffix, so buf cannot overflow */
+    buf [0] = '\0';
+    if (StringDoesHaveText (grp->locus) && StringLen (grp->locus) < 200) {
+      StringCat (buf, "[gene=");
+      StringCat (buf, grp->locus);
+      StringCat (buf, "] ");
+    }
+    if (StringDoesHaveText (grp->locus_tag) && StringLen (grp->locus_tag) < 200) {
+      StringCat (buf, "[locus_tag=");
+      StringCat (buf, grp->locus_tag);
+      StringCat (buf, "] ");
+    }
+    if (StringLen (buf) == 0 && StringDoesHaveText (genecontext.label) && StringLen (genecontext.label) < 200) {
+      StringCat (buf, "[gene=");
+      StringCat (buf, genecontext.label);
+      StringCat (buf, "] ");
+    }
+    str = FFFlatLoc (&iaj, bsp, sfp->location, FALSE, FALSE);
+    /* 13 = strlen ("[location=") + strlen ("] ") + terminating NUL */
+    if (str != NULL && StringLen (str) + StringLen (buf) + 13 <= sizeof (buf)) {
+      StringCat (buf, "[location=");
+      StringCat (buf, str);
+      StringCat (buf, "] ");
+    }
+    MemFree (str); /* released even when the location text did not fit */
+    TrimSpacesAroundString (buf);
+
+    FastaFileFunc (bsp, FASTA_DEFLINE, buf, sizeof (buf), (Pointer) fp);
+    fflush (fp);
+  }
+}
+
+/* Streams the location of gene feature sfp to fp as FASTA, after an optional
+   defline. Yields 0 with no output when sfp, fp or the GeneRef is unusable. */
+NLM_EXTERN Int4 GeneFastaStream (
+  SeqFeatPtr sfp,
+  FILE *fp,
+  StreamFlgType flags,
+  Int2 linelen,
+  Int2 blocklen,
+  Int2 grouplen,
+  Boolean do_defline,
+  CharPtr idSuffix
+)
+{
+  GeneRefPtr gene_ref;
+  Int4 streamed;
+
+  if (fp == NULL || sfp == NULL) return 0;
+  if (sfp->data.choice != SEQFEAT_GENE) return 0;
+  gene_ref = (GeneRefPtr) sfp->data.value.ptrvalue;
+  if (gene_ref == NULL) return 0;
+  if (do_defline) DoGeneDefline (sfp, fp, gene_ref, idSuffix);
+
+  streamed = BioseqFastaStreamInternal (NULL, sfp->location, NULL, NULL, fp, NULL, flags,
+                                        linelen, blocklen, grouplen,
+                                        FALSE, FALSE, FALSE, 0);
+  return streamed;
+}
+
/*****************************************************************************
*
* SeqEntryFastaStream (bsp, fp, flags, linelen, blocklen, grouplen,
@@ -5109,6 +5395,7 @@ typedef struct deflinestruct {
/* subsource fields */
CharPtr m_chromosome;
CharPtr m_clone;
+ Boolean m_has_clone;
CharPtr m_map;
CharPtr m_plasmid;
CharPtr m_segment;
@@ -5117,6 +5404,9 @@ typedef struct deflinestruct {
CharPtr m_isolate;
CharPtr m_strain;
+ /* user object fields */
+ Boolean m_is_unverified;
+
/* exception fields */
TextFsaPtr m_low_quality_fsa;
} DefLineData, PNTR DefLinePtr;
@@ -5176,6 +5466,7 @@ static void x_SetFlags (
SeqIdPtr sip;
CharPtr str;
TextSeqIdPtr tsip;
+ UserObjectPtr uop;
ValNodePtr vnp;
if (dlp == NULL) return;
@@ -5307,6 +5598,19 @@ static void x_SetFlags (
}
}
+ /* process Unverified user object */
+ for (sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_user, NULL);
+ sdp != NULL;
+ sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_user, sdp)) {
+ if (sdp->choice != Seq_descr_user) continue;
+ uop = (UserObjectPtr) sdp->data.ptrvalue;
+ if (uop == NULL) continue;
+ oip = uop->type;
+ if (oip == NULL) continue;
+ if (StringICmp (oip->str, "Unverified") != 0) continue;
+ dlp->m_is_unverified = TRUE;
+ }
+
if (dlp->m_htg_tech || dlp->m_third_party) {
/* process keywords */
keywords = NULL;
@@ -5365,6 +5669,31 @@ static void x_SetFlags (
}
/* set instance variables from BioSource */
+/* VisitFeaturesOnBsp callback: marks the DefLineData passed as userdata as
+   having a clone when a BioSource feature carries a named clone subsource. */
+static void x_SetSrcClone (
+  SeqFeatPtr sfp,
+  Pointer userdata
+)
+{
+  DefLinePtr dlp = (DefLinePtr) userdata;
+  BioSourcePtr biop;
+  SubSourcePtr ssp;
+
+  if (sfp == NULL || dlp == NULL) return;
+  if (sfp->data.choice != SEQFEAT_BIOSRC) return;
+  biop = (BioSourcePtr) sfp->data.value.ptrvalue;
+  if (biop == NULL) return;
+
+  ssp = biop->subtype;
+  while (ssp != NULL) {
+    if (ssp->subtype == SUBSRC_clone && ! StringHasNoText (ssp->name)) {
+      dlp->m_has_clone = TRUE;
+    }
+    ssp = ssp->next;
+  }
+}
+
static void x_SetBioSrc (
DefLinePtr dlp
)
@@ -5404,6 +5733,7 @@ static void x_SetBioSrc (
break;
case SUBSRC_clone :
dlp->m_clone = ssp->name;
+ dlp->m_has_clone = TRUE;
break;
case SUBSRC_map :
dlp->m_map = ssp->name;
@@ -5444,6 +5774,10 @@ static void x_SetBioSrc (
}
}
}
+
+ if (dlp->m_has_clone) return;
+
+ VisitFeaturesOnBsp (bsp, (Pointer) dlp, x_SetSrcClone);
}
static CharPtr x_TrimFirstNCharacters (
@@ -5593,6 +5927,11 @@ static CharPtr x_DescribeClones (
if (dlp == NULL) return NULL;
+ if (dlp->m_htgs_unfinished && dlp->m_htgs_pooled && dlp->m_has_clone) {
+ result = StringSave (", pooled multiple clones");
+ return result;
+ }
+
str = dlp->m_clone;
if (StringHasNoText (str)) return NULL;
@@ -5606,9 +5945,7 @@ static CharPtr x_DescribeClones (
ch = *str;
}
- if (dlp->m_htgs_unfinished && dlp->m_htgs_pooled) {
- result = StringSave (", pooled multiple clones");
- } else if (count > 3) {
+ if (count > 3) {
sprintf (buf, ", %d clones", (int) count);
result = StringSave (buf);
} else {
@@ -6817,7 +7154,8 @@ static CharPtr x_TitleFromWGS (
}
static CharPtr x_SetPrefix (
- DefLinePtr dlp
+ DefLinePtr dlp,
+ CharPtr title
)
{
@@ -6825,7 +7163,11 @@ static CharPtr x_SetPrefix (
if (dlp == NULL) return NULL;
- if (dlp->m_is_tsa) {
+ if (dlp->m_is_unverified) {
+ if (StringStr (title, "UNVERIFIED") == NULL) {
+ prefix = "UNVERIFIED: ";
+ }
+ } else if (dlp->m_is_tsa) {
prefix = "TSA: ";
} else if (dlp->m_third_party) {
if (dlp->m_tpa_exp) {
@@ -7062,6 +7404,8 @@ NLM_EXTERN CharPtr NewCreateDefLine (
x_TrimFirstNCharacters (title, 10);
} else if (StringNICmp (title, "TSA:", 4) == 0) {
x_TrimFirstNCharacters (title, 4);
+ } else if (StringNICmp (title, "UNVERIFIED:", 11) == 0) {
+ x_TrimFirstNCharacters (title, 11);
}
/* strip leading spaces remaining after removal of old TPA or TSA prefixes */
@@ -7071,7 +7415,7 @@ NLM_EXTERN CharPtr NewCreateDefLine (
x_TrimMostPunctFromEnd (title);
/* calcualte prefix */
- prefix = x_SetPrefix (dlp);
+ prefix = x_SetPrefix (dlp, title);
/* calculate suffix */
suffix = x_SetSuffix (dlp, title);
@@ -7093,6 +7437,8 @@ NLM_EXTERN CharPtr NewCreateDefLine (
dlp = MemFree (dlp);
+ Asn2gnbkCompressSpaces (result);
+
return result;
}
diff --git a/api/tofasta.h b/api/tofasta.h
index e3e43108..af2e15e5 100644
--- a/api/tofasta.h
+++ b/api/tofasta.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 7/12/91
*
-* $Revision: 6.39 $
+* $Revision: 6.41 $
*
* File Description: various sequence objects to fasta output
*
@@ -148,8 +148,10 @@ NLM_EXTERN Boolean BioseqToFastaX PROTO((BioseqPtr bsp, MyFsaPtr mfp, Boolean is
* BioseqFastaStream (bsp, fp, flags, linelen, blocklen, grouplen, do_defline)
* BioseqFastaMemStream (bsp, bs, flags, linelen, blocklen, grouplen, do_defline)
* SeqLocFastaStream (slp, fp, flags, linelen, blocklen, grouplen)
+* SeqLitFastaStream (lit, fp, flags, linelen, blocklen, grouplen)
* CdRegionFastaStream (sfp, fp, flags, linelen, blocklen, grouplen)
* TranslationFastaStream (sfp, fp, flags, linelen, blocklen, grouplen)
+* GeneFastaStream (sfp, fp, flags, linelen, blocklen, grouplen, do_defline, idSuffix)
* SeqEntryFastaStream (sep, fp, flags, linelen, blocklen, grouplen,
* do_na, do_aa, master_style)
*
@@ -197,6 +199,15 @@ NLM_EXTERN Int4 SeqLocFastaStream (
Int2 grouplen
);
+NLM_EXTERN Int4 SeqLitFastaStream (
+ SeqLitPtr lit,
+ FILE *fp,
+ StreamFlgType flags,
+ Int2 linelen,
+ Int2 blocklen,
+ Int2 grouplen
+);
+
NLM_EXTERN Int4 CdRegionFastaStream (
SeqFeatPtr sfp,
FILE *fp,
@@ -219,6 +230,17 @@ NLM_EXTERN Int4 TranslationFastaStream (
CharPtr idSuffix
);
+NLM_EXTERN Int4 GeneFastaStream (
+ SeqFeatPtr sfp,
+ FILE *fp,
+ StreamFlgType flags,
+ Int2 linelen,
+ Int2 blocklen,
+ Int2 grouplen,
+ Boolean do_defline,
+ CharPtr idSuffix
+);
+
NLM_EXTERN Int4 SeqEntryFastaStream (
SeqEntryPtr sep,
FILE *fp,
diff --git a/api/utilpub.c b/api/utilpub.c
index 3d3dd5ad..933ab96a 100644
--- a/api/utilpub.c
+++ b/api/utilpub.c
@@ -259,7 +259,8 @@ Uint2 entityID, Uint4 itemID, Uint2 itemtype)
if (pdp) {
descr = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead,
(AsnWriteFunc) PubdescAsnWrite);
- vnp = ValNodeNew(NULL);
+ if (descr == NULL) return NULL;
+ vnp = ValNodeNew(NULL);
vnp->choice = PUB_Equiv;
vnp->data.ptrvalue = descr->pub;
psp = (PubStructPtr) MemNew(sizeof(PubStruct));
@@ -1595,6 +1596,12 @@ NLM_EXTERN void EntryStripSerialNumber (SeqEntryPtr sep)
}
}
+/* Runs StripSerialNumber over sep via SeqEntryExplore; a NULL sep is a no-op. */
+NLM_EXTERN void ForceStripSerialNumber (SeqEntryPtr sep)
+{
+  if (sep != NULL) SeqEntryExplore (sep, NULL, StripSerialNumber);
+}
+
NLM_EXTERN ValNodePtr remove_node(ValNodePtr head, ValNodePtr x)
{
ValNodePtr v, p;
diff --git a/api/utilpub.h b/api/utilpub.h
index a712195d..4ea30fbf 100644
--- a/api/utilpub.h
+++ b/api/utilpub.h
@@ -64,6 +64,8 @@ NLM_EXTERN Boolean empty_citgen PROTO((CitGenPtr cit));
NLM_EXTERN void EntryStripSerialNumber PROTO((SeqEntryPtr sep));
+NLM_EXTERN void ForceStripSerialNumber PROTO((SeqEntryPtr sep));
+
NLM_EXTERN void VnpHeapSort PROTO ((ValNodePtr PNTR vnp, int (LIBCALLBACK *compar )PROTO ((Nlm_VoidPtr, Nlm_VoidPtr ))));
NLM_EXTERN SeqFeatPtr remove_feat PROTO((SeqFeatPtr head, SeqFeatPtr x));
diff --git a/api/valapi.c b/api/valapi.c
index e7f589aa..8563697c 100755
--- a/api/valapi.c
+++ b/api/valapi.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 4/7/2009
*
-* $Revision: 1.9 $
+* $Revision: 1.10 $
*
* File Description:
*
@@ -136,7 +136,18 @@ static CharPtr commentRulesStr = "Comment-set ::= {\n" \
" { \n" \
" field-name \"Current Finishing Status\" ,\n" \
" match-expression \"^\\(Standard Draft\\|High Quality Draft\\|Improved High Quality Draft\\|Annotation Directed\\|Non-contiguous Finished\\|Finished\\)$\" } } }\n" \
-"} } }\n";
+"} } , \n" \
+" { \n" \
+" prefix \"##Assembly-Data-START##\" , \n" \
+" fields { \n" \
+" { \n" \
+" field-name \"Assembly Method\" , \n" \
+" match-expression \".+ v\\. .+\" , \n" \
+" required TRUE } , \n" \
+" { \n" \
+" field-name \"Sequencing Technology\" , \n" \
+" required TRUE } } } \n" \
+"}\n";
#endif
diff --git a/api/valid.c b/api/valid.c
index c526bbf1..d3fdba59 100644
--- a/api/valid.c
+++ b/api/valid.c
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.1388 $
+* $Revision: 6.1533 $
*
* File Description: Sequence editing utilities
*
@@ -70,6 +70,10 @@ static char *this_file = __FILE__;
#include <macroapi.h>
#include <objvalid.h>
#include <valapi.h>
+#include "ecnum_specific.inc"
+#include "ecnum_ambiguous.inc"
+#include "ecnum_deleted.inc"
+#include "ecnum_replaced.inc"
/*****************************************************************************
*
@@ -121,6 +125,24 @@ static Boolean ECnumberWasDeleted (CharPtr str);
static Boolean ECnumberWasReplaced (CharPtr str);
static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp);
+/* Returns TRUE when sfp carries an ObjectId-valued feature ID (id.choice == 3)
+   whose integer id equals num; a NULL feature or NULL ObjectId yields FALSE. */
+static Boolean HasFeatId(SeqFeatPtr sfp, Int4 num)
+{
+  ObjectIdPtr oip;
+
+  if (sfp == NULL || sfp->id.choice != 3) {
+    return FALSE;
+  }
+  oip = (ObjectIdPtr) sfp->id.value.ptrvalue;
+  /* the choice can be set while the pointer is still NULL; treat as no match */
+  if (oip == NULL) {
+    return FALSE;
+  }
+  return (Boolean) (oip->id == num);
+}
+
+
/* alignment validator */
NLM_EXTERN Boolean ValidateSeqAlignWithinValidator (ValidStructPtr vsp, SeqEntryPtr sep, Boolean find_remote_bsp, Boolean do_hist_assembly);
@@ -161,6 +183,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
Boolean strictLatLonCountry;
Boolean rubiscoTest;
Boolean indexerVersion;
+ Boolean disableSuppression;
Int2 validationLimit;
ValidErrorFunc errfunc;
Pointer userdata;
@@ -174,6 +197,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
Boolean is_gpipe_in_sep;
Boolean is_gps_in_sep;
Boolean is_embl_ddbj_in_sep;
+ Boolean is_old_gb_in_sep;
+ Boolean is_patent_in_sep;
Boolean other_sets_in_sep;
Boolean is_insd_in_sep;
Boolean only_lcl_gnl_in_sep;
@@ -182,6 +207,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
Boolean is_smupd_in_sep;
Boolean feat_loc_has_gi;
Boolean feat_prod_has_gi;
+ Boolean has_multi_int_genes;
+ Boolean has_seg_bioseqs;
Boolean far_fetch_failure;
if (vsp == NULL)
@@ -214,6 +241,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
strictLatLonCountry = vsp->strictLatLonCountry;
rubiscoTest = vsp->rubiscoTest;
indexerVersion = vsp->indexerVersion;
+ disableSuppression = vsp->disableSuppression;
validationLimit = vsp->validationLimit;
errfunc = vsp->errfunc;
userdata = vsp->userdata;
@@ -228,6 +256,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
is_gps_in_sep = vsp->is_gps_in_sep;
other_sets_in_sep = vsp->other_sets_in_sep;
is_embl_ddbj_in_sep = vsp->is_embl_ddbj_in_sep;
+ is_old_gb_in_sep = vsp->is_old_gb_in_sep;
+ is_patent_in_sep = vsp->is_patent_in_sep;
is_insd_in_sep = vsp->is_insd_in_sep;
only_lcl_gnl_in_sep = vsp->only_lcl_gnl_in_sep;
has_gnl_prot_sep = vsp->has_gnl_prot_sep;
@@ -235,6 +265,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
is_smupd_in_sep = vsp->is_smupd_in_sep;
feat_loc_has_gi = vsp->feat_loc_has_gi;
feat_prod_has_gi = vsp->feat_prod_has_gi;
+ has_multi_int_genes = vsp->has_multi_int_genes;
+ has_seg_bioseqs = vsp->has_seg_bioseqs;
far_fetch_failure = vsp->far_fetch_failure;
MemSet ((VoidPtr) vsp, 0, sizeof (ValidStruct));
vsp->errbuf = errbuf;
@@ -264,6 +296,7 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->strictLatLonCountry = strictLatLonCountry;
vsp->rubiscoTest = rubiscoTest;
vsp->indexerVersion = indexerVersion;
+ vsp->disableSuppression = disableSuppression;
vsp->validationLimit = validationLimit;
vsp->errfunc = errfunc;
vsp->userdata = userdata;
@@ -278,6 +311,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->is_gps_in_sep = is_gps_in_sep;
vsp->other_sets_in_sep = other_sets_in_sep;
vsp->is_embl_ddbj_in_sep = is_embl_ddbj_in_sep;
+ vsp->is_old_gb_in_sep = is_old_gb_in_sep;
+ vsp->is_patent_in_sep = is_patent_in_sep;
vsp->is_insd_in_sep = is_insd_in_sep;
vsp->only_lcl_gnl_in_sep = only_lcl_gnl_in_sep;
vsp->has_gnl_prot_sep = has_gnl_prot_sep;
@@ -285,6 +320,8 @@ NLM_EXTERN void ValidStructClear (ValidStructPtr vsp)
vsp->is_smupd_in_sep = is_smupd_in_sep;
vsp->feat_loc_has_gi = feat_loc_has_gi;
vsp->feat_prod_has_gi = feat_prod_has_gi;
+ vsp->has_multi_int_genes = has_multi_int_genes;
+ vsp->has_seg_bioseqs = has_seg_bioseqs;
vsp->far_fetch_failure = far_fetch_failure;
return;
}
@@ -571,7 +608,8 @@ static CharPtr err1Label [] = {
"SeqLitDataLength0",
"DSmRNA",
"HighNContentStretch",
- "HighNContentPercent"
+ "HighNContentPercent",
+ "BadSegmentedSeq"
};
static CharPtr err2Label [] = {
@@ -651,7 +689,12 @@ static CharPtr err2Label [] = {
"BadStrucCommMultipleFields",
"BioSourceNeedsChromosome",
"MolInfoConflictsWithBioSource",
- "MissingKeyword"
+ "MissingKeyword",
+ "FakeStructuredComment",
+ "StructuredCommentPrefixOrSuffixMissing",
+ "LatLonWater",
+ "LatLonOffshore",
+ "MissingPersonalCollectionName"
};
static CharPtr err3Label [] = {
@@ -701,7 +744,9 @@ static CharPtr err4Label [] = {
"MissingSetTitle",
"NucProtSetHasTitle",
"ComponentMissingTitle",
- "SingleItemSet"
+ "SingleItemSet",
+ "MisplacedMolInfo",
+ "ImproperlyNestedSets"
};
static CharPtr err5Label [] = {
@@ -881,7 +926,11 @@ static CharPtr err5Label [] = {
"ShortIntron",
"GeneXrefStrandProblem",
"CDSmRNAXrefLocationProblem",
- "LocusCollidesWithLocusTag"
+ "LocusCollidesWithLocusTag",
+ "IdenticalGeneSymbolAndSynonym",
+ "NeedsNote",
+ "RptUnitRangeProblem",
+ "TooManyInferenceAccessions"
};
static CharPtr err6Label [] = {
@@ -1142,6 +1191,13 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su
} else if (vsp->descr != NULL) {
label = tmp;
diff = SeqDescLabel (vsp->descr, tmp, wrklen, OM_LABEL_BOTH);
+
+ if (diff > 100 && vsp->descr->choice == Seq_descr_comment && errcode == 2 && subcode == 77) {
+ diff = 100;
+ *(tmp + diff - 3) = '.';
+ *(tmp + diff - 2) = '.';
+ *(tmp + diff - 1) = '.';
+ }
buflen -= diff;
tmp += diff;
*tmp = '\0';
@@ -1296,6 +1352,229 @@ static void CustValErr (ValidStructPtr vsp, ErrSev severity, int errcode, int su
featureID, message, objtype, label, context, location, product, vsp->userdata);
}
+
+/* framework for suppressing validator errors using a list-based strategy */
+typedef Boolean (*ValidErrSuppressFunc) PROTO ((ValidStructPtr));
+
+/* TRUE only for a non-NULL vsp whose bsp_genomic_in_sep and is_gpipe_in_sep
+   flags are both set. */
+static Boolean IsGenomicPipeline (ValidStructPtr vsp)
+{
+  if (vsp != NULL && vsp->bsp_genomic_in_sep && vsp->is_gpipe_in_sep) {
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+/* Reports whether the current feature is a CDS or mRNA flagged with an
+   "unclassified translation/transcription discrepancy" exception while
+   vsp->ignoreExceptions is not set. */
+static Boolean IsUnclassifiedExcept (ValidStructPtr vsp)
+{
+  SeqFeatPtr feat;
+  CharPtr phrase = NULL;
+
+  if (vsp == NULL || vsp->sfp == NULL) return FALSE;
+  feat = vsp->sfp;
+  if (! feat->excpt || vsp->ignoreExceptions) return FALSE;
+  if (feat->data.choice == SEQFEAT_CDREGION) {
+    phrase = "unclassified translation discrepancy";
+  } else if (feat->idx.subtype == FEATDEF_mRNA) {
+    phrase = "unclassified transcription discrepancy";
+  }
+  if (phrase == NULL) return FALSE;
+  return (Boolean) (StringStr (feat->except_text, phrase) != NULL);
+}
+
+
+static Boolean IsNotUnclassifiedExcept (ValidStructPtr vsp)
+{ /* logical complement of IsUnclassifiedExcept */
+  return IsUnclassifiedExcept (vsp) ? FALSE : TRUE;
+}
+
+
+/* TRUE when both IsGenomicPipeline and IsUnclassifiedExcept hold for vsp. */
+static Boolean IsUnclassifedExceptAndGenomicPipeline (ValidStructPtr vsp)
+{
+  if (! IsGenomicPipeline (vsp)) return FALSE;
+  if (! IsUnclassifiedExcept (vsp)) return FALSE;
+
+  return TRUE;
+}
+
+
+/* TRUE when the current feature has an exception (and exceptions are not
+   being ignored) whose text contains any of the phrases tested below. */
+static Boolean NonconsensusExcept (ValidStructPtr vsp)
+{
+  CharPtr txt;
+
+  if (vsp == NULL || vsp->sfp == NULL) return FALSE;
+  if (! vsp->sfp->excpt || vsp->ignoreExceptions) return FALSE;
+
+  txt = vsp->sfp->except_text;
+  if (StringISearch (txt, "nonconsensus splice site") != NULL) return TRUE;
+  if (StringISearch (txt, "heterogeneous population sequenced") != NULL) return TRUE;
+  if (StringISearch (txt, "low-quality sequence region") != NULL) return TRUE;
+  if (StringISearch (txt, "artificial location") != NULL) return TRUE;
+  return FALSE;
+}
+
+
+typedef struct validerrsuppression { /* one suppression rule, matched in ShouldSuppressValidErr */
+ int code1; /* must equal the code1 passed to ValidErr */
+ int code2; /* must equal the code2 passed to ValidErr */
+ CharPtr search_phrase; /* if non-NULL, StringISearch must find it in the message format */
+ CharPtr exclude_phrase; /* if non-NULL, StringISearch must NOT find it in the message format */
+ ValidErrSuppressFunc func; /* if non-NULL, must return TRUE for the rule to apply */
+} ValidErrSuppressionData, PNTR ValidErrSuppressionPtr;
+
+static ValidErrSuppressionData valid_suppress[] = { /* rules scanned by ShouldSuppressValidErr; each ERR_ macro presumably expands to the code1, code2 pair -- confirm in validerr.h */
+ {ERR_SEQ_FEAT_PartialProblem, "When SeqFeat.product is a partial Bioseq, SeqFeat.location should also be partial", NULL, IsGenomicPipeline },
+ {ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial", NULL, IsGenomicPipeline},
+ {ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial", NULL, IsGenomicPipeline},
+ {ERR_SEQ_FEAT_PartialProblem, "AND is not at consensus splice site", NULL, IsGenomicPipeline},
+ {ERR_SEQ_FEAT_PartialProblem, "PartialLocation: Internal partial intervals do not include first/last residue of sequence", NULL, IsGenomicPipeline},
+ {ERR_SEQ_FEAT_PartialProblem, "AND is not at consensus splice site", NULL, NonconsensusExcept},
+ {ERR_SEQ_FEAT_PartialProblem, "(but is at consensus splice site)", NULL, IsGenomicPipeline},
+ {ERR_SEQ_FEAT_PartialProblem, "PartialLocation: Start does not include first/last residue of sequence", NULL, IsGenomicPipeline},
+ {ERR_SEQ_FEAT_PartialProblem, "PartialLocation: Stop does not include first/last residue of sequence", NULL, IsGenomicPipeline},
+ {ERR_SEQ_FEAT_PartialsInconsistent, NULL, NULL, IsGenomicPipeline },
+ {ERR_SEQ_FEAT_PolyATail, NULL, NULL, IsGenomicPipeline },
+ {ERR_SEQ_FEAT_InternalStop, NULL, NULL, IsUnclassifedExceptAndGenomicPipeline},
+ {ERR_SEQ_FEAT_StartCodon , NULL, NULL, IsUnclassifiedExcept}
+
+};
+
+const Int4 kNumSuppressionRules = sizeof (valid_suppress) / sizeof (ValidErrSuppressionData);
+
+/* Scans valid_suppress for a rule matching (code1, code2) and fmt; TRUE means
+   the message should be dropped. Never suppresses if vsp->disableSuppression. */
+static Boolean ShouldSuppressValidErr (ValidStructPtr vsp, int code1, int code2, const char *fmt)
+{
+  Int4 idx;
+  ValidErrSuppressionPtr rule;
+
+  if (vsp->disableSuppression) return FALSE;
+  for (idx = 0; idx < kNumSuppressionRules; idx++) {
+    rule = &(valid_suppress [idx]);
+    if (code1 != rule->code1 || code2 != rule->code2) continue;
+    if (rule->search_phrase != NULL && StringISearch (fmt, rule->search_phrase) == NULL) continue;
+    if (rule->func != NULL && ! rule->func (vsp)) continue;
+    if (rule->exclude_phrase != NULL && StringISearch (fmt, rule->exclude_phrase) != NULL) continue;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* framework for changing validator warnings using a list-based strategy */
+typedef int (*ValidErrSevChangeFunc) PROTO ((int, ValidStructPtr));
+
+typedef struct validerrsevchange { /* one severity-adjustment rule, applied in AdjustSeverity */
+ int code1; /* must equal the code1 passed to ValidErr */
+ int code2; /* must equal the code2 passed to ValidErr */
+ CharPtr search_phrase; /* if non-NULL, StringISearch must find it in the message format */
+ CharPtr exclude_phrase; /* if non-NULL, StringISearch must NOT find it in the message format */
+ ValidErrSevChangeFunc func; /* maps the running severity to a new one; rules with NULL func are skipped */
+} ValidErrSevChangeData, PNTR ValidErrSevChangePtr;
+
+
+/* Severity rule: yields SEV_INFO under IsGenomicPipeline, else severity. */
+static int LowerToInfoForGenomic (int severity, ValidStructPtr vsp)
+{
+  int adjusted = severity;
+
+  if (IsGenomicPipeline (vsp)) adjusted = SEV_INFO;
+  return adjusted;
+}
+
+
+/* Severity rule: caps severity at SEV_WARNING when the entry is a gen-prod-set or
+   the feature location touches a Bioseq with a SEQID_OTHER accession. */
+static int WarnForGPSOrRefSeq (int severity, ValidStructPtr vsp)
+{
+  Boolean gpsOrRefSeq = FALSE;
+  SeqEntryPtr sep;
+  SeqFeatPtr sfp;
+  BioseqSetPtr bssp;
+  SeqLocPtr head, slp = NULL, nxt;
+  SeqIdPtr sip, id;
+  BioseqPtr bsp;
+  TextSeqIdPtr tsip;
+
+  if (vsp == NULL) return severity;
+  sep = vsp->sep;
+  if (sep != NULL && IS_Bioseq_set (sep)) {
+    bssp = (BioseqSetPtr) sep->data.ptrvalue;
+    if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
+      gpsOrRefSeq = TRUE;
+    }
+  }
+
+  /* vsp->sfp may be NULL; without a feature there is no location to inspect */
+  sfp = vsp->sfp;
+  if (!gpsOrRefSeq && sfp != NULL) {
+    head = sfp->location;
+    slp = SeqLocFindPart (head, slp, EQUIV_IS_ONE);
+    while (slp != NULL && !gpsOrRefSeq) {
+      sip = SeqLocId (slp);
+      if (sip == NULL) break;
+      nxt = SeqLocFindPart (head, slp, EQUIV_IS_ONE);
+
+      /* any SEQID_OTHER id with an accession on the located Bioseq qualifies */
+      bsp = BioseqFind (sip);
+      if (bsp != NULL) {
+        for (id = bsp->id; id != NULL; id = id->next) {
+          if (id->choice == SEQID_OTHER) {
+            tsip = (TextSeqIdPtr) id->data.ptrvalue;
+            if (tsip != NULL && tsip->accession != NULL) {
+              gpsOrRefSeq = TRUE;
+            }
+          }
+        }
+      }
+      slp = nxt;
+    }
+  }
+  if (gpsOrRefSeq && severity > SEV_WARNING) {
+    severity = SEV_WARNING;
+  }
+  return severity;
+}
+
+
+static ValidErrSevChangeData valid_sevchange[] = { /* AdjustSeverity applies every matching rule in this order, so order matters for duplicated phrases */
+ {ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found at start of intron, position", NULL, LowerToInfoForGenomic},
+ {ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found at end of intron, position", NULL, LowerToInfoForGenomic},
+ {ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found after exon", NULL, LowerToInfoForGenomic},
+ {ERR_SEQ_FEAT_NotSpliceConsensusDonor, "Splice donor consensus (GT) not found after exon", NULL, WarnForGPSOrRefSeq},
+ {ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found before exon", NULL, LowerToInfoForGenomic},
+ {ERR_SEQ_FEAT_NotSpliceConsensusAcceptor, "Splice acceptor consensus (AG) not found before exon", NULL, WarnForGPSOrRefSeq},
+};
+
+const Int4 kNumSevChangeRules = sizeof (valid_sevchange) / sizeof (ValidErrSevChangeData);
+
+/* Applies, in table order, every valid_sevchange rule that matches the error
+   code pair and fmt; each rule function receives the running severity. */
+static int AdjustSeverity (int severity, ValidStructPtr vsp, int code1, int code2, const char *fmt)
+{
+  ValidErrSevChangePtr rule = valid_sevchange;
+  Int4 remaining;
+
+  for (remaining = kNumSevChangeRules; remaining > 0; remaining--, rule++) {
+    if (rule->code1 != code1 || rule->code2 != code2 || rule->func == NULL) continue;
+    if (rule->search_phrase != NULL && StringISearch (fmt, rule->search_phrase) == NULL) continue;
+    if (rule->exclude_phrase != NULL && StringISearch (fmt, rule->exclude_phrase) != NULL) continue;
+    severity = rule->func (severity, vsp);
+  }
+
+  return severity;
+}
+
+
#ifdef VAR_ARGS
NLM_EXTERN void CDECL ValidErr (vsp, severity, code1, code2, fmt, va_alist)
ValidStructPtr vsp;
@@ -1322,9 +1601,11 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int
SeqFeatPtr sfp;
SeqIdPtr sip;
- if (vsp == NULL || severity < vsp->cutoff)
+ if (vsp == NULL || severity < vsp->cutoff || ShouldSuppressValidErr(vsp, code1, code2, fmt))
return;
+ severity = AdjustSeverity(severity, vsp, code1, code2, fmt);
+
if (vsp->errbuf == NULL) {
vsp->errbuf = MemNew (8192);
if (vsp->errbuf == NULL)
@@ -1491,9 +1772,27 @@ NLM_EXTERN void CDECL ValidErr (ValidStructPtr vsp, int severity, int code1, int
buflen -= diff;
tmp += diff;
- diff = SeqDescLabel (vsp->descr, tmp, buflen, OM_LABEL_BOTH);
- buflen -= diff;
- tmp += diff;
+ if (vsp->descr->choice == Seq_descr_comment) {
+ diff = SeqDescLabel (vsp->descr, tmp, buflen, OM_LABEL_BOTH);
+ if (diff > 100) {
+ /* truncate long comment in message */
+ tmp [94] = ' ';
+ tmp [95] = '.';
+ tmp [96] = '.';
+ tmp [97] = '.';
+ tmp [98] = '\0';
+ diff = 98;
+ buflen -= diff;
+ tmp += diff;
+ } else {
+ buflen -= diff;
+ tmp += diff;
+ }
+ } else {
+ diff = SeqDescLabel (vsp->descr, tmp, buflen, OM_LABEL_BOTH);
+ buflen -= diff;
+ tmp += diff;
+ }
}
/*
@@ -1653,6 +1952,18 @@ static void StructuredCommentError (EFieldValid err_code, FieldRulePtr field_rul
}
+/* TRUE when str has text and that text contains a "::" sequence. */
+static Boolean StringLooksLikeFakeStructuredComment (CharPtr str)
+{
+  Boolean has_marker = FALSE;
+
+  if (! StringHasNoText (str) && StringSearch (str, "::") != NULL) {
+    has_marker = TRUE;
+  }
+  return has_marker;
+}
+
+
/*****************************************************************************
*
* Valid1GatherProc(gcp)
@@ -1871,6 +2182,10 @@ static Boolean Valid1GatherProc (GatherContextPtr gcp)
ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_SerialInComment,
"Comment may refer to reference by serial number - attach reference specific comments to the reference REMARK instead.");
}
+ if (StringLooksLikeFakeStructuredComment (str)) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_FakeStructuredComment,
+ "Comment may be formatted to look like a structured comment.");
+ }
for (vnp2 = sdp->next; vnp2 != NULL; vnp2 = vnp2->next) {
if (vnp2->choice == Seq_descr_comment) {
ptr = (CharPtr) vnp2->data.ptrvalue;
@@ -2595,13 +2910,21 @@ static void CheckForCollidingSerials (
static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp)
{
+ SeqEntryPtr bsep;
+ BioseqPtr bsp = NULL;
GatherContext gc;
VfcData vfd;
if (vsp == NULL || sep == NULL) return;
+
+ bsep = FindNthBioseq (sep, 1);
+ if (bsep != NULL && IS_Bioseq (bsep)) {
+ bsp = (BioseqPtr) bsep->data.ptrvalue;
+ }
+
vsp->gcp = &gc;
vsp->bssp = NULL;
- vsp->bsp = NULL;
+ vsp->bsp = bsp;
vsp->sfp = NULL;
vsp->descr = NULL;
MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
@@ -2613,7 +2936,7 @@ static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp)
VisitFeaturesInSep (sep, (Pointer) &vfd, CheckFeatCits);
vsp->bssp = NULL;
- vsp->bsp = NULL;
+ vsp->bsp = bsp;
vsp->sfp = NULL;
vsp->descr = NULL;
vfd.serial = ValNodeSort (vfd.serial, SortByIntvalue);
@@ -2625,10 +2948,12 @@ static void ValidateFeatCits (SeqEntryPtr sep, ValidStructPtr vsp)
ValNodeFree (vfd.serial);
}
-static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp)
+static void ValidateFeatIDs (SeqEntryPtr sep, Uint2 entityID, ValidStructPtr vsp)
{
SMFidItemPtr PNTR array;
+ SeqEntryPtr bsep;
+ BioseqPtr bsp = NULL;
BioseqExtraPtr bspextra;
SMFeatItemPtr feat;
GatherContext gc;
@@ -2640,7 +2965,7 @@ static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp)
ObjMgrDataPtr omdp;
SeqFeatPtr sfp;
- if (entityID < 1 || vsp == NULL) return;
+ if (sep == NULL || entityID < 1 || vsp == NULL) return;
omdp = ObjMgrGetData (entityID);
if (omdp == NULL) return;
bspextra = (BioseqExtraPtr) omdp->extradata;
@@ -2649,9 +2974,14 @@ static void ValidateFeatIDs (Uint2 entityID, ValidStructPtr vsp)
num = bspextra->numfids;
if (array == NULL || num < 1) return;
+ bsep = FindNthBioseq (sep, 1);
+ if (bsep != NULL && IS_Bioseq (bsep)) {
+ bsp = (BioseqPtr) bsep->data.ptrvalue;
+ }
+
vsp->gcp = &gc;
vsp->bssp = NULL;
- vsp->bsp = NULL;
+ vsp->bsp = bsp;
vsp->sfp = NULL;
vsp->descr = NULL;
MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
@@ -2684,21 +3014,6 @@ typedef struct vsicdata {
ValNodePtr tailid;
} VsicData, PNTR VsicDataPtr;
-static Boolean IsNCBIFileID (SeqIdPtr sip)
-{
- DbtagPtr dbt;
-
- if (sip == NULL || sip->choice != SEQID_GENERAL) return FALSE;
- dbt = (DbtagPtr) sip->data.ptrvalue;
- if (dbt == NULL) return FALSE;
- if (StringCmp (dbt->db, "NCBIFILE") == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
static void CaptureTextSeqIDs (BioseqPtr bsp, Pointer userdata)
{
@@ -2755,6 +3070,8 @@ static ValNodePtr UniqueValNodeCaseSensitive (ValNodePtr list)
static void ValidateSeqIdCase (SeqEntryPtr sep, ValidStructPtr vsp)
{
+ SeqEntryPtr bsep;
+ BioseqPtr bsp = NULL;
CharPtr curr;
GatherContext gc;
GatherContextPtr gcp;
@@ -2764,13 +3081,18 @@ static void ValidateSeqIdCase (SeqEntryPtr sep, ValidStructPtr vsp)
if (vsp == NULL || sep == NULL) return;
+ bsep = FindNthBioseq (sep, 1);
+ if (bsep != NULL && IS_Bioseq (bsep)) {
+ bsp = (BioseqPtr) bsep->data.ptrvalue;
+ }
+
MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
MemSet ((Pointer) &vd, 0, sizeof (VsicData));
gcp = &gc;
vsp->gcp = &gc;
vsp->bssp = NULL;
- vsp->bsp = NULL;
+ vsp->bsp = bsp;
vsp->sfp = NULL;
vsp->descr = NULL;
vd.vsp = vsp;
@@ -2806,6 +3128,7 @@ static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata)
Boolean has_lcl_gnl = FALSE;
Boolean has_others = FALSE;
SeqIdPtr sip;
+ TextSeqIdPtr tsip;
ValidStructPtr vsp;
if (bsp == NULL || userdata == NULL) return;
@@ -2819,10 +3142,21 @@ static void LookForBioseqFields (BioseqPtr bsp, Pointer userdata)
/* and fall through */
case SEQID_GENBANK:
case SEQID_TPG:
+ vsp->is_insd_in_sep = TRUE;
+ tsip = (TextSeqIdPtr) sip->data.ptrvalue;
+ if (tsip != NULL) {
+ if (StringLen (tsip->accession) == 6) {
+ vsp->is_old_gb_in_sep = TRUE;
+ }
+ }
+ break;
case SEQID_TPE:
case SEQID_TPD:
vsp->is_insd_in_sep = TRUE;
break;
+ case SEQID_PATENT:
+ vsp->is_patent_in_sep = TRUE;
+ break;
case SEQID_OTHER:
vsp->is_refseq_in_sep = TRUE;
break;
@@ -2870,6 +3204,7 @@ static void LookForBioseqSetFields (BioseqSetPtr bssp, Pointer userdata)
case BioseqseqSet_class_phy_set:
case BioseqseqSet_class_eco_set:
case BioseqseqSet_class_wgs_set:
+ case BioseqseqSet_class_small_genome_set:
break;
vsp->other_sets_in_sep = TRUE;
default:
@@ -2883,7 +3218,8 @@ static void LookForBioseqSetFields (BioseqSetPtr bssp, Pointer userdata)
bssp->_class == BioseqseqSet_class_pop_set ||
bssp->_class == BioseqseqSet_class_phy_set ||
bssp->_class == BioseqseqSet_class_eco_set ||
- bssp->_class == BioseqseqSet_class_wgs_set) {
+ bssp->_class == BioseqseqSet_class_wgs_set ||
+ bssp->_class == BioseqseqSet_class_small_genome_set) {
vsp->other_sets_in_sep = TRUE;
}
}
@@ -2943,6 +3279,46 @@ static void LookForSeqDescrFields (SeqDescrPtr sdp, Pointer userdata)
}
}
+static void FindMultiIntervalGenes (
+ SeqFeatPtr sfp,
+ Pointer userdata
+)
+
+{
+ BoolPtr multiIntervalGenesP;
+ SeqLocPtr slp;
+
+ if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
+ multiIntervalGenesP = (BoolPtr) userdata;
+ if (multiIntervalGenesP == NULL) return;
+
+ slp = sfp->location;
+ if (slp == NULL) return;
+ switch (slp->choice) {
+ case SEQLOC_PACKED_INT :
+ case SEQLOC_PACKED_PNT :
+ case SEQLOC_MIX :
+ case SEQLOC_EQUIV :
+ *multiIntervalGenesP = TRUE;
+ break;
+ default :
+ break;
+ }
+}
+
+static void FindSegmentedBioseqs (
+ BioseqPtr bsp,
+ Pointer userdata
+)
+
+{
+ BoolPtr segmentedBioseqsP;
+
+ if (bsp == NULL || bsp->repr != Seq_repr_seg) return;
+ segmentedBioseqsP = (BoolPtr) userdata;
+ if (segmentedBioseqsP == NULL) return;
+ *segmentedBioseqsP = TRUE;
+}
static void SetPubScratchData (SeqDescrPtr sdp, Pointer userdata)
{
@@ -3231,6 +3607,7 @@ static void TestDeletedOrReplacedECnumbers (ValidStructPtr vsp)
CharPtr ptr;
ErrSev sev;
CharPtr str;
+ CharPtr tmp;
/* only check first time program runs validator */
@@ -3260,14 +3637,22 @@ static void TestDeletedOrReplacedECnumbers (ValidStructPtr vsp)
if (! ECnumberNotInList (str)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replaced EC number %s still in live list", str);
}
- if (ECnumberNotInList (ptr)) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s not in live list", ptr);
- }
if (ECnumberWasDeleted (str)) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replaced EC number %s in deleted list", str);
}
- if (ECnumberWasDeleted (ptr)) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s in deleted list", ptr);
+ while (StringDoesHaveText (ptr)) {
+ tmp = StringChr (ptr, '\t');
+ if (tmp != NULL) {
+ *tmp = '\0';
+ tmp++;
+ }
+ if (ECnumberNotInList (ptr)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s not in live list", ptr);
+ }
+ if (ECnumberWasDeleted (ptr)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_EcNumberProblem, "Replacement EC number %s in deleted list", ptr);
+ }
+ ptr = tmp;
}
}
}
@@ -3334,21 +3719,6 @@ static CollisionInfoPtr CollisionInfoFree (CollisionInfoPtr cip)
}
-static Boolean IsNcbiFileId(SeqIdPtr sip)
-{
- DbtagPtr dbtag;
-
- if (sip == NULL || sip->choice != SEQID_GENERAL || (dbtag = sip->data.ptrvalue) == NULL) {
- return FALSE;
- }
- if (StringCmp (dbtag->db, "NCBIFILE") == 0) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
static void LongCollisionCallback (BioseqPtr bsp, Pointer data)
{
SeqIdPtr sip;
@@ -3358,7 +3728,7 @@ static void LongCollisionCallback (BioseqPtr bsp, Pointer data)
}
for (sip = bsp->id; sip != NULL; sip = sip->next) {
- if (!IsNcbiFileId(sip)) {
+ if (!IsNCBIFileID(sip)) {
ValNodeAddPointer ((ValNodePtr PNTR) data, 0, CollisionInfoNew (sip, bsp));
}
}
@@ -3469,6 +3839,165 @@ static Boolean ValTooManyFarComponents (
return toomanyfar;
}
+static CharPtr inferencePrefix [] = {
+ "",
+ "similar to sequence",
+ "similar to AA sequence",
+ "similar to DNA sequence",
+ "similar to RNA sequence",
+ "similar to RNA sequence, mRNA",
+ "similar to RNA sequence, EST",
+ "similar to RNA sequence, other RNA",
+ "profile",
+ "nucleotide motif",
+ "protein motif",
+ "ab initio prediction",
+ "alignment",
+ NULL
+};
+
+
+static CharPtr NextColonOrVerticalBarPtr (CharPtr ptr)
+
+{
+ Char ch = '\0';
+
+ if (ptr == NULL) return NULL;
+
+ ch = *ptr;
+ while (ch != '\0') {
+ if (ch == ':' || ch == '|') return ptr;
+ ptr++;
+ ch = *ptr;
+ }
+
+ return NULL;
+}
+
+typedef struct valcountdata {
+ Int4 numInferences;
+ Int4 numAccessions;
+} ValCountData, PNTR ValCountPtr;
+
+static void ValCountInfAccnVer (SeqFeatPtr sfp, Pointer userdata)
+
+{
+ Int2 best, j;
+ Char ch;
+ GBQualPtr gbq;
+ size_t len;
+ CharPtr nxt;
+ CharPtr ptr;
+ CharPtr rest;
+ CharPtr str;
+ CharPtr tmp;
+ ValCountPtr vcp;
+
+
+ if (sfp == NULL || userdata == NULL) return;
+ vcp = (ValCountPtr) userdata;
+
+ for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
+ if (StringICmp (gbq->qual, "inference") != 0) continue;
+ if (StringHasNoText (gbq->val)) continue;
+
+ (vcp->numInferences)++;
+
+ rest = NULL;
+ best = -1;
+ for (j = 0; inferencePrefix [j] != NULL; j++) {
+ len = StringLen (inferencePrefix [j]);
+ if (StringNICmp (gbq->val, inferencePrefix [j], len) != 0) continue;
+ rest = gbq->val + len;
+ best = j;
+ }
+ if (best < 0 || inferencePrefix [best] == NULL) continue;
+ if (rest == NULL) continue;
+
+ ch = *rest;
+ while (IS_WHITESP (ch)) {
+ rest++;
+ ch = *rest;
+ }
+ if (StringNICmp (rest, "(same species)", 14) == 0) {
+ rest += 14;
+ }
+ ch = *rest;
+ while (IS_WHITESP (ch) || ch == ':') {
+ rest++;
+ ch = *rest;
+ }
+ if (StringHasNoText (rest)) continue;
+
+ str = StringSave (rest);
+
+ ptr = str;
+ if (best == 12) {
+ ptr = StringRChr (str, ':');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
+ }
+ }
+ while (ptr != NULL) {
+ nxt = StringChr (ptr, ',');
+ if (nxt != NULL) {
+ *nxt = '\0';
+ nxt++;
+ }
+ tmp = NextColonOrVerticalBarPtr (ptr);
+ if (tmp != NULL) {
+ *tmp = '\0';
+ tmp++;
+ TrimSpacesAroundString (ptr);
+ TrimSpacesAroundString (tmp);
+ if (StringDoesHaveText (tmp)) {
+ if (StringICmp (ptr, "INSD") == 0 || StringICmp (ptr, "RefSeq") == 0) {
+ (vcp->numAccessions)++;
+ }
+ }
+ }
+ ptr = nxt;
+ }
+
+ MemFree (str);
+ }
+}
+
+NLM_EXTERN Boolean TooManyInferenceAccessions (
+ SeqEntryPtr sep,
+ Int4Ptr numInferences,
+ Int4Ptr numAccessions
+)
+
+{
+ ValCountData vcd;
+
+ if (numInferences != NULL) {
+ *numInferences = 0;
+ }
+ if (numAccessions != NULL) {
+ *numAccessions = 0;
+ }
+ if (sep == NULL) return FALSE;
+
+ vcd.numInferences = 0;
+ vcd.numAccessions = 0;
+
+ VisitFeaturesInSep (sep, (Pointer) &vcd, ValCountInfAccnVer);
+
+ if (numInferences != NULL) {
+ *numInferences = vcd.numInferences;
+ }
+ if (numAccessions != NULL) {
+ *numAccessions = vcd.numAccessions;
+ }
+
+ if (vcd.numInferences > 1000 || vcd.numAccessions > 1000) return TRUE;
+
+ return FALSE;
+}
+
NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
{
@@ -3487,6 +4016,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
Boolean first = TRUE;
Int4 errors[6];
Int2 i;
+ Boolean inferenceAccnCheck;
Boolean suppress_no_pubs = TRUE;
Boolean suppress_no_biosrc = TRUE;
FeatProb featprob;
@@ -3504,10 +4034,14 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
SubmitBlockPtr sbp;
ErrSev sev;
SeqIdPtr sip;
+ Boolean has_multi_int_genes = FALSE;
+ Boolean has_seg_bioseqs = FALSE;
Boolean isGPS = FALSE;
Boolean isPatent = FALSE;
Boolean isPDB = FALSE;
FindRepData frd;
+ Int4 numInferences;
+ Int4 numAccessions;
if (sep == NULL || vsp == NULL) return FALSE;
@@ -3595,6 +4129,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
vsp->is_gps_in_sep = FALSE;
vsp->other_sets_in_sep = FALSE;
vsp->is_embl_ddbj_in_sep = FALSE;
+ vsp->is_old_gb_in_sep = FALSE;
vsp->is_insd_in_sep = FALSE;
vsp->only_lcl_gnl_in_sep = FALSE;
vsp->has_gnl_prot_sep = FALSE;
@@ -3605,6 +4140,11 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
VisitSetsInSep (sep, (Pointer) vsp, LookForBioseqSetFields);
VisitDescriptorsInSep (sep, (Pointer) vsp, LookForSeqDescrFields);
+ VisitFeaturesInSep (sep, (Pointer) &has_multi_int_genes, FindMultiIntervalGenes);
+ vsp->has_multi_int_genes = has_multi_int_genes;
+ VisitBioseqsInSep (sep, (Pointer) &has_seg_bioseqs, FindSegmentedBioseqs);
+ vsp->has_seg_bioseqs = has_seg_bioseqs;
+
/*
vsp->is_htg_in_sep = FALSE;
VisitDescriptorsInSep (sep, (Pointer) &(vsp->is_htg_in_sep), LookForHTG);
@@ -3633,6 +4173,8 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
globalvsp = vsp; /* for spell checker */
+ inferenceAccnCheck = vsp->inferenceAccnCheck;
+
while (sep != NULL) {
vsp->far_fetch_failure = FALSE;
@@ -3798,8 +4340,24 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
/* AssignIDsInEntity (gc.entityID, 0, NULL); */
+ if (inferenceAccnCheck) {
+ numInferences = 0;
+ numAccessions = 0;
+ if (TooManyInferenceAccessions (sep, &numInferences, &numAccessions)) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_TooManyInferenceAccessions,
+ "Skipping validation of %ld /inference qualifiers with %ld accessions",
+ (long) numInferences, (long) numAccessions);
+
+ /* suppress inference accession.version check for this record */
+ vsp->inferenceAccnCheck = FALSE;
+ }
+ }
+
GatherSeqEntry (sep, (Pointer) vsp, Valid1GatherProc, &gs);
+ /* restore inferenceAccnCheck flag for next record */
+ vsp->inferenceAccnCheck = inferenceAccnCheck;
+
if (ssp != NULL) {
if (ssp->datatype == 1) {
vsp->bsp = NULL;
@@ -3844,7 +4402,7 @@ NLM_EXTERN Boolean ValidateSeqEntry (SeqEntryPtr sep, ValidStructPtr vsp)
vsp->gcp = NULL;
vsp->gcp = NULL;
- ValidateFeatIDs (gc.entityID, vsp);
+ ValidateFeatIDs (sep, gc.entityID, vsp);
vsp->gcp = NULL;
vsp->gcp = NULL;
@@ -3970,6 +4528,8 @@ static CharPtr GetBioseqSetClass (Uint1 cl)
return ("gen-prod-set");
if (cl == BioseqseqSet_class_wgs_set)
return ("wgs-set");
+ if (cl == BioseqseqSet_class_small_genome_set)
+ return ("small-genome-set");
if (cl == BioseqseqSet_class_other)
return ("other");
return ("not-set");
@@ -4101,15 +4661,19 @@ static void ValidateNucProtSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
+ if (sdp->choice == Seq_descr_title) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_NucProtSetHasTitle,
+ "Nuc-prot set should not have title descriptor");
+ }
+ }
+
+ for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
if (sdp->choice == Seq_descr_source) {
biop = (BioSourcePtr) sdp->data.ptrvalue;
if (biop != NULL) {
orp = biop->org;
if (orp != NULL && StringDoesHaveText (orp->taxname)) return;
}
- } else if (sdp->choice == Seq_descr_title) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_NucProtSetHasTitle,
- "Nuc-prot set should not have title descriptor");
}
}
@@ -4447,6 +5011,20 @@ static void LookForMolInfoInconsistency (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+static Boolean SetHasMolInfo (BioseqSetPtr bssp)
+
+{
+ SeqDescrPtr sdp;
+
+ if (bssp == NULL) return FALSE;
+
+ for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
+ if (sdp->choice == Seq_descr_molinfo) return TRUE;
+ }
+
+ return FALSE;
+}
+
static void ValidatePopSet (BioseqSetPtr bssp, ValidStructPtr vsp)
{
@@ -4473,6 +5051,10 @@ static void ValidatePopSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+ if (SetHasMolInfo (bssp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Pop set has MolInfo on set");
+ }
+
LookForMolInfoInconsistency (bssp, vsp);
for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
@@ -4529,6 +5111,10 @@ static void ValidateMutSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+ if (SetHasMolInfo (bssp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Mut set has MolInfo on set");
+ }
+
LookForMolInfoInconsistency (bssp, vsp);
/* error is currently suppressed
@@ -4559,6 +5145,10 @@ static void ValidateGenbankSet (BioseqSetPtr bssp, ValidStructPtr vsp)
"Bioseq-set contains internal GenBank Bioseq-set");
}
}
+
+ if (SetHasMolInfo (bssp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Genbank set has MolInfo on set");
+ }
}
static void ValidatePhyEcoWgsSet (BioseqSetPtr bssp, ValidStructPtr vsp)
@@ -4578,6 +5168,10 @@ static void ValidatePhyEcoWgsSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+ if (SetHasMolInfo (bssp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "Phy/eco/wgs set has MolInfo on set");
+ }
+
LookForMolInfoInconsistency (bssp, vsp);
}
@@ -4637,10 +5231,53 @@ static void ValidateGenProdSet (BioseqSetPtr bssp, ValidStructPtr vsp)
}
}
+ if (SetHasMolInfo (bssp)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_MisplacedMolInfo, "GenProd set has MolInfo on set");
+ }
+
gcp->itemID = olditemid;
gcp->thistype = olditemtype;
}
+static void NestedSetProc (BioseqSetPtr bssp, Pointer userdata)
+
+{
+ ValidStructPtr vsp;
+ GatherContextPtr gcp = NULL;
+
+ if (bssp == NULL) return;
+
+ /* pop/phy/mut/eco set can contain up to nuc-prot sets */
+ switch (bssp->_class) {
+ case BioseqseqSet_class_nuc_prot:
+ case BioseqseqSet_class_segset:
+ case BioseqseqSet_class_parts:
+ return;
+ default:
+ break;
+ }
+
+ vsp = (ValidStructPtr) userdata;
+ if (vsp == NULL) return;
+ gcp = vsp->gcp;
+ if (gcp == NULL) return;
+
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ImproperlyNestedSets, "Nested sets within Pop/Phy/Mut/Eco/Wgs set");
+}
+
+static void CheckForNestedSets (BioseqSetPtr bssp, Pointer userdata)
+
+{
+ SeqEntryPtr sep;
+
+ if (bssp == NULL) return;
+
+ for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
+ if (!IS_Bioseq_set (sep)) continue;
+ VisitSetsInSep (sep, userdata, NestedSetProc);
+ }
+}
+
static void ValidateBioseqSet (GatherContextPtr gcp)
{
@@ -4706,14 +5343,18 @@ static void ValidateBioseqSet (GatherContextPtr gcp)
break;
case BioseqseqSet_class_pop_set:
ValidatePopSet (bssp, vsp);
+ CheckForNestedSets (bssp, vsp);
break;
case BioseqseqSet_class_mut_set:
ValidateMutSet (bssp, vsp);
+ CheckForNestedSets (bssp, vsp);
break;
case BioseqseqSet_class_phy_set:
case BioseqseqSet_class_eco_set:
case BioseqseqSet_class_wgs_set:
+ case BioseqseqSet_class_small_genome_set:
ValidatePhyEcoWgsSet (bssp, vsp);
+ CheckForNestedSets (bssp, vsp);
break;
case BioseqseqSet_class_gen_prod_set:
ValidateGenProdSet (bssp, vsp);
@@ -4748,7 +5389,7 @@ static void ValidateBioseqSet (GatherContextPtr gcp)
if (sep == NULL) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_EmptySet, "Pop/Phy/Mut/Eco set has no components");
} else if (sep->next == NULL) {
- if (VisitAlignmentsInSep (sep, NULL, NULL) == 0) {
+ if (VisitAlignmentsInSep (gcp->sep, NULL, NULL) == 0) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_SingleItemSet, "Pop/Phy/Mut/Eco set has only one component and no alignments");
}
}
@@ -4818,10 +5459,12 @@ static void LookForSecondaryConflict (ValidStructPtr vsp, GatherContextPtr gcp,
static void CheckSegBspAgainstParts (ValidStructPtr vsp, GatherContextPtr gcp, BioseqPtr bsp)
{
BioseqSetPtr bssp;
+ Boolean is_odd;
BioseqPtr part;
SeqEntryPtr sep;
SeqIdPtr sip;
SeqLocPtr slp;
+ BioseqPtr vbsp;
if (vsp == NULL || gcp == NULL || bsp == NULL)
return;
@@ -4845,6 +5488,25 @@ static void CheckSegBspAgainstParts (ValidStructPtr vsp, GatherContextPtr gcp, B
if (bssp->_class != BioseqseqSet_class_parts)
return;
+ is_odd = FALSE;
+ for (slp = (ValNodePtr) bsp->seq_ext; slp != NULL; slp = slp->next) {
+ is_odd = (! is_odd);
+ if (is_odd) {
+ if (slp->choice == SEQLOC_NULL) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSegmentedSeq, "Odd segmented component is not expected to be NULL");
+ }
+ } else {
+ if (slp->choice != SEQLOC_NULL) {
+ vbsp = BioseqFindFromSeqLoc (slp);
+ if (vbsp != NULL) {
+ if (vbsp->repr != Seq_repr_virtual) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_INST_BadSegmentedSeq, "Even segmented component is expected to be NULL or VIRTUAL");
+ }
+ }
+ }
+ }
+ }
+
sep = bssp->seq_set;
for (slp = (ValNodePtr) bsp->seq_ext; slp != NULL; slp = slp->next) {
if (slp->choice == SEQLOC_NULL)
@@ -5160,7 +5822,7 @@ static Int4 CountAdjacentNsInInterval (GatherContextPtr gcp, BioseqPtr bsp, Int4
SeqLocPtr slp;
RunOfNs ron;
- if (bsp == NULL || from < 0 || to < from) {
+ if (bsp == NULL || from < 0 || to < from || ISA_aa (bsp->mol)) {
return 0;
}
@@ -5375,7 +6037,7 @@ static void ReportLongSeqId (SeqIdPtr sip, ValidStructPtr vsp, Int4 max_len)
Int4 id_len = 0;
CharPtr id_txt;
- if (sip == NULL || vsp == NULL || IsNcbiFileId(sip)) {
+ if (sip == NULL || vsp == NULL || IsNCBIFileID(sip)) {
return;
}
@@ -5389,6 +6051,23 @@ static void ReportLongSeqId (SeqIdPtr sip, ValidStructPtr vsp, Int4 max_len)
}
+static Boolean SequenceHasGaps (BioseqPtr bsp)
+{
+ SeqMgrFeatContext context;
+ SeqFeatPtr sfp;
+
+ if (bsp == NULL) {
+ return FALSE;
+ }
+ sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_gap, &context);
+ if (sfp == NULL) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+}
+
+
static void ValidateBioseqInst (GatherContextPtr gcp)
{
Boolean retval = TRUE;
@@ -5430,7 +6109,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
SeqMgrFeatContext protctxt;
CharPtr protlbl = NULL;
TextSeqIdPtr tsip;
- CharPtr ptr, last, str, title, buf;
+ CharPtr ptr, last, str, title, buf, bufplus;
Uint1 lastchoice;
Char ch;
Boolean multitoken;
@@ -5459,6 +6138,7 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
Boolean is_gps = FALSE;
Boolean isRefSeq = FALSE;
Boolean isSwissProt = FALSE;
+ Boolean only_local = TRUE;
Boolean isLRG = FALSE;
ValNodePtr keywords;
Boolean last_is_gap;
@@ -5533,6 +6213,9 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
}
for (sip1 = bsp->id; sip1 != NULL; sip1 = sip1->next) {
+ if (sip1->choice != SEQID_LOCAL) {
+ only_local = FALSE;
+ }
if (sip1->choice == SEQID_OTHER) {
isRefSeq = TRUE;
tsip = (TextSeqIdPtr) sip1->data.ptrvalue;
@@ -5778,8 +6461,13 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
break;
case SEQID_GENERAL:
dbt = (DbtagPtr) sip1->data.ptrvalue;
- if (dbt != NULL && StringICmp (dbt->db, "LRG") == 0) {
- isLRG = TRUE;
+ if (dbt != NULL) {
+ if (StringICmp (dbt->db, "LRG") == 0) {
+ isLRG = TRUE;
+ }
+ if (StringLen (dbt->db) > 20) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_BadSeqIdFormat, "Database name longer than 20 characters");
+ }
}
break;
default:
@@ -6957,9 +7645,20 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
gcp->thistype = olditemtype;
}
}
+
+ if (StringISearch (title, "complete genome") != NULL && SequenceHasGaps (bsp)) {
+ /* warning if title contains complete genome but sequence contains gap features */
+ olditemid = gcp->itemID;
+ olditemtype = gcp->thistype;
+ gcp->itemID = bsp->idx.itemID;
+ gcp->thistype = OBJ_BIOSEQ;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_CompleteTitleProblem, "Title contains 'complete genome' but sequence has gaps");
+ gcp->itemID = olditemid;
+ gcp->thistype = olditemtype;
+ }
}
} else {
- if (ISA_na (bsp->mol) && vsp->other_sets_in_sep && vsp->indexerVersion) {
+ if (ISA_na (bsp->mol) && vsp->other_sets_in_sep && (vsp->is_insd_in_sep || vsp->is_refseq_in_sep) && vsp->indexerVersion) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_ComponentMissingTitle,
"Nucleotide component of pop/phy/mut/eco/wgs set is missing its title");
}
@@ -7003,7 +7702,18 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
if (StringICmp (buf, title) != 0) {
/* also check generated protein defline with all prp->names - old convention */
if (NewCreateDefLineBuf (&ii, bsp, buf, buflen, TRUE, TRUE)) {
- if (StringICmp (buf, title) != 0) {
+ bufplus = buf;
+ if (StringNCmp (bufplus, "PREDICTED: ", 11) == 0) {
+ bufplus += 11;
+ } else if (StringNCmp (bufplus, "UNVERIFIED: ", 12) == 0) {
+ bufplus += 12;
+ }
+ if (StringNCmp (title, "PREDICTED: ", 11) == 0) {
+ title += 11;
+ } else if (StringNCmp (title, "UNVERIFIED: ", 12) == 0) {
+ title += 12;
+ }
+ if (StringICmp (bufplus, title) != 0) {
olditemid = gcp->itemID;
olditemtype = gcp->thistype;
if (vnp->extended != 0) {
@@ -7106,6 +7816,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
sev = SEV_WARNING;
} else if (bsp->topology == TOPOLOGY_CIRCULAR) {
sev = SEV_WARNING;
+ } else if (only_local) {
+ sev = SEV_WARNING;
} else if (StringICmp (str, "NNNNNNNNNN") == 0) {
sev = SEV_ERROR;
} else {
@@ -7138,6 +7850,8 @@ static void ValidateBioseqInst (GatherContextPtr gcp)
sev = SEV_WARNING;
} else if (bsp->topology == TOPOLOGY_CIRCULAR) {
sev = SEV_WARNING;
+ } else if (only_local) {
+ sev = SEV_WARNING;
} else if (StringICmp (str, "NNNNNNNNNN") == 0) {
sev = SEV_ERROR;
} else {
@@ -7411,7 +8125,9 @@ static void ValidateCitSub (ValidStructPtr vsp, CitSubPtr csp)
ValidErr (vsp, SEV_ERROR, ERR_GENERIC_MissingPubInfo, "Submission citation has no author names");
}
if (!hasAffil) {
- ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation has no affiliation");
+ if (! vsp->is_patent_in_sep) {
+ ValidErr (vsp, sev, ERR_GENERIC_MissingPubInfo, "Submission citation has no affiliation");
+ }
}
dp = csp->date;
if (dp != NULL) {
@@ -7682,7 +8398,9 @@ static void ValidatePubdesc (ValidStructPtr vsp, GatherContextPtr gcp, PubdescPt
cgp = (CitGenPtr) vnp->data.ptrvalue;
hasName = FALSE;
if (cgp != NULL) {
- if (!StringHasNoText (cgp->cit)) {
+ if (StringDoesHaveText (cgp->cit)) {
+ /* skip if just BackBone id number */
+ if (StringNICmp (cgp->cit, "BackBone id_pub = ", 18) == 0 && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number < 0) break;
if (StringNICmp (cgp->cit, "submitted", 8) == 0 ||
StringNICmp (cgp->cit, "unpublished", 11) == 0 ||
StringNICmp (cgp->cit, "Online Publication", 18) == 0 ||
@@ -8086,27 +8804,6 @@ static Boolean DeltaOrFarSeg (SeqEntryPtr sep, SeqLocPtr location)
}
-static Boolean IsLocationOrganelle (Uint1 genome)
-{
- if (genome == GENOME_chloroplast
- || genome == GENOME_chromoplast
- || genome == GENOME_kinetoplast
- || genome == GENOME_mitochondrion
- || genome == GENOME_cyanelle
- || genome == GENOME_nucleomorph
- || genome == GENOME_apicoplast
- || genome == GENOME_leucoplast
- || genome == GENOME_proplastid
- || genome == GENOME_hydrogenosome
- || genome == GENOME_plastid
- || genome == GENOME_chromatophore) {
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-
static Boolean IsOrganelleBioseq (BioseqPtr bsp)
{
SeqDescrPtr sdp;
@@ -8136,7 +8833,6 @@ ValidateIntronEndsAtSpliceSiteOrGap
Char id_buf[150];
SeqFeatPtr rna;
SeqMgrFeatContext rcontext;
- ErrSev sev = SEV_WARNING;
if (vsp == NULL || slp == NULL) return;
CheckSeqLocForPartial (slp, &partial5, &partial3);
@@ -8178,10 +8874,6 @@ ValidateIntronEndsAtSpliceSiteOrGap
strand = SeqLocStrand (slp);
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- sev = SEV_INFO;
- }
-
if (!partial5) {
if (strand == Seq_strand_minus) {
SeqPortStreamInt (bsp, stop - 1, stop, Seq_strand_minus, EXPAND_GAPS_TO_DASHES, (Pointer) buf, NULL);
@@ -8198,7 +8890,7 @@ ValidateIntronEndsAtSpliceSiteOrGap
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_NotSpliceConsensusDonor,
"Splice donor consensus (GT) not found at start of terminal intron, position %ld of %s", (long) (pos + 1), id_buf);
} else {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_NotSpliceConsensusDonor,
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusDonor,
"Splice donor consensus (GT) not found at start of intron, position %ld of %s", (long) (pos + 1), id_buf);
}
}
@@ -8217,13 +8909,77 @@ ValidateIntronEndsAtSpliceSiteOrGap
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor,
"Splice acceptor consensus (AG) not found at end of terminal intron, position %ld of %s, but at end of sequence", (long) (pos + 1), id_buf);
} else {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor,
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor,
"Splice acceptor consensus (AG) not found at end of intron, position %ld of %s", (long) (pos + 1), id_buf);
}
}
BioseqUnlock (bsp);
}
+static Boolean IsLocInSmallGenomeSet (
+ SeqLocPtr loc
+)
+
+{
+ BioseqPtr bsp;
+ SeqIdPtr sip;
+ SeqLocPtr slp;
+
+ if (loc == NULL) return FALSE;
+
+ slp = SeqLocFindNext (loc, NULL);
+ while (slp != NULL) {
+ sip = SeqLocId (slp);
+ if (sip == NULL) return FALSE;
+ bsp = BioseqFind (sip);
+ if (bsp == NULL) return FALSE;
+ slp = SeqLocFindNext (loc, slp);
+ }
+
+ return TRUE;
+}
+
+static Boolean AllPartsInSmallGenomeSet (
+ SeqLocPtr loc,
+ ValidStructPtr vsp,
+ BioseqPtr bsp
+)
+
+{
+ BioseqSetPtr bssp;
+ SeqEntryPtr oldscope;
+ Boolean rsult = FALSE;
+ SeqEntryPtr sep;
+
+ if (loc == NULL || vsp == NULL || bsp == NULL) return FALSE;
+
+ sep = vsp->sep;
+ if (sep == NULL) return FALSE;
+ if (! IS_Bioseq_set (sep)) return FALSE;
+ bssp = (BioseqSetPtr) sep->data.ptrvalue;
+ if (bssp == NULL) return FALSE;
+
+ /* if genbank set wraps everything, go down one set level */
+ if (bssp->_class == BioseqseqSet_class_genbank) {
+ sep = bssp->seq_set;
+ if (sep == NULL) return FALSE;
+ if (! IS_Bioseq_set (sep)) return FALSE;
+ bssp = (BioseqSetPtr) sep->data.ptrvalue;
+ }
+
+ /* check for small genome set */
+ if (bssp == NULL || bssp->_class != BioseqseqSet_class_small_genome_set) return FALSE;
+
+ /* scope within small genome set for subsequent BioseqFind calls */
+ oldscope = SeqEntrySetScope (sep);
+
+ rsult = IsLocInSmallGenomeSet (loc);
+
+ SeqEntrySetScope (oldscope);
+
+ return rsult;
+}
+
/*****************************************************************************
*
@@ -8411,7 +9167,7 @@ static Boolean ValidateSeqFeatCommon (SeqFeatPtr sfp, BioseqValidStrPtr bvsp, Va
}
}
- if (farloc && (! is_nc) && (! is_emb)) {
+ if (farloc && (! is_nc) && (! is_emb) && (! AllPartsInSmallGenomeSet (sfp->location, vsp, bsp))) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_FarLocation, "Feature has 'far' location - accession not packaged in record");
}
@@ -8619,6 +9375,7 @@ static CharPtr Nlm_valid_country_codes [] = {
"Bahamas",
"Bahrain",
"Baker Island",
+ "Baltic Sea",
"Bangladesh",
"Barbados",
"Bassas da India",
@@ -8990,584 +9747,358 @@ NLM_EXTERN CharPtr GetCorrectedCountryCapitalization (CharPtr name)
return NULL;
}
-
-static CharPtr ctry_lat_lon [] = {
- "Afghanistan\tAF\t60.4\t29.3\t74.9\t38.5",
- "Albania\tAL\t19.2\t39.6\t21.1\t42.7",
- "Algeria\tAG\t-8.7\t18.9\t12.0\t37.1",
- "American Samoa\tAQ\t-171.1\t-11.1\t-171.1\t-11.0\t-170.9\t-14.4\t-169.4\t-14.2",
- "Andorra\tAN\t1.4\t42.4\t1.8\t42.7",
- "Angola\tAO\t11.6\t-18.1\t24.1\t-4.4",
- "Anguilla\tAV\t-63.2\t18.1\t-62.9\t18.3",
- "Antarctica\tAY\t",
- "Antigua and Barbuda\tAC\t-62.4\t16.9\t-62.3\t16.9\t-62.0\t16.9\t-61.7\t17.7",
- "Arctic Ocean\tXX\t",
- "Argentina\tAR\t-73.6\t-55.1\t-53.6\t-21.8",
- "Armenia\tAM\t43.4\t38.8\t46.6\t41.3",
- "Aruba\tAA\t-70.1\t12.4\t-69.8\t12.7",
- "Ashmore and Cartier Islands\tAT\t122.9\t-12.3\t123.1\t-12.1",
- "Atlantic Ocean\tXX\t",
- "Australia\tAS\t112.9\t-43.7\t153.6\t-10.0",
- "Australia: Australian Capital Territory\tXX\t148.7\t-36.0\t149.4\t-35.1",
- "Australia: Jervis Bay Territory\tXX\t150.5\t-35.2\t150.8\t-35.1",
- "Australia: New South Wales\tXX\t140.9\t-37.6\t153.6\t-28.2",
- "Australia: Northern Territory\tXX\t128.9\t-26.1\t138.0\t-10.9",
- "Australia: Queensland\tXX\t137.9\t-29.2\t153.6\t-10.0",
- "Australia: South Australia\tXX\t128.9\t-38.1\t141.0\t-26.0",
- "Australia: Tasmania\tXX\t143.8\t-43.7\t148.5\t-39.6",
- "Australia: Victoria\tXX\t140.9\t-39.6\t150.0\t-34.0",
- "Australia: Western Australia\tXX\t112.9\t-35.2\t129.0\t-13.7",
- "Austria\tAU\t9.5\t46.3\t17.2\t49.0",
- "Azerbaijan\tAJ\t45.0\t38.3\t50.6\t41.9",
- "Bahamas\tBF\t-79.7\t20.9\t-72.7\t27.2",
- "Bahrain\tBA\t50.3\t25.7\t50.7\t26.3",
- "Baker Island\tFQ\t-176.5\t0.1\t-176.5\t0.2",
- "Bangladesh\tBG\t88.0\t20.5\t92.7\t26.6",
- "Barbados\tBB\t-59.7\t13.0\t-59.4\t13.3",
- "Bassas da India\tBS\t39.6\t-21.6\t39.8\t-21.4",
- "Belarus\tBO\t23.1\t51.2\t32.8\t56.2",
- "Belgium\tBE\t2.5\t49.4\t6.4\t51.5",
- "Belize\tBH\t-89.3\t15.8\t-86.9\t18.5",
- "Benin\tBN\t0.7\t6.2\t3.9\t12.4",
- "Bermuda\tBD\t-64.9\t32.2\t-64.7\t32.4",
- "Bhutan\tBT\t88.7\t26.7\t92.1\t28.3",
- "Bolivia\tBL\t-69.7\t-22.9\t-57.5\t-9.7",
- "Borneo\tXX\t108.6\t-4.2\t119.3\t7.4",
- "Bosnia and Herzegovina\tBK\t15.7\t42.5\t19.7\t45.3",
- "Botswana\tBC\t19.9\t-27.0\t29.4\t-17.8",
- "Bouvet Island\tBV\t3.3\t-54.5\t3.5\t-54.4",
- "Brazil\tBR\t-74.0\t-33.8\t-34.8\t5.0",
- "British Virgin Islands\tVI\t-64.8\t18.2\t-63.2\t18.8",
- "Brunei\tBX\t114.0\t4.0\t115.4\t5.0",
- "Bulgaria\tBU\t22.3\t41.2\t28.6\t44.2",
- "Burkina Faso\tUV\t-5.6\t9.4\t2.4\t15.1",
- "Burundi\tBY\t28.9\t-4.5\t30.8\t-2.3",
- "Cambodia\tCB\t102.3\t9.2\t107.6\t14.7",
- "Cameroon\tCM\t8.4\t1.6\t16.2\t13.1",
- "Canada\tCA\t-141.0\t41.7\t-52.6\t83.1",
- "Canada: Alberta\tXX\t-120.0\t48.9\t-110.0\t60.0",
- "Canada: British Columbia\tXX\t-139.1\t48.3\t-114.1\t60.0",
- "Canada: Manitoba\tXX\t-102.1\t48.9\t-89.0\t60.0",
- "Canada: New Brunswick\tXX\t-69.1\t44.5\t-63.8\t48.1",
- "Canada: Newfoundland and Labrador\tXX\t-67.9\t46.6\t-52.6\t60.4",
- "Canada: Northwest Territories\tXX\t-136.5\t60.0\t-102.0\t78.8",
- "Canada: Nova Scotia\tXX\t-66.4\t43.3\t-59.7\t47.0",
- "Canada: Nunavut\tXX\t-120.4\t60.0\t-61.2\t83.1",
- "Canada: Ontario\tXX\t-95.2\t41.6\t-74.3\t56.9",
- "Canada: Prince Edward Island\tXX\t-64.5\t45.9\t-62.0\t47.1",
- "Canada: Quebec\tXX\t-79.8\t45.0\t-57.1\t62.6",
- "Canada: Saskatchewan\tXX\t-110.0\t48.9\t-101.4\t60.0",
- "Canada: Yukon\tXX\t-141.0\t60.0\t-124.0\t69.6",
- "Cape Verde\tCV\t-25.4\t14.8\t-22.7\t17.2",
- "Cayman Islands\tCJ\t-81.5\t19.2\t-81.1\t19.4\t-80.2\t19.6\t-79.7\t19.8",
- "Central African Republic\tCT\t14.4\t2.2\t27.5\t11.0",
- "Chad\tCD\t13.4\t7.4\t24.0\t23.5",
- "Chile\tCI\t-75.8\t-56.0\t-66.4\t-17.5",
- "China\tCH\t73.5\t20.2\t134.8\t53.6\t108.6\t18.1\t111.1\t20.2",
- "China: Hainan\tXX\t108.6\t18.1\t111.1\t20.2",
- "Christmas Island\tKT\t105.5\t-10.6\t105.7\t-10.4",
- "Clipperton Island\tIP\t-109.3\t10.2\t-109.2\t10.3",
- "Cocos Islands\tCK\t96.8\t-12.2\t96.9\t-11.8",
- "Colombia\tCO\t-79.1\t-4.3\t-66.9\t12.5",
- "Comoros\tCN\t43.2\t-12.5\t44.5\t-11.4",
- "Cook Islands\tCW\t-159.9\t-22.0\t-157.3\t-18.8",
- "Coral Sea Islands\tCR\t",
- "Costa Rica\tCS\t-87.1\t5.4\t-87.0\t5.6\t-86.0\t8.0\t-82.6\t11.2",
- "Cote d'Ivoire\tIV\t-8.6\t4.3\t-2.5\t10.7",
- "Croatia\tHR\t13.4\t42.3\t19.4\t46.5",
- "Cuba\tCU\t-85.0\t19.8\t-74.1\t23.3",
- "Cyprus\tCY\t32.2\t34.5\t34.6\t35.7",
- "Czech Republic\tEZ\t12.0\t48.5\t18.9\t51.0",
- "Democratic Republic of the Congo\tCG\t12.2\t-13.5\t31.3\t5.4",
- "Denmark\tDA\t8.0\t54.5\t12.7\t57.7\t14.6\t54.9\t15.2\t55.3",
- "Djibouti\tDJ\t41.7\t10.9\t43.4\t12.7",
- "Dominica\tDO\t-61.5\t15.2\t-61.2\t15.6",
- "Dominican Republic\tDR\t-72.1\t17.4\t-68.3\t19.9",
- "East Timor\tTT\t124.9\t-9.5\t127.4\t-8.3",
- "Ecuador\tEC\t-92.1\t-1.5\t-89.2\t1.7\t-81.1\t-5.0\t-75.2\t1.4",
- "Ecuador: Galapagos\tXX\t-92.1\t-1.5\t-89.2\t1.7",
- "Egypt\tEG\t24.6\t21.7\t35.8\t31.7",
- "El Salvador\tES\t-90.2\t13.1\t-87.7\t14.4",
- "Equatorial Guinea\tEK\t8.4\t3.2\t8.9\t3.8\t9.2\t0.8\t11.3\t2.3",
- "Eritrea\tER\t36.4\t12.3\t43.1\t18.0",
- "Estonia\tEN\t21.7\t57.5\t28.2\t59.7",
- "Ethiopia\tET\t32.9\t3.4\t48.0\t14.9",
- "Europa Island\tEU\t40.3\t-22.4\t40.4\t-22.3",
- "Falkland Islands (Islas Malvinas)\tFK\t-61.4\t-53.0\t-57.7\t-51.0",
- "Faroe Islands\tFO\t-7.7\t61.3\t-6.3\t62.4",
- "Fiji\tFJ\t-180.0\t-20.7\t-178.2\t-15.7\t-175.7\t-19.8\t-175.0\t-15.6\t176.8\t-19.3\t180.0\t-12.5",
- "Finland\tFI\t19.3\t59.7\t31.6\t70.1",
- "France\tFR\t-5.2\t42.3\t8.2\t51.1\t8.5\t41.3\t9.6\t43.1",
- "France: Corsica\tXX\t8.5\t41.3\t9.6\t43.1",
- "French Guiana\tFG\t-54.6\t2.1\t-51.6\t5.8",
- "French Polynesia\tFP\t-154.7\t-27.7\t-134.9\t-7.8",
- "French Southern and Antarctic Lands\tFS\t68.6\t-49.8\t70.6\t-48.5",
- "Gabon\tGB\t8.6\t-4.0\t14.5\t2.3",
- "Gambia\tGA\t-16.9\t13.0\t-13.8\t13.8",
- "Gaza Strip\tGZ\t34.2\t31.2\t34.5\t31.6",
- "Georgia\tGG\t40.0\t41.0\t46.7\t43.6",
- "Germany\tGM\t5.8\t47.2\t15.0\t55.1",
- "Ghana\tGH\t-3.3\t4.7\t1.2\t11.2",
- "Gibraltar\tGI\t-5.4\t36.1\t-5.3\t36.2",
- "Glorioso Islands\tGO\t47.2\t-11.6\t47.4\t-11.5",
- "Greece\tGR\t19.3\t34.8\t28.2\t41.8",
- "Greenland\tGL\t-73.3\t59.7\t-11.3\t83.6",
- "Grenada\tGJ\t-61.8\t11.9\t-61.6\t12.3",
- "Guadeloupe\tGP\t-63.2\t17.8\t-62.8\t18.1\t-61.9\t15.8\t-61.0\t16.5",
- "Guam\tGQ\t144.6\t13.2\t145.0\t13.7",
- "Guatemala\tGT\t-92.3\t13.7\t-88.2\t17.8",
- "Guernsey\tGK\t-2.7\t49.4\t-2.4\t49.5",
- "Guinea\tGV\t-15.1\t7.1\t-7.6\t12.7",
- "Guinea-Bissau\tPU\t-16.8\t10.8\t-13.6\t12.7",
- "Guyana\tGY\t-61.4\t1.1\t-56.5\t8.6",
- "Haiti\tHA\t-74.5\t18.0\t-71.6\t20.1",
- "Heard Island and McDonald Islands\tHM\t73.2\t-53.2\t73.7\t-52.9",
- "Honduras\tHO\t-89.4\t12.9\t-83.2\t16.5",
- "Hong Kong\tHK\t113.8\t22.1\t114.4\t22.6",
- "Howland Island\tHQ\t-176.7\t0.7\t-176.6\t0.8",
- "Hungary\tHU\t16.1\t45.7\t22.9\t48.6",
- "Iceland\tIC\t-24.6\t63.2\t-13.5\t66.6",
- "India\tIN\t67.3\t8.0\t97.4\t35.5",
- "Indian Ocean\tXX\t",
- "Indonesia\tID\t95.0\t-11.1\t141.0\t5.9",
- "Iran\tIR\t44.0\t25.0\t63.3\t39.8",
- "Iraq\tIZ\t38.8\t29.1\t48.6\t37.4",
- "Ireland\tEI\t-10.7\t51.4\t-6.0\t55.4",
- "Isle of Man\tIM\t-4.9\t54.0\t-4.3\t54.4",
- "Israel\tIS\t34.2\t29.4\t35.7\t33.3",
- "Italy\tIT\t6.6\t35.4\t18.5\t47.1",
- "Jamaica\tJM\t-78.4\t17.7\t-76.2\t18.5",
- "Jan Mayen\tJN\t-9.1\t70.8\t-7.9\t71.2",
- "Japan\tJA\t122.9\t24.0\t125.5\t25.9\t126.7\t20.5\t145.8\t45.5",
- "Jarvis Island\tDQ\t-160.1\t-0.4\t-160.0\t-0.4",
- "Jersey\tJE\t-2.3\t49.1\t-2.0\t49.3",
- "Johnston Atoll\tJQ\t-169.6\t16.7\t-169.4\t16.8",
- "Jordan\tJO\t34.9\t29.1\t39.3\t33.4",
- "Juan de Nova Island\tJU\t42.6\t-17.1\t42.8\t-16.8",
- "Kazakhstan\tKZ\t46.4\t40.9\t87.3\t55.4",
- "Kenya\tKE\t33.9\t-4.7\t41.9\t4.6",
- "Kerguelen Archipelago\tXX\t",
- "Kingman Reef\tKQ\t-162.9\t6.1\t-162.4\t6.7",
- "Kiribati\tKR\t172.6\t0.1\t173.9\t3.4\t174.2\t-2.7\t176.9\t-0.5",
- "Kosovo\tKV\t20.0\t41.8\t43.3\t21.9",
- "Kuwait\tKU\t46.5\t28.5\t48.4\t30.1",
- "Kyrgyzstan\tKG\t69.2\t39.1\t80.3\t43.2",
- "Laos\tLA\t100.0\t13.9\t107.7\t22.5",
- "Latvia\tLG\t20.9\t55.6\t28.2\t58.1",
- "Lebanon\tLE\t35.1\t33.0\t36.6\t34.7",
- "Lesotho\tLT\t27.0\t-30.7\t29.5\t-28.6",
- "Liberia\tLI\t-11.5\t4.3\t-7.4\t8.6",
- "Libya\tLY\t9.3\t19.5\t25.2\t33.2",
- "Liechtenstein\tLS\t9.4\t47.0\t9.6\t47.3",
- "Lithuania\tLH\t20.9\t53.9\t26.9\t56.4",
- "Luxembourg\tLU\t5.7\t49.4\t6.5\t50.2",
- "Macau\tMC\t113.5\t22.1\t113.6\t22.2",
- "Macedonia\tMK\t20.4\t40.8\t23.0\t42.4",
- "Madagascar\tMA\t43.1\t-25.7\t50.5\t-11.9",
- "Malawi\tMI\t32.6\t-17.2\t35.9\t-9.4",
- "Malaysia\tMY\t98.9\t5.6\t98.9\t5.7\t99.6\t1.2\t104.5\t6.7\t109.5\t0.8\t119.3\t7.4",
- "Maldives\tMV\t72.6\t-0.7\t73.7\t7.1",
- "Mali\tML\t-12.3\t10.1\t4.2\t25.0",
- "Malta\tMT\t14.1\t35.8\t14.6\t36.1",
- "Marshall Islands\tRM\t160.7\t4.5\t172.0\t14.8",
- "Martinique\tMB\t-61.3\t14.3\t-60.8\t14.9",
- "Mauritania\tMR\t-17.1\t14.7\t-4.8\t27.3",
- "Mauritius\tMP\t57.3\t-20.6\t57.8\t-20.0\t59.5\t-16.9\t59.6\t-16.7",
- "Mayotte\tMF\t45.0\t-13.1\t45.3\t-12.6",
- "Mediterranean Sea\tXX\t",
- "Mexico\tMX\t-118.5\t28.8\t-118.3\t29.2\t-117.3\t14.5\t-86.7\t32.7",
- "Micronesia\tFM\t138.0\t9.4\t138.2\t9.6\t139.6\t9.8\t139.8\t10.0\t140.5\t9.7\t140.5\t9.8\t147.0\t7.3\t147.0\t7.4\t149.3\t6.6\t149.3\t6.7\t151.5\t7.1\t152.0\t7.5\t153.5\t5.2\t153.8\t5.6\t157.1\t5.7\t160.7\t7.1\t162.9\t5.2\t163.0\t5.4",
- "Midway Islands\tMQ\t-178.4\t28.3\t-178.3\t28.4\t-177.4\t28.1\t-177.3\t28.2\t-174.0\t26.0\t-174.0\t26.1\t-171.8\t25.7\t-171.7\t25.8",
- "Moldova\tMD\t26.6\t45.4\t30.2\t48.5",
- "Monaco\tMN\t7.3\t43.7\t7.5\t43.8",
- "Mongolia\tMG\t87.7\t41.5\t119.9\t52.2",
- "Montenegro\tMJ\t18.4\t42.2\t20.4\t43.6",
- "Montserrat\tMH\t-62.3\t16.6\t-62.1\t16.8",
- "Morocco\tMO\t-13.2\t27.6\t-1.0\t35.9",
- "Mozambique\tMZ\t30.2\t-26.9\t40.8\t-10.5",
- "Myanmar\tBM\t92.1\t9.6\t101.2\t28.5",
- "Namibia\tWA\t11.7\t-29.0\t25.3\t-17.0",
- "Nauru\tNR\t166.8\t-0.6\t166.9\t-0.5",
- "Navassa Island\tBQ\t-75.1\t18.3\t-75.0\t18.4",
- "Nepal\tNP\t80.0\t26.3\t88.2\t30.4",
- "Netherlands\tNL\t3.3\t50.7\t7.2\t53.6",
- "Netherlands Antilles\tNT\t-69.2\t11.9\t-68.2\t12.4\t-63.3\t17.4\t-62.9\t18.1",
- "New Caledonia\tNC\t163.5\t-22.8\t169.0\t-19.5",
- "New Zealand\tNZ\t166.4\t-48.1\t178.6\t-34.1",
- "Nicaragua\tNU\t-87.7\t10.7\t-82.6\t15.0",
- "Niger\tNG\t0.1\t11.6\t16.0\t23.5",
- "Nigeria\tNI\t2.6\t4.2\t14.7\t13.9",
- "Niue\tNE\t-170.0\t-19.2\t-169.8\t-19.0",
- "Norfolk Island\tNF\t168.0\t-29.2\t168.1\t-29.0",
- "North Korea\tKN\t124.1\t37.5\t130.7\t43.0",
- "North Sea\tXX\t",
- "Northern Mariana Islands\tCQ\t144.8\t14.1\t146.1\t20.6",
- "Norway\tNO\t4.6\t57.9\t31.1\t71.2",
- "Oman\tMU\t51.8\t16.6\t59.8\t25.0",
- "Pacific Ocean\tXX\t",
- "Pakistan\tPK\t60.8\t23.6\t77.8\t37.1",
- "Palau\tPS\t132.3\t4.3\t132.3\t4.3\t134.1\t6.8\t134.7\t7.7",
- "Palmyra Atoll\tLQ\t-162.2\t5.8\t-162.0\t5.9",
- "Panama\tPM\t-83.1\t7.1\t-77.2\t9.6",
- "Papua New Guinea\tPP\t140.8\t-11.7\t156.0\t-0.9\t157.0\t-4.9\t157.1\t-4.8\t159.4\t-4.7\t159.5\t-4.5",
- "Paracel Islands\tPF\t111.1\t15.7\t111.2\t15.8",
- "Paraguay\tPA\t-62.7\t-27.7\t-54.3\t-19.3",
- "Peru\tPE\t-81.4\t-18.4\t-68.7\t0.0",
- "Philippines\tRP\t116.9\t4.9\t126.6\t21.1",
- "Pitcairn Islands\tPC\t-128.4\t-24.5\t-128.3\t-24.3",
- "Poland\tPL\t14.1\t49.0\t24.2\t54.8",
- "Portugal\tPO\t-9.5\t36.9\t-6.2\t42.1\t-31.3\t36.9\t-25.0\t39.8\t-17.3\t32.4\t-16.2\t33.2",
- "Portugal: Azores\tXX\t-31.3\t36.9\t-25.0\t39.8",
- "Portugal: Madeira\tXX\t-17.3\t32.4\t-16.2\t33.2",
- "Puerto Rico\tRQ\t-68.0\t17.8\t-65.2\t18.5",
- "Qatar\tQA\t50.7\t24.4\t52.4\t26.2",
- "Republic of the Congo\tCF\t11.2\t-5.1\t18.6\t3.7",
- "Reunion\tRE\t55.2\t-21.4\t55.8\t-20.9",
- "Romania\tRO\t20.2\t43.6\t29.7\t48.3",
- "Ross Sea\tXX\t",
- "Russia\tRS\t-180.0\t64.2\t-169.0\t71.6\t19.7\t54.3\t22.9\t55.3\t26.9\t41.1\t180.0\t81.3",
- "Rwanda\tRW\t28.8\t-2.9\t30.9\t-1.1",
- "Saint Helena\tSH\t-5.8\t-16.1\t-5.6\t-15.9",
- "Saint Kitts and Nevis\tSC\t62.9\t17.0\t62.5\t17.5",
- "Saint Lucia\tST\t-61.1\t13.7\t-60.9\t14.1",
- "Saint Pierre and Miquelon\tSB\t-56.5\t46.7\t-56.2\t47.1",
- "Saint Vincent and the Grenadines\tVC\t-61.6\t12.4\t-61.1\t13.4",
- "Samoa\tWS\t-172.8\t-14.1\t-171.4\t-13.4",
- "San Marino\tSM\t12.4\t43.8\t12.5\t44.0",
- "Sao Tome and Principe\tTP\t6.4\t0.0\t1.7\t7.5",
- "Saudi Arabia\tSA\t34.4\t15.6\t55.7\t32.2",
- "Senegal\tSG\t-17.6\t12.3\t-11.4\t16.7",
- "Serbia\tRB\t18.8\t42.2\t23.1\t46.2",
- "Seychelles\tSE\t50.7\t-9.6\t51.1\t-9.2\t52.7\t-7.2\t52.8\t-7.0\t53.0\t-6.3\t53.7\t-5.1\t55.2\t-5.9\t56.0\t-3.7\t56.2\t-7.2\t56.3\t-7.1",
- "Sierra Leone\tSL\t-13.4\t6.9\t-10.3\t10.0",
- "Singapore\tSN\t103.6\t1.1\t104.1\t1.5",
- "Slovakia\tLO\t16.8\t47.7\t22.6\t49.6",
- "Slovenia\tSI\t13.3\t45.4\t16.6\t46.9",
- "Solomon Islands\tBP\t155.5\t-11.9\t162.8\t-5.1\t165.6\t-11.8\t167.0\t-10.1\t167.1\t-10.0\t167.3\t-9.8\t168.8\t-12.3\t168.8\t-12.3",
- "Somalia\tSO\t40.9\t-1.7\t51.4\t12.0",
- "South Africa\tSF\t16.4\t-34.9\t32.9\t-22.1",
- "South Georgia and the South Sandwich Islands\tSX\t-38.3\t-54.9\t-35.7\t-53.9",
- "South Korea\tKS\t125.0\t33.1\t129.6\t38.6",
- "Southern Ocean\tXX\t",
- "Spain\tSP\t-9.3\t35.1\t4.3\t43.8\t-18.2\t27.6\t-13.4\t29.5",
- "Spain: Canary Islands\tXX\t-18.2\t27.6\t-13.4\t29.5",
- "Spratly Islands\tPG\t114.0\t9.6\t115.8\t11.1",
- "Sri Lanka\tCE\t79.6\t5.9\t81.9\t9.8",
- "Sudan\tSU\t21.8\t3.4\t38.6\t23.6",
- "Suriname\tNS\t-58.1\t1.8\t-54.0\t6.0",
- "Svalbard\tSV\t10.4\t76.4\t33.5\t80.8",
- "Swaziland\tWZ\t30.7\t-27.4\t32.1\t-25.7",
- "Sweden\tSW\t10.9\t55.3\t24.2\t69.1",
- "Switzerland\tSZ\t5.9\t45.8\t10.5\t47.8",
- "Syria\tSY\t35.7\t32.3\t42.4\t37.3",
- "Taiwan\tTW\t119.3\t21.9\t122.0\t25.3",
- "Tajikistan\tTI\t67.3\t36.6\t75.1\t41.0",
- "Tanzania\tTZ\t29.3\t-11.8\t40.4\t-1.0",
- "Tasman Sea\tXX\t",
- "Thailand\tTH\t97.3\t5.6\t105.6\t20.5",
- "Togo\tTO\t-0.2\t6.1\t1.8\t11.1",
- "Tokelau\tTL\t-172.6\t-9.5\t-171.1\t-8.5",
- "Tonga\tTN\t-176.3\t-22.4\t-176.2\t-22.3\t-175.5\t-21.5\t-174.5\t-20.0",
- "Trinidad and Tobago\tTD\t-62.0\t10.0\t-60.5\t11.3",
- "Tromelin Island\tTE\t54.5\t-15.9\t54.5\t-15.9",
- "Tunisia\tTS\t7.5\t30.2\t11.6\t37.5",
- "Turkey\tTU\t25.6\t35.8\t44.8\t42.1",
- "Turkmenistan\tTX\t52.4\t35.1\t66.7\t42.8",
- "Turks and Caicos Islands\tTK\t-73.8\t20.9\t-73.0\t21.3",
- "Tuvalu\tTV\t176.0\t-7.3\t177.3\t-5.6\t178.4\t-8.0\t178.7\t-7.4\t179.0\t-9.5\t179.9\t-8.5",
- "Uganda\tUG\t29.5\t-1.5\t35.0\t4.2",
- "Ukraine\tUP\t22.1\t44.3\t40.2\t52.4",
- "United Arab Emirates\tAE\t51.1\t22.4\t56.4\t26.1",
- "United Kingdom\tUK\t-8.7\t49.7\t1.8\t60.8",
- "Uruguay\tUY\t-58.5\t-35.0\t-53.1\t-30.1",
- "USA\tUS\t-124.8\t24.5\t-66.9\t49.4\t-168.2\t54.3\t-130.0\t71.4\t172.4\t52.3\t176.0\t53.0\t177.2\t51.3\t179.8\t52.1\t-179.5\t51.0\t-172.0\t52.5\t-171.5\t52.0\t-164.5\t54.5\t-164.8\t23.5\t-164.7\t23.6\t-162.0\t23.0\t-161.9\t23.1\t-160.6\t18.9\t-154.8\t22.2",
- "USA: Alabama\tXX\t-88.8\t30.1\t-84.9\t35.0",
- "USA: Alaska\tXX\t-168.2\t54.3\t-130.0\t71.4\t172.4\t52.3\t176.0\t53.0\t177.2\t51.3\t179.8\t52.1\t-179.5\t51.0\t-172.0\t52.5\t-171.5\t52.0\t-164.5\t54.5",
- "USA: Alaska, Aleutian Islands\tXX\t172.4\t52.3\t176.0\t53.0\t177.2\t51.3\t179.8\t52.1\t-179.5\t51.0\t-172.0\t52.5\t-171.5\t52.0\t-164.5\t54.5",
- "USA: Arizona\tXX\t-114.9\t31.3\t-109.0\t37.0",
- "USA: Arkansas\tXX\t-94.7\t33.0\t-89.6\t36.5",
- "USA: California\tXX\t-124.5\t32.5\t-114.1\t42.0",
- "USA: Colorado\tXX\t-109.1\t36.9\t-102.0\t41.0",
- "USA: Connecticut\tXX\t-73.8\t40.9\t-71.8\t42.1",
- "USA: Delaware\tXX\t-75.8\t38.4\t-74.9\t39.8",
- "USA: Florida\tXX\t-87.7\t24.5\t-80.0\t31.0",
- "USA: Georgia\tXX\t-85.7\t30.3\t-80.8\t35.0",
- "USA: Hawaii\tXX\t-164.8\t23.5\t-164.7\t23.6\t-162.0\t23.0\t-161.9\t23.1\t-160.6\t18.9\t-154.8\t22.2",
- "USA: Idaho\tXX\t-117.3\t41.9\t-111.0\t49.0",
- "USA: Illinois\tXX\t-91.6\t36.9\t-87.0\t42.5",
- "USA: Indiana\tXX\t-88.1\t37.7\t-84.8\t41.8",
- "USA: Iowa\tXX\t-96.7\t40.3\t-90.1\t43.5",
- "USA: Kansas\tXX\t-102.1\t36.9\t-94.6\t40.0",
- "USA: Kentucky\tXX\t-89.5\t36.5\t-82.0\t39.1",
- "USA: Louisiana\tXX\t-94.1\t28.9\t-88.8\t33.0",
- "USA: Maine\tXX\t-71.1\t43.0\t-66.9\t47.5",
- "USA: Maryland\tXX\t-79.5\t37.8\t-75.1\t39.7",
- "USA: Massachusetts\tXX\t-73.6\t41.2\t-69.9\t42.9",
- "USA: Michigan\tXX\t-90.5\t41.6\t-82.1\t48.3",
- "USA: Minnesota\tXX\t-97.3\t43.4\t-90.0\t49.4",
- "USA: Mississippi\tXX\t-91.7\t30.1\t-88.1\t35.0",
- "USA: Missouri\tXX\t-95.8\t36.0\t-89.1\t40.6",
- "USA: Montana\tXX\t-116.1\t44.3\t-104.0\t49.0",
- "USA: Nebraska\tXX\t-104.1\t40.0\t-95.3\t43.0",
- "USA: Nevada\tXX\t-120.0\t35.0\t-114.0\t42.0",
- "USA: New Hampshire\tXX\t-72.6\t42.6\t-70.7\t45.3",
- "USA: New Jersey\tXX\t-75.6\t38.9\t-73.9\t41.4",
- "USA: New Mexico\tXX\t-109.1\t31.3\t-103.0\t37.0",
- "USA: New York\tXX\t-79.8\t40.4\t-71.9\t45.0",
- "USA: North Carolina\tXX\t-84.4\t33.8\t-75.5\t36.6",
- "USA: North Dakota\tXX\t-104.1\t45.9\t-96.6\t49.0",
- "USA: Ohio\tXX\t-84.9\t38.3\t-80.5\t42.3",
- "USA: Oklahoma\tXX\t-103.1\t33.6\t-94.4\t37.0",
- "USA: Oregon\tXX\t-124.6\t41.9\t-116.5\t46.3",
- "USA: Pennsylvania\tXX\t-80.6\t39.7\t-74.7\t42.5",
- "USA: Rhode Island\tXX\t-71.9\t41.1\t-71.1\t42.0",
- "USA: South Carolina\tXX\t-83.4\t32.0\t-78.6\t35.2",
- "USA: South Dakota\tXX\t-104.1\t42.4\t-96.4\t45.9",
- "USA: Tennessee\tXX\t-90.4\t35.0\t-81.7\t36.7",
- "USA: Texas\tXX\t-106.7\t25.8\t-93.5\t36.5",
- "USA: Utah\tXX\t-114.1\t37.0\t-109.1\t42.0",
- "USA: Vermont\tXX\t-73.5\t42.7\t-71.5\t45.0",
- "USA: Virginia\tXX\t-83.7\t36.5\t-75.2\t39.5",
- "USA: Washington\tXX\t-124.8\t45.5\t-116.9\t49.0",
- "USA: West Virginia\tXX\t-82.7\t37.1\t-77.7\t40.6",
- "USA: Wisconsin\tXX\t-92.9\t42.4\t-86.3\t47.3",
- "USA: Wyoming\tXX\t-111.1\t40.9\t-104.1\t45.0",
- "Uzbekistan\tUZ\t55.9\t37.1\t73.1\t45.6",
- "Vanuatu\tNH\t166.5\t-20.3\t170.2\t-13.1",
- "Venezuela\tVE\t-73.4\t0.7\t-59.8\t12.2",
- "Viet Nam\tVM\t102.1\t8.4\t109.5\t23.4",
- "Virgin Islands\tVQ\t-65.1\t17.6\t-64.6\t18.5",
- "Wake Island\tWQ\t166.5\t19.2\t166.7\t19.3",
- "Wallis and Futuna\tWF\t-178.3\t-14.4\t-178.0\t-14.2\t-176.3\t-13.4\t-176.1\t-13.2",
- "West Bank\tWE\t34.8\t31.3\t35.6\t32.6",
- "Western Sahara\tWI\t-17.2\t20.7\t-8.7\t27.7",
- "Yemen\tYM\t41.8\t11.7\t54.5\t19.0",
- "Zambia\tZA\t21.9\t-18.1\t33.7\t-8.2",
- "Zimbabwe\tZI\t25.2\t-22.5\t33.1\t-15.6",
+static CharPtr bodiesOfWater [] = {
+ "Basin",
+ "Bay",
+ "Bight",
+ "Canal",
+ "Channel",
+ "Coastal",
+ "Cove",
+ "Estuary",
+ "Fjord",
+ "Freshwater",
+ "Gulf",
+ "Harbor",
+ "Inlet",
+ "Lagoon",
+ "Lake",
+ "Narrows",
+ "Ocean",
+ "Offshore",
+ "Passage",
+ "Passages",
+ "Reef",
+ "River",
+ "Sea",
+ "Seawater",
+ "Sound",
+ "Strait",
+ "Trench",
+ "Trough",
+ "Water",
+ "Waters",
NULL
};
+static TextFsaPtr GetBodiesOfWaterFSA (void)
-/* one CtBlock for each discontiguous area per country */
-typedef struct ctblock {
- CharPtr country; /* points to instance in countries list */
- FloatHi minx;
- FloatHi miny;
- FloatHi maxx;
- FloatHi maxy;
-} CtBlock, PNTR CtBlockPtr;
+{
+ TextFsaPtr fsa;
+ Int2 i;
+ CharPtr prop = "BodiesOfWaterFSA";
-/* one CtGrid for each 10-degree-by-10-degree area touched by a CtBlock */
+ fsa = (TextFsaPtr) GetAppProperty (prop);
+ if (fsa != NULL) return fsa;
-typedef struct ctgrid {
- CtBlockPtr cbp;
- Int2 xindex;
- Int2 yindex;
-} CtGrid, PNTR CtGridPtr;
+ fsa = TextFsaNew ();
+ if (fsa != NULL) {
+ for (i = 0; bodiesOfWater [i] != NULL; i++) {
+ TextFsaAdd (fsa, bodiesOfWater [i]);
+ }
+ }
-/* main structure for country/lat-lon lookup */
+ SetAppProperty (prop, (Pointer) fsa);
-typedef struct ctset {
- ValNodePtr countries;
- ValNodePtr blocks;
- ValNodePtr grids;
- CtBlockPtr PNTR bkarray; /* sorted by country name */
- CtGridPtr PNTR gdarray; /* sorted by geographic index */
- Int4 num_blocks;
- Int4 num_grids;
-} CtSet, PNTR CtSetPtr;
+ return fsa;
+}
-static int LIBCALLBACK SortCbpByCountry (
- VoidPtr ptr1,
- VoidPtr ptr2
-)
+NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str)
{
- int compare;
- CtBlockPtr cbp1, cbp2;
+ Char ch;
+ TextFsaPtr fsa;
+ CharPtr ptr;
+ Int4 state;
+ ValNodePtr matches;
- if (ptr1 == NULL || ptr2 == NULL) return 0;
- cbp1 = *((CtBlockPtr PNTR) ptr1);
- cbp2 = *((CtBlockPtr PNTR) ptr2);
- if (cbp1 == NULL || cbp2 == NULL) return 0;
+ if (StringHasNoText (str)) return FALSE;
- compare = StringICmp (cbp1->country, cbp2->country);
- if (compare > 0) {
- return 1;
- } else if (compare < 0) {
- return -1;
+ fsa = GetBodiesOfWaterFSA ();
+ if (fsa == NULL) return FALSE;
+
+ state = 0;
+ ptr = str;
+ ch = *ptr;
+
+ while (ch != '\0') {
+ matches = NULL;
+ state = TextFsaNext (fsa, state, ch, &matches);
+ ptr++;
+ ch = *ptr;
+ if (ch == '\0' || ch == ',' || ch == ':' || ch == ';' || ch == ' ') {
+ if (matches != NULL) return TRUE;
+ state = 0;
+ }
}
- return 0;
+ return FALSE;
}
-static int CgpGridComp (
- CtGridPtr cgp1,
- Int2 xindex,
- Int2 yindex
+/* BEGINNING OF NEW LATITUDE-LONGITUDE COUNTRY VALIDATION CODE */
+
+/* latitude-longitude to country conversion */
+
+typedef struct ctyblock {
+ CharPtr name; /* name of country or country: subregion */
+ CharPtr level0; /* just the country */
+ CharPtr level1; /* just the subregion */
+ Int4 area; /* pixel area for choosing smallest overlapping subregion */
+ Int4 minlat; /* minimum latitude */
+ Int4 maxlat; /* maximum latitude */
+ Int4 minlon; /* minimum longitude */
+ Int4 maxlon; /* maximum longitude */
+} CtyBlock, PNTR CtyBlockPtr;
+
+typedef struct latblock {
+ CtyBlockPtr landmass; /* points to instance in countries list */
+ Int4 lat; /* latitude (integer in 10ths of a degree) */
+ Int4 minlon; /* minimum longitude */
+ Int4 maxlon; /* maximum longitude */
+} LatBlock, PNTR LatBlockPtr;
+
+typedef struct ctryset {
+ ValNodePtr ctyblocks; /* linked list of country blocks */
+ CtyBlockPtr PNTR ctyarray; /* country blocks sorted by name */
+ Int4 numCtyBlocks;
+ ValNodePtr latblocks; /* linked list of latitude blocks */
+ LatBlockPtr PNTR latarray; /* latitude blocks sorted by latitude then longitude */
+ Int4 numLatBlocks;
+ FloatHi scale;
+} CtrySet, PNTR CtrySetPtr;
+
+static int LIBCALLBACK SortByCountry (
+ VoidPtr ptr1,
+ VoidPtr ptr2
)
{
- if (cgp1 == NULL) return 0;
+ CtyBlockPtr cbp1;
+ CtyBlockPtr cbp2;
+ int cmp;
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
- if (cgp1->xindex > xindex) {
- return 1;
- } else if (cgp1->xindex < xindex) {
- return -1;
- }
+ if (ptr1 == NULL || ptr2 == NULL) return 0;
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 == NULL || vnp2 == NULL) return 0;
+ cbp1 = (CtyBlockPtr) vnp1->data.ptrvalue;
+ cbp2 = (CtyBlockPtr) vnp2->data.ptrvalue;
+ if (cbp1 == NULL || cbp2 == NULL) return 0;
- if (cgp1->yindex > yindex) {
+ cmp = StringICmp (cbp1->name, cbp2->name);
+ if (cmp > 0) {
return 1;
- } else if (cgp1->yindex < yindex) {
+ } else if (cmp < 0) {
return -1;
}
return 0;
}
-static int LIBCALLBACK SortCgpByGrid (
+static int LIBCALLBACK SortByLatLon (
VoidPtr ptr1,
VoidPtr ptr2
)
{
- CtBlockPtr cbp1, cbp2;
- CtGridPtr cgp1, cgp2;
- int compare;
+ CtyBlockPtr cbp1;
+ CtyBlockPtr cbp2;
+ int cmp;
+ LatBlockPtr lbp1;
+ LatBlockPtr lbp2;
+ ValNodePtr vnp1;
+ ValNodePtr vnp2;
if (ptr1 == NULL || ptr2 == NULL) return 0;
- cgp1 = *((CtGridPtr PNTR) ptr1);
- cgp2 = *((CtGridPtr PNTR) ptr2);
- if (cgp1 == NULL || cgp2 == NULL) return 0;
+ vnp1 = *((ValNodePtr PNTR) ptr1);
+ vnp2 = *((ValNodePtr PNTR) ptr2);
+ if (vnp1 == NULL || vnp2 == NULL) return 0;
+ lbp1 = (LatBlockPtr) vnp1->data.ptrvalue;
+ lbp2 = (LatBlockPtr) vnp2->data.ptrvalue;
+ if (lbp1 == NULL || lbp2 == NULL) return 0;
- compare = CgpGridComp (cgp1, cgp2->xindex, cgp2->yindex);
- if (compare > 0) {
- return 1;
- } else if (compare < 0) {
+ if (lbp1->lat < lbp2->lat) {
return -1;
- }
-
- cbp1 = cgp1->cbp;
- cbp2 = cgp2->cbp;
- if (cbp1 == NULL || cbp2 == NULL) return 0;
-
- if (cbp1->minx > cbp2->minx) {
+ } else if (lbp1->lat > lbp2->lat) {
return 1;
- } else if (cbp1->minx < cbp2->minx) {
- return -1;
}
- if (cbp1->maxx > cbp2->maxx) {
+ if (lbp1->minlon < lbp2->minlon) {
return -1;
- } else if (cbp1->maxx < cbp2->maxx) {
+ } else if (lbp1->minlon > lbp2->minlon) {
return 1;
}
- if (cbp1->miny > cbp2->miny) {
+ if (lbp1->maxlon < lbp2->maxlon) {
return 1;
- } else if (cbp1->miny < cbp2->miny) {
+ } else if (lbp1->maxlon > lbp2->maxlon) {
return -1;
}
- if (cbp1->maxy > cbp2->maxy) {
+ cbp1 = lbp1->landmass;
+ cbp2 = lbp2->landmass;
+ if (cbp1 == NULL || cbp2 == NULL) return 0;
+
+ if (cbp1->area < cbp2->area) {
return -1;
- } else if (cbp1->maxy < cbp2->maxy) {
+ } else if (cbp1->area > cbp2->area) {
return 1;
}
- compare = StringICmp (cbp1->country, cbp2->country);
- if (compare > 0) {
+ cmp = StringICmp (cbp1->name, cbp2->name);
+ if (cmp > 0) {
return 1;
- } else if (compare < 0) {
+ } else if (cmp < 0) {
return -1;
}
return 0;
}
-static Int2 LatLonDegreeToIndex (
- FloatHi coord
-)
+#define EPSILON 0.001
-{
- double fval;
- long ival;
+static Int4 ConvertLat (FloatHi lat, FloatHi scale) {
+
+ Int4 val = 0;
+
+ if (lat < -90.0) {
+ lat = -90.0;
+ }
+ if (lat > 90.0) {
+ lat = 90.0;
+ }
+
+ if (lat > 0) {
+ val = (Int4) (lat * scale + EPSILON);
+ } else {
+ val = (Int4) (-(-lat * scale + EPSILON));
+ }
+
+ return val;
+}
+
+static Int4 ConvertLon (FloatHi lon, FloatHi scale) {
+
+ Int4 val = 0;
+
+ if (lon < -180.0) {
+ lon = -180.0;
+ }
+ if (lon > 180.0) {
+ lon = 180.0;
+ }
- fval = coord;
- fval += 200.0;
- fval /= 10.0;
- ival = (long) fval;
- ival -= 20;
+ if (lon > 0) {
+ val = (Int4) (lon * scale + EPSILON);
+ } else {
+ val = (Int4) (-(-lon * scale + EPSILON));
+ }
- return (Int2) ival;
+ return val;
}
-static CtSetPtr CtSetDataFree (
- CtSetPtr csp
+static CtrySetPtr FreeLatLonCountryData (
+ CtrySetPtr csp
)
{
+ CtyBlockPtr cbp;
+ ValNodePtr vnp;
+
if (csp == NULL) return NULL;
- ValNodeFreeData (csp->countries);
- ValNodeFreeData (csp->blocks);
- ValNodeFreeData (csp->grids);
+ for (vnp = csp->ctyblocks; vnp != NULL; vnp = vnp->next) {
+ cbp = (CtyBlockPtr) vnp->data.ptrvalue;
+ if (cbp == NULL) continue;
+ MemFree (cbp->name);
+ MemFree (cbp->level0);
+ MemFree (cbp->level1);
+ }
+
+ ValNodeFreeData (csp->ctyblocks);
+ ValNodeFreeData (csp->latblocks);
- MemFree (csp->bkarray);
- MemFree (csp->gdarray);
+ MemFree (csp->ctyarray);
+ MemFree (csp->latarray);
MemFree (csp);
return NULL;
}
-static Boolean ct_set_not_found = FALSE;
+/* Original data source is Natural Earth. Free vector and raster map data @ http://naturalearthdata.com */
+
+static CharPtr LatLonCountryReadNextLine (
+ FileCache PNTR fcp,
+ CharPtr buf,
+ size_t bufsize,
+ CharPtr PNTR local,
+ Int4Ptr idxP
+)
+
+{
+ Int4 idx;
+ CharPtr str = NULL;
+
+ if (fcp != NULL) {
+ str = FileCacheReadLine (fcp, buf, bufsize, NULL);
+ }
+
+ if (local != NULL && idxP != NULL) {
+ idx = *idxP;
+ str = local [idx];
+ if (str != NULL) {
+ StringNCpy_0 (buf, local [idx], bufsize);
+ str = buf;
+ }
+ idx++;
+ *idxP = idx;
+ }
+
+ return str;
+}
-static CtSetPtr GetCtSetLatLonDataInt (
+static CtrySetPtr ReadLatLonCountryData (
CharPtr prop,
CharPtr file,
CharPtr PNTR local
)
{
- CtBlockPtr PNTR bkarray;
- ValNodePtr blocks = NULL;
- FloatHi bounds [4];
- CtBlockPtr cbp;
- CtGridPtr cgp;
- ValNodePtr countries = NULL;
- CharPtr country;
- CtSetPtr csp;
- FileCache fc;
- FILE *fp = NULL;
- CtGridPtr PNTR gdarray;
- ValNodePtr grids = NULL;
- Int2 hix;
- Int2 hiy;
- Int2 i;
- Int2 j = 0;
- ValNodePtr lastblk = NULL;
- ValNodePtr lastctry = NULL;
- ValNodePtr lastgrd = NULL;
- Char line [1024];
- Int2 lox;
- Int2 loy;
- Int4 num;
- Char path [PATH_MAX];
- CharPtr ptr;
- ErrSev sev;
- CharPtr str = NULL;
- double val;
- ValNodePtr vnp;
- CharPtr wrk;
- Int2 x;
- Int2 y;
-
- csp = (CtSetPtr) GetAppProperty (prop);
- if (csp != NULL) return csp;
-
- if (ct_set_not_found) return NULL;
+ Char buf [128];
+ Char ch;
+ CtyBlockPtr cbp = NULL;
+ CtrySetPtr csp = NULL;
+ CtyBlockPtr PNTR ctyarray;
+ ValNodePtr ctyblocks = NULL;
+ FileCache fc;
+ FileCache PNTR fcp = NULL;
+ FILE *fp = NULL;
+ Int4 i;
+ Int4 idx = 0;
+ ValNodePtr lastlatblock = NULL;
+ ValNodePtr lastctyblock = NULL;
+ FloatHi latitude;
+ LatBlockPtr PNTR latarray;
+ ValNodePtr latblocks = NULL;
+ LatBlockPtr lbp;
+ Char line [1024];
+ FloatHi maxlongitude;
+ FloatHi minlongitude;
+ Char path [PATH_MAX];
+ CharPtr ptr;
+ CharPtr recentCountry = NULL;
+ FloatHi scale = 0.0;
+ Boolean scale_not_set = TRUE;
+ ErrSev sev;
+ CharPtr str;
+ Char tmp [128];
+ double val;
+ ValNodePtr vnp;
+ CharPtr wrk;
if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
FileBuildPath (path, NULL, file);
@@ -9578,156 +10109,258 @@ static CtSetPtr GetCtSetLatLonDataInt (
if (fp != NULL) {
FileCacheSetup (&fc, fp);
- str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
- } else if (local != NULL) {
- str = local [j];
- if (str != NULL) {
- StringNCpy_0 (line, str, sizeof (line));
- str = line;
+ fcp = &fc;
+ local = NULL;
+ } else if (local == NULL) {
+ return NULL;
+ }
+
+ for (str = LatLonCountryReadNextLine (fcp, line, sizeof (line), local, &idx);
+ str != NULL;
+ str = LatLonCountryReadNextLine (fcp, line, sizeof (line), local, &idx)) {
+ if (StringHasNoText (str)) continue;
+
+ /* if reading from local copy, str cannot be modified, so copy to local buf and reset pointer */
+
+ StringNCpy_0 (buf, str, sizeof (buf));
+ str = buf;
+
+ ch = str [0];
+
+ /* ignore comment lines starting with hyphen */
+
+ if (ch == '-') continue;
+
+ /* Scale should be at top of file, after comments */
+
+ if (IS_DIGIT (ch)) {
+ if (scale_not_set && sscanf (str, "%lf", &val) == 1) {
+ scale = (FloatHi) val;
+ scale_not_set = FALSE;
+ }
+
+ continue;
}
- } else return NULL;
- while (str != NULL) {
- if (StringDoesHaveText (str)) {
+ /* Country starts on first column */
+
+ if (IS_ALPHA (ch)) {
+
+ if (scale_not_set) {
+ scale = 20.0;
+ scale_not_set = FALSE;
+ }
+
ptr = StringChr (str, '\t');
if (ptr != NULL) {
*ptr = '\0';
+ }
+
+ if (StringCmp (str, recentCountry) == 0) continue;
+
+ cbp = (CtyBlockPtr) MemNew (sizeof (CtyBlock));
+ if (cbp == NULL) continue;
+
+ TrimSpacesAroundString (str);
+ cbp->name = StringSave (str);
+ StringNCpy_0 (tmp, str, sizeof (tmp));
+ ptr = StringChr (tmp, ':');
+ if (ptr != NULL) {
+ *ptr = '\0';
ptr++;
- ptr = StringChr (ptr, '\t');
- if (ptr != NULL) {
- ptr++;
- if (StringDoesHaveText (str) && StringDoesHaveText (ptr)) {
+ TrimSpacesAroundString (ptr);
+ if (StringDoesHaveText (ptr)) {
+ cbp->level1 = StringSave (ptr);
+ }
+ TrimSpacesAroundString (tmp);
+ cbp->level0 = StringSave (tmp);
+ } else {
+ TrimSpacesAroundString (str);
+ cbp->level0 = StringSave (str);
+ }
+ cbp->area = 0;
+ cbp->minlat = INT4_MAX;
+ cbp->maxlat = INT4_MIN;
+ cbp->minlon = INT4_MAX;
+ cbp->maxlon = INT4_MIN;
+ vnp = ValNodeAddPointer (&lastctyblock, 0, (Pointer) cbp);
+ if (ctyblocks == NULL) {
+ ctyblocks = vnp;
+ }
+ lastctyblock = vnp;
- country = StringSave (str);
+ recentCountry = cbp->name;
- vnp = ValNodeAddPointer (&lastctry, 0, (Pointer) country);
- if (countries == NULL) {
- countries = vnp;
- }
- lastctry = vnp;
+ continue;
+ }
- wrk = StringSave (ptr);
- str = wrk;
- i = 0;
+ /* Latitude with longitude min/max pairs on line starting with tab */
- while (StringDoesHaveText (str)) {
+ if (ch != '\t') continue;
+
+ wrk = StringSave (str + 1);
+ if (wrk == NULL) continue;
+
+ ptr = StringChr (wrk, '\t');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
+ if (sscanf (wrk, "%lf", &val) == 1) {
+ latitude = (FloatHi) val;
+
+ str = ptr;
+ while (StringDoesHaveText (str)) {
+ ptr = StringChr (str, '\t');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ ptr++;
+ }
+ if (sscanf (str, "%lf", &val) != 1) {
+ /* prevent infinite loop if it fails */
+ str = NULL;
+ } else {
+ minlongitude = (FloatHi) val;
+ str = ptr;
+ if (StringDoesHaveText (str)) {
ptr = StringChr (str, '\t');
if (ptr != NULL) {
*ptr = '\0';
ptr++;
}
-
if (sscanf (str, "%lf", &val) == 1) {
- bounds [i] = (FloatHi) val;
- i++;
- if (i > 3) {
-
- cbp = (CtBlockPtr) MemNew (sizeof (CtBlock));
- if (cbp != NULL) {
- cbp->country = country;
- cbp->minx = bounds [0];
- cbp->miny = bounds [1];
- cbp->maxx = bounds [2];
- cbp->maxy = bounds [3];
-
- vnp = ValNodeAddPointer (&lastblk, 0, (Pointer) cbp);
- if (blocks == NULL) {
- blocks = vnp;
- }
- lastblk = vnp;
-
- lox = LatLonDegreeToIndex (cbp->minx);
- loy = LatLonDegreeToIndex (cbp->miny);
- hix = LatLonDegreeToIndex (cbp->maxx);
- hiy = LatLonDegreeToIndex (cbp->maxy);
-
- for (x = lox; x <= hix; x++) {
- for (y = loy; y <= hiy; y++) {
- cgp = (CtGridPtr) MemNew (sizeof (CtGrid));
- if (cgp != NULL) {
- cgp->cbp = cbp;
- cgp->xindex = x;
- cgp->yindex = y;
-
- vnp = ValNodeAddPointer (&lastgrd, 0, (Pointer) cgp);
- if (grids == NULL) {
- grids = vnp;
- }
- lastgrd = vnp;
- }
- }
- }
+ maxlongitude = (FloatHi) val;
+
+ lbp = (LatBlockPtr) MemNew (sizeof (LatBlock));
+ if (lbp != NULL) {
+ lbp->landmass = cbp;
+ lbp->lat = ConvertLat (latitude, scale);
+ lbp->minlon = ConvertLon (minlongitude, scale);
+ lbp->maxlon = ConvertLon (maxlongitude, scale);
+
+ vnp = ValNodeAddPointer (&lastlatblock, 0, (Pointer) lbp);
+ if (latblocks == NULL) {
+ latblocks = vnp;
}
-
- i = 0;
+ lastlatblock = vnp;
}
}
-
- str = ptr;
}
-
- MemFree (wrk);
+ str = ptr;
}
}
}
}
- if (fp != NULL) {
- str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
- } else {
- j++;
- str = local [j];
- if (str != NULL) {
- StringNCpy_0 (line, str, sizeof (line));
- str = line;
- }
- }
+ MemFree (wrk);
}
if (fp != NULL) {
FileClose (fp);
}
- if (countries == NULL || blocks == NULL || grids == NULL) {
- ct_set_not_found = TRUE;
+ if (ctyblocks == NULL || latblocks == NULL) {
return NULL;
}
- csp = (CtSetPtr) MemNew (sizeof (CtSet));
+ csp = (CtrySetPtr) MemNew (sizeof (CtrySet));
if (csp == NULL) return NULL;
- /* now populate, heap sort arrays */
+ for (vnp = latblocks; vnp != NULL; vnp = vnp->next) {
+ lbp = (LatBlockPtr) vnp->data.ptrvalue;
+ if (lbp == NULL) continue;
+ cbp = lbp->landmass;
+ if (cbp == NULL) continue;
+ cbp->area += lbp->maxlon - lbp->minlon + 1;
+ if (cbp->minlat > lbp->lat) {
+ cbp->minlat = lbp->lat;
+ }
+ if (cbp->maxlat < lbp->lat) {
+ cbp->maxlat = lbp->lat;
+ }
+ if (cbp->minlon > lbp->minlon) {
+ cbp->minlon = lbp->minlon;
+ }
+ if (cbp->maxlon < lbp->maxlon) {
+ cbp->maxlon = lbp->maxlon;
+ }
+ }
+
+ ctyblocks = ValNodeSort (ctyblocks, SortByCountry);
+ csp->ctyblocks = ctyblocks;
+ csp->numCtyBlocks = ValNodeLen (ctyblocks);
- num = ValNodeLen (blocks);
+ latblocks = ValNodeSort (latblocks, SortByLatLon);
+ csp->latblocks = latblocks;
+ csp->numLatBlocks = ValNodeLen (latblocks);
- csp->countries = countries;
- csp->blocks = blocks;
- csp->num_blocks = (Int2) num;
+ if (scale_not_set) {
+ scale = 20.0;
+ }
+ csp->scale = scale;
- bkarray = (CtBlockPtr PNTR) MemNew (sizeof (CtBlockPtr) * (num + 1));
- if (bkarray != NULL) {
- for (vnp = blocks, i = 0; vnp != NULL; vnp = vnp->next, i++) {
- cbp = (CtBlockPtr) vnp->data.ptrvalue;
- bkarray [i] = cbp;
+ ctyarray = (CtyBlockPtr PNTR) MemNew (sizeof (CtyBlockPtr) * (csp->numCtyBlocks + 1));
+ if (ctyarray != NULL) {
+ for (vnp = ctyblocks, i = 0; vnp != NULL; vnp = vnp->next, i++) {
+ cbp = (CtyBlockPtr) vnp->data.ptrvalue;
+ ctyarray [i] = cbp;
}
- HeapSort (bkarray, (size_t) num, sizeof (CtBlockPtr), SortCbpByCountry);
- csp->bkarray = bkarray;
+ csp->ctyarray = ctyarray;
}
- num = ValNodeLen (grids);
+ latarray = (LatBlockPtr PNTR) MemNew (sizeof (LatBlockPtr) * (csp->numLatBlocks + 1));
+ if (latarray != NULL) {
+ for (vnp = latblocks, i = 0; vnp != NULL; vnp = vnp->next, i++) {
+ lbp = (LatBlockPtr) vnp->data.ptrvalue;
+ latarray [i] = lbp;
+ }
- csp->num_grids = (Int2) num;
+ csp->latarray = latarray;
+ }
- gdarray = (CtGridPtr PNTR) MemNew (sizeof (CtGridPtr) * (num + 1));
- if (gdarray != NULL) {
- for (vnp = grids, i = 0; vnp != NULL; vnp = vnp->next, i++) {
- cgp = (CtGridPtr) vnp->data.ptrvalue;
- gdarray [i] = cgp;
+/*
+{
+ FILE *fp;
+ fp = FileOpen ("ctrymap.txt", "w");
+ if (fp != NULL) {
+ for (vnp = latblocks; vnp != NULL; vnp = vnp->next) {
+ lbp = (LatBlockPtr) vnp->data.ptrvalue;
+ if (lbp == NULL) continue;
+ cbp = lbp->landmass;
+ if (cbp == NULL) continue;
+ fprintf (fp, "%s\t[%d]\t%d\t%d\t%d\n", cbp->name, (int) cbp->area,
+ (int) lbp->lat, (int) lbp->minlon, (int) lbp->maxlon);
}
+ FileClose (fp);
+ }
+}
+*/
+
+ return csp;
+}
+
+static Boolean ctryset_not_found = FALSE;
+static Boolean watrset_not_found = FALSE;
+
+extern CharPtr latlon_onedegree [];
+extern CharPtr water_onedegree [];
+
+static CtrySetPtr GetLatLonCountryData (void)
+
+{
+ CtrySetPtr csp = NULL;
+ CharPtr prop = "CountryLatLonData";
+
+ csp = (CtrySetPtr) GetAppProperty (prop);
+ if (csp != NULL) return csp;
+
+ if (ctryset_not_found) return NULL;
- HeapSort (gdarray, (size_t) num, sizeof (CtGridPtr), SortCgpByGrid);
- csp->gdarray = gdarray;
+ csp = ReadLatLonCountryData (prop, "lat_lon_country.txt", latlon_onedegree);
+
+ if (csp == NULL) {
+ ctryset_not_found = TRUE;
+ return NULL;
}
SetAppProperty (prop, (Pointer) csp);
@@ -9735,135 +10368,229 @@ static CtSetPtr GetCtSetLatLonDataInt (
return csp;
}
-static CtSetPtr GetCtSetLatLonData (
- void
-)
+static CtrySetPtr GetLatLonWaterData (void)
{
- return GetCtSetLatLonDataInt ("CountryLatLonList", "country_lat_lon.txt", ctry_lat_lon);
+ CtrySetPtr csp = NULL;
+ CharPtr prop = "WaterLatLonData";
+
+ csp = (CtrySetPtr) GetAppProperty (prop);
+ if (csp != NULL) return csp;
+
+ if (watrset_not_found) return NULL;
+
+ csp = ReadLatLonCountryData (prop, "lat_lon_water.txt", water_onedegree);
+
+ if (csp == NULL) {
+ watrset_not_found = TRUE;
+ return NULL;
+ }
+
+ SetAppProperty (prop, (Pointer) csp);
+
+ return csp;
}
-NLM_EXTERN Boolean IsCountryInLatLonList (
- CharPtr country
+static CtyBlockPtr GetEntryInLatLonListIndex (
+ CharPtr country,
+ CtrySetPtr csp
)
{
- CtBlockPtr cbp;
- CtBlockPtr PNTR bkarray;
- CtSetPtr csp;
- Int2 L, R, mid;
-
- if (StringHasNoText (country)) return FALSE;
+ CtyBlockPtr PNTR array;
+ CtyBlockPtr cbp;
+ Int2 L, R, mid;
- csp = GetCtSetLatLonData ();
- if (csp == NULL) return FALSE;
+ if (StringHasNoText (country)) return NULL;
+ if (csp == NULL) return NULL;
- bkarray = csp->bkarray;
- if (bkarray == NULL) return FALSE;
+ array = csp->ctyarray;
+ if (array == NULL) return NULL;
L = 0;
- R = csp->num_blocks - 1;
+ R = csp->numCtyBlocks - 1;
while (L < R) {
mid = (L + R) / 2;
- cbp = bkarray [mid];
- if (cbp != NULL && StringICmp (cbp->country, country) < 0) {
+ cbp = array [mid];
+ if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) < 0) {
L = mid + 1;
} else {
R = mid;
}
}
- cbp = bkarray [R];
- if (cbp != NULL && StringICmp (cbp->country, country) == 0) return TRUE;
+ cbp = array [R];
+ if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) == 0) return cbp;
+
+ return NULL;
+}
+
+NLM_EXTERN Boolean CountryIsInLatLonList (
+ CharPtr country
+)
+
+{
+ CtyBlockPtr cbp;
+ CtrySetPtr csp;
+
+ if (StringHasNoText (country)) return FALSE;
+ csp = GetLatLonCountryData ();
+ if (csp == NULL) return FALSE;
+
+ cbp = GetEntryInLatLonListIndex (country, csp);
+ if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) == 0) return TRUE;
return FALSE;
}
-static Int2 GetCountryBlockIndex (
+/* Thin forwarding wrapper: returns whatever CountryIsInLatLonList reports for the same name. */
+NLM_EXTERN Boolean IsCountryInLatLonList (
+  CharPtr country
+)
+
+{
+  Boolean  found;
+
+  found = CountryIsInLatLonList (country);
+
+  return found;
+}
+
+NLM_EXTERN Boolean WaterIsInLatLonList (
CharPtr country
)
{
- CtBlockPtr cbp;
- CtBlockPtr PNTR bkarray;
- CtSetPtr csp;
- Int2 L, R, mid;
+ CtyBlockPtr cbp;
+ CtrySetPtr csp;
+
+ if (StringHasNoText (country)) return FALSE;
+ csp = GetLatLonWaterData ();
+ if (csp == NULL) return FALSE;
- if (StringHasNoText (country)) return -1;
+ cbp = GetEntryInLatLonListIndex (country, csp);
+ if (cbp != NULL && cbp->name != NULL && StringICmp (cbp->name, country) == 0) return TRUE;
- csp = GetCtSetLatLonData ();
- if (csp == NULL) return -1;
+ return FALSE;
+}
- bkarray = csp->bkarray;
- if (bkarray == NULL) return -1;
+/* Three-way comparison of a latitude block against a scaled latitude key.
+   Returns -1 if the block lies below the key, 1 if above, 0 if equal or if
+   lbp is NULL.  The key is Int4 because callers compute it in Int4
+   (latitude - maxDelta); an Int2 parameter would silently narrow it. */
+static int LatLonCmp (
+ LatBlockPtr lbp,
+ Int4 latitude
+)
+
+{
+ if (lbp == NULL) return 0;
+
+ if (lbp->lat < latitude) {
+ return -1;
+ } else if (lbp->lat > latitude) {
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Binary search over array (csp->numLatBlocks entries, assumed ordered by
+   latitude) for the first entry that does not compare below the key.
+   Returns that index; returns 0 when csp or array is NULL.  Callers still
+   have to check the entry at the returned index themselves. */
+static Int4 GetLatLonIndex (
+ CtrySetPtr csp,
+ LatBlockPtr PNTR array,
+ Int4 latitude
+)
+
+{
+ LatBlockPtr lbp;
+ Int4 L, R, mid;
+
+ if (csp == NULL || array == NULL) return 0;
L = 0;
- R = csp->num_blocks - 1;
+ R = csp->numLatBlocks - 1;
while (L < R) {
mid = (L + R) / 2;
- cbp = bkarray [mid];
- if (cbp != NULL && StringICmp (cbp->country, country) < 0) {
+ lbp = array [mid];
+ if (lbp != NULL && LatLonCmp (lbp, latitude) < 0) {
L = mid + 1;
} else {
R = mid;
}
}
- if (R < csp->num_blocks) {
- cbp = bkarray [R];
- if (cbp == NULL) return -1;
- if (StringICmp (cbp->country, country) != 0) return -1;
- return R;
- }
-
- return -1;
+ return R;
}
-NLM_EXTERN Boolean CountryBoxesOverlap (
- CharPtr country1,
- CharPtr country2
+static Boolean SubregionStringICmp (
+ CharPtr region,
+ CharPtr country
)
{
- CtBlockPtr cbp1, cbp2;
- CtBlockPtr PNTR bkarray;
- CtSetPtr csp;
- Int4 num_blocks;
- Int2 R1, R2, x1, x2;
+ Char possible [256];
+ CharPtr ptr;
+
+ if (StringHasNoText (region) || StringHasNoText (country)) return FALSE;
+ StringNCpy_0 (possible, region, sizeof (possible));
+ ptr = StringChr (possible, ':');
+ if (ptr == NULL) return FALSE;
+ *ptr = '\0';
+ if (StringICmp (possible, country) == 0) return TRUE;
+ return FALSE;
+}
- R1 = GetCountryBlockIndex (country1);
- R2 = GetCountryBlockIndex (country2);
+static Boolean RegionContainsLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon,
+ CtrySetPtr csp
+)
- if (R1 < 0 || R2 < 0) return FALSE;
+{
+ LatBlockPtr PNTR array;
+ CtyBlockPtr cbp;
+ Int4 latitude;
+ Int4 longitude;
+ LatBlockPtr lbp;
+ Int4 R;
- csp = GetCtSetLatLonData ();
+ if (StringHasNoText (country)) return FALSE;
if (csp == NULL) return FALSE;
- num_blocks = csp->num_blocks;
- bkarray = csp->bkarray;
- if (bkarray == NULL) return FALSE;
+ array = csp->latarray;
+ if (array == NULL) return FALSE;
- for (x1 = R1; x1 < num_blocks; x1++) {
- cbp1 = bkarray [x1];
- if (cbp1 == NULL) return FALSE;
- if (StringICmp (cbp1->country, country1) != 0) break;
+ latitude = ConvertLat (lat, csp->scale);
+ longitude = ConvertLon (lon, csp->scale);
- for (x2 = R2; x2 < num_blocks; x2++) {
- cbp2 = bkarray [x2];
- if (cbp2 == NULL) return FALSE;
- if (StringICmp (cbp2->country, country2) != 0) break;
+ for (R = GetLatLonIndex (csp, array, latitude); R < csp->numLatBlocks; R++) {
+ lbp = array [R];
+ if (lbp == NULL) break;
+ if (latitude != lbp->lat) break;
- if (cbp1->maxx >= cbp2->minx && cbp1->minx <= cbp2->maxx) {
- if (cbp1->maxy >= cbp2->miny && cbp1->miny <= cbp2->maxy) return TRUE;
- }
- }
+ if (longitude < lbp->minlon) continue;
+ if (longitude > lbp->maxlon) continue;
+
+ cbp = lbp->landmass;
+ if (cbp == NULL) continue;
+ if (StringICmp (cbp->name, country) == 0) return TRUE;
+ if (SubregionStringICmp (cbp->name, country)) return TRUE;
}
return FALSE;
}
+/* Reports whether the country lat/lon data places (lat, lon) inside the named
+   country.  FALSE when the name has no text or the country data set cannot
+   be obtained. */
+NLM_EXTERN Boolean CountryContainsLatLon (
+  CharPtr country,
+  FloatHi lat,
+  FloatHi lon
+)
+
+{
+  CtrySetPtr  countryData = NULL;
+  Boolean     inside = FALSE;
+
+  if (! StringHasNoText (country)) {
+    countryData = GetLatLonCountryData ();
+    if (countryData != NULL) {
+      inside = RegionContainsLatLon (country, lat, lon, countryData);
+    }
+  }
+
+  return inside;
+}
+
NLM_EXTERN Boolean TestLatLonForCountry (
CharPtr country,
FloatHi lat,
@@ -9871,41 +10598,153 @@ NLM_EXTERN Boolean TestLatLonForCountry (
)
{
- CtBlockPtr cbp;
- CtBlockPtr PNTR bkarray;
- CtSetPtr csp;
- Int2 L, R, mid;
+ return CountryContainsLatLon (country, lat, lon);
+}
+
+NLM_EXTERN Boolean WaterContainsLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon
+)
+
+{
+ CtrySetPtr csp;
if (StringHasNoText (country)) return FALSE;
- csp = GetCtSetLatLonData ();
+ csp = GetLatLonWaterData ();
if (csp == NULL) return FALSE;
- bkarray = csp->bkarray;
- if (bkarray == NULL) return FALSE;
+ return RegionContainsLatLon (country, lat, lon, csp);
+}
- L = 0;
- R = csp->num_blocks - 1;
+/* Decides whether candidate block cbp should replace the current best match.
+   country and province are the caller's preferred names (either may be NULL).
+   newer_is_smaller is the caller's own verdict, returned whenever the name
+   preferences do not settle the question.  A candidate matching the preferred
+   country beats one that does not; within the preferred country, a candidate
+   matching the preferred province beats one that does not. */
+static Boolean NewLatLonCandidateIsBetter (
+ CharPtr country,
+ CharPtr province,
+ CtyBlockPtr best,
+ CtyBlockPtr cbp,
+ Boolean newer_is_smaller
+)
- while (L < R) {
- mid = (L + R) / 2;
- cbp = bkarray [mid];
- if (cbp != NULL && StringICmp (cbp->country, country) < 0) {
- L = mid + 1;
- } else {
- R = mid;
+{
+ if (cbp == NULL) return FALSE;
+ if (best == NULL) return TRUE;
+
+ /* if no preferred country, just look for smallest area */
+ if (country == NULL) {
+ return newer_is_smaller;
+ }
+
+ /* if match to preferred country */
+ if (StringICmp (country, cbp->level0) == 0) {
+
+ /* if best was not preferred country, take new match */
+ if (StringICmp (country, best->level0) != 0) return TRUE;
+
+ /* if best matches preferred province but new candidate does not, keep best
+    (mirrors the country-level rule; otherwise a smaller block from another
+    province would displace the province match) */
+ if (province != NULL && StringICmp (province, best->level1) == 0 &&
+ StringICmp (province, cbp->level1) != 0) return FALSE;
+
+ /* if match to preferred province */
+ if (province != NULL && StringICmp (province, cbp->level1) == 0) {
+
+ /* if best was not preferred province, take new match */
+ if (StringICmp (province, best->level1) != 0) return TRUE;
}
+
+ /* if both match province, or neither does, or no preferred province, take smallest */
+ return newer_is_smaller;
}
- while (R < csp->num_blocks) {
- cbp = bkarray [R];
- if (cbp == NULL) return FALSE;
- if (StringICmp (cbp->country, country) != 0) return FALSE;
- if (lon >= cbp->minx && lat >= cbp->miny && lon <= cbp->maxx && lat <= cbp->maxy) return TRUE;
- R++;
+ /* if best matches preferred country, keep */
+ if (StringICmp (country, best->level0) == 0) return FALSE;
+
+ /* otherwise take smallest */
+ return newer_is_smaller;
+}
+
+static CtyBlockPtr LookupRegionByLatLon (
+ FloatHi lat,
+ FloatHi lon,
+ CharPtr country,
+ CharPtr province,
+ CtrySetPtr csp
+)
+
+{
+ LatBlockPtr PNTR array;
+ CtyBlockPtr cbp, best = NULL;
+ Int4 latitude;
+ Int4 longitude;
+ LatBlockPtr lbp;
+ Int4 R;
+
+ if (csp == NULL) return NULL;
+
+ array = csp->latarray;
+ if (array == NULL) return NULL;
+
+ latitude = ConvertLat (lat, csp->scale);
+ longitude = ConvertLon (lon, csp->scale);
+
+ for (R = GetLatLonIndex (csp, array, latitude); R < csp->numLatBlocks; R++) {
+ lbp = array [R];
+ if (lbp == NULL) break;
+ if (latitude != lbp->lat) break;
+
+ if (longitude < lbp->minlon) continue;
+ if (longitude > lbp->maxlon) continue;
+
+ cbp = lbp->landmass;
+ if (cbp == NULL) continue;
+
+ if (best == NULL || NewLatLonCandidateIsBetter (country, province, best, cbp, (Boolean) (cbp->area < best->area))) {
+ best = cbp;
+ }
}
- return FALSE;
+ return best;
+}
+
+/* Looks (lat, lon) up in the country data set, passing the preferred country
+   and province through to LookupRegionByLatLon.  NULL when the country data
+   set is unavailable or the lookup finds nothing. */
+static CtyBlockPtr GuessCountryByLatLon (
+  FloatHi lat,
+  FloatHi lon,
+  CharPtr country,
+  CharPtr province
+)
+
+{
+  CtrySetPtr  countryData = GetLatLonCountryData ();
+
+  if (countryData != NULL) {
+    return LookupRegionByLatLon (lat, lon, country, province, countryData);
+  }
+
+  return NULL;
+}
+
+/* Looks (lat, lon) up in the water data set, passing country as the preferred
+   name and no preferred province.  NULL when the water data set is unavailable
+   or the lookup finds nothing. */
+static CtyBlockPtr GuessWaterByLatLon (
+  FloatHi lat,
+  FloatHi lon,
+  CharPtr country
+)
+
+{
+  CtrySetPtr  waterData = GetLatLonWaterData ();
+
+  if (waterData != NULL) {
+    return LookupRegionByLatLon (lat, lon, country, NULL, waterData);
+  }
+
+  return NULL;
+}
+
+/* Returns the name stored on the country block found for (lat, lon) with no
+   preferred country or province, or NULL when nothing is found.  The string
+   belongs to the block; nothing here copies it. */
+NLM_EXTERN CharPtr LookupCountryByLatLon (
+  FloatHi lat,
+  FloatHi lon
+)
+
+{
+  CtyBlockPtr  match = GuessCountryByLatLon (lat, lon, NULL, NULL);
+
+  return (match != NULL) ? match->name : NULL;
}
NLM_EXTERN CharPtr GuessCountryForLatLon (
@@ -9914,138 +10753,550 @@ NLM_EXTERN CharPtr GuessCountryForLatLon (
)
{
- CtBlockPtr cbp;
- CtGridPtr cgp;
- CharPtr country = NULL;
- CtSetPtr csp;
- CtGridPtr PNTR gdarray;
- Int2 L, R, mid;
- Int2 x;
- Int2 y;
+ return LookupCountryByLatLon (lat, lon);
+}
+
+/* Returns the name stored on the water block found for (lat, lon) with no
+   preferred name, or NULL when nothing is found.  The string belongs to the
+   block; nothing here copies it. */
+NLM_EXTERN CharPtr LookupWaterByLatLon (
+  FloatHi lat,
+  FloatHi lon
+)
+
+{
+  CtyBlockPtr  match = GuessWaterByLatLon (lat, lon, NULL);
+
+  return (match != NULL) ? match->name : NULL;
+}
+
+/* Returns the scale recorded on the country data set, or 0.0 when that data
+   set cannot be obtained. */
+NLM_EXTERN FloatHi CountryDataScaleIs (void)
+
+{
+  CtrySetPtr  countryData = GetLatLonCountryData ();
+
+  if (countryData != NULL) {
+    return countryData->scale;
+  }
+
+  return 0.0;
+}
+
+/* Returns the scale recorded on the water data set, or 0.0 when that data set
+   cannot be obtained. */
+NLM_EXTERN FloatHi WaterDataScaleIs (void)
+
+{
+  CtrySetPtr  waterData = GetLatLonWaterData ();
+
+  if (waterData != NULL) {
+    return waterData->scale;
+  }
+
+  return 0.0;
+}
+
+
+/* Reports whether the min/max latitude-longitude extremes stored on the
+   blocks named first and second intersect.  FALSE when either name has no
+   text, csp is NULL, or either name has no exact (case-insensitive) entry. */
+static Boolean RegionExtremesOverlap (
+  CharPtr first,
+  CharPtr second,
+  CtrySetPtr csp
+)
+
+{
+  CtyBlockPtr  a;
+  CtyBlockPtr  b;
+
+  if (StringHasNoText (first) || StringHasNoText (second)) return FALSE;
+  if (csp == NULL) return FALSE;
+
+  a = GetEntryInLatLonListIndex (first, csp);
+  if (a == NULL || a->name == NULL || StringICmp (a->name, first) != 0) return FALSE;
+
+  b = GetEntryInLatLonListIndex (second, csp);
+  if (b == NULL || b->name == NULL || StringICmp (b->name, second) != 0) return FALSE;
+
+  /* the boxes intersect unless one lies wholly beyond the other on either axis */
+  return (Boolean) (a->minlat <= b->maxlat && b->minlat <= a->maxlat &&
+                    a->minlon <= b->maxlon && b->minlon <= a->maxlon);
+}
+
+/* Runs RegionExtremesOverlap on the country data set.  FALSE when either name
+   has no text or the country data set cannot be obtained. */
+NLM_EXTERN Boolean CountryExtremesOverlap (
+  CharPtr first,
+  CharPtr second
+)
+
+{
+  CtrySetPtr  countryData = NULL;
+  Boolean     overlap = FALSE;
+
+  if (! StringHasNoText (first) && ! StringHasNoText (second)) {
+    countryData = GetLatLonCountryData ();
+    if (countryData != NULL) {
+      overlap = RegionExtremesOverlap (first, second, countryData);
+    }
+  }
+
+  return overlap;
+}
+
+/* Thin forwarding wrapper: returns whatever CountryExtremesOverlap reports
+   for the same pair of names. */
+NLM_EXTERN Boolean CountryBoxesOverlap (
+  CharPtr country1,
+  CharPtr country2
+)
+
+{
+  Boolean  overlap;
+
+  overlap = CountryExtremesOverlap (country1, country2);
+
+  return overlap;
+}
+
+/* Runs RegionExtremesOverlap on the water data set.  FALSE when either name
+   has no text or the water data set cannot be obtained. */
+NLM_EXTERN Boolean WaterExtremesOverlap (
+  CharPtr first,
+  CharPtr second
+)
+
+{
+  CtrySetPtr  waterData = NULL;
+  Boolean     overlap = FALSE;
+
+  if (! StringHasNoText (first) && ! StringHasNoText (second)) {
+    waterData = GetLatLonWaterData ();
+    if (waterData != NULL) {
+      overlap = RegionExtremesOverlap (first, second, waterData);
+    }
+  }
+
+  return overlap;
+}
+
+/*
+Distance on a spherical surface calculation adapted from
+http://www.linuxjournal.com/magazine/
+work-shell-calculating-distance-between-two-latitudelongitude-points
+*/
+
+#define EARTH_RADIUS 6371.0 /* average radius of non-spherical earth in kilometers */
+#define CONST_PI 3.14159265359
+
+/* Converts an angle in degrees to radians using CONST_PI. */
+static double DegreesToRadians (
+  FloatHi degrees
+)
+
+{
+  double  radiansPerDegree = CONST_PI / 180.0;
+
+  return degrees * radiansPerDegree;
+}
+
+/* Haversine distance between (latA, lonA) and (latB, lonB), both given in
+   degrees, on a sphere of radius EARTH_RADIUS (kilometers). */
+static FloatHi DistanceOnGlobe (
+  FloatHi latA,
+  FloatHi lonA,
+  FloatHi latB,
+  FloatHi lonB
+)
+
+{
+  double lat1, lon1, lat2, lon2;
+  double dLat, dLon, a, c;
+
+  lat1 = DegreesToRadians (latA);
+  lon1 = DegreesToRadians (lonA);
+  lat2 = DegreesToRadians (latB);
+  lon2 = DegreesToRadians (lonB);
+
+  dLat = lat2 - lat1;
+  dLon = lon2 - lon1;
+
+  a = sin (dLat / 2) * sin (dLat / 2) +
+      cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2);
+
+  /* rounding (near-antipodal points, or latitudes a hair past +/-90) can push
+     a just outside [0,1]; sqrt of a negative would then yield NaN */
+  if (a < 0.0) {
+    a = 0.0;
+  } else if (a > 1.0) {
+    a = 1.0;
+  }
+
+  c = 2 * atan2 (sqrt (a), sqrt (1 - a));
+
+  return (FloatHi) (EARTH_RADIUS * c);
+}
+
+/* Distance on the globe between (latA, lonA) and the point 1/scale degree
+   further along in both latitude and longitude.  Delegates to DistanceOnGlobe
+   rather than repeating the same formula, so the two cannot drift apart.
+   scale is used as a divisor; callers are expected to pass a non-zero value. */
+static FloatHi ErrorDistance (
+  FloatHi latA,
+  FloatHi lonA,
+  FloatHi scale)
+{
+  return DistanceOnGlobe (latA, lonA, latA + (1.0 / scale), lonA + (1.0 / scale));
+}
+
+
+static CtyBlockPtr RegionClosestToLatLon (
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP,
+ CtrySetPtr csp
+)
+
+{
+ LatBlockPtr PNTR array;
+ CtyBlockPtr cbp, best = NULL;
+ FloatHi closest = EARTH_RADIUS * CONST_PI * 2;
+ FloatHi delta;
+ Int4 latitude;
+ Int4 longitude;
+ Int4 maxDelta;
+ LatBlockPtr lbp;
+ Int4 R;
+ Int4 x;
+ Int4 y;
+ Boolean is_geographically_better;
+
+ if (distanceP != NULL) {
+ *distanceP = 0.0;
+ }
- csp = GetCtSetLatLonData ();
if (csp == NULL) return NULL;
- gdarray = csp->gdarray;
- if (gdarray == NULL) return NULL;
+ array = csp->latarray;
+ if (array == NULL) return NULL;
- L = 0;
- R = csp->num_grids - 1;
+ latitude = ConvertLat (lat, csp->scale);
+ longitude = ConvertLon (lon, csp->scale);
- x = LatLonDegreeToIndex (lon);
- y = LatLonDegreeToIndex (lat);
+ maxDelta = (Int4) (range * csp->scale + EPSILON);
- while (L < R) {
- mid = (L + R) / 2;
- cgp = gdarray [mid];
- if (cgp != NULL && CgpGridComp (cgp, x, y) < 0) {
- L = mid + 1;
+ for (R = GetLatLonIndex (csp, array, latitude - maxDelta); R < csp->numLatBlocks; R++) {
+ lbp = array [R];
+ if (lbp == NULL) break;
+ if (latitude + maxDelta < lbp->lat) break;
+
+ if (longitude < lbp->minlon - maxDelta) continue;
+ if (longitude > lbp->maxlon + maxDelta) continue;
+
+ cbp = lbp->landmass;
+ if (cbp == NULL) continue;
+
+ if (longitude < lbp->minlon) {
+ x = lbp->minlon;
+ } else if (longitude > lbp->maxlon) {
+ x = lbp->maxlon;
} else {
- R = mid;
+ x = longitude;
+ }
+
+ y = lbp->lat;
+
+ delta = DistanceOnGlobe (lat, lon, (FloatHi) (y / csp->scale), (FloatHi) (x / csp->scale));
+
+ is_geographically_better = FALSE;
+ if (delta < closest) {
+ is_geographically_better = TRUE;
+ } else if (delta - closest < 0.000001) {
+ if (best == NULL || cbp->area < best->area) {
+ is_geographically_better = TRUE;
+ }
+ }
+
+ if (best == NULL || NewLatLonCandidateIsBetter (NULL, NULL, best, cbp, is_geographically_better)) {
+ best = cbp;
+ closest = delta;
}
}
- while (R < csp->num_grids) {
- cgp = gdarray [R];
- if (cgp == NULL) return country;
- if (cgp->xindex != x || cgp->yindex != y) return country;
- cbp = cgp->cbp;
- if (cbp == NULL) return country;
- if (lon >= cbp->minx && lat >= cbp->miny && lon <= cbp->maxx && lat <= cbp->maxy) {
- country = cbp->country;
+ if (best != NULL) {
+ if (distanceP != NULL) {
+ *distanceP = closest;
}
- R++;
}
- return country;
+ return best;
}
+static CtyBlockPtr NearestCountryByLatLon (
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+)
-static CharPtr bodiesOfWater [] = {
- "Bay",
- "Canal",
- "Channel",
- "Coastal",
- "Cove",
- "Estuary",
- "Fjord",
- "Freshwater",
- "Gulf",
- "Harbor",
- "Inlet",
- "Lagoon",
- "Lake",
- "Narrows",
- "Ocean",
- "Offshore",
- "Passage",
- "River",
- "Sea",
- "Seawater",
- "Sound",
- "Strait",
- "Water",
- "Waters",
- NULL
-};
+{
+ CtrySetPtr csp;
-static TextFsaPtr GetBodiesOfWaterFSA (void)
+ csp = GetLatLonCountryData ();
+ if (csp == NULL) return NULL;
+ return RegionClosestToLatLon (lat, lon, range, distanceP, csp);
+}
+
+static CtyBlockPtr NearestWaterByLatLon (
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+)
{
- TextFsaPtr fsa;
- Int2 i;
- CharPtr prop = "BodiesOfWaterFSA";
+ CtrySetPtr csp;
- fsa = (TextFsaPtr) GetAppProperty (prop);
- if (fsa != NULL) return fsa;
+ csp = GetLatLonWaterData ();
+ if (csp == NULL) return NULL;
- fsa = TextFsaNew ();
- if (fsa != NULL) {
- for (i = 0; bodiesOfWater [i] != NULL; i++) {
- TextFsaAdd (fsa, bodiesOfWater [i]);
+ return RegionClosestToLatLon (lat, lon, range, distanceP, csp);
+}
+
+/* Returns the name stored on the block NearestCountryByLatLon selects for
+   (lat, lon) within range, or NULL when it selects none.  distanceP is handed
+   straight through to that helper. */
+NLM_EXTERN CharPtr CountryClosestToLatLon (
+  FloatHi lat,
+  FloatHi lon,
+  FloatHi range,
+  FloatHi PNTR distanceP
+)
+
+{
+  CtyBlockPtr  nearest = NearestCountryByLatLon (lat, lon, range, distanceP);
+
+  return (nearest != NULL) ? nearest->name : NULL;
+}
+
+/* Returns the name stored on the block NearestWaterByLatLon selects for
+   (lat, lon) within range, or NULL when it selects none.  distanceP is handed
+   straight through to that helper. */
+NLM_EXTERN CharPtr WaterClosestToLatLon (
+  FloatHi lat,
+  FloatHi lon,
+  FloatHi range,
+  FloatHi PNTR distanceP
+)
+
+{
+  CtyBlockPtr  nearest = NearestWaterByLatLon (lat, lon, range, distanceP);
+
+  return (nearest != NULL) ? nearest->name : NULL;
+}
+
+static CtyBlockPtr RegionIsNearLatLon (
+ CharPtr country,
+ CharPtr province,
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP,
+ CtrySetPtr csp
+)
+
+{
+ LatBlockPtr PNTR array;
+ CtyBlockPtr cbp, best = NULL;
+ FloatHi closest = EARTH_RADIUS * CONST_PI * 2;
+ FloatHi delta;
+ Int4 latitude;
+ Int4 longitude;
+ Int4 maxDelta;
+ LatBlockPtr lbp;
+ Int4 R;
+ Int4 x;
+ Int4 y;
+
+ if (distanceP != NULL) {
+ *distanceP = 0.0;
+ }
+
+ if (StringHasNoText (country)) return NULL;
+ if (csp == NULL) return NULL;
+
+ array = csp->latarray;
+ if (array == NULL) return NULL;
+
+ latitude = ConvertLat (lat, csp->scale);
+ longitude = ConvertLon (lon, csp->scale);
+
+ maxDelta = (Int4) (range * csp->scale + EPSILON);
+
+ for (R = GetLatLonIndex (csp, array, latitude - maxDelta); R < csp->numLatBlocks; R++) {
+ lbp = array [R];
+ if (lbp == NULL) break;
+ if (latitude + maxDelta < lbp->lat) break;
+
+ if (longitude < lbp->minlon - maxDelta) continue;
+ if (longitude > lbp->maxlon + maxDelta) continue;
+
+ cbp = lbp->landmass;
+ if (cbp == NULL) continue;
+
+ if (StringICmp (country, cbp->level0) != 0) continue;
+ if (/* province != NULL && */ StringICmp (province, cbp->level1) != 0) continue;
+
+ if (longitude < lbp->minlon) {
+ x = lbp->minlon;
+ } else if (longitude > lbp->maxlon) {
+ x = lbp->maxlon;
+ } else {
+ x = longitude;
+ }
+
+ y = lbp->lat;
+
+ delta = DistanceOnGlobe (lat, lon, (FloatHi) (y / csp->scale), (FloatHi) (x / csp->scale));
+
+ if (best == NULL || delta < closest) {
+ best = cbp;
+ closest = delta;
}
}
- SetAppProperty (prop, (Pointer) fsa);
+ if (best != NULL) {
+ if (distanceP != NULL) {
+ *distanceP = closest;
+ }
+ }
- return fsa;
+ return best;
}
-NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str)
+static CtyBlockPtr CountryToLatLonDistance (
+ CharPtr country,
+ CharPtr province,
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+)
{
- Char ch;
- TextFsaPtr fsa;
- CharPtr ptr;
- Int4 state;
- ValNodePtr matches;
+ CtrySetPtr csp;
- if (StringHasNoText (str)) return FALSE;
+ csp = GetLatLonCountryData ();
+ if (csp == NULL) return NULL;
- fsa = GetBodiesOfWaterFSA ();
- if (fsa == NULL) return FALSE;
+ return RegionIsNearLatLon (country, province, lat, lon, range, distanceP, csp);
+}
- state = 0;
- ptr = str;
- ch = *ptr;
+static CtyBlockPtr WaterToLatLonDistance (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+)
- while (ch != '\0') {
- matches = NULL;
- state = TextFsaNext (fsa, state, ch, &matches);
- ptr++;
- ch = *ptr;
- if (ch == '\0' || ch == ',' || ch == ':' || ch == ';' || ch == ' ') {
- if (matches != NULL) return TRUE;
- state = 0;
+{
+ CtrySetPtr csp;
+
+ csp = GetLatLonWaterData ();
+ if (csp == NULL) return NULL;
+
+ return RegionIsNearLatLon (country, NULL, lat, lon, range, distanceP, csp);
+}
+
+/* TRUE when CountryToLatLonDistance, called with no province, returns a block
+   for the named country; FALSE otherwise.  distanceP is handed straight
+   through to that helper. */
+NLM_EXTERN Boolean CountryIsNearLatLon (
+  CharPtr country,
+  FloatHi lat,
+  FloatHi lon,
+  FloatHi range,
+  FloatHi PNTR distanceP
+)
+
+{
+  CtyBlockPtr  hit = CountryToLatLonDistance (country, NULL, lat, lon, range, distanceP);
+
+  return (Boolean) (hit != NULL);
+}
+
+/* TRUE when WaterToLatLonDistance returns a block for the named body of
+   water; FALSE otherwise.  distanceP is handed straight through to that
+   helper. */
+NLM_EXTERN Boolean WaterIsNearLatLon (
+  CharPtr country,
+  FloatHi lat,
+  FloatHi lon,
+  FloatHi range,
+  FloatHi PNTR distanceP
+)
+
+{
+  CtyBlockPtr  hit = WaterToLatLonDistance (country, lat, lon, range, distanceP);
+
+  return (Boolean) (hit != NULL);
+}
+
+/*
+static void WriteLatLonRegionData (
+ CtrySetPtr csp,
+ FILE* fp
+)
+
+{
+ Char buf [150];
+ CtyBlockPtr cbp;
+ LatBlockPtr lbp;
+ ValNodePtr vnp;
+
+ if (csp == NULL || fp == NULL) return;
+
+ for (vnp = csp->latblocks; vnp != NULL; vnp = vnp->next) {
+ lbp = (LatBlockPtr) vnp->data.ptrvalue;
+ if (lbp == NULL) {
+ fprintf (fp, "NULL LatBlockPtr\n");
+ continue;
}
+ cbp = lbp->landmass;
+ if (cbp == NULL) {
+ fprintf (fp, "NULL CtyBlockPtr\n");
+ continue;
+ }
+
+ if (StringHasNoText (cbp->name)) {
+ fprintf (fp, "NULL cbp->name\n");
+ continue;
+ }
+
+ StringNCpy_0 (buf, cbp->name, 50);
+ StringCat (buf, " ");
+ buf [50] = '\0';
+
+ fprintf (fp, "%s %4d : %4d .. %4d\n", buf, (int) lbp->lat, (int) lbp->minlon, (int) lbp->maxlon);
}
- return FALSE;
+ fprintf (fp, "\n\n");
}
+static void TestLatLonCountryData (void)
+{
+ CtrySetPtr csp;
+ FILE *fp;
+ fp = FileOpen ("stdout", "w");
+ if (fp == NULL) {
+ Message (MSG_OK, "Unable to open output file");
+ return;
+ }
+
+ csp = GetLatLonCountryData ();
+ if (csp == NULL) {
+ fprintf (fp, "GetLatLonCountryData failed\n");
+ FileClose (fp);
+ return;
+ }
+
+ WriteLatLonRegionData (csp, fp);
+
+ csp = GetLatLonWaterData ();
+ if (csp == NULL) {
+ fprintf (fp, "GetLatLonWaterData failed\n");
+ FileClose (fp);
+ return;
+ }
+
+ WriteLatLonRegionData (csp, fp);
+
+ FileClose (fp);
+}
+*/
+
+/* END OF NEW LATITUDE-LONGITUDE COUNTRY VALIDATION CODE */
static Boolean StringListIsUnique (ValNodePtr list)
@@ -10143,6 +11394,7 @@ static Boolean PrimerSeqIsValid (ValidStructPtr vsp, CharPtr name, Char PNTR bad
ValNodePtr matches;
CharPtr ptr;
Int4 state;
+ Boolean first;
if (badch != NULL) {
*badch = '\0';
@@ -10175,12 +11427,14 @@ static Boolean PrimerSeqIsValid (ValidStructPtr vsp, CharPtr name, Char PNTR bad
if (ch == '<') {
state = 0;
matches = NULL;
- while (ch != '\0' && ch != '>') {
+ first = TRUE;
+ while (ch != '\0' && ch != '>' && (first || ch != '<')) {
state = TextFsaNext (fsa, state, ch, &matches);
ptr++;
ch = *ptr;
+ first = FALSE;
}
- if (ch != '>') {
+ if (ch != '>' || ch == '<') {
if (badch != NULL) {
*badch = ch;
}
@@ -10740,8 +11994,14 @@ static void ValidateOrgModVoucher (ValidStructPtr vsp, OrgModPtr mod)
return;
}
- /* ignore personal collections */
- if (StringNICmp (inst, "personal", 8) == 0) return;
+ /* previously ignored personal collections, now complain if name missing */
+ if (StringNICmp (inst, "personal", 8) == 0) {
+ if (StringICmp (inst, "personal") == 0 && StringLen (str) > 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MissingPersonalCollectionName,
+ "Personal collection does not have name of collector");
+ }
+ return;
+ }
len1 = StringLen (inst);
len2 = StringLen (str);
@@ -11086,6 +12346,703 @@ static Boolean IsValidSexValue (CharPtr str)
return FALSE;
}
+static Boolean LatLonInRange (
+ FloatHi lat,
+ FloatHi lon
+)
+/* Returns TRUE when lat lies in [-90.0001, 90.0001] and lon lies in [-180.0001, 180.0001], FALSE otherwise. */
+{
+ if (lat < -90.0001 || lat > 90.0001) return FALSE; /* limits carry a 0.0001 margin - presumably rounding tolerance */
+ if (lon < -180.0001 || lon > 180.0001) return FALSE;
+
+ return TRUE;
+}
+
+static Boolean RegionIsClosestToLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP,
+ CtrySetPtr csp
+)
+/* Returns TRUE when the block of csp->latarray nearest to lat/lon (searched within range) has a landmass named exactly country; stores that nearest distance in *distanceP. */
+{
+ LatBlockPtr PNTR array;
+ CtyBlockPtr cbp;
+ FloatHi closest = EARTH_RADIUS * CONST_PI * 2; /* initial upper bound for the running minimum */
+ CharPtr guess = NULL; /* name of the nearest landmass found so far */
+ FloatHi delta;
+ Int4 latitude;
+ Int4 longitude;
+ Int4 maxDelta;
+ LatBlockPtr lbp;
+ Int4 R;
+ Int4 x;
+ Int4 y;
+
+
+ if (StringHasNoText (country)) return FALSE; /* note: *distanceP is left untouched on this path */
+
+ if (distanceP != NULL) {
+ *distanceP = 0.0; /* value reported when no candidate block is found */
+ }
+
+ if (csp == NULL) return FALSE;
+
+ array = csp->latarray;
+ if (array == NULL) return FALSE;
+
+ latitude = ConvertLat (lat, csp->scale); /* coordinates in the scaled integer units used by the blocks */
+ longitude = ConvertLon (lon, csp->scale);
+
+ maxDelta = (Int4) (range * csp->scale + EPSILON); /* search window, in the same scaled units */
+
+ for (R = GetLatLonIndex (csp, array, latitude - maxDelta); R < csp->numLatBlocks; R++) {
+ lbp = array [R];
+ if (lbp == NULL) break;
+ if (latitude + maxDelta < lbp->lat) break; /* past the window - assumes latarray is ordered by increasing lat (TODO confirm) */
+
+ if (longitude < lbp->minlon - maxDelta) continue; /* block lies outside the longitude window */
+ if (longitude > lbp->maxlon + maxDelta) continue;
+
+ cbp = lbp->landmass;
+ if (cbp == NULL) continue;
+
+ if (longitude < lbp->minlon) { /* clamp the longitude onto the block's [minlon, maxlon] span */
+ x = lbp->minlon;
+ } else if (longitude > lbp->maxlon) {
+ x = lbp->maxlon;
+ } else {
+ x = longitude;
+ }
+
+ y = lbp->lat;
+
+ delta = DistanceOnGlobe (lat, lon, (FloatHi) (y / csp->scale), (FloatHi) (x / csp->scale)); /* block point is divided by scale before the call */
+
+ if (delta < closest) {
+ guess = cbp->name;
+ closest = delta;
+ } else if (delta == closest) {
+ if (StringCmp (country, cbp->name) == 0) { /* on an exact tie prefer the block that carries the claimed name */
+ guess = cbp->name;
+ }
+ }
+ }
+
+ if (guess != NULL) {
+ if (distanceP != NULL) {
+ *distanceP = closest;
+ }
+ }
+
+ if (StringCmp (guess, country) == 0) return TRUE; /* guess may still be NULL here when no block qualified */
+
+ return FALSE;
+}
+
+
+static Boolean CountryIsClosestToLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+)
+/* Runs RegionIsClosestToLatLon against the set returned by GetLatLonCountryData; FALSE when that set is unavailable. */
+{
+ CtrySetPtr csp;
+
+ csp = GetLatLonCountryData ();
+ if (csp == NULL) return FALSE; /* *distanceP is not written on this path */
+
+ return RegionIsClosestToLatLon (country, lat, lon, range, distanceP, csp);
+}
+
+
+static int AdjustAndRoundDistance (
+ FloatHi distance,
+ FloatHi scale
+)
+/* Adds a scale-dependent offset to distance and converts the sum to int after adding 0.5; scales outside the three listed bands get no offset. */
+{
+ if (scale < 1.1) { /* NOTE(review): offsets look like one grid cell (1/scale degree) expressed in km - confirm */
+ distance += 111.19;
+ } else if (scale > 19.5 && scale < 20.5) {
+ distance += 5.56;
+ } else if (scale > 99.5 && scale < 100.5) {
+ distance += 1.11;
+ }
+
+ return (int) (distance + 0.5); /* the cast truncates, so this rounds non-negative values to the nearest integer */
+}
+
+typedef struct latlonmap { /* result record filled in by CalculateLatLonMap */
+ FloatHi lat; /* coordinates the record was computed for */
+ FloatHi lon;
+ CharPtr fullguess; /* name of the region taken as the guess for the point */
+ CharPtr guesscountry; /* level0 of the guessed region */
+ CharPtr guessprovince; /* level1 of the guessed region */
+ CharPtr guesswater; /* name of the water region found by GuessWaterByLatLon */
+ CharPtr closestfull; /* name of the region found by NearestCountryByLatLon */
+ CharPtr closestcountry; /* level0 of that nearest region */
+ CharPtr closestprovince; /* level1 of that nearest region */
+ CharPtr closestwater; /* level0 of the region found by NearestWaterByLatLon */
+ CharPtr claimedfull; /* name of the claimed region when it is not accepted as the guess */
+ int landdistance; /* AdjustAndRoundDistance result for the nearest land (printed as km in messages) */
+ int waterdistance; /* AdjustAndRoundDistance result for the nearest water */
+ int claimeddistance; /* AdjustAndRoundDistance result for the claimed region (printed as km in messages) */
+} LatLonMap, PNTR LatLonMapPtr;
+
+static void CalculateLatLonMap (
+ FloatHi lat,
+ FloatHi lon,
+ CharPtr country,
+ CharPtr province,
+ FloatHi scale,
+ LatLonMapPtr lmp
+)
+/* Zeroes *lmp, then records the region containing lat/lon or else the nearest land/water (range 5.0), plus the claimed region's distance when nothing found equals country/province. */
+{
+ CtyBlockPtr cbp;
+ FloatHi landdistance = 0.0, waterdistance = 0.0, claimeddistance = 0.0;
+ Boolean goodmatch = FALSE; /* set once any guessed or closest region equals the claimed country (and province, if given) */
+
+ if (lmp == NULL) return;
+
+ /* initialize result values */
+ MemSet ((Pointer) lmp, 0, sizeof (LatLonMap));
+
+ lmp->lat = lat;
+ lmp->lon = lon;
+
+ /* lookup region by coordinates, or find nearest region and calculate distance */
+ cbp = GuessCountryByLatLon (lat, lon, country, province);
+ if (cbp != NULL) {
+ /* successfully found inside some country */
+ lmp->fullguess = cbp->name;
+ lmp->guesscountry = cbp->level0;
+ lmp->guessprovince = cbp->level1;
+ if (StringICmp (country, lmp->guesscountry) == 0 && (province == NULL || StringICmp (province, lmp->guessprovince) == 0)) {
+ goodmatch = TRUE;
+ }
+ } else {
+ /* not inside a country, check water */
+ cbp = GuessWaterByLatLon (lat, lon, country);
+ if (cbp != NULL) {
+ /* found inside water */
+ lmp->guesswater = cbp->name;
+ if (StringICmp (country, lmp->guesswater) == 0) {
+ goodmatch = TRUE;
+ }
+ /*
+ also see if close to land for coastal warning (if country is land)
+ or proximity message (if country is water)
+ */
+ cbp = NearestCountryByLatLon (lat, lon, 5.0, &landdistance);
+ if (cbp != NULL) {
+ lmp->closestfull = cbp->name;
+ lmp->closestcountry = cbp->level0;
+ lmp->closestprovince = cbp->level1;
+ lmp->landdistance = AdjustAndRoundDistance (landdistance, scale);
+ if (StringICmp (country, lmp->closestcountry) == 0 && (province == NULL || StringICmp (province, lmp->closestprovince) == 0)) {
+ goodmatch = TRUE;
+ }
+ }
+ } else {
+ /* may be coastal inlet, area of data insufficiency */
+ cbp = NearestCountryByLatLon (lat, lon, 5.0, &landdistance);
+ if (cbp != NULL) {
+ lmp->closestfull = cbp->name;
+ lmp->closestcountry = cbp->level0;
+ lmp->closestprovince = cbp->level1;
+ lmp->landdistance = AdjustAndRoundDistance (landdistance, scale);
+ if (StringICmp (country, lmp->closestcountry) == 0 && (province == NULL || StringICmp (province, lmp->closestprovince) == 0)) {
+ goodmatch = TRUE;
+ }
+ }
+ cbp = NearestWaterByLatLon (lat, lon, 5.0, &waterdistance);
+ if (cbp != NULL) {
+ lmp->closestwater = cbp->level0; /* note: level0 here, whereas guesswater above stores cbp->name */
+ lmp->waterdistance = AdjustAndRoundDistance (waterdistance, scale);
+ if (StringICmp (country, lmp->closestwater) == 0) {
+ goodmatch = TRUE;
+ }
+ }
+ }
+ }
+ /* if guess is not the provided country or province, calculate distance to claimed country */
+ if (! goodmatch) {
+ cbp = CountryToLatLonDistance (country, province, lat, lon, 5.0, &claimeddistance);
+ if (cbp != NULL) {
+ if (claimeddistance < ErrorDistance(lmp->lat, lmp->lon, scale)) { /* within the ErrorDistance tolerance: accept the claimed region as the guess */
+ lmp->guesscountry = country;
+ lmp->guessprovince = province;
+ lmp->fullguess = cbp->name;
+ } else {
+ lmp->claimedfull = cbp->name;
+ lmp->claimeddistance = AdjustAndRoundDistance (claimeddistance, scale);
+ }
+ } else if (province == NULL) { /* claimed name is not a known land region: try it as a body of water */
+ cbp = WaterToLatLonDistance (country, lat, lon, 5.0, &claimeddistance);
+ if (cbp != NULL) {
+ lmp->claimedfull = cbp->name;
+ lmp->claimeddistance = AdjustAndRoundDistance (claimeddistance, scale);
+ }
+ }
+ }
+}
+
+
+typedef enum { /* bit flags OR-ed into ClassifyLatLonMap's Uint4 result; typedef makes ELatLonClassify a type name (like ELatLonAdjust) rather than a stray global variable */
+ eLatLonClassify_CountryMatch = 1 , /* claimed country equals the guessed country */
+ eLatLonClassify_ProvinceMatch = 2 , /* claimed province equals the guessed province */
+ eLatLonClassify_WaterMatch = 4 , /* claimed name equals the guessed body of water */
+ eLatLonClassify_CountryClosest = 8 , /* claimed country equals the closest country */
+ eLatLonClassify_ProvinceClosest = 16 , /* claimed province equals the closest province */
+ eLatLonClassify_WaterClosest = 32 , /* claimed name equals the closest body of water */
+ eLatLonClassify_Error = 256 /* returned when ClassifyLatLonMap is given a NULL map */
+} ELatLonClassify;
+
+
+static Uint4 ClassifyLatLonMap (
+ CharPtr fullname,
+ CharPtr country,
+ CharPtr province,
+ LatLonMapPtr lmp
+)
+/* Compares country/province with the guesses in *lmp and returns an OR of eLatLonClassify_* flags (eLatLonClassify_Error for a NULL lmp); may update lmp's guess fields in the coastal case. fullname is not used. */
+{
+ Uint4 rval = 0;
+
+ if (lmp == NULL) return eLatLonClassify_Error;
+
+ /* compare guesses or closest regions to indicated country and province */
+ if (lmp->guesscountry != NULL) {
+
+ /* if top level countries match */
+ if (StringICmp (country, lmp->guesscountry) == 0) {
+ rval |= eLatLonClassify_CountryMatch;
+ /* if both are null, call it a match */
+ if (StringICmp (province, lmp->guessprovince) == 0) {
+ rval |= eLatLonClassify_ProvinceMatch;
+ }
+ }
+ /* if they don't match, do they overlap or are closest? */
+ if (!(rval & eLatLonClassify_CountryMatch)) {
+ if (StringICmp (country, lmp->closestcountry) == 0) {
+ rval |= eLatLonClassify_CountryClosest;
+ if (StringICmp (province, lmp->closestprovince) == 0) {
+ rval |= eLatLonClassify_ProvinceClosest;
+ }
+ }
+ } else if (!(rval & eLatLonClassify_ProvinceMatch) && province != NULL) { /* country matched but the given province did not */
+ if (StringICmp (province, lmp->closestprovince) == 0) {
+ rval |= eLatLonClassify_ProvinceClosest;
+ }
+ }
+ }
+ if (lmp->guesswater != NULL) {
+ /* was the non-approved body of water correctly indicated? */
+ if (StringICmp (country, lmp->guesswater) == 0) {
+ rval |= eLatLonClassify_WaterMatch;
+ } else if (StringICmp (country, lmp->closestwater) == 0) {
+ rval |= eLatLonClassify_WaterClosest;
+ }
+ }
+ if (lmp->closestcountry != NULL && StringICmp (country, lmp->closestcountry) == 0) {
+ if (lmp->guesscountry == NULL && lmp->guesswater == NULL) {
+ /* coastal area */
+ rval |= eLatLonClassify_CountryMatch; /* no containing region at all: promote the closest country to the guess */
+ lmp->guesscountry = lmp->closestcountry;
+ lmp->fullguess = lmp->closestcountry;
+ if (lmp->closestprovince != NULL && StringICmp (province, lmp->closestprovince) == 0) {
+ rval |= eLatLonClassify_ProvinceMatch;
+ lmp->guessprovince = lmp->closestprovince;
+ lmp->fullguess = lmp->closestfull;
+ }
+ } else {
+ rval |= eLatLonClassify_CountryClosest;
+ if (lmp->closestprovince != NULL && StringICmp (province, lmp->closestprovince) == 0) {
+ rval |= eLatLonClassify_ProvinceClosest;
+ }
+ }
+ }
+ return rval;
+}
+
+
+static void LatLonWaterErrors ( /* emits the SEV_INFO ERR_SEQ_DESCR_LatLonWater message for a point that maps to water lmp->guesswater; country is not used */
+ ValidStructPtr vsp,
+ LatLonMapPtr lmp,
+ Uint4 test,
+ FloatHi neardist,
+ CharPtr country,
+ CharPtr province,
+ CharPtr lat_lon,
+ CharPtr fullname,
+ FloatHi scale
+ )
+{
+ CharPtr fmt = "Lat_lon '%s' is closest to %s'%s' at distance %d km, but in water '%s'";
+ CharPtr claimed_fmt = "Lat_lon '%s' is closest to %s'%s' at distance %d km, but in water '%s' - claimed region '%s' is at distance %d km";
+
+ Boolean suppress = FALSE;
+ CharPtr reportregion; /* assigned on both paths of the closest-region branch before it is read */
+ CharPtr nosubphrase = "";
+ CharPtr desphrase = "designated subregion ";
+ CharPtr subphrase = "another subregion ";
+ CharPtr phrase = nosubphrase; /* fills the first %s after "closest to" in fmt / claimed_fmt */
+ Boolean show_claimed = FALSE;
+
+ if (test & (eLatLonClassify_CountryClosest | eLatLonClassify_ProvinceClosest)) {
+
+ if (lmp->landdistance < 22) {
+ /* for now, will not report */
+ /* this is a policy decision */
+ suppress = TRUE;
+ } else if (StringStr (fullname, "Island") != NULL) { /* names containing "Island" are not reported either */
+ suppress = TRUE;
+ }
+
+ if (test & eLatLonClassify_ProvinceClosest) {
+ reportregion = fullname;
+ phrase = desphrase;
+ } else {
+ /* wasn't closest province, so must be closest country */
+ if (province != NULL && vsp->testLatLonSubregion) {
+ phrase = subphrase;
+ reportregion = lmp->closestfull;
+ } else {
+ reportregion = lmp->closestcountry;
+ }
+ if (lmp->claimedfull != NULL) {
+ show_claimed = TRUE;
+ }
+ }
+
+ if (!suppress) {
+ if (show_claimed) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, claimed_fmt, lat_lon,
+ phrase, reportregion,
+ lmp->landdistance, lmp->guesswater,
+ lmp->claimedfull, lmp->claimeddistance);
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater,
+ fmt, lat_lon,
+ phrase, reportregion,
+ lmp->landdistance, lmp->guesswater);
+ }
+ }
+
+ } else if (neardist > 0) { /* claimed region is not the closest one, but the caller supplied a distance to it */
+ fmt = "Lat_lon '%s' is in water '%s', '%s' is %d km away";
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, fmt, lat_lon, lmp->guesswater, fullname, AdjustAndRoundDistance (neardist, scale));
+ } else {
+ fmt = "Lat_lon '%s' is in water '%s'";
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, fmt, lat_lon, lmp->guesswater);
+ }
+}
+
+
+static void LatLonLandErrors ( /* emits the SEV_INFO message saying lat_lon maps to lmp->fullguess instead of the claimed region */
+ ValidStructPtr vsp,
+ LatLonMapPtr lmp,
+ CharPtr country,
+ CharPtr province,
+ CharPtr lat_lon,
+ CharPtr fullname
+ )
+{
+ CharPtr fmt;
+
+ if (lmp->claimedfull != NULL) { /* a distance to the claimed region is available: include it in the message */
+ fmt = "Lat_lon '%s' maps to '%s' instead of '%s' - claimed region '%s' is at distance %d km";
+ if (province != NULL) {
+ if (StringICmp (lmp->guesscountry, country) == 0) { /* same country, different subregion: only reported when testLatLonSubregion is set */
+ if (vsp->testLatLonSubregion) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, fmt, lat_lon, lmp->fullguess, fullname, lmp->claimedfull, lmp->claimeddistance);
+ }
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, fullname, lmp->claimedfull, lmp->claimeddistance);
+ }
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, country, lmp->claimedfull, lmp->claimeddistance); /* no province given: message names country rather than fullname */
+ }
+ } else {
+ fmt = "Lat_lon '%s' maps to '%s' instead of '%s'";
+ if (StringICmp (lmp->guesscountry, country) == 0) {
+ if (vsp->testLatLonSubregion) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, fmt, lat_lon, lmp->fullguess, fullname);
+ }
+ } else {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, lmp->fullguess, fullname);
+ }
+ }
+}
+
+
+typedef enum { /* which coordinate correction made the claimed country match in NewerValidateCountryLatLon */
+ eLatLonAdjust_none = 0 , /* no correction was applied */
+ eLatLonAdjust_flip = 1 , /* latitude and longitude exchanged */
+ eLatLonAdjust_negate_lat = 2 , /* latitude sign reversed */
+ eLatLonAdjust_negate_lon = 4 /* longitude sign reversed */
+} ELatLonAdjust;
+
+static void NewerValidateCountryLatLon (
+ ValidStructPtr vsp,
+ GatherContextPtr gcp,
+ CharPtr countryname,
+ CharPtr lat_lon
+)
+/* Cross-checks a country qualifier against a lat_lon qualifier and reports disagreement through ValidErr: SEV_WARNING for exchanged or wrong-hemisphere values, SEV_INFO otherwise. gcp is only NULL-checked. */
+{
+ Char buf0 [256], buf1 [256], buf2 [256]; /* buf0: lat_lon cut at comma; buf1: full name; buf2: country part (province points into it) */
+ CharPtr country = NULL, province = NULL, fullname = NULL;
+ CtrySetPtr csp;
+ Boolean format_ok = FALSE, lat_in_range = FALSE, lon_in_range = FALSE;
+ FloatHi lat = 0.0;
+ FloatHi lon = 0.0;
+ LatLonMap llm, adjusted;
+ CharPtr ptr;
+ FloatHi scale = 1.0;
+ FloatHi neardist = 0.0; /* holds whatever the last *IsNearLatLon call below stored */
+ ELatLonAdjust adjust = eLatLonAdjust_none;
+ Uint4 test, adjust_test = 0;
+ CharPtr fmt;
+
+ if (vsp == NULL || gcp == NULL) return;
+ if (StringHasNoText (countryname)) return;
+ if (StringHasNoText (lat_lon)) return;
+
+ IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range);
+ if (! format_ok) {
+ /* may have comma and then altitude, so just get lat_lon component */
+ StringNCpy_0 (buf0, lat_lon, sizeof (buf0));
+ ptr = StringChr (buf0, ',');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ lat_lon = buf0; /* from here on lat_lon refers to the truncated local copy */
+ IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range);
+ }
+ }
+
+ /* reality checks */
+ if (! format_ok) {
+ /* incorrect lat_lon format should be reported elsewhere */
+ return;
+ }
+ if (! lat_in_range) {
+ /* incorrect latitude range should be reported elsewhere */
+ return;
+ }
+ if (! lon_in_range) {
+ /* incorrect longitude range should be reported elsewhere */
+ return;
+ }
+
+ if (! ParseLatLon (lat_lon, &lat, &lon)) {
+ /* report unable to parse lat_lon */
+ return;
+ }
+
+ StringNCpy_0 (buf1, countryname, sizeof (buf1));
+ /* trim at comma or semicolon, leaving only country/ocean and possibly state/province */
+ ptr = StringChr (buf1, ',');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ ptr = StringChr (buf1, ';');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ }
+ TrimSpacesAroundString (buf1);
+ if (StringDoesHaveText (buf1)) {
+ fullname = buf1;
+ }
+
+ StringNCpy_0 (buf2, buf1, sizeof (buf2));
+ /* separate country from state/province */
+ ptr = StringChr (buf2, ':');
+ if (ptr != NULL) {
+ if (CountryIsInLatLonList (buf2)) { /* tested before the cut, i.e. on the whole "country: province" string */
+ /* store province if in data list as subregion of designated country */
+ *ptr = '\0';
+ ptr++;
+ TrimSpacesAroundString (ptr);
+ if (StringDoesHaveText (ptr)) {
+ province = ptr;
+ }
+ } else {
+ /* otherwise just truncate country at colon, trimming further descriptive information */
+ *ptr = '\0';
+ ptr++;
+ }
+ }
+ TrimSpacesAroundString (buf2);
+ if (StringDoesHaveText (buf2)) {
+ country = buf2;
+ }
+
+ if (StringHasNoText (country)) {
+ /* report leading colon without country */
+ return;
+ }
+
+ /* known exceptions - don't even bother calculating any further */
+ if (StringCmp (country, "Antarctica") == 0 && lat < -60.0) {
+ return;
+ }
+
+ if (! CountryIsInLatLonList (country)) {
+ if (! WaterIsInLatLonList (country)) {
+ /* report unrecognized country */
+ return;
+ } else {
+ /* report that it may refer to specific small body of water */
+ /* continue to look for nearby country for proximity report */
+ /* (do not return) */
+ }
+ }
+
+ csp = GetLatLonCountryData ();
+ if (csp == NULL) {
+ /* report unable to find data */
+ return;
+ }
+
+ /* scale (reciprocal of degree resolution) needed for adjusting offshore distance calculation */
+ scale = csp->scale;
+
+ /* calculate assignment or proximity by coordinates */
+ CalculateLatLonMap (lat, lon, country, province, scale, &llm);
+
+ /* compare indicated country/province to guess/proximate country/water */
+ test = ClassifyLatLonMap (fullname, country, province, &llm);
+
+ if (!test && CountryIsNearLatLon(country, lat, lon, 2.0, &neardist) && neardist < 5.0) { /* no flag set, but the claimed country is very near: adopt it as the guess and classify again */
+ llm.guesscountry = country;
+ llm.guessprovince = NULL;
+ test = ClassifyLatLonMap (fullname, country, province, &llm);
+ }
+
+ if (!test && !CountryIsNearLatLon(country, lat, lon, 20.0, &neardist) && !WaterIsNearLatLon(country, lat, lon, 20.0, &neardist)) { /* claimed region is nowhere near: try exchanged, then sign-reversed coordinates */
+ CalculateLatLonMap (lon, lat, country, province, scale, &adjusted);
+ adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted);
+ if (adjust_test) {
+ adjust = eLatLonAdjust_flip;
+ } else {
+ CalculateLatLonMap (-lat, lon, country, province, scale, &adjusted);
+ adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted);
+ if (adjust_test) {
+ adjust = eLatLonAdjust_negate_lat;
+ } else {
+ CalculateLatLonMap (lat, -lon, country, province, scale, &adjusted);
+ adjust_test = ClassifyLatLonMap (fullname, country, province, &adjusted);
+ if (adjust_test) {
+ adjust = eLatLonAdjust_negate_lon;
+ }
+ }
+ }
+
+ if (adjust_test) {
+ test = adjust_test;
+ MemCopy (&llm, &adjusted, sizeof (LatLonMap)); /* continue with the map computed for the corrected coordinates */
+ }
+ }
+
+ if (adjust) { /* a correction produced a match: report only the suspected coordinate mistake */
+ if (adjust == eLatLonAdjust_flip) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude and longitude values appear to be exchanged");
+ } else if (adjust == eLatLonAdjust_negate_lat) {
+ if (lat < 0.0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to N (northern hemisphere)");
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to S (southern hemisphere)");
+ }
+ } else if (adjust == eLatLonAdjust_negate_lon) {
+ if (lon < 0.0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to E (eastern hemisphere)");
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to W (western hemisphere)");
+ }
+ }
+ } else {
+ if ((test & eLatLonClassify_CountryMatch) && (test & eLatLonClassify_ProvinceMatch)) {
+ /* success! nothing to report */
+ } else if (test & eLatLonClassify_WaterMatch) {
+ /* success! nothing to report */
+ } else if (test & eLatLonClassify_CountryMatch && province == NULL) {
+ if (vsp->testLatLonSubregion) {
+ fmt = "Lat_lon %s is in %s (more specific than %s)";
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState, fmt, lat_lon, llm.fullguess, country);
+ }
+ } else if (llm.guesswater != NULL) {
+ LatLonWaterErrors(vsp, &llm, test, neardist, country, province, lat_lon, fullname, scale);
+ } else if (llm.guesscountry != NULL) {
+ LatLonLandErrors (vsp, &llm, country, province, lat_lon, fullname);
+ } else if (llm.closestcountry != NULL) {
+ fmt = "Lat_lon '%s' is closest to '%s' instead of '%s'";
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, llm.closestcountry, fullname);
+ } else if (llm.closestwater != NULL) {
+ fmt = "Lat_lon '%s' is closest to '%s' instead of '%s'";
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonWater, fmt, lat_lon, llm.closestwater, fullname);
+ } else {
+ fmt = "Unable to determine mapping for lat_lon '%s' and country '%s'";
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, fmt, lat_lon, fullname);
+ }
+ }
+}
+
+
+/* note - special case for sex because it prevents a different message from being displayed, do not list here */
+static const Uint1 sUnexpectedViralSubSourceQualifiers[] = { /* subsource subtypes searched by IsUnexpectedViralSubSourceQualifier */
+ SUBSRC_cell_line,
+ SUBSRC_cell_type,
+ SUBSRC_tissue_type,
+ SUBSRC_dev_stage
+};
+
+static const Int4 sNumUnexpectedViralSubSourceQualifiers = sizeof (sUnexpectedViralSubSourceQualifiers) / sizeof (Uint1); /* number of entries in the table above */
+
+
+static Boolean IsUnexpectedViralSubSourceQualifier (Uint1 subtype) /* TRUE when subtype appears in sUnexpectedViralSubSourceQualifiers */
+{
+ Int4 i;
+ Boolean rval = FALSE;
+
+ for (i = 0; i < sNumUnexpectedViralSubSourceQualifiers && !rval; i++) { /* the !rval test ends the scan at the first hit */
+ if (subtype == sUnexpectedViralSubSourceQualifiers[i]) {
+ rval = TRUE;
+ }
+ }
+ return rval;
+}
+
+static const Uint1 sUnexpectedViralOrgModQualifiers[] = { /* orgmod subtypes searched by IsUnexpectedViralOrgModQualifier */
+ ORGMOD_breed,
+ ORGMOD_cultivar,
+ ORGMOD_specimen_voucher
+};
+
+static const Int4 sNumUnexpectedViralOrgModQualifiers = sizeof (sUnexpectedViralOrgModQualifiers) / sizeof (Uint1); /* number of entries in the table above */
+
+
+static Boolean IsUnexpectedViralOrgModQualifier (Uint1 subtype) /* TRUE when subtype appears in sUnexpectedViralOrgModQualifiers */
+{
+ Int4 i;
+ Boolean rval = FALSE;
+
+ for (i = 0; i < sNumUnexpectedViralOrgModQualifiers && !rval; i++) { /* the !rval test ends the scan at the first hit */
+ if (subtype == sUnexpectedViralOrgModQualifiers[i]) {
+ rval = TRUE;
+ }
+ }
+ return rval;
+}
+
+
static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSourcePtr biop, SeqFeatPtr sfp, ValNodePtr sdp)
{
Char badch;
@@ -11093,24 +13050,23 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
Boolean bad_frequency;
BioseqPtr bsp;
BioseqSetPtr bssp;
- Char buf [256];
Char ch;
Boolean chromconf = FALSE;
Int2 chromcount = 0;
SubSourcePtr chromosome = NULL;
CharPtr countryname = NULL;
+ CtrySetPtr csp;
ValNodePtr db;
DbtagPtr dbt;
- Boolean format_ok;
CharPtr gb_synonym = NULL;
Boolean germline = FALSE;
CharPtr good;
- CharPtr guess = NULL;
Boolean has_strain = FALSE;
Boolean has_fwd_pcr_seq = FALSE;
Boolean has_rev_pcr_seq = FALSE;
Boolean has_pcr_name = FALSE;
Boolean has_metagenome_source = FALSE;
+ Boolean has_plasmid = FALSE;
Int4 id;
Boolean is_env_sample = FALSE;
Boolean is_iso_source = FALSE;
@@ -11129,11 +13085,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
Boolean is_rf;
Boolean is_sc;
CharPtr last_db = NULL;
- FloatHi lat = 0.0;
- FloatHi lon = 0.0;
CharPtr lat_lon = NULL;
- Boolean lat_in_range;
- Boolean lon_in_range;
Int2 num_bio_material = 0;
Int2 num_culture_collection = 0;
Int2 num_specimen_voucher = 0;
@@ -11152,18 +13104,17 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
Int4 primer_len_before;
Int4 primer_len_after;
ValNodePtr pset;
- CharPtr ptr;
Boolean rearranged = FALSE;
SeqEntryPtr sep;
ErrSev sev;
SubSourcePtr ssp;
CharPtr str;
- Boolean strict = TRUE;
CharPtr synonym = NULL;
- Char tmp [128];
Boolean varietyOK;
CharPtr inst1, inst2, id1, id2, coll1, coll2;
Char buf1 [512], buf2 [512];
+ PCRPrimerPtr ppp;
+ PCRReactionSetPtr prp;
if (vsp->sourceQualTags == NULL) {
InitializeSourceQualTags (vsp);
@@ -11302,6 +13253,7 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Unexpected use of /mating_type qualifier");
}
} else if (ssp->subtype == SUBSRC_plasmid_name) {
+ has_plasmid = TRUE;
if (biop->genome != GENOME_plasmid) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plasmid subsource but not plasmid location");
}
@@ -11441,15 +13393,20 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "bad frequency qualifier value %s", ssp->name);
}
}
- } else if (ssp->subtype == SUBSRC_cell_line && isViral) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected cell_line qualifier");
- } else if (ssp->subtype == SUBSRC_cell_type && isViral) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected cell_type qualifier");
- } else if (ssp->subtype == SUBSRC_tissue_type && isViral) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected tissue_type qualifier");
+ }
+
+ if (isViral && IsUnexpectedViralSubSourceQualifier(ssp->subtype)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected %s qualifier", GetSubsourceQualName (ssp->subtype));
}
ssp = ssp->next;
}
+
+ if (biop->genome == GENOME_plasmid) {
+ if (! has_plasmid) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plasmid location but not plasmid subsource");
+ }
+ }
+
if (num_country > 1) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_MultipleSourceQualifiers, "Multiple country qualifiers present");
}
@@ -11470,112 +13427,9 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
if (countryname != NULL && lat_lon != NULL) {
- IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range);
- if (! format_ok) {
- /* may have comma and then altitude, so just get lat_lon component */
- StringNCpy_0 (tmp, lat_lon, sizeof (tmp));
- ptr = StringChr (tmp, ',');
- if (ptr != NULL) {
- *ptr = '\0';
- lat_lon = tmp;
- IsCorrectLatLonFormat (tmp, &format_ok, &lat_in_range, &lon_in_range);
- }
- }
- if (format_ok && ParseLatLon (lat_lon, &lat, &lon)) {
- StringNCpy_0 (buf, countryname, sizeof (buf));
- ptr = StringChr (buf, ':');
- if (ptr != NULL) {
- *ptr = '\0';
- strict = FALSE;
- }
- if (IsCountryInLatLonList (buf)) {
- if (TestLatLonForCountry (buf, lat, lon)) {
- /* match */
- if (! strict) {
- StringNCpy_0 (buf, countryname, sizeof (buf));
- ptr = StringChr (buf, ',');
- if (ptr != NULL) {
- *ptr = '\0';
- }
- ptr = StringChr (buf, ';');
- if (ptr != NULL) {
- *ptr = '\0';
- }
- if (IsCountryInLatLonList (buf)) {
- if (TestLatLonForCountry (buf, lat, lon)) {
- /* match */
- } else {
- if (vsp->strictLatLonCountry || (vsp->testLatLonSubregion && (! StringContainsBodyOfWater (countryname)))) {
- /* passed unqualified but failed qualified country name, report at info level for now */
- guess = GuessCountryForLatLon (lat, lon);
- if (StringDoesHaveText (guess)) {
- if (CountryBoxesOverlap (buf, guess)) {
- if (vsp->indexerVersion) {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonAdjacent,
- "Lat_lon '%s' MIGHT be in '%s' instead of adjacent '%s' - SHIFT DOUBLE CLICK TO LAUNCH GOOGLE EARTH -",
- lat_lon, guess, buf);
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState,
- "Lat_lon '%s' MIGHT be in '%s' instead of '%s'", lat_lon, guess, buf);
- }
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState,
- "Lat_lon '%s' does not map to subregion '%s', but may be in '%s'", lat_lon, buf, guess);
- }
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonState,
- "Lat_lon '%s' does not map to subregion '%s'", lat_lon, buf);
- }
- }
- }
- }
- }
- } else if (TestLatLonForCountry (buf, -lat, lon)) {
- if (lat < 0.0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to N (northern hemisphere)");
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude should be set to S (southern hemisphere)");
- }
- } else if (TestLatLonForCountry (buf, lat, -lon)) {
- if (lon < 0.0) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to E (eastern hemisphere)");
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Longitude should be set to W (western hemisphere)");
- }
- /*
- } else if (TestLatLonForCountry (buf, -lat, -lon)) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Both latitude and longitude appear to be in wrong hemispheres");
- */
- } else if (TestLatLonForCountry (buf, lon, lat)) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_LatLonValue, "Latitude and longitude values appear to be exchanged");
- /*
- } else if (strict) {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry, "Lat_lon '%s' does not map to '%s'", lat_lon, buf);
- */
- } else {
- if (vsp->strictLatLonCountry || (! StringContainsBodyOfWater (countryname))) {
- guess = GuessCountryForLatLon (lat, lon);
- if (guess != NULL) {
- if (CountryBoxesOverlap (buf, guess)) {
- if (vsp->indexerVersion) {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonAdjacent,
- "Lat_lon '%s' MIGHT be in '%s' instead of adjacent '%s' - SHIFT DOUBLE CLICK TO LAUNCH GOOGLE EARTH -",
- lat_lon, guess, buf);
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry,
- "Lat_lon '%s' MIGHT be in '%s' instead of '%s'", lat_lon, guess, buf);
- }
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry,
- "Lat_lon '%s' does not map to '%s', but may be in '%s'", lat_lon, buf, guess);
- }
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_LatLonCountry,
- "Lat_lon '%s' does not map to '%s'", lat_lon, buf);
- }
- }
- }
- }
+ csp = GetLatLonCountryData ();
+ if (csp != NULL) {
+ NewerValidateCountryLatLon (vsp, gcp, countryname, lat_lon);
}
}
@@ -11604,6 +13458,41 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
}
}
+ for (prp = biop->pcr_primers; prp != NULL; prp = prp->next) {
+
+ for (ppp = prp->forward; ppp != NULL; ppp = ppp->next) {
+ if (StringDoesHaveText (ppp->seq) && (! PrimerSeqIsValid (vsp, ppp->seq, &badch))) {
+ if (badch < ' ' || badch > '~') {
+ badch = '?';
+ }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerSequence,
+ "PCR forward primer sequence format is incorrect, first bad character is '%c'", (char) badch);
+ }
+ if (StringLen (ppp->name) > 10 && PrimerSeqIsValid (vsp, ppp->name, &badch)) {
+ if (badch < ' ' || badch > '~') {
+ badch = '?';
+ }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerName, "PCR forward primer name appears to be a sequence");
+ }
+ }
+
+ for (ppp = prp->reverse; ppp != NULL; ppp = ppp->next) {
+ if (StringDoesHaveText (ppp->seq) && (! PrimerSeqIsValid (vsp, ppp->seq, &badch))) {
+ if (badch < ' ' || badch > '~') {
+ badch = '?';
+ }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerSequence,
+ "PCR reverse primer sequence format is incorrect, first bad character is '%c'", (char) badch);
+ }
+ if (StringLen (ppp->name) > 10 && PrimerSeqIsValid (vsp, ppp->name, &badch)) {
+ if (badch < ' ' || badch > '~') {
+ badch = '?';
+ }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadPCRPrimerName, "PCR reverse primer name appears to be a sequence");
+ }
+ }
+ }
+
if (germline && rearranged) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Germline and rearranged should not both be present");
}
@@ -11734,15 +13623,23 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
if (StringStr (onp->lineage, "Chlorarachniophyceae") == 0 && StringStr (onp->lineage, "Cryptophyta") == 0) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrganelle, "Only Chlorarachniophyceae and Cryptophyta have nucleomorphs");
}
+ } else if (biop->genome == GENOME_macronuclear) {
+ if (StringStr (onp->lineage, "Ciliophora") == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadOrganelle, "Only Ciliophora have macronuclear locations");
+ }
}
/* warn if bacteria has organelle location */
- if (StringCmp (onp->div, "BCT") == 0
- && biop->genome != GENOME_unknown
- && biop->genome != GENOME_genomic
- && biop->genome != GENOME_plasmid
- && biop->genome != GENOME_chromosome) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Bacterial source should not have organelle location");
+ if (StringCmp (onp->div, "BCT") == 0 || StringCmp (onp->div, "VRL") == 0) {
+ if (biop->genome == GENOME_unknown
+ || biop->genome == GENOME_genomic
+ || biop->genome == GENOME_plasmid
+ || biop->genome == GENOME_chromosome
+ || (biop->genome == GENOME_proviral && StringCmp (onp->div, "VRL") == 0)) {
+ /* it's ok */
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Bacterial or viral source should not have organelle location");
+ }
}
if (StringCmp (onp->div, "ENV") == 0 && (! is_env_sample)) {
@@ -11812,6 +13709,11 @@ static void ValidateBioSource (ValidStructPtr vsp, GatherContextPtr gcp, BioSour
} else if (omp->subtype == ORGMOD_gb_synonym) {
gb_synonym = omp->subname;
}
+
+ if (isViral && IsUnexpectedViralOrgModQualifier(omp->subtype)) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Virus has unexpected %s qualifier", GetOrgModQualName (omp->subtype));
+ }
+
omp = omp->next;
}
@@ -12079,6 +13981,23 @@ static Boolean StringHasPMID (CharPtr str)
return FALSE;
}
+
+static Boolean HasStructuredCommentPrefix (UserObjectPtr uop) /* TRUE when some field of uop is labelled exactly "StructuredCommentPrefix" */
+{
+ UserFieldPtr ufp;
+
+ if (uop == NULL) {
+ return FALSE;
+ }
+ for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { /* walk the object's linked list of fields */
+ if (ufp->label != NULL && StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0) { /* only the label is examined, not the field's value */
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+
static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, ValidStructPtr vsp, Uint4 descitemid)
{
ValNodePtr vnp, vnp2;
@@ -12217,6 +14136,10 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_SerialInComment,
"Comment may refer to reference by serial number - attach reference specific comments to the reference REMARK instead.");
}
+ if (StringLooksLikeFakeStructuredComment (str)) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_FakeStructuredComment,
+ "Comment may be formatted to look like a structured comment.");
+ }
for (vnp2 = vnp->next; vnp2 != NULL; vnp2 = vnp2->next) {
if (vnp2->choice == Seq_descr_comment) {
ptr = (CharPtr) vnp2->data.ptrvalue;
@@ -12428,6 +14351,9 @@ static Boolean ValidateSeqDescrCommon (ValNodePtr sdp, BioseqValidStrPtr bvsp, V
if (uop->data == NULL) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_UserObjectProblem, "Structured Comment user object descriptor is empty");
}
+ if (!HasStructuredCommentPrefix (uop)) {
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_DESCR_StructuredCommentPrefixOrSuffixMissing, "Structured Comment lacks prefix");
+ }
}
}
}
@@ -13234,7 +15160,7 @@ static Int2 IdXrefsNotReciprocal (
for (xref = cds->xref; xref != NULL; xref = xref->next) {
if (xref->id.choice != 0) {
matchsfp = SeqMgrGetFeatureByFeatID (cds->idx.entityID, NULL, NULL, xref, NULL);
- if (matchsfp != mrna) {
+ if (matchsfp != NULL && matchsfp->idx.subtype == FEATDEF_mRNA && matchsfp != mrna) {
return 1;
}
}
@@ -13243,7 +15169,7 @@ static Int2 IdXrefsNotReciprocal (
for (xref = mrna->xref; xref != NULL; xref = xref->next) {
if (xref->id.choice != 0) {
matchsfp = SeqMgrGetFeatureByFeatID (mrna->idx.entityID, NULL, NULL, xref, NULL);
- if (matchsfp != cds) {
+ if (matchsfp != NULL && matchsfp->idx.subtype == FEATDEF_CDS && matchsfp != cds) {
return 1;
}
}
@@ -13709,21 +15635,23 @@ static void ValidateCDSmRNAmatch (
rpt_region = SeqMgrGetOverlappingFeature (sfp->location, 0, repeat_region_array, num_repeat_regions,
NULL, CONTAINED_WITHIN, &rcontext);
if (rpt_region == NULL) {
- /*
- if (gcp != NULL) {
- gcp->itemID = sfp->idx.itemID;
- gcp->thistype = OBJ_SEQFEAT;
- }
- vsp->descr = NULL;
- vsp->sfp = sfp;
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CDSwithNoMRNAOverlap, "CDS overlapped by 0 mRNAs");
- */
- vnp = ValNodeAddPointer (&cdstail, 0, (Pointer) sfp);
- if (cdshead == NULL) {
- cdshead = vnp;
+ if (StringStr (sfp->except_text, "rearrangement required for product") == NULL) {
+ /*
+ if (gcp != NULL) {
+ gcp->itemID = sfp->idx.itemID;
+ gcp->thistype = OBJ_SEQFEAT;
+ }
+ vsp->descr = NULL;
+ vsp->sfp = sfp;
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_CDSwithNoMRNAOverlap, "CDS overlapped by 0 mRNAs");
+ */
+ vnp = ValNodeAddPointer (&cdstail, 0, (Pointer) sfp);
+ if (cdshead == NULL) {
+ cdshead = vnp;
+ }
+ cdstail = vnp;
+ num_no_mrna++;
}
- cdstail = vnp;
- num_no_mrna++;
}
}
}
@@ -14226,6 +16154,7 @@ static void CheckForNonViralComplete (BioseqPtr bsp, ValidStructPtr vsp, GatherC
ObjValNodePtr ovp;
SeqDescrPtr sdp;
CharPtr title = NULL;
+ SubSourcePtr ssp;
if (bsp == NULL || vsp == NULL) return;
@@ -14258,6 +16187,12 @@ static void CheckForNonViralComplete (BioseqPtr bsp, ValidStructPtr vsp, GatherC
if (StringNICmp (onp->lineage, "Viroids; ", 9) == 0) return;
if (StringICmp (onp->lineage, "Viruses") == 0 && StringICmp (onp->div, "PHG") == 0) return;
+ for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
+ if (ssp->subtype == SUBSRC_endogenous_virus_name) {
+ return;
+ }
+ }
+
if (gcp != NULL) {
olditemid = gcp->itemID;
olditemtype = gcp->thistype;
@@ -14404,7 +16339,8 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
Int2 numBadFullSource;
SubSourcePtr sbsp;
Int2 numgene, numcds, nummrna, numcdsproducts, nummrnaproducts,
- numcdspseudo, nummrnapseudo, lastrnatype, thisrnatype;
+ numcdspseudo, nummrnapseudo, numrearrangedcds, lastrnatype,
+ thisrnatype;
Boolean cds_products_unique = TRUE, mrna_products_unique = TRUE,
suppress_duplicate_messages = FALSE, pseudo;
SeqIdPtr sip;
@@ -14460,6 +16396,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
nummrnaproducts = 0;
numcdspseudo = 0;
nummrnapseudo = 0;
+ numrearrangedcds = 0;
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
while (sfp != NULL) {
@@ -14469,6 +16406,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
break;
case FEATDEF_CDS :
numcds++;
+ if (StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
+ numrearrangedcds++;
+ }
if (sfp->product != NULL) {
numcdsproducts++;
sip = SeqLocId (sfp->product);
@@ -14567,7 +16507,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
cds_products_unique && mrna_products_unique) {
suppress_duplicate_messages = TRUE;
}
- if (numcdsproducts > 0 && numcdsproducts + numcdspseudo != numcds) {
+ if (numcdsproducts > 0 && numcdsproducts + numcdspseudo != numcds && numcdsproducts + numcdspseudo + numrearrangedcds != numcds) {
if (gcp != NULL) {
gcp->itemID = olditemid;
gcp->thistype = olditemtype;
@@ -14646,6 +16586,7 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
last_reported = FALSE;
while (sfp != NULL) {
+ HasFeatId(sfp, 932);
leave = TRUE;
if (last != NULL) {
ivalssame = FALSE;
@@ -15097,9 +17038,9 @@ static Boolean ValidateBioseqContextIndexed (BioseqPtr bsp, BioseqValidStrPtr bv
if (cdsRight + 1 != fcontext.left) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotAbutCDS, "CDS does not abut 3'UTR");
}
- if (bvsp->is_mrna && cdscount == 1 && utr3count == 1 && fcontext.right != bsp->length - 1) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotExtendToEnd, "3'UTR does not extend to end of mRNA");
- }
+ }
+ if (bvsp->is_mrna && cdscount == 1 && utr3count == 1 && fcontext.right != bsp->length - 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UTRdoesNotExtendToEnd, "3'UTR does not extend to end of mRNA");
}
threeUTRright = fcontext.right;
}
@@ -15710,6 +17651,9 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
CharPtr str;
ValNode vn;
+ if (ISA_aa (bsp->mol)) {
+ return;
+ }
gcp = vsp->gcp;
oldEntityID = gcp->entityID;
@@ -15768,6 +17712,19 @@ static void ValidateTSASequenceForNs (BioseqPtr bsp, ValidStructPtr vsp)
}
MemFree (str);
}
+ } else {
+ CountNsInSequence (bsp, &total, &max_stretch, FALSE);
+ percent_N = (total * 100) / bsp->length;
+ if (percent_N > 50) {
+ vsp->bsp = bsp;
+ vsp->descr = NULL;
+ vsp->sfp = NULL;
+ gcp->entityID = bsp->idx.entityID;
+ gcp->itemID = bsp->idx.itemID;
+ gcp->thistype = OBJ_BIOSEQ;
+
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_INST_HighNContentPercent, "Sequence contains %d percent Ns", percent_N);
+ }
}
gcp->entityID = oldEntityID;
gcp->itemID = oldItemID;
@@ -15814,6 +17771,9 @@ static void ValidateRefSeqTitle (BioseqPtr bsp, ValidStructPtr vsp, Boolean is_v
if (sdp != NULL) {
title = (CharPtr) sdp->data.ptrvalue;
if (StringDoesHaveText (title)) {
+ if (StringNCmp (title, "PREDICTED: ", 11) == 0) {
+ title += 11;
+ }
len = StringLen (taxname);
tlen = StringLen (title);
if (ISA_na (bsp->mol)) {
@@ -15834,6 +17794,87 @@ static void ValidateRefSeqTitle (BioseqPtr bsp, ValidStructPtr vsp, Boolean is_v
}
+static Boolean EndsWithSuffixPlusFieldValue (CharPtr str, CharPtr suffix, CharPtr val)
+{
+ CharPtr cp, last_word;
+ /* Reports whether str contains suffix and the text following the last space
+  * in str compares equal to val (StringCmp).
+  * Returns FALSE when suffix is not found in str, when str has no space, or
+  * when the last space lies before the suffix match.
+  * NOTE(review): the test only requires the last space to be at or after the
+  * start of the suffix match, so other words may sit between suffix and val.
+  */
+ cp = StringSearch (str, suffix);
+ if (cp == NULL) {
+ return FALSE;
+ }
+ last_word = StringRChr (str, ' '); /* presumably the last space in str, strrchr-style - confirm */
+ if (last_word == NULL || last_word < cp) { /* no space found, or it precedes the suffix match */
+ return FALSE;
+ }
+ if (StringCmp (last_word + 1, val) == 0) { /* compare the text just past that space with val */
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+
+}
+
+/* Checks an organism name against a Barcode Index Number (bin).
+ * Does nothing when bin has no text (StringHasNoText), when bsp or vsp is NULL,
+ * or when the first Seq_descr_source descriptor returned for bsp is missing,
+ * has no BioSource, or has no org.
+ * The taxname is only examined when it contains "sp." or "bacterium"
+ * (StringISearch) and also contains "BOLD" (StringSearch). In that case a
+ * SEV_ERROR ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue is reported through
+ * ValidErr unless EndsWithSuffixPlusFieldValue accepts the taxname with either
+ * "sp. " or "bacterium " as the suffix and bin as the value.
+ */
+static void ValidateBarcodeIndexNumber (CharPtr bin, BioseqPtr bsp, ValidStructPtr vsp)
+{
+ SeqDescPtr sdp;
+ SeqMgrDescContext context;
+ BioSourcePtr biop;
+ Int4 bin_len;
+
+ if (StringHasNoText (bin) || bsp == NULL || vsp == NULL) {
+ return;
+ }
+ /* NOTE(review): bin_len is assigned here but not read again in this function */
+ bin_len = StringLen (bin);
+ /* only the first source descriptor returned for bsp is consulted */
+ sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
+ if (sdp == NULL || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL || biop->org == NULL) {
+ return;
+ }
+ /* only check if name contains "sp." or "bacterium" */
+ if (StringISearch (biop->org->taxname, "sp.") == NULL && StringISearch (biop->org->taxname, "bacterium") == NULL) {
+ return;
+ }
+ /* only check if name contains BOLD */
+ if (StringSearch (biop->org->taxname, "BOLD") == NULL) {
+ return;
+ }
+ if (!EndsWithSuffixPlusFieldValue(biop->org->taxname, "sp. ", bin)
+ && !EndsWithSuffixPlusFieldValue(biop->org->taxname, "bacterium ", bin)) { /* neither accepted ending matched */
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BadStrucCommInvalidFieldValue, "Organism name should end with sp. plus Barcode Index Number (%s)", bin);
+ }
+}
+
+/* Walks the Seq_descr_user descriptors of bsp (SeqMgrGetNextDescriptor) and,
+ * for every user object whose type string matches "StructuredComment"
+ * (StringICmp), passes the value of each field labelled
+ * "Barcode Index Number" to ValidateBarcodeIndexNumber together with bsp and
+ * vsp. Fields with a different label, no label, or a choice other than 1 are
+ * skipped.
+ */
+static void ValidateStructuredCommentsInContext (BioseqPtr bsp, ValidStructPtr vsp)
+{
+ SeqDescPtr sdp;
+ SeqMgrDescContext dcontext;
+ UserObjectPtr uop;
+ ObjectIdPtr oip;
+ UserFieldPtr curr;
+
+ /* validate structured comments in context */
+ for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
+ sdp != NULL;
+ sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext))
+ {
+ uop = sdp->data.ptrvalue;
+ if (uop != NULL && uop->type != NULL && StringICmp (uop->type->str, "StructuredComment") == 0)
+ {
+ for (curr = uop->data; curr != NULL; curr = curr->next)
+ {
+ if (curr->choice != 1) continue; /* presumably choice 1 marks a string-valued field (the value is cast to CharPtr below) - confirm */
+ oip = curr->label;
+ if (oip == NULL || StringCmp (oip->str, "Barcode Index Number") != 0) continue; /* only the Barcode Index Number field is checked here */
+ ValidateBarcodeIndexNumber ((CharPtr) curr->data.ptrvalue, bsp, vsp);
+ }
+ }
+ }
+}
+
+
/*****************************************************************************
*
* ValidateBioseqContext(gcp)
@@ -15889,7 +17930,7 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
CharPtr str;
CharPtr taxname = NULL;
TextSeqIdPtr tsip;
- BioSourcePtr biop;
+ BioSourcePtr biop = NULL;
OrgRefPtr orp;
OrgNamePtr onp;
OrgModPtr omp;
@@ -16082,6 +18123,9 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
gcp->itemID = oldItemID;
gcp->thistype = oldItemtype;
}
+ if (BioseqHasKeyword(bsp, "BARCODE") && BioseqHasKeyword(bsp, "UNVERIFIED")) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadKeyword, "Sequence has both BARCODE and UNVERIFIED keywords");
+ }
}
if (is_neg_strand_virus && mip != NULL) {
@@ -16393,6 +18437,9 @@ static void ValidateBioseqContext (GatherContextPtr gcp)
/* TSA checks */
ValidateTSASequenceForNs (bsp, vsp);
+
+ /* validate structured comments in context */
+ ValidateStructuredCommentsInContext (bsp, vsp);
}
/*****************************************************************************
@@ -16424,6 +18471,9 @@ static void CheckPeptideOnCodonBoundary (ValidStructPtr vsp, GatherContextPtr gc
Boolean partial5, partial3;
Int4 pos1, pos2, adjust = 0, mod1, mod2;
+ if (SeqLocStop (sfp->location) == 2150166) {
+ mod1 = 0;
+ }
cds = SeqMgrGetOverlappingCDS (sfp->location, NULL);
if (cds == NULL)
return;
@@ -16500,969 +18550,6 @@ static CharPtr legal_mobile_element_strings [] = {
NULL
};
-static CharPtr ecnum_ambig [] = {
- "1.-.-.-", "1.1.-.-", "1.1.1.-", "1.1.1.n", "1.1.2.-", "1.1.2.n",
- "1.1.3.-", "1.1.3.n", "1.1.4.-", "1.1.4.n", "1.1.5.-", "1.1.5.n",
- "1.1.98.-", "1.1.98.n", "1.1.99.-", "1.1.99.n", "1.1.n.n",
- "1.2.-.-", "1.2.1.-", "1.2.1.n", "1.2.2.-", "1.2.2.n", "1.2.3.-",
- "1.2.3.n", "1.2.4.-", "1.2.4.n", "1.2.7.-", "1.2.7.n", "1.2.99.-",
- "1.2.99.n", "1.2.n.n", "1.3.-.-", "1.3.1.-", "1.3.1.n", "1.3.2.-",
- "1.3.2.n", "1.3.3.-", "1.3.3.n", "1.3.5.-", "1.3.5.n", "1.3.7.-",
- "1.3.7.n", "1.3.99.-", "1.3.99.n", "1.3.n.n", "1.4.-.-", "1.4.1.-",
- "1.4.1.n", "1.4.2.-", "1.4.2.n", "1.4.3.-", "1.4.3.n", "1.4.4.-",
- "1.4.4.n", "1.4.5.-", "1.4.5.n", "1.4.7.-", "1.4.7.n", "1.4.99.-",
- "1.4.99.n", "1.4.n.n", "1.5.-.-", "1.5.1.-", "1.5.1.n", "1.5.3.-",
- "1.5.3.n", "1.5.4.-", "1.5.4.n", "1.5.5.-", "1.5.5.n", "1.5.7.-",
- "1.5.7.n", "1.5.8.-", "1.5.8.n", "1.5.99.-", "1.5.99.n", "1.5.n.n",
- "1.6.-.-", "1.6.1.-", "1.6.1.n", "1.6.2.-", "1.6.2.n", "1.6.3.-",
- "1.6.3.n", "1.6.4.-", "1.6.4.n", "1.6.5.-", "1.6.5.n", "1.6.6.-",
- "1.6.6.n", "1.6.7.-", "1.6.7.n", "1.6.8.-", "1.6.8.n", "1.6.99.-",
- "1.6.99.n", "1.6.n.n", "1.7.-.-", "1.7.1.-", "1.7.1.n", "1.7.2.-",
- "1.7.2.n", "1.7.3.-", "1.7.3.n", "1.7.5.-", "1.7.5.n", "1.7.7.-",
- "1.7.7.n", "1.7.99.-", "1.7.99.n", "1.7.n.n", "1.8.-.-", "1.8.1.-",
- "1.8.1.n", "1.8.2.-", "1.8.2.n", "1.8.3.-", "1.8.3.n", "1.8.4.-",
- "1.8.4.n", "1.8.5.-", "1.8.5.n", "1.8.6.-", "1.8.6.n", "1.8.7.-",
- "1.8.7.n", "1.8.98.-", "1.8.98.n", "1.8.99.-", "1.8.99.n",
- "1.8.n.n", "1.9.-.-", "1.9.3.-", "1.9.3.n", "1.9.6.-", "1.9.6.n",
- "1.9.99.-", "1.9.99.n", "1.9.n.n", "1.10.-.-", "1.10.1.-",
- "1.10.1.n", "1.10.2.-", "1.10.2.n", "1.10.3.-", "1.10.3.n",
- "1.10.99.-", "1.10.99.n", "1.10.n.n", "1.11.-.-", "1.11.1.-",
- "1.11.1.n", "1.11.n.n", "1.12.-.-", "1.12.1.-", "1.12.1.n",
- "1.12.2.-", "1.12.2.n", "1.12.5.-", "1.12.5.n", "1.12.7.-",
- "1.12.7.n", "1.12.98.-", "1.12.98.n", "1.12.99.-", "1.12.99.n",
- "1.12.n.n", "1.13.-.-", "1.13.1.-", "1.13.1.n", "1.13.11.-",
- "1.13.11.n", "1.13.12.-", "1.13.12.n", "1.13.99.-", "1.13.99.n",
- "1.13.n.n", "1.14.-.-", "1.14.1.-", "1.14.1.n", "1.14.2.-",
- "1.14.2.n", "1.14.3.-", "1.14.3.n", "1.14.11.-", "1.14.11.n",
- "1.14.12.-", "1.14.12.n", "1.14.13.-", "1.14.13.n", "1.14.14.-",
- "1.14.14.n", "1.14.15.-", "1.14.15.n", "1.14.16.-", "1.14.16.n",
- "1.14.17.-", "1.14.17.n", "1.14.18.-", "1.14.18.n", "1.14.19.-",
- "1.14.19.n", "1.14.20.-", "1.14.20.n", "1.14.21.-", "1.14.21.n",
- "1.14.99.-", "1.14.99.n", "1.14.n.n", "1.15.-.-", "1.15.1.-",
- "1.15.1.n", "1.15.n.n", "1.16.-.-", "1.16.1.-", "1.16.1.n",
- "1.16.3.-", "1.16.3.n", "1.16.8.-", "1.16.8.n", "1.16.n.n",
- "1.17.-.-", "1.17.1.-", "1.17.1.n", "1.17.3.-", "1.17.3.n",
- "1.17.4.-", "1.17.4.n", "1.17.5.-", "1.17.5.n", "1.17.7.-",
- "1.17.7.n", "1.17.99.-", "1.17.99.n", "1.17.n.n", "1.18.-.-",
- "1.18.1.-", "1.18.1.n", "1.18.2.-", "1.18.2.n", "1.18.3.-",
- "1.18.3.n", "1.18.6.-", "1.18.6.n", "1.18.96.-", "1.18.96.n",
- "1.18.99.-", "1.18.99.n", "1.18.n.n", "1.19.-.-", "1.19.6.-",
- "1.19.6.n", "1.19.n.n", "1.20.-.-", "1.20.1.-", "1.20.1.n",
- "1.20.4.-", "1.20.4.n", "1.20.98.-", "1.20.98.n", "1.20.99.-",
- "1.20.99.n", "1.20.n.n", "1.21.-.-", "1.21.3.-", "1.21.3.n",
- "1.21.4.-", "1.21.4.n", "1.21.99.-", "1.21.99.n", "1.21.n.n",
- "1.22.-.-", "1.22.1.-", "1.22.1.n", "1.22.n.n", "1.97.-.-",
- "1.97.1.-", "1.97.1.n", "1.97.n.n", "1.98.-.-", "1.98.1.-",
- "1.98.1.n", "1.98.n.n", "1.99.-.-", "1.99.1.-", "1.99.1.n",
- "1.99.2.-", "1.99.2.n", "1.99.n.n", "1.n.n.n", "2.-.-.-", "2.1.-.-",
- "2.1.1.-", "2.1.1.n", "2.1.2.-", "2.1.2.n", "2.1.3.-", "2.1.3.n",
- "2.1.4.-", "2.1.4.n", "2.1.n.n", "2.2.-.-", "2.2.1.-", "2.2.1.n",
- "2.2.n.n", "2.3.-.-", "2.3.1.-", "2.3.1.n", "2.3.2.-", "2.3.2.n",
- "2.3.3.-", "2.3.3.n", "2.3.n.n", "2.4.-.-", "2.4.1.-", "2.4.1.n",
- "2.4.2.-", "2.4.2.n", "2.4.99.-", "2.4.99.n", "2.4.n.n", "2.5.-.-",
- "2.5.1.-", "2.5.1.n", "2.5.n.n", "2.6.-.-", "2.6.1.-", "2.6.1.n",
- "2.6.2.-", "2.6.2.n", "2.6.3.-", "2.6.3.n", "2.6.99.-", "2.6.99.n",
- "2.6.n.n", "2.7.-.-", "2.7.1.-", "2.7.1.n", "2.7.2.-", "2.7.2.n",
- "2.7.3.-", "2.7.3.n", "2.7.4.-", "2.7.4.n", "2.7.5.-", "2.7.5.n",
- "2.7.6.-", "2.7.6.n", "2.7.7.-", "2.7.7.n", "2.7.8.-", "2.7.8.n",
- "2.7.9.-", "2.7.9.n", "2.7.10.-", "2.7.10.n", "2.7.11.-",
- "2.7.11.n", "2.7.12.-", "2.7.12.n", "2.7.13.-", "2.7.13.n",
- "2.7.99.-", "2.7.99.n", "2.7.n.n", "2.8.-.-", "2.8.1.-", "2.8.1.n",
- "2.8.2.-", "2.8.2.n", "2.8.3.-", "2.8.3.n", "2.8.4.-", "2.8.4.n",
- "2.8.n.n", "2.9.-.-", "2.9.1.-", "2.9.1.n", "2.9.n.n", "2.n.n.n",
- "3.-.-.-", "3.1.-.-", "3.1.1.-", "3.1.1.n", "3.1.2.-", "3.1.2.n",
- "3.1.3.-", "3.1.3.n", "3.1.4.-", "3.1.4.n", "3.1.5.-", "3.1.5.n",
- "3.1.6.-", "3.1.6.n", "3.1.7.-", "3.1.7.n", "3.1.8.-", "3.1.8.n",
- "3.1.11.-", "3.1.11.n", "3.1.13.-", "3.1.13.n", "3.1.14.-",
- "3.1.14.n", "3.1.15.-", "3.1.15.n", "3.1.16.-", "3.1.16.n",
- "3.1.21.-", "3.1.21.n", "3.1.22.-", "3.1.22.n", "3.1.23.-",
- "3.1.23.n", "3.1.24.-", "3.1.24.n", "3.1.25.-", "3.1.25.n",
- "3.1.26.-", "3.1.26.n", "3.1.27.-", "3.1.27.n", "3.1.30.-",
- "3.1.30.n", "3.1.31.-", "3.1.31.n", "3.1.n.n", "3.2.-.-", "3.2.1.-",
- "3.2.1.n", "3.2.2.-", "3.2.2.n", "3.2.3.-", "3.2.3.n", "3.2.n.n",
- "3.3.-.-", "3.3.1.-", "3.3.1.n", "3.3.2.-", "3.3.2.n", "3.3.n.n",
- "3.4.-.-", "3.4.1.-", "3.4.1.n", "3.4.2.-", "3.4.2.n", "3.4.3.-",
- "3.4.3.n", "3.4.4.-", "3.4.4.n", "3.4.11.-", "3.4.11.n", "3.4.12.-",
- "3.4.12.n", "3.4.13.-", "3.4.13.n", "3.4.14.-", "3.4.14.n",
- "3.4.15.-", "3.4.15.n", "3.4.16.-", "3.4.16.n", "3.4.17.-",
- "3.4.17.n", "3.4.18.-", "3.4.18.n", "3.4.19.-", "3.4.19.n",
- "3.4.21.-", "3.4.21.n", "3.4.22.-", "3.4.22.n", "3.4.23.-",
- "3.4.23.n", "3.4.24.-", "3.4.24.n", "3.4.25.-", "3.4.25.n",
- "3.4.99.-", "3.4.99.n", "3.4.n.n", "3.5.-.-", "3.5.1.-", "3.5.1.n",
- "3.5.2.-", "3.5.2.n", "3.5.3.-", "3.5.3.n", "3.5.4.-", "3.5.4.n",
- "3.5.5.-", "3.5.5.n", "3.5.99.-", "3.5.99.n", "3.5.n.n", "3.6.-.-",
- "3.6.1.-", "3.6.1.n", "3.6.2.-", "3.6.2.n", "3.6.3.-", "3.6.3.n",
- "3.6.4.-", "3.6.4.n", "3.6.5.-", "3.6.5.n", "3.6.n.n", "3.7.-.-",
- "3.7.1.-", "3.7.1.n", "3.7.n.n", "3.8.-.-", "3.8.1.-", "3.8.1.n",
- "3.8.2.-", "3.8.2.n", "3.8.n.n", "3.9.-.-", "3.9.1.-", "3.9.1.n",
- "3.9.n.n", "3.10.-.-", "3.10.1.-", "3.10.1.n", "3.10.n.n",
- "3.11.-.-", "3.11.1.-", "3.11.1.n", "3.11.n.n", "3.12.-.-",
- "3.12.1.-", "3.12.1.n", "3.12.n.n", "3.13.-.-", "3.13.1.-",
- "3.13.1.n", "3.13.n.n", "3.n.n.n", "4.-.-.-", "4.1.-.-", "4.1.1.-",
- "4.1.1.n", "4.1.2.-", "4.1.2.n", "4.1.3.-", "4.1.3.n", "4.1.99.-",
- "4.1.99.n", "4.1.n.n", "4.2.-.-", "4.2.1.-", "4.2.1.n", "4.2.2.-",
- "4.2.2.n", "4.2.3.-", "4.2.3.n", "4.2.99.-", "4.2.99.n", "4.2.n.n",
- "4.3.-.-", "4.3.1.-", "4.3.1.n", "4.3.2.-", "4.3.2.n", "4.3.3.-",
- "4.3.3.n", "4.3.99.-", "4.3.99.n", "4.3.n.n", "4.4.-.-", "4.4.1.-",
- "4.4.1.n", "4.4.n.n", "4.5.-.-", "4.5.1.-", "4.5.1.n", "4.5.n.n",
- "4.6.-.-", "4.6.1.-", "4.6.1.n", "4.6.n.n", "4.99.-.-", "4.99.1.-",
- "4.99.1.n", "4.99.n.n", "4.n.n.n", "5.-.-.-", "5.1.-.-", "5.1.1.-",
- "5.1.1.n", "5.1.2.-", "5.1.2.n", "5.1.3.-", "5.1.3.n", "5.1.99.-",
- "5.1.99.n", "5.1.n.n", "5.2.-.-", "5.2.1.-", "5.2.1.n", "5.2.n.n",
- "5.3.-.-", "5.3.1.-", "5.3.1.n", "5.3.2.-", "5.3.2.n", "5.3.3.-",
- "5.3.3.n", "5.3.4.-", "5.3.4.n", "5.3.99.-", "5.3.99.n", "5.3.n.n",
- "5.4.-.-", "5.4.1.-", "5.4.1.n", "5.4.2.-", "5.4.2.n", "5.4.3.-",
- "5.4.3.n", "5.4.4.-", "5.4.4.n", "5.4.99.-", "5.4.99.n", "5.4.n.n",
- "5.5.-.-", "5.5.1.-", "5.5.1.n", "5.5.n.n", "5.99.-.-", "5.99.1.-",
- "5.99.1.n", "5.99.n.n", "5.n.n.n", "6.-.-.-", "6.1.-.-", "6.1.1.-",
- "6.1.1.n", "6.1.n.n", "6.2.-.-", "6.2.1.-", "6.2.1.n", "6.2.n.n",
- "6.3.-.-", "6.3.1.-", "6.3.1.n", "6.3.2.-", "6.3.2.n", "6.3.3.-",
- "6.3.3.n", "6.3.4.-", "6.3.4.n", "6.3.5.-", "6.3.5.n", "6.3.n.n",
- "6.4.-.-", "6.4.1.-", "6.4.1.n", "6.4.n.n", "6.5.-.-", "6.5.1.-",
- "6.5.1.n", "6.5.n.n", "6.6.-.-", "6.6.1.-", "6.6.1.n", "6.6.n.n",
- "6.n.n.n",
- NULL
-};
-
-static CharPtr ecnum_specif [] = {
- "1.1.1.1", "1.1.1.2", "1.1.1.3", "1.1.1.4", "1.1.1.6", "1.1.1.7",
- "1.1.1.8", "1.1.1.9", "1.1.1.10", "1.1.1.11", "1.1.1.12",
- "1.1.1.13", "1.1.1.14", "1.1.1.15", "1.1.1.16", "1.1.1.17",
- "1.1.1.18", "1.1.1.19", "1.1.1.20", "1.1.1.21", "1.1.1.22",
- "1.1.1.23", "1.1.1.24", "1.1.1.25", "1.1.1.26", "1.1.1.27",
- "1.1.1.28", "1.1.1.29", "1.1.1.30", "1.1.1.31", "1.1.1.32",
- "1.1.1.33", "1.1.1.34", "1.1.1.35", "1.1.1.36", "1.1.1.37",
- "1.1.1.38", "1.1.1.39", "1.1.1.40", "1.1.1.41", "1.1.1.42",
- "1.1.1.43", "1.1.1.44", "1.1.1.45", "1.1.1.46", "1.1.1.47",
- "1.1.1.48", "1.1.1.49", "1.1.1.50", "1.1.1.51", "1.1.1.52",
- "1.1.1.53", "1.1.1.54", "1.1.1.55", "1.1.1.56", "1.1.1.57",
- "1.1.1.58", "1.1.1.59", "1.1.1.60", "1.1.1.61", "1.1.1.62",
- "1.1.1.63", "1.1.1.64", "1.1.1.65", "1.1.1.66", "1.1.1.67",
- "1.1.1.69", "1.1.1.71", "1.1.1.72", "1.1.1.73", "1.1.1.75",
- "1.1.1.76", "1.1.1.77", "1.1.1.78", "1.1.1.79", "1.1.1.80",
- "1.1.1.81", "1.1.1.82", "1.1.1.83", "1.1.1.84", "1.1.1.85",
- "1.1.1.86", "1.1.1.87", "1.1.1.88", "1.1.1.90", "1.1.1.91",
- "1.1.1.92", "1.1.1.93", "1.1.1.94", "1.1.1.95", "1.1.1.96",
- "1.1.1.97", "1.1.1.98", "1.1.1.99", "1.1.1.100", "1.1.1.101",
- "1.1.1.102", "1.1.1.103", "1.1.1.104", "1.1.1.105", "1.1.1.106",
- "1.1.1.107", "1.1.1.108", "1.1.1.110", "1.1.1.111", "1.1.1.112",
- "1.1.1.113", "1.1.1.114", "1.1.1.115", "1.1.1.116", "1.1.1.117",
- "1.1.1.118", "1.1.1.119", "1.1.1.120", "1.1.1.121", "1.1.1.122",
- "1.1.1.123", "1.1.1.124", "1.1.1.125", "1.1.1.126", "1.1.1.127",
- "1.1.1.128", "1.1.1.129", "1.1.1.130", "1.1.1.131", "1.1.1.132",
- "1.1.1.133", "1.1.1.134", "1.1.1.135", "1.1.1.136", "1.1.1.137",
- "1.1.1.138", "1.1.1.140", "1.1.1.141", "1.1.1.142", "1.1.1.143",
- "1.1.1.144", "1.1.1.145", "1.1.1.146", "1.1.1.147", "1.1.1.148",
- "1.1.1.149", "1.1.1.150", "1.1.1.151", "1.1.1.152", "1.1.1.153",
- "1.1.1.154", "1.1.1.156", "1.1.1.157", "1.1.1.158", "1.1.1.159",
- "1.1.1.160", "1.1.1.161", "1.1.1.162", "1.1.1.163", "1.1.1.164",
- "1.1.1.165", "1.1.1.166", "1.1.1.167", "1.1.1.168", "1.1.1.169",
- "1.1.1.170", "1.1.1.172", "1.1.1.173", "1.1.1.174", "1.1.1.175",
- "1.1.1.176", "1.1.1.177", "1.1.1.178", "1.1.1.179", "1.1.1.181",
- "1.1.1.183", "1.1.1.184", "1.1.1.185", "1.1.1.186", "1.1.1.187",
- "1.1.1.188", "1.1.1.189", "1.1.1.190", "1.1.1.191", "1.1.1.192",
- "1.1.1.193", "1.1.1.194", "1.1.1.195", "1.1.1.196", "1.1.1.197",
- "1.1.1.198", "1.1.1.199", "1.1.1.200", "1.1.1.201", "1.1.1.202",
- "1.1.1.203", "1.1.1.205", "1.1.1.206", "1.1.1.207", "1.1.1.208",
- "1.1.1.209", "1.1.1.210", "1.1.1.211", "1.1.1.212", "1.1.1.213",
- "1.1.1.214", "1.1.1.215", "1.1.1.216", "1.1.1.217", "1.1.1.218",
- "1.1.1.219", "1.1.1.220", "1.1.1.221", "1.1.1.222", "1.1.1.223",
- "1.1.1.224", "1.1.1.225", "1.1.1.226", "1.1.1.227", "1.1.1.228",
- "1.1.1.229", "1.1.1.230", "1.1.1.231", "1.1.1.232", "1.1.1.233",
- "1.1.1.234", "1.1.1.235", "1.1.1.236", "1.1.1.237", "1.1.1.238",
- "1.1.1.239", "1.1.1.240", "1.1.1.241", "1.1.1.243", "1.1.1.244",
- "1.1.1.245", "1.1.1.246", "1.1.1.247", "1.1.1.248", "1.1.1.250",
- "1.1.1.251", "1.1.1.252", "1.1.1.254", "1.1.1.255", "1.1.1.256",
- "1.1.1.257", "1.1.1.258", "1.1.1.259", "1.1.1.260", "1.1.1.261",
- "1.1.1.262", "1.1.1.263", "1.1.1.264", "1.1.1.265", "1.1.1.266",
- "1.1.1.267", "1.1.1.268", "1.1.1.269", "1.1.1.270", "1.1.1.271",
- "1.1.1.272", "1.1.1.273", "1.1.1.274", "1.1.1.275", "1.1.1.276",
- "1.1.1.277", "1.1.1.278", "1.1.1.279", "1.1.1.280", "1.1.1.281",
- "1.1.1.282", "1.1.1.283", "1.1.1.284", "1.1.1.285", "1.1.1.286",
- "1.1.1.287", "1.1.1.288", "1.1.1.289", "1.1.1.290", "1.1.1.291",
- "1.1.1.292", "1.1.1.294", "1.1.1.295", "1.1.1.296", "1.1.1.297",
- "1.1.1.298", "1.1.1.299", "1.1.1.300", "1.1.1.301", "1.1.1.302",
- "1.1.1.303", "1.1.1.304", "1.1.2.2", "1.1.2.3", "1.1.2.4",
- "1.1.2.5", "1.1.3.3", "1.1.3.4", "1.1.3.5", "1.1.3.6", "1.1.3.7",
- "1.1.3.8", "1.1.3.9", "1.1.3.10", "1.1.3.11", "1.1.3.12",
- "1.1.3.13", "1.1.3.14", "1.1.3.15", "1.1.3.16", "1.1.3.17",
- "1.1.3.18", "1.1.3.19", "1.1.3.20", "1.1.3.21", "1.1.3.23",
- "1.1.3.27", "1.1.3.28", "1.1.3.29", "1.1.3.30", "1.1.3.37",
- "1.1.3.38", "1.1.3.39", "1.1.3.40", "1.1.3.41", "1.1.4.1",
- "1.1.4.2", "1.1.5.2", "1.1.5.3", "1.1.5.4", "1.1.5.5", "1.1.5.6",
- "1.1.5.7", "1.1.99.1", "1.1.99.2", "1.1.99.3", "1.1.99.4",
- "1.1.99.6", "1.1.99.7", "1.1.99.8", "1.1.99.9", "1.1.99.10",
- "1.1.99.11", "1.1.99.12", "1.1.99.13", "1.1.99.14", "1.1.99.18",
- "1.1.99.20", "1.1.99.21", "1.1.99.22", "1.1.99.23", "1.1.99.24",
- "1.1.99.25", "1.1.99.26", "1.1.99.27", "1.1.99.28", "1.1.99.29",
- "1.1.99.30", "1.1.99.31", "1.1.99.32", "1.1.99.33", "1.2.1.2",
- "1.2.1.3", "1.2.1.4", "1.2.1.5", "1.2.1.7", "1.2.1.8", "1.2.1.9",
- "1.2.1.10", "1.2.1.11", "1.2.1.12", "1.2.1.13", "1.2.1.15",
- "1.2.1.16", "1.2.1.17", "1.2.1.18", "1.2.1.19", "1.2.1.20",
- "1.2.1.21", "1.2.1.22", "1.2.1.23", "1.2.1.24", "1.2.1.25",
- "1.2.1.26", "1.2.1.27", "1.2.1.28", "1.2.1.29", "1.2.1.30",
- "1.2.1.31", "1.2.1.32", "1.2.1.33", "1.2.1.36", "1.2.1.38",
- "1.2.1.39", "1.2.1.40", "1.2.1.41", "1.2.1.42", "1.2.1.43",
- "1.2.1.44", "1.2.1.45", "1.2.1.46", "1.2.1.47", "1.2.1.48",
- "1.2.1.49", "1.2.1.50", "1.2.1.51", "1.2.1.52", "1.2.1.53",
- "1.2.1.54", "1.2.1.57", "1.2.1.58", "1.2.1.59", "1.2.1.60",
- "1.2.1.61", "1.2.1.62", "1.2.1.63", "1.2.1.64", "1.2.1.65",
- "1.2.1.66", "1.2.1.67", "1.2.1.68", "1.2.1.69", "1.2.1.70",
- "1.2.1.71", "1.2.1.72", "1.2.1.73", "1.2.1.74", "1.2.1.75",
- "1.2.1.76", "1.2.1.77", "1.2.1.78", "1.2.2.1", "1.2.2.2", "1.2.2.3",
- "1.2.2.4", "1.2.3.1", "1.2.3.3", "1.2.3.4", "1.2.3.5", "1.2.3.6",
- "1.2.3.7", "1.2.3.8", "1.2.3.9", "1.2.3.11", "1.2.3.13", "1.2.3.14",
- "1.2.4.1", "1.2.4.2", "1.2.4.4", "1.2.7.1", "1.2.7.2", "1.2.7.3",
- "1.2.7.4", "1.2.7.5", "1.2.7.6", "1.2.7.7", "1.2.7.8", "1.2.99.2",
- "1.2.99.3", "1.2.99.4", "1.2.99.5", "1.2.99.6", "1.2.99.7",
- "1.3.1.1", "1.3.1.2", "1.3.1.3", "1.3.1.4", "1.3.1.5", "1.3.1.6",
- "1.3.1.7", "1.3.1.8", "1.3.1.9", "1.3.1.10", "1.3.1.11", "1.3.1.12",
- "1.3.1.13", "1.3.1.14", "1.3.1.15", "1.3.1.16", "1.3.1.17",
- "1.3.1.18", "1.3.1.19", "1.3.1.20", "1.3.1.21", "1.3.1.22",
- "1.3.1.24", "1.3.1.25", "1.3.1.26", "1.3.1.27", "1.3.1.28",
- "1.3.1.29", "1.3.1.30", "1.3.1.31", "1.3.1.32", "1.3.1.33",
- "1.3.1.34", "1.3.1.35", "1.3.1.36", "1.3.1.37", "1.3.1.38",
- "1.3.1.39", "1.3.1.40", "1.3.1.41", "1.3.1.42", "1.3.1.43",
- "1.3.1.44", "1.3.1.45", "1.3.1.46", "1.3.1.47", "1.3.1.48",
- "1.3.1.49", "1.3.1.51", "1.3.1.52", "1.3.1.53", "1.3.1.54",
- "1.3.1.56", "1.3.1.57", "1.3.1.58", "1.3.1.60", "1.3.1.62",
- "1.3.1.63", "1.3.1.64", "1.3.1.65", "1.3.1.66", "1.3.1.67",
- "1.3.1.68", "1.3.1.69", "1.3.1.70", "1.3.1.71", "1.3.1.72",
- "1.3.1.73", "1.3.1.74", "1.3.1.75", "1.3.1.76", "1.3.1.77",
- "1.3.1.78", "1.3.1.79", "1.3.1.80", "1.3.1.81", "1.3.1.82",
- "1.3.1.83", "1.3.1.84", "1.3.2.3", "1.3.3.1", "1.3.3.3", "1.3.3.4",
- "1.3.3.5", "1.3.3.6", "1.3.3.7", "1.3.3.8", "1.3.3.9", "1.3.3.10",
- "1.3.3.11", "1.3.3.12", "1.3.5.1", "1.3.5.2", "1.3.7.1", "1.3.7.2",
- "1.3.7.3", "1.3.7.4", "1.3.7.5", "1.3.7.6", "1.3.99.1", "1.3.99.2",
- "1.3.99.3", "1.3.99.4", "1.3.99.5", "1.3.99.6", "1.3.99.7",
- "1.3.99.8", "1.3.99.10", "1.3.99.12", "1.3.99.13", "1.3.99.14",
- "1.3.99.15", "1.3.99.16", "1.3.99.17", "1.3.99.18", "1.3.99.19",
- "1.3.99.20", "1.3.99.21", "1.3.99.22", "1.3.99.23", "1.3.99.24",
- "1.3.99.25", "1.4.1.1", "1.4.1.2", "1.4.1.3", "1.4.1.4", "1.4.1.5",
- "1.4.1.7", "1.4.1.8", "1.4.1.9", "1.4.1.10", "1.4.1.11", "1.4.1.12",
- "1.4.1.13", "1.4.1.14", "1.4.1.15", "1.4.1.16", "1.4.1.17",
- "1.4.1.18", "1.4.1.19", "1.4.1.20", "1.4.1.21", "1.4.2.1",
- "1.4.3.1", "1.4.3.2", "1.4.3.3", "1.4.3.4", "1.4.3.5", "1.4.3.7",
- "1.4.3.8", "1.4.3.10", "1.4.3.11", "1.4.3.12", "1.4.3.13",
- "1.4.3.14", "1.4.3.15", "1.4.3.16", "1.4.3.19", "1.4.3.20",
- "1.4.3.21", "1.4.3.22", "1.4.3.23", "1.4.4.2", "1.4.5.1", "1.4.7.1",
- "1.4.99.1", "1.4.99.2", "1.4.99.3", "1.4.99.4", "1.4.99.5",
- "1.5.1.1", "1.5.1.2", "1.5.1.3", "1.5.1.5", "1.5.1.6", "1.5.1.7",
- "1.5.1.8", "1.5.1.9", "1.5.1.10", "1.5.1.11", "1.5.1.12",
- "1.5.1.15", "1.5.1.16", "1.5.1.17", "1.5.1.18", "1.5.1.19",
- "1.5.1.20", "1.5.1.21", "1.5.1.22", "1.5.1.23", "1.5.1.24",
- "1.5.1.25", "1.5.1.26", "1.5.1.27", "1.5.1.28", "1.5.1.29",
- "1.5.1.30", "1.5.1.31", "1.5.1.32", "1.5.1.33", "1.5.1.34",
- "1.5.3.1", "1.5.3.2", "1.5.3.4", "1.5.3.5", "1.5.3.6", "1.5.3.7",
- "1.5.3.10", "1.5.3.11", "1.5.3.12", "1.5.3.13", "1.5.3.14",
- "1.5.3.15", "1.5.3.16", "1.5.3.17", "1.5.4.1", "1.5.5.1", "1.5.7.1",
- "1.5.8.1", "1.5.8.2", "1.5.99.1", "1.5.99.2", "1.5.99.3",
- "1.5.99.4", "1.5.99.5", "1.5.99.6", "1.5.99.8", "1.5.99.9",
- "1.5.99.11", "1.5.99.12", "1.5.99.13", "1.6.1.1", "1.6.1.2",
- "1.6.2.2", "1.6.2.4", "1.6.2.5", "1.6.2.6", "1.6.3.1", "1.6.5.2",
- "1.6.5.3", "1.6.5.4", "1.6.5.5", "1.6.5.6", "1.6.5.7", "1.6.6.9",
- "1.6.99.1", "1.6.99.3", "1.6.99.5", "1.6.99.6", "1.7.1.1",
- "1.7.1.2", "1.7.1.3", "1.7.1.4", "1.7.1.5", "1.7.1.6", "1.7.1.7",
- "1.7.1.9", "1.7.1.10", "1.7.1.11", "1.7.1.12", "1.7.1.13",
- "1.7.2.1", "1.7.2.2", "1.7.2.3", "1.7.3.1", "1.7.3.2", "1.7.3.3",
- "1.7.3.4", "1.7.3.5", "1.7.5.1", "1.7.7.1", "1.7.7.2", "1.7.99.1",
- "1.7.99.4", "1.7.99.6", "1.7.99.7", "1.7.99.8", "1.8.1.2",
- "1.8.1.3", "1.8.1.4", "1.8.1.5", "1.8.1.6", "1.8.1.7", "1.8.1.8",
- "1.8.1.9", "1.8.1.10", "1.8.1.11", "1.8.1.12", "1.8.1.13",
- "1.8.1.14", "1.8.1.15", "1.8.1.16", "1.8.2.1", "1.8.2.2", "1.8.3.1",
- "1.8.3.2", "1.8.3.3", "1.8.3.4", "1.8.3.5", "1.8.4.1", "1.8.4.2",
- "1.8.4.3", "1.8.4.4", "1.8.4.7", "1.8.4.8", "1.8.4.9", "1.8.4.10",
- "1.8.4.11", "1.8.4.12", "1.8.4.13", "1.8.4.14", "1.8.5.1",
- "1.8.5.2", "1.8.7.1", "1.8.98.1", "1.8.98.2", "1.8.99.1",
- "1.8.99.2", "1.8.99.3", "1.9.3.1", "1.9.6.1", "1.9.99.1",
- "1.10.1.1", "1.10.2.1", "1.10.2.2", "1.10.3.1", "1.10.3.2",
- "1.10.3.3", "1.10.3.4", "1.10.3.5", "1.10.3.6", "1.10.99.1",
- "1.10.99.2", "1.10.99.3", "1.11.1.1", "1.11.1.2", "1.11.1.3",
- "1.11.1.5", "1.11.1.6", "1.11.1.7", "1.11.1.8", "1.11.1.9",
- "1.11.1.10", "1.11.1.11", "1.11.1.12", "1.11.1.13", "1.11.1.14",
- "1.11.1.15", "1.11.1.16", "1.11.1.17", "1.12.1.2", "1.12.1.3",
- "1.12.2.1", "1.12.5.1", "1.12.7.2", "1.12.98.1", "1.12.98.2",
- "1.12.98.3", "1.12.99.6", "1.13.11.1", "1.13.11.2", "1.13.11.3",
- "1.13.11.4", "1.13.11.5", "1.13.11.6", "1.13.11.8", "1.13.11.9",
- "1.13.11.10", "1.13.11.11", "1.13.11.12", "1.13.11.13",
- "1.13.11.14", "1.13.11.15", "1.13.11.16", "1.13.11.17",
- "1.13.11.18", "1.13.11.19", "1.13.11.20", "1.13.11.22",
- "1.13.11.23", "1.13.11.24", "1.13.11.25", "1.13.11.26",
- "1.13.11.27", "1.13.11.28", "1.13.11.29", "1.13.11.30",
- "1.13.11.31", "1.13.11.33", "1.13.11.34", "1.13.11.35",
- "1.13.11.36", "1.13.11.37", "1.13.11.38", "1.13.11.39",
- "1.13.11.40", "1.13.11.41", "1.13.11.43", "1.13.11.44",
- "1.13.11.45", "1.13.11.46", "1.13.11.47", "1.13.11.48",
- "1.13.11.49", "1.13.11.50", "1.13.11.51", "1.13.11.52",
- "1.13.11.53", "1.13.11.54", "1.13.11.55", "1.13.11.56", "1.13.12.1",
- "1.13.12.2", "1.13.12.3", "1.13.12.4", "1.13.12.5", "1.13.12.6",
- "1.13.12.7", "1.13.12.8", "1.13.12.9", "1.13.12.12", "1.13.12.13",
- "1.13.12.14", "1.13.12.15", "1.13.12.16", "1.13.12.17", "1.13.99.1",
- "1.13.99.3", "1.14.11.1", "1.14.11.2", "1.14.11.3", "1.14.11.4",
- "1.14.11.6", "1.14.11.7", "1.14.11.8", "1.14.11.9", "1.14.11.10",
- "1.14.11.11", "1.14.11.12", "1.14.11.13", "1.14.11.14",
- "1.14.11.15", "1.14.11.16", "1.14.11.17", "1.14.11.18",
- "1.14.11.19", "1.14.11.20", "1.14.11.21", "1.14.11.22",
- "1.14.11.23", "1.14.11.24", "1.14.11.25", "1.14.11.26",
- "1.14.11.27", "1.14.11.28", "1.14.12.1", "1.14.12.3", "1.14.12.4",
- "1.14.12.5", "1.14.12.7", "1.14.12.8", "1.14.12.9", "1.14.12.10",
- "1.14.12.11", "1.14.12.12", "1.14.12.13", "1.14.12.14",
- "1.14.12.15", "1.14.12.16", "1.14.12.17", "1.14.12.18",
- "1.14.12.19", "1.14.12.20", "1.14.12.21", "1.14.13.1", "1.14.13.2",
- "1.14.13.3", "1.14.13.4", "1.14.13.5", "1.14.13.6", "1.14.13.7",
- "1.14.13.8", "1.14.13.9", "1.14.13.10", "1.14.13.11", "1.14.13.12",
- "1.14.13.13", "1.14.13.14", "1.14.13.15", "1.14.13.16",
- "1.14.13.17", "1.14.13.18", "1.14.13.19", "1.14.13.20",
- "1.14.13.21", "1.14.13.22", "1.14.13.23", "1.14.13.24",
- "1.14.13.25", "1.14.13.26", "1.14.13.27", "1.14.13.28",
- "1.14.13.29", "1.14.13.30", "1.14.13.31", "1.14.13.32",
- "1.14.13.33", "1.14.13.34", "1.14.13.35", "1.14.13.36",
- "1.14.13.37", "1.14.13.38", "1.14.13.39", "1.14.13.40",
- "1.14.13.41", "1.14.13.42", "1.14.13.43", "1.14.13.44",
- "1.14.13.46", "1.14.13.47", "1.14.13.48", "1.14.13.49",
- "1.14.13.50", "1.14.13.51", "1.14.13.52", "1.14.13.53",
- "1.14.13.54", "1.14.13.55", "1.14.13.56", "1.14.13.57",
- "1.14.13.58", "1.14.13.59", "1.14.13.60", "1.14.13.61",
- "1.14.13.62", "1.14.13.63", "1.14.13.64", "1.14.13.66",
- "1.14.13.67", "1.14.13.68", "1.14.13.69", "1.14.13.70",
- "1.14.13.71", "1.14.13.72", "1.14.13.73", "1.14.13.74",
- "1.14.13.75", "1.14.13.76", "1.14.13.77", "1.14.13.78",
- "1.14.13.79", "1.14.13.80", "1.14.13.81", "1.14.13.82",
- "1.14.13.83", "1.14.13.84", "1.14.13.85", "1.14.13.86",
- "1.14.13.87", "1.14.13.88", "1.14.13.89", "1.14.13.90",
- "1.14.13.91", "1.14.13.92", "1.14.13.93", "1.14.13.94",
- "1.14.13.95", "1.14.13.96", "1.14.13.97", "1.14.13.98",
- "1.14.13.99", "1.14.13.100", "1.14.13.101", "1.14.13.102",
- "1.14.13.103", "1.14.13.104", "1.14.13.105", "1.14.13.106",
- "1.14.13.107", "1.14.13.108", "1.14.13.109", "1.14.13.110",
- "1.14.13.111", "1.14.13.112", "1.14.13.113", "1.14.14.1",
- "1.14.14.3", "1.14.14.5", "1.14.14.7", "1.14.15.1", "1.14.15.2",
- "1.14.15.3", "1.14.15.4", "1.14.15.5", "1.14.15.6", "1.14.15.7",
- "1.14.15.8", "1.14.16.1", "1.14.16.2", "1.14.16.3", "1.14.16.4",
- "1.14.16.5", "1.14.16.6", "1.14.17.1", "1.14.17.3", "1.14.17.4",
- "1.14.18.1", "1.14.18.2", "1.14.19.1", "1.14.19.2", "1.14.19.3",
- "1.14.19.4", "1.14.19.5", "1.14.19.6", "1.14.20.1", "1.14.21.1",
- "1.14.21.2", "1.14.21.3", "1.14.21.4", "1.14.21.5", "1.14.21.6",
- "1.14.21.7", "1.14.99.1", "1.14.99.2", "1.14.99.3", "1.14.99.4",
- "1.14.99.7", "1.14.99.9", "1.14.99.10", "1.14.99.11", "1.14.99.12",
- "1.14.99.14", "1.14.99.15", "1.14.99.19", "1.14.99.20",
- "1.14.99.21", "1.14.99.22", "1.14.99.23", "1.14.99.24",
- "1.14.99.26", "1.14.99.27", "1.14.99.28", "1.14.99.29",
- "1.14.99.30", "1.14.99.31", "1.14.99.32", "1.14.99.33",
- "1.14.99.34", "1.14.99.35", "1.14.99.36", "1.14.99.37",
- "1.14.99.38", "1.14.99.39", "1.14.99.40", "1.15.1.1", "1.15.1.2",
- "1.16.1.1", "1.16.1.2", "1.16.1.3", "1.16.1.4", "1.16.1.5",
- "1.16.1.6", "1.16.1.7", "1.16.1.8", "1.16.3.1", "1.16.8.1",
- "1.17.1.1", "1.17.1.2", "1.17.1.3", "1.17.1.4", "1.17.1.5",
- "1.17.3.1", "1.17.3.2", "1.17.3.3", "1.17.4.1", "1.17.4.2",
- "1.17.5.1", "1.17.7.1", "1.17.99.1", "1.17.99.2", "1.17.99.3",
- "1.17.99.4", "1.17.99.5", "1.18.1.1", "1.18.1.2", "1.18.1.3",
- "1.18.1.4", "1.18.6.1", "1.19.6.1", "1.20.1.1", "1.20.4.1",
- "1.20.4.2", "1.20.4.3", "1.20.98.1", "1.20.99.1", "1.21.3.1",
- "1.21.3.2", "1.21.3.3", "1.21.3.4", "1.21.3.5", "1.21.3.6",
- "1.21.4.1", "1.21.4.2", "1.21.4.3", "1.21.4.4", "1.21.99.1",
- "1.22.1.1", "1.97.1.1", "1.97.1.2", "1.97.1.3", "1.97.1.4",
- "1.97.1.8", "1.97.1.9", "1.97.1.10", "1.97.1.11", "2.1.1.1",
- "2.1.1.2", "2.1.1.3", "2.1.1.4", "2.1.1.5", "2.1.1.6", "2.1.1.7",
- "2.1.1.8", "2.1.1.9", "2.1.1.10", "2.1.1.11", "2.1.1.12",
- "2.1.1.13", "2.1.1.14", "2.1.1.15", "2.1.1.16", "2.1.1.17",
- "2.1.1.18", "2.1.1.19", "2.1.1.20", "2.1.1.21", "2.1.1.22",
- "2.1.1.25", "2.1.1.26", "2.1.1.27", "2.1.1.28", "2.1.1.29",
- "2.1.1.31", "2.1.1.32", "2.1.1.33", "2.1.1.34", "2.1.1.35",
- "2.1.1.36", "2.1.1.37", "2.1.1.38", "2.1.1.39", "2.1.1.40",
- "2.1.1.41", "2.1.1.42", "2.1.1.43", "2.1.1.44", "2.1.1.45",
- "2.1.1.46", "2.1.1.47", "2.1.1.48", "2.1.1.49", "2.1.1.50",
- "2.1.1.51", "2.1.1.52", "2.1.1.53", "2.1.1.54", "2.1.1.55",
- "2.1.1.56", "2.1.1.57", "2.1.1.59", "2.1.1.60", "2.1.1.61",
- "2.1.1.62", "2.1.1.63", "2.1.1.64", "2.1.1.65", "2.1.1.66",
- "2.1.1.67", "2.1.1.68", "2.1.1.69", "2.1.1.70", "2.1.1.71",
- "2.1.1.72", "2.1.1.74", "2.1.1.75", "2.1.1.76", "2.1.1.77",
- "2.1.1.78", "2.1.1.79", "2.1.1.80", "2.1.1.82", "2.1.1.83",
- "2.1.1.84", "2.1.1.85", "2.1.1.86", "2.1.1.87", "2.1.1.88",
- "2.1.1.89", "2.1.1.90", "2.1.1.91", "2.1.1.94", "2.1.1.95",
- "2.1.1.96", "2.1.1.97", "2.1.1.98", "2.1.1.99", "2.1.1.100",
- "2.1.1.101", "2.1.1.102", "2.1.1.103", "2.1.1.104", "2.1.1.105",
- "2.1.1.106", "2.1.1.107", "2.1.1.108", "2.1.1.109", "2.1.1.110",
- "2.1.1.111", "2.1.1.112", "2.1.1.113", "2.1.1.114", "2.1.1.115",
- "2.1.1.116", "2.1.1.117", "2.1.1.118", "2.1.1.119", "2.1.1.120",
- "2.1.1.121", "2.1.1.122", "2.1.1.123", "2.1.1.124", "2.1.1.125",
- "2.1.1.126", "2.1.1.127", "2.1.1.128", "2.1.1.129", "2.1.1.130",
- "2.1.1.131", "2.1.1.132", "2.1.1.133", "2.1.1.136", "2.1.1.137",
- "2.1.1.139", "2.1.1.140", "2.1.1.141", "2.1.1.142", "2.1.1.143",
- "2.1.1.144", "2.1.1.145", "2.1.1.146", "2.1.1.147", "2.1.1.148",
- "2.1.1.149", "2.1.1.150", "2.1.1.151", "2.1.1.152", "2.1.1.153",
- "2.1.1.154", "2.1.1.155", "2.1.1.156", "2.1.1.157", "2.1.1.158",
- "2.1.1.159", "2.1.1.160", "2.1.1.161", "2.1.1.162", "2.1.1.163",
- "2.1.1.164", "2.1.1.165", "2.1.2.1", "2.1.2.2", "2.1.2.3",
- "2.1.2.4", "2.1.2.5", "2.1.2.7", "2.1.2.8", "2.1.2.9", "2.1.2.10",
- "2.1.2.11", "2.1.3.1", "2.1.3.2", "2.1.3.3", "2.1.3.5", "2.1.3.6",
- "2.1.3.7", "2.1.3.8", "2.1.3.9", "2.1.3.10", "2.1.3.11", "2.1.4.1",
- "2.1.4.2", "2.2.1.1", "2.2.1.2", "2.2.1.3", "2.2.1.4", "2.2.1.5",
- "2.2.1.6", "2.2.1.7", "2.2.1.8", "2.2.1.9", "2.3.1.1", "2.3.1.2",
- "2.3.1.3", "2.3.1.4", "2.3.1.5", "2.3.1.6", "2.3.1.7", "2.3.1.8",
- "2.3.1.9", "2.3.1.10", "2.3.1.11", "2.3.1.12", "2.3.1.13",
- "2.3.1.14", "2.3.1.15", "2.3.1.16", "2.3.1.17", "2.3.1.18",
- "2.3.1.19", "2.3.1.20", "2.3.1.21", "2.3.1.22", "2.3.1.23",
- "2.3.1.24", "2.3.1.25", "2.3.1.26", "2.3.1.27", "2.3.1.28",
- "2.3.1.29", "2.3.1.30", "2.3.1.31", "2.3.1.32", "2.3.1.33",
- "2.3.1.34", "2.3.1.35", "2.3.1.36", "2.3.1.37", "2.3.1.38",
- "2.3.1.39", "2.3.1.40", "2.3.1.41", "2.3.1.42", "2.3.1.43",
- "2.3.1.44", "2.3.1.45", "2.3.1.46", "2.3.1.47", "2.3.1.48",
- "2.3.1.49", "2.3.1.50", "2.3.1.51", "2.3.1.52", "2.3.1.53",
- "2.3.1.54", "2.3.1.56", "2.3.1.57", "2.3.1.58", "2.3.1.59",
- "2.3.1.60", "2.3.1.61", "2.3.1.62", "2.3.1.63", "2.3.1.64",
- "2.3.1.65", "2.3.1.66", "2.3.1.67", "2.3.1.68", "2.3.1.69",
- "2.3.1.71", "2.3.1.72", "2.3.1.73", "2.3.1.74", "2.3.1.75",
- "2.3.1.76", "2.3.1.77", "2.3.1.78", "2.3.1.79", "2.3.1.80",
- "2.3.1.81", "2.3.1.82", "2.3.1.83", "2.3.1.84", "2.3.1.85",
- "2.3.1.86", "2.3.1.87", "2.3.1.88", "2.3.1.89", "2.3.1.90",
- "2.3.1.91", "2.3.1.92", "2.3.1.93", "2.3.1.94", "2.3.1.95",
- "2.3.1.96", "2.3.1.97", "2.3.1.98", "2.3.1.99", "2.3.1.100",
- "2.3.1.101", "2.3.1.102", "2.3.1.103", "2.3.1.104", "2.3.1.105",
- "2.3.1.106", "2.3.1.107", "2.3.1.108", "2.3.1.109", "2.3.1.110",
- "2.3.1.111", "2.3.1.112", "2.3.1.113", "2.3.1.114", "2.3.1.115",
- "2.3.1.116", "2.3.1.117", "2.3.1.118", "2.3.1.119", "2.3.1.121",
- "2.3.1.122", "2.3.1.123", "2.3.1.125", "2.3.1.126", "2.3.1.127",
- "2.3.1.128", "2.3.1.129", "2.3.1.130", "2.3.1.131", "2.3.1.132",
- "2.3.1.133", "2.3.1.134", "2.3.1.135", "2.3.1.136", "2.3.1.137",
- "2.3.1.138", "2.3.1.139", "2.3.1.140", "2.3.1.141", "2.3.1.142",
- "2.3.1.143", "2.3.1.144", "2.3.1.145", "2.3.1.146", "2.3.1.147",
- "2.3.1.148", "2.3.1.149", "2.3.1.150", "2.3.1.151", "2.3.1.152",
- "2.3.1.153", "2.3.1.154", "2.3.1.155", "2.3.1.156", "2.3.1.157",
- "2.3.1.158", "2.3.1.159", "2.3.1.160", "2.3.1.161", "2.3.1.162",
- "2.3.1.163", "2.3.1.164", "2.3.1.165", "2.3.1.166", "2.3.1.167",
- "2.3.1.168", "2.3.1.169", "2.3.1.170", "2.3.1.171", "2.3.1.172",
- "2.3.1.173", "2.3.1.174", "2.3.1.175", "2.3.1.176", "2.3.1.177",
- "2.3.1.178", "2.3.1.179", "2.3.1.180", "2.3.1.181", "2.3.1.182",
- "2.3.1.183", "2.3.1.184", "2.3.1.185", "2.3.1.186", "2.3.1.187",
- "2.3.1.188", "2.3.1.189", "2.3.1.190", "2.3.2.1", "2.3.2.2",
- "2.3.2.3", "2.3.2.4", "2.3.2.5", "2.3.2.6", "2.3.2.7", "2.3.2.8",
- "2.3.2.9", "2.3.2.10", "2.3.2.11", "2.3.2.12", "2.3.2.13",
- "2.3.2.14", "2.3.2.15", "2.3.3.1", "2.3.3.2", "2.3.3.3", "2.3.3.4",
- "2.3.3.5", "2.3.3.6", "2.3.3.7", "2.3.3.8", "2.3.3.9", "2.3.3.10",
- "2.3.3.11", "2.3.3.12", "2.3.3.13", "2.3.3.14", "2.3.3.15",
- "2.4.1.1", "2.4.1.2", "2.4.1.4", "2.4.1.5", "2.4.1.7", "2.4.1.8",
- "2.4.1.9", "2.4.1.10", "2.4.1.11", "2.4.1.12", "2.4.1.13",
- "2.4.1.14", "2.4.1.15", "2.4.1.16", "2.4.1.17", "2.4.1.18",
- "2.4.1.19", "2.4.1.20", "2.4.1.21", "2.4.1.22", "2.4.1.23",
- "2.4.1.24", "2.4.1.25", "2.4.1.26", "2.4.1.27", "2.4.1.28",
- "2.4.1.29", "2.4.1.30", "2.4.1.31", "2.4.1.32", "2.4.1.33",
- "2.4.1.34", "2.4.1.35", "2.4.1.36", "2.4.1.37", "2.4.1.38",
- "2.4.1.39", "2.4.1.40", "2.4.1.41", "2.4.1.43", "2.4.1.44",
- "2.4.1.45", "2.4.1.46", "2.4.1.47", "2.4.1.48", "2.4.1.49",
- "2.4.1.50", "2.4.1.52", "2.4.1.53", "2.4.1.54", "2.4.1.56",
- "2.4.1.57", "2.4.1.58", "2.4.1.60", "2.4.1.62", "2.4.1.63",
- "2.4.1.64", "2.4.1.65", "2.4.1.66", "2.4.1.67", "2.4.1.68",
- "2.4.1.69", "2.4.1.70", "2.4.1.71", "2.4.1.73", "2.4.1.74",
- "2.4.1.78", "2.4.1.79", "2.4.1.80", "2.4.1.81", "2.4.1.82",
- "2.4.1.83", "2.4.1.85", "2.4.1.86", "2.4.1.87", "2.4.1.88",
- "2.4.1.90", "2.4.1.91", "2.4.1.92", "2.4.1.94", "2.4.1.95",
- "2.4.1.96", "2.4.1.97", "2.4.1.99", "2.4.1.100", "2.4.1.101",
- "2.4.1.102", "2.4.1.103", "2.4.1.104", "2.4.1.105", "2.4.1.106",
- "2.4.1.109", "2.4.1.110", "2.4.1.111", "2.4.1.113", "2.4.1.114",
- "2.4.1.115", "2.4.1.116", "2.4.1.117", "2.4.1.118", "2.4.1.119",
- "2.4.1.120", "2.4.1.121", "2.4.1.122", "2.4.1.123", "2.4.1.125",
- "2.4.1.126", "2.4.1.127", "2.4.1.128", "2.4.1.129", "2.4.1.130",
- "2.4.1.131", "2.4.1.132", "2.4.1.133", "2.4.1.134", "2.4.1.135",
- "2.4.1.136", "2.4.1.137", "2.4.1.138", "2.4.1.139", "2.4.1.140",
- "2.4.1.141", "2.4.1.142", "2.4.1.143", "2.4.1.144", "2.4.1.145",
- "2.4.1.146", "2.4.1.147", "2.4.1.148", "2.4.1.149", "2.4.1.150",
- "2.4.1.152", "2.4.1.153", "2.4.1.155", "2.4.1.156", "2.4.1.157",
- "2.4.1.158", "2.4.1.159", "2.4.1.160", "2.4.1.161", "2.4.1.162",
- "2.4.1.163", "2.4.1.164", "2.4.1.165", "2.4.1.166", "2.4.1.167",
- "2.4.1.168", "2.4.1.170", "2.4.1.171", "2.4.1.172", "2.4.1.173",
- "2.4.1.174", "2.4.1.175", "2.4.1.176", "2.4.1.177", "2.4.1.178",
- "2.4.1.179", "2.4.1.180", "2.4.1.181", "2.4.1.182", "2.4.1.183",
- "2.4.1.184", "2.4.1.185", "2.4.1.186", "2.4.1.187", "2.4.1.188",
- "2.4.1.189", "2.4.1.190", "2.4.1.191", "2.4.1.192", "2.4.1.193",
- "2.4.1.194", "2.4.1.195", "2.4.1.196", "2.4.1.197", "2.4.1.198",
- "2.4.1.199", "2.4.1.201", "2.4.1.202", "2.4.1.203", "2.4.1.205",
- "2.4.1.206", "2.4.1.207", "2.4.1.208", "2.4.1.209", "2.4.1.210",
- "2.4.1.211", "2.4.1.212", "2.4.1.213", "2.4.1.214", "2.4.1.215",
- "2.4.1.216", "2.4.1.217", "2.4.1.218", "2.4.1.219", "2.4.1.220",
- "2.4.1.221", "2.4.1.222", "2.4.1.223", "2.4.1.224", "2.4.1.225",
- "2.4.1.226", "2.4.1.227", "2.4.1.228", "2.4.1.229", "2.4.1.230",
- "2.4.1.231", "2.4.1.232", "2.4.1.234", "2.4.1.236", "2.4.1.237",
- "2.4.1.238", "2.4.1.239", "2.4.1.240", "2.4.1.241", "2.4.1.242",
- "2.4.1.243", "2.4.1.244", "2.4.1.245", "2.4.1.246", "2.4.1.247",
- "2.4.1.248", "2.4.1.249", "2.4.1.250", "2.4.2.1", "2.4.2.2",
- "2.4.2.3", "2.4.2.4", "2.4.2.5", "2.4.2.6", "2.4.2.7", "2.4.2.8",
- "2.4.2.9", "2.4.2.10", "2.4.2.11", "2.4.2.12", "2.4.2.14",
- "2.4.2.15", "2.4.2.16", "2.4.2.17", "2.4.2.18", "2.4.2.19",
- "2.4.2.20", "2.4.2.21", "2.4.2.22", "2.4.2.23", "2.4.2.24",
- "2.4.2.25", "2.4.2.26", "2.4.2.27", "2.4.2.28", "2.4.2.29",
- "2.4.2.30", "2.4.2.31", "2.4.2.32", "2.4.2.33", "2.4.2.34",
- "2.4.2.35", "2.4.2.36", "2.4.2.37", "2.4.2.38", "2.4.2.39",
- "2.4.2.40", "2.4.2.41", "2.4.2.42", "2.4.99.1", "2.4.99.2",
- "2.4.99.3", "2.4.99.4", "2.4.99.5", "2.4.99.6", "2.4.99.7",
- "2.4.99.8", "2.4.99.9", "2.4.99.10", "2.4.99.11", "2.5.1.1",
- "2.5.1.2", "2.5.1.3", "2.5.1.4", "2.5.1.5", "2.5.1.6", "2.5.1.7",
- "2.5.1.9", "2.5.1.10", "2.5.1.11", "2.5.1.15", "2.5.1.16",
- "2.5.1.17", "2.5.1.18", "2.5.1.19", "2.5.1.20", "2.5.1.21",
- "2.5.1.22", "2.5.1.23", "2.5.1.24", "2.5.1.25", "2.5.1.26",
- "2.5.1.27", "2.5.1.28", "2.5.1.29", "2.5.1.30", "2.5.1.31",
- "2.5.1.32", "2.5.1.33", "2.5.1.34", "2.5.1.35", "2.5.1.36",
- "2.5.1.38", "2.5.1.39", "2.5.1.41", "2.5.1.42", "2.5.1.43",
- "2.5.1.44", "2.5.1.45", "2.5.1.46", "2.5.1.47", "2.5.1.48",
- "2.5.1.49", "2.5.1.50", "2.5.1.51", "2.5.1.52", "2.5.1.53",
- "2.5.1.54", "2.5.1.55", "2.5.1.56", "2.5.1.57", "2.5.1.58",
- "2.5.1.59", "2.5.1.60", "2.5.1.61", "2.5.1.62", "2.5.1.63",
- "2.5.1.65", "2.5.1.66", "2.5.1.67", "2.5.1.68", "2.5.1.69",
- "2.5.1.70", "2.5.1.71", "2.5.1.72", "2.5.1.73", "2.5.1.74",
- "2.5.1.75", "2.5.1.76", "2.5.1.77", "2.5.1.78", "2.5.1.79",
- "2.5.1.80", "2.6.1.1", "2.6.1.2", "2.6.1.3", "2.6.1.4", "2.6.1.5",
- "2.6.1.6", "2.6.1.7", "2.6.1.8", "2.6.1.9", "2.6.1.11", "2.6.1.12",
- "2.6.1.13", "2.6.1.14", "2.6.1.15", "2.6.1.16", "2.6.1.17",
- "2.6.1.18", "2.6.1.19", "2.6.1.21", "2.6.1.22", "2.6.1.23",
- "2.6.1.24", "2.6.1.26", "2.6.1.27", "2.6.1.28", "2.6.1.29",
- "2.6.1.30", "2.6.1.31", "2.6.1.32", "2.6.1.33", "2.6.1.34",
- "2.6.1.35", "2.6.1.36", "2.6.1.37", "2.6.1.38", "2.6.1.39",
- "2.6.1.40", "2.6.1.41", "2.6.1.42", "2.6.1.43", "2.6.1.44",
- "2.6.1.45", "2.6.1.46", "2.6.1.47", "2.6.1.48", "2.6.1.49",
- "2.6.1.50", "2.6.1.51", "2.6.1.52", "2.6.1.54", "2.6.1.55",
- "2.6.1.56", "2.6.1.57", "2.6.1.58", "2.6.1.59", "2.6.1.60",
- "2.6.1.62", "2.6.1.63", "2.6.1.64", "2.6.1.65", "2.6.1.66",
- "2.6.1.67", "2.6.1.68", "2.6.1.70", "2.6.1.71", "2.6.1.72",
- "2.6.1.73", "2.6.1.74", "2.6.1.75", "2.6.1.76", "2.6.1.77",
- "2.6.1.78", "2.6.1.79", "2.6.1.80", "2.6.1.81", "2.6.1.82",
- "2.6.1.83", "2.6.1.84", "2.6.1.85", "2.6.1.86", "2.6.3.1",
- "2.6.99.1", "2.6.99.2", "2.7.1.1", "2.7.1.2", "2.7.1.3", "2.7.1.4",
- "2.7.1.5", "2.7.1.6", "2.7.1.7", "2.7.1.8", "2.7.1.10", "2.7.1.11",
- "2.7.1.12", "2.7.1.13", "2.7.1.14", "2.7.1.15", "2.7.1.16",
- "2.7.1.17", "2.7.1.18", "2.7.1.19", "2.7.1.20", "2.7.1.21",
- "2.7.1.22", "2.7.1.23", "2.7.1.24", "2.7.1.25", "2.7.1.26",
- "2.7.1.27", "2.7.1.28", "2.7.1.29", "2.7.1.30", "2.7.1.31",
- "2.7.1.32", "2.7.1.33", "2.7.1.34", "2.7.1.35", "2.7.1.36",
- "2.7.1.39", "2.7.1.40", "2.7.1.41", "2.7.1.42", "2.7.1.43",
- "2.7.1.44", "2.7.1.45", "2.7.1.46", "2.7.1.47", "2.7.1.48",
- "2.7.1.49", "2.7.1.50", "2.7.1.51", "2.7.1.52", "2.7.1.53",
- "2.7.1.54", "2.7.1.55", "2.7.1.56", "2.7.1.58", "2.7.1.59",
- "2.7.1.60", "2.7.1.61", "2.7.1.62", "2.7.1.63", "2.7.1.64",
- "2.7.1.65", "2.7.1.66", "2.7.1.67", "2.7.1.68", "2.7.1.69",
- "2.7.1.71", "2.7.1.72", "2.7.1.73", "2.7.1.74", "2.7.1.76",
- "2.7.1.77", "2.7.1.78", "2.7.1.79", "2.7.1.80", "2.7.1.81",
- "2.7.1.82", "2.7.1.83", "2.7.1.84", "2.7.1.85", "2.7.1.86",
- "2.7.1.87", "2.7.1.88", "2.7.1.89", "2.7.1.90", "2.7.1.91",
- "2.7.1.92", "2.7.1.93", "2.7.1.94", "2.7.1.95", "2.7.1.100",
- "2.7.1.101", "2.7.1.102", "2.7.1.103", "2.7.1.105", "2.7.1.106",
- "2.7.1.107", "2.7.1.108", "2.7.1.113", "2.7.1.114", "2.7.1.118",
- "2.7.1.119", "2.7.1.121", "2.7.1.122", "2.7.1.127", "2.7.1.130",
- "2.7.1.134", "2.7.1.136", "2.7.1.137", "2.7.1.138", "2.7.1.140",
- "2.7.1.142", "2.7.1.143", "2.7.1.144", "2.7.1.145", "2.7.1.146",
- "2.7.1.147", "2.7.1.148", "2.7.1.149", "2.7.1.150", "2.7.1.151",
- "2.7.1.153", "2.7.1.154", "2.7.1.156", "2.7.1.157", "2.7.1.158",
- "2.7.1.159", "2.7.1.160", "2.7.1.161", "2.7.1.162", "2.7.1.163",
- "2.7.1.164", "2.7.1.165", "2.7.2.1", "2.7.2.2", "2.7.2.3",
- "2.7.2.4", "2.7.2.6", "2.7.2.7", "2.7.2.8", "2.7.2.10", "2.7.2.11",
- "2.7.2.12", "2.7.2.13", "2.7.2.14", "2.7.2.15", "2.7.3.1",
- "2.7.3.2", "2.7.3.3", "2.7.3.4", "2.7.3.5", "2.7.3.6", "2.7.3.7",
- "2.7.3.8", "2.7.3.9", "2.7.3.10", "2.7.4.1", "2.7.4.2", "2.7.4.3",
- "2.7.4.4", "2.7.4.6", "2.7.4.7", "2.7.4.8", "2.7.4.9", "2.7.4.10",
- "2.7.4.11", "2.7.4.12", "2.7.4.13", "2.7.4.14", "2.7.4.15",
- "2.7.4.16", "2.7.4.17", "2.7.4.18", "2.7.4.19", "2.7.4.20",
- "2.7.4.21", "2.7.4.22", "2.7.4.23", "2.7.4.24", "2.7.6.1",
- "2.7.6.2", "2.7.6.3", "2.7.6.4", "2.7.6.5", "2.7.7.1", "2.7.7.2",
- "2.7.7.3", "2.7.7.4", "2.7.7.5", "2.7.7.6", "2.7.7.7", "2.7.7.8",
- "2.7.7.9", "2.7.7.10", "2.7.7.11", "2.7.7.12", "2.7.7.13",
- "2.7.7.14", "2.7.7.15", "2.7.7.18", "2.7.7.19", "2.7.7.21",
- "2.7.7.22", "2.7.7.23", "2.7.7.24", "2.7.7.25", "2.7.7.27",
- "2.7.7.28", "2.7.7.30", "2.7.7.31", "2.7.7.32", "2.7.7.33",
- "2.7.7.34", "2.7.7.35", "2.7.7.36", "2.7.7.37", "2.7.7.38",
- "2.7.7.39", "2.7.7.40", "2.7.7.41", "2.7.7.42", "2.7.7.43",
- "2.7.7.44", "2.7.7.45", "2.7.7.46", "2.7.7.47", "2.7.7.48",
- "2.7.7.49", "2.7.7.50", "2.7.7.51", "2.7.7.52", "2.7.7.53",
- "2.7.7.54", "2.7.7.55", "2.7.7.56", "2.7.7.57", "2.7.7.58",
- "2.7.7.59", "2.7.7.60", "2.7.7.61", "2.7.7.62", "2.7.7.63",
- "2.7.7.64", "2.7.7.65", "2.7.7.66", "2.7.7.67", "2.7.7.68",
- "2.7.8.1", "2.7.8.2", "2.7.8.3", "2.7.8.4", "2.7.8.5", "2.7.8.6",
- "2.7.8.7", "2.7.8.8", "2.7.8.9", "2.7.8.10", "2.7.8.11", "2.7.8.12",
- "2.7.8.13", "2.7.8.14", "2.7.8.15", "2.7.8.17", "2.7.8.18",
- "2.7.8.19", "2.7.8.20", "2.7.8.21", "2.7.8.22", "2.7.8.23",
- "2.7.8.24", "2.7.8.25", "2.7.8.26", "2.7.8.27", "2.7.8.28",
- "2.7.9.1", "2.7.9.2", "2.7.9.3", "2.7.9.4", "2.7.9.5", "2.7.10.1",
- "2.7.10.2", "2.7.11.1", "2.7.11.2", "2.7.11.3", "2.7.11.4",
- "2.7.11.5", "2.7.11.6", "2.7.11.7", "2.7.11.8", "2.7.11.9",
- "2.7.11.10", "2.7.11.11", "2.7.11.12", "2.7.11.13", "2.7.11.14",
- "2.7.11.15", "2.7.11.16", "2.7.11.17", "2.7.11.18", "2.7.11.19",
- "2.7.11.20", "2.7.11.21", "2.7.11.22", "2.7.11.23", "2.7.11.24",
- "2.7.11.25", "2.7.11.26", "2.7.11.27", "2.7.11.28", "2.7.11.29",
- "2.7.11.30", "2.7.11.31", "2.7.12.1", "2.7.12.2", "2.7.13.1",
- "2.7.13.2", "2.7.13.3", "2.7.99.1", "2.8.1.1", "2.8.1.2", "2.8.1.3",
- "2.8.1.4", "2.8.1.5", "2.8.1.6", "2.8.1.7", "2.8.1.8", "2.8.2.1",
- "2.8.2.2", "2.8.2.3", "2.8.2.4", "2.8.2.5", "2.8.2.6", "2.8.2.7",
- "2.8.2.8", "2.8.2.9", "2.8.2.10", "2.8.2.11", "2.8.2.13",
- "2.8.2.14", "2.8.2.15", "2.8.2.16", "2.8.2.17", "2.8.2.18",
- "2.8.2.19", "2.8.2.20", "2.8.2.21", "2.8.2.22", "2.8.2.23",
- "2.8.2.24", "2.8.2.25", "2.8.2.26", "2.8.2.27", "2.8.2.28",
- "2.8.2.29", "2.8.2.30", "2.8.2.31", "2.8.2.32", "2.8.2.33",
- "2.8.2.34", "2.8.3.1", "2.8.3.2", "2.8.3.3", "2.8.3.5", "2.8.3.6",
- "2.8.3.7", "2.8.3.8", "2.8.3.9", "2.8.3.10", "2.8.3.11", "2.8.3.12",
- "2.8.3.13", "2.8.3.14", "2.8.3.15", "2.8.3.16", "2.8.3.17",
- "2.8.4.1", "2.8.4.2", "2.9.1.1", "2.9.1.2", "3.1.1.1", "3.1.1.2",
- "3.1.1.3", "3.1.1.4", "3.1.1.5", "3.1.1.6", "3.1.1.7", "3.1.1.8",
- "3.1.1.10", "3.1.1.11", "3.1.1.13", "3.1.1.14", "3.1.1.15",
- "3.1.1.17", "3.1.1.19", "3.1.1.20", "3.1.1.21", "3.1.1.22",
- "3.1.1.23", "3.1.1.24", "3.1.1.25", "3.1.1.26", "3.1.1.27",
- "3.1.1.28", "3.1.1.29", "3.1.1.30", "3.1.1.31", "3.1.1.32",
- "3.1.1.33", "3.1.1.34", "3.1.1.35", "3.1.1.36", "3.1.1.37",
- "3.1.1.38", "3.1.1.39", "3.1.1.40", "3.1.1.41", "3.1.1.42",
- "3.1.1.43", "3.1.1.44", "3.1.1.45", "3.1.1.46", "3.1.1.47",
- "3.1.1.48", "3.1.1.49", "3.1.1.50", "3.1.1.51", "3.1.1.52",
- "3.1.1.53", "3.1.1.54", "3.1.1.55", "3.1.1.56", "3.1.1.57",
- "3.1.1.58", "3.1.1.59", "3.1.1.60", "3.1.1.61", "3.1.1.63",
- "3.1.1.64", "3.1.1.65", "3.1.1.66", "3.1.1.67", "3.1.1.68",
- "3.1.1.70", "3.1.1.71", "3.1.1.72", "3.1.1.73", "3.1.1.74",
- "3.1.1.75", "3.1.1.76", "3.1.1.77", "3.1.1.78", "3.1.1.79",
- "3.1.1.80", "3.1.1.81", "3.1.1.82", "3.1.1.83", "3.1.1.84",
- "3.1.2.1", "3.1.2.2", "3.1.2.3", "3.1.2.4", "3.1.2.5", "3.1.2.6",
- "3.1.2.7", "3.1.2.10", "3.1.2.11", "3.1.2.12", "3.1.2.13",
- "3.1.2.14", "3.1.2.15", "3.1.2.16", "3.1.2.17", "3.1.2.18",
- "3.1.2.19", "3.1.2.20", "3.1.2.21", "3.1.2.22", "3.1.2.23",
- "3.1.2.25", "3.1.2.26", "3.1.2.27", "3.1.3.1", "3.1.3.2", "3.1.3.3",
- "3.1.3.4", "3.1.3.5", "3.1.3.6", "3.1.3.7", "3.1.3.8", "3.1.3.9",
- "3.1.3.10", "3.1.3.11", "3.1.3.12", "3.1.3.13", "3.1.3.14",
- "3.1.3.15", "3.1.3.16", "3.1.3.17", "3.1.3.18", "3.1.3.19",
- "3.1.3.20", "3.1.3.21", "3.1.3.22", "3.1.3.23", "3.1.3.24",
- "3.1.3.25", "3.1.3.26", "3.1.3.27", "3.1.3.28", "3.1.3.29",
- "3.1.3.31", "3.1.3.32", "3.1.3.33", "3.1.3.34", "3.1.3.35",
- "3.1.3.36", "3.1.3.37", "3.1.3.38", "3.1.3.39", "3.1.3.40",
- "3.1.3.41", "3.1.3.42", "3.1.3.43", "3.1.3.44", "3.1.3.45",
- "3.1.3.46", "3.1.3.47", "3.1.3.48", "3.1.3.49", "3.1.3.50",
- "3.1.3.51", "3.1.3.52", "3.1.3.53", "3.1.3.54", "3.1.3.55",
- "3.1.3.56", "3.1.3.57", "3.1.3.58", "3.1.3.59", "3.1.3.60",
- "3.1.3.62", "3.1.3.63", "3.1.3.64", "3.1.3.66", "3.1.3.67",
- "3.1.3.68", "3.1.3.69", "3.1.3.70", "3.1.3.71", "3.1.3.72",
- "3.1.3.73", "3.1.3.74", "3.1.3.75", "3.1.3.76", "3.1.3.77",
- "3.1.3.78", "3.1.3.79", "3.1.3.80", "3.1.4.1", "3.1.4.2", "3.1.4.3",
- "3.1.4.4", "3.1.4.11", "3.1.4.12", "3.1.4.13", "3.1.4.14",
- "3.1.4.15", "3.1.4.16", "3.1.4.17", "3.1.4.35", "3.1.4.37",
- "3.1.4.38", "3.1.4.39", "3.1.4.40", "3.1.4.41", "3.1.4.42",
- "3.1.4.43", "3.1.4.44", "3.1.4.45", "3.1.4.46", "3.1.4.48",
- "3.1.4.49", "3.1.4.50", "3.1.4.51", "3.1.4.52", "3.1.4.53",
- "3.1.5.1", "3.1.6.1", "3.1.6.2", "3.1.6.3", "3.1.6.4", "3.1.6.6",
- "3.1.6.7", "3.1.6.8", "3.1.6.9", "3.1.6.10", "3.1.6.11", "3.1.6.12",
- "3.1.6.13", "3.1.6.14", "3.1.6.15", "3.1.6.16", "3.1.6.17",
- "3.1.6.18", "3.1.7.1", "3.1.7.2", "3.1.7.3", "3.1.7.4", "3.1.7.5",
- "3.1.8.1", "3.1.8.2", "3.1.11.1", "3.1.11.2", "3.1.11.3",
- "3.1.11.4", "3.1.11.5", "3.1.11.6", "3.1.13.1", "3.1.13.2",
- "3.1.13.3", "3.1.13.4", "3.1.13.5", "3.1.14.1", "3.1.15.1",
- "3.1.16.1", "3.1.21.1", "3.1.21.2", "3.1.21.3", "3.1.21.4",
- "3.1.21.5", "3.1.21.6", "3.1.21.7", "3.1.22.1", "3.1.22.2",
- "3.1.22.4", "3.1.22.5", "3.1.25.1", "3.1.26.1", "3.1.26.2",
- "3.1.26.3", "3.1.26.4", "3.1.26.5", "3.1.26.6", "3.1.26.7",
- "3.1.26.8", "3.1.26.9", "3.1.26.10", "3.1.26.11", "3.1.26.12",
- "3.1.26.13", "3.1.27.1", "3.1.27.2", "3.1.27.3", "3.1.27.4",
- "3.1.27.5", "3.1.27.6", "3.1.27.7", "3.1.27.8", "3.1.27.9",
- "3.1.27.10", "3.1.30.1", "3.1.30.2", "3.1.31.1", "3.2.1.1",
- "3.2.1.2", "3.2.1.3", "3.2.1.4", "3.2.1.6", "3.2.1.7", "3.2.1.8",
- "3.2.1.10", "3.2.1.11", "3.2.1.14", "3.2.1.15", "3.2.1.17",
- "3.2.1.18", "3.2.1.20", "3.2.1.21", "3.2.1.22", "3.2.1.23",
- "3.2.1.24", "3.2.1.25", "3.2.1.26", "3.2.1.28", "3.2.1.31",
- "3.2.1.32", "3.2.1.33", "3.2.1.35", "3.2.1.36", "3.2.1.37",
- "3.2.1.38", "3.2.1.39", "3.2.1.40", "3.2.1.41", "3.2.1.42",
- "3.2.1.43", "3.2.1.44", "3.2.1.45", "3.2.1.46", "3.2.1.47",
- "3.2.1.48", "3.2.1.49", "3.2.1.50", "3.2.1.51", "3.2.1.52",
- "3.2.1.53", "3.2.1.54", "3.2.1.55", "3.2.1.56", "3.2.1.57",
- "3.2.1.58", "3.2.1.59", "3.2.1.60", "3.2.1.61", "3.2.1.62",
- "3.2.1.63", "3.2.1.64", "3.2.1.65", "3.2.1.66", "3.2.1.67",
- "3.2.1.68", "3.2.1.70", "3.2.1.71", "3.2.1.72", "3.2.1.73",
- "3.2.1.74", "3.2.1.75", "3.2.1.76", "3.2.1.77", "3.2.1.78",
- "3.2.1.80", "3.2.1.81", "3.2.1.82", "3.2.1.83", "3.2.1.84",
- "3.2.1.85", "3.2.1.86", "3.2.1.87", "3.2.1.88", "3.2.1.89",
- "3.2.1.91", "3.2.1.92", "3.2.1.93", "3.2.1.94", "3.2.1.95",
- "3.2.1.96", "3.2.1.97", "3.2.1.98", "3.2.1.99", "3.2.1.100",
- "3.2.1.101", "3.2.1.102", "3.2.1.103", "3.2.1.104", "3.2.1.105",
- "3.2.1.106", "3.2.1.107", "3.2.1.108", "3.2.1.109", "3.2.1.111",
- "3.2.1.112", "3.2.1.113", "3.2.1.114", "3.2.1.115", "3.2.1.116",
- "3.2.1.117", "3.2.1.118", "3.2.1.119", "3.2.1.120", "3.2.1.121",
- "3.2.1.122", "3.2.1.123", "3.2.1.124", "3.2.1.125", "3.2.1.126",
- "3.2.1.127", "3.2.1.128", "3.2.1.129", "3.2.1.130", "3.2.1.131",
- "3.2.1.132", "3.2.1.133", "3.2.1.134", "3.2.1.135", "3.2.1.136",
- "3.2.1.137", "3.2.1.139", "3.2.1.140", "3.2.1.141", "3.2.1.142",
- "3.2.1.143", "3.2.1.144", "3.2.1.145", "3.2.1.146", "3.2.1.147",
- "3.2.1.149", "3.2.1.150", "3.2.1.151", "3.2.1.152", "3.2.1.153",
- "3.2.1.154", "3.2.1.155", "3.2.1.156", "3.2.1.157", "3.2.1.158",
- "3.2.1.159", "3.2.1.161", "3.2.1.162", "3.2.1.163", "3.2.1.164",
- "3.2.1.165", "3.2.2.1", "3.2.2.2", "3.2.2.3", "3.2.2.4", "3.2.2.5",
- "3.2.2.6", "3.2.2.7", "3.2.2.8", "3.2.2.9", "3.2.2.10", "3.2.2.11",
- "3.2.2.12", "3.2.2.13", "3.2.2.14", "3.2.2.15", "3.2.2.16",
- "3.2.2.17", "3.2.2.19", "3.2.2.20", "3.2.2.21", "3.2.2.22",
- "3.2.2.23", "3.2.2.24", "3.2.2.25", "3.2.2.26", "3.2.2.27",
- "3.2.2.28", "3.2.2.29", "3.3.1.1", "3.3.1.2", "3.3.2.1", "3.3.2.2",
- "3.3.2.4", "3.3.2.5", "3.3.2.6", "3.3.2.7", "3.3.2.8", "3.3.2.9",
- "3.3.2.10", "3.3.2.11", "3.4.11.1", "3.4.11.2", "3.4.11.3",
- "3.4.11.4", "3.4.11.5", "3.4.11.6", "3.4.11.7", "3.4.11.9",
- "3.4.11.10", "3.4.11.13", "3.4.11.14", "3.4.11.15", "3.4.11.16",
- "3.4.11.17", "3.4.11.18", "3.4.11.19", "3.4.11.20", "3.4.11.21",
- "3.4.11.22", "3.4.11.23", "3.4.11.24", "3.4.13.3", "3.4.13.4",
- "3.4.13.5", "3.4.13.7", "3.4.13.9", "3.4.13.12", "3.4.13.17",
- "3.4.13.18", "3.4.13.19", "3.4.13.20", "3.4.13.21", "3.4.13.22",
- "3.4.14.1", "3.4.14.2", "3.4.14.4", "3.4.14.5", "3.4.14.6",
- "3.4.14.9", "3.4.14.10", "3.4.14.11", "3.4.14.12", "3.4.15.1",
- "3.4.15.4", "3.4.15.5", "3.4.15.6", "3.4.16.2", "3.4.16.4",
- "3.4.16.5", "3.4.16.6", "3.4.17.1", "3.4.17.2", "3.4.17.3",
- "3.4.17.4", "3.4.17.6", "3.4.17.8", "3.4.17.10", "3.4.17.11",
- "3.4.17.12", "3.4.17.13", "3.4.17.14", "3.4.17.15", "3.4.17.16",
- "3.4.17.17", "3.4.17.18", "3.4.17.19", "3.4.17.20", "3.4.17.21",
- "3.4.17.22", "3.4.17.23", "3.4.18.1", "3.4.19.1", "3.4.19.2",
- "3.4.19.3", "3.4.19.5", "3.4.19.6", "3.4.19.7", "3.4.19.9",
- "3.4.19.11", "3.4.19.12", "3.4.21.1", "3.4.21.2", "3.4.21.3",
- "3.4.21.4", "3.4.21.5", "3.4.21.6", "3.4.21.7", "3.4.21.9",
- "3.4.21.10", "3.4.21.12", "3.4.21.19", "3.4.21.20", "3.4.21.21",
- "3.4.21.22", "3.4.21.25", "3.4.21.26", "3.4.21.27", "3.4.21.32",
- "3.4.21.34", "3.4.21.35", "3.4.21.36", "3.4.21.37", "3.4.21.38",
- "3.4.21.39", "3.4.21.41", "3.4.21.42", "3.4.21.43", "3.4.21.45",
- "3.4.21.46", "3.4.21.47", "3.4.21.48", "3.4.21.49", "3.4.21.50",
- "3.4.21.53", "3.4.21.54", "3.4.21.55", "3.4.21.57", "3.4.21.59",
- "3.4.21.60", "3.4.21.61", "3.4.21.62", "3.4.21.63", "3.4.21.64",
- "3.4.21.65", "3.4.21.66", "3.4.21.67", "3.4.21.68", "3.4.21.69",
- "3.4.21.70", "3.4.21.71", "3.4.21.72", "3.4.21.73", "3.4.21.74",
- "3.4.21.75", "3.4.21.76", "3.4.21.77", "3.4.21.78", "3.4.21.79",
- "3.4.21.80", "3.4.21.81", "3.4.21.82", "3.4.21.83", "3.4.21.84",
- "3.4.21.85", "3.4.21.86", "3.4.21.88", "3.4.21.89", "3.4.21.90",
- "3.4.21.91", "3.4.21.92", "3.4.21.93", "3.4.21.94", "3.4.21.95",
- "3.4.21.96", "3.4.21.97", "3.4.21.98", "3.4.21.99", "3.4.21.100",
- "3.4.21.101", "3.4.21.102", "3.4.21.103", "3.4.21.104",
- "3.4.21.105", "3.4.21.106", "3.4.21.107", "3.4.21.108",
- "3.4.21.109", "3.4.21.110", "3.4.21.111", "3.4.21.112",
- "3.4.21.113", "3.4.21.114", "3.4.21.115", "3.4.21.116",
- "3.4.21.117", "3.4.21.118", "3.4.21.119", "3.4.21.120", "3.4.22.1",
- "3.4.22.2", "3.4.22.3", "3.4.22.6", "3.4.22.7", "3.4.22.8",
- "3.4.22.10", "3.4.22.14", "3.4.22.15", "3.4.22.16", "3.4.22.24",
- "3.4.22.25", "3.4.22.26", "3.4.22.27", "3.4.22.28", "3.4.22.29",
- "3.4.22.30", "3.4.22.31", "3.4.22.32", "3.4.22.33", "3.4.22.34",
- "3.4.22.35", "3.4.22.36", "3.4.22.37", "3.4.22.38", "3.4.22.39",
- "3.4.22.40", "3.4.22.41", "3.4.22.42", "3.4.22.43", "3.4.22.44",
- "3.4.22.45", "3.4.22.46", "3.4.22.47", "3.4.22.48", "3.4.22.49",
- "3.4.22.50", "3.4.22.51", "3.4.22.52", "3.4.22.53", "3.4.22.54",
- "3.4.22.55", "3.4.22.56", "3.4.22.57", "3.4.22.58", "3.4.22.59",
- "3.4.22.60", "3.4.22.61", "3.4.22.62", "3.4.22.63", "3.4.22.64",
- "3.4.22.65", "3.4.22.66", "3.4.22.67", "3.4.22.68", "3.4.22.69",
- "3.4.22.70", "3.4.22.71", "3.4.23.1", "3.4.23.2", "3.4.23.3",
- "3.4.23.4", "3.4.23.5", "3.4.23.12", "3.4.23.15", "3.4.23.16",
- "3.4.23.17", "3.4.23.18", "3.4.23.19", "3.4.23.20", "3.4.23.21",
- "3.4.23.22", "3.4.23.23", "3.4.23.24", "3.4.23.25", "3.4.23.26",
- "3.4.23.28", "3.4.23.29", "3.4.23.30", "3.4.23.31", "3.4.23.32",
- "3.4.23.34", "3.4.23.35", "3.4.23.36", "3.4.23.38", "3.4.23.39",
- "3.4.23.40", "3.4.23.41", "3.4.23.42", "3.4.23.43", "3.4.23.44",
- "3.4.23.45", "3.4.23.46", "3.4.23.47", "3.4.23.48", "3.4.23.49",
- "3.4.23.50", "3.4.23.51", "3.4.24.1", "3.4.24.3", "3.4.24.6",
- "3.4.24.7", "3.4.24.11", "3.4.24.12", "3.4.24.13", "3.4.24.14",
- "3.4.24.15", "3.4.24.16", "3.4.24.17", "3.4.24.18", "3.4.24.19",
- "3.4.24.20", "3.4.24.21", "3.4.24.22", "3.4.24.23", "3.4.24.24",
- "3.4.24.25", "3.4.24.26", "3.4.24.27", "3.4.24.28", "3.4.24.29",
- "3.4.24.30", "3.4.24.31", "3.4.24.32", "3.4.24.33", "3.4.24.34",
- "3.4.24.35", "3.4.24.36", "3.4.24.37", "3.4.24.38", "3.4.24.39",
- "3.4.24.40", "3.4.24.41", "3.4.24.42", "3.4.24.43", "3.4.24.44",
- "3.4.24.45", "3.4.24.46", "3.4.24.47", "3.4.24.48", "3.4.24.49",
- "3.4.24.50", "3.4.24.51", "3.4.24.52", "3.4.24.53", "3.4.24.54",
- "3.4.24.55", "3.4.24.56", "3.4.24.57", "3.4.24.58", "3.4.24.59",
- "3.4.24.60", "3.4.24.61", "3.4.24.62", "3.4.24.63", "3.4.24.64",
- "3.4.24.65", "3.4.24.66", "3.4.24.67", "3.4.24.68", "3.4.24.69",
- "3.4.24.70", "3.4.24.71", "3.4.24.72", "3.4.24.73", "3.4.24.74",
- "3.4.24.75", "3.4.24.76", "3.4.24.77", "3.4.24.78", "3.4.24.79",
- "3.4.24.80", "3.4.24.81", "3.4.24.82", "3.4.24.83", "3.4.24.84",
- "3.4.24.85", "3.4.24.86", "3.4.24.87", "3.4.25.1", "3.4.25.2",
- "3.5.1.1", "3.5.1.2", "3.5.1.3", "3.5.1.4", "3.5.1.5", "3.5.1.6",
- "3.5.1.7", "3.5.1.8", "3.5.1.9", "3.5.1.10", "3.5.1.11", "3.5.1.12",
- "3.5.1.13", "3.5.1.14", "3.5.1.15", "3.5.1.16", "3.5.1.17",
- "3.5.1.18", "3.5.1.19", "3.5.1.20", "3.5.1.21", "3.5.1.22",
- "3.5.1.23", "3.5.1.24", "3.5.1.25", "3.5.1.26", "3.5.1.27",
- "3.5.1.28", "3.5.1.29", "3.5.1.30", "3.5.1.31", "3.5.1.32",
- "3.5.1.33", "3.5.1.35", "3.5.1.36", "3.5.1.38", "3.5.1.39",
- "3.5.1.40", "3.5.1.41", "3.5.1.42", "3.5.1.43", "3.5.1.44",
- "3.5.1.46", "3.5.1.47", "3.5.1.48", "3.5.1.49", "3.5.1.50",
- "3.5.1.51", "3.5.1.52", "3.5.1.53", "3.5.1.54", "3.5.1.55",
- "3.5.1.56", "3.5.1.57", "3.5.1.58", "3.5.1.59", "3.5.1.60",
- "3.5.1.61", "3.5.1.62", "3.5.1.63", "3.5.1.64", "3.5.1.65",
- "3.5.1.66", "3.5.1.67", "3.5.1.68", "3.5.1.69", "3.5.1.70",
- "3.5.1.71", "3.5.1.72", "3.5.1.73", "3.5.1.74", "3.5.1.75",
- "3.5.1.76", "3.5.1.77", "3.5.1.78", "3.5.1.79", "3.5.1.81",
- "3.5.1.82", "3.5.1.83", "3.5.1.84", "3.5.1.85", "3.5.1.86",
- "3.5.1.87", "3.5.1.88", "3.5.1.89", "3.5.1.90", "3.5.1.91",
- "3.5.1.92", "3.5.1.93", "3.5.1.94", "3.5.1.95", "3.5.1.96",
- "3.5.1.97", "3.5.1.98", "3.5.1.99", "3.5.1.100", "3.5.1.101",
- "3.5.1.102", "3.5.1.103", "3.5.2.1", "3.5.2.2", "3.5.2.3",
- "3.5.2.4", "3.5.2.5", "3.5.2.6", "3.5.2.7", "3.5.2.9", "3.5.2.10",
- "3.5.2.11", "3.5.2.12", "3.5.2.13", "3.5.2.14", "3.5.2.15",
- "3.5.2.16", "3.5.2.17", "3.5.2.18", "3.5.3.1", "3.5.3.2", "3.5.3.3",
- "3.5.3.4", "3.5.3.5", "3.5.3.6", "3.5.3.7", "3.5.3.8", "3.5.3.9",
- "3.5.3.10", "3.5.3.11", "3.5.3.12", "3.5.3.13", "3.5.3.14",
- "3.5.3.15", "3.5.3.16", "3.5.3.17", "3.5.3.18", "3.5.3.19",
- "3.5.3.20", "3.5.3.21", "3.5.3.22", "3.5.3.23", "3.5.4.1",
- "3.5.4.2", "3.5.4.3", "3.5.4.4", "3.5.4.5", "3.5.4.6", "3.5.4.7",
- "3.5.4.8", "3.5.4.9", "3.5.4.10", "3.5.4.11", "3.5.4.12",
- "3.5.4.13", "3.5.4.14", "3.5.4.15", "3.5.4.16", "3.5.4.17",
- "3.5.4.18", "3.5.4.19", "3.5.4.20", "3.5.4.21", "3.5.4.22",
- "3.5.4.23", "3.5.4.24", "3.5.4.25", "3.5.4.26", "3.5.4.27",
- "3.5.4.28", "3.5.4.29", "3.5.4.30", "3.5.5.1", "3.5.5.2", "3.5.5.4",
- "3.5.5.5", "3.5.5.6", "3.5.5.7", "3.5.5.8", "3.5.99.1", "3.5.99.2",
- "3.5.99.3", "3.5.99.4", "3.5.99.5", "3.5.99.6", "3.5.99.7",
- "3.6.1.1", "3.6.1.2", "3.6.1.3", "3.6.1.5", "3.6.1.6", "3.6.1.7",
- "3.6.1.8", "3.6.1.9", "3.6.1.10", "3.6.1.11", "3.6.1.12",
- "3.6.1.13", "3.6.1.14", "3.6.1.15", "3.6.1.16", "3.6.1.17",
- "3.6.1.18", "3.6.1.19", "3.6.1.20", "3.6.1.21", "3.6.1.22",
- "3.6.1.23", "3.6.1.24", "3.6.1.25", "3.6.1.26", "3.6.1.27",
- "3.6.1.28", "3.6.1.29", "3.6.1.30", "3.6.1.31", "3.6.1.39",
- "3.6.1.40", "3.6.1.41", "3.6.1.42", "3.6.1.43", "3.6.1.44",
- "3.6.1.45", "3.6.1.52", "3.6.1.53", "3.6.2.1", "3.6.2.2", "3.6.3.1",
- "3.6.3.2", "3.6.3.3", "3.6.3.4", "3.6.3.5", "3.6.3.6", "3.6.3.7",
- "3.6.3.8", "3.6.3.9", "3.6.3.10", "3.6.3.11", "3.6.3.12",
- "3.6.3.14", "3.6.3.15", "3.6.3.16", "3.6.3.17", "3.6.3.18",
- "3.6.3.19", "3.6.3.20", "3.6.3.21", "3.6.3.22", "3.6.3.23",
- "3.6.3.24", "3.6.3.25", "3.6.3.26", "3.6.3.27", "3.6.3.28",
- "3.6.3.29", "3.6.3.30", "3.6.3.31", "3.6.3.32", "3.6.3.33",
- "3.6.3.34", "3.6.3.35", "3.6.3.36", "3.6.3.37", "3.6.3.38",
- "3.6.3.39", "3.6.3.40", "3.6.3.41", "3.6.3.42", "3.6.3.43",
- "3.6.3.44", "3.6.3.46", "3.6.3.47", "3.6.3.48", "3.6.3.49",
- "3.6.3.50", "3.6.3.51", "3.6.3.52", "3.6.3.53", "3.6.4.1",
- "3.6.4.2", "3.6.4.3", "3.6.4.4", "3.6.4.5", "3.6.4.6", "3.6.4.7",
- "3.6.4.8", "3.6.4.9", "3.6.4.10", "3.6.4.11", "3.6.4.12",
- "3.6.4.13", "3.6.5.1", "3.6.5.2", "3.6.5.3", "3.6.5.4", "3.6.5.5",
- "3.6.5.6", "3.7.1.1", "3.7.1.2", "3.7.1.3", "3.7.1.4", "3.7.1.5",
- "3.7.1.6", "3.7.1.7", "3.7.1.8", "3.7.1.9", "3.7.1.10", "3.7.1.11",
- "3.8.1.1", "3.8.1.2", "3.8.1.3", "3.8.1.5", "3.8.1.6", "3.8.1.7",
- "3.8.1.8", "3.8.1.9", "3.8.1.10", "3.8.1.11", "3.9.1.1", "3.10.1.1",
- "3.10.1.2", "3.11.1.1", "3.11.1.2", "3.11.1.3", "3.12.1.1",
- "3.13.1.1", "3.13.1.3", "4.1.1.1", "4.1.1.2", "4.1.1.3", "4.1.1.4",
- "4.1.1.5", "4.1.1.6", "4.1.1.7", "4.1.1.8", "4.1.1.9", "4.1.1.11",
- "4.1.1.12", "4.1.1.14", "4.1.1.15", "4.1.1.16", "4.1.1.17",
- "4.1.1.18", "4.1.1.19", "4.1.1.20", "4.1.1.21", "4.1.1.22",
- "4.1.1.23", "4.1.1.24", "4.1.1.25", "4.1.1.28", "4.1.1.29",
- "4.1.1.30", "4.1.1.31", "4.1.1.32", "4.1.1.33", "4.1.1.34",
- "4.1.1.35", "4.1.1.36", "4.1.1.37", "4.1.1.38", "4.1.1.39",
- "4.1.1.40", "4.1.1.41", "4.1.1.42", "4.1.1.43", "4.1.1.44",
- "4.1.1.45", "4.1.1.46", "4.1.1.47", "4.1.1.48", "4.1.1.49",
- "4.1.1.50", "4.1.1.51", "4.1.1.52", "4.1.1.53", "4.1.1.54",
- "4.1.1.55", "4.1.1.56", "4.1.1.57", "4.1.1.58", "4.1.1.59",
- "4.1.1.60", "4.1.1.61", "4.1.1.62", "4.1.1.63", "4.1.1.64",
- "4.1.1.65", "4.1.1.66", "4.1.1.67", "4.1.1.68", "4.1.1.69",
- "4.1.1.70", "4.1.1.71", "4.1.1.72", "4.1.1.73", "4.1.1.74",
- "4.1.1.75", "4.1.1.76", "4.1.1.77", "4.1.1.78", "4.1.1.79",
- "4.1.1.80", "4.1.1.81", "4.1.1.82", "4.1.1.83", "4.1.1.84",
- "4.1.1.85", "4.1.1.86", "4.1.1.87", "4.1.1.88", "4.1.1.89",
- "4.1.1.90", "4.1.2.2", "4.1.2.4", "4.1.2.5", "4.1.2.8", "4.1.2.9",
- "4.1.2.10", "4.1.2.11", "4.1.2.12", "4.1.2.13", "4.1.2.14",
- "4.1.2.17", "4.1.2.18", "4.1.2.19", "4.1.2.20", "4.1.2.21",
- "4.1.2.22", "4.1.2.23", "4.1.2.24", "4.1.2.25", "4.1.2.26",
- "4.1.2.27", "4.1.2.28", "4.1.2.29", "4.1.2.30", "4.1.2.32",
- "4.1.2.33", "4.1.2.34", "4.1.2.35", "4.1.2.36", "4.1.2.37",
- "4.1.2.38", "4.1.2.40", "4.1.2.41", "4.1.2.42", "4.1.2.43",
- "4.1.2.44", "4.1.2.45", "4.1.3.1", "4.1.3.3", "4.1.3.4", "4.1.3.6",
- "4.1.3.13", "4.1.3.14", "4.1.3.16", "4.1.3.17", "4.1.3.22",
- "4.1.3.24", "4.1.3.25", "4.1.3.26", "4.1.3.27", "4.1.3.30",
- "4.1.3.32", "4.1.3.34", "4.1.3.35", "4.1.3.36", "4.1.3.38",
- "4.1.3.39", "4.1.3.40", "4.1.99.1", "4.1.99.2", "4.1.99.3",
- "4.1.99.5", "4.1.99.11", "4.1.99.12", "4.1.99.13", "4.1.99.14",
- "4.1.99.15", "4.2.1.1", "4.2.1.2", "4.2.1.3", "4.2.1.4", "4.2.1.5",
- "4.2.1.6", "4.2.1.7", "4.2.1.8", "4.2.1.9", "4.2.1.10", "4.2.1.11",
- "4.2.1.12", "4.2.1.17", "4.2.1.18", "4.2.1.19", "4.2.1.20",
- "4.2.1.22", "4.2.1.24", "4.2.1.25", "4.2.1.27", "4.2.1.28",
- "4.2.1.30", "4.2.1.31", "4.2.1.32", "4.2.1.33", "4.2.1.34",
- "4.2.1.35", "4.2.1.36", "4.2.1.39", "4.2.1.40", "4.2.1.41",
- "4.2.1.42", "4.2.1.43", "4.2.1.44", "4.2.1.45", "4.2.1.46",
- "4.2.1.47", "4.2.1.48", "4.2.1.49", "4.2.1.50", "4.2.1.51",
- "4.2.1.52", "4.2.1.53", "4.2.1.54", "4.2.1.55", "4.2.1.56",
- "4.2.1.57", "4.2.1.58", "4.2.1.59", "4.2.1.60", "4.2.1.61",
- "4.2.1.62", "4.2.1.65", "4.2.1.66", "4.2.1.67", "4.2.1.68",
- "4.2.1.69", "4.2.1.70", "4.2.1.73", "4.2.1.74", "4.2.1.75",
- "4.2.1.76", "4.2.1.77", "4.2.1.78", "4.2.1.79", "4.2.1.80",
- "4.2.1.81", "4.2.1.82", "4.2.1.83", "4.2.1.84", "4.2.1.85",
- "4.2.1.87", "4.2.1.88", "4.2.1.89", "4.2.1.90", "4.2.1.91",
- "4.2.1.92", "4.2.1.93", "4.2.1.94", "4.2.1.95", "4.2.1.96",
- "4.2.1.97", "4.2.1.98", "4.2.1.99", "4.2.1.100", "4.2.1.101",
- "4.2.1.103", "4.2.1.104", "4.2.1.105", "4.2.1.106", "4.2.1.107",
- "4.2.1.108", "4.2.1.109", "4.2.1.110", "4.2.1.111", "4.2.1.112",
- "4.2.1.113", "4.2.1.114", "4.2.1.115", "4.2.1.116", "4.2.1.117",
- "4.2.1.118", "4.2.1.119", "4.2.1.120", "4.2.2.1", "4.2.2.2",
- "4.2.2.3", "4.2.2.5", "4.2.2.6", "4.2.2.7", "4.2.2.8", "4.2.2.9",
- "4.2.2.10", "4.2.2.11", "4.2.2.12", "4.2.2.13", "4.2.2.14",
- "4.2.2.15", "4.2.2.16", "4.2.2.17", "4.2.2.18", "4.2.2.19",
- "4.2.2.20", "4.2.2.21", "4.2.2.22", "4.2.3.1", "4.2.3.2", "4.2.3.3",
- "4.2.3.4", "4.2.3.5", "4.2.3.6", "4.2.3.7", "4.2.3.8", "4.2.3.9",
- "4.2.3.10", "4.2.3.11", "4.2.3.12", "4.2.3.13", "4.2.3.14",
- "4.2.3.15", "4.2.3.16", "4.2.3.17", "4.2.3.18", "4.2.3.19",
- "4.2.3.20", "4.2.3.21", "4.2.3.22", "4.2.3.23", "4.2.3.24",
- "4.2.3.25", "4.2.3.26", "4.2.3.27", "4.2.3.28", "4.2.3.29",
- "4.2.3.30", "4.2.3.31", "4.2.3.32", "4.2.3.33", "4.2.3.34",
- "4.2.3.35", "4.2.3.36", "4.2.3.37", "4.2.3.38", "4.2.3.39",
- "4.2.3.40", "4.2.3.41", "4.2.3.42", "4.2.3.43", "4.2.3.44",
- "4.2.3.45", "4.2.99.12", "4.2.99.18", "4.2.99.20", "4.3.1.1",
- "4.3.1.2", "4.3.1.3", "4.3.1.4", "4.3.1.6", "4.3.1.7", "4.3.1.9",
- "4.3.1.10", "4.3.1.12", "4.3.1.13", "4.3.1.14", "4.3.1.15",
- "4.3.1.16", "4.3.1.17", "4.3.1.18", "4.3.1.19", "4.3.1.20",
- "4.3.1.22", "4.3.1.23", "4.3.1.24", "4.3.1.25", "4.3.1.26",
- "4.3.2.1", "4.3.2.2", "4.3.2.3", "4.3.2.4", "4.3.2.5", "4.3.3.1",
- "4.3.3.2", "4.3.3.3", "4.3.3.4", "4.3.3.5", "4.3.99.2", "4.4.1.1",
- "4.4.1.2", "4.4.1.3", "4.4.1.4", "4.4.1.5", "4.4.1.6", "4.4.1.8",
- "4.4.1.9", "4.4.1.10", "4.4.1.11", "4.4.1.13", "4.4.1.14",
- "4.4.1.15", "4.4.1.16", "4.4.1.17", "4.4.1.19", "4.4.1.20",
- "4.4.1.21", "4.4.1.22", "4.4.1.23", "4.4.1.24", "4.4.1.25",
- "4.5.1.1", "4.5.1.2", "4.5.1.3", "4.5.1.4", "4.5.1.5", "4.6.1.1",
- "4.6.1.2", "4.6.1.6", "4.6.1.12", "4.6.1.13", "4.6.1.14",
- "4.6.1.15", "4.99.1.1", "4.99.1.2", "4.99.1.3", "4.99.1.4",
- "4.99.1.5", "4.99.1.6", "4.99.1.7", "4.99.1.8", "5.1.1.1",
- "5.1.1.2", "5.1.1.3", "5.1.1.4", "5.1.1.5", "5.1.1.6", "5.1.1.7",
- "5.1.1.8", "5.1.1.9", "5.1.1.10", "5.1.1.11", "5.1.1.12",
- "5.1.1.13", "5.1.1.14", "5.1.1.15", "5.1.1.16", "5.1.1.17",
- "5.1.1.18", "5.1.2.1", "5.1.2.2", "5.1.2.3", "5.1.2.4", "5.1.2.5",
- "5.1.2.6", "5.1.3.1", "5.1.3.2", "5.1.3.3", "5.1.3.4", "5.1.3.5",
- "5.1.3.6", "5.1.3.7", "5.1.3.8", "5.1.3.9", "5.1.3.10", "5.1.3.11",
- "5.1.3.12", "5.1.3.13", "5.1.3.14", "5.1.3.15", "5.1.3.16",
- "5.1.3.17", "5.1.3.18", "5.1.3.19", "5.1.3.20", "5.1.3.21",
- "5.1.3.22", "5.1.3.23", "5.1.99.1", "5.1.99.2", "5.1.99.3",
- "5.1.99.4", "5.1.99.5", "5.2.1.1", "5.2.1.2", "5.2.1.3", "5.2.1.4",
- "5.2.1.5", "5.2.1.6", "5.2.1.7", "5.2.1.8", "5.2.1.9", "5.2.1.10",
- "5.3.1.1", "5.3.1.3", "5.3.1.4", "5.3.1.5", "5.3.1.6", "5.3.1.7",
- "5.3.1.8", "5.3.1.9", "5.3.1.12", "5.3.1.13", "5.3.1.14",
- "5.3.1.15", "5.3.1.16", "5.3.1.17", "5.3.1.20", "5.3.1.21",
- "5.3.1.22", "5.3.1.23", "5.3.1.24", "5.3.1.25", "5.3.1.26",
- "5.3.1.27", "5.3.2.1", "5.3.2.2", "5.3.3.1", "5.3.3.2", "5.3.3.3",
- "5.3.3.4", "5.3.3.5", "5.3.3.6", "5.3.3.7", "5.3.3.8", "5.3.3.9",
- "5.3.3.10", "5.3.3.11", "5.3.3.12", "5.3.3.13", "5.3.3.14",
- "5.3.3.15", "5.3.4.1", "5.3.99.2", "5.3.99.3", "5.3.99.4",
- "5.3.99.5", "5.3.99.6", "5.3.99.7", "5.3.99.8", "5.3.99.9",
- "5.4.1.1", "5.4.1.2", "5.4.2.1", "5.4.2.2", "5.4.2.3", "5.4.2.4",
- "5.4.2.5", "5.4.2.6", "5.4.2.7", "5.4.2.8", "5.4.2.9", "5.4.2.10",
- "5.4.3.2", "5.4.3.3", "5.4.3.4", "5.4.3.5", "5.4.3.6", "5.4.3.7",
- "5.4.3.8", "5.4.4.1", "5.4.4.2", "5.4.4.3", "5.4.99.1", "5.4.99.2",
- "5.4.99.3", "5.4.99.4", "5.4.99.5", "5.4.99.7", "5.4.99.8",
- "5.4.99.9", "5.4.99.11", "5.4.99.12", "5.4.99.13", "5.4.99.14",
- "5.4.99.15", "5.4.99.16", "5.4.99.17", "5.4.99.18", "5.5.1.1",
- "5.5.1.2", "5.5.1.3", "5.5.1.4", "5.5.1.5", "5.5.1.6", "5.5.1.7",
- "5.5.1.8", "5.5.1.9", "5.5.1.10", "5.5.1.11", "5.5.1.12",
- "5.5.1.13", "5.5.1.14", "5.5.1.15", "5.5.1.16", "5.99.1.1",
- "5.99.1.2", "5.99.1.3", "5.99.1.4", "6.1.1.1", "6.1.1.2", "6.1.1.3",
- "6.1.1.4", "6.1.1.5", "6.1.1.6", "6.1.1.7", "6.1.1.9", "6.1.1.10",
- "6.1.1.11", "6.1.1.12", "6.1.1.13", "6.1.1.14", "6.1.1.15",
- "6.1.1.16", "6.1.1.17", "6.1.1.18", "6.1.1.19", "6.1.1.20",
- "6.1.1.21", "6.1.1.22", "6.1.1.23", "6.1.1.24", "6.1.1.25",
- "6.1.1.26", "6.1.1.27", "6.2.1.1", "6.2.1.2", "6.2.1.3", "6.2.1.4",
- "6.2.1.5", "6.2.1.6", "6.2.1.7", "6.2.1.8", "6.2.1.9", "6.2.1.10",
- "6.2.1.11", "6.2.1.12", "6.2.1.13", "6.2.1.14", "6.2.1.15",
- "6.2.1.16", "6.2.1.17", "6.2.1.18", "6.2.1.19", "6.2.1.20",
- "6.2.1.22", "6.2.1.23", "6.2.1.24", "6.2.1.25", "6.2.1.26",
- "6.2.1.27", "6.2.1.28", "6.2.1.30", "6.2.1.31", "6.2.1.32",
- "6.2.1.33", "6.2.1.34", "6.2.1.35", "6.2.1.36", "6.3.1.1",
- "6.3.1.2", "6.3.1.4", "6.3.1.5", "6.3.1.6", "6.3.1.7", "6.3.1.8",
- "6.3.1.9", "6.3.1.10", "6.3.1.11", "6.3.1.12", "6.3.1.13",
- "6.3.2.1", "6.3.2.2", "6.3.2.3", "6.3.2.4", "6.3.2.5", "6.3.2.6",
- "6.3.2.7", "6.3.2.8", "6.3.2.9", "6.3.2.10", "6.3.2.11", "6.3.2.12",
- "6.3.2.13", "6.3.2.14", "6.3.2.16", "6.3.2.17", "6.3.2.18",
- "6.3.2.19", "6.3.2.20", "6.3.2.21", "6.3.2.22", "6.3.2.23",
- "6.3.2.24", "6.3.2.25", "6.3.2.26", "6.3.2.27", "6.3.2.28",
- "6.3.2.29", "6.3.2.30", "6.3.2.31", "6.3.2.32", "6.3.2.33",
- "6.3.2.34", "6.3.3.1", "6.3.3.2", "6.3.3.3", "6.3.3.4", "6.3.4.1",
- "6.3.4.2", "6.3.4.3", "6.3.4.4", "6.3.4.5", "6.3.4.6", "6.3.4.7",
- "6.3.4.8", "6.3.4.9", "6.3.4.10", "6.3.4.11", "6.3.4.12",
- "6.3.4.13", "6.3.4.14", "6.3.4.15", "6.3.4.16", "6.3.4.17",
- "6.3.4.18", "6.3.5.1", "6.3.5.2", "6.3.5.3", "6.3.5.4", "6.3.5.5",
- "6.3.5.6", "6.3.5.7", "6.3.5.9", "6.3.5.10", "6.4.1.1", "6.4.1.2",
- "6.4.1.3", "6.4.1.4", "6.4.1.5", "6.4.1.6", "6.4.1.7", "6.5.1.1",
- "6.5.1.2", "6.5.1.3", "6.5.1.4", "6.6.1.1", "6.6.1.2",
- NULL
-};
-
NLM_EXTERN Boolean LookForECnumberPattern (CharPtr str)
{
@@ -17615,7 +18702,7 @@ static Boolean ValidateECnumber (CharPtr str)
NLM_EXTERN void ECNumberFSAFreeAll (void)
{
- CtSetPtr csp;
+ CtrySetPtr ctsp;
TextFsaPtr fsa;
fsa = (TextFsaPtr) GetAppProperty ("SpecificECNumberFSA");
@@ -17648,17 +18735,23 @@ NLM_EXTERN void ECNumberFSAFreeAll (void)
TextFsaFree (fsa);
}
- csp = (CtSetPtr) GetAppProperty ("CountryLatLonList");
- if (csp != NULL) {
- SetAppProperty ("CountryLatLonList", NULL);
- CtSetDataFree (csp);
+ ctsp = (CtrySetPtr) GetAppProperty ("CountryLatLonData");
+ if (ctsp != NULL) {
+ SetAppProperty ("CountryLatLonData", NULL);
+ FreeLatLonCountryData (ctsp);
+ }
+
+ ctsp = (CtrySetPtr) GetAppProperty ("WaterLatLonData");
+ if (ctsp != NULL) {
+ SetAppProperty ("WaterLatLonData", NULL);
+ FreeLatLonCountryData (ctsp);
}
ic_code_data = MemFree (ic_code_data);
ic_code_list = ValNodeFreeData (ic_code_list);
}
-static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local, Boolean trimAtTab)
+static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local, size_t numitems, Boolean trimAtTab)
{
FileCache fc;
@@ -17707,7 +18800,7 @@ static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local
}
} else if (local != NULL) {
- for (i = 0; local [i] != NULL; i++) {
+ for (i = 0; /* local [i] != NULL */ i < numitems; i++) {
str = local [i];
if (StringDoesHaveText (str)) {
if (StringLen (str) + 3 < sizeof (tmp)) {
@@ -17733,25 +18826,25 @@ static TextFsaPtr GetECNumberFSA (CharPtr prop, CharPtr file, CharPtr PNTR local
static TextFsaPtr GetSpecificECNumberFSA (void)
{
- return (GetECNumberFSA ("SpecificECNumberFSA", "ecnum_specific.txt", ecnum_specif, FALSE));
+ return (GetECNumberFSA ("SpecificECNumberFSA", "ecnum_specific.txt", (CharPtr PNTR) kECNum_specific, sizeof (kECNum_specific) / sizeof (char*), TRUE));
}
static TextFsaPtr GetAmbiguousECNumberFSA (void)
{
- return (GetECNumberFSA ("AmbiguousECNumberFSA", "ecnum_ambiguous.txt", ecnum_ambig, FALSE));
+ return (GetECNumberFSA ("AmbiguousECNumberFSA", "ecnum_ambiguous.txt", (CharPtr PNTR) kECNum_ambiguous, sizeof (kECNum_ambiguous) / sizeof (char*), TRUE));
}
static TextFsaPtr GetDeletedECNumberFSA (void)
{
- return (GetECNumberFSA ("DeletedECNumberFSA", "ecnum_deleted.txt", NULL, FALSE));
+ return (GetECNumberFSA ("DeletedECNumberFSA", "ecnum_deleted.txt", (CharPtr PNTR) kECNum_deleted, sizeof (kECNum_deleted) / sizeof (char*), TRUE));
}
static TextFsaPtr GetReplacedECNumberFSA (void)
{
- return (GetECNumberFSA ("ReplacedEECNumberFSA", "ecnum_replaced.txt", NULL, TRUE));
+ return (GetECNumberFSA ("ReplacedEECNumberFSA", "ecnum_replaced.txt", (CharPtr PNTR) kECNum_replaced, sizeof (kECNum_replaced) / sizeof (char*), TRUE));
}
static Boolean ECnumberNotInList (CharPtr str)
@@ -17933,7 +19026,7 @@ static void ValidateRptUnit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
fto = ftmp;
}
if (from < ffrom || from > fto || to < ffrom || to > fto) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_InvalidQualifierValue, "/rpt_unit_range is not within sequence length");
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_RptUnitRangeProblem, "/rpt_unit_range is not within sequence length");
}
}
}
@@ -17959,6 +19052,224 @@ static void ValidateRptUnit (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
}
}
+
+/*
+ * Reports whether qualifier code val is listed for the feature entry
+ * ParFlat_GBFeat [index], looking first at its optional qualifiers and
+ * then at its mandatory ones.  Returns TRUE on the first match, FALSE
+ * when neither list contains val.
+ */
+static Boolean IsGbIndexQualPairValid (Int2 index, Int2 val)
+{
+  Int2  k;
+
+  for (k = 0; k < ParFlat_GBFeat[index].opt_num; k++) {
+    if (ParFlat_GBFeat[index].opt_qual[k] == val) {
+      return TRUE;
+    }
+  }
+
+  for (k = 0; k < ParFlat_GBFeat[index].mand_num; k++) {
+    if (ParFlat_GBFeat[index].mand_qual[k] == val) {
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+
+/*
+ * Builds the feature key string used for sfp when looking it up in the
+ * GenBank feature table.
+ *
+ * Import features use their own key, with "-" mapped to "misc_feature".
+ * All other features use the FeatDefTypeLabel text, with "Gene" lowered
+ * to "gene" and "preRNA" replaced by "precursor_RNA".
+ *
+ * Returns a newly allocated string that the caller releases with MemFree,
+ * or NULL when sfp is NULL or an import feature carries no ImpFeat data.
+ * The result is also whatever StringSaveNoNull yields when the underlying
+ * key or label is missing (presumably NULL -- confirm against ncbistr).
+ */
+NLM_EXTERN CharPtr GetGBFeatKeyForFeature (SeqFeatPtr sfp)
+{
+  CharPtr     key = NULL;
+  ImpFeatPtr  ifp;
+
+  if (sfp == NULL) {
+    return NULL;
+  }
+
+  if (sfp->data.choice == SEQFEAT_IMP) {
+    ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
+    if (ifp == NULL) {
+      /* malformed import feature: there is no key to copy, so do not dereference */
+      return NULL;
+    }
+    if (StringCmp (ifp->key, "-") == 0) {
+      key = StringSave ("misc_feature");
+    } else {
+      key = StringSaveNoNull (ifp->key);
+    }
+  } else {
+    key = StringSaveNoNull (FeatDefTypeLabel (sfp));
+    if (StringCmp (key, "Gene") == 0) {
+      /* a successful compare means key is non-NULL, so it is safe to patch the first letter */
+      *key = 'g';
+    } else if (StringCmp (key, "preRNA") == 0) {
+      key = MemFree (key);
+      key = StringSave ("precursor_RNA");
+    }
+  }
+  return key;
+}
+
+
+/*
+ * Tells whether the qualifier called qual_name is to be suppressed on a
+ * feature of the given subtype.
+ *
+ * Blank names are never suppressed.  "experiment" and "inference" are
+ * suppressed for every subtype.  In addition "product" is suppressed on
+ * ncRNA, tmRNA and otherRNA features, "ncRNA_class" on ncRNA features and
+ * "tag_peptide" on tmRNA features.
+ */
+NLM_EXTERN Boolean ShouldSuppressGBQual(Uint1 subtype, CharPtr qual_name)
+{
+  CharPtr  subtype_qual = NULL;  /* extra name suppressed only for this subtype */
+
+  if (StringHasNoText (qual_name)) {
+    return FALSE;
+  }
+
+  /* suppressed no matter what the feature is */
+  if (StringCmp (qual_name, "experiment") == 0) return TRUE;
+  if (StringCmp (qual_name, "inference") == 0) return TRUE;
+
+  switch (subtype) {
+    case FEATDEF_ncRNA :
+      subtype_qual = "ncRNA_class";
+      break;
+    case FEATDEF_tmRNA :
+      subtype_qual = "tag_peptide";
+      break;
+    case FEATDEF_otherRNA :
+      break;
+    default :
+      /* no subtype-specific suppression for anything else */
+      return FALSE;
+  }
+
+  if (StringCmp (qual_name, "product") == 0) {
+    return TRUE;
+  }
+  if (subtype_qual != NULL && StringCmp (qual_name, subtype_qual) == 0) {
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+/*
+ * Decides whether qualifier code qual, on a feature of the given subtype,
+ * should be a GBQual.
+ *
+ *   subtype             FEATDEF_ value of the feature
+ *   qual                GBQUAL_ code; negative codes always give FALSE
+ *   allowProductGBQual  when TRUE, GBQUAL_product is accepted outright
+ *
+ * Returns FALSE for the codes listed below, for map/operon on subtypes
+ * other than the ones named, for usedin unless the "SequinUseEMBLFeatures"
+ * application property is set, and for any name ShouldSuppressGBQual
+ * rejects.  Returns TRUE otherwise.
+ */
+NLM_EXTERN Boolean ShouldBeAGBQual (Uint1 subtype, Int2 qual, Boolean allowProductGBQual)
+
+{
+  if (qual < 0) return FALSE;
+  if (allowProductGBQual && qual == GBQUAL_product) return TRUE;
+
+  /* codes refused whatever the feature subtype is */
+  switch (qual) {
+    case GBQUAL_citation :
+    case GBQUAL_db_xref :
+    case GBQUAL_evidence :
+    case GBQUAL_exception :
+    case GBQUAL_gene :
+    case GBQUAL_gene_synonym :
+    case GBQUAL_insertion_seq :
+    case GBQUAL_label :
+    case GBQUAL_locus_tag :
+    case GBQUAL_non_functional :
+    case GBQUAL_note :
+    case GBQUAL_partial :
+    case GBQUAL_product :
+    case GBQUAL_pseudo :
+    case GBQUAL_pseudogene :
+    case GBQUAL_rpt_unit :
+    case GBQUAL_transposon :
+    case GBQUAL_experiment :
+    case GBQUAL_trans_splicing :
+    case GBQUAL_ribosomal_slippage :
+    case GBQUAL_standard_name :
+    case GBQUAL_inference :
+      return FALSE;
+    default :
+      break;
+  }
+
+  /* codes refused on coding regions only */
+  if (subtype == FEATDEF_CDS) {
+    switch (qual) {
+      case GBQUAL_codon_start :
+      case GBQUAL_codon :
+      case GBQUAL_EC_number :
+      case GBQUAL_gdb_xref :
+      case GBQUAL_number :
+      case GBQUAL_protein_id :
+      case GBQUAL_transl_except :
+      case GBQUAL_transl_table :
+      case GBQUAL_translation :
+      case GBQUAL_allele :
+      case GBQUAL_function :
+      case GBQUAL_old_locus_tag :
+        return FALSE;
+      default :
+        break;
+    }
+  }
+
+  if (qual == GBQUAL_map) {
+    if (subtype != FEATDEF_ANY && subtype != FEATDEF_repeat_region && subtype != FEATDEF_gap) {
+      return FALSE;
+    }
+  }
+  if (qual == GBQUAL_operon) {
+    if (subtype != FEATDEF_ANY && subtype != FEATDEF_operon) {
+      return FALSE;
+    }
+  }
+
+  /* the property is looked up on every call, before qual is examined */
+  if (Nlm_GetAppProperty ("SequinUseEMBLFeatures") == NULL && qual == GBQUAL_usedin) {
+    return FALSE;
+  }
+
+  /* qual is known to be non-negative here, so it can index the name table */
+  if (ShouldSuppressGBQual (subtype, ParFlat_GBQual_names [qual].name)) {
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/*
+ * Messages for the "special reason" results of IsQualValidForFeature.
+ * Entries are in the same order as the EWrongQualReason values below;
+ * ValidateNonImpFeat indexes this table with (result - 2).
+ */
+static CharPtr sWrongQualReasons[] = {
+ "conflicting codon_start values",
+ "codon_start value should be 1, 2, or 3"
+};
+
+typedef enum {
+ eWrongQualReason_conflicting_codon_start = 0,
+ eWrongQualReason_bad_codon_start_value
+} EWrongQualReason;
+
+/*
+ * Classifies gbqual as a qualifier for feature sfp.
+ *
+ * Return values:
+ *   1: yes, the qualifier is accepted on this feature
+ *   0: no, the qualifier is not listed for this feature
+ *  -1: don't know (NULL argument, unrecognized feature key or qualifier
+ *      name, or a codon_start qualifier on a coding region that has no
+ *      CdRegion data)
+ *   2 or more: no for special reasons; subtracting 2 gives the
+ *      EWrongQualReason value
+ */
+NLM_EXTERN Int4 IsQualValidForFeature (GBQualPtr gbqual, SeqFeatPtr sfp)
+{
+  CdRegionPtr  cdr;
+  Int2         feat_idx;
+  CharPtr      feat_key;
+  CharPtr      name;
+  Int2         qual_code;
+
+  if (sfp == NULL || gbqual == NULL) {
+    return -1;
+  }
+
+  /* the key string is only needed for the table lookup, so free it right away */
+  feat_key = GetGBFeatKeyForFeature (sfp);
+  feat_idx = GBFeatKeyNameValid (&feat_key, FALSE);
+  feat_key = MemFree (feat_key);
+  if (feat_idx == -1) {
+    /* unknown */
+    return -1;
+  }
+
+  name = gbqual->qual;
+
+  /* force good */
+  if (StringCmp (name, "gsdb_id") == 0) {
+    return 1;
+  }
+
+  /* names accepted by fiat on particular feature types */
+  switch (sfp->data.choice) {
+    case SEQFEAT_GENE :
+      if (StringCmp (name, "gen_map") == 0 ||
+          StringCmp (name, "cyt_map") == 0 ||
+          StringCmp (name, "rad_map") == 0) {
+        return 1;
+      }
+      break;
+    case SEQFEAT_CDREGION :
+      if (StringCmp (name, "orig_transcript_id") == 0) {
+        return 1;
+      }
+      break;
+    case SEQFEAT_RNA :
+      if (StringCmp (name, "orig_protein_id") == 0 ||
+          StringCmp (name, "orig_transcript_id") == 0) {
+        return 1;
+      }
+      break;
+    default :
+      break;
+  }
+
+  qual_code = GBQualNameValid (name);
+  if (qual_code == -1) {
+    return -1;
+  }
+
+  /* a codon_start qualifier on a coding region is reported with its own reason code */
+  if (sfp->data.choice == SEQFEAT_CDREGION && qual_code == GBQUAL_codon_start) {
+    cdr = (CdRegionPtr) sfp->data.value.ptrvalue;
+    if (cdr == NULL) {
+      return -1;
+    }
+    if (cdr->frame > 0) {
+      return eWrongQualReason_conflicting_codon_start + 2;
+    }
+    return eWrongQualReason_bad_codon_start_value + 2;
+  }
+
+  if (IsGbIndexQualPairValid (feat_idx, qual_code)) {
+    return 1;
+  }
+  return 0;
+}
+
+
static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, ImpFeatPtr ifp)
{
@@ -17992,6 +19303,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
CharPtr str;
CharPtr tmp;
Int2 val;
+ Int4 qvalid;
if (vsp == NULL || gcp == NULL || sfp == NULL || ifp == NULL)
return;
@@ -18092,8 +19404,16 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
}
}
}
+ if (StringHasNoText(sfp->comment) && sfp->qual == NULL && sfp->dbxref == NULL) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NeedsNote, "A note or other qualifier is required for a misc_feature");
+ }
}
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
+ qvalid = IsQualValidForFeature (gbqual, sfp);
+ if (qvalid == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnImpFeat, "Wrong qualifier %s for feature %s", gbqual->qual, key);
+ }
+
if (StringCmp (gbqual->qual, "gsdb_id") == 0) {
continue;
}
@@ -18105,26 +19425,6 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnknownImpFeatQual, "NULL qualifier");
}
} else if (index != -1) {
- found = FALSE;
- for (i = 0; i < ParFlat_GBFeat[index].opt_num; i++) {
- qual = ParFlat_GBFeat[index].opt_qual[i];
- if (qual == val) {
- found = TRUE;
- break;
- }
- }
- if (!found) {
- for (i = 0; i < ParFlat_GBFeat[index].mand_num; i++) {
- qual = ParFlat_GBFeat[index].mand_qual[i];
- if (qual == val) {
- found = TRUE;
- break;
- }
- }
- if (!found) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnImpFeat, "Wrong qualifier %s for feature %s", gbqual->qual, key);
- }
- }
if (gbqual->val != NULL) {
if (val == GBQUAL_rpt_type) {
failed = FALSE;
@@ -18245,7 +19545,7 @@ static void ValidateImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPt
if (!found) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_InvalidQualifierValue, "%s is not a legal value for qualifier %s", gbqual->val, gbqual->qual);
}
- } else if (val == GBQUAL_mobile_element) {
+ } else if (val == GBQUAL_mobile_element_type) {
found = FALSE;
str = NULL;
for (i = 0; legal_mobile_element_strings[i] != NULL; i++) {
@@ -18383,6 +19683,7 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
CharPtr str;
CharPtr tmp;
Int2 val;
+ Int4 qvalid;
if (vsp == NULL || gcp == NULL || sfp == NULL)
return;
@@ -18395,6 +19696,13 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
}
index = GBFeatKeyNameValid (&key, FALSE);
for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
+ qvalid = IsQualValidForFeature (gbqual, sfp);
+ if (qvalid == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "Wrong qualifier %s for feature %s", gbqual->qual, key);
+ } else if (qvalid > 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, sWrongQualReasons[qvalid - 2]);
+ }
+
if (StringCmp (gbqual->qual, "gsdb_id") == 0) {
continue;
}
@@ -18406,31 +19714,18 @@ static void ValidateNonImpFeat (ValidStructPtr vsp, GatherContextPtr gcp, SeqFea
if (StringCmp (gbqual->qual, "cyt_map") == 0) continue;
if (StringCmp (gbqual->qual, "rad_map") == 0) continue;
}
+ if (sfp->data.choice == SEQFEAT_CDREGION) {
+ if (StringCmp (gbqual->qual, "orig_transcript_id") == 0) continue;
+ }
+ if (sfp->data.choice == SEQFEAT_RNA) {
+ if (StringCmp (gbqual->qual, "orig_protein_id") == 0) continue;
+ if (StringCmp (gbqual->qual, "orig_transcript_id") == 0) continue;
+ }
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnknownFeatureQual, "Unknown qualifier %s", gbqual->qual);
} else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnknownFeatureQual, "NULL qualifier");
}
} else if (index != -1) {
- found = FALSE;
- for (i = 0; i < ParFlat_GBFeat[index].opt_num; i++) {
- qual = ParFlat_GBFeat[index].opt_qual[i];
- if (qual == val) {
- found = TRUE;
- break;
- }
- }
- if (!found) {
- for (i = 0; i < ParFlat_GBFeat[index].mand_num; i++) {
- qual = ParFlat_GBFeat[index].mand_qual[i];
- if (qual == val) {
- found = TRUE;
- break;
- }
- }
- if (!found) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_WrongQualOnFeature, "Wrong qualifier %s for feature %s", gbqual->qual, key);
- }
- }
if (gbqual->val != NULL) {
if (val == GBQUAL_rpt_type) {
failed = FALSE;
@@ -18674,6 +19969,7 @@ static Boolean PartialAtSpliceSiteOrGap (ValidStructPtr vsp, SeqLocPtr head, Uin
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_Range,
"Unable to check splice consensus because feature outside range of sequence");
*/
+ BioseqUnlock (bsp);
return FALSE;
}
@@ -18940,6 +20236,7 @@ static Boolean TwoListsHaveCommonItem (
return FALSE;
}
+
static void CheckTrnaCodons (
ValidStructPtr vsp,
GatherContextPtr gcp,
@@ -18950,15 +20247,12 @@ static void CheckTrnaCodons (
{
Uint1 aa = 0;
Uint1 anticodon [4];
- BioseqPtr bsp;
Char ch;
Int2 code = 0;
CharPtr codes = NULL;
Uint1 codon [4];
CharPtr complementBase = " TVGH CD M KN YSAABW R ";
- Uint1 from;
CharPtr gen_code_name = NULL;
- GeneticCodePtr gncp;
Int2 i;
Uint2 idx;
Uint1 index;
@@ -18972,7 +20266,6 @@ static void CheckTrnaCodons (
StreamCache sc;
ErrSev sev = SEV_ERROR;
SeqLocPtr slp;
- SeqMapTablePtr smtp;
CharPtr str;
Uint1 taa;
CharPtr three_letter_aa = NULL;
@@ -18986,35 +20279,7 @@ static void CheckTrnaCodons (
/* extract indicated amino acid */
- aa = 0;
- if (trp->aatype == 2) {
- aa = trp->aa;
- } else {
- from = 0;
- switch (trp->aatype) {
- case 0:
- from = 0;
- break;
- case 1:
- from = Seq_code_iupacaa;
- break;
- case 2:
- from = Seq_code_ncbieaa;
- break;
- case 3:
- from = Seq_code_ncbi8aa;
- break;
- case 4:
- from = Seq_code_ncbistdaa;
- break;
- default:
- break;
- }
- smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
- if (smtp != NULL) {
- aa = SeqMapTableConvert (smtp, trp->aa);
- }
- }
+ aa = GetAaFromtRNA (trp);
three_letter_aa = Get3LetterSymbol (NULL, Seq_code_ncbieaa, NULL, aa);
if (StringHasNoText (three_letter_aa)) {
@@ -19022,22 +20287,8 @@ static void CheckTrnaCodons (
}
/* find genetic code table */
+ codes = GetCodesFortRNA(sfp, &code);
- bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID);
- BioseqToGeneticCode (bsp, &code, NULL, NULL, NULL, 0, NULL);
-
- gncp = GeneticCodeFind (code, NULL);
- if (gncp == NULL) {
- gncp = GeneticCodeFind (1, NULL);
- code = 1;
- }
- if (gncp == NULL) return;
-
- for (vnp = (ValNodePtr) gncp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
- if (vnp->choice != 3) continue;
- codes = (CharPtr) vnp->data.ptrvalue;
- break;
- }
if (codes == NULL) return;
for (vnp = genetic_code_name_list; vnp != NULL; vnp = vnp->next) {
@@ -19413,17 +20664,35 @@ static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp)
Boolean partial3;
SeqDescrPtr sdp;
ErrSev sev;
+ Boolean need_to_unlock = FALSE;
if (vsp == NULL || sfp == NULL) return;
if (sfp->product == NULL) return;
if (!vsp->useSeqMgrIndexes) return;
bsp = BioseqFindFromSeqLoc (sfp->product);
+ if (bsp == NULL && vsp->farFetchCDSproducts) {
+ bsp = BioseqLockById (SeqLocId(sfp->product));
+ if (bsp != NULL) {
+ need_to_unlock = TRUE;
+ }
+ }
if (bsp == NULL) return;
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
- if (sdp == NULL) return;
+ if (sdp == NULL) {
+ if (need_to_unlock) {
+ BioseqUnlock(bsp);
+ }
+ return;
+ }
mip = (MolInfoPtr) sdp->data.ptrvalue;
- if (mip == NULL) return;
+ if (mip == NULL) {
+ if (need_to_unlock) {
+ BioseqUnlock (bsp);
+ }
+ return;
+ }
CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
+
switch (mip->completeness) {
case 0 : /* unknown */
break;
@@ -19483,6 +20752,9 @@ static void CheckCDSPartial (ValidStructPtr vsp, SeqFeatPtr sfp)
default :
break;
}
+ if (need_to_unlock) {
+ BioseqUnlock (bsp);
+ }
}
static void CheckForCommonCDSProduct (ValidStructPtr vsp, SeqFeatPtr sfp)
@@ -19509,6 +20781,7 @@ static void CheckForCommonCDSProduct (ValidStructPtr vsp, SeqFeatPtr sfp)
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
if (crp != NULL && crp->orf)
return;
+
grp = SeqMgrGetGeneXref (sfp);
if (grp == NULL || (!SeqMgrGeneIsSuppressed (grp))) {
gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
@@ -20970,7 +22243,7 @@ static ValNodePtr ValidateGoTermQualifier (
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_BadGeneOntologyFormat, "Bad data format for GO term qualifier PMID");
}
break;
- case 4 :
+ case 5 :
if (ufp->choice == 1) {
evidence = (CharPtr) ufp->data.ptrvalue;
} else {
@@ -21778,10 +23051,94 @@ static void ValidateRna (SeqFeatPtr sfp, ValidStructPtr vsp, GatherContextPtr gc
}
+/*
+ * Reports whether the gene xref on sfp merely repeats the gene feature
+ * that overlaps sfp's location.
+ *
+ * Returns FALSE when sfp is NULL, has no gene xref, has a suppressing
+ * xref, or no overlapping gene feature with GeneRef data is found.
+ * Otherwise the xref and the overlapping gene are compared
+ * case-insensitively on the first of these that applies:
+ *   - locus_tag, when both sides have text
+ *   - locus, when both sides have text
+ *   - the first synonym, when both sides have a synonym list
+ * A match is discarded again when the overlap scan records more than one
+ * gene in dsd.num_at_max (the bookkeeping is done by DummySMFEProc, which
+ * is defined elsewhere -- presumably genes tied for the best overlap).
+ */
+NLM_EXTERN Boolean IsGeneXrefRedundant (SeqFeatPtr sfp)
+{
+  GeneRefPtr         grp;
+  SeqFeatPtr         sfpx;
+  GeneRefPtr         grpx;
+  Boolean            redundantgenexref = FALSE;
+  CharPtr            syn1, syn2;
+  DummySmfeData      dsd;
+  SeqMgrFeatContext  fcontext;
+
+  if (sfp == NULL) {
+    return FALSE;
+  }
+
+  grp = SeqMgrGetGeneXref (sfp);
+  if (grp == NULL) {
+    return FALSE;
+  }
+  if (SeqMgrGeneIsSuppressed (grp)) return FALSE;
+
+  sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
+  if (sfpx == NULL || sfpx->data.choice != SEQFEAT_GENE)
+    return FALSE;
+  grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
+  if (grpx == NULL)
+    return FALSE;
+
+  /* each test must look at BOTH gene refs; checking grp twice let an xref
+     with a locus_tag (or locus) shadow the later comparisons even when the
+     overlapping gene had no such field */
+  if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
+    if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) {
+      redundantgenexref = TRUE;
+    }
+  } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
+    if (StringICmp (grp->locus, grpx->locus) == 0) {
+      redundantgenexref = TRUE;
+    }
+  } else if (grp->syn != NULL && grpx->syn != NULL) {
+    syn1 = (CharPtr) grp->syn->data.ptrvalue;
+    syn2 = (CharPtr) grpx->syn->data.ptrvalue;
+    if ((StringDoesHaveText (syn1)) && StringDoesHaveText (syn2)) {
+      if (StringICmp (syn1, syn2) == 0) {
+        redundantgenexref = TRUE;
+      }
+    }
+  }
+
+  if (redundantgenexref) {
+    MemSet ((Pointer) &dsd, 0, sizeof (DummySmfeData));
+    dsd.max = INT4_MAX;
+    dsd.num_at_max = 0;
+    dsd.equivalent_genes = FALSE;
+    dsd.grp_at_max = NULL;
+    /* only the callback's tallies in dsd matter; the returned count is not used */
+    SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0,
+                                     LOCATION_SUBSET, (Pointer) &dsd, DummySMFEProc);
+    if (dsd.num_at_max > 1) {
+      redundantgenexref = FALSE;
+    }
+  }
+  return redundantgenexref;
+}
+
+
+/*
+ * Compares the 5' and 3' partial flags of a coding region's location with
+ * those of the first FEATDEF_PROT feature on its product Bioseq, and
+ * reports ERR_SEQ_FEAT_PartialsInconsistent (warning) when either end
+ * disagrees.  Does nothing when cds is not a coding region, when the
+ * product Bioseq cannot be found, or when it has no protein feature.
+ */
+static void CheckCodingRegionAndProteinFeaturePartials (SeqFeatPtr cds, ValidStructPtr vsp)
+{
+  Boolean            cds5, cds3;
+  Boolean            prot5, prot3;
+  SeqMgrFeatContext  fctx;
+  BioseqPtr          product_bsp;
+  SeqFeatPtr         prot_feat;
+
+  if (cds == NULL || vsp == NULL) return;
+  if (cds->data.choice != SEQFEAT_CDREGION) return;
+
+  product_bsp = BioseqFindFromSeqLoc (cds->product);
+  if (product_bsp == NULL) return;
+
+  prot_feat = SeqMgrGetNextFeature (product_bsp, NULL, 0, FEATDEF_PROT, &fctx);
+  if (prot_feat == NULL) return;
+
+  CheckSeqLocForPartial (cds->location, &cds5, &cds3);
+  CheckSeqLocForPartial (prot_feat->location, &prot5, &prot3);
+
+  /* compare truthiness, not raw values: an end conflicts when exactly one side is partial */
+  if ((!cds5) != (!prot5) || (!cds3) != (!prot3)) {
+    ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialsInconsistent, "Coding region and protein feature partials conflict");
+  }
+}
+
+
NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
{
Int2 type, i, j;
- static char *parterr[2] = { "PartialProduct", "PartialLocation" };
static char *parterrs[4] = {
"Start does not include first/last residue of sequence",
"Stop does not include first/last residue of sequence",
@@ -21826,7 +23183,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
SeqFeatPtr sfpx = NULL, sfpy = NULL, prt;
SeqFeatPtr operon;
Boolean redundantgenexref;
- SeqMgrFeatContext fcontext;
+ SeqMgrFeatContext fcontext, gcontext;
CharPtr syn1, syn2, label = NULL, genexref_label;
Uint2 oldEntityID;
Uint4 oldItemID;
@@ -21862,10 +23219,8 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
Boolean farFetchProd;
Boolean skip;
Boolean is_nc = FALSE;
- Boolean no_nonconsensus_except = TRUE;
VariationRefPtr vrfp;
-
vsp = (ValidStructPtr) (gcp->userdata);
sfp = (SeqFeatPtr) (gcp->thisitem);
vsp->descr = NULL;
@@ -21902,14 +23257,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
partials[0] = SeqLocPartialCheckEx (sfp->product, farFetchProd);
partials[1] = SeqLocPartialCheck (sfp->location);
- if (sfp->excpt) {
- if (StringISearch (sfp->except_text, "nonconsensus splice site") != NULL ||
- StringISearch (sfp->except_text, "heterogeneous population sequenced") != NULL ||
- StringISearch (sfp->except_text, "low-quality sequence region") != NULL ||
- StringISearch (sfp->except_text, "artificial location") != NULL) {
- no_nonconsensus_except = FALSE;
- }
- }
+ CheckCodingRegionAndProteinFeaturePartials (sfp, vsp);
if ((partials[0] != SLP_COMPLETE) || (partials[1] != SLP_COMPLETE) || (sfp->partial)) { /* partialness */
/* a feature on a partial sequence should be partial -- if often isn't */
@@ -21919,11 +23267,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
/* a partial feature, with complete location, but partial product */
else if ((sfp->partial) && (sfp->product != NULL) && (partials[1] == SLP_COMPLETE) && (sfp->product->choice == SEQLOC_WHOLE)
&& (partials[0] != SLP_COMPLETE)) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* skip in gpipe genomic */
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "When SeqFeat.product is a partial Bioseq, SeqFeat.location should also be partial");
- }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem, "When SeqFeat.product is a partial Bioseq, SeqFeat.location should also be partial");
}
/* gene on segmented set is now 'order', should also be partial */
else if (type == SEQFEAT_GENE && sfp->product == NULL && partials[1] == SLP_INTERNAL) {
@@ -21959,8 +23303,6 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
tmp = StringMove (tmp, "FALSE");
if (bsp == NULL && LocationIsFar (sfp->product) && NoFetchFunctions ()) {
vsp->far_fetch_failure = TRUE;
- } else if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* ignore inconsistent partial warnings in genomic gpipe sequence */
} else {
ValidErr (vsp, sev, ERR_SEQ_FEAT_PartialsInconsistent, buf);
}
@@ -21984,11 +23326,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
tmp = StringMove (tmp, "TRUE");
else
tmp = StringMove (tmp, "FALSE");
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* ignore inconsistent partial warnings in genomic gpipe sequence */
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialsInconsistent, buf);
- }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialsInconsistent, buf);
}
/* 5' or 3' partial location giving unclassified partial product */
else if (((partials [1] & SLP_START) != 0 || ((partials [1] & SLP_STOP) != 0)) && ((partials [0] & SLP_OTHER) != 0) && sfp->partial) {
@@ -21996,40 +23334,67 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
/* may have other error bits set as well */
- for (i = 0; i < 2; i++) {
- errtype = SLP_NOSTART;
- for (j = 0; j < 4; j++) {
- bypassGeneTest = FALSE;
- if (partials[i] & errtype) {
- if (i == 1 && j < 2 && IsCddFeat (sfp)) {
- /* suppresses warning */
- } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_GENE && SameAsCDS (sfp, errtype, NULL)) {
- /*
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
- "%s: %s",
- parterr[i], parterrs[j]);
- */
- } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_GENE && SameAsMRNA (sfp, errtype)) {
- } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && SameAsCDS (sfp, errtype, &bypassGeneTest)) {
- } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_mRNA && (! bypassGeneTest) && SameAsGene (sfp)) {
- } else if (i == 1 && j < 2 && sfp->idx.subtype == FEATDEF_exon && SameAsMRNA (sfp, errtype)) {
+ /* PartialProduct */
+ errtype = SLP_NOSTART;
+ for (j = 0; j < 4; j++) {
+ bypassGeneTest = FALSE;
+ if (partials[0] & errtype) {
+ if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt &&
+ StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
+ } else if (sfp->data.choice == SEQFEAT_CDREGION && j == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialProduct: 5' partial is not at start AND is not at consensus splice site");
+ } else if (sfp->data.choice == SEQFEAT_CDREGION && j == 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialProduct: 3' partial is not at stop AND is not at consensus splice site");
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialProduct: %s", parterrs[j]);
+ }
+ }
+ errtype <<= 1;
+ }
+ /* PartialLocation */
+ errtype = SLP_NOSTART;
+ for (j = 0; j < 4; j++) {
+ bypassGeneTest = FALSE;
+ if (partials[1] & errtype) {
+ if (j == 3) {
+ if (LocationIsFar (sfp->location) && NoFetchFunctions ()) {
+ vsp->far_fetch_failure = TRUE;
+ } else if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt &&
+ StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: Improper use of partial (greater than or less than)");
+ }
+ } else if (j == 2) {
+ if (LocationIsFar (sfp->location) && NoFetchFunctions ()) {
+ vsp->far_fetch_failure = TRUE;
+ } else {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: Internal partial intervals do not include first/last residue of sequence");
+ }
+ } else {
+ if (IsCddFeat (sfp)) {
+ /* suppresses warning */
+ } else if (sfp->data.choice == SEQFEAT_GENE && SameAsCDS (sfp, errtype, NULL)) {
+ } else if (sfp->data.choice == SEQFEAT_GENE && SameAsMRNA (sfp, errtype)) {
+ } else if (sfp->idx.subtype == FEATDEF_mRNA && SameAsCDS (sfp, errtype, &bypassGeneTest)) {
+ } else if (sfp->idx.subtype == FEATDEF_mRNA && (! bypassGeneTest) && SameAsGene (sfp)) {
+ } else if (sfp->idx.subtype == FEATDEF_exon && SameAsMRNA (sfp, errtype)) {
} else if (LocationIsFar (sfp->location) && NoFetchFunctions ()) {
vsp->far_fetch_failure = TRUE;
-
- } else if (i == 1 && j < 2 && sfp->data.choice == SEQFEAT_CDREGION && SameAsMRNA (sfp, errtype) &&
+ } else if (sfp->data.choice == SEQFEAT_CDREGION && SameAsMRNA (sfp, errtype) &&
PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) {
- } else if (i == 1 && j < 2 && PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) {
+ } else if (PartialAtSpliceSiteOrGap (vsp, sfp->location, errtype, &isgap, &badseq)) {
if (! isgap) {
if (sfp->idx.subtype != FEATDEF_CDS || SplicingNotExpected (sfp)) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep && i == 1 && (j == 0 || j == 1 || j == 2)) {
- /* ignore in genomic gpipe sequence */
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
- "%s: %s (but is at consensus splice site)",
- parterr[i], parterrs[j]);
- }
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: %s (but is at consensus splice site)",
+ parterrs[j]);
} else if (sfp->idx.subtype == FEATDEF_CDS) {
bsp = BioseqFindFromSeqLoc (sfp->location);
if (bsp != NULL) {
@@ -22039,71 +23404,41 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
if (mip != NULL) {
if (mip->biomol == MOLECULE_TYPE_MRNA) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
- "%s: %s (but is at consensus splice site, but is on an mRNA that is already spliced)",
- parterr[i], parterrs[j]);
+ "PartialLocation: %s (but is at consensus splice site, but is on an mRNA that is already spliced)",
+ parterrs[j]);
}
}
}
}
}
}
- } else if (i == 1 && j < 2 && badseq) {
+ } else if (badseq) {
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PartialProblem,
- "%s: %s (and is at bad sequence)",
- parterr[i], parterrs[j]);
+ "PartialLocation: %s (and is at bad sequence)",
+ parterrs[j]);
} else if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt &&
StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
} else if (sfp->data.choice == SEQFEAT_CDREGION && j == 0) {
- if (no_nonconsensus_except) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* skip in gpipe genomic */
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
- "%s: %s", parterr[i], "5' partial is not at start AND"
- " is not at consensus splice site");
- }
- }
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: 5' partial is not at start AND is not at consensus splice site");
} else if (sfp->data.choice == SEQFEAT_CDREGION && j == 1) {
- if (no_nonconsensus_except) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* skip in gpipe genomic */
- } else {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
- "%s: %s", parterr[i], "3' partial is not at stop AND"
- " is not at consensus splice site");
- }
- }
- } else if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep && i == 1 && (j == 0 || j == 1 || j == 2)) {
- /* ignore start/stop not at end in genomic gpipe sequence */
- } else {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
- "%s: %s", parterr[i], parterrs[j]);
+ "PartialLocation: 3' partial is not at stop AND is not at consensus splice site");
+ } else if (j == 0) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: Start does not include first/last residue of sequence");
+ } else if (j == 1) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
+ "PartialLocation: Stop does not include first/last residue of sequence");
}
}
- errtype <<= 1;
}
+ errtype <<= 1;
}
}
CheckForIllegalDbxref (vsp, gcp, sfp->dbxref);
- /*
- for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
- id = -1;
- db = vnp->data.ptrvalue;
- if (db && db->db) {
- for (i = 0; i < DBNUM; i++) {
- if (StringCmp (db->db, dbtag[i]) == 0) {
- id = i;
- break;
- }
- }
- if (id == -1 || (type != SEQFEAT_CDREGION && id < 4)) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s", db->db);
- }
- }
- }
- */
switch (type) {
case 1: /* Gene-ref */
@@ -22212,6 +23547,17 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
}
}
+ if (grp->syn != NULL) {
+ bsp = BioseqFindFromSeqLoc (sfp->location);
+ for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
+ str = (CharPtr) vnp->data.ptrvalue;
+ if (StringHasNoText (str)) continue;
+ sfpx = SeqMgrGetFeatureByLabel (bsp, str, SEQFEAT_GENE, 0, NULL);
+ if (sfpx != NULL && sfpx != sfp) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IdenticalGeneSymbolAndSynonym, "gene synonym has same value (%s) as locus of another gene feature", str);
+ }
+ }
+ }
if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grp->desc) && StringCmp (grp->locus, grp->desc) == 0) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UndesiredGeneSynonym, "gene description has same value as gene locus");
}
@@ -22325,6 +23671,12 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
i = SeqLocCompare (cbp->loc, sfp->location);
if ((i != SLC_A_IN_B) && (i != SLC_A_EQ_B)) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_Range, "Code-break location not in coding region");
+ } else if (sfp->product != NULL) {
+ slp = dnaLoc_to_aaLoc (sfp, cbp->loc, TRUE, NULL, TRUE);
+ if (slp == NULL) {
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_Range, "Code-break location not in coding region - may be frame problem");
+ }
+ SeqLocFree (slp);
}
if (prevcbp != NULL) {
i = SeqLocCompare (cbp->loc, prevcbp->loc);
@@ -23005,7 +24357,7 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
dsd.grp_at_max = NULL;
count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0,
LOCATION_SUBSET, (Pointer) &dsd, DummySMFEProc);
- if (dsd.num_at_max > 1) {
+ if (dsd.num_at_max > 1 && sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element) {
if (dsd.equivalent_genes) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_GeneXrefNeeded,
"Feature overlapped by %d identical-length equivalent genes but has no cross-reference", (int) dsd.num_at_max);
@@ -23062,14 +24414,28 @@ NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
}
}
- sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
+ sfpx = NULL;
+ if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &fcontext) == sfp) {
+ if (fcontext.bad_order || fcontext.mixed_strand) {
+ sfpx = SeqMgrGetOverlappingFeatureEx (sfp->location, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, &gcontext, TRUE);
+ } else if (vsp->has_multi_int_genes) {
+ sfpx = SeqMgrGetOverlappingFeatureEx (sfp->location, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, &gcontext, TRUE);
+ if (sfpx == NULL && (vsp->has_seg_bioseqs || vsp->is_embl_ddbj_in_sep || vsp->is_old_gb_in_sep)) {
+ sfpx = SeqMgrGetOverlappingGene (sfp->location, &gcontext);
+ }
+ } else {
+ sfpx = SeqMgrGetOverlappingGene (sfp->location, &gcontext);
+ }
+ } else {
+ sfpx = SeqMgrGetOverlappingGene (sfp->location, &gcontext);
+ }
if (sfpx == NULL || sfpx->data.choice != SEQFEAT_GENE)
return;
grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
if (grpx == NULL)
return;
redundantgenexref = FALSE;
- label = fcontext.label;
+ label = gcontext.label;
if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grp->locus_tag)) {
if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) {
redundantgenexref = TRUE;
@@ -23356,22 +24722,14 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
has_errors = TRUE;
other_than_mismatch = TRUE;
if (report_errors || rna_editing) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* suppress if gpipe genomic */
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail is 100%s polyA", (long) mlen, farstr, (long) plen, "%");
- }
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail is 100%s polyA", (long) mlen, farstr, (long) plen, "%");
}
plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */
} else {
has_errors = TRUE;
other_than_mismatch = TRUE;
if (report_errors || rna_editing) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* suppress if gpipe genomic */
- } else {
- ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail >= 95%s polyA", (long) mlen, farstr, (long) plen, "%");
- }
+ ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_PolyATail, "Transcript length [%ld] less than %sproduct length [%ld], but tail >= 95%s polyA", (long) mlen, farstr, (long) plen, "%");
}
plen = mlen; /* if it passes polyA test, allow base-by-base comparison on common length */
}
@@ -23407,14 +24765,6 @@ NLM_EXTERN void MrnaTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
MemFree (pdseq);
}
-erret:
-
- MemFree (mrseq);
-
- if (unlockProd) {
- BioseqUnlock (bsp);
- }
-
if (! report_errors) {
if (! has_errors) {
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnnecessaryException, "mRNA has exception but passes transcription test");
@@ -23428,6 +24778,15 @@ erret:
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_UnqualifiedException, "mRNA has unqualified transcribed product replaced exception");
}
}
+
+erret:
+
+ MemFree (mrseq);
+
+ if (unlockProd) {
+ BioseqUnlock (bsp);
+ }
+
}
/*****************************************************************************
@@ -23605,6 +24964,12 @@ static void ValidateTranslExcept (
MemFree (protseq);
}
+typedef struct cdsmismatch { /* one protein-vs-translation residue mismatch, collected in CdTransCheck's mismatches array */
+ Int4 pos; /* zero-based residue index of the mismatch; messages print pos + 1 */
+ Int2 cds_residue; /* residue1 at that position, printed as the "translation" residue */
+ Int2 prot_residue; /* residue2 at that position, printed as the "protein" residue */
+} CDSMismatchData, PNTR CDSMismatchPtr;
+
NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
{
@@ -23618,6 +24983,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
CdRegionPtr crp;
SeqIdPtr protid = NULL;
Int2 residue1, residue2, stop_count = 0, mismatch = 0, ragged = 0;
+ CDSMismatchData mismatches[11];
Boolean got_stop = FALSE;
/*
SeqPortPtr spp = NULL;
@@ -23638,7 +25004,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
Boolean partial5 = FALSE;
Boolean partial3 = FALSE;
Boolean rna_editing = FALSE;
- CharPtr nuclocstr, farstr = "";
+ CharPtr nuclocstr, farstr = "", loc2str;
CodeBreakPtr cbp;
Int4 pos1, pos2, pos;
SeqLocPtr tmp;
@@ -23803,12 +25169,6 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
prot1len = prot1seq->length;
}
- if (annotated_by_transcript_or_proteomic) {
- if (1.2 * prot2len < prot1len) {
- ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TransLen, "Protein product length [%ld] is more than 120%% of the %stranslation length [%ld]", prot1len, farstr, prot2len);
- }
- }
-
if (alt_start && gccode == 1) {
/* sev = SEV_WARNING; */
sev = SEV_NONE; /* only enable for RefSeq, leave old code in for now */
@@ -23969,6 +25329,12 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
}
}
+ if (annotated_by_transcript_or_proteomic) {
+ if (1.2 * prot2len < prot1len) {
+ ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TransLen, "Protein product length [%ld] is more than 120%% of the %stranslation length [%ld]", prot1len, farstr, prot2len);
+ }
+ }
+
/*
prot2len = BSLen (newprot);
len = prot2len;
@@ -23995,15 +25361,9 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
sev = SEV_WARNING;
}
if (report_errors || unclassified_except) {
- if (! unclassified_except) {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon,
- "Illegal start codon (and %ld internal stops). Probably wrong genetic code [%d]", (long) stop_count, gccode);
- }
- if (unclassified_except && vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* suppress if gpipe genomic */
- } else {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and illegal start codon). Genetic code [%d]", (long) stop_count, gccode);
- }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon,
+ "Illegal start codon (and %ld internal stops). Probably wrong genetic code [%d]", (long) stop_count, gccode);
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and illegal start codon). Genetic code [%d]", (long) stop_count, gccode);
}
} else if (got_x) {
has_errors = TRUE;
@@ -24013,15 +25373,9 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
sev = SEV_WARNING;
}
if (report_errors || unclassified_except) {
- if (! unclassified_except) {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon,
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_StartCodon,
"Ambiguous start codon (and %ld internal stops). Possibly wrong genetic code [%d]", (long) stop_count, gccode);
- }
- if (unclassified_except && vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* suppress if gpipe genomic */
- } else {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and ambiguous start codon). Genetic code [%d]", (long) stop_count, gccode);
- }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops (and ambiguous start codon). Genetic code [%d]", (long) stop_count, gccode);
}
} else {
has_errors = TRUE;
@@ -24057,11 +25411,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
sev = SEV_REJECT;
}
}
- if (unclassified_except && vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* suppress if gpipe genomic */
- } else {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops. Genetic code [%d]", (long) stop_count, gccode);
- }
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_InternalStop, "%ld internal stops. Genetic code [%d]", (long) stop_count, gccode);
}
}
prot_ok = FALSE;
@@ -24070,13 +25420,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
} else if (got_dash) {
has_errors = TRUE;
other_than_mismatch = TRUE;
- if (report_errors && ! unclassified_except) {
+ if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
}
} else if (got_x && (! partial5)) {
has_errors = TRUE;
other_than_mismatch = TRUE;
- if (report_errors && ! unclassified_except) {
+ if (report_errors) {
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Ambiguous start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
}
}
@@ -24205,12 +25555,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
sev = SEV_WARNING;
}
}
- if (mismatch == 10) {
- has_errors = TRUE;
- if (report_errors && (! mismatch_except)) {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_MisMatchAA, "More than 10 mismatches. Genetic code [%d]", gccode);
- }
- } else if (i == 0) {
+ if (i == 0) {
if ((sfp->partial) && (!no_beg) && (!no_end)) { /* ok, it's partial */
has_errors = TRUE;
other_than_mismatch = TRUE;
@@ -24222,9 +25567,7 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
other_than_mismatch = TRUE;
if (report_errors) {
if (! got_dash) {
- if (! unclassified_except){
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
- }
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Illegal start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
}
}
} else if (residue1 == 'X') {
@@ -24232,51 +25575,63 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
other_than_mismatch = TRUE;
if (report_errors) {
if (! got_x) {
- if (! unclassified_except){
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Ambiguous start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
- }
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_StartCodon, "Ambiguous start codon used. Wrong genetic code [%d] or protein should be partial", gccode);
}
}
} else {
- nuclocstr = MapToNTCoords (sfp, protid, i);
- if (nuclocstr != NULL) {
- has_errors = TRUE;
- if (report_errors && (! mismatch_except)) {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
- "%sResidue %ld in protein [%c] != translation [%c] at %s", farstr, (long) (i + 1), (char) residue2, (char) residue1, nuclocstr);
- }
- } else {
- has_errors = TRUE;
- if (report_errors && (! mismatch_except)) {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
- "%sResidue %ld in protein [%c] != translation [%c]", farstr, (long) (i + 1), (char) residue2, (char) residue1);
- }
- }
- MemFree (nuclocstr);
- }
- } else if (mismatch < 10) {
- nuclocstr = MapToNTCoords (sfp, protid, i);
- if (nuclocstr != NULL) {
has_errors = TRUE;
- if (report_errors && (! mismatch_except)) {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
- "%sResidue %ld in protein [%c] != translation [%c] at %s", farstr, (long) (i + 1), (char) residue2, (char) residue1, nuclocstr);
- }
+ mismatches[mismatch].pos = i;
+ mismatches[mismatch].cds_residue = residue1;
+ mismatches[mismatch].prot_residue = residue2;
+ mismatch++;
+ }
+ } else {
+ has_errors = TRUE;
+ if (mismatch >= 10) {
+ mismatches[10].pos = i;
+ mismatches[10].cds_residue = residue1;
+ mismatches[10].prot_residue = residue2;
} else {
- has_errors = TRUE;
- if (report_errors && (! mismatch_except)) {
- ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
- "%sResidue %ld in protein [%c] != translation [%c]", farstr, (long) (i + 1), (char) residue2, (char) residue1);
- }
+ mismatches[mismatch].pos = i;
+ mismatches[mismatch].cds_residue = residue1;
+ mismatches[mismatch].prot_residue = residue2;
}
- MemFree (nuclocstr);
+ mismatch++;
}
- mismatch++;
}
}
- /*
- spp = SeqPortFree (spp);
- */
+
+ if (report_errors && !mismatch_except) {
+ if (mismatch > 10) {
+ if (report_errors && !mismatch_except) {
+ nuclocstr = MapToNTCoords (sfp, protid, mismatches[0].pos);
+ loc2str = MapToNTCoords (sfp, protid, mismatches[10].pos);
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
+ "%d mismatches found. First mismatch at %ld, residue in protein [%c] != translation [%c]%s%s. Last mismatch at %ld, residue in protein [%c] != translation [%c]%s%s. Genetic code [%d]",
+ mismatch,
+ (long) (mismatches[0].pos + 1), mismatches[0].prot_residue, mismatches[0].cds_residue,
+ nuclocstr == NULL ? "" : " at ", nuclocstr == NULL ? "" : nuclocstr,
+ (long) (mismatches[10].pos + 1), mismatches[10].prot_residue, mismatches[10].cds_residue,
+ loc2str == NULL ? "" : " at ", loc2str == NULL ? "" : loc2str,
+ gccode);
+ nuclocstr = MemFree (nuclocstr);
+ loc2str = MemFree (loc2str);
+ }
+ } else {
+ for (i = 0; i < mismatch; i++) {
+ nuclocstr = MapToNTCoords (sfp, protid, mismatches[i].pos);
+ ValidErr (vsp, sev, ERR_SEQ_FEAT_MisMatchAA,
+ "%sResidue %ld in protein [%c] != translation [%c]%s%s", farstr,
+ (long) (mismatches[i].pos + 1),
+ (char) mismatches[i].prot_residue,
+ (char) mismatches[i].cds_residue,
+ nuclocstr == NULL ? "" : " at ",
+ nuclocstr == NULL ? "" : nuclocstr);
+ nuclocstr = MemFree (nuclocstr);
+ }
+ }
+ }
+
} else {
has_errors = TRUE;
other_than_mismatch = TRUE;
@@ -24291,21 +25646,13 @@ NLM_EXTERN void CdTransCheck (ValidStructPtr vsp, SeqFeatPtr sfp)
has_errors = TRUE;
other_than_mismatch = TRUE;
if (report_errors) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* suppress if gpipe genomic */
- } else {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial");
- }
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "End of location should probably be partial");
}
} else {
has_errors = TRUE;
other_than_mismatch = TRUE;
if (report_errors) {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- /* suppress if gpipe genomic */
- } else {
- ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial");
- }
+ ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_PartialProblem, "This SeqFeat should not be partial");
}
}
show_stop = FALSE;
@@ -24390,6 +25737,55 @@ erret:
}
}
+
+/*
+ * mRNAMatchesCompleteCDSEnd
+ *
+ * Reports, per end, whether the CDS returned by GetCDSformRNA (mrna) is
+ * not partial at that end and ends at the same coordinate as the mRNA.
+ *
+ *   mrna - mRNA feature to examine; NULL leaves both results FALSE
+ *   p5   - if not NULL, receives TRUE when the CDS is not 5' partial and
+ *          its 5' end coincides with the mRNA's 5' end, FALSE otherwise
+ *   p3   - if not NULL, receives the same answer for the 3' end
+ *
+ * On the minus strand the 5' ends are compared with SeqLocStop and the
+ * 3' ends with SeqLocStart; for any other strand value the two are swapped.
+ */
+static void mRNAMatchesCompleteCDSEnd (SeqFeatPtr mrna, BoolPtr p5, BoolPtr p3)
+{
+  /* initialised so the tests below never read indeterminate values */
+  Boolean partial5 = FALSE, partial3 = FALSE;
+  SeqFeatPtr cds;
+  Uint2 strand;
+
+  /* results default to "no match" on every early return */
+  if (p5 != NULL) {
+    *p5 = FALSE;
+  }
+  if (p3 != NULL) {
+    *p3 = FALSE;
+  }
+
+  /* reject a NULL feature before handing it to GetCDSformRNA */
+  if (mrna == NULL) {
+    return;
+  }
+
+  cds = GetCDSformRNA (mrna);
+  if (cds == NULL) {
+    return;
+  }
+
+  strand = SeqLocStrand (mrna->location);
+
+  CheckSeqLocForPartial (cds->location, &partial5, &partial3);
+
+  /* 5' end: only a non-partial CDS end can "match" */
+  if (p5 != NULL && !partial5) {
+    if (strand == Seq_strand_minus) {
+      if (SeqLocStop (cds->location) == SeqLocStop (mrna->location)) {
+        *p5 = TRUE;
+      }
+    } else {
+      if (SeqLocStart (cds->location) == SeqLocStart (mrna->location)) {
+        *p5 = TRUE;
+      }
+    }
+  }
+
+  /* 3' end: mirror image of the 5' test */
+  if (p3 != NULL && !partial3) {
+    if (strand == Seq_strand_minus) {
+      if (SeqLocStart (cds->location) == SeqLocStart (mrna->location)) {
+        *p3 = TRUE;
+      }
+    } else {
+      if (SeqLocStop (cds->location) == SeqLocStop (mrna->location)) {
+        *p3 = TRUE;
+      }
+    }
+  }
+}
+
+
/*****************************************************************************
*
* SpliceCheck(sfp)
@@ -24407,7 +25803,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
/*
SeqPortPtr spp = NULL;
*/
- SeqIdPtr last_sip = NULL, sip, id;
+ SeqIdPtr last_sip = NULL, sip;
Int2 total, ctr;
BioseqPtr bsp = NULL;
Int4 strt, stp, len = 0, donor, acceptor;
@@ -24417,16 +25813,14 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
report_errors = TRUE, checkExonDonor, checkExonAcceptor, pseudo;
int severity;
Uint2 partialflag;
- Boolean gpsOrRefSeq = FALSE;
SeqEntryPtr sep;
- BioseqSetPtr bssp;
- TextSeqIdPtr tsip;
StreamCache sc;
SeqInt sint;
ValNode vn;
SeqMgrFeatContext context;
SeqFeatPtr mrna, gene;
GeneRefPtr grp;
+ Boolean ignore_partial_mrna_5 = FALSE, ignore_partial_mrna_3 = FALSE;
if (sfp == NULL)
return;
@@ -24498,15 +25892,14 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
firstPartial = FALSE;
lastPartial = FALSE;
+ if (sfp->idx.subtype == FEATDEF_mRNA) {
+ mRNAMatchesCompleteCDSEnd (sfp, &ignore_partial_mrna_5, &ignore_partial_mrna_3);
+ }
+
+
/* genomic product set or NT_ contig always relaxes to SEV_WARNING */
sep = vsp->sep;
- if (sep != NULL && IS_Bioseq_set (sep)) {
- bssp = (BioseqSetPtr) sep->data.ptrvalue;
- if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
- gpsOrRefSeq = TRUE;
- }
- }
slp = SeqLocFindPart (head, slp, EQUIV_IS_ONE);
while (slp != NULL) {
@@ -24520,31 +25913,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
if (sip == NULL)
break;
- /* genomic product set or NT_ contig always relaxes to SEV_WARNING */
bsp = BioseqFind (sip);
- if (bsp != NULL) {
- for (id = bsp->id; id != NULL; id = id->next) {
- if (id->choice == SEQID_OTHER) {
- tsip = (TextSeqIdPtr) id->data.ptrvalue;
- if (tsip != NULL && tsip->accession != NULL) {
- /*
- if (StringNICmp (tsip->accession, "NT_", 3) == 0) {
- gpsOrRefSeq = TRUE;
- } else if (StringNICmp (tsip->accession, "NC_", 3) == 0) {
- gpsOrRefSeq = TRUE;
- } else if (StringNICmp (tsip->accession, "NG_", 3) == 0) {
- gpsOrRefSeq = TRUE;
- } else if (StringNICmp (tsip->accession, "NM_", 3) == 0) {
- gpsOrRefSeq = TRUE;
- } else if (StringNICmp (tsip->accession, "NR_", 3) == 0) {
- gpsOrRefSeq = TRUE;
- }
- */
- gpsOrRefSeq = TRUE;
- }
- }
- }
- }
if ((ctr == 1) || (!SeqIdMatch (sip, last_sip))) {
/* spp = SeqPortFree (spp); */
@@ -24639,7 +26008,11 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
}
}
- if (((checkExonDonor && (!lastPartial)) || ctr < total) && (stp < (len - 2))) { /* check donor on all but last exon and on sequence */
+ if (((checkExonDonor && (!lastPartial))
+ || ctr < total
+ || (ctr == total && lastPartial && (sfp->idx.subtype != FEATDEF_mRNA || !ignore_partial_mrna_3)))
+ && (stp < (len - 2)))
+ { /* check donor on all but last exon and on sequence */
tbuf[0] = '\0';
StreamCacheSetPosition (&sc, stp + 1);
residue1 = StreamCacheGetResidue (&sc);
@@ -24672,11 +26045,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
}
}
} else {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- severity = SEV_INFO;
- } else if (gpsOrRefSeq) {
- severity = SEV_WARNING;
- } else if (checkExonDonor) {
+ if (checkExonDonor) {
severity = SEV_WARNING;
} else if (reportAsError) {
severity = SEV_ERROR;
@@ -24708,7 +26077,11 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
}
}
- if (((checkExonAcceptor && (!firstPartial)) || ctr != 1) && (strt > 1)) {
+ if (((checkExonAcceptor && (!firstPartial))
+ || ctr != 1
+ || (ctr == 1 && firstPartial && (sfp->idx.subtype != FEATDEF_mRNA || !ignore_partial_mrna_5)))
+ && (strt > 1))
+ {
StreamCacheSetPosition (&sc, strt - 2);
residue1 = StreamCacheGetResidue (&sc);
residue2 = StreamCacheGetResidue (&sc);
@@ -24722,11 +26095,7 @@ static void SpliceCheckEx (ValidStructPtr vsp, SeqFeatPtr sfp, Boolean checkAll)
has_errors = TRUE;
} else if (IS_residue (residue1) && IS_residue (residue2)) {
if (residue1 != 'A' || residue2 != 'G') {
- if (vsp->is_gpipe_in_sep && vsp->bsp_genomic_in_sep) {
- severity = SEV_INFO;
- } else if (gpsOrRefSeq) {
- severity = SEV_WARNING;
- } else if (checkExonAcceptor) {
+ if (checkExonAcceptor) {
severity = SEV_WARNING;
} else if (reportAsError) {
severity = SEV_ERROR;
@@ -24876,6 +26245,7 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
Uint1 strand2 = 0, strand1;
ErrSev sev, oldsev;
SeqIntPtr sip1, sip2, prevsip;
+ SeqBondPtr sbp;
SeqPntPtr spp;
PackSeqPntPtr pspp;
SeqIdPtr id1 = NULL, id2 = NULL;
@@ -24979,6 +26349,21 @@ NLM_EXTERN void ValidateSeqLoc (ValidStructPtr vsp, SeqLocPtr slp, CharPtr prefi
tmpval = PackSeqPntCheck (pspp);
prevsip = NULL;
break;
+ case SEQLOC_BOND:
+ sbp = (SeqBondPtr) tmp->data.ptrvalue;
+ if (sbp != NULL) {
+ spp = (SeqPntPtr) sbp->a;
+ if (spp != NULL) {
+ tmpval = SeqPntCheck (spp);
+ }
+ /* if already failed, no need to check second point */
+ if (tmpval) {
+ spp = (SeqPntPtr) sbp->b;
+ if (spp != NULL) {
+ tmpval = SeqPntCheck (spp);
+ }
+ }
+ }
case SEQLOC_NULL:
break;
default:
diff --git a/api/valid.h b/api/valid.h
index edd532c4..ff1f0208 100644
--- a/api/valid.h
+++ b/api/valid.h
@@ -29,7 +29,7 @@
*
* Version Creation Date: 1/1/94
*
-* $Revision: 6.56 $
+* $Revision: 6.73 $
*
* File Description: Sequence editing utilities
*
@@ -153,6 +153,7 @@ typedef struct validstruct {
Boolean strictLatLonCountry; /* bodies of water do not relax country vs. lat_lon mismatch */
Boolean rubiscoTest; /* look for ribulose bisphosphate variants */
Boolean indexerVersion; /* special tests for GenBank indexers */
+ Boolean disableSuppression; /* disables suppression of message by ShouldSuppressValidErr */
Int2 validationLimit; /* limit validation to major classes in Valid1GatherProc */
/* this section used for finer error reporting callback */
ValidErrorFunc errfunc;
@@ -169,6 +170,8 @@ typedef struct validstruct {
Boolean is_gps_in_sep; /* record has genomic product set */
Boolean other_sets_in_sep; /* record has pop/phy/mut/eco/wgs set */
Boolean is_embl_ddbj_in_sep; /* record has embl or ddbj seqid */
+ Boolean is_old_gb_in_sep; /* record has old style GenBank accession */
+ Boolean is_patent_in_sep; /* record has patent seqid */
Boolean is_insd_in_sep; /* record has genbank/embl/ddbj or tpg/tpe/tpd seqid */
Boolean only_lcl_gnl_in_sep; /* record has seqid of only local or general */
Boolean has_gnl_prot_sep; /* protein Bioseq has general seqid */
@@ -176,6 +179,8 @@ typedef struct validstruct {
Boolean is_smupd_in_sep; /* record in INSD internal processing */
Boolean feat_loc_has_gi; /* at least one feature has a gi location reference */
Boolean feat_prod_has_gi; /* at least one feature has a gi product reference */
+ Boolean has_multi_int_genes; /* record has multi-interval genes */
+ Boolean has_seg_bioseqs; /* record has segmented Bioseqs */
Boolean far_fetch_failure; /* a far location or bioseq with no fetch function */
VoidPtr rrna_array; /* sorted feature index array of rRNA features */
VoidPtr trna_array; /* sorted feature index array of tRNA features */
@@ -199,10 +204,86 @@ NLM_EXTERN Boolean CountryIsValid (CharPtr name, BoolPtr old_countryP, BoolPtr b
NLM_EXTERN CharPtr GetCorrectedCountryCapitalization (CharPtr name);
NLM_EXTERN Boolean LookForECnumberPattern (CharPtr str);
+NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str);
+
+/* original country latitude-longitude tests */
NLM_EXTERN Boolean IsCountryInLatLonList (CharPtr country);
NLM_EXTERN Boolean TestLatLonForCountry (CharPtr country, FloatHi lat, FloatHi lon);
NLM_EXTERN CharPtr GuessCountryForLatLon (FloatHi lat, FloatHi lon);
-NLM_EXTERN Boolean StringContainsBodyOfWater (CharPtr str);
+
+/* improved country latitude-longitude tests */
+/*
+ for proximity tests, range is a maximum bounding search box in degrees;
+ distanceP is filled in with a minimum distance in kilometers (subject
+ to non-spherical earth calculation error)
+*/
+
+NLM_EXTERN Boolean CountryIsInLatLonList (
+ CharPtr country
+);
+NLM_EXTERN Boolean WaterIsInLatLonList (
+ CharPtr country
+);
+
+NLM_EXTERN Boolean CountryContainsLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon
+);
+NLM_EXTERN Boolean WaterContainsLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon
+);
+
+NLM_EXTERN CharPtr LookupCountryByLatLon (
+ FloatHi lat,
+ FloatHi lon
+);
+NLM_EXTERN CharPtr LookupWaterByLatLon (
+ FloatHi lat,
+ FloatHi lon
+);
+
+NLM_EXTERN CharPtr CountryClosestToLatLon (
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+);
+NLM_EXTERN CharPtr WaterClosestToLatLon (
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+);
+
+NLM_EXTERN Boolean CountryIsNearLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+);
+NLM_EXTERN Boolean WaterIsNearLatLon (
+ CharPtr country,
+ FloatHi lat,
+ FloatHi lon,
+ FloatHi range,
+ FloatHi PNTR distanceP
+);
+
+NLM_EXTERN Boolean CountryExtremesOverlap (
+ CharPtr first,
+ CharPtr second
+);
+NLM_EXTERN Boolean WaterExtremesOverlap (
+ CharPtr first,
+ CharPtr second
+);
+
+NLM_EXTERN FloatHi CountryDataScaleIs (void);
+NLM_EXTERN FloatHi WaterDataScaleIs (void);
NLM_EXTERN Boolean ParseStructuredVoucher (CharPtr subname, CharPtr PNTR inst, CharPtr PNTR id);
NLM_EXTERN Boolean VoucherInstitutionIsValid (CharPtr inst);
@@ -213,6 +294,20 @@ NLM_EXTERN void ECNumberFSAFreeAll (void);
NLM_EXTERN Boolean HasTpaUserObject (BioseqPtr bsp);
NLM_EXTERN Boolean CountryBoxesOverlap (CharPtr country1, CharPtr country2);
+NLM_EXTERN Boolean IsGeneXrefRedundant (SeqFeatPtr sfp);
+
+/* warns if over 1000 /inference qualifiers or accessions in inference qualifiers */
+NLM_EXTERN Boolean TooManyInferenceAccessions (
+ SeqEntryPtr sep,
+ Int4Ptr numInferences,
+ Int4Ptr numAccessions
+);
+
+NLM_EXTERN Int4 IsQualValidForFeature (GBQualPtr gbqual, SeqFeatPtr sfp);
+NLM_EXTERN CharPtr GetGBFeatKeyForFeature (SeqFeatPtr sfp);
+NLM_EXTERN Boolean ShouldSuppressGBQual(Uint1 subtype, CharPtr qual_name);
+NLM_EXTERN Boolean ShouldBeAGBQual (Uint1 subtype, Int2 qual, Boolean allowProductGBQual);
+
#ifdef __cplusplus
}
diff --git a/api/valid.msg b/api/valid.msg
index d8669dc3..d18b7366 100644
--- a/api/valid.msg
+++ b/api/valid.msg
@@ -243,6 +243,10 @@ This sequence contains long stretches of Ns.
$^ HighNContentPercent, 69
This sequence contains a high percentage of Ns.
+$^ BadSegmentedSeq, 70
+Segmented sequences should have gap or virtual in between real components.
+
+
$$ SEQ_DESCR, 2
$^ BioSourceMissing, 1
@@ -510,6 +514,21 @@ Viral lineage information conflicts with MolInfo.
$^ MissingKeyword, 76
Expected keyword was not found.
+$^ FakeStructuredComment, 77
+Comment descriptor may have been formatted to look like structured comment.
+
+$^ StructuredCommentPrefixOrSuffixMissing, 78
+Structured comments should have a prefix or suffix.
+
+$^ LatLonWater, 79
+The lat_lon coordinate maps to a body of water.
+
+$^ LatLonOffshore, 80
+The lat_lon coordinate is probably in a minor or unnamed body of water.
+
+$^ MissingPersonalCollectionName, 81
+The personal collection does not indicate the name of the collector.
+
$$ GENERIC, 3
@@ -656,6 +675,13 @@ pop/phy/mut/eco set.
$^ SingleItemSet, 27
Only a single Bioseq was found in this BioseqSet. Is that what was intended?
+$^ MisplacedMolInfo, 28
+Mol-info should not be on a pop/phy/mut/eco/wgs/genbank/genprod set.
+
+$^ ImproperlyNestedSets, 29
+A pop/phy/mut/eco/wgs set has an unexpected internal set other than nuc-prot,
+seg-set, or parts set.
+
$$ SEQ_FEAT, 5
@@ -1334,6 +1360,20 @@ The CDS is not contained within the cross-referenced mRNA.
$^ LocusCollidesWithLocusTag, 176
A gene locus is identical with a gene locus_tag.
+$^ IdenticalGeneSymbolAndSynonym, 177
+The gene synonym is the same as the locus of a different gene.
+
+$^ NeedsNote, 178
+A misc_feature requires a note.
+
+$^ RptUnitRangeProblem, 179
+The value of the rpt_unit_range qualifier is not inside the parent feature location.
+
+$^ TooManyInferenceAccessions, 180
+There are too many inference qualifier accessions to have their versions verified by
+network access.
+
+
$$ SEQ_ALIGN, 6
$^ SeqIdProblem, 1
diff --git a/api/validerr.h b/api/validerr.h
index 515f59b1..7376ec29 100644
--- a/api/validerr.h
+++ b/api/validerr.h
@@ -71,6 +71,7 @@
#define ERR_SEQ_INST_DSmRNA 1,67
#define ERR_SEQ_INST_HighNContentStretch 1,68
#define ERR_SEQ_INST_HighNContentPercent 1,69
+#define ERR_SEQ_INST_BadSegmentedSeq 1,70
#define ERR_SEQ_DESCR 2,0
#define ERR_SEQ_DESCR_BioSourceMissing 2,1
#define ERR_SEQ_DESCR_InvalidForType 2,2
@@ -148,6 +149,11 @@
#define ERR_SEQ_DESCR_BioSourceNeedsChromosome 2,74
#define ERR_SEQ_DESCR_MolInfoConflictsWithBioSource 2,75
#define ERR_SEQ_DESCR_MissingKeyword 2,76
+#define ERR_SEQ_DESCR_FakeStructuredComment 2,77
+#define ERR_SEQ_DESCR_StructuredCommentPrefixOrSuffixMissing 2,78
+#define ERR_SEQ_DESCR_LatLonWater 2,79
+#define ERR_SEQ_DESCR_LatLonOffshore 2,80
+#define ERR_SEQ_DESCR_MissingPersonalCollectionName 2,81
#define ERR_GENERIC 3,0
#define ERR_GENERIC_NonAsciiAsn 3,1
#define ERR_GENERIC_Spell 3,2
@@ -192,6 +198,8 @@
#define ERR_SEQ_PKG_NucProtSetHasTitle 4,25
#define ERR_SEQ_PKG_ComponentMissingTitle 4,26
#define ERR_SEQ_PKG_SingleItemSet 4,27
+#define ERR_SEQ_PKG_MisplacedMolInfo 4,28
+#define ERR_SEQ_PKG_ImproperlyNestedSets 4,29
#define ERR_SEQ_FEAT 5,0
#define ERR_SEQ_FEAT_InvalidForType 5,1
#define ERR_SEQ_FEAT_PartialProblem 5,2
@@ -369,6 +377,10 @@
#define ERR_SEQ_FEAT_GeneXrefStrandProblem 5,174
#define ERR_SEQ_FEAT_CDSmRNAXrefLocationProblem 5,175
#define ERR_SEQ_FEAT_LocusCollidesWithLocusTag 5,176
+#define ERR_SEQ_FEAT_IdenticalGeneSymbolAndSynonym 5,177
+#define ERR_SEQ_FEAT_NeedsNote 5,178
+#define ERR_SEQ_FEAT_RptUnitRangeProblem 5,179
+#define ERR_SEQ_FEAT_TooManyInferenceAccessions 5,180
#define ERR_SEQ_ALIGN 6,0
#define ERR_SEQ_ALIGN_SeqIdProblem 6,1
#define ERR_SEQ_ALIGN_StrandRev 6,2